implemented D field in halfspace

David Mayerich
1 parent 559d0fcb
Showing 11 changed files with 485 additions and 280 deletions Show diff stats
math/complex.h
math/matrix.h
math/plane.h
math/realfield.cuh
math/vector.h
optics/beam.h
optics/efield.cuh
optics/esphere.cuh
optics/halfspace.cuh
optics/halfspace.h
optics/planewave.h
@@ -67,10 +67,17 @@ struct complex
         return result;
     }
  
+    //returns the signum (-1, 0, 1) of the real part; when the real part is
+    //exactly zero, the signum of the imaginary part is returned instead
+    CUDA_CALLABLE int sgn(){
+        if(r > 0) return 1;
+        else if(r < 0) return -1;
+        //both comparisons must be parenthesized: "0 < i - i < 0" parses as
+        //"(0 < (i - i)) < 0", which is always 0
+        else return (0 < i) - (i < 0);
+    }
+
 	//ARITHMETIC OPERATORS--------------------
  
     //binary + operator (returns the result of adding two complex values)
-    CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs)
+    CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs) const
     {
         complex<T> result;
         result.r = r + rhs.r;
@@ -78,7 +85,7 @@ struct complex
         return result;
     }
  
-	CUDA_CALLABLE complex<T> operator+ (const T rhs)
+	CUDA_CALLABLE complex<T> operator+ (const T rhs) const
     {
         complex<T> result;
         result.r = r + rhs;
@@ -87,7 +94,7 @@ struct complex
     }
  
     //binary - operator (returns the result of subtracting two complex values)
-    CUDA_CALLABLE complex<T> operator- (const complex<T> rhs)
+    CUDA_CALLABLE complex<T> operator- (const complex<T> rhs) const
     {
         complex<T> result;
         result.r = r - rhs.r;
@@ -105,7 +112,7 @@ struct complex
     }
  
     //binary MULTIPLICATION operators (returns the result of multiplying complex values)
-    CUDA_CALLABLE complex<T> operator* (const complex<T> rhs)
+    CUDA_CALLABLE complex<T> operator* (const complex<T> rhs) const
     {
         complex<T> result;
         result.r = r * rhs.r - i * rhs.i;
@@ -118,7 +125,7 @@ struct complex
     }
  
     //binary DIVISION operators (returns the result of dividing complex values)
-    CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs)
+    CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs) const
     {
         complex<T> result;
         T denom = rhs.r * rhs.r + rhs.i * rhs.i;
@@ -257,7 +264,7 @@ struct complex
 		return result;
 	}
  
-	std::string toStr()
+	std::string str()
 	{
 		std::stringstream ss;
 		ss<<"("<<r<<","<<i<<")";
@@ -280,6 +287,13 @@ struct complex
         return false;
     }
  
+    //inequality against a real scalar: true when the real part differs from rhs
+    //or the imaginary part is non-zero
+    CUDA_CALLABLE bool operator!=(T rhs)
+    {
+        return (r != rhs) || (i != 0);
+    }
+
 	//CASTING operators
 	template < typename otherT >
 	operator complex<otherT>()
@@ -451,7 +465,7 @@ CUDA_CALLABLE rts::complex&lt;A&gt; cos(const rts::complex&lt;A&gt; x)
 template<class A>
 std::ostream& operator<<(std::ostream& os, rts::complex<A> x)
 {
-    os<<x.toStr();
+    os<<x.str();
     return os;
 }
  
@@ -40,9 +40,10 @@ struct matrix
 		return *this;
 	}
  
-	CUDA_CALLABLE vec<T, N> operator*(vec<T, N> rhs)
+	template<typename Y>
+	CUDA_CALLABLE vec<Y, N> operator*(vec<Y, N> rhs)
 	{
-		vec<T, N> result;
+		vec<Y, N> result;
  
 		for(int r=0; r<N; r++)
 			for(int c=0; c<N; c++)
@@ -58,6 +58,13 @@ public:
 			N = N.norm();
 	}
  
+	//conversion to a plane over another scalar type U, rebuilt from this plane's N and P members
+	template< typename U >
+	CUDA_CALLABLE operator plane<U, D>(){
+		return plane<U, D>(N, P);
+	}
+
 	CUDA_CALLABLE vec<T, D> norm(){
 		return N;
 	}
@@ -105,6 +112,11 @@ public:
 		return v - perpendicular(v);
 	}
  
+	//split v into two parts in one call: perp receives N scaled by v.dot(N), and
+	//para receives what is left of v after perp is subtracted
+	//NOTE(review): perp is only the true projection onto the normal if N is unit length - confirm
+	CUDA_CALLABLE void decompose(vec<T, D> v, vec<T, D>& para, vec<T, D>& perp){
+		perp = N * v.dot(N);
+		para = v - perp;		//perp must already be assigned - it is read here
+	}
+
 	//get both the parallel and perpendicular components of a vector v w.r.t. the plane
 	CUDA_CALLABLE void project(vec<T, D> v, vec<T, D> &v_par, vec<T, D> &v_perp){
  
@@ -129,6 +141,16 @@ public:
  
 	}
  
+	//unary minus: returns a copy of this plane whose normal vector is negated
+	CUDA_CALLABLE rts::plane<T, D> operator-()
+	{
+		rts::plane<T, D> flipped(*this);
+		flipped.N = -flipped.N;
+		return flipped;
+	}
+
 	//output a string
 	std::string str(){
 		std::stringstream ss;
@@ -138,7 +160,7 @@ public:
 	}
  
 	///////Friendship
-	friend CUDA_CALLABLE rts::plane<T, D> operator- <> (rts::plane<T, D> v);
+	//friend CUDA_CALLABLE rts::plane<T, D> operator- <> (rts::plane<T, D> v);
  
  
  
@@ -149,16 +171,8 @@ public:
 //arithmetic operators
  
 //negative operator flips the plane (front to back)
-template <typename T, int D>
-CUDA_CALLABLE rts::plane<T, D> operator-(rts::plane<T, D> p_rhs)
-{
-	rts::plane<T, D> p = p_rhs;
-
-	//negate the normal vector
-	p.N = -p.N;
+//template <typename T, int D>
  
-	return p;
-}
  
  
  
@@ -35,6 +35,24 @@ __global__ void gpu_gaussian(T* dest, unsigned int r0, unsigned int r1, T mean, 
  
 namespace rts{
  
+//element-wise product of two r0 x r1 fields: R[i] = X[i] * Y[i]
+//one thread per element; the x launch dimension covers r0 and the y launch dimension covers r1
+template<typename T>
+__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){
+
+    int col = blockIdx.x * blockDim.x + threadIdx.x;
+    int row = blockIdx.y * blockDim.y + threadIdx.y;
+
+    //threads that map to an element inside the field write it; the rest do nothing
+    if(col < r0 && row < r1){
+
+        //row-major offset of this element
+        int idx = row*r0 + col;
+
+        R[idx] = X[idx] * Y[idx];
+    }
+}
+
 template<typename P, unsigned int N = 1, bool positive = false>
 class realfield{
  
@@ -107,7 +125,7 @@ public:
 		//std::cout<<"realfield DESTRUCTOR"<<std::endl;
     }
  
-	P* ptr(unsigned int n)
+	P* ptr(unsigned int n = 0)
 	{
 		if(n < N)
 			return X[n];
@@ -212,6 +230,27 @@ public:
         return *this;
     }
  
+    //element-wise product of this field and rhs, returned as a new field
+    //NOTE(review): only this field's resolution is used for the launch and for the result;
+    //rhs is presumably expected to have the same resolution - confirm at the call sites
+    realfield<P, N, positive> operator* (const realfield & rhs){
+
+        //square thread block whose edge is the integer square root of rts::maxThreadsPerBlock()
+        int limit = rts::maxThreadsPerBlock();
+        int edge = (int)std::sqrt((float)limit);
+        dim3 threads(edge, edge);
+
+        //ceiling division so the grid covers every element along both axes
+        dim3 blocks((R[0] + edge - 1)/edge, (R[1] + edge - 1)/edge);
+
+        //destination field at this field's resolution
+        realfield<P, N, positive> product(R[0], R[1]);
+
+        //one kernel launch per channel
+        for(int c = 0; c < N; c++)
+            rts::gpu_field_multiply <<<blocks, threads>>> (product.X[c], X[c], rhs.X[c], R[0], R[1]);
+
+        return product;
+    }
+
 	///copy constructor
 	realfield(const realfield &rhs)
 	{
@@ -56,6 +56,19 @@ struct vec
 			v[i] = other.v[i];
 	}
  
+	
+	//conversion to a vector of another element type U: every element is converted by assignment
+	template< typename U >
+	CUDA_CALLABLE operator vec<U, N>(){
+		vec<U, N> converted;
+		for(int d = 0; d < N; d++){
+			converted.v[d] = v[d];
+		}
+		return converted;
+	}
+
+	//template<class U>
+	//friend vec<U, N>::operator vec<T, N>();
+
 	CUDA_CALLABLE T len() const
 	{
         //compute and return the vector length
@@ -64,7 +77,7 @@ struct vec
         {
             sum_sq += v[i] * v[i];
         }
-        return std::sqrt(sum_sq);
+        return sqrt(sum_sq);
  
 	}
  
@@ -190,6 +203,13 @@ struct vec
 			v[i] = rhs;
 		return *this;
 	}
+
+	//assignment from a vector of another element type Y: copies all N elements, converting each one
+	template<typename Y>
+	CUDA_CALLABLE vec<T, N> & operator=(vec<Y, N> rhs){
+		for(int d = 0; d < N; d++){
+			v[d] = rhs.v[d];
+		}
+		return *this;
+	}
 	//unary minus
 	CUDA_CALLABLE vec<T, N> operator-() const{
 		vec<T, N> r;
@@ -209,7 +229,7 @@ struct vec
         return false;
 	}
  
-	std::string toStr() const
+	std::string str() const
 	{
 		std::stringstream ss;
  
@@ -239,7 +259,7 @@ struct vec
 template <typename T, int N>
 std::ostream& operator<<(std::ostream& os, rts::vec<T, N> v)
 {
-    os<<v.toStr();
+    os<<v.str();
     return os;
 }
  
@@ -184,7 +184,8 @@ public:
 	{
 		std::stringstream ss;
 		ss<<"Beam:"<<std::endl;
-		ss<<"	Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
+		//ss<<"	Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
+		ss<<"	Central Plane Wave: "<<beam::k<<std::endl;
 		if(_na[0] == 0)
 			ss<<"	NA: "<<_na[1];
 		else
@@ -7,15 +7,8 @@
 #include "../optics/planewave.h"
 #include "../cuda/devices.h"
 #include "../optics/beam.h"
+#include "../math/rect.h"
  
-namespace rts{
-template<typename T> class halfspace;
-
-template<typename T> class efield;
-}
-
-template<typename T>
-void operator<<(rts::efield<T> &ef, rts::halfspace<T> hs);
  
 namespace rts{
  
@@ -107,8 +100,8 @@ __global__ void gpu_efield_polarization(complex&lt;T&gt;* X, complex&lt;T&gt;* Y, complex&lt;T&gt;
     //compute the field polarization
     Px[i] = X[i].abs();
     Py[i] = Y[i].abs();
-
     Pz[i] = Z[i].abs();
+
 }
  
 /*	This function computes the sum of two complex fields and stores the result in *dest
@@ -132,7 +125,7 @@ __global__ void gpu_efield_sum(complex&lt;T&gt;* dest, complex&lt;T&gt;* src, unsigned int r
 /*  This class implements a discrete representation of an electromagnetic field
     in 2D. The majority of this representation is done on the GPU.
 */
-template<typename P>
+template<typename T>
 class efield
 {
 protected:
@@ -140,17 +133,17 @@ protected:
     bool vector;
  
     //gpu pointer to the field data
-    rts::complex<P>* X;
-    rts::complex<P>* Y;
-    rts::complex<P>* Z;
+    rts::complex<T>* X;
+    rts::complex<T>* Y;
+    rts::complex<T>* Z;
  
     //resolution of the discrete field
     unsigned int R[2];
  
     //shape of the 2D field
-    rect<P> pos;
+    rect<T> pos;
  
-	void from_planewave(planewave<P> p)
+	void from_planewave(planewave<T> p)
 	{
         unsigned int SQRT_BLOCK = 16;
         //create one thread for each detector pixel
@@ -158,7 +151,7 @@ protected:
         dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  
  
-        gpu_planewave2efield<P> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], p, pos);
+        gpu_planewave2efield<T> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], p, pos);
     }
  
     void init(unsigned int res0, unsigned int res1, bool _vector)
@@ -170,16 +163,16 @@ protected:
         R[1] = res1;
  
         //allocate memory on the gpu
-        cudaMalloc(&X, sizeof(rts::complex<P>) * R[0] * R[1]);
-		cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
+        cudaMalloc(&X, sizeof(rts::complex<T>) * R[0] * R[1]);
+		cudaMemset(X, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
  
         if(vector)
         {
-            cudaMalloc(&Y, sizeof(rts::complex<P>) * R[0] * R[1]);
-			cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
+            cudaMalloc(&Y, sizeof(rts::complex<T>) * R[0] * R[1]);
+			cudaMemset(Y, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
  
-            cudaMalloc(&Z, sizeof(rts::complex<P>) * R[0] * R[1]);
-			cudaMemset(Z, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
+            cudaMalloc(&Z, sizeof(rts::complex<T>) * R[0] * R[1]);
+			cudaMemset(Z, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
         }
     }
  
@@ -190,14 +183,14 @@ protected:
 		if(Z != NULL) cudaFree(Z);
     }
  
-    void shallowcpy(const rts::efield<P> & src)
+    void shallowcpy(const rts::efield<T> & src)
     {
     	vector = src.vector;
     	R[0] = src.R[0];
     	R[1] = src.R[1];
     }
  
-    void deepcpy(const rts::efield<P> & src)
+    void deepcpy(const rts::efield<T> & src)
     {
     	//perform a shallow copy
     	shallowcpy(src);
@@ -205,18 +198,18 @@ protected:
     	//allocate memory on the gpu
     	if(src.X != NULL)
     	{
-    		cudaMalloc(&X, sizeof(rts::complex<P>) * R[0] * R[1]);
-    		cudaMemcpy(X, src.X, sizeof(rts::complex<P>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
+    		cudaMalloc(&X, sizeof(rts::complex<T>) * R[0] * R[1]);
+    		cudaMemcpy(X, src.X, sizeof(rts::complex<T>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
     	}
     	if(src.Y != NULL)
     	{
-    		cudaMalloc(&Y, sizeof(rts::complex<P>) * R[0] * R[1]);
-    		cudaMemcpy(Y, src.Y, sizeof(rts::complex<P>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
+    		cudaMalloc(&Y, sizeof(rts::complex<T>) * R[0] * R[1]);
+    		cudaMemcpy(Y, src.Y, sizeof(rts::complex<T>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
     	}
     	if(src.Z != NULL)
     	{
-    		cudaMalloc(&Z, sizeof(rts::complex<P>) * R[0] * R[1]);
-    		cudaMemcpy(Z, src.Z, sizeof(rts::complex<P>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
+    		cudaMalloc(&Z, sizeof(rts::complex<T>) * R[0] * R[1]);
+    		cudaMemcpy(Z, src.Z, sizeof(rts::complex<T>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
     	}
     }
  
@@ -224,7 +217,7 @@ public:
     efield(unsigned int res0, unsigned int res1, bool _vector = true)
     {
         init(res0, res1, _vector);
-        //pos = rts::rect<P>(rts::vec<P>(-10, 0, -10), rts::vec<P>(-10, 0, 10), rts::vec<P>(10, 0, 10));
+        //pos = rts::rect<T>(rts::vec<T>(-10, 0, -10), rts::vec<T>(-10, 0, 10), rts::vec<T>(10, 0, 10));
     }
  
     //destructor
@@ -236,20 +229,40 @@ public:
     ///Clear the field - set all points to zero
     void clear()
     {
-        cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
+        cudaMemset(X, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
  
         if(vector)
         {
-            cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
-            cudaMemset(Z, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
+            cudaMemset(Y, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
+            cudaMemset(Z, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
         }
     }
  
-    void position(rect<P> _p)
+    void position(rect<T> _p)
     {
         pos = _p;
     }
  
+    //access functions: expose the component buffers (X, Y, Z), the resolution (R[0], R[1]),
+    //and the rectangle stored in pos
+    complex<T>* x(){ return X; }
+    complex<T>* y(){ return Y; }
+    complex<T>* z(){ return Z; }
+    unsigned int Ru(){ return R[0]; }
+    unsigned int Rv(){ return R[1]; }
+    rect<T> p(){ return pos; }
+
     std::string str()
     {
         stringstream ss;
@@ -262,7 +275,7 @@ public:
     }
  
     //assignment operator: assignment from another electric field
-    efield<P> & operator= (const efield<P> & rhs)
+    efield<T> & operator= (const efield<T> & rhs)
     {
     	destroy();				//destroy any previous information about this field
     	deepcpy(rhs);			//create a deep copy
@@ -270,7 +283,7 @@ public:
     }
  
     //assignment operator: build an electric field from a plane wave
-    efield<P> & operator= (const planewave<P> & rhs)
+    efield<T> & operator= (const planewave<T> & rhs)
 	{
  
 		clear();				//clear any previous field data
@@ -279,7 +292,7 @@ public:
 	}
  
 	//assignment operator: add an existing electric field
-	efield<P> & operator+= (const efield<P> & rhs)
+	efield<T> & operator+= (const efield<T> & rhs)
 	{
 		//if this field and the source field represent the same regions in space
 		if(R[0] == rhs.R[0] && R[1] == rhs.R[1] && pos == rhs.pos)
@@ -309,10 +322,10 @@ public:
 	}
  
     //assignment operator: build an electric field from a beam
-    efield<P> & operator= (const rts::beam<P> & rhs)
+    efield<T> & operator= (const rts::beam<T> & rhs)
     {
         //get a vector of monte-carlo samples
-        std::vector< rts::planewave<P> > p_list = rhs.mc();
+        std::vector< rts::planewave<T> > p_list = rhs.mc();
  
         clear();                //clear any previous field data
         for(unsigned int i = 0; i < p_list.size(); i++)
@@ -322,13 +335,13 @@ public:
  
  
 	//return a scalar field representing field magnitude
-    realfield<P, 1, true> mag()
+    realfield<T, 1, true> mag()
     {
 		int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
 		int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  
 		//create a scalar field to store the result
-		realfield<P, 1, true> M(R[0], R[1]);
+		realfield<T, 1, true> M(R[0], R[1]);
  
 		//create one thread for each detector pixel
 		dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
@@ -341,13 +354,13 @@ public:
     }
  
     //return a scalar field representing the field magnitude at an infinitely small point in time
-    realfield<P, 1, true> real_mag()
+    realfield<T, 1, true> real_mag()
     {
         int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
         int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  
         //create a scalar field to store the result
-        realfield<P, 1, true> M(R[0], R[1]);
+        realfield<T, 1, true> M(R[0], R[1]);
  
         //create one thread for each detector pixel
         dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
@@ -360,7 +373,7 @@ public:
     }
  
     //return a vector field representing field polarization
-    realfield<P, 3, true> polarization()
+    realfield<T, 3, true> polarization()
     {
         if(!vector)
         {
@@ -375,7 +388,7 @@ public:
         dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  
  
-        realfield<P, 3, true> Pol(R[0], R[1]);     //create a vector field to store the result
+        realfield<T, 3, true> Pol(R[0], R[1]);     //create a vector field to store the result
  
         //compute the polarization and store it in the vector field
         gpu_efield_polarization<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], Pol.ptr(0), Pol.ptr(1), Pol.ptr(2));
@@ -384,7 +397,7 @@ public:
     }
  
     ////////FRIENDSHIP
-    friend void operator<< <>(rts::efield<P> &ef, rts::halfspace<P> hs);
+    //friend void operator<< <T>(rts::efield<T> &ef, rts::halfspace<T> hs);
  
  
 };
@@ -135,7 +135,7 @@ public:
  
 		//determine important parameters for the scattering domain
 		unsigned int sR = ceil(sqrt( (P)(pow((P)esphere::R[0],2) + pow((P)esphere::R[1],2))) );
-		unsigned int thetaR = 256;
+		//unsigned int thetaR = 256;
  
 		/////////////////////continue scattering code here/////////////////////////
  
-#ifndef	RTS_HALFSPACE_CUH
-#define	RTS_HALFSPACE_CUH
+#ifndef	RTS_HALFSPACE_H
+#define	RTS_HALFSPACE_H
+
+#include "../math/plane.h"
  
-#include "../optics/halfspace.h"
-#include "../optics/efield.cuh"
  
 namespace rts{
  
+//GPU kernel to compute the electric field
 template<typename T>
 __global__ void gpu_halfspace_pw2ef(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, 
 									 plane<T> P, planewave<T> w, rect<T> q, bool at_surface = false)
@@ -36,56 +37,303 @@ __global__ void gpu_halfspace_pw2ef(complex&lt;T&gt;* X, complex&lt;T&gt;* Y, complex&lt;T&gt;* Z,
 	//vec<T> r(p[0], p[1], p[2]);
  
 	complex<T> x( 0.0f, w.kvec().dot(p) );
-	complex<T> phase( 0.0f, w.phase());
+	//complex<T> phase( 0.0f, w.phase());
  
     if(Y == NULL)                       //if this is a scalar simulation
         X[i] += w.E().len() * exp(x);    //use the vector magnitude as the plane wave amplitude
     else
     {
     	//X[i] = Y[i] = Z[i] = 1;
-        X[i] += w.E()[0] * exp(x) * exp(phase);
-        Y[i] += w.E()[1] * exp(x) * exp(phase);
-        Z[i] += w.E()[2] * exp(x) * exp(phase);
+        X[i] += w.E()[0] * exp(x);// * exp(phase);
+        Y[i] += w.E()[1] * exp(x);// * exp(phase);
+        Z[i] += w.E()[2] * exp(x);// * exp(phase);
+    }
+}
+
+//GPU kernel to compute the electric displacement field
+//Adds the contribution of the plane wave w to X, Y and Z at every sample of an r0 x r1 grid
+//that passes the side test against the plane P. Launched with a 2D grid: the x dimension
+//covers r0 and the y dimension covers r1.
+//	X, Y, Z		component buffers that are accumulated into; Y == NULL selects the scalar path
+//	q			rectangle that maps normalized grid coordinates to a position
+//	n			refractive index; n*n scales the perp component returned by decompose()
+//	at_surface	when true, samples with P.side(p) == 0 are rendered as well
+template<typename T>
+__global__ void gpu_halfspace_pw2df(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, 
+									 plane<T> P, planewave<T> w, rect<T> q, T n, bool at_surface = false)
+{
+    int iu = blockIdx.x * blockDim.x + threadIdx.x;
+    int iv = blockIdx.y * blockDim.y + threadIdx.y;
+
+    //make sure that the thread indices are in-bounds
+    if(iu >= r0 || iv >= r1) return;
+
+    //compute the index into the field
+    int i = iv*r0 + iu;
+
+	//get the current position
+	vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 );
+
+	//skip the sample if the current position is on the wrong side of the plane
+	//(a sample with side == 0 is kept only when at_surface is set)
+	if(at_surface){
+		if(P.side(p) > 0)
+			return;
+	}
+	else{
+		if(P.side(p) >= 0)
+			return;
+	}
+
+
+	//vec<T> r(p[0], p[1], p[2]);
+
+	//purely imaginary exponent i(k . p) for the plane wave at this position
+	complex<T> x( 0.0f, w.kvec().dot(p) );
+	//complex<T> phase( 0.0f, w.phase());
+
+	//vec< complex<T> > testE(1, 0, 0);
+
+	
+
+    //NOTE(review): the scalar branch does not use n at all - confirm that this is intended
+    if(Y == NULL)                       //if this is a scalar simulation
+        X[i] += w.E().len() * exp(x);    //use the vector magnitude as the plane wave amplitude
+    else
+    {
+    	//convert the plane to a complex-valued plane so that it can decompose the complex E vector
+    	plane< complex<T> > cplane = plane< complex<T>, 3>(P);
+    	vec< complex<T> > E_para;// = cplane.parallel(w.E());
+		vec< complex<T> > E_perp;// = cplane.perpendicular(w.E()) * (n*n);
+		cplane.decompose(w.E(), E_para, E_perp);
+		T epsilon = n*n;
+
+        //only the perp component is scaled by epsilon; the para component is added unchanged
+        X[i] += (E_para[0] + E_perp[0] * epsilon) * exp(x);
+        Y[i] += (E_para[1] + E_perp[1] * epsilon) * exp(x);
+        Z[i] += (E_para[2] + E_perp[2] * epsilon) * exp(x);
     }
 }
+
+//fills n with a per-sample refractive index for an r0 x r1 grid placed on the rectangle q:
+//n0 where P.side() is negative, n1 everywhere else
+template<typename T>
+__global__ void gpu_halfspace_n(T* n, unsigned int r0, unsigned int r1, rect<T> q, plane<T> P, T n0, T n1){
+
+    int col = blockIdx.x * blockDim.x + threadIdx.x;
+    int row = blockIdx.y * blockDim.y + threadIdx.y;
+
+    //only threads that map to a sample inside the grid do any work
+    if(col < r0 && row < r1){
+
+        //row-major offset of this sample
+        int idx = row*r0 + col;
+
+        //position of this sample on the rectangle
+        vec<T> pos = q( (T)col/(T)r0, (T)row/(T)r1 );
+
+        //choose the index from the side of the plane that the sample lies on
+        n[idx] = (P.side(pos) < 0) ? n0 : n1;
+    }
 }
  
 template<class T>
-void operator<<(rts::efield<T> &ef, rts::halfspace<T> hs){
+class halfspace
+{
+private:
+	rts::plane<T> S;		//surface plane splitting the half space
+	rts::complex<T> n0;		//refractive index at the front of the plane
+	rts::complex<T> n1;		//refractive index at the back of the plane
+
+	//lists of waves in front (w0) and behind (w1) the plane
+	std::vector< rts::planewave<T> > w0;
+	std::vector< rts::planewave<T> > w1;
  
-	//std::cout<<"---------RENDER HALFSPACE--------------"<<std::endl;
-	//output a parameter of the efield
-	//std::cout<<ef.pos<<std::endl;
-	//std::cout<<hs.S.str()<<std::endl;
+	//rts::planewave<T> pi;	//incident plane wave
+	//rts::planewave<T> pr;	//plane wave reflected from the surface
+	//rts::planewave<T> pt;	//plane wave transmitted through the surface
  
-	unsigned int SQRT_BLOCK = 16;
-	//create one thread for each detector pixel
-	dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
-	dim3 dimGrid((ef.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (ef.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
+	//set the default refractive indices: 1.0 at the front of the plane (n0) and 1.5 at the back (n1)
+	void init(){
+		n0 = 1.0;
+		n1 = 1.5;
+	}
  
+	//report which side of the interface the plane wave p hits: false (0) = front, true (1) = back
+	//the back is reported when the k vector has a positive dot product with the surface normal
+	bool facing(planewave<T> p){
+		return p.kvec().dot(S.norm()) > 0;
+	}
  
+	T calc_theta_i(vec<T> v){
  
-	//render each plane wave
+		vec<T> v_hat = v.norm();
  
-	//plane waves at the surface front
-	for(unsigned int w = 0; w < hs.w0.size(); w++){
-		//std::cout<<"w0 "<<w<<": "<<hs.w0[w].str()<<std::endl;
-		rts::gpu_halfspace_pw2ef<T> <<<dimGrid, dimBlock>>> (ef.X, ef.Y, ef.Z, ef.R[0], ef.R[1], hs.S, hs.w0[w], ef.pos);
+		//compute the cosine of the angle between k_hat and the plane normal
+		T cos_theta_i = v_hat.dot(S.norm());
+
+		return acos(abs(cos_theta_i));
 	}
  
-	//plane waves at the surface back
-	for(unsigned int w = 0; w < hs.w1.size(); w++){
-		//std::cout<<"w1 "<<w<<": "<<hs.w1[w].str()<<std::endl;
-		rts::gpu_halfspace_pw2ef<T> <<<dimGrid, dimBlock>>> (ef.X, ef.Y, ef.Z, ef.R[0], ef.R[1], -hs.S, hs.w1[w], ef.pos, true);
+	//returns the angle whose sine is ni_nt * sin(theta_i)
+	//NOTE(review): asin is only defined for arguments in [-1, 1]; a product outside that range
+	//is not handled here - confirm that callers rule it out
+	T calc_theta_t(T ni_nt, T theta_i){
+
+		T sin_theta_t = ni_nt * sin(theta_i);
+		return asin(sin_theta_t);
 	}
-	//rts::gpu_halfspace_pw2ef<T> <<<dimGrid, dimBlock>>> (ef.X, ef.Y, ef.Z, ef.R[0], ef.R[1], hs.S, hs.pi, ef.pos);
-	//rts::gpu_halfspace_pw2ef<T> <<<dimGrid, dimBlock>>> (ef.X, ef.Y, ef.Z, ef.R[0], ef.R[1], hs.S, hs.pr, ef.pos);
-	//rts::gpu_halfspace_pw2ef<T> <<<dimGrid, dimBlock>>> (ef.X, ef.Y, ef.Z, ef.R[0], ef.R[1], -hs.S, hs.pt, ef.pos, true);
  
  
-	//return ef;
-}
+public:
+
+	//constructors
+
+	//default: refractive indices come from init(); the surface plane S is left default-constructed
+	halfspace(){
+		init();
+	}
+
+	//explicit refractive indices (na in front, nb behind); S is left default-constructed
+	halfspace(T na, T nb){
+		init();		//both defaults are overwritten by the assignments below
+		n0 = na;
+		n1 = nb;
+	}
+
+	//explicit refractive indices and surface plane
+	halfspace(T na, T nb, plane<T> p){
+		n0 = na;
+		n1 = nb;
+		S = p;
+	}
+
+	//compute the reflected and transmitted waves for the incident plane wave p and store them:
+	//p and any non-zero reflection go into the list for the side that p arrives on,
+	//any non-zero transmission goes into the list for the other side
+	void incident(rts::planewave<T> p){
+
+		//scatter() is handed the two waves that it is expected to fill in
+		planewave<T> reflected, transmitted;
+		p.scatter(S, n1.real()/n0.real(), reflected, transmitted);
+
+		//pick the destination lists once, based on the side that p hits
+		const bool hits_back = facing(p);
+		std::vector< rts::planewave<T> > & same_side = hits_back ? w1 : w0;
+		std::vector< rts::planewave<T> > & other_side = hits_back ? w0 : w1;
+
+		same_side.push_back(p);
+
+		//waves with a zero-length E vector are not stored
+		if(reflected.E().len() != 0)
+			same_side.push_back(reflected);
+		if(transmitted.E().len() != 0)
+			other_side.push_back(transmitted);
+	}
+
+	//add every plane wave in the list returned by b.mc(N) as an incident wave
+	void incident(rts::beam<T> b, unsigned int N = 10000){
+
+		//plane wave decomposition of the beam
+		std::vector< planewave<T> > samples = b.mc(N);
+
+		//reflected and refracted waves are computed per sample by the plane wave overload
+		for(unsigned int s = 0; s < samples.size(); s++)
+			incident(samples[s]);
+	}
+
+	//render the electric field of every stored plane wave onto an r0 x r1 grid placed at R
+	rts::efield<T> E(unsigned int r0, unsigned int r1, rect<T> R){
+
+		//square thread block whose edge is the integer square root of rts::maxThreadsPerBlock()
+		int limit = rts::maxThreadsPerBlock();
+		int edge = (int)std::sqrt((float)limit);
+		dim3 threads(edge, edge);
+
+		//ceiling division so the grid covers every sample
+		dim3 blocks((r0 + edge - 1)/edge, (r1 + edge - 1)/edge);
+
+		//field that the kernels accumulate into
+		rts::efield<T> field(r0, r1);
+		field.position(R);
+
+		//plane waves at the surface front
+		for(unsigned int k = 0; k < w0.size(); k++)
+			rts::gpu_halfspace_pw2ef<T> <<<blocks, threads>>> (field.x(), field.y(), field.z(), r0, r1, S, w0[k], field.p());
+
+		//plane waves at the surface back: the plane is flipped and at_surface is set
+		for(unsigned int k = 0; k < w1.size(); k++)
+			rts::gpu_halfspace_pw2ef<T> <<<blocks, threads>>> (field.x(), field.y(), field.z(), r0, r1, -S, w1[k], field.p(), true);
+
+		return field;
+	}
+
+	//render the electric displacement of every stored plane wave onto an r0 x r1 grid placed at R
+	rts::efield<T> D(unsigned int r0, unsigned int r1, rect<T> R){
+
+		//square thread block whose edge is the integer square root of rts::maxThreadsPerBlock()
+		int limit = rts::maxThreadsPerBlock();
+		int edge = (int)std::sqrt((float)limit);
+		dim3 threads(edge, edge);
+
+		//ceiling division so the grid covers every sample
+		dim3 blocks((r0 + edge - 1)/edge, (r1 + edge - 1)/edge);
+
+		//complex vector field that the kernels accumulate into
+		rts::efield<T> field(r0, r1);
+		field.position(R);
+
+		//plane waves at the surface front use the real part of n0
+		for(unsigned int k = 0; k < w0.size(); k++)
+			rts::gpu_halfspace_pw2df<T> <<<blocks, threads>>> (field.x(), field.y(), field.z(), r0, r1, S, w0[k], field.p(), n0.real());
+
+		//plane waves at the surface back use the real part of n1, the flipped plane, and at_surface
+		for(unsigned int k = 0; k < w1.size(); k++)
+			rts::gpu_halfspace_pw2df<T> <<<blocks, threads>>> (field.x(), field.y(), field.z(), r0, r1, -S, w1[k], field.p(), n1.real(), true);
+
+		return field;
+	}
+
+	//build a scalar field holding the refractive index at each sample of an Ru x Rv grid placed at p
+	realfield<T, 1, true> nfield(unsigned int Ru, unsigned int Rv, rect<T> p){
+
+		//square thread block whose edge is the integer square root of rts::maxThreadsPerBlock()
+		int limit = rts::maxThreadsPerBlock();
+		int edge = (int)std::sqrt((float)limit);
+		dim3 threads(edge, edge);
+
+		//ceiling division so the grid covers every sample
+		dim3 blocks((Ru + edge - 1)/edge, (Rv + edge - 1)/edge);
+
+		//scalar field that receives the result
+		realfield<T, 1, true> index(Ru, Rv);
+
+		//only the real parts of n0 and n1 are passed to the kernel
+		rts::gpu_halfspace_n<T> <<<blocks, threads>>> (index.ptr(), Ru, Rv, p, S, n0.real(), n1.real());
+
+		return index;
+	}
+
+	//text summary: both refractive indices, the surface plane, then every stored plane wave
+	std::string str(){
+		std::stringstream out;
+		out<<"Half Space-------------"<<std::endl;
+		out<<"n0: "<<n0<<std::endl;
+		out<<"n1: "<<n1<<std::endl;
+		out<<S.str();
+
+		//the plane wave section is left out entirely when both lists are empty
+		if(!w0.empty() || !w1.empty()){
+			out<<std::endl<<"Plane Waves:"<<std::endl;
+
+			for(unsigned int k = 0; k < w0.size(); k++)
+				out<<"w0 = "<<k<<": "<<w0[k]<<std::endl;
+
+			for(unsigned int k = 0; k < w1.size(); k++)
+				out<<"w1 = "<<k<<": "<<w1[k]<<std::endl;
+		}
+
+		return out.str();
+	}
+	////////FRIENDSHIP
+    //friend void operator<< <> (rts::efield<T> &ef, rts::halfspace<T> hs);
+
+};
+
+
+
+
+}
  
  
 #endif
 \ No newline at end of file
-#ifndef	RTS_HALFSPACE_H
-#define	RTS_HALFSPACE_H
-
-namespace rts{
-template<typename T> class halfspace;
-
-template<typename T> class efield;
-}
-
-template<typename T>
-void operator<<(rts::efield<T> &ef, rts::halfspace<T> hs);
-
-namespace rts{
-
-template<class T>
-class halfspace
-{
-private:
-	rts::plane<T> S;		//surface plane splitting the half space
-	rts::complex<T> n0;		//refractive index at the front of the plane
-	rts::complex<T> n1;		//refractive index at the back of the plane
-
-	//lists of waves in front (pw0) and behind (pw1) the plane
-	std::vector< rts::planewave<T> > w0;
-	std::vector< rts::planewave<T> > w1;
-
-	//rts::planewave<T> pi;	//incident plane wave
-	//rts::planewave<T> pr;	//plane wave reflected from the surface
-	//rts::planewave<T> pt;	//plane wave transmitted through the surface
-
-	void init(){
-		n0 = 1.0;
-		n1 = 1.5;
-	}
-
-	//compute which side of the interface is hit by the incoming plane wave (0 = front, 1 = back)
-	bool facing(planewave<T> p){
-		if(p.kvec().dot(S.norm()) > 0)
-			return 1;
-		else
-			return 0;
-	}
-
-	T calc_theta_i(vec<T> v){
-
-		vec<T> v_hat = v.norm();
-
-		//compute the cosine of the angle between k_hat and the plane normal
-		T cos_theta_i = v_hat.dot(S.norm());
-
-		return acos(abs(cos_theta_i));
-	}
-
-	T calc_theta_t(T ni_nt, T theta_i){
-
-		T sin_theta_t = ni_nt * sin(theta_i);
-		return asin(sin_theta_t);
-	}
-
-
-public:
-
-	//constructors
-	halfspace(){
-		init();
-	}
-
-	halfspace(T na, T nb){
-		init();
-		n0 = na;
-		n1 = nb;
-	}
-
-	halfspace(T na, T nb, plane<T> p){
-		n0 = na;
-		n1 = nb;
-		S = p;
-	}
-
-	//compute the transmitted and reflective waves given the incident (vacuum) plane wave p
-	void incident(rts::planewave<T> p){
-
-		planewave<T> r, t;
-		p.scatter(S, n1.real()/n0.real(), r, t);
-
-		//std::cout<<"i+r: "<<p.r()[0] + r.r()[0]<<std::endl;
-		//std::cout<<"t:   "<<t.r()[0]<<std::endl;
-
-		if(facing(p)){
-			w1.push_back(p);
-
-			if(r.E().len() != 0)
-				w1.push_back(r);
-			if(t.E().len() != 0)
-				w0.push_back(t);
-		}
-		else{
-			w0.push_back(p);
-
-			if(r.E().len() != 0)
-				w0.push_back(r);
-			if(t.E().len() != 0)
-				w1.push_back(t);
-		}
-	}
-
-	void incident(rts::beam<T> b, unsigned int N = 10000){
-
-		//generate a plane wave decomposition for the beam
-		std::vector< planewave<T> > pw_list = b.mc(N);
-
-		//calculate the reflected and refracted waves for each incident wave
-		for(unsigned int w = 0; w < pw_list.size(); w++){
-			incident(pw_list[w]);
-		}
-	}
-
-	std::string str(){
-		std::stringstream ss;
-		ss<<"Half Space-------------"<<std::endl;
-		ss<<"n0: "<<n0<<std::endl;
-		ss<<"n1: "<<n1<<std::endl;
-		ss<<S.str();
-		
-
-		if(w0.size() != 0 || w1.size() != 0){
-			ss<<std::endl<<"Plane Waves:"<<std::endl;
-			for(unsigned int w = 0; w < w0.size(); w++){
-				ss<<"w0 = "<<w<<": "<<w0[w]<<std::endl;
-			}
-			for(unsigned int w = 0; w < w1.size(); w++){
-				ss<<"w1 = "<<w<<": "<<w1[w]<<std::endl;
-			}
-		}
-
-		return ss.str();
-	}
-	////////FRIENDSHIP
-    friend void operator<< <> (rts::efield<T> &ef, rts::halfspace<T> hs);
-
-};
-
-
-
-
-}
-
-
-#endif
 \ No newline at end of file
@@ -22,10 +22,10 @@ class planewave{
 protected:
  
 	vec<T> k;	//k = tau / lambda
-	vec<T> E0;		//amplitude
-	T phi;
+	vec< complex<T> > E0;		//amplitude
+	//T phi;
  
-	planewave<T> bend(rts::vec<T> kn) const{
+	CUDA_CALLABLE planewave<T> bend(rts::vec<T> kn) const{
  
 		vec<T> kn_hat = kn.norm();				//normalize the new k
 		vec<T> k_hat = k.norm();				//normalize the current k
@@ -76,7 +76,7 @@ protected:
 		q.CreateRotation(theta, r.norm());
  
 		//apply the rotation to E0
-		vec<T> E0n = q.toMatrix3() * E0;
+		vec< complex<T> > E0n = q.toMatrix3() * E0;
  
 		new_p.k = kn_hat * kmag();
 		new_p.E0 = E0n;
@@ -95,20 +95,22 @@ public:
 	}*/
 	///constructor: create a plane wave propagating along kvec, polarized along E, with a phase offset of 'phase'
 	CUDA_CALLABLE planewave(vec<T> kvec = rts::vec<T>(0, 0, rtsTAU), 
-							vec<T> E = rts::vec<T>(1, 0, 0), 
-							T phase = 0)
+							vec< complex<T> > E = rts::vec<T>(1, 0, 0), T phase = 0)
 	{
-		phi = phase;
+		//phi = phase;		(disabled: the phase is applied to E0 at the end of the constructor instead)
+
 		k = kvec;
-		vec<T> k_hat = k.norm();
+		vec< complex<T> > k_hat = k.norm();
  
 		if(E.len() == 0)			//if the plane wave has an amplitude of 0
 			E0 = vec<T>(0);			//skip the projection and store vec<T>(0) as the amplitude
 		else{
-			vec<T> s = (k.cross(E)).norm();		//compute an orthogonal side vector
-			vec<T> E_hat = (s.cross(k)).norm();	//compute a normalized E0 direction vector
+			vec< complex<T> > s = (k_hat.cross(E)).norm();		//compute a normalized side vector from the cross product of k_hat and E
+			vec< complex<T> > E_hat = (s.cross(k)).norm();	//compute a normalized E0 direction vector from the cross product of s and k
 			E0 = E_hat * E_hat.dot(E);					//keep only the component of E along E_hat
 		}
+
+		E0 = E0 * exp( complex<T>(0, phase) );	//fold the phase offset into the complex amplitude as the factor exp(i*phase)
 	}
  
 	///multiplication operator: scale E0
@@ -129,7 +131,7 @@ public:
 		return k.len();
 	}
  
-	CUDA_CALLABLE vec<T> E(){
+	CUDA_CALLABLE vec< complex<T> > E(){	//accessor: returns a copy of the stored complex amplitude vector E0
 		return E0;
 	}
  
@@ -137,13 +139,13 @@ public:
 		return k;
 	}
  
-	CUDA_CALLABLE T phase(){
+	/*CUDA_CALLABLE T phase(){
 		return phi;
 	}
  
 	CUDA_CALLABLE void phase(T p){
 		phi = p;
-	}
+	}*/
  
 	CUDA_CALLABLE vec< complex<T> > pos(vec<T> p = vec<T>(0, 0, 0)){
 		vec< complex<T> > result;
@@ -199,8 +201,8 @@ public:
 			T tp = 2 / (1 + nr);
 			vec<T> kr = -k;
 			vec<T> kt = k * nr;			//set the k vectors for theta_i = 0
-			vec<T> Er = E0 * rp;		//compute the E vectors
-			vec<T> Et = E0 * tp;
+			vec< complex<T> > Er = E0 * rp;		//compute the E vectors
+			vec< complex<T> > Et = E0 * tp;
 			T phase_t = P.p().dot(k - kt);	//compute the phase offset
 			T phase_r = P.p().dot(k - kr);
 			//std::cout<<"Degeneracy: Head-On"<<std::endl;
@@ -242,16 +244,18 @@ public:
 		kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr;
  
 		//compute the magnitude of the p- and s-polarized components of the incident E vector
-		T Ei_s = E0.dot(x_hat);
-		int sgn = (0 < E0.dot(y_hat)) - (E0.dot(y_hat) < 0);
-		T Ei_p = sgn * ( E0 - x_hat * Ei_s ).len();
+		complex<T> Ei_s = E0.dot(x_hat);
+		//int sgn = (0 < E0.dot(y_hat)) - (E0.dot(y_hat) < 0);
+		int sgn = E0.dot(y_hat).sgn();
+		vec< complex<T> > cx_hat = x_hat;
+		complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
 		//T Ei_p = ( E0 - x_hat * Ei_s ).len();
 		//compute the magnitude of the p- and s-polarized components of the reflected E vector
-		T Er_s = Ei_s * rs;
-		T Er_p = Ei_p * rp;
+		complex<T> Er_s = Ei_s * rs;
+		complex<T> Er_p = Ei_p * rp;
 		//compute the magnitude of the p- and s-polarized components of the transmitted E vector
-		T Et_s = Ei_s * ts;
-		T Et_p = Ei_p * tp;
+		complex<T> Et_s = Ei_s * ts;
+		complex<T> Et_p = Ei_p * tp;
  
 		//std::cout<<"E0: "<<E0<<std::endl;
 		//std::cout<<"E0 dot y_hat: "<<E0.dot(y_hat)<<std::endl;
@@ -262,9 +266,9 @@ public:
  
  
 		//compute the reflected E vector
-		vec<T> Er = (y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + x_hat * Er_s;
+		vec< complex<T> > Er = vec< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
 		//compute the transmitted E vector
-		vec<T> Et = (y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + x_hat * Et_s;
+		vec< complex<T> > Et = vec< complex<T> >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s;
  
 		T phase_t = P.p().dot(k - kt);
 		T phase_r = P.p().dot(k - kr);
@@ -275,15 +279,15 @@ public:
  
 		//create the plane waves
 		r.k = kr;
-		r.E0 = Er;
-		r.phi = phase_r;
+		r.E0 = Er * exp( complex<T>(0, phase_r) );
+		//r.phi = phase_r;
  
 		//t = bend(kt);
 		//t.k = t.k * nr;
  
 		t.k = kt;
-		t.E0 = Et;
-		t.phi = phase_t;
+		t.E0 = Et * exp( complex<T>(0, phase_t) );
+		//t.phi = phase_t;
 		//std::cout<<"i: "<<str()<<std::endl;
 		//std::cout<<"r: "<<r.str()<<std::endl;
 		//std::cout<<"t: "<<t.str()<<std::endl;