fixed bug in plane wave refraction

David Mayerich
1 parent 821513c8
Showing 8 changed files with 483 additions and 247 deletions Show diff stats
math/matrix.h
math/quad.h
math/quaternion.h
math/realfield.cuh
math/rect.h
math/vector.h
optics/efield.cuh
optics/planewave.h
@@ -5,6 +5,7 @@
 #include <string.h>
 #include <iostream>
 #include "vector.h"
+#include "../cuda/callable.h"
  
 namespace rts
 {
@@ -15,7 +16,7 @@ struct matrix
 	//the matrix will be stored in column-major order (compatible with OpenGL)
 	T M[N*N];
  
-	matrix()
+	CUDA_CALLABLE matrix()
 	{
 		for(int r=0; r<N; r++)
 			for(int c=0; c<N; c++)
@@ -25,12 +26,12 @@ struct matrix
 					(*this)(r, c) = 0;
 	}
  
-	T& operator()(int row, int col)
+	CUDA_CALLABLE T& operator()(int row, int col)	//writable access to element (row, col); no bounds checking is performed
 	{
 		return M[col * N + row];	//column-major layout: each column occupies N consecutive entries of M
 	}
  
-	matrix<T, N> operator=(T rhs)
+	CUDA_CALLABLE matrix<T, N> operator=(T rhs)
 	{
 		int Nsq = N*N;
 		for(int i=0; i<Nsq; i++)
@@ -39,15 +40,7 @@ struct matrix
 		return *this;
 	}
  
-	/*matrix<T, N> operator=(matrix<T, N> rhs)
-	{
-		for(int i=0; i<N; i++)
-			M[i] = rhs.M[i];
-
-		return *this;
-	}*/
-
-	vec<T, N> operator*(vec<T, N> rhs)
+	CUDA_CALLABLE vec<T, N> operator*(vec<T, N> rhs)
 	{
 		vec<T, N> result;
  
@@ -58,7 +51,7 @@ struct matrix
 		return result;
 	}
  
-	std::string toStr()
+	CUDA_CALLABLE std::string toStr()
 	{
 		std::stringstream ss;
  
-#ifndef RTS_RECT_H
-#define RTS_RECT_H
+#ifndef RTS_QUAD_H
+#define RTS_QUAD_H
  
 //enable CUDA_CALLABLE macro
 #include "../cuda/callable.h"
-#include "rts/math/matrix.h"
-
 #ifndef RTS_QUATERNION_H
 #define RTS_QUATERNION_H
  
+#include "rts/math/matrix.h"
+#include "../cuda/callable.h"
+
 namespace rts{
  
 template<typename T>
@@ -14,160 +15,146 @@ public:
 	T y;
 	T z;
  
-	void normalize();
-	void CreateRotation(T theta, T axis_x, T axis_y, T axis_z);
-	void CreateRotation(T theta, vec<T, 3> axis);
-	quaternion<T> operator*(quaternion<T> &rhs);
-	matrix<T, 3> toMatrix3();
-	matrix<T, 4> toMatrix4();
+	//Scale this quaternion to unit length, in place.
+	//A zero quaternion has no direction and is left unchanged instead of being filled with NaN/Inf.
+	CUDA_CALLABLE void normalize(){
  
+		//keep the arithmetic in T so that a float quaternion is not silently promoted to double
+		T length = (T)sqrt(w*w + x*x + y*y + z*z);
+		if(length == (T)0)
+			return;
+
+		w = w/length;
+		x = x/length;
+		y = y/length;
+		z = z/length;
+	}
  
-	quaternion();
-	quaternion(T w, T x, T y, T z);
+	CUDA_CALLABLE void CreateRotation(T theta, T ux, T uy, T uz){	//rotation by theta about the axis (ux, uy, uz)
  
-};
+		vec<T, 3> u(ux, uy, uz);	//pack the axis components into a vector
+		CreateRotation(theta, u);		//delegate to the (angle, axis vector) overload
+	}
  
-template<typename T>
-void quaternion<T>::normalize()
-{
-	double length=sqrt(w*w + x*x + y*y + z*z);
-	w=w/length;
-	x=x/length;
-	y=y/length;
-	z=z/length;
-}
+	//Set this quaternion to a rotation by theta about the axis u.
+	//theta is handed to cos/sin as-is; the axis is normalized first, so u need not be a unit vector.
+	CUDA_CALLABLE void CreateRotation(T theta, vec<T, 3> u){
  
-template<typename T>
-void quaternion<T>::CreateRotation(T theta, T ux, T uy, T uz)
-{
-	vec<T, 3> u(ux, uy, uz);
+		vec<T, 3> axis = u.norm();
  
-	CreateRotation(theta, u);
-	
-}
+		//half-angle terms shared by the four components
+		T cosHalf = (T)cos(theta/2);
+		T sinHalf = (T)sin(theta/2);
+
+		w = cosHalf;
+		x = axis[0]*sinHalf;
+		y = axis[1]*sinHalf;
+		z = axis[2]*sinHalf;
+	}
  
-template<typename T>
-void quaternion<T>::CreateRotation(T theta, vec<T, 3> u)
-{
-	vec<T, 3> u_hat = u.norm();
+	CUDA_CALLABLE void CreateRotation(vec<T, 3> from, vec<T, 3> to){
  
-	//assign the given Euler rotation to this quaternion
-	w = (T)cos(theta/2);
-	x = u_hat[0]*(T)sin(theta/2);
-	y = u_hat[1]*(T)sin(theta/2);
-	z = u_hat[2]*(T)sin(theta/2);
-}
+		vec<T> r = from.cross(to);			//compute the rotation vector
+		T theta = asin(r.len());				//compute the angle of the rotation about r
+		//deal with a zero vector (both k and kn point in the same direction)
+		if(theta == (T)0)
+			return;
  
-template<typename T>
-quaternion<T> quaternion<T>::operator *(quaternion<T> &param)
-{
-	float A, B, C, D, E, F, G, H;
+		//create a quaternion to capture the rotation
+		CreateRotation(theta, r.norm());
+	}
  
  
-	A = (w + x)*(param.w + param.x);
-	B = (z - y)*(param.y - param.z);
-	C = (w - x)*(param.y + param.z);
-	D = (y + z)*(param.w - param.x);
-	E = (x + z)*(param.x + param.y);
-	F = (x - z)*(param.x - param.y);
-	G = (w + y)*(param.w - param.z);
-	H = (w - y)*(param.w + param.z);
  
-	quaternion<T> result;
-	result.w = B + (-E - F + G + H) /2;
-	result.x = A - (E + F + G + H)/2;
-	result.y = C + (E - F + G - H)/2;
-	result.z = D + (E - F - G + H)/2;
+	CUDA_CALLABLE quaternion<T> operator *(quaternion<T> &param){
  
-	return result;
-}
+		float A, B, C, D, E, F, G, H;
  
-template<typename T>
-matrix<T, 3> quaternion<T>::toMatrix3()
-{
-	matrix<T, 3> result;
  
+		A = (w + x)*(param.w + param.x);
+		B = (z - y)*(param.y - param.z);
+		C = (w - x)*(param.y + param.z);
+		D = (y + z)*(param.w - param.x);
+		E = (x + z)*(param.x + param.y);
+		F = (x - z)*(param.x - param.y);
+		G = (w + y)*(param.w - param.z);
+		H = (w - y)*(param.w + param.z);
  
-    T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
+		quaternion<T> result;
+		result.w = B + (-E - F + G + H) /2;
+		result.x = A - (E + F + G + H)/2;
+		result.y = C + (E - F + G - H)/2;
+		result.z = D + (E - F - G + H)/2;
  
+		return result;
+	}
+	
+	CUDA_CALLABLE matrix<T, 3> toMatrix3(){	//3x3 matrix built from (w, x, y, z); NOTE(review): presumably expects a unit quaternion - confirm callers normalize
  
-    // calculate coefficients
-    x2 = x + x; y2 = y + y;
-    z2 = z + z;
-    xx = x * x2; xy = x * y2; xz = x * z2;
-    yy = y * y2; yz = y * z2; zz = z * z2;
-    wx = w * x2; wy = w * y2; wz = w * z2;
+		matrix<T, 3> result;
  
-	result(0, 0) = 1 - (yy + zz);
-	result(0, 1) = xy - wz;
  
-	result(0, 2) = xz + wy;
+	    T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
  
-	result(1, 0) = xy + wz;
-	result(1, 1) = 1 - (xx + zz);
  
-	result(1, 2) = yz - wx;
+	    // calculate coefficients
+	    x2 = x + x; y2 = y + y;	//doubled components, so every product below already carries a factor of 2
+	    z2 = z + z;
+	    xx = x * x2; xy = x * y2; xz = x * z2;
+	    yy = y * y2; yz = y * z2; zz = z * z2;
+	    wx = w * x2; wy = w * y2; wz = w * z2;
  
-	result(2, 0) = xz - wy;
-	result(2, 1) = yz + wx;
+		result(0, 0) = 1 - (yy + zz);	//result(row, col): first row
+		result(0, 1) = xy - wz;
  
-	result(2, 2) = 1 - (xx + yy);
+		result(0, 2) = xz + wy;
  
-	return result;
-}
+		result(1, 0) = xy + wz;	//second row
+		result(1, 1) = 1 - (xx + zz);
  
-template<typename T>
-matrix<T, 4> quaternion<T>::toMatrix4()
-{
-	matrix<T, 4> result;
+		result(1, 2) = yz - wx;
  
+		result(2, 0) = xz - wy;	//third row
+		result(2, 1) = yz + wx;
  
-    T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
+		result(2, 2) = 1 - (xx + yy);
  
+		return result;
+	}
  
-    // calculate coefficients
-    x2 = x + x; y2 = y + y;
-    z2 = z + z;
-    xx = x * x2; xy = x * y2; xz = x * z2;
-    yy = y * y2; yz = y * z2; zz = z * z2;
-    wx = w * x2; wy = w * y2; wz = w * z2;
+	CUDA_CALLABLE matrix<T, 4> toMatrix4(){	//4x4 version: same upper-left 3x3 entries as toMatrix3 plus element (3, 3)
  
-	result(0, 0) = 1 - (yy + zz);
-	result(0, 1) = xy - wz;
+		matrix<T, 4> result;	//entries not assigned below keep whatever the matrix constructor stored - TODO confirm it zeroes them
+	    T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
  
-	result(0, 2) = xz + wy;
+	    // calculate coefficients
+	    x2 = x + x; y2 = y + y;	//doubled components, so every product below already carries a factor of 2
+	    z2 = z + z;
+	    xx = x * x2; xy = x * y2; xz = x * z2;
+	    yy = y * y2; yz = y * z2; zz = z * z2;
+	    wx = w * x2; wy = w * y2; wz = w * z2;
  
-	result(1, 0) = xy + wz;
-	result(1, 1) = 1 - (xx + zz);
+		result(0, 0) = 1 - (yy + zz);	//result(row, col): first row
+		result(0, 1) = xy - wz;
  
-	result(1, 2) = yz - wx;
+		result(0, 2) = xz + wy;
  
-	result(2, 0) = xz - wy;
-	result(2, 1) = yz + wx;
+		result(1, 0) = xy + wz;	//second row
+		result(1, 1) = 1 - (xx + zz);
  
-	result(2, 2) = 1 - (xx + yy);
+		result(1, 2) = yz - wx;
  
-	result(3, 3) = 1;
+		result(2, 0) = xz - wy;	//third row
+		result(2, 1) = yz + wx;
  
-	return result;
-}
+		result(2, 2) = 1 - (xx + yy);
  
-template<typename T>
-quaternion<T>::quaternion()
-{
-	w=0; x=0; y=0; z=0;
-}
+		result(3, 3) = 1;	//bottom-right (homogeneous) element
  
-template<typename T>
-quaternion<T>::quaternion(T c, T i, T j, T k)
-{
-	w=c;  x=i;  y=j;  z=k;
-}
+		return result;
+	}
+
+
+	//Default constructor: all four components start at zero.
+	CUDA_CALLABLE quaternion()
+		: w(0), x(0), y(0), z(0)
+	{}
+
+	//Component-wise constructor: c is stored in w, and (i, j, k) in (x, y, z).
+	CUDA_CALLABLE quaternion(T c, T i, T j, T k)
+		: w(c), x(i), y(j), z(k)
+	{}
+
+};
  
 }	//end rts namespace
  
-//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
-//template<class T> using rtsQuaternion = rts::quaternion<T>;
-//#endif
  
 #endif
 #ifndef	RTS_REALFIELD_H
 #define RTS_REALFIELD_H
  
-#include "../visualization/colormap.h"
-#include "../envi/envi.h"
-#include "../math/quad.h"
-#include "../cuda/devices.h"
-#include "cublas_v2.h"
+#include "../visualization/colormap.h"
+#include "../envi/envi.h"
+#include "../math/rect.h"
+#include "../cuda/devices.h"
+#include "cublas_v2.h"
 #include <cuda_runtime.h>
  
-///Compute a Gaussian function in 3D (mostly for testing)
-/*template<typename T>
-__global__ void gpu_gaussian(T* dest, unsigned int r0, unsigned int r1, T mean, T std, rts::quad<T> shape)
-{
-	int iu = blockIdx.x * blockDim.x + threadIdx.x;
-	int iv = blockIdx.y * blockDim.y + threadIdx.y;
-
-	//make sure that the thread indices are in-bounds
-	if(iu >= r0 || iv >= r1) return;
-
-	//compute the index into the field
-	int i = iv*r0 + iu;
-
-	T u = (T)iu / (T)r0;
-	T v = (T)iv / (T)r1;
-
-	rts::vec<T> p = shape(u, v);
-
-	T fx = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[0] - mean, 2) / (2 * std*std) );
-	T fy = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[1] - mean, 2) / (2 * std*std) );
-	T fz = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[2] - mean, 2) / (2 * std*std) );
-
-	dest[i] = fx * fy * fz;
+///Compute a Gaussian function in 3D (mostly for testing)
+/*template<typename T>
+__global__ void gpu_gaussian(T* dest, unsigned int r0, unsigned int r1, T mean, T std, rts::rect<T> shape)
+{
+	int iu = blockIdx.x * blockDim.x + threadIdx.x;
+	int iv = blockIdx.y * blockDim.y + threadIdx.y;
+
+	//make sure that the thread indices are in-bounds
+	if(iu >= r0 || iv >= r1) return;
+
+	//compute the index into the field
+	int i = iv*r0 + iu;
+
+	T u = (T)iu / (T)r0;
+	T v = (T)iv / (T)r1;
+
+	rts::vec<T> p = shape(u, v);
+
+	T fx = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[0] - mean, 2) / (2 * std*std) );
+	T fy = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[1] - mean, 2) / (2 * std*std) );
+	T fz = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[2] - mean, 2) / (2 * std*std) );
+
+	dest[i] = fx * fy * fz;
 }*/
  
 namespace rts{
@@ -38,9 +38,9 @@ namespace rts{
 template<typename P, unsigned int N = 1, bool positive = false>
 class realfield{
  
-	P* X[N];		//an array of N gpu pointers for each field component
-	int R[2];		//resolution of the slice
-	quad<P> shape;
+	P* X[N];		//an array of N gpu pointers for each field component
+	int R[2];		//resolution of the slice
+	rect<P> shape;
  
 	void process_filename(std::string name, std::string &prefix, std::string &postfix, 
                           std::string &ext, unsigned int &digits)
@@ -68,13 +68,13 @@ class realfield{
  
 	void init()	//mark every component pointer as unallocated
 	{
-		for(unsigned int n=0; n<N; n++)
+		for(unsigned int n=0; n<N; n++)
 			X[n] = NULL;	//NULL is what destroy() and clear() test for before touching a component
 	}
 	void destroy()
 	{
-		for(unsigned int n=0; n<N; n++)
-			if(X[n] != NULL)
+		//release every allocated component and clear its pointer, so that a later
+		//destroy() or clear() never touches device memory that was already freed
+		for(unsigned int n=0; n<N; n++)
+			if(X[n] != NULL)
+			{
 				HANDLE_ERROR(cudaFree(X[n]));
+				X[n] = NULL;
+			}
 	}
  
@@ -86,25 +86,25 @@ public:
 		init();
 		std::cout<<"realfield CONSTRUCTOR"<<std::endl;
 	}
-	realfield(unsigned int x, unsigned int y)
-	{
-        //set the resolution
-        R[0] = x;
-        R[1] = y;
-		//allocate memory on the GPU
-		for(unsigned int n=0; n<N; n++)
-		{
+	realfield(unsigned int x, unsigned int y)	//allocate and zero an x-by-y slice for each of the N components
+	{
+        //set the resolution
+        R[0] = x;
+        R[1] = y;
+		//allocate memory on the GPU
+		for(unsigned int n=0; n<N; n++)
+		{
 			HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(P) * R[0] * R[1] ));
-		}
-		shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0));	//default geometry
-		clear();		//zero the field
-		std::cout<<"realfield CONSTRUCTOR"<<std::endl;
+		}
+		//shape = rect<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0));	//default geometry (disabled: shape is not assigned here)
+		clear();		//zero the field
+		std::cout<<"realfield CONSTRUCTOR"<<std::endl;
     }
  
-	~realfield()
-    {
-		destroy();
-		std::cout<<"realfield DESTRUCTOR"<<std::endl;
+	~realfield()
+    {
+		destroy();	//free every component that is still allocated
+		std::cout<<"realfield DESTRUCTOR"<<std::endl;	//trace output
     }
  
 	P* ptr(unsigned int n)
@@ -115,11 +115,11 @@ public:
 	}
  
 	//set all components of the field to zero
-	void clear()
-    {
-		for(unsigned int n=0; n<N; n++)
-			if(X[n] != NULL)
-				HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(P) * R[0] * R[1]));
+	void clear()
+    {
+		for(unsigned int n=0; n<N; n++)
+			if(X[n] != NULL)	//components that were never allocated are skipped
+				HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(P) * R[0] * R[1]));	//byte-wise zero fill of the whole R[0] x R[1] slice
     }
  
 	void toImage(std::string filename, unsigned int n, P vmin, P vmax, rts::colormapType cmap = rts::cmBrewer)
@@ -127,7 +127,7 @@ public:
 		rts::gpu2image<P>(X[n], filename, R[0], R[1], vmin, vmax, cmap);
     }
  
-	void toImages(std::string filename, rts::colormapType cmap = rts::cmBrewer)
+	void toImages(std::string filename, bool global_max = true, rts::colormapType cmap = rts::cmBrewer)
 	{
         std::string prefix, postfix, extension;
         unsigned int digits;
@@ -175,74 +175,77 @@ public:
  
         cublasDestroy(handle);  //destroy the CUBLAS handle
  
+        P outputMax = abs(maxAll);			//maximum value used for each output image
 		for(int n=0; n<N; n++)          //for each image
 		{
+			if(!global_max) outputMax = maxVal[n];	//calculate the maximum value for this image
+
 			stringstream ss;            //assemble the file name
 			ss<<prefix<<std::setfill('0')<<std::setw(digits)<<n<<postfix<<extension;
 			std::cout<<ss.str()<<std::endl;
 			if(positive)                //if the image is positive
-				toImage(ss.str(), n, 0, maxAll, cmap);         //save the image using the global maximum
+				toImage(ss.str(), n, 0, abs(outputMax), cmap);         //save the image using the global maximum
 			else
-				toImage(ss.str(), n, -abs(maxVal[n]), abs(maxVal[n]), cmap);   //save the image using the global maximum
+				toImage(ss.str(), n, -abs(outputMax), abs(outputMax), cmap);   //save the image using the global maximum
 		}
 	}
  
-	//assignment operator
-	realfield & operator= (const realfield & rhs)
-    {
-        //de-allocate any existing GPU memory
-        destroy();
-
-        //copy the slice resolution
-        R[0] = rhs.R[0];
-        R[1] = rhs.R[1];
-
-		for(unsigned int n=0; n<N; n++)
-		{
-			//allocate the necessary memory
-			HANDLE_ERROR(cudaMalloc(&X[n], sizeof(P) * R[0] * R[1]));
-			//copy the slice
-			HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
-		}
-        std::cout<<"Assignment operator."<<std::endl;
-
-        return *this;
-    }
-
-	///copy constructor
-	realfield(const realfield &rhs)
-	{
-		//first make a shallow copy
-		R[0] = rhs.R[0];
-		R[1] = rhs.R[1];
-
-		for(unsigned int n=0; n<N; n++)
-		{
-			//do we have to make a deep copy?
-			if(rhs.X[n] == NULL)
-				X[n] = NULL;		//no
-			else
-			{
-				//allocate the necessary memory
-				HANDLE_ERROR(cudaMalloc(&X[n], sizeof(P) * R[0] * R[1]));
-
-				//copy the slice
-				HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
-			}
-		}
-
-		std::cout<<"realfield COPY CONSTRUCTOR"<<std::endl;
+	//assignment operator: replaces this field with a deep copy of rhs
+	//(resolution, geometry and every allocated component)
+	realfield & operator= (const realfield & rhs)
+    {
+        //self-assignment would free the very buffers that are about to be copied from
+        if(this == &rhs)
+            return *this;
+
+        //de-allocate any existing GPU memory
+        destroy();
+
+        //copy the slice resolution and geometry
+        R[0] = rhs.R[0];
+        R[1] = rhs.R[1];
+        shape = rhs.shape;
+
+		for(unsigned int n=0; n<N; n++)
+		{
+			//a component that is unallocated in rhs stays unallocated here
+			if(rhs.X[n] == NULL)
+			{
+				X[n] = NULL;
+				continue;
+			}
+
+			//allocate the necessary memory
+			HANDLE_ERROR(cudaMalloc(&X[n], sizeof(P) * R[0] * R[1]));
+			//copy the slice
+			HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
+		}
+        std::cout<<"Assignment operator."<<std::endl;
+
+        return *this;
+    }
+
+	///copy constructor: builds a deep copy of rhs (resolution, geometry and every allocated component)
+	realfield(const realfield &rhs)
+	{
+		//copy the plain members first
+		R[0] = rhs.R[0];
+		R[1] = rhs.R[1];
+		shape = rhs.shape;		//the geometry belongs to the field, so the copy carries it as well
+
+		for(unsigned int n=0; n<N; n++)
+		{
+			//do we have to make a deep copy?
+			if(rhs.X[n] == NULL)
+				X[n] = NULL;		//no
+			else
+			{
+				//allocate the necessary memory
+				HANDLE_ERROR(cudaMalloc(&X[n], sizeof(P) * R[0] * R[1]));
+
+				//copy the slice
+				HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
+			}
+		}
+
+		std::cout<<"realfield COPY CONSTRUCTOR"<<std::endl;
 	}
  
-	/*void gaussian(P mean, P std, unsigned int n=0)	//creates a 3D gaussian using component n
-	{
-		int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
-        int SQRT_BLOCK = (int)sqrt((float)maxThreads);
-		//create one thread for each detector pixel
-		dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
-		dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
-
-		gpu_gaussian<float> <<<dimGrid, dimBlock>>> (X[n], R[0], R[1], mean, std, shape);
+	/*void gaussian(P mean, P std, unsigned int n=0)	//creates a 3D gaussian using component n
+	{
+		int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
+        int SQRT_BLOCK = (int)sqrt((float)maxThreads);
+		//create one thread for each detector pixel
+		dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
+		dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
+
+		gpu_gaussian<float> <<<dimGrid, dimBlock>>> (X[n], R[0], R[1], mean, std, shape);
 	}*/
  
  
+#ifndef RTS_RECT_H
+#define RTS_RECT_H
+
+//enable CUDA_CALLABLE macro
+#include "../cuda/callable.h"
+#include "../math/vector.h"
+#include "../math/triangle.h"
+#include "../math/quaternion.h"
+#include <algorithm>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+namespace rts{
+
+//template for a rectangle class in ND space
+template <class T, int N = 3>
+struct rect
+{
+	/*
+		^                   O
+		|                   
+		|                   
+		Y         C         
+		|                   
+		|                   
+		O---------X--------->
+
+		C is the center of the rectangle; X and Y are the edge vectors that
+		span its full extent, so the corners are C +/- X/2 +/- Y/2.
+	*/
+
+private:
+
+	rts::vec<T, N> C;		//center point
+	rts::vec<T, N> X;		//edge vector for the first parametric direction
+	rts::vec<T, N> Y;		//edge vector for the second parametric direction
+
+	//uniformly scale both edge vectors; the center is not moved
+	CUDA_CALLABLE void scale(T factor){
+		X *= factor;
+		Y *= factor;
+	}
+
+	CUDA_CALLABLE void normal(vec<T, N> n){		//orient the rectangle along the specified normal
+
+		n = n.norm();								//normalize, just in case
+		vec<T, N> n_current = X.cross(Y).norm();	//compute the current normal
+		quaternion<T> q;							//create a quaternion
+		q.CreateRotation(n_current, n);				//initialize a rotation from n_current to n
+
+		//build the rotation matrix once and apply it to both edge vectors
+		matrix<T, 3> rotation = q.toMatrix3();
+		X = rotation * X;
+		Y = rotation * Y;
+	}
+
+	//default setup: unit square centered at the origin, spanned by the x and y axes
+	CUDA_CALLABLE void init(){
+		C = vec<T, N>(0, 0, 0);
+		X = vec<T, N>(1, 0, 0);
+		Y = vec<T, N>(0, 1, 0);
+	}
+
+public:
+
+	//default constructor: see init()
+	CUDA_CALLABLE rect(){
+		init();
+	}
+
+	//square with edge length "size", centered on the z axis at z = z_pos
+	CUDA_CALLABLE rect(T size, T z_pos = (T)0){
+		init();			//use the default setup
+		scale(size);	//scale the rectangle
+		C[2] = z_pos;
+	}
+
+	//square with edge length "size", centered at c and oriented so that its normal is n
+	CUDA_CALLABLE rect(T size, vec<T, N> c, vec<T, N> n = vec<T, N>(0, 0, 1)){
+		init();			//start with the default setting
+		C = c;
+		scale(size);	//scale the rectangle
+		normal(n);		//orient
+
+	}
+
+	/*CUDA_CALLABLE rect(vec<T, N> a, vec<T, N> b, vec<T, N> c)
+	{
+		A = a;		
+		Y = b - a;
+		X = c - a - Y;
+
+	}*/
+
+	/*******************************************************************
+	Constructor - create a rect from a position, normal, and rotation
+	*******************************************************************/
+	/*CUDA_CALLABLE rect(rts::vec<T, N> c, rts::vec<T, N> normal, T width, T height, T theta)
+	{
+
+        //compute the X direction - start along world-space X
+        Y = rts::vec<T, N>(0, 1, 0);
+        if(Y == normal)
+            Y = rts::vec<T, N>(0, 0, 1);
+
+        X = Y.cross(normal).norm();
+
+        std::cout<<X<<std::endl;
+
+        //rotate the X axis by theta radians
+        rts::quaternion<T> q;
+        q.CreateRotation(theta, normal);
+        X = q.toMatrix3() * X;
+        Y = normal.cross(X);
+
+        //normalize everything
+        X = X.norm();
+        Y = Y.norm();
+
+        //scale to match the rect width and height
+        X = X * width;
+        Y = Y * height;
+
+        //set the corner of the plane
+        A = c - X * 0.5f - Y * 0.5f;
+
+        std::cout<<X<<std::endl;
+	}*/
+
+	//boolean comparison: two rects are equal when center and both edge vectors match
+	bool operator==(const rect<T, N> & rhs)
+	{
+		return C == rhs.C && X == rhs.X && Y == rhs.Y;
+	}
+
+	/*******************************************
+	Return the normal for the rect
+	*******************************************/
+	CUDA_CALLABLE rts::vec<T, N> n()
+	{
+        return (X.cross(Y)).norm();
+	}
+
+	//map the parameters (a, b) to a position: (0, 0) is the corner C - X/2 - Y/2,
+	//(1, 1) is the opposite corner
+	CUDA_CALLABLE rts::vec<T, N> p(T a, T b)
+	{
+		rts::vec<T, N> result;
+		//given the two parameters a, b = [0 1], returns the position in world space
+		vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
+		result = A + X * a + Y * b;
+
+		return result;
+	}
+
+	//shorthand for p(a, b)
+	CUDA_CALLABLE rts::vec<T, N> operator()(T a, T b)
+	{
+		return p(a, b);
+	}
+
+	//host-only text rendering of the four corners
+	std::string str()
+	{
+		std::stringstream ss;
+		vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
+		//the manipulators are qualified so this header does not depend on a using-directive elsewhere
+		ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
+		ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl;
+		ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X;
+
+        return ss.str();
+
+	}
+
+	CUDA_CALLABLE rect<T, N> operator*(T rhs)
+	{
+		//scales the plane by a scalar value
+
+		//create the new rectangle
+		rect<T, N> result = *this;
+		result.scale(rhs);
+
+		return result;
+
+	}
+
+	CUDA_CALLABLE T dist(vec<T, N> p)
+	{
+        //compute the distance between a point and this rect
+
+		vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
+
+        //first break the rect up into two triangles
+        triangle<T, N> T0(A, A+X, A+Y);
+        triangle<T, N> T1(A+X+Y, A+X, A+Y);
+
+
+        T d0 = T0.dist(p);
+        T d1 = T1.dist(p);
+
+        //the rect distance is the smaller of the two triangle distances
+        if(d0 < d1)
+            return d0;
+        else
+            return d1;
+	}
+
+	//largest distance from p to any of the four corners
+	CUDA_CALLABLE T dist_max(vec<T, N> p)
+	{
+		vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
+        T da = (A - p).len();
+        T db = (A+X - p).len();
+        T dc = (A+Y - p).len();
+        T dd = (A+X+Y - p).len();
+
+        //plain comparisons instead of std::max, which is a host function and
+        //cannot be relied on inside a CUDA_CALLABLE member
+        T farthest = da;
+        if(db > farthest) farthest = db;
+        if(dc > farthest) farthest = dc;
+        if(dd > farthest) farthest = dd;
+
+        return farthest;
+	}
+};
+
+}	//end namespace rts
+
+//Stream insertion for rts::rect: writes the text produced by rect::str().
+//The rect is taken by value because str() is a non-const member.
+template <typename T, int N>
+std::ostream& operator<<(std::ostream& os, rts::rect<T, N> R)
+{
+    return os<<R.str();
+}
+
+
+#endif
@@ -169,6 +169,11 @@ struct vec
  
         return result;
 	}
+	//in-place multiplication of every component by the scalar rhs; returns a copy of the scaled vector
+	CUDA_CALLABLE vec<T, N> operator*=(T rhs){
+		int i = 0;
+		while(i < N){
+			v[i] = v[i] * rhs;
+			++i;
+		}
+		return *this;
+	}
  
 	//conversion from a point
 	/*CUDA_CALLABLE vector<T, N> & operator=(point<T, N> rhs)
@@ -14,7 +14,7 @@ namespace rts{
  
 template<typename T>
 __global__ void gpu_planewave2efield(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, 
-									 planewave<T> w, quad<T> q)
+									 planewave<T> w, rect<T> q)
 {
     int iu = blockIdx.x * blockDim.x + threadIdx.x;
     int iv = blockIdx.y * blockDim.y + threadIdx.y;
@@ -119,7 +119,7 @@ protected:
     unsigned int R[2];
  
     //shape of the 2D field
-    quad<P> pos;
+    rect<P> pos;
  
 	void from_planewave(planewave<P> p)
 	{
@@ -196,7 +196,7 @@ public:
     efield(unsigned int res0, unsigned int res1, bool _vector = true)
     {
         init(res0, res1, _vector);	//setup is delegated to init(); pos is no longer assigned in this constructor
-        pos = rts::quad<P>(rts::vec<P>(-10, 0, -10), rts::vec<P>(-10, 0, 10), rts::vec<P>(10, 0, 10));
+        //pos = rts::rect<P>(rts::vec<P>(-10, 0, -10), rts::vec<P>(-10, 0, 10), rts::vec<P>(10, 0, 10));
     }
  
     //destructor
@@ -217,7 +217,7 @@ public:
         }
     }
  
-    void position(quad<P> _p)
+    void position(rect<P> _p)	//replace the rectangle stored in pos with _p
     {
         pos = _p;
     }
@@ -60,7 +60,35 @@ public:
  
 	planewave<P> refract(rts::vec<P> kn) const
 	{
-		vec<P> kn_hat = kn.norm();	//normalize new_k
+		vec<P> kn_hat = kn.norm();				//normalize the new k
+		vec<P> k_hat = k.norm();				//normalize the current k
+
+		vec<P> r = k_hat.cross(kn_hat);			//compute the rotation vector
+
+		P theta = asin(r.len());				//compute the angle of the rotation about r
+
+		planewave<P> new_p;						//create a new plane wave
+
+		//deal with a zero vector (both k and kn point in the same direction)
+		if(theta == (P)0)
+		{
+			new_p = *this;
+			return new_p;
+		}
+
+		//create a quaternion to capture the rotation
+		quaternion<P> q;
+		q.CreateRotation(theta, r.norm());
+
+		//apply the rotation to E0
+		vec<P> E0n = q.toMatrix3() * E0;
+
+		new_p.k = kn_hat * k.len();
+		new_p.E0 = E0n;
+
+		return new_p;
+
+		/*vec<P> kn_hat = kn.norm();	//normalize new_k
 		vec<P> k_hat = k.norm();
  
 		//compute the side vector (around which we will be rotating)
@@ -86,7 +114,8 @@ public:
 		new_p.E0 = E0_prime;
 		new_p.k = kn_hat * k.len();
  
-		return new_p;
+		return new_p;*/
+
 		/*vec<P> kn_hat = kn.norm();		//normalize kn
 		vec<P> k_hat = k.norm();
 		vec<P> E0_hat = E0.norm();