added vector field visualization tools

David Mayerich
1 parent 4d67ff4e
Showing 15 changed files with 1088 additions and 357 deletions Show diff stats
cuda/devices.h
cuda/memory.h
math/function.h
math/matrix.h
math/point.h
math/quad.h
math/quaternion.h
math/triangle.h
math/vector.h
optics/beam.h
optics/efield.cuh
optics/planewave.h
visualization/colormap.h
visualization/scalarfield.cuh
visualization/vectorfield.cuh
+#ifndef RTS_CUDA_DEVICES
+#define RTS_CUDA_DEVICES
+
+#include <cuda.h>
+
+namespace rts{
+
+///Return the maximum number of threads per block reported for the current CUDA device.
+///	The function is defined in a header, so it is marked inline to avoid
+///	multiple-definition link errors when several translation units include it.
+///	If either runtime query fails, 1 is returned (the smallest usable block size)
+///	instead of reading an uninitialized property structure.
+inline int maxThreadsPerBlock()
+{
+	int device = 0;
+	if(cudaGetDevice(&device) != cudaSuccess)		//get the id of the current device
+		return 1;
+	cudaDeviceProp props;		//device property structure
+	if(cudaGetDeviceProperties(&props, device) != cudaSuccess)
+		return 1;
+	return props.maxThreadsPerBlock;
+}
+}	//end namespace rts
+
+#endif
 \ No newline at end of file
-#include <cuda.h>
-
@@ -18,62 +18,74 @@ class function
 	//function data
 	std::vector<dataPoint> f;
  
-	//comparison function for searching lambda
-    static bool findCeiling(dataPoint a, dataPoint b)
-    {
-        return (a.x > b.x);
+	//comparison function for searching lambda
+    static bool findCeiling(dataPoint a, dataPoint b)
+    {
+        return (a.x > b.x);
     }
  
  
 public:
+	///default constructor: leaves the list of data points empty
+	function()
+	{
+		//insert(0, 0);
+	}
+
 	Y linear(X x)
 	{
-		//declare an iterator
-        typename std::vector< dataPoint >::iterator it;
-
-		dataPoint s;
-		s.x = x;
-
-        it = search(f.begin(), f.end(), &s, &s + 1, &function<X, Y>::findCeiling);
-
-        //if the wavelength is past the end of the list, return the back
-        if(it == f.end())
-            return f.back().y;
-        //if the wavelength is before the beginning of the list, return the front
-        else if(it == f.begin())
-            return f.front().y;
-        //otherwise interpolate
-        else
-        {
-            X xMax = (*it).x;
-            X xMin = (*(it - 1)).x;
-            //std::cout<<lMin<<"----------"<<lMax<<std::endl;
-
-            X a = (x - xMin) / (xMax - xMin);
-            Y riMin = (*(it - 1)).y;
-            Y riMax = (*it).y;
-            Y interp;
-            interp = riMin * a + riMax * (1.0 - a);
-            return interp;
-        }
+		//an empty function evaluates to zero
+		if(f.size() == 0)
+			return (Y)0;
+
+		//search key holding the query position
+		dataPoint key;
+		key.x = x;
+
+		//find the first stored point for which findCeiling(point, key) holds (point.x > x)
+		typename std::vector< dataPoint >::iterator hi;
+		hi = search(f.begin(), f.end(), &key, &key + 1, &function<X, Y>::findCeiling);
+
+		//queries past the last point return the last value
+		if(hi == f.end())
+			return f.back().y;
+
+		//queries before the first point return the first value
+		if(hi == f.begin())
+			return f.front().y;
+
+		//otherwise blend the two bracketing points; a is the fractional position between them
+		typename std::vector< dataPoint >::iterator lo = hi - 1;
+		X upperX = (*hi).x;
+		X lowerX = (*lo).x;
+		X a = (x - lowerX) / (upperX - lowerX);
+
+		Y lowerY = (*lo).y;
+		Y upperY = (*hi).y;
+		Y interp;
+		interp = upperY * a + lowerY * (1.0 - a);
+		return interp;
 	}
  
+	///add a data point to a function
 	void insert(X x, Y y)
 	{
-		//declare an iterator
-        typename std::vector< dataPoint >::iterator it;
-
-		dataPoint s;
-		s.x = x;
-		s.y = y;
-
-        it = search(f.begin(), f.end(), &s, &s + 1, &function<X, Y>::findCeiling);
-
-        //if the function value is past the end of the vector, add it to the back
-        if(it == f.end())
-            return f.push_back(s);
-        //otherwise add the value at the iterator position
-        else
+		dataPoint s;
+		s.x = x;
+		s.y = y;
+
+		if(f.size() == 0 || f.back().x < x)
+			return f.push_back(s);
+
+		//declare an iterator
+        typename std::vector< dataPoint >::iterator it;
+
+		
+
+        it = search(f.begin(), f.end(), &s, &s + 1, &function<X, Y>::findCeiling);
+
+        //if the function value is past the end of the vector, add it to the back
+        if(it == f.end())
+            return f.push_back(s);
+        //otherwise add the value at the iterator position
+        else
 		{
 			f.insert(it, s);
 		}
@@ -90,16 +102,24 @@ public:
 		return f[i].y;
 	}
  
+	///get the number of data points in the function
+	///	(the size of the underlying vector, returned as an unsigned int)
 	unsigned int getN()
 	{
 		return f.size();
 	}
  
+	//look up an indexed component
+	//	returns a copy of the i-th stored data point; i is handed to the vector unchecked
 	dataPoint operator[](int i)
 	{
 		return f[i];
 	}
  
+	///linear interpolation
+	///	evaluates the function at x by forwarding to linear(x)
+	Y operator()(X x)
+	{
+		return linear(x);
+	}
+
 	function<X, Y> operator+(Y r)
 	{
 		function<X, Y> result;
@@ -114,10 +134,19 @@ public:
 		return result;
 	}
  
+	///assign a constant: existing data points are discarded, and a non-zero
+	///	constant is stored as a single data point at x = 0
+	function<X, Y> & operator= (const Y & rhs)
+	{
+		//drop whatever the function held before
+		f.clear();
+
+		//a zero right-hand side leaves the function empty
+		if(!(rhs != 0))
+			return *this;
+
+		//any other value becomes one sample at x = 0
+		insert(0, rhs);
+		return *this;
+	}
+
  
 };
  
 }	//end namespace rts
  
  
-#endif
+#endif
@@ -4,6 +4,7 @@
 //#include "rts/vector.h"
 #include <string.h>
 #include <iostream>
+#include "vector.h"
  
 namespace rts
 {
@@ -46,9 +47,9 @@ struct matrix
 		return *this;
 	}*/
  
-	vector<T, N> operator*(vector<T, N> rhs)
+	vec<T, N> operator*(vec<T, N> rhs)
 	{
-		vector<T, N> result;
+		vec<T, N> result;
  
 		for(int r=0; r<N; r++)
 			for(int c=0; c<N; c++)
-#ifndef RTS_rtsPoint_H
-#define RTS_rtsPoint_H
-
-#include "rts/math/vector.h"
-#include <string.h>
-#include "rts/cuda/callable.h"
-
-namespace rts
-{
-
-template <class T, int N>
-struct point
-{
-	T p[N];
-
-	CUDA_CALLABLE point()
-	{
-
-	}
-
-	//efficiency constructor, makes construction easier for 1D-4D vectors
-	CUDA_CALLABLE point(T x)
-	{
-		p[0] = x;
-	}
-	CUDA_CALLABLE point(T x, T y)
-	{
-		p[0] = x;
-		p[1] = y;
-	}
-	CUDA_CALLABLE point(T x, T y, T z)
-	{
-		p[0] = x;
-		p[1] = y;
-		p[2] = z;
-	}
-	CUDA_CALLABLE point(T x, T y, T z, T w)
-	{
-		p[0] = x;
-		p[1] = y;
-		p[2] = z;
-		p[3] = w;
-	}
-
-	//arithmetic operators
-	CUDA_CALLABLE rts::point<T, N> operator+(vector<T, N> v)
-	{
-        rts::point<T, N> r;
-
-        //calculate the position of the resulting point
-        for(int i=0; i<N; i++)
-            r.p[i] = p[i] + v.v[i];
-
-        return r;
-	}
-	CUDA_CALLABLE rts::point<T, N> operator-(vector<T, N> v)
-	{
-        rts::point<T, N> r;
-
-        //calculate the position of the resulting point
-        for(int i=0; i<N; i++)
-            r.p[i] = p[i] - v.v[i];
-
-        return r;
-	}
-	CUDA_CALLABLE vector<T, N> operator-(point<T, N> rhs)
-	{
-        vector<T, N> r;
-
-        //calculate the position of the resulting point
-        for(int i=0; i<N; i++)
-            r.v[i] = p[i] - rhs.p[i];
-
-        return r;
-	}
-	CUDA_CALLABLE rts::point<T, N> operator*(T rhs)
-	{
-        rts::point<T, N> r;
-
-        //calculate the position of the resulting point
-        for(int i=0; i<N; i++)
-            r.p[i] = p[i] * rhs;
-
-        return r;
-	}
-
-	CUDA_CALLABLE point(const T(&data)[N])
-	{
-		memcpy(p, data, sizeof(T) * N);
-	}
-
-	std::string toStr()
-	{
-		std::stringstream ss;
-
-		ss<<"(";
-		for(int i=0; i<N; i++)
-		{
-			ss<<p[i];
-			if(i != N-1)
-				ss<<", ";
-		}
-		ss<<")";
-
-		return ss.str();
-	}
-
-	//bracket operator
-	CUDA_CALLABLE T& operator[](int i)
-	{
-        return p[i];
-    }
-
-};
-
-}	//end namespace rts
-
-template <typename T, int N>
-std::ostream& operator<<(std::ostream& os, rts::point<T, N> p)
-{
-    os<<p.toStr();
-    return os;
-}
-
-//arithmetic
-template <typename T, int N>
-CUDA_CALLABLE rts::point<T, N> operator*(T lhs, rts::point<T, N> rhs)
-{
-    rts::point<T, N> r;
-
-    return rhs * lhs;
-}
-
-//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
-//template<class T, int N> using rtsPoint = rts::point<T, N>;
-//#endif
-
-#endif
@@ -2,37 +2,36 @@
 #define RTS_RECT_H
  
 //enable CUDA_CALLABLE macro
-#include "rts/cuda/callable.h"
-#include "rts/math/vector.h"
-#include "rts/math/point.h"
-#include "rts/math/triangle.h"
-#include "rts/math/quaternion.h"
+#include "../cuda/callable.h"
+#include "../math/vector.h"
+#include "../math/triangle.h"
+#include "../math/quaternion.h"
 #include <iostream>
 #include <algorithm>
  
 namespace rts{
  
 //template for a quadangle class in ND space
-template <class T, int N>
+template <class T, int N = 3>
 struct quad
 {
 	/*
-		C------------------>O
+		B------------------>C
 		^                   ^
 		|                   |
 		Y                   |
 		|                   |
 		|                   |
-		A---------X-------->B
+		A---------X-------->O
 	*/
  
 	/*T A[N];
 	T B[N];
 	T C[N];*/
  
-	rts::point<T, N> A;
-	rts::vector<T, N> X;
-	rts::vector<T, N> Y;
+	rts::vec<T, N> A;
+	rts::vec<T, N> X;
+	rts::vec<T, N> Y;
  
  
 	CUDA_CALLABLE quad()
@@ -40,56 +39,56 @@ struct quad
  
 	}
  
-	CUDA_CALLABLE quad(point<T, N> a, point<T, N> b, point<T, N> c)
+	//build a quad from three corners: a is stored as the origin A, Y = b - a,
+	//	and X = c - a - Y, so that c equals A + X + Y
+	CUDA_CALLABLE quad(vec<T, N> a, vec<T, N> b, vec<T, N> c)
 	{
  
-		A = a;
-		X = b - a;
-		Y = c - a;
+		A = a;		
+		Y = b - a;
+		X = c - a - Y;
  
 	}
  
     /****************************************************************
     Constructor - create a quad from two points and a normal
     ****************************************************************/
-	CUDA_CALLABLE quad(rts::point<T, N> pMin, rts::point<T, N> pMax, rts::vector<T, N> normal)
+	/*CUDA_CALLABLE quad(rts::vec<T, N> pMin, rts::vec<T, N> pMax, rts::vec<T, N> normal)
 	{
  
         //assign the corner point
         A = pMin;
  
         //compute the vector from pMin to pMax
-        rts::vector<T, 3> v0;
+        rts::vec<T, 3> v0;
         v0 = pMax - pMin;
  
         //compute the cross product of A and the plane normal
-        rts::vector<T, 3> v1;
+        rts::vec<T, 3> v1;
         v1 = v0.cross(normal);
  
  
         //calculate point B
-        rts::point<T, 3> B;
+        rts::vec<T, 3> B;
         B = A + v0 * 0.5f + v1 * 0.5f;
  
         //calculate rtsPoint C
-        rts::point<T, 3> C;
+        rts::vec<T, 3> C;
         C = A  + v0 * 0.5f - v1 * 0.5f;
  
         //calculate X and Y
         X = B - A;
         Y = C - A;
-	}
+	}*/
  
 	/*******************************************************************
 	Constructor - create a quad from a position, normal, and rotation
 	*******************************************************************/
-	CUDA_CALLABLE quad(rts::point<T, N> c, rts::vector<T, N> normal, T width, T height, T theta)
+	CUDA_CALLABLE quad(rts::vec<T, N> c, rts::vec<T, N> normal, T width, T height, T theta)
 	{
  
         //compute the X direction - start along world-space X
-        Y = rts::vector<T, N>(0, 1, 0);
+        Y = rts::vec<T, N>(0, 1, 0);
         if(Y == normal)
-            Y = rts::vector<T, N>(0, 0, 1);
+            Y = rts::vec<T, N>(0, 0, 1);
  
         X = Y.cross(normal).norm();
  
@@ -118,33 +117,33 @@ struct quad
 	/*******************************************
 	Return the normal for the quad
 	*******************************************/
-	CUDA_CALLABLE rts::vector<T, N> n()
+	//the normal is the cross product of X and Y, passed through vec::norm()
+	CUDA_CALLABLE rts::vec<T, N> n()
 	{
         return (X.cross(Y)).norm();
 	}
  
-	CUDA_CALLABLE rts::point<T, N> p(T a, T b)
+	//evaluate the point A + X*a + Y*b for the parametric coordinates (a, b)
+	CUDA_CALLABLE rts::vec<T, N> p(T a, T b)
 	{
-		rts::point<T, N> result;
+		rts::vec<T, N> result;
 		//given the two parameters a, b = [0 1], returns the position in world space
 		result = A + X * a + Y * b;
  
 		return result;
 	}
  
-	CUDA_CALLABLE rts::point<T, N> operator()(T a, T b)
+	//function-call syntax for p(a, b)
+	CUDA_CALLABLE rts::vec<T, N> operator()(T a, T b)
 	{
 		return p(a, b);
 	}
  
-	std::string toStr()
+	std::string str()
 	{
 		std::stringstream ss;
  
 		ss<<"A = "<<A<<std::endl;
-		ss<<"B = "<<A + X<<std::endl;
-		ss<<"C = "<<A + X + Y<<std::endl;
-		ss<<"D = "<<A + Y<<std::endl;
+		ss<<"B = "<<A + Y<<std::endl;
+		ss<<"C = "<<A + Y + X<<std::endl;
+		ss<<"D = "<<A + X<<std::endl;
  
         return ss.str();
  
@@ -155,7 +154,7 @@ struct quad
 		//scales the plane by a scalar value
  
 		//compute the center point
-		rts::point<T, N> c = A + X*0.5f + Y*0.5f;
+		rts::vec<T, N> c = A + X*0.5f + Y*0.5f;
  
 		//create the new quadangle
 		quad<T, N> result;
@@ -167,7 +166,7 @@ struct quad
  
 	}
  
-	CUDA_CALLABLE T dist(point<T, N> p)
+	CUDA_CALLABLE T dist(vec<T, N> p)
 	{
         //compute the distance between a point and this quad
  
@@ -176,8 +175,8 @@ struct quad
         triangle<T, N> T1(A+X+Y, A+X, A+Y);
  
  
-        ptype d0 = T0.dist(p);
-        ptype d1 = T1.dist(p);
+        T d0 = T0.dist(p);
+        T d1 = T1.dist(p);
  
         if(d0 < d1)
             return d0;
@@ -185,7 +184,7 @@ struct quad
             return d1;
 	}
  
-	CUDA_CALLABLE T dist_max(point<T, N> p)
+	CUDA_CALLABLE T dist_max(vec<T, N> p)
 	{
         T da = (A - p).len();
         T db = (A+X - p).len();
@@ -201,7 +200,7 @@ struct quad
 template <typename T, int N>
 std::ostream& operator<<(std::ostream& os, rts::quad<T, N> R)
 {
-    os<<R.toStr();
+    //write the text produced by quad::str() to the stream
+    os<<R.str();
     return os;
 }
  
@@ -16,7 +16,7 @@ public:
  
 	void normalize();
 	void CreateRotation(T theta, T axis_x, T axis_y, T axis_z);
-	void CreateRotation(T theta, vector<T, 3> axis);
+	void CreateRotation(T theta, vec<T, 3> axis);
 	quaternion<T> operator*(quaternion<T> &rhs);
 	matrix<T, 3> toMatrix3();
 	matrix<T, 4> toMatrix4();
@@ -48,7 +48,7 @@ void quaternion&lt;T&gt;::CreateRotation(T theta, T axis_x, T axis_y, T axis_z)
 }
  
 template<typename T>
-void quaternion<T>::CreateRotation(T theta, vector<T, 3> axis)
+//convenience overload: passes the three axis components to the scalar-argument CreateRotation
+void quaternion<T>::CreateRotation(T theta, vec<T, 3> axis)
 {
 	CreateRotation(theta, axis[0], axis[1], axis[2]);
 }
@@ -4,12 +4,11 @@
 //enable CUDA_CALLABLE macro
 #include "rts/cuda/callable.h"
 #include "rts/math/vector.h"
-#include "rts/math/point.h"
 #include <iostream>
  
 namespace rts{
  
-template <class T, int N>
+template <class T, int N=3>
 struct triangle
 {
     /*
@@ -24,15 +23,15 @@ struct triangle
     */
     private:
  
-    point<T, N> A;
-    point<T, N> B;
-    point<T, N> C;
+    vec<T, N> A;
+    vec<T, N> B;
+    vec<T, N> C;
  
-    CUDA_CALLABLE point<T, N> _p(T s, T t)
+    //evaluate the parametric form of the triangle at (s, t), using edges E0 = B-A and E1 = C-A
+    CUDA_CALLABLE vec<T, N> _p(T s, T t)
     {
         //This function returns the point specified by p = A + s(B-A) + t(C-A)
-        vector<T, N> E0 = B-A;
-        vector<T, N> E1 = C-A;
+        vec<T, N> E0 = B-A;
+        vec<T, N> E1 = C-A;
  
         return A + s*E0 + t*E1;
     }
@@ -47,26 +46,26 @@ struct triangle
  
 	}
  
-	CUDA_CALLABLE triangle(point<T, N> a, point<T, N> b, point<T, N> c)
+	//construct a triangle by storing the three given corners as A, B, and C
+	CUDA_CALLABLE triangle(vec<T, N> a, vec<T, N> b, vec<T, N> c)
 	{
 		A = a;
 		B = b;
 		C = c;
 	}
  
-	CUDA_CALLABLE rts::point<T, N> operator()(T s, T t)
+	//function-call syntax for the private evaluator _p(s, t)
+	CUDA_CALLABLE rts::vec<T, N> operator()(T s, T t)
 	{
         return _p(s, t);
 	}
  
-	CUDA_CALLABLE point<T, N> nearest(point<T, N> p)
+	CUDA_CALLABLE vec<T, N> nearest(vec<T, N> p)
 	{
         //comptue the distance between a point and this triangle
         //  This code is adapted from: http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf
  
-        vector<T, N> E0 = B-A;
-        vector<T, N> E1 = C-A;
-        vector<T, N> D = A - p;
+        vec<T, N> E0 = B-A;
+        vec<T, N> E1 = C-A;
+        vec<T, N> D = A - p;
  
         T a = E0.dot(E0);
         T b = E0.dot(E1);
@@ -125,7 +124,7 @@ struct triangle
             {
                 //region 0
                 //std::cout<<"Region 0"<<std::endl;
-                T invDet = (ptype)1.0/det;
+                T invDet = (T)1.0/det;
                 s *= invDet;
                 t *= invDet;
                 //done
@@ -176,9 +175,9 @@ struct triangle
  
 	}
  
-	CUDA_CALLABLE T dist(point<T, N> p)
+	//distance from p to the triangle: the length of the offset between p and nearest(p)
+	CUDA_CALLABLE T dist(vec<T, N> p)
 	{
-        point<T, N> n = nearest(p);
+        vec<T, N> n = nearest(p);
  
         return (p - n).len();
 	}
@@ -4,20 +4,21 @@
 #include <iostream>
 #include <cmath>
 #include <sstream>
-//#include "rts/point.h"
+//#include "rts/math/point.h"
 #include "rts/cuda/callable.h"
  
+
 namespace rts
 {
  
  
  
-template <class T, int N>
-struct vector
+template <class T, int N=3>
+struct vec
 {
 	T v[N];
  
-	CUDA_CALLABLE vector()
+	CUDA_CALLABLE vec()
 	{
 		//memset(v, 0, sizeof(T) * N);
 		for(int i=0; i<N; i++)
@@ -25,19 +26,30 @@ struct vector
 	}
  
 	//efficiency constructor, makes construction easier for 1D-4D vectors
-	CUDA_CALLABLE vector(T x, T y = (T)0.0, T z = (T)0.0, T w = (T)0.0)
+	//	Each overload fills all N components: the supplied arguments are stored in order
+	//	and any remaining components are set to zero. Arguments beyond the N-th are
+	//	ignored, so nothing is ever written past v[N-1].
+	CUDA_CALLABLE vec(T x)
+	{
+		for(int i=0; i<N; i++)
+			v[i] = (i < 1) ? x : (T)0;
+	}
+	CUDA_CALLABLE vec(T x, T y)
+	{
+		const T args[2] = {x, y};
+		for(int i=0; i<N; i++)
+			v[i] = (i < 2) ? args[i] : (T)0;
+	}
+	CUDA_CALLABLE vec(T x, T y, T z)
 	{
-		if(N >= 1)
-			v[0] = x;
-		if(N >= 2)
-			v[1] = y;
-		if(N >= 3)
-			v[2] = z;
-		if(N >= 4)
-			v[3] = w;
+		const T args[3] = {x, y, z};
+		for(int i=0; i<N; i++)
+			v[i] = (i < 3) ? args[i] : (T)0;
+	}
+	CUDA_CALLABLE vec(T x, T y, T z, T w)
+	{
+		const T args[4] = {x, y, z, w};
+		for(int i=0; i<N; i++)
+			v[i] = (i < 4) ? args[i] : (T)0;
 	}
  
-	CUDA_CALLABLE vector(const T(&data)[N])
+	//construct from an N-element array by copying its bytes into v
+	CUDA_CALLABLE vec(const T(&data)[N])
 	{
 		memcpy(v, data, sizeof(T) * N);
 	}
@@ -54,12 +66,12 @@ struct vector
  
 	}
  
-	CUDA_CALLABLE vector<T, N> cart2sph()
+	CUDA_CALLABLE vec<T, N> cart2sph()
 	{
 		//convert the vector from cartesian to spherical coordinates
 		//x, y, z -> r, theta, phi (where theta = 0 to 2*pi)
  
-		vector<T, N> sph;
+		vec<T, N> sph;
 		sph[0] = std::sqrt(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
 		sph[1] = std::atan2(v[1], v[0]);
 		sph[2] = std::acos(v[2] / sph[0]);
@@ -67,12 +79,12 @@ struct vector
 		return sph;
 	}
  
-	CUDA_CALLABLE vector<T, N> sph2cart()
+	CUDA_CALLABLE vec<T, N> sph2cart()
 	{
 		//convert the vector from cartesian to spherical coordinates
 		//r, theta, phi -> x, y, z (where theta = 0 to 2*pi)
  
-		vector<T, N> cart;
+		vec<T, N> cart;
 		cart[0] = v[0] * std::cos(v[1]) * std::sin(v[2]);
 		cart[1] = v[0] * std::sin(v[1]) * std::sin(v[2]);
 		cart[2] = v[0] * std::cos(v[2]);
@@ -80,10 +92,10 @@ struct vector
 		return cart;
 	}
  
-	CUDA_CALLABLE vector<T, N> norm()
+	CUDA_CALLABLE vec<T, N> norm()
 	{
         //compute and return the vector norm
-        vector<T, N> result;
+        vec<T, N> result;
  
         //compute the vector length
         T l = len();
@@ -97,9 +109,9 @@ struct vector
         return result;
 	}
  
-	CUDA_CALLABLE vector<T, 3> cross(vector<T, 3> rhs)
+	CUDA_CALLABLE vec<T, 3> cross(vec<T, 3> rhs)
 	{
-		vector<T, 3> result;
+		vec<T, 3> result;
  
 		//compute the cross product (only valid for 3D vectors)
 		result[0] = v[1] * rhs[2] - v[2] * rhs[1];
@@ -109,7 +121,7 @@ struct vector
 		return result;
 	}
  
-    CUDA_CALLABLE T dot(vector<T, N> rhs)
+    CUDA_CALLABLE T dot(vec<T, N> rhs)
     {
         T result = (T)0;
  
@@ -121,36 +133,36 @@ struct vector
     }
  
 	//arithmetic
-	CUDA_CALLABLE vector<T, N> operator+(vector<T, N> rhs)
+	//component-wise sum of this vector and rhs, returned as a new vector
+	CUDA_CALLABLE vec<T, N> operator+(vec<T, N> rhs)
 	{
-        vector<T, N> result;
+        vec<T, N> result;
  
         for(int i=0; i<N; i++)
             result.v[i] = v[i] + rhs.v[i];
  
         return result;
 	}
-	CUDA_CALLABLE vector<T, N> operator-(vector<T, N> rhs)
+	//component-wise difference (this - rhs), returned as a new vector
+	CUDA_CALLABLE vec<T, N> operator-(vec<T, N> rhs)
 	{
-        vector<T, N> result;
+        vec<T, N> result;
  
         for(int i=0; i<N; i++)
             result.v[i] = v[i] - rhs.v[i];
  
         return result;
 	}
-	CUDA_CALLABLE vector<T, N> operator*(T rhs)
+	//multiply every component by the scalar rhs, returned as a new vector
+	CUDA_CALLABLE vec<T, N> operator*(T rhs)
 	{
-        vector<T, N> result;
+        vec<T, N> result;
  
         for(int i=0; i<N; i++)
             result.v[i] = v[i] * rhs;
  
         return result;
 	}
-	CUDA_CALLABLE vector<T, N> operator/(T rhs)
+	CUDA_CALLABLE vec<T, N> operator/(T rhs)
 	{
-        vector<T, N> result;
+        vec<T, N> result;
  
         for(int i=0; i<N; i++)
             result.v[i] = v[i] / rhs;
@@ -158,7 +170,16 @@ struct vector
         return result;
 	}
  
-	CUDA_CALLABLE bool operator==(vector<T, N> rhs)
+	//conversion from a point
+	/*CUDA_CALLABLE vector<T, N> & operator=(point<T, N> rhs)
+	{
+		for(int n=0; n<N; n++)
+			v[n] = rhs.p[n];
+
+		return *this;
+	}*/
+
+	CUDA_CALLABLE bool operator==(vec<T, N> rhs)
 	{
         if ( (rhs.v[0] == v[0]) && (rhs.v[1] == v[1]) && (rhs.v[2] == v[2]) )
             return true;
@@ -194,7 +215,7 @@ struct vector
 }	//end namespace rts
  
 template <typename T, int N>
-std::ostream& operator<<(std::ostream& os, rts::vector<T, N> v)
+std::ostream& operator<<(std::ostream& os, rts::vec<T, N> v)
 {
     os<<v.toStr();
     return os;
@@ -202,9 +223,9 @@ std::ostream&amp; operator&lt;&lt;(std::ostream&amp; os, rts::vector&lt;T, N&gt; v)
  
 //arithmetic operators
 template <typename T, int N>
-CUDA_CALLABLE rts::vector<T, N> operator-(rts::vector<T, N> v)
+CUDA_CALLABLE rts::vec<T, N> operator-(rts::vec<T, N> v)
 {
-    rts::vector<T, N> r;
+    rts::vec<T, N> r;
  
     //negate the vector
     for(int i=0; i<N; i++)
@@ -214,9 +235,9 @@ CUDA_CALLABLE rts::vector&lt;T, N&gt; operator-(rts::vector&lt;T, N&gt; v)
 }
  
 template <typename T, int N>
-CUDA_CALLABLE rts::vector<T, N> operator*(T lhs, rts::vector<T, N> rhs)
+//scalar * vector: forwards to the member operator*(T) so both operand orders give the same result
+CUDA_CALLABLE rts::vec<T, N> operator*(T lhs, rts::vec<T, N> rhs)
 {
-    rts::vector<T, N> r;
  
     return rhs * lhs;
 }
+#ifndef RTS_BEAM
+#define RTS_BEAM
+
+#include "../math/vector.h"
+#include "../math/function.h"
+#include <vector>
+
+namespace rts{
+
+///A focused beam described by a numerical-aperture range, focal point, propagation
+///	direction, polarization, frequency, and an apodization function. mc() decomposes
+///	the beam into a list of plane waves by random sampling.
+template<typename P>
+class beam
+{
+public:
+	///selects which apodization function set_apod() builds
+	enum beam_type {Uniform, Bartlett, Hamming, Hanning};
+
+private:
+	
+	P na[2];		//numerical aperture of the focusing optics (na[0] and na[1] bound the ring sampled by mc())	
+	vec<P> f;		//focal point	
+	vec<P> k;		//direction vector	
+	vec<P> E0;		//polarization direction
+	P omega;		//frequency
+
+	function<P, P> apod;	//apodization function
+	unsigned int apod_res;	//resolution of complex apodization filters
+
+	///constant apodization: assigns the value 1 to the apodization function
+	void apod_uniform()
+	{
+		apod = (P)1;
+	}
+	///assigns the value 1, then adds the data point (1, 0)
+	void apod_bartlett()
+	{
+		apod = (P)1;
+		apod.insert((P)1, (P)0);
+	}
+	///samples y = cos(pi * x / 2)^2 at x = n / apod_res for n = 0 .. apod_res-1
+	void apod_hanning()
+	{
+		apod = (P)0;
+		P x, y;
+		for(unsigned int n=0; n<apod_res; n++)
+		{
+			x = (P)n/(P)apod_res;
+			y = pow( cos( ((P)3.14159 * x) / 2 ), 2);
+			apod.insert(x, y);
+		}
+	}
+	///samples y = 27/50 + (23/50) * cos(pi * x) at x = n / apod_res for n = 0 .. apod_res-1
+	void apod_hamming()
+	{
+		apod = (P)0;
+		P x, y;
+		for(unsigned int n=0; n<apod_res; n++)
+		{
+			x = (P)n/(P)apod_res;
+			y = (P)27/(P)50 + ( (P)23/(P)50 ) * cos((P)3.14159 * x);
+			apod.insert(x, y);
+		}
+	}
+
+	///build the apodization function that matches the requested type
+	void set_apod(beam_type type)
+	{
+		switch(type)
+		{
+		case Uniform:
+			apod_uniform();
+			break;
+		case Bartlett:
+			apod_bartlett();
+			break;
+		case Hanning:
+			apod_hanning();
+			break;
+		case Hamming:
+			apod_hamming();
+			break;
+		}
+	}
+
+public:
+
+	///constructor: build a default beam (NA=1.0)
+	///	focal point at the origin, direction (0, 0, 1), polarization (1, 0, 0)
+	beam(beam_type _apod = Uniform)
+	{
+		na[0] = (P)0.0;
+		na[1] = (P)1.0;
+		f = vec<P>( (P)0.0, (P)0.0, (P)0.0 );
+		k = vec<P>( (P)0.0, (P)0.0, (P)1.0 );
+		E0 = vec<P>( (P)1.0, (P)0.0, (P)0.0 );
+		omega = (P)2 * (P)3.14159;
+		apod_res = 256;						//set the default resolution for apodization filters
+		set_apod(_apod);						//set the apodization function type
+		
+	}
+
+	///Numerical Aperture functions
+	///	one argument: sets the range to [0, _na]
+	void NA(P _na)
+	{
+		na[0] = (P)0;
+		na[1] = _na;
+	}
+	///	two arguments: sets the range to [_na0, _na1]
+	void NA(P _na0, P _na1)
+	{
+		na[0] = _na0;
+		na[1] = _na1;
+	}
+
+
+	//Monte-Carlo decomposition into plane waves
+	//	N    = number of plane-wave samples to generate
+	//	seed = value passed to srand() before sampling
+	//	returns the N sampled plane waves
+	std::vector< planewave<P> > mc(unsigned int N, unsigned int seed = 0)
+	{
+		/*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling
+			of a cylinder and projecting these samples onto an inscribed sphere.
+
+			na[0]   = internal obscuration NA
+			na[1]   = outer cassegrain NA
+			Each sample is the beam-center plane wave refracted along the sampled direction
+			and scaled by the apodization function evaluated at phi / PHI[1].
+		*/
+
+		srand(seed);		//seed the random number generator
+
+		///compute the rotation operator to transform (0, 0, 1) to k
+		//NOTE(review): when k already points along (0, 0, 1) the matrix is left as
+		//	default-constructed; this presumably relies on matrix<P, 3> starting as identity - confirm
+		P cos_angle = k.dot(rts::vec<P>(0, 0, 1));
+		rts::matrix<P, 3> rotation;
+		if(cos_angle != 1.0)
+		{
+			rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k).norm();	//compute the axis of rotation
+			P angle = acos(cos_angle);							//compute the angle of rotation
+			rts::quaternion<P> quat;							//create a quaternion describing the rotation
+			quat.CreateRotation(angle, r_axis);
+			rotation = quat.toMatrix3();							//compute the rotation matrix
+		}
+
+		//find the phi values associated with the cassegrain ring
+		P PHI[2];
+		PHI[0] = (P)asin(na[0]);
+		PHI[1] = (P)asin(na[1]);
+
+		//calculate the z-axis cylinder coordinates associated with these angles
+		P Z[2];
+		Z[0] = cos(PHI[0]);
+		Z[1] = cos(PHI[1]);
+		P range = Z[0] - Z[1];
+
+		std::vector< planewave<P> > samples;	//create a vector of plane waves
+		samples.reserve(N);						//the number of samples is known in advance
+
+		planewave<P> beam_center(k, E0, omega);	//create a plane wave representing the beam center
+
+		//draw a distribution of random phi, z values
+		P z, phi, theta;
+		for(unsigned int i=0; i<N; i++)						//for each sample (index type matches N)
+		{
+			z = ((P)rand() / (P)RAND_MAX) * range + Z[1];	//find a random position on the surface of a cylinder
+			theta = ((P)rand() / (P)RAND_MAX) * 2 * (P)3.14159;
+			phi = acos(z);									//project onto the sphere, computing phi in spherical coordinates
+
+			//compute and store cartesian coordinates
+			rts::vec<P> spherical(1, theta, phi);				//convert from spherical to cartesian coordinates
+			rts::vec<P> cart = spherical.sph2cart();
+			vec<P> k_prime = rotation * cart;				//create a sample vector
+
+			//store a wave refracted along the given direction
+			samples.push_back(beam_center.refract(k_prime) * apod(phi/PHI[1]));
+		}
+
+		return samples;
+	}
+
+};
+
+}
+
+#endif
 \ No newline at end of file
-#include "rts/math/complex.h"
-#include "rts/visualization/colormap.h"
+#include "../math/complex.h"
+#include "../visualization/colormap.h"
+#include "../visualization/scalarfield.cuh"
+#include "../visualization/vectorfield.cuh"
+#include "../optics/planewave.h"
+#include "../cuda/devices.h"
+
+
  
 namespace rts{
  
+/*  Add the contribution of one plane wave to a sampled field.
+    Uses a 2D thread layout: thread (iu, iv) handles sample iv*r0 + iu, and threads
+    with iu >= r0 or iv >= r1 return without touching memory.
+    The result is accumulated (+=) into X, Y, Z, so they must already hold valid values.
+    When Y is NULL only X is updated, using the length of w.E0 as the amplitude.
+*/
+template<typename T>
+__global__ void gpu_planewave2efield(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, 
+									 planewave<T> w, quad<T> q)
+{
+    int iu = blockIdx.x * blockDim.x + threadIdx.x;
+    int iv = blockIdx.y * blockDim.y + threadIdx.y;
+
+    //make sure that the thread indices are in-bounds
+    if(iu >= r0 || iv >= r1) return;
+
+    //compute the index into the field
+    int i = iv*r0 + iu;
+
+	//get the current position on the quad from the normalized sample coordinates
+	vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 );
+
+	//purely imaginary exponent: omega * (k_hat . p)
+	complex<T> x( 0.0f, w.omega * (w.k_hat.dot(p)) );
+
+	//evaluate the exponential once and reuse it for every component
+	complex<T> phase = exp(x);
+
+    if(Y == NULL)                       //if this is a scalar simulation
+        X[i] += w.E0.len() * phase;     //use the vector magnitude as the plane wave amplitude
+    else
+    {
+        X[i] += w.E0[0] * phase;
+        Y[i] += w.E0[1] * phase;
+        Z[i] += w.E0[2] * phase;
+    }
+}
+
+/*  Write the magnitude of every field sample into M.
+    Thread (x, y) handles sample y*r0 + x; threads outside r0 x r1 do nothing.
+    When Y is NULL only the X component is used.
+*/
+template<typename T>
+__global__ void gpu_efield_magnitude(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, T* M)
+{
+    //sample coordinates handled by this thread
+    int col = blockIdx.x * blockDim.x + threadIdx.x;
+    int row = blockIdx.y * blockDim.y + threadIdx.y;
+
+    //discard threads that fall outside the field
+    if(col >= r0 || row >= r1) return;
+
+    //flattened position of the sample
+    int idx = row*r0 + col;
+
+    //scalar simulation: the magnitude of the X component is the answer
+    if(Y == NULL)
+    {
+        M[idx] = X[idx].abs();
+        return;
+    }
+
+    //vector simulation: length of the vector built from the three component magnitudes
+    M[idx] = rts::vec<T>(X[idx].abs(), Y[idx].abs(), Z[idx].abs()).len();
+}
+
+/*  Store the magnitude of each field component in Px, Py, and Pz.
+    Thread (x, y) handles sample y*r0 + x; threads outside r0 x r1 do nothing.
+    All three of X, Y, and Z are read unconditionally.
+*/
+template<typename T>
+__global__ void gpu_efield_polarization(complex<T>* X, complex<T>* Y, complex<T>* Z, 
+                                        unsigned int r0, unsigned int r1,
+                                        T* Px, T* Py, T* Pz)
+{
+    //sample coordinates handled by this thread
+    int col = blockIdx.x * blockDim.x + threadIdx.x;
+    int row = blockIdx.y * blockDim.y + threadIdx.y;
+
+    //discard threads that fall outside the field
+    if(col >= r0 || row >= r1) return;
+
+    //flattened position of the sample
+    int idx = row*r0 + col;
+
+    //per-component magnitudes
+    Px[idx] = X[idx].abs();
+    Py[idx] = Y[idx].abs();
+    Pz[idx] = Z[idx].abs();
+}
+
 /*  This class implements a discrete representation of an electromagnetic field
     in 2D. The majority of this representation is done on the GPU.
 */
@@ -21,23 +98,53 @@ private:
     //resolution of the discrete field
     unsigned int R[2];
  
+    //shape of the 2D field
+    quad<P> pos;
+
+	///add the field of plane wave p to the current contents of X (and Y, Z),
+	///	launching one thread per field sample in 16 x 16 blocks
+	void from_planewave(planewave<P> p)
+	{
+        unsigned int SQRT_BLOCK = 16;
+        //create one thread for each detector pixel
+        dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
+        dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
+
+        //instantiate the kernel for the field's own precision P rather than a fixed float
+        gpu_planewave2efield<P> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], p, pos);
+    }
+
+	///set every allocated field component to zero bytes on the device
+	void clear()
+	{
+		//X is always present
+		cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
+
+		//scalar fields have no Y or Z component to reset
+		if(scalar)
+			return;
+
+		cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
+		cudaMemset(Z, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
+	}
+
  
 public:
  
     efield(unsigned int res0, unsigned int res1, bool _scalar = false)
     {
-        //initialize all pointers to NULL
-        X = Y = Z = NULL;
+        scalar = _scalar;           //initialize field type
+        
+        X = Y = Z = NULL;           //initialize all pointers to NULL
         R[0] = res0;
         R[1] = res1;
  
         //allocate memory on the gpu
         cudaMalloc(&X, sizeof(rts::complex<P>) * R[0] * R[1]);
+		cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
  
+        //Y and Z are only allocated (and zeroed) for vector fields
         if(!scalar)
         {
             cudaMalloc(&Y, sizeof(rts::complex<P>) * R[0] * R[1]);
+			cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
+
             cudaMalloc(&Z, sizeof(rts::complex<P>) * R[0] * R[1]);
+			cudaMemset(Z, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
         }
     }
  
@@ -49,14 +156,96 @@ public:
         if(Z != NULL) cudaFree(Z);
     }
  
-    void render(std::string)
+    //store the quad that gives the shape of the 2D field
+    void position(quad<P> _p)
+    {
+        this->pos = _p;
+    }
+
+    //return a text description of the field: the quad "pos" on the first line,
+    //  followed by the values of the X, Y and Z buffer pointers (one per line)
+    std::string str()
+    {
+        //qualified with std:: so the header does not depend on a "using namespace std"
+        //  being active wherever it is included
+        std::stringstream ss;
+        ss<<pos<<std::endl;
+        ss<<"X: "<<X<<std::endl;
+        ss<<"Y: "<<Y<<std::endl;
+        ss<<"Z: "<<Z;
+
+        return ss.str();
+    }
+
+    //assignment operator: replace the stored field with the field of a single plane wave
+    efield<P> & operator= (const planewave<P> & rhs)
+	{
+		this->clear();				//zero any previous field data
+		this->from_planewave(rhs);	//evaluate the plane wave into the cleared buffers
+
+		return *this;
+	}
+
+	//compound assignment: evaluate a plane wave into the field WITHOUT clearing it first
+    efield<P> & operator+= (const planewave<P> & rhs)
+	{
+		this->from_planewave(rhs);	//no clear(): existing field data is kept
+		return *this;
+	}
+
+	//assignment operator: clear the field, then evaluate every plane wave of the list into it
+	efield<P> & operator= (const std::vector< planewave<P> > & rhs)
+	{
+		clear();				//start from a zeroed field
+
+		//visit the plane waves in list order
+		typename std::vector< planewave<P> >::const_iterator w;
+		for(w = rhs.begin(); w != rhs.end(); ++w)
+			from_planewave(*w);
+
+		return *this;
+	}
+
+	//return a scalar field (same resolution as this field) filled by gpu_efield_magnitude
+    scalarfield<P> mag()
     {
+        //use the largest square block allowed by the current device
+        int maxThreads = rts::maxThreadsPerBlock();
+        int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
 
+		//create a scalar field to store the result
+		scalarfield<P> M(R[0], R[1]);
+
+        //create one thread for each detector pixel (grid rounded up to cover partial blocks)
+        dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
+        dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
+
+        //compute the magnitude and store it in the scalar field; the kernel is instantiated
+        //  on the field precision P (it was hard-coded to float, which only matches the
+        //  complex<P> and P buffers when P is float)
+		gpu_efield_magnitude<P> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.S);
+
+		return M;
     }
  
+    //return a vector field whose three components are filled by gpu_efield_polarization
+    //  from the X, Y and Z buffers; terminates the program when called on a scalar field
+    vectorfield<P> polarization()
+    {
+        if(scalar)
+        {
+            std::cout<<"ERROR: Cannot compute polarization of a scalar field."<<std::endl;
+            exit(1);
+        }
+
+        //use the largest square block allowed by the current device
+        int maxThreads = rts::maxThreadsPerBlock();
+        int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
+
+        //create one thread for each detector pixel (grid rounded up to cover partial blocks)
+        dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
+        dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
+
+        vectorfield<P> Pol(R[0], R[1]);     //create a vector field to store the result
+
+        //compute the polarization and store it in the vector field; the kernel is instantiated
+        //  on the field precision P (it was hard-coded to float, which only matches the
+        //  complex<P> and P buffers when P is float)
+        gpu_efield_polarization<P> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], Pol.S[0], Pol.S[1], Pol.S[2]);
+
+        return Pol;                         //return the vector field
+    }
+
+
  
 };
  
  
  
 -}   //end namespace rts
+}   //end namespace rts
 \ No newline at end of file
+#ifndef RTS_PLANEWAVE
+#define RTS_PLANEWAVE
+
+#include <string>
+#include <sstream>
+
+#include "rts/math/vector.h"
+#include "rts/math/quaternion.h"
+
+namespace rts{
+
+///Plane wave described by a propagation direction (k_hat), a frequency (omega)
+///	and an amplitude vector (E0).
+template<typename P>
+class planewave{
+
+public:
+	rts::vec<P> k_hat;	//normalized planewave direction
+	P omega;				//frequency
+
+	rts::vec<P> E0;		//amplitude
+
+
+	///default constructor: plane wave propagating along z, polarized along x, omega = 2 * 3.14159
+	planewave()
+	{
+		omega = (P)2 * (P)3.14159;
+		k_hat = rts::vec<P>(0, 0, 1);
+		E0 = rts::vec<P>(1, 0, 0);
+	}
+	///constructor: create a plane wave propagating along z, polarized along x
+	planewave(P _omega)
+	{
+		omega = _omega;
+		k_hat = rts::vec<P>(0, 0, 1);
+		E0 = rts::vec<P>(1, 0, 0);
+	}
+	///constructor: create a plane wave propagating along _k, polarized along _E0, at frequency _omega
+	planewave(vec<P> _k, vec<P> _E0, P _omega)
+	{
+		k_hat = _k.norm();
+		vec<P> s = k_hat.cross(_E0);	//re-orthogonalize
+		E0 = s.cross(k_hat);
+		omega = _omega;
+	}
+
+	///multiplication operator: return a copy of this plane wave with E0 scaled by rhs
+	///	(this operator used to scale E0 of the left-hand operand in place and return a
+	///	reference to it, so an expression such as "f = p * 2" also changed p)
+    planewave<P> operator* (const P & rhs)
+	{
+		planewave<P> scaled(*this);		//copy k_hat, omega and E0
+		scaled.E0 = E0 * rhs;			//scale only the amplitude of the copy
+		return scaled;
+	}
+
+
+	///return a plane wave with the same omega that propagates along new_k (normalized here);
+	///	its amplitude is E0 rotated about the side vector s = k_hat x (E0 / |E0|)
+	planewave<P> refract(rts::vec<P> new_k)
+	{
+		new_k = new_k.norm();	//normalize new_k
+
+		//compute the side vector (around which we will be rotating)
+		rts::vec<P> s = k_hat.cross(E0.norm());
+
+		//component of k' along the side vector; (new_k - s_prime) is then the
+		//	part of k' that is left after removing that component
+		rts::vec<P> s_prime = s * (new_k.dot(s));
+
+		//cosine of the angle between k and that remaining part, clamped to [-1, 1]
+		//	because rounding can push the dot product of two unit vectors slightly
+		//	outside the domain of acos, which would make theta NaN
+		P cos_theta = k_hat.dot( (new_k - s_prime).norm() );
+		if(cos_theta > (P)1) cos_theta = (P)1;
+		if(cos_theta < -(P)1) cos_theta = -(P)1;
+
+		//compute the angle between
+		P theta = acos(cos_theta);
+
+		//rotate E0 around s by theta
+		quaternion<P> q;
+		q.CreateRotation(theta, s);
+		rts::vec<P> E0_prime = q.toMatrix3() * E0;
+
+		//create the refracted plane wave
+		planewave<P> new_p(omega);
+		new_p.E0 = E0_prime;
+		new_p.k_hat = new_k;
+
+		return new_p;
+	}
+
+	///return a text description of the plane wave built from E0, omega and k_hat * omega
+	std::string str()
+	{
+		std::stringstream ss;
+		ss<<E0<<" e^i ( "<<omega<<"t - "<<omega<<" "<<k_hat * omega<<" . r )";
+		return ss.str();
+	}
+};
+}
+
+#endif
 \ No newline at end of file
@@ -32,7 +32,7 @@ static cudaArray* gpuBrewer;
  
 namespace rts{
  
-enum colormapType {cmBrewer, cmGrayscale};
+enum colormapType {cmBrewer, cmGrayscale, cmRainbow};
  
 static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size)
 {
@@ -119,10 +119,10 @@ __global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsi
 	float a = (gpuSource[i] - minVal) / (maxVal - minVal);
  
 	//threshold
-	if(a > 1.0)
-        a = 1.0;
-    if(a < 0.0)
-        a = 0.0;
+	if(a > 1)
+        a = 1;
+    if(a < 0)
+        a = 0;
  
 	gpuDest[i * 3 + 0] = 255 * a;
 	gpuDest[i * 3 + 1] = 255 * a;
@@ -222,9 +222,9 @@ static void cpuApplyBrewer(T* cpuSource, unsigned char* cpuDest, unsigned int N,
         }
         else
         {
-            r = BREWERCP[ptLow * 4 + 0] * (1.0-m) + BREWERCP[ (ptLow+1) * 4 + 0] * m;
-            g = BREWERCP[ptLow * 4 + 1] * (1.0-m) + BREWERCP[ (ptLow+1) * 4 + 1] * m;
-            b = BREWERCP[ptLow * 4 + 2] * (1.0-m) + BREWERCP[ (ptLow+1) * 4 + 2] * m;
+            r = BREWERCP[ptLow * 4 + 0] * (1-m) + BREWERCP[ (ptLow+1) * 4 + 0] * m;
+            g = BREWERCP[ptLow * 4 + 1] * (1-m) + BREWERCP[ (ptLow+1) * 4 + 1] * m;
+            b = BREWERCP[ptLow * 4 + 2] * (1-m) + BREWERCP[ (ptLow+1) * 4 + 2] * m;
         }
  
  
@@ -251,8 +251,8 @@ static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T 
             //normalize to the range [valMin valMax]
             a = (cpuSource[i] - valMin) / range;
  
-            if(a < 0) a = 0.0;
-            if(a > 1) a = 1.0;
+            if(a < 0) a = 0;
+            if(a > 1) a = 1;
  
             cpuDest[i * 3 + 0] = 255 * a;
             cpuDest[i * 3 + 1] = 255 * a;
@@ -275,7 +275,7 @@ static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, co
 	}
  
     if(positive)
-        cpu2cpu(cpuSource, cpuDest, nVals, (T)0.0, maxVal, cm);
+        cpu2cpu(cpuSource, cpuDest, nVals, (T)0, maxVal, cm);
     else
         cpu2cpu(cpuSource, cpuDest, nVals, -maxVal, maxVal, cm);
  
-#ifndef RTS_SCALAR_SLICE
-#define RTS_SCALAR_SLICE
-
-#include "rts/visualization/colormap.h"
-#include "rts/envi/envi.h"
+#ifndef RTS_SCALAR_SLICE
+#define RTS_SCALAR_SLICE
+
+#include "../visualization/colormap.h"
+#include "../envi/envi.h"
+#include "../math/quad.h"
+#include "../cuda/devices.h"
 #include "cublas_v2.h"
-#include <cuda_runtime.h>
+#include <cuda_runtime.h>
+
+///Fill dest with the product of three 1D Gaussians (same mean and std), one for each
+///	coordinate of the point that "shape" returns for the sample (mostly for testing)
+template<typename T>
+__global__ void gpu_gaussian(T* dest, unsigned int r0, unsigned int r1, T mean, T std, rts::quad<T> shape)
+{
+	//2D sample coordinates handled by this thread
+	int iu = blockIdx.x * blockDim.x + threadIdx.x;
+	int iv = blockIdx.y * blockDim.y + threadIdx.y;
+
+	//threads outside the r0 x r1 field have nothing to do
+	if(!(iu < r0 && iv < r1)) return;
+
+	//point on the quad for the parametric coordinates (iu / r0, iv / r1)
+	T u = (T)iu / (T)r0;
+	T v = (T)iv / (T)r1;
+	rts::vec<T> p = shape(u, v);
+
+	//terms shared by the three 1D Gaussians
+	T scale = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) );
+	T denom = 2 * std*std;
+
+	T fx = scale * exp( - pow(p[0] - mean, 2) / denom );
+	T fy = scale * exp( - pow(p[1] - mean, 2) / denom );
+	T fz = scale * exp( - pow(p[2] - mean, 2) / denom );
+
+	//row-major index of the sample in the field
+	dest[iv*r0 + iu] = fx * fy * fz;
+}
+
+namespace rts {
+template<typename P>
+struct scalarfield
+{
+	//gpu pointer to the scalar slice
+	P* S;
  
-template<typename P>
-struct scalarfield
-{
-	//gpu pointer to the scalar slice
-	P* S;
-
-	//resolution of the slice
-	int R[2];
+	//resolution of the slice
+	int R[2];
+
+	quad<P> shape;
  
     scalarfield()
     {
         R[0] = R[1] = 0;
+        shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0));
         S = NULL;
  
-        //std::cout<<"Scalerslice created (default)."<<std::endl;
+		std::cout<<"scalarfield CONSTRUCTOR"<<std::endl;
     }
-
+
 	scalarfield(int x, int y)
-	{
-        //set the resolution
-        R[0] = x;
-        R[1] = y;
-
-        //allocate memory on the GPU
+	{
+        //set the resolution
+        R[0] = x;
+        R[1] = y;
+
+        shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0));
+
+        //allocate memory on the GPU
         HANDLE_ERROR(cudaMalloc( (void**)&S, sizeof(P) * x * y ));
-        //std::cout<<"Scalerslice created."<<std::endl;
+
+		std::cout<<"scalarfield CONSTRUCTOR"<<std::endl;
     }
-
+
 	~scalarfield()
-    {
-        if(S != NULL)
-            HANDLE_ERROR(cudaFree(S));
+    {
+        if(S != NULL)
+            HANDLE_ERROR(cudaFree(S));
         S = NULL;
         R[0] = R[1] = 0;
  
-        //std::cout<<"Scalerslice destroyed."<<std::endl;
+		std::cout<<"scalarfield DESTRUCTOR"<<std::endl;
     }
-
+
 	void clear()
-    {
-        //this function sets the slice to zero
-        if(S != NULL)
-            //HANDLE_ERROR(cudaMalloc( (void**)&S, sizeof(P) * R[0] * R[1] ));
-            HANDLE_ERROR(cudaMemset(S, 0, sizeof(P) * R[0] * R[1]));
-    }
-
+    {
+        //this function sets the slice to zero
+        if(S != NULL)
+            HANDLE_ERROR(cudaMemset(S, 0, sizeof(P) * R[0] * R[1]));
+    }
+
 	void toImage(std::string filename, P vmin, P vmax, rts::colormapType cmap = rts::cmBrewer)
-    {
-        rts::gpu2image<ptype>(S, filename, R[0], R[1], vmin, vmax, cmap);
+    {
+        rts::gpu2image<P>(S, filename, R[0], R[1], vmin, vmax, cmap);
     }
  
 	void toImage(std::string filename, bool positive = true, rts::colormapType cmap = rts::cmBrewer)
@@ -73,11 +106,11 @@ struct scalarfield
         //find the index of the value with maximum magnitude
         int N = R[0] * R[1];
         int result;
-    #ifdef PRECISION_SINGLE
-        stat = cublasIsamax(handle, N, S, 1, &result);
-    #elif defined PRECISION_DOUBLE
-        stat = cublasIdamax(handle, N, S, 1, &result);
-    #endif
+
+        if(sizeof(P) == 4)
+            stat = cublasIsamax(handle, N, (float*)S, 1, &result);
+        else
+            stat = cublasIdamax(handle, N, (double*)S, 1, &result);
  
         //adjust for 1-based indexing
         result -= 1;
@@ -92,7 +125,7 @@ struct scalarfield
  
         //retrieve the maximum value
         P maxVal;
-        HANDLE_ERROR(cudaMemcpy(&maxVal, S + result, sizeof(ptype), cudaMemcpyDeviceToHost));
+        HANDLE_ERROR(cudaMemcpy(&maxVal, S + result, sizeof(P), cudaMemcpyDeviceToHost));
  
         //destroy the CUBLAS handle
         cublasDestroy(handle);
@@ -105,7 +138,7 @@ struct scalarfield
     }
  
  
-	void toEnvi(std::string filename, ptype wavelength = 0, bool append = false)
+	void toEnvi(std::string filename, P wavelength = 0, bool append = false)
     {
         std::string mode;
         if(append) mode = "a";
@@ -115,23 +148,21 @@ struct scalarfield
         EnviFile outfile(filename, mode);
  
         //get the scalar slice from the GPU to the CPU
-        int memsize = sizeof(ptype) * R[0] * R[1];
-        ptype* cpuData = (ptype*) malloc( memsize );
+        int memsize = sizeof(P) * R[0] * R[1];
+        P* cpuData = (P*) malloc( memsize );
         HANDLE_ERROR(cudaMemcpy( cpuData, S, memsize, cudaMemcpyDeviceToHost));
  
         //add a band to the ENVI file
         outfile.addBand(cpuData, R[0], R[1], wavelength);
  
         outfile.close();
-
-
     }
  
 	//assignment operator
 	scalarfield & operator= (const scalarfield & rhs)
     {
         //de-allocate any existing GPU memory
-        if(S != NULL)
+        if(S != NULL)
             HANDLE_ERROR(cudaFree(S));
  
         //copy the slice resolution
@@ -139,20 +170,54 @@ struct scalarfield
         R[1] = rhs.R[1];
  
         //allocate the necessary memory
-        HANDLE_ERROR(cudaMalloc(&S, sizeof(ptype) * R[0] * R[1]));
+        HANDLE_ERROR(cudaMalloc(&S, sizeof(P) * R[0] * R[1]));
  
         //copy the slice
-        HANDLE_ERROR(cudaMemcpy(S, rhs.S, sizeof(ptype) * R[0] * R[1], cudaMemcpyDeviceToDevice));
+        HANDLE_ERROR(cudaMemcpy(S, rhs.S, sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
  
  
         std::cout<<"Assignment operator."<<std::endl;
  
         return *this;
+    }
+
+	///copy constructor: duplicate the resolution, the shape and (when present) the GPU slice of rhs
+	scalarfield(const scalarfield &rhs)
+	{
+		//first make a shallow copy
+		R[0] = rhs.R[0];
+		R[1] = rhs.R[1];
+		shape = rhs.shape;		//was not copied before, so the copy did not carry the shape of rhs
+
+		//do we have to make a deep copy?
+		if(rhs.S == NULL)
+			S = NULL;		//no
+		else
+		{
+			//allocate the necessary memory
+			HANDLE_ERROR(cudaMalloc(&S, sizeof(P) * R[0] * R[1]));
+
+			//copy the slice
+			HANDLE_ERROR(cudaMemcpy(S, rhs.S, sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
+		}
+
+		std::cout<<"scalarfield COPY CONSTRUCTOR"<<std::endl;
+	}
+
+	//fill the slice by launching gpu_gaussian with the given mean and standard deviation,
+	//	using one thread per sample and the slice's "shape" quad
+	void gaussian(P mean, P std)
+	{
+		int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
+        int SQRT_BLOCK = (int)sqrt((float)maxThreads);
+		//create one thread for each detector pixel (grid rounded up to cover partial blocks)
+		dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
+		dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
+
+		//instantiate the kernel on the slice precision P; it was hard-coded to float,
+		//	which only matches the P* buffer when P is float
+		gpu_gaussian<P> <<<dimGrid, dimBlock>>> (S, R[0], R[1], mean, std, shape);
+	}
+
+};
+
+}   //end namespace rts
+
+
  
-    }
-
-};
-
-
-
 #endif
+#ifndef RTS_VECTORFIELD
+#define RTS_VECTORFIELD
+
+#include "../visualization/colormap.h"
+#include "../envi/envi.h"
+#include "../math/quad.h"
+#include "../cuda/devices.h"
+#include "cublas_v2.h"
+#include <cuda_runtime.h>
+#include <iomanip>
+
+#include <qimage.h>
+#include <qcolor.h>
+
+
+namespace rts {
+///Discrete N-component vector field: N slices of R[0] x R[1] values of type P,
+///	each slice held in its own GPU buffer S[n].
+template<typename P, unsigned int N = 3>
+struct vectorfield
+{
+private:
+    //split a file name mask such as "field_???.bmp" into:
+    //  prefix  - text before the first wildcard ('?')
+    //  postfix - text between the last wildcard and the extension
+    //  ext     - extension including the period (empty if the name has none)
+    //  digits  - number of characters spanned by the wildcards (0 if there are none)
+    //the extension starts at the LAST period of the file-name part, so names with several
+    //  periods or with periods in a directory are split correctly; a name without a period
+    //  or without wildcards no longer indexes past the end of a list or duplicates the name
+    void process_filename(std::string name, std::string &prefix, std::string &postfix, 
+                          std::string &ext, unsigned int &digits)
+    {
+        std::string stem = name;                //file name without the extension
+        ext = "";
+
+        size_t dot = name.find_last_of(".");
+        size_t slash = name.find_last_of("/\\");
+        //a period only starts an extension if it comes after the last path separator
+        if(dot != std::string::npos && (slash == std::string::npos || dot > slash))
+        {
+            stem = name.substr(0, dot);
+            ext = name.substr(dot);
+        }
+
+        size_t i0 = stem.find_first_of("?");    //find the positions of the first and last wildcard ('?')
+        size_t i1 = stem.find_last_of("?");
+
+        if(i0 == std::string::npos)             //no wildcard: the whole stem is the prefix
+        {
+            prefix = stem;
+            postfix = "";
+            digits = 0;
+        }
+        else
+        {
+            postfix = stem.substr(i1+1);
+            prefix = stem.substr(0, i0);
+            digits = (unsigned int)(i1 - i0 + 1);   //compute the number of wildcards
+        }
+    }
+
+    //make this field a deep copy of rhs: resolution, shape and every allocated slice
+    //  (any buffers this object owned must already have been released by the caller)
+    void copy_from(const vectorfield &rhs)
+    {
+        R[0] = rhs.R[0];
+        R[1] = rhs.R[1];
+        shape = rhs.shape;
+
+        for(unsigned int n=0; n<N; n++)
+        {
+            if(rhs.S[n] == NULL)                //nothing to copy for this component
+            {
+                S[n] = NULL;
+                continue;
+            }
+
+            //allocate the necessary memory
+            HANDLE_ERROR(cudaMalloc(&S[n], sizeof(P) * R[0] * R[1]));
+
+            //copy the slice
+            HANDLE_ERROR(cudaMemcpy(S[n], rhs.S[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
+        }
+    }
+
+public:
+	//gpu pointers to scalar slices
+	P* S[N];
+
+	//resolution of the slice
+	int R[2];
+
+	quad<P> shape;
+
+    //default constructor: empty field (resolution 0, no GPU memory)
+    vectorfield()
+    {
+        R[0] = R[1] = 0;            //set the initial resolution to 0
+        shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0));
+        for(unsigned int n=0; n<N; n++)      //set each vector component to NULL
+            S[n]=NULL;
+
+		std::cout<<"vectorfield CONSTRUCTOR"<<std::endl;
+    }
+
+    //constructor: allocate (but do not initialize) N slices of x by y values on the GPU
+	vectorfield(int x, int y)
+	{
+        //set the resolution
+        R[0] = x;
+        R[1] = y;
+
+        shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0));
+
+        //allocate memory on the GPU
+        for(unsigned int n=0; n<N; n++)
+            HANDLE_ERROR(cudaMalloc( (void**)&S[n], sizeof(P) * x * y ));
+
+		std::cout<<"vectorfield CONSTRUCTOR"<<std::endl;
+    }
+
+    //destructor: release every allocated slice
+	~vectorfield()
+    {
+        for(unsigned int n=0; n<N; n++)
+            if(S[n] != NULL)
+            {
+                HANDLE_ERROR(cudaFree(S[n]));
+                S[n] = NULL;
+            }
+        R[0] = R[1] = 0;
+
+		std::cout<<"vectorfield DESTRUCTOR"<<std::endl;
+    }
+
+	void clear()
+    {
+        //this function sets the slice to zero
+        for(unsigned int n=0; n<N; n++)
+            if(S[n] != NULL)
+                HANDLE_ERROR(cudaMemset(S[n], 0, sizeof(P) * R[0] * R[1]));
+    }
+
+    //save component n as an image, mapping [vmin, vmax] through the colormap cmap
+	void toImage(std::string filename, unsigned int n, P vmin, P vmax, rts::colormapType cmap = rts::cmBrewer)
+    {
+        if(n >= N)                  //guard against reading past the end of S
+        {
+            std::cout<<"ERROR: vector component "<<n<<" does not exist."<<std::endl;
+            return;
+        }
+		rts::gpu2image<P>(S[n], filename, R[0], R[1], vmin, vmax, cmap);
+    }
+
+    //save the first three components as the red, green and blue channels of one image
+	void toImage3(std::string filename, P vmin, P vmax)
+	{
+		std::cout<<"Implementing a 3-component rainbow colormap: "<<filename.c_str()<<std::endl;
+
+		if(N < 3)												//S[0..2] are read below
+		{
+			std::cout<<"ERROR: a 3-component image requires at least 3 vector components."<<std::endl;
+			return;
+		}
+
+		QImage image(R[0], R[1], QImage::Format_RGB32);		//create a QImage object
+		if(image.isNull())										//if it didn't work, throw an error
+		{
+			std::cout<<"Error creating QImage."<<std::endl;
+			return;
+		}
+
+		//create a buffer for each RGB component (gpu2cpu is given room for 3 bytes per value)
+		size_t bytes = sizeof(unsigned char) * 3 * R[0] * R[1];
+		unsigned char* red = (unsigned char*)malloc(bytes);
+		unsigned char* green = (unsigned char*)malloc(bytes);
+		unsigned char* blue = (unsigned char*)malloc(bytes);
+
+		if(red != NULL && green != NULL && blue != NULL)
+		{
+			//retrieve the buffered images for each component
+			rts::gpu2cpu<P>(S[0], red, R[0] * R[1], vmin, vmax);
+			rts::gpu2cpu<P>(S[1], green, R[0] * R[1], vmin, vmax);
+			rts::gpu2cpu<P>(S[2], blue, R[0] * R[1], vmin, vmax);
+
+			for(int y=0; y<R[1]; y++)
+				for(int x=0; x<R[0]; x++)
+				{
+					//calculate the 1D index
+					int i = y * R[0] + x;
+
+					//set the image pixel
+					QColor color(red[i * 3 + 0], green[i * 3 + 1], blue[i * 3 + 2]);
+					image.setPixel(x, y, color.rgb());
+				}
+
+			if(!image.save(filename.c_str()))					//if the image didn't save correctly,
+				std::cout<<"Error saving QImage."<<std::endl;	//	throw an error
+		}
+		else
+			std::cout<<"ERROR: unable to allocate the image buffers."<<std::endl;
+
+		//release the host buffers (they were leaked before); free(NULL) is a no-op
+		free(red);
+		free(green);
+		free(blue);
+	}
+
+    //save the field as images; filename is a mask in which '?' characters are replaced
+    //  by the zero-padded component index
+    //  positive  - map [0, max] when true, [-max, max] when false
+    //  cmap      - colormap; cmRainbow with N == 3 writes ONE image with RGB = XYZ
+    //  globalmax - use the largest magnitude over all components (true) or the largest
+    //              magnitude of each component (false) as "max"
+	void toImages(std::string filename, bool positive = true, rts::colormapType cmap = rts::cmBrewer, bool globalmax = true)
+	{
+        int L = R[0] * R[1];    //compute the number of discrete points in a slice
+        if(L <= 0)              //an empty field would make the maximum search read S[n] - 1
+        {
+            std::cout<<"ERROR: cannot create images from an empty vector field."<<std::endl;
+            return;
+        }
+
+        std::string prefix, postfix, extension;
+        unsigned int digits;
+        process_filename(filename, prefix, postfix, extension, digits);      //process the filename for wild cards
+
+        cublasStatus_t stat;
+        cublasHandle_t handle;
+
+        //create a CUBLAS handle
+        stat = cublasCreate(&handle);
+        if(stat != CUBLAS_STATUS_SUCCESS)
+        {
+            std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
+            exit(1);
+        }
+
+        int result;             //result of the max operation
+
+        P maxVal[N];            //largest magnitude of each vector component
+        P maxAll = 0;           //largest magnitude in the data set
+
+        //compute the maximum magnitude for each vector component
+        for(unsigned int n=0; n<N; n++)
+        {
+            if(sizeof(P) == 4)
+                stat = cublasIsamax(handle, L, (const float*)S[n], 1, &result);
+            else
+                stat = cublasIdamax(handle, L, (const double*)S[n], 1, &result);
+
+            if(stat != CUBLAS_STATUS_SUCCESS)   //if there was a GPU error, terminate
+            {
+                std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
+                exit(1);
+            }
+
+            result -= 1;        //adjust for 1-based indexing
+
+            //retrieve the value for this slice and store it in the maxVal array
+            HANDLE_ERROR(cudaMemcpy(&maxVal[n], S[n] + result, sizeof(P), cudaMemcpyDeviceToHost));
+
+            //the search returns the element of largest MAGNITUDE, which may be negative:
+            //  keep the magnitude so that the comparisons and ranges below are valid
+            if(maxVal[n] < 0)
+                maxVal[n] = -maxVal[n];
+
+            if(maxVal[n] > maxAll)          //if maxVal is larger, update the maxAll variable
+                maxAll = maxVal[n];
+        }
+        
+        cublasDestroy(handle);  //destroy the CUBLAS handle
+
+		if(cmap == rts::cmRainbow && N == 3)		//if the user specifies a rainbow colormap, and the vector has 3 elements
+		{
+			//implement a single image with RGB = XYZ
+			if(positive)
+				toImage3(prefix+postfix+extension, 0, maxAll);
+			else
+				toImage3(prefix+postfix+extension, -maxAll, maxAll);   //signed range (was identical to the positive case)
+		}
+		else
+		{
+			for(unsigned int n=0; n<N; n++)          //for each image
+			{
+				std::stringstream ss;       //assemble the file name
+				ss<<prefix<<std::setfill('0')<<std::setw(digits)<<n<<postfix<<extension;
+				std::cout<<ss.str()<<std::endl;
+
+				P top = globalmax ? maxAll : maxVal[n];    //global or local maximum magnitude
+
+				if(positive)                //if the image is positive
+				{
+					std::cout<<"Positive: "<<n<<std::endl;
+					toImage(ss.str(), n, 0, top, cmap);
+				}
+				else
+				{
+					std::cout<<"Negative: "<<n<<std::endl;
+					toImage(ss.str(), n, -top, top, cmap);  //globalmax was ignored here before
+				}
+			}
+		}
+    }
+
+	//assignment operator: replace this field with a deep copy of rhs
+	vectorfield & operator= (const vectorfield & rhs)
+    {
+        //self-assignment would free the buffers and then copy from the freed memory
+        if(this == &rhs)
+            return *this;
+
+        //de-allocate any existing GPU memory
+        for(unsigned int n=0; n<N; n++)
+            if(S[n] != NULL)
+            {
+                HANDLE_ERROR(cudaFree(S[n]));
+                S[n] = NULL;
+            }
+
+        //copy the resolution, the shape and the slices
+        copy_from(rhs);
+
+        std::cout<<"Assignment operator."<<std::endl;
+
+        return *this;
+    }
+
+	///copy constructor: deep copy of rhs (resolution, shape and every allocated slice)
+	vectorfield(const vectorfield &rhs)
+	{
+        copy_from(rhs);
+
+		std::cout<<"vectorfield COPY CONSTRUCTOR"<<std::endl;
+	}
+
+};
+
+}   //end namespace rts
+
+
+
+#endif