separated executable arguments and options in the arglist class

David Mayerich
1 parent 7b3948ab
Showing 8 changed files with 2056 additions and 2039 deletions Show diff stats
math/complex.h
math/complexfield.cuh
math/field.cuh
math/rect.h
math/triangle.h
optics/material.h
optics/mirst-1d.cuh
ui/arguments.h
-/*RTS Complex number class.  This class is CUDA compatible,
-and can therefore be used in CUDA code and on CUDA devices.
-*/
-
-#ifndef RTS_COMPLEX
-#define RTS_COMPLEX
-
-#include "../cuda/callable.h"
-#include <cmath>
-#include <string>
-#include <sstream>
-#include <iostream>
-
-namespace stim
-{
-
-template <class T>
-struct complex
-{
-    T r, i;
-
-    //default constructor
-    CUDA_CALLABLE complex()
-    {
-        r = 0;
-	   i = 0;
-    }
-
-    //constructor when given real and imaginary values
-    CUDA_CALLABLE complex(T r, T i = 0)
-    {
-        this->r = r;
-        this->i = i;
-    }
-
-	//access methods
-	CUDA_CALLABLE T real()
-	{
-		return r;
-	}
-
-	CUDA_CALLABLE T real(T r_val)
-	{
-		r = r_val;
-		return r_val;
-	}
-
-	CUDA_CALLABLE T imag()
-	{
-		return i;
-	}
-	CUDA_CALLABLE T imag(T i_val)
-	{
-		i = i_val;
-		return i_val;
-	}
-
-    
-
-    //return the current value multiplied by i
-    CUDA_CALLABLE complex<T> imul()
-    {
-        complex<T> result;
-        result.r = -i;
-        result.i = r;
-
-        return result;
-    }
-
-    //returns the complex signum (-1, 0, 1)
-    CUDA_CALLABLE int sgn(){
-        if(r > 0) return 1;
-        else if(r < 0) return -1;
-        else return (0 < i - i < 0);
-    }
-
-	//ARITHMETIC OPERATORS--------------------
-
-    //binary + operator (returns the result of adding two complex values)
-    CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs) const
-    {
-        complex<T> result;
-        result.r = r + rhs.r;
-        result.i = i + rhs.i;
-        return result;
-    }
-
-	CUDA_CALLABLE complex<T> operator+ (const T rhs) const
-    {
-        complex<T> result;
-        result.r = r + rhs;
-        result.i = i;
-        return result;
-    }
-
-    //binary - operator (returns the result of adding two complex values)
-    CUDA_CALLABLE complex<T> operator- (const complex<T> rhs) const
-    {
-        complex<T> result;
-        result.r = r - rhs.r;
-        result.i = i - rhs.i;
-        return result;
-    }
-
-    //binary - operator (returns the result of adding two complex values)
-    CUDA_CALLABLE complex<T> operator- (const T rhs)
-    {
-        complex<T> result;
-        result.r = r - rhs;
-        result.i = i;
-        return result;
-    }
-
-    //binary MULTIPLICATION operators (returns the result of multiplying complex values)
-    CUDA_CALLABLE complex<T> operator* (const complex<T> rhs) const
-    {
-        complex<T> result;
-        result.r = r * rhs.r - i * rhs.i;
-        result.i = r * rhs.i + i * rhs.r;
-        return result;
-    }
-    CUDA_CALLABLE complex<T> operator* (const T rhs)
-    {
-        return complex<T>(r * rhs, i * rhs);
-    }
-
-    //binary DIVISION operators (returns the result of dividing complex values)
-    CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs) const
-    {
-        complex<T> result;
-        T denom = rhs.r * rhs.r + rhs.i * rhs.i;
-        result.r = (r * rhs.r + i * rhs.i) / denom;
-        result.i = (- r * rhs.i + i * rhs.r) / denom;
-
-        return result;
-    }
-    CUDA_CALLABLE complex<T> operator/ (const T rhs)
-    {
-        return complex<T>(r / rhs, i / rhs);
-    }
-
-    //ASSIGNMENT operators-----------------------------------
-    CUDA_CALLABLE complex<T> & operator=(const complex<T> &rhs)
-    {
-        //check for self-assignment
-        if(this != &rhs)
-        {
-            this->r = rhs.r;
-            this->i = rhs.i;
-        }
-        return *this;
-    }
-    CUDA_CALLABLE complex<T> & operator=(const T &rhs)
-    {
-        	this->r = rhs;
-        	this->i = 0;
-
-		return *this;
-    }
-
-    //arithmetic assignment operators
-    CUDA_CALLABLE complex<T> operator+=(const complex<T> &rhs)
-    {
-		*this = *this + rhs;
-        	return *this;
-    }
-    CUDA_CALLABLE complex<T> operator+=(const T &rhs)
-    {
-		*this = *this + rhs;
-        	return *this;
-    }
-
-	CUDA_CALLABLE complex<T> operator-=(const complex<T> &rhs)
-    {
-		*this = *this - rhs;
-        	return *this;
-    }
-    CUDA_CALLABLE complex<T> operator-=(const T &rhs)
-    {
-		*this = *this - rhs;
-        	return *this;
-    }
-
-    CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs)
-    {
-		*this = *this * rhs;
-        	return *this;
-    }
-	CUDA_CALLABLE complex<T> operator*=(const T &rhs)
-    {
-		*this = *this * rhs;
-        	return *this;
-    }
-	//divide and assign
-	CUDA_CALLABLE complex<T> operator/=(const complex<T> &rhs)
-    {
-		*this = *this / rhs;
-        	return *this;
-    }
-    CUDA_CALLABLE complex<T> operator/=(const T &rhs)
-    {
-		*this = *this / rhs;
-        	return *this;
-    }
-
-    //absolute value operator (returns the absolute value of the complex number)
-	CUDA_CALLABLE T abs()
-	{
-		return std::sqrt(r * r + i * i);
-	}
-
-	CUDA_CALLABLE complex<T> log()
-	{
-		complex<T> result;
-		result.r = (T)std::log(std::sqrt(r * r + i * i));
-		result.i = (T)std::atan2(i, r);
-
-
-		return result;
-	}
-
-	CUDA_CALLABLE complex<T> exp()
-	{
-		complex<T> result;
-
-		T e_r = std::exp(r);
-		result.r = e_r * (T)std::cos(i);
-		result.i = e_r * (T)std::sin(i);
-
-		return result;
-	}
-
-	/*CUDA_CALLABLE complex<T> pow(int y)
-	{
-
-        return pow((double)y);
-	}*/
-
-	CUDA_CALLABLE complex<T> pow(T y)
-	{
-		complex<T> result;
-
-		result = log() * y;
-
-		return result.exp();
-	}
-
-	CUDA_CALLABLE complex<T> sqrt()
-	{
-		complex<T> result;
-
-		//convert to polar coordinates
-		T a = std::sqrt(r*r + i*i);
-		T theta = std::atan2(i, r);
-
-		//find the square root
-		T a_p = std::sqrt(a);
-		T theta_p = theta/2.0f;
-
-		//convert back to cartesian coordinates
-		result.r = a_p * std::cos(theta_p);
-		result.i = a_p * std::sin(theta_p);
-
-		return result;
-	}
-
-	std::string str()
-	{
-		std::stringstream ss;
-		ss<<"("<<r<<","<<i<<")";
-
-		return ss.str();
-	}
-
-	//COMPARISON operators
-	CUDA_CALLABLE bool operator==(complex<T> rhs)
-	{
-        if(r == rhs.r && i == rhs.i)
-            return true;
-        return false;
-    }
-
-    CUDA_CALLABLE bool operator==(T rhs)
-	{
-        if(r == rhs && i == 0)
-            return true;
-        return false;
-    }
-
-    CUDA_CALLABLE bool operator!=(T rhs)
-    {
-        if(r != rhs || i != 0)
-            return true;
-        return false;
-    }
-
-    CUDA_CALLABLE bool operator<(complex<T> rhs){
-    	return abs() < rhs.abs();
-    }
-    CUDA_CALLABLE bool operator<=(complex<T> rhs){
-    	return abs() <= rhs.abs();
-    }
-    CUDA_CALLABLE bool operator>(complex<T> rhs){
-    	return abs() > rhs.abs();
-    }
-    CUDA_CALLABLE bool operator >=(complex<T> rhs){
-    	return abs() >= rhs.abs();
-    }
-
-	//CASTING operators
-	template < typename otherT >
-	operator complex<otherT>()
-	{
-		complex<otherT> result((otherT)r, (otherT)i);
-		return result;
-	}
-	template< typename otherT >
-	complex( const complex<otherT> &rhs)
-	{
-		r = (T)rhs.r;
-		i = (T)rhs.i;
-	}
-	template< typename otherT >
-	complex& operator=(const complex<otherT> &rhs)
-	{
-		r = (T)rhs.r;
-		i = (T)rhs.i;
-		return *this;
-	}
-
-};
-
-}	//end RTS namespace
-
-//addition
-template<typename T>
-CUDA_CALLABLE static stim::complex<T> operator+(const double a, const stim::complex<T> b)
-{
-    return stim::complex<T>((T)a + b.r, b.i);
-}
-
-//subtraction with a real value
-template<typename T>
-CUDA_CALLABLE static stim::complex<T> operator-(const double a, const stim::complex<T> b)
-{
-    return stim::complex<T>((T)a - b.r, -b.i);
-}
-
-//minus sign
-template<typename T>
-CUDA_CALLABLE static stim::complex<T> operator-(const stim::complex<T> &rhs)
-{
-    return stim::complex<T>(-rhs.r, -rhs.i);
-}
-
-//multiply a T value by a complex value
-template<typename T>
-CUDA_CALLABLE static stim::complex<T> operator*(const double a, const stim::complex<T> b)
-{
-    return stim::complex<T>((T)a * b.r, (T)a * b.i);
-}
-
-//divide a T value by a complex value
-template<typename T>
-CUDA_CALLABLE static stim::complex<T> operator/(const double a, const stim::complex<T> b)
-{
-    stim::complex<T> result;
-
-    T denom = b.r * b.r + b.i * b.i;
-
-    result.r = ((T)a * b.r) / denom;
-    result.i = -((T)a * b.i) / denom;
-
-    return result;
-}
-
-
-template<typename T>
-CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, T y)
-{
-	return x.pow(y);
-}
-template<typename T>
-CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, int y)
-{
-	return x.pow(y);
-}
-
-//log function
-template<typename T>
-CUDA_CALLABLE static stim::complex<T> log(stim::complex<T> x)
-{
-	return x.log();
-}
-
-//exp function
-template<typename T>
-CUDA_CALLABLE static stim::complex<T> exp(stim::complex<T> x)
-{
-	return x.exp();
-}
-
-//sqrt function
-template<typename T>
-CUDA_CALLABLE static stim::complex<T> sqrt(stim::complex<T> x)
-{
-	return x.sqrt();
-}
-
-
-template <typename T>
-CUDA_CALLABLE static T abs(stim::complex<T> a)
-{
-    return a.abs();
-}
-
-template <typename T>
-CUDA_CALLABLE static T real(stim::complex<T> a)
-{
-    return a.r;
-}
-
-//template <typename T>
-CUDA_CALLABLE static float real(float a)
-{
-    return a;
-}
-
-template <typename T>
-CUDA_CALLABLE static T imag(stim::complex<T> a)
-{
-    return a.i;
-}
-
-//trigonometric functions
-//template<class A>
-/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x)
-{
-	stim::complex<float> result;
-	result.r = sinf(x.r) * coshf(x.i);
-	result.i = cosf(x.r) * sinhf(x.i);
-
-	return result;
-}*/
-
-template<class A>
-CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)
-{
-	stim::complex<A> result;
-	result.r = (A)std::sin(x.r) * (A)std::cosh(x.i);
-	result.i = (A)std::cos(x.r) * (A)std::sinh(x.i);
-
-	return result;
-}
-
-//floating point template
-//template<class A>
-/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x)
-{
-	stim::complex<float> result;
-	result.r = cosf(x.r) * coshf(x.i);
-	result.i = -(sinf(x.r) * sinhf(x.i));
-
-	return result;
-}*/
-
-template<class A>
-CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x)
-{
-	stim::complex<A> result;
-	result.r = (A)std::cos(x.r) * (A)std::cosh(x.i);
-	result.i = -((A)std::sin(x.r) * (A)std::sinh(x.i));
-
-	return result;
-}
-
-
-template<class A>
-std::ostream& operator<<(std::ostream& os, stim::complex<A> x)
-{
-    os<<x.str();
-    return os;
-}
-
-template<class A>
-std::istream& operator>>(std::istream& is, stim::complex<A>& x)
-{
-    A r, i;
-	r = i = 0;		//initialize the real and imaginary parts to zero
-    is>>r;			//parse
-    is>>i;
-
-    x.real(r);		//assign the parsed values to x
-    x.imag(i);
-
-    return is;		//return the stream
-}
-
-//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
-//template<class T> using rtsComplex = stim::complex<T>;
-//#endif
-
-
-
-#endif
+/*RTS Complex number class.  This class is CUDA compatible,
+and can therefore be used in CUDA code and on CUDA devices.
+*/
+
+#ifndef RTS_COMPLEX
+#define RTS_COMPLEX
+
+#include "../cuda/callable.h"
+#include <cmath>
+#include <string>
+#include <sstream>
+#include <iostream>
+
+namespace stim
+{
+
+/*Complex number stored as a real part (r) and an imaginary part (i).
+Members marked CUDA_CALLABLE are meant to be usable from CUDA code as well as
+host code; str() and the type-casting members carry no such marker.
+*/
+template <class T>
+struct complex
+{
+    T r, i;     //real and imaginary parts
+
+    //default constructor: the value 0 + 0i
+    CUDA_CALLABLE complex() : r(0), i(0) {}
+
+    //constructor when given a real and (optionally) an imaginary value
+    CUDA_CALLABLE complex(T r, T i = 0) : r(r), i(i) {}
+
+    //ACCESS methods---------------------------
+
+    //returns the real part
+    CUDA_CALLABLE T real() const
+    {
+        return r;
+    }
+
+    //sets the real part and returns the value that was assigned
+    CUDA_CALLABLE T real(T r_val)
+    {
+        r = r_val;
+        return r_val;
+    }
+
+    //returns the imaginary part
+    CUDA_CALLABLE T imag() const
+    {
+        return i;
+    }
+
+    //sets the imaginary part and returns the value that was assigned
+    CUDA_CALLABLE T imag(T i_val)
+    {
+        i = i_val;
+        return i_val;
+    }
+
+    //return the current value multiplied by i:  (r + i*j) * j = -i + r*j
+    CUDA_CALLABLE complex<T> imul() const
+    {
+        return complex<T>(-i, r);
+    }
+
+    //returns the complex signum (-1, 0, 1): the sign of the real part or,
+    //when the real part is zero, the sign of the imaginary part
+    CUDA_CALLABLE int sgn() const
+    {
+        if(r > 0) return 1;
+        if(r < 0) return -1;
+
+        //each comparison yields 0 or 1, so the difference is -1, 0 or 1
+        return (0 < i) - (i < 0);
+    }
+
+    //ARITHMETIC OPERATORS--------------------
+
+    //binary + operator (returns the result of adding two complex values)
+    CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs) const
+    {
+        return complex<T>(r + rhs.r, i + rhs.i);
+    }
+
+    //binary + operator (adds a real value to the real part)
+    CUDA_CALLABLE complex<T> operator+ (const T rhs) const
+    {
+        return complex<T>(r + rhs, i);
+    }
+
+    //binary - operator (returns the result of subtracting two complex values)
+    CUDA_CALLABLE complex<T> operator- (const complex<T> rhs) const
+    {
+        return complex<T>(r - rhs.r, i - rhs.i);
+    }
+
+    //binary - operator (subtracts a real value from the real part)
+    CUDA_CALLABLE complex<T> operator- (const T rhs) const
+    {
+        return complex<T>(r - rhs, i);
+    }
+
+    //binary MULTIPLICATION operators (returns the result of multiplying complex values)
+    CUDA_CALLABLE complex<T> operator* (const complex<T> rhs) const
+    {
+        complex<T> result;
+        result.r = r * rhs.r - i * rhs.i;
+        result.i = r * rhs.i + i * rhs.r;
+        return result;
+    }
+
+    //scales both parts by a real value
+    CUDA_CALLABLE complex<T> operator* (const T rhs) const
+    {
+        return complex<T>(r * rhs, i * rhs);
+    }
+
+    //binary DIVISION operators (returns the result of dividing complex values)
+    CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs) const
+    {
+        //multiply by the conjugate of rhs and divide by |rhs|^2
+        complex<T> result;
+        T denom = rhs.r * rhs.r + rhs.i * rhs.i;
+        result.r = (r * rhs.r + i * rhs.i) / denom;
+        result.i = (- r * rhs.i + i * rhs.r) / denom;
+
+        return result;
+    }
+
+    //divides both parts by a real value
+    CUDA_CALLABLE complex<T> operator/ (const T rhs) const
+    {
+        return complex<T>(r / rhs, i / rhs);
+    }
+
+    //ASSIGNMENT operators-----------------------------------
+
+    //copy assignment
+    CUDA_CALLABLE complex<T> & operator=(const complex<T> &rhs)
+    {
+        //check for self-assignment
+        if(this != &rhs)
+        {
+            this->r = rhs.r;
+            this->i = rhs.i;
+        }
+        return *this;
+    }
+
+    //assignment from a real value (the imaginary part becomes zero)
+    CUDA_CALLABLE complex<T> & operator=(const T &rhs)
+    {
+        this->r = rhs;
+        this->i = 0;
+
+        return *this;
+    }
+
+    //arithmetic assignment operators
+    //Each one evaluates the binary operator into a temporary before storing
+    //it, so an argument that aliases *this is read before it is overwritten.
+    //They return a reference to *this, like the built-in compound assignments.
+    CUDA_CALLABLE complex<T> & operator+=(const complex<T> &rhs)
+    {
+        *this = *this + rhs;
+        return *this;
+    }
+    CUDA_CALLABLE complex<T> & operator+=(const T &rhs)
+    {
+        *this = *this + rhs;
+        return *this;
+    }
+
+    CUDA_CALLABLE complex<T> & operator-=(const complex<T> &rhs)
+    {
+        *this = *this - rhs;
+        return *this;
+    }
+    CUDA_CALLABLE complex<T> & operator-=(const T &rhs)
+    {
+        *this = *this - rhs;
+        return *this;
+    }
+
+    CUDA_CALLABLE complex<T> & operator*=(const complex<T> &rhs)
+    {
+        *this = *this * rhs;
+        return *this;
+    }
+    CUDA_CALLABLE complex<T> & operator*=(const T &rhs)
+    {
+        *this = *this * rhs;
+        return *this;
+    }
+
+    //divide and assign
+    CUDA_CALLABLE complex<T> & operator/=(const complex<T> &rhs)
+    {
+        *this = *this / rhs;
+        return *this;
+    }
+    CUDA_CALLABLE complex<T> & operator/=(const T &rhs)
+    {
+        *this = *this / rhs;
+        return *this;
+    }
+
+    //MATH functions-----------------------------------------
+
+    //absolute value (returns the magnitude of the complex number)
+    CUDA_CALLABLE T abs() const
+    {
+        return std::sqrt(r * r + i * i);
+    }
+
+    //natural logarithm: log|z| + j*arg(z)
+    CUDA_CALLABLE complex<T> log() const
+    {
+        complex<T> result;
+        result.r = (T)std::log(std::sqrt(r * r + i * i));
+        result.i = (T)std::atan2(i, r);
+
+        return result;
+    }
+
+    //exponential: e^r * (cos(i) + j*sin(i))
+    CUDA_CALLABLE complex<T> exp() const
+    {
+        complex<T> result;
+
+        T e_r = std::exp(r);
+        result.r = e_r * (T)std::cos(i);
+        result.i = e_r * (T)std::sin(i);
+
+        return result;
+    }
+
+    /*CUDA_CALLABLE complex<T> pow(int y)
+    {
+
+        return pow((double)y);
+    }*/
+
+    //raises the value to the real power y, computed as exp(y * log(z))
+    CUDA_CALLABLE complex<T> pow(T y) const
+    {
+        complex<T> result;
+
+        result = log() * y;
+
+        return result.exp();
+    }
+
+    //square root, computed in polar form
+    CUDA_CALLABLE complex<T> sqrt() const
+    {
+        complex<T> result;
+
+        //convert to polar coordinates
+        T a = std::sqrt(r*r + i*i);
+        T theta = std::atan2(i, r);
+
+        //find the square root
+        T a_p = std::sqrt(a);
+        T theta_p = theta/2.0f;
+
+        //convert back to cartesian coordinates
+        result.r = a_p * std::cos(theta_p);
+        result.i = a_p * std::sin(theta_p);
+
+        return result;
+    }
+
+    //formats the value as "(r,i)"
+    std::string str() const
+    {
+        std::stringstream ss;
+        ss<<"("<<r<<","<<i<<")";
+
+        return ss.str();
+    }
+
+    //COMPARISON operators
+
+    //two complex values are equal when both parts match exactly
+    CUDA_CALLABLE bool operator==(complex<T> rhs) const
+    {
+        return r == rhs.r && i == rhs.i;
+    }
+
+    //a complex value equals a real value only when its imaginary part is zero
+    CUDA_CALLABLE bool operator==(T rhs) const
+    {
+        return r == rhs && i == 0;
+    }
+
+    CUDA_CALLABLE bool operator!=(T rhs) const
+    {
+        return r != rhs || i != 0;
+    }
+
+    //ordering comparisons are made on the magnitudes of the two values
+    CUDA_CALLABLE bool operator<(complex<T> rhs) const
+    {
+        return abs() < rhs.abs();
+    }
+    CUDA_CALLABLE bool operator<=(complex<T> rhs) const
+    {
+        return abs() <= rhs.abs();
+    }
+    CUDA_CALLABLE bool operator>(complex<T> rhs) const
+    {
+        return abs() > rhs.abs();
+    }
+    CUDA_CALLABLE bool operator >=(complex<T> rhs) const
+    {
+        return abs() >= rhs.abs();
+    }
+
+    //CASTING operators
+
+    //conversion to a complex value with a different component type
+    //NOTE(review): left non-const on purpose - a const overload could make
+    //copy-initialization ambiguous with the converting constructor below
+    template < typename otherT >
+    operator complex<otherT>()
+    {
+        complex<otherT> result((otherT)r, (otherT)i);
+        return result;
+    }
+
+    //construction from a complex value with a different component type
+    template< typename otherT >
+    complex( const complex<otherT> &rhs)
+    {
+        r = (T)rhs.r;
+        i = (T)rhs.i;
+    }
+
+    //assignment from a complex value with a different component type
+    template< typename otherT >
+    complex& operator=(const complex<otherT> &rhs)
+    {
+        r = (T)rhs.r;
+        i = (T)rhs.i;
+        return *this;
+    }
+
+};
+
+}	//end stim namespace
+
+//addition of a real value and a complex value (a + b)
+template<typename T>
+CUDA_CALLABLE static stim::complex<T> operator+(const double a, const stim::complex<T> b)
+{
+    //a real addend only changes the real part
+    const T sum_r = (T)a + b.r;
+    return stim::complex<T>(sum_r, b.i);
+}
+
+//subtraction of a complex value from a real value (a - b)
+template<typename T>
+CUDA_CALLABLE static stim::complex<T> operator-(const double a, const stim::complex<T> b)
+{
+    const T diff_r = (T)a - b.r;    //real part: a - Re(b)
+    const T diff_i = -b.i;          //imaginary part: 0 - Im(b)
+    return stim::complex<T>(diff_r, diff_i);
+}
+
+//unary minus: negates both the real and the imaginary part
+template<typename T>
+CUDA_CALLABLE static stim::complex<T> operator-(const stim::complex<T> &rhs)
+{
+    stim::complex<T> negated(-rhs.r, -rhs.i);
+    return negated;
+}
+
+//multiply a real value by a complex value (a * b)
+template<typename T>
+CUDA_CALLABLE static stim::complex<T> operator*(const double a, const stim::complex<T> b)
+{
+    //convert the scalar to the component type once, then scale both parts
+    const T scale = (T)a;
+    return stim::complex<T>(scale * b.r, scale * b.i);
+}
+
+//divide a real value by a complex value (a / b)
+template<typename T>
+CUDA_CALLABLE static stim::complex<T> operator/(const double a, const stim::complex<T> b)
+{
+    //a / b = a * conj(b) / |b|^2
+    const T numer = (T)a;
+    const T denom = b.r * b.r + b.i * b.i;
+
+    const T quot_r = (numer * b.r) / denom;
+    const T quot_i = -(numer * b.i) / denom;
+
+    return stim::complex<T>(quot_r, quot_i);
+}
+
+
+//free-function form of complex<T>::pow for an exponent of the component type
+template<typename T>
+CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, T y)
+{
+	stim::complex<T> raised = x.pow(y);
+	return raised;
+}
+//free-function form of complex<T>::pow for an integer exponent
+template<typename T>
+CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, int y)
+{
+	//the member function takes the exponent as a T
+	const T exponent = (T)y;
+	return x.pow(exponent);
+}
+
+//log function (free-function form of complex<T>::log)
+template<typename T>
+CUDA_CALLABLE static stim::complex<T> log(stim::complex<T> x)
+{
+	stim::complex<T> result = x.log();
+	return result;
+}
+
+//exp function (free-function form of complex<T>::exp)
+template<typename T>
+CUDA_CALLABLE static stim::complex<T> exp(stim::complex<T> x)
+{
+	stim::complex<T> result = x.exp();
+	return result;
+}
+
+//sqrt function (free-function form of complex<T>::sqrt)
+template<typename T>
+CUDA_CALLABLE static stim::complex<T> sqrt(stim::complex<T> x)
+{
+	stim::complex<T> root = x.sqrt();
+	return root;
+}
+
+
+//magnitude of a complex value (free-function form of complex<T>::abs)
+template <typename T>
+CUDA_CALLABLE static T abs(stim::complex<T> a)
+{
+    T magnitude = a.abs();
+    return magnitude;
+}
+
+//real part of a complex value
+template <typename T>
+CUDA_CALLABLE static T real(stim::complex<T> a)
+{
+    const T real_part = a.r;
+    return real_part;
+}
+
+//template <typename T>
+//real part of a plain float, which is the value itself
+CUDA_CALLABLE static float real(float a)
+{
+    const float real_part = a;
+    return real_part;
+}
+
+//imaginary part of a complex value
+template <typename T>
+CUDA_CALLABLE static T imag(stim::complex<T> a)
+{
+    const T imag_part = a.i;
+    return imag_part;
+}
+
+//trigonometric functions
+//template<class A>
+/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x)
+{
+	stim::complex<float> result;
+	result.r = sinf(x.r) * coshf(x.i);
+	result.i = cosf(x.r) * sinhf(x.i);
+
+	return result;
+}*/
+
+//complex sine: sin(a + jb) = sin(a)cosh(b) + j cos(a)sinh(b)
+template<class A>
+CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)
+{
+	const A re = (A)std::sin(x.r) * (A)std::cosh(x.i);
+	const A im = (A)std::cos(x.r) * (A)std::sinh(x.i);
+
+	return stim::complex<A>(re, im);
+}
+
+//floating point template
+//template<class A>
+/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x)
+{
+	stim::complex<float> result;
+	result.r = cosf(x.r) * coshf(x.i);
+	result.i = -(sinf(x.r) * sinhf(x.i));
+
+	return result;
+}*/
+
+//complex cosine: cos(a + jb) = cos(a)cosh(b) - j sin(a)sinh(b)
+template<class A>
+CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x)
+{
+	const A re = (A)std::cos(x.r) * (A)std::cosh(x.i);
+	const A im = -((A)std::sin(x.r) * (A)std::sinh(x.i));
+
+	return stim::complex<A>(re, im);
+}
+
+
+//stream output: writes the text produced by complex<A>::str()
+template<class A>
+std::ostream& operator<<(std::ostream& os, stim::complex<A> x)
+{
+    //inserting the string returns the same stream, ready for chaining
+    return os << x.str();
+}
+
+//stream input: reads the real part followed by the imaginary part
+template<class A>
+std::istream& operator>>(std::istream& is, stim::complex<A>& x)
+{
+    //both parts start at zero
+    A re = 0;
+    A im = 0;
+
+    //parse the two values in order
+    is >> re >> im;
+
+    //store the parsed values through the setters
+    x.real(re);
+    x.imag(im);
+
+    return is;
+}
+
+//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
+//template<class T> using rtsComplex = stim::complex<T>;
+//#endif
+
+
+
+#endif
-#ifndef	RTS_COMPLEXFIELD_H
-#define RTS_COMPLEXFIELD_H
-
-#include "cublas_v2.h"
-#include <cuda_runtime.h>
-
-#include "../math/field.cuh"
-#include "../math/complex.h"
-#include "../math/realfield.cuh"
-
-namespace stim{
-
-template<typename T>
-__global__ void gpu_complexfield_mag(T* dest, complex<T>* source, unsigned int r0, unsigned int r1){
-
-	int iu = blockIdx.x * blockDim.x + threadIdx.x;
-	int iv = blockIdx.y * blockDim.y + threadIdx.y;
-
-	//make sure that the thread indices are in-bounds
-	if(iu >= r0 || iv >= r1) return;
-
-	//compute the index into the field
-	int i = iv*r0 + iu;
-
-	//calculate and store the result
-	dest[i] = source[i].abs();
-}
-
-/*This class stores functions for saving images of complex fields
-*/
-template<typename T, unsigned int D = 1>
-class complexfield : public field< stim::complex<T>, D >{
-	using field< stim::complex<T>, D >::R;
-	using field< stim::complex<T>, D >::X;
-	using field< stim::complex<T>, D >::shape;
-	using field< stim::complex<T>, D >::cuda_params;
-
-	
-
-public:
-
-	//find the maximum value of component n
-	stim::complex<T> find_max(unsigned int n){
-		cublasStatus_t stat;
-		cublasHandle_t handle;
-
-		//create a CUBLAS handle
-		stat = cublasCreate(&handle);
-		if(stat != CUBLAS_STATUS_SUCCESS){
-			std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
-			exit(1);
-		}
-
-		int L = R[0] * R[1];    //compute the number of discrete points in a slice
-		int index;				//result of the max operation
-		stim::complex<T> result;
-
-		if(sizeof(T) == 8)
-			stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index);
-		else
-			stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index);
-
-		index -= 1;        //adjust for 1-based indexing
-
-		//if there was a GPU error, terminate
-		if(stat != CUBLAS_STATUS_SUCCESS){
-			std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
-			exit(1);
-		}
-
-		//retrieve the maximum value for this slice and store it in the maxVal array
-		std::cout<<X[n]<<std::endl;
-		HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(stim::complex<T>), cudaMemcpyDeviceToHost));
-		return result;
-	}
-
-public:
-
-	enum attribute {magnitude, real, imaginary};
-
-	//constructor (no parameters)
-	complexfield() : field<stim::complex<T>, D>(){};
-
-	//constructor (resolution specified)
-	complexfield(unsigned int r0, unsigned int r1) : field<stim::complex<T>, D>(r0, r1){};
-
-	//assignment from a field of complex values
-	complexfield & operator=(const field< stim::complex<T>, D > rhs){
-		field< complex<T>, D >::operator=(rhs);
-		return *this;
-	}
-
-	//assignment operator (scalar value)
-	complexfield & operator= (const complex<T> rhs){
-
-		field< complex<T>, D >::operator=(rhs);
-		return *this;
-	}
-
-	//assignment operator (vector value)
-	complexfield & operator= (const vec< complex<T>, D > rhs){
-
-		field< complex<T>, D >::operator=(rhs);
-		return *this;
-	}
-
-	//cropping
-	complexfield crop(unsigned int width, unsigned int height){
-
-		complexfield<T, D> result;
-		result = field< complex<T>, D>::crop(width, height);
-		return result;
-	}
-
-	void toImage(std::string filename, attribute type = magnitude, unsigned int n=0){
-
-		field<T, 1> rf(R[0], R[1]);
-
-		//get cuda parameters
-		dim3 blocks, grids;
-		cuda_params(grids, blocks);
-
-		if(type == magnitude){
-			gpu_complexfield_mag <<<grids, blocks>>> (rf.ptr(), X[n], R[0], R[1]);
-			rf.toImage(filename, n, true);
-		}
-
-	}
-
-
-};
-
-
-}	//end namespace rts
-
-
-#endif
+#ifndef	RTS_COMPLEXFIELD_H
+#define RTS_COMPLEXFIELD_H
+
+#include "cublas_v2.h"
+#include <cuda_runtime.h>
+
+#include "../math/field.cuh"
+#include "../math/complex.h"
+#include "../math/realfield.cuh"
+
+namespace stim{
+
+//Stores the magnitude of every element of a complex field: dest[i] = |source[i]|.
+//Written for a 2D launch with one thread per element: x indexes the r0
+//direction and y indexes the r1 direction. Threads that fall outside the
+//r0 x r1 extent return without touching memory.
+//dest and source are indexed as iv*r0 + iu, so each must hold r0*r1 elements.
+template<typename T>
+__global__ void gpu_complexfield_mag(T* dest, complex<T>* source, unsigned int r0, unsigned int r1){
+
+	//unsigned indices match the unsigned extents they are compared against
+	unsigned int iu = blockIdx.x * blockDim.x + threadIdx.x;
+	unsigned int iv = blockIdx.y * blockDim.y + threadIdx.y;
+
+	//make sure that the thread indices are in-bounds
+	if(iu >= r0 || iv >= r1) return;
+
+	//compute the index into the field
+	//(widened before the multiply so that it cannot wrap for large fields)
+	size_t i = (size_t)iv * r0 + iu;
+
+	//calculate and store the result
+	dest[i] = source[i].abs();
+}
+
+/*This class stores functions for saving images of complex fields.
+It derives from field< stim::complex<T>, D > and reuses the resolution (R),
+the device pointers (X) and the launch-configuration helper (cuda_params) of
+that base class.
+*/
+template<typename T, unsigned int D = 1>
+class complexfield : public field< stim::complex<T>, D >{
+	using field< stim::complex<T>, D >::R;
+	using field< stim::complex<T>, D >::X;
+	using field< stim::complex<T>, D >::shape;
+	using field< stim::complex<T>, D >::cuda_params;
+
+public:
+
+	//find the maximum value of component n
+	//The search uses the cuBLAS I?amax routines, which select the element
+	//with the largest |real| + |imaginary| sum. An empty field yields 0.
+	//Any cuBLAS failure prints a message and terminates the program.
+	stim::complex<T> find_max(unsigned int n){
+		cublasStatus_t stat;
+		cublasHandle_t handle;
+
+		//create a CUBLAS handle
+		stat = cublasCreate(&handle);
+		if(stat != CUBLAS_STATUS_SUCCESS){
+			std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
+			exit(1);
+		}
+
+		int L = R[0] * R[1];    //compute the number of discrete points in a slice
+		int index = 0;			//result of the max operation (1-based)
+		stim::complex<T> result;
+
+		//4-byte components are single precision (cuComplex);
+		//anything else is treated as double precision (cuDoubleComplex)
+		if(sizeof(T) == 4)
+			stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index);
+		else
+			stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index);
+
+		//release the handle so that repeated calls do not accumulate cuBLAS contexts
+		cublasDestroy(handle);
+
+		//if there was a GPU error, terminate
+		if(stat != CUBLAS_STATUS_SUCCESS){
+			std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
+			exit(1);
+		}
+
+		//cuBLAS reports index 0 when there is nothing to search
+		if(index < 1)
+			return result;
+
+		index -= 1;        //adjust for 1-based indexing
+
+		//retrieve the maximum value for this slice from the device
+		HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(stim::complex<T>), cudaMemcpyDeviceToHost));
+		return result;
+	}
+
+public:
+
+	//quantity that toImage() can render
+	enum attribute {magnitude, real, imaginary};
+
+	//constructor (no parameters)
+	complexfield() : field<stim::complex<T>, D>(){}
+
+	//constructor (resolution specified)
+	complexfield(unsigned int r0, unsigned int r1) : field<stim::complex<T>, D>(r0, r1){}
+
+	//assignment from a field of complex values
+	complexfield & operator=(const field< stim::complex<T>, D > rhs){
+		field< complex<T>, D >::operator=(rhs);
+		return *this;
+	}
+
+	//assignment operator (scalar value)
+	complexfield & operator= (const complex<T> rhs){
+
+		field< complex<T>, D >::operator=(rhs);
+		return *this;
+	}
+
+	//assignment operator (vector value)
+	complexfield & operator= (const vec< complex<T>, D > rhs){
+
+		field< complex<T>, D >::operator=(rhs);
+		return *this;
+	}
+
+	//cropping: returns the base-class crop result wrapped in a complexfield
+	complexfield crop(unsigned int width, unsigned int height){
+
+		complexfield<T, D> result;
+		result = field< complex<T>, D>::crop(width, height);
+		return result;
+	}
+
+	//saves an image of component n of the field
+	//Only the magnitude attribute is implemented; any other attribute
+	//returns without writing a file.
+	void toImage(std::string filename, attribute type = magnitude, unsigned int n=0){
+
+		//real-valued field that receives the rendered values
+		field<T, 1> rf(R[0], R[1]);
+
+		//get cuda parameters
+		dim3 blocks, grids;
+		cuda_params(grids, blocks);
+
+		if(type == magnitude){
+			gpu_complexfield_mag <<<grids, blocks>>> (rf.ptr(), X[n], R[0], R[1]);
+
+			//a failed launch is otherwise silent, so check for it before using the result
+			HANDLE_ERROR(cudaGetLastError());
+
+			//NOTE(review): n is also passed to rf.toImage although rf holds a single component - confirm the meaning of that argument
+			rf.toImage(filename, n, true);
+		}
+
+	}
+
+
+};
+
+
+}	//end namespace stim
+
+
+#endif
-#ifndef RTS_FIELD_CUH
-#define RTS_FIELD_CUH
-
-#include <vector>
-#include <string>
-#include <sstream>
-
-#include "cublas_v2.h"
-#include <cuda_runtime.h>
-
-#include "../math/rect.h"
-#include "../cuda/threads.h"
-#include "../cuda/error.h"
-#include "../cuda/devices.h"
-#include "../visualization/colormap.h"
-
-
-namespace stim{
-
-//multiply R = X * Y
-template<typename T>
-__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){
-
-	int iu = blockIdx.x * blockDim.x + threadIdx.x;
-    int iv = blockIdx.y * blockDim.y + threadIdx.y;
-
-    //make sure that the thread indices are in-bounds
-    if(iu >= r0 || iv >= r1) return;
-
-    //compute the index into the field
-    int i = iv*r0 + iu;
-
-    //calculate and store the result
-    R[i] = X[i] * Y[i];
-}
-
-//assign a constant value to all points
-template<typename T>
-__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){
-
-	int iu = blockIdx.x * blockDim.x + threadIdx.x;
-	int iv = blockIdx.y * blockDim.y + threadIdx.y;
-
-	//make sure that the thread indices are in-bounds
-	if(iu >= r0 || iv >= r1) return;
-
-	//compute the index into the field
-	int i = iv*r0 + iu;
-
-	//calculate and store the result
-	ptr[i] = val;
-}
-
-//crop the field to the new dimensions (width x height)
-template<typename T>
-__global__ void gpu_field_crop(T* dest, T* source, 
-								unsigned int r0, unsigned int r1, 
-								unsigned int width, unsigned int height){
-
-	int iu = blockIdx.x * blockDim.x + threadIdx.x;
-    int iv = blockIdx.y * blockDim.y + threadIdx.y;
-
-    //make sure that the thread indices are in-bounds
-    if(iu >= width || iv >= height) return;
-
-    //compute the index into the field
-    int is = iv*r0 + iu;
-    int id = iv*width + iu;
-
-    //calculate and store the result
-    dest[id] = source[is];
-}
-
-template<typename T, unsigned int D = 1>
-class field{
-
-protected:
-
-	T* X[D];			//pointer to the field data
-	unsigned int R[2];	//field resolution
-	stim::rect<T> shape;		//position and shape of the field slice
-
-	//calculates the optimal block and grid sizes using information from the GPU
-	void cuda_params(dim3& grids, dim3& blocks){
-		int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
-		int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
-
-		//create one thread for each detector pixel
-		blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
-		grids = dim3((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
-	}
-
-	//find the maximum value of component n
-	T find_max(unsigned int n){
-		cublasStatus_t stat;
-		cublasHandle_t handle;
-
-		//create a CUBLAS handle
-		stat = cublasCreate(&handle);
-		if(stat != CUBLAS_STATUS_SUCCESS){
-			std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
-			exit(1);
-		}
-
-		int L = R[0] * R[1];    //compute the number of discrete points in a slice
-		int index;				//result of the max operation
-		T result;
-
-		if(sizeof(T) == 4)
-			stat = cublasIsamax(handle, L, (const float*)X[n], 1, &index);
-		else
-			stat = cublasIdamax(handle, L, (const double*)X[n], 1, &index);
-
-		index -= 1;        //adjust for 1-based indexing
-
-		//if there was a GPU error, terminate
-		if(stat != CUBLAS_STATUS_SUCCESS){
-			std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
-			exit(1);
-		}
-
-		//retrieve the maximum value for this slice and store it in the maxVal array
-		HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(T), cudaMemcpyDeviceToHost));
-		return result;
-	}
-
-public:
-
-	//returns a list of file names given an input string with wild cards
-	std::vector<std::string> process_filename(std::string name){
-		std::stringstream ss(name);
-		std::string item;
-		std::vector<std::string> elems;
-		while(std::getline(ss, item, '.'))      //split the string at the '.' character (filename and extension)
-		{
-		    elems.push_back(item);
-		}
-
-		std::string prefix = elems[0];                      //prefix contains the filename (with wildcard '?' characters)
-		std::string ext = elems[1];                         //file extension (ex. .bmp, .png)
-		ext = std::string(".") + ext;           //add a period back into the extension
-
-		size_t i0 = prefix.find_first_of("?");  //find the positions of the first and last wildcard ('?'')
-		size_t i1 = prefix.find_last_of("?");
-
-		std::string postfix = prefix.substr(i1+1);
-		prefix = prefix.substr(0, i0);
-
-		unsigned int digits = i1 - i0 + 1;                   //compute the number of wildcards
-
-		std::vector<std::string> flist;			//create a vector of file names
-		//fill the list
-		for(unsigned int d=0; d<D; d++){
-			std::stringstream ss;            //assemble the file name
-			ss<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext;
-			flist.push_back(ss.str());
-		}
-
-		return flist;
-	}
-
-	void init(){
-		for(unsigned int n=0; n<D; n++)
-			X[n] = NULL;
-	}
-	void destroy(){
-		for(unsigned int n=0; n<D; n++)
-			if(X[n] != NULL)
-				HANDLE_ERROR(cudaFree(X[n]));
-	}
-
-public:
-	//field constructor
-	field(){
-		R[0] = R[1] = 0;
-		init();
-	}
-
-	field(unsigned int x, unsigned int y){
-        //set the resolution
-        R[0] = x;
-        R[1] = y;
-		//allocate memory on the GPU
-		for(unsigned int n=0; n<D; n++){
-			HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(T) * R[0] * R[1] ));
-		}
-		clear();		//zero the field
-    }
-
-    ///copy constructor
-	field(const field &rhs){
-		//first make a shallow copy
-		R[0] = rhs.R[0];
-		R[1] = rhs.R[1];
-
-		for(unsigned int n=0; n<D; n++){
-			//do we have to make a deep copy?
-			if(rhs.X[n] == NULL)
-				X[n] = NULL;		//no
-			else{
-				//allocate the necessary memory
-				HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1]));
-
-				//copy the slice
-				HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice));
-			}
-		}
-	}
-
-	~field(){
-		destroy();
-    }
-
-    //assignment operator
-	field & operator= (const field & rhs){
-
-        //de-allocate any existing GPU memory
-        destroy();
-
-        //copy the slice resolution
-        R[0] = rhs.R[0];
-        R[1] = rhs.R[1];
-
-		for(unsigned int n=0; n<D; n++)
-		{
-			//allocate the necessary memory
-			HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1]));
-			//copy the slice
-			HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice));
-		}
-        return *this;
-    }
-
-    field & operator= (const T rhs){
-
-    	int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
-        int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
-
-        //create one thread for each detector pixel
-        dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
-        dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
-
-        //assign the constant value to all positions and dimensions
-        for(int n=0; n<D; n++)
-        	stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]);
-
-        return *this;
-    }
-
-    //assignment of vector component
-    field & operator= (const vec<T, D> rhs){
-
-    	int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
-        int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
-
-        //create one thread for each detector pixel
-        dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
-        dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
-
-        //assign the constant value to all positions and dimensions
-        for(unsigned int n=0; n<D; n++)
-        	stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]);
-
-        return *this;
-
-    }
-
-    //multiply two fields (element-wise multiplication)
-    field<T, D> operator* (const field & rhs){
-
-    	int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
-        int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
-
-        //create one thread for each detector pixel
-        dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
-        dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
-
-        //create a scalar field to store the result
-        field<T, D> result(R[0], R[1]);
-
-        for(int n=0; n<D; n++)
-        	stim::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]);
-
-        return result;
-    }
-
-	T* ptr(unsigned int n = 0){
-		if(n < D)
-			return X[n];
-		else return NULL;
-	}
-
-	//return the vector component at position (u, v)
-	vec<T, D> get(unsigned int u, unsigned int v){
-
-		vec<T, D> result;
-		for(unsigned int d=0; d<D; d++){
-			HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + v*R[0] + u, sizeof(T), cudaMemcpyDeviceToHost));
-		}
-
-		return result;
-	}
-
-	//set all components of the field to zero
-	void clear(){
-		for(unsigned int n=0; n<D; n++)
-			if(X[n] != NULL)
-				HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1]));
-    }
-
-    //crop the field
-    field<T, D> crop(unsigned int width, unsigned int height){
-    	int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
-        int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
-
-        //create one thread for each detector pixel
-        dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
-        dim3 dimGrid((width + SQRT_BLOCK -1)/SQRT_BLOCK, (height + SQRT_BLOCK - 1)/SQRT_BLOCK);
-
-        //create a scalar field to store the result
-        field<T, D> result(width, height);
-
-        for(int n=0; n<D; n++)
-        	stim::gpu_field_crop <<<dimGrid, dimBlock>>> (result.X[n], X[n], R[0], R[1], width, height);
-
-        return result;
-    }
-
-    //save an image representing component n
-    void toImage(std::string filename, unsigned int n = 0,
-    			 bool positive = false, stim::colormapType cmap = stim::cmBrewer){
-    	T max_val = find_max(n);	//find the maximum value
-
-    	if(positive)				//if the field is positive, use the range [0 max_val]
-    		stim::gpu2image<T>(X[n], filename, R[0], R[1], 0, max_val, cmap);
-    	else
-    		stim::gpu2image<T>(X[n], filename, R[0], R[1], -max_val, max_val, cmap);
-    }
-
-};
-
-}		//end namespace rts
-#endif
+#ifndef RTS_FIELD_CUH
+#define RTS_FIELD_CUH
+
+#include <algorithm>
+#include <cmath>
+#include <cstdlib>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "cublas_v2.h"
+#include <cuda_runtime.h>
+
+#include "../math/rect.h"
+#include "../cuda/threads.h"
+#include "../cuda/error.h"
+#include "../cuda/devices.h"
+#include "../visualization/colormap.h"
+
+
+namespace stim{
+
+//multiply R = X * Y (element-wise) for every sample of an r0 x r1 slice
+//  Launch layout: 2D grid of 2D blocks, one thread per sample (thread x -> position
+//  inside a row, thread y -> row). Threads outside the slice return without
+//  touching memory, so the grid may be larger than the slice.
+//  R, X and Y are addressed as ptr[iv * r0 + iu].
+template<typename T>
+__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){
+
+	//unsigned indices: the built-in index variables are unsigned and so are the bounds
+	unsigned int iu = blockIdx.x * blockDim.x + threadIdx.x;
+	unsigned int iv = blockIdx.y * blockDim.y + threadIdx.y;
+
+	//make sure that the thread indices are in-bounds
+	if(iu >= r0 || iv >= r1) return;
+
+	//compute the index into the field (size_t so that large slices cannot overflow 32 bits)
+	size_t i = (size_t)iv * r0 + iu;
+
+	//calculate and store the result
+	R[i] = X[i] * Y[i];
+}
+
+//assign a constant value to all points of an r0 x r1 slice
+//  Launch layout: 2D grid of 2D blocks, one thread per sample (thread x -> position
+//  inside a row, thread y -> row). Threads outside the slice return without
+//  touching memory, so the grid may be larger than the slice.
+//  ptr is addressed as ptr[iv * r0 + iu].
+template<typename T>
+__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){
+
+	//unsigned indices: the built-in index variables are unsigned and so are the bounds
+	unsigned int iu = blockIdx.x * blockDim.x + threadIdx.x;
+	unsigned int iv = blockIdx.y * blockDim.y + threadIdx.y;
+
+	//make sure that the thread indices are in-bounds
+	if(iu >= r0 || iv >= r1) return;
+
+	//compute the index into the field (size_t so that large slices cannot overflow 32 bits)
+	size_t i = (size_t)iv * r0 + iu;
+
+	//calculate and store the result
+	ptr[i] = val;
+}
+
+//crop the field to the new dimensions (width x height)
+//  Copies the sample at (iu, iv) of the r0 x r1 source into the width x height destination.
+//  Launch layout: 2D grid of 2D blocks, one thread per destination sample.
+//  source is addressed as source[iv * r0 + iu], dest as dest[iv * width + iu].
+//  Destination samples that have no counterpart in the source (width > r0 or height > r1)
+//  are left untouched instead of being read from outside the source buffer.
+template<typename T>
+__global__ void gpu_field_crop(T* dest, T* source,
+								unsigned int r0, unsigned int r1,
+								unsigned int width, unsigned int height){
+
+	//unsigned indices: the built-in index variables are unsigned and so are the bounds
+	unsigned int iu = blockIdx.x * blockDim.x + threadIdx.x;
+	unsigned int iv = blockIdx.y * blockDim.y + threadIdx.y;
+
+	//make sure that the thread indices are inside the destination...
+	if(iu >= width || iv >= height) return;
+
+	//...and inside the source, otherwise the read below would be out of bounds
+	if(iu >= r0 || iv >= r1) return;
+
+	//compute the indices into both fields (size_t so that large slices cannot overflow 32 bits)
+	size_t is = (size_t)iv * r0 + iu;
+	size_t id = (size_t)iv * width + iu;
+
+	//calculate and store the result
+	dest[id] = source[is];
+}
+
+//2D field with D components stored in GPU memory.
+//  Each component ("slice") is a separate allocation holding R[0] x R[1] values of type T,
+//  addressed as X[n][v * R[0] + u]. A NULL slice pointer means the component is not allocated.
+//  The object owns its slices: copies are deep and the destructor releases the memory.
+template<typename T, unsigned int D = 1>
+class field{
+
+protected:
+
+	T* X[D];			//pointers to the field data (one allocation per component, NULL if unallocated)
+	unsigned int R[2];	//field resolution: R[0] samples per row, R[1] rows
+	stim::rect<T> shape;		//position and shape of the field slice
+
+	//number of bytes occupied by one slice (evaluated in size_t to avoid 32-bit overflow)
+	size_t slice_bytes() const{
+		return sizeof(T) * R[0] * R[1];
+	}
+
+	//deep-copies every allocated slice of rhs into this field
+	//  expects R[] to match rhs.R[] already and every X[n] to be NULL on entry
+	void copy_slices(const field &rhs){
+		size_t bytes = slice_bytes();
+		for(unsigned int n=0; n<D; n++){
+			//components that are missing (or empty) in rhs stay NULL here as well
+			if(rhs.X[n] == NULL || bytes == 0) continue;
+
+			//allocate the necessary memory
+			HANDLE_ERROR(cudaMalloc((void**)&X[n], bytes));
+			//copy the slice
+			HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], bytes, cudaMemcpyDeviceToDevice));
+		}
+	}
+
+	//calculates the block and grid sizes using information from the GPU
+	//  blocks: square block whose side is floor(sqrt(maximum threads per block))
+	//  grids:  enough blocks to cover the R[0] x R[1] slice (ceiling division)
+	void cuda_params(dim3& grids, dim3& blocks){
+		int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
+		int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
+
+		//create one thread for each detector pixel
+		blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
+		grids = dim3((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
+	}
+
+	//find the value with the largest magnitude in component n
+	//  cublasIsamax/cublasIdamax report the (1-based) position of the element with the largest
+	//  absolute value, so the value returned here can be negative.
+	//  T is handed to CUBLAS as float when sizeof(T) == 4 and as double otherwise.
+	//  Returns 0 when the component does not exist or holds no samples.
+	T find_max(unsigned int n){
+
+		int L = R[0] * R[1];    //compute the number of discrete points in a slice
+
+		//nothing to search - CUBLAS would report index 0 and X[n][-1] would be read below
+		if(n >= D || X[n] == NULL || L <= 0)
+			return (T)0;
+
+		cublasStatus_t stat;
+		cublasHandle_t handle;
+
+		//create a CUBLAS handle
+		stat = cublasCreate(&handle);
+		if(stat != CUBLAS_STATUS_SUCCESS){
+			std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
+			exit(1);
+		}
+
+		int index = 0;				//result of the max operation
+
+		if(sizeof(T) == 4)
+			stat = cublasIsamax(handle, L, (const float*)X[n], 1, &index);
+		else
+			stat = cublasIdamax(handle, L, (const double*)X[n], 1, &index);
+
+		//the handle is only needed for the search - release it so repeated calls do not leak
+		cublasDestroy(handle);
+
+		//if there was a GPU error (or no valid position was reported), terminate
+		if(stat != CUBLAS_STATUS_SUCCESS || index < 1){
+			std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
+			exit(1);
+		}
+
+		index -= 1;        //adjust for 1-based indexing
+
+		//retrieve the selected value for this slice
+		T result;
+		HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(T), cudaMemcpyDeviceToHost));
+		return result;
+	}
+
+public:
+
+	//returns a list of D file names (one per component) given an input string with wild cards
+	//  The run of characters between the first and the last '?' of the name is replaced by the
+	//  zero-padded component index, e.g. "img??.bmp" -> "img00.bmp", "img01.bmp", ...
+	//  The extension is everything from the last '.' of the file name (directories may
+	//  contain periods). Without a wild card the index is appended in front of the extension.
+	std::vector<std::string> process_filename(std::string name){
+
+		std::string prefix = name;		//file name without extension (with wildcard '?' characters)
+		std::string ext;				//file extension including the period (ex. .bmp, .png)
+
+		//split at the last '.', but only if it belongs to the file name and not to a directory
+		size_t dot = name.find_last_of('.');
+		size_t slash = name.find_last_of("/\\");
+		if(dot != std::string::npos && (slash == std::string::npos || dot > slash)){
+			prefix = name.substr(0, dot);
+			ext = name.substr(dot);
+		}
+
+		size_t i0 = prefix.find_first_of("?");  //find the positions of the first and last wildcard ('?')
+		size_t i1 = prefix.find_last_of("?");
+
+		std::string postfix;			//text between the wildcards and the extension
+		unsigned int digits = 0;		//number of wildcards (field width of the index)
+		if(i0 != std::string::npos){
+			postfix = prefix.substr(i1+1);
+			digits = (unsigned int)(i1 - i0 + 1);
+			prefix = prefix.substr(0, i0);
+		}
+
+		std::vector<std::string> flist;			//create a vector of file names
+		//fill the list
+		for(unsigned int d=0; d<D; d++){
+			std::stringstream ss;            //assemble the file name
+			ss<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext;
+			flist.push_back(ss.str());
+		}
+
+		return flist;
+	}
+
+	//marks every component as unallocated (does not free anything)
+	void init(){
+		for(unsigned int n=0; n<D; n++)
+			X[n] = NULL;
+	}
+
+	//frees every allocated component
+	void destroy(){
+		for(unsigned int n=0; n<D; n++)
+			if(X[n] != NULL){
+				HANDLE_ERROR(cudaFree(X[n]));
+				X[n] = NULL;		//a second destroy() (or the destructor) must not free it again
+			}
+	}
+
+public:
+	//field constructor: an empty field without any allocated component
+	field(){
+		R[0] = R[1] = 0;
+		init();
+	}
+
+	//creates a zero-filled field with a resolution of x by y samples
+	field(unsigned int x, unsigned int y){
+		//set the resolution
+		R[0] = x;
+		R[1] = y;
+
+		//start from NULL so that no pointer is ever left uninitialized
+		init();
+
+		//allocate memory on the GPU (an empty field keeps its NULL pointers)
+		size_t bytes = slice_bytes();
+		if(bytes > 0){
+			for(unsigned int n=0; n<D; n++){
+				HANDLE_ERROR(cudaMalloc( (void**)&X[n], bytes ));
+			}
+		}
+		clear();		//zero the field
+	}
+
+	///copy constructor (deep copy of every allocated component)
+	field(const field &rhs) : shape(rhs.shape){
+		R[0] = rhs.R[0];
+		R[1] = rhs.R[1];
+
+		init();
+		copy_slices(rhs);
+	}
+
+	~field(){
+		destroy();
+	}
+
+	//assignment operator (deep copy of every allocated component)
+	field & operator= (const field & rhs){
+
+		//self-assignment: destroy() would free the very data that is about to be copied
+		if(this == &rhs) return *this;
+
+		//de-allocate any existing GPU memory
+		destroy();
+
+		//copy the slice resolution and shape
+		R[0] = rhs.R[0];
+		R[1] = rhs.R[1];
+		shape = rhs.shape;
+
+		copy_slices(rhs);
+		return *this;
+	}
+
+	//assign the same constant to every sample of every component
+	field & operator= (const T rhs){
+
+		//an empty field has nothing to fill (and a grid of size zero cannot be launched)
+		if(R[0] == 0 || R[1] == 0) return *this;
+
+		//create one thread for each detector pixel
+		dim3 dimGrid, dimBlock;
+		cuda_params(dimGrid, dimBlock);
+
+		//assign the constant value to all positions and dimensions
+		for(unsigned int n=0; n<D; n++){
+			if(X[n] == NULL) continue;
+			stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]);
+			HANDLE_ERROR(cudaGetLastError());		//report launch failures
+		}
+
+		return *this;
+	}
+
+	//assignment of vector component: component n of the field is filled with rhs.v[n]
+	field & operator= (const vec<T, D> rhs){
+
+		//an empty field has nothing to fill (and a grid of size zero cannot be launched)
+		if(R[0] == 0 || R[1] == 0) return *this;
+
+		//create one thread for each detector pixel
+		dim3 dimGrid, dimBlock;
+		cuda_params(dimGrid, dimBlock);
+
+		//assign the constant value to all positions and dimensions
+		for(unsigned int n=0; n<D; n++){
+			if(X[n] == NULL) continue;
+			stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]);
+			HANDLE_ERROR(cudaGetLastError());		//report launch failures
+		}
+
+		return *this;
+
+	}
+
+	//multiply two fields (element-wise multiplication)
+	//  both fields must have the same resolution; components that are not allocated in
+	//  either operand are left at zero in the result
+	field<T, D> operator* (const field & rhs){
+
+		//the kernel uses one index for all three buffers, so the resolutions have to agree
+		if(R[0] != rhs.R[0] || R[1] != rhs.R[1]){
+			std::cout<<"field Error: cannot multiply fields with different resolutions."<<std::endl;
+			exit(1);
+		}
+
+		//create a field to store the result (zeroed by the constructor)
+		field<T, D> result(R[0], R[1]);
+		if(R[0] == 0 || R[1] == 0) return result;
+
+		//create one thread for each detector pixel
+		dim3 dimGrid, dimBlock;
+		cuda_params(dimGrid, dimBlock);
+
+		for(unsigned int n=0; n<D; n++){
+			if(X[n] == NULL || rhs.X[n] == NULL) continue;
+			stim::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]);
+			HANDLE_ERROR(cudaGetLastError());		//report launch failures
+		}
+
+		return result;
+	}
+
+	//returns the pointer to component n (NULL if n is not a valid component)
+	T* ptr(unsigned int n = 0){
+		if(n < D)
+			return X[n];
+		else return NULL;
+	}
+
+	//return the vector component at position (u, v)
+	//  one value per component is copied back to the host; no bounds checking is performed,
+	//  so (u, v) has to lie inside the field and every component has to be allocated
+	vec<T, D> get(unsigned int u, unsigned int v){
+
+		vec<T, D> result;
+		for(unsigned int d=0; d<D; d++){
+			HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + v*R[0] + u, sizeof(T), cudaMemcpyDeviceToHost));
+		}
+
+		return result;
+	}
+
+	//set all components of the field to zero
+	void clear(){
+		for(unsigned int n=0; n<D; n++)
+			if(X[n] != NULL)
+				HANDLE_ERROR(cudaMemset(X[n], 0, slice_bytes()));
+	}
+
+	//crop the field: returns a width x height field holding the samples (u, v) with
+	//  u < width and v < height. Where the requested size exceeds this field the
+	//  additional samples of the result stay zero.
+	field<T, D> crop(unsigned int width, unsigned int height){
+
+		//create a field to store the result (zeroed by the constructor)
+		field<T, D> result(width, height);
+
+		//only the region that exists in both fields can be copied
+		unsigned int w = std::min(width, R[0]);
+		unsigned int h = std::min(height, R[1]);
+		if(w == 0 || h == 0) return result;
+
+		//strided copy: the rows of the two fields have different lengths (pitches)
+		for(unsigned int n=0; n<D; n++){
+			if(X[n] == NULL || result.X[n] == NULL) continue;
+			HANDLE_ERROR(cudaMemcpy2D(result.X[n], sizeof(T) * width,
+									  X[n], sizeof(T) * R[0],
+									  sizeof(T) * w, h, cudaMemcpyDeviceToDevice));
+		}
+
+		return result;
+	}
+
+	//save an image representing component n
+	//  the color map covers [0 m] if positive is set and [-m m] otherwise, where m is the
+	//  largest magnitude found in the component
+	void toImage(std::string filename, unsigned int n = 0,
+				 bool positive = false, stim::colormapType cmap = stim::cmBrewer){
+
+		if(n >= D || X[n] == NULL){
+			std::cout<<"field Error: component "<<n<<" is not allocated and cannot be saved."<<std::endl;
+			return;
+		}
+
+		T max_val = find_max(n);	//find the value with the largest magnitude
+
+		//find_max may return a negative value, which would turn the range upside down
+		if(max_val < 0) max_val = -max_val;
+
+		if(positive)				//if the field is positive, use the range [0 max_val]
+			stim::gpu2image<T>(X[n], filename, R[0], R[1], 0, max_val, cmap);
+		else
+			stim::gpu2image<T>(X[n], filename, R[0], R[1], -max_val, max_val, cmap);
+	}
+
+};
+
+}		//end namespace stim
+#endif
-#ifndef RTS_RECT_H
-#define RTS_RECT_H
-
-//enable CUDA_CALLABLE macro
-#include "../cuda/callable.h"
-#include "../math/vector.h"
-#include "../math/triangle.h"
-#include "../math/quaternion.h"
-#include <iostream>
-#include <iomanip>
-#include <algorithm>
-
-namespace stim{
-
-//template for a rectangle class in ND space
-template <class T, int N = 3>
-struct rect
-{
-	/*
-		^                   O
-		|                   
-		|                   
-		Y         C         
-		|                   
-		|                   
-		O---------X--------->
-	*/
-
-private:
-
-	stim::vec<T, N> C;
-	stim::vec<T, N> X;
-	stim::vec<T, N> Y;
-
-	CUDA_CALLABLE void scale(T factor){
-		X *= factor;
-		Y *= factor;
-	}
-
-	CUDA_CALLABLE void normal(vec<T, N> n){		//orient the rectangle along the specified normal
-
-		n = n.norm();								//normalize, just in case
-		vec<T, N> n_current = X.cross(Y).norm();	//compute the current normal
-		quaternion<T> q;							//create a quaternion
-		q.CreateRotation(n_current, n);				//initialize a rotation from n_current to n
-
-		//apply the quaternion to the vectors and position
-		X = q.toMatrix3() * X;
-		Y = q.toMatrix3() * Y;
-	}
-
-	CUDA_CALLABLE void init(){
-		C = vec<T, N>(0, 0, 0);
-		X = vec<T, N>(1, 0, 0);
-		Y = vec<T, N>(0, 1, 0);
-	}
-
-public:
-
-	CUDA_CALLABLE rect(){
-		init();
-	}
-
-	CUDA_CALLABLE rect(T size, T z_pos = (T)0){
-		init();			//use the default setup
-		scale(size);	//scale the rectangle
-		C[2] = z_pos;
-	}
-
-	CUDA_CALLABLE rect(T size, vec<T, N> c, vec<T, N> n = vec<T, N>(0, 0, 1)){
-		init();			//start with the default setting
-		C = c;
-		scale(size);	//scale the rectangle
-		normal(n);		//orient
-
-	}
-
-	/*CUDA_CALLABLE rect(vec<T, N> a, vec<T, N> b, vec<T, N> c)
-	{
-		A = a;		
-		Y = b - a;
-		X = c - a - Y;
-
-	}*/
-
-	/*******************************************************************
-	Constructor - create a rect from a position, normal, and rotation
-	*******************************************************************/
-	/*CUDA_CALLABLE rect(stim::vec<T, N> c, stim::vec<T, N> normal, T width, T height, T theta)
-	{
-
-        //compute the X direction - start along world-space X
-        Y = stim::vec<T, N>(0, 1, 0);
-        if(Y == normal)
-            Y = stim::vec<T, N>(0, 0, 1);
-
-        X = Y.cross(normal).norm();
-
-        std::cout<<X<<std::endl;
-
-        //rotate the X axis by theta radians
-        stim::quaternion<T> q;
-        q.CreateRotation(theta, normal);
-        X = q.toMatrix3() * X;
-        Y = normal.cross(X);
-
-        //normalize everything
-        X = X.norm();
-        Y = Y.norm();
-
-        //scale to match the rect width and height
-        X = X * width;
-        Y = Y * height;
-
-        //set the corner of the plane
-        A = c - X * 0.5f - Y * 0.5f;
-
-        std::cout<<X<<std::endl;
-	}*/
-
-	//boolean comparison
-	bool operator==(const rect<T, N> & rhs)
-	{
-		if(C == rhs.C && X == rhs.X && Y == rhs.Y)
-			return true;
-		else
-			return false;
-	}
-
-	/*******************************************
-	Return the normal for the rect
-	*******************************************/
-	CUDA_CALLABLE stim::vec<T, N> n()
-	{
-        return (X.cross(Y)).norm();
-	}
-
-	CUDA_CALLABLE stim::vec<T, N> p(T a, T b)
-	{
-		stim::vec<T, N> result;
-		//given the two parameters a, b = [0 1], returns the position in world space
-		vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
-		result = A + X * a + Y * b;
-
-		return result;
-	}
-
-	CUDA_CALLABLE stim::vec<T, N> operator()(T a, T b)
-	{
-		return p(a, b);
-	}
-
-	std::string str()
-	{
-		std::stringstream ss;
-		vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
-		ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
-		ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl;
-		ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X;
-
-        return ss.str();
-
-	}
-
-	CUDA_CALLABLE rect<T, N> operator*(T rhs)
-	{
-		//scales the plane by a scalar value
-
-		//create the new rectangle
-		rect<T, N> result = *this;
-		result.scale(rhs);
-
-		return result;
-
-	}
-
-	CUDA_CALLABLE T dist(vec<T, N> p)
-	{
-        //compute the distance between a point and this rect
-
-		vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
-
-        //first break the rect up into two triangles
-        triangle<T, N> T0(A, A+X, A+Y);
-        triangle<T, N> T1(A+X+Y, A+X, A+Y);
-
-
-        T d0 = T0.dist(p);
-        T d1 = T1.dist(p);
-
-        if(d0 < d1)
-            return d0;
-        else
-            return d1;
-	}
-
-	CUDA_CALLABLE T dist_max(vec<T, N> p)
-	{
-		vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
-        T da = (A - p).len();
-        T db = (A+X - p).len();
-        T dc = (A+Y - p).len();
-        T dd = (A+X+Y - p).len();
-
-        return std::max( da, std::max(db, std::max(dc, dd) ) );
-	}
-};
-
-}	//end namespace rts
-
-template <typename T, int N>
-std::ostream& operator<<(std::ostream& os, stim::rect<T, N> R)
-{
-    os<<R.str();
-    return os;
-}
-
-
-#endif
+#ifndef RTS_RECT_H
+#define RTS_RECT_H
+
+//enable CUDA_CALLABLE macro
+#include "../cuda/callable.h"
+#include "../math/vector.h"
+#include "../math/triangle.h"
+#include "../math/quaternion.h"
+#include <iostream>
+#include <iomanip>
+#include <algorithm>
+
+namespace stim{
+
+//template for a rectangle class in ND space
+//  The rectangle is stored as a center point C and two edge vectors X and Y; its corners
+//  are A = C - X/2 - Y/2, A + X, A + Y and A + X + Y.
+//  init() and the constructors build their vectors from three coordinates.
+template <class T, int N = 3>
+struct rect
+{
+	/*
+		^                   O
+		|                   
+		|                   
+		Y         C         
+		|                   
+		|                   
+		O---------X--------->
+	*/
+
+private:
+
+	stim::vec<T, N> C;		//center of the rectangle
+	stim::vec<T, N> X;		//edge vector along the first parameter direction
+	stim::vec<T, N> Y;		//edge vector along the second parameter direction
+
+	//scales both edge vectors by the same factor (the center is not moved)
+	CUDA_CALLABLE void scale(T factor){
+		X *= factor;
+		Y *= factor;
+	}
+
+	CUDA_CALLABLE void normal(vec<T, N> n){		//orient the rectangle along the specified normal
+
+		n = n.norm();								//normalize, just in case
+		vec<T, N> n_current = X.cross(Y).norm();	//compute the current normal
+		quaternion<T> q;							//create a quaternion
+		//NOTE(review): the three-argument constructor defaults n to (0, 0, 1), which is also the
+		//  normal produced by init(); whether CreateRotation handles a rotation of a vector onto
+		//  itself is not visible from here - confirm
+		q.CreateRotation(n_current, n);				//initialize a rotation from n_current to n
+
+		//apply the quaternion to the edge vectors (the center C is not changed)
+		X = q.toMatrix3() * X;
+		Y = q.toMatrix3() * Y;
+	}
+
+	//default setup: unit square centered at the origin, X and Y along the first two axes
+	CUDA_CALLABLE void init(){
+		C = vec<T, N>(0, 0, 0);
+		X = vec<T, N>(1, 0, 0);
+		Y = vec<T, N>(0, 1, 0);
+	}
+
+public:
+
+	//default constructor: see init()
+	CUDA_CALLABLE rect(){
+		init();
+	}
+
+	//square with the given edge length, centered at (0, 0, z_pos)
+	CUDA_CALLABLE rect(T size, T z_pos = (T)0){
+		init();			//use the default setup
+		scale(size);	//scale the rectangle
+		C[2] = z_pos;
+	}
+
+	//square with the given edge length, centered at c and oriented along the normal n
+	CUDA_CALLABLE rect(T size, vec<T, N> c, vec<T, N> n = vec<T, N>(0, 0, 1)){
+		init();			//start with the default setting
+		C = c;
+		scale(size);	//scale the rectangle
+		normal(n);		//orient
+
+	}
+
+	/*CUDA_CALLABLE rect(vec<T, N> a, vec<T, N> b, vec<T, N> c)
+	{
+		A = a;		
+		Y = b - a;
+		X = c - a - Y;
+
+	}*/
+
+	/*******************************************************************
+	Constructor - create a rect from a position, normal, and rotation
+	*******************************************************************/
+	/*CUDA_CALLABLE rect(stim::vec<T, N> c, stim::vec<T, N> normal, T width, T height, T theta)
+	{
+
+        //compute the X direction - start along world-space X
+        Y = stim::vec<T, N>(0, 1, 0);
+        if(Y == normal)
+            Y = stim::vec<T, N>(0, 0, 1);
+
+        X = Y.cross(normal).norm();
+
+        std::cout<<X<<std::endl;
+
+        //rotate the X axis by theta radians
+        stim::quaternion<T> q;
+        q.CreateRotation(theta, normal);
+        X = q.toMatrix3() * X;
+        Y = normal.cross(X);
+
+        //normalize everything
+        X = X.norm();
+        Y = Y.norm();
+
+        //scale to match the rect width and height
+        X = X * width;
+        Y = Y * height;
+
+        //set the corner of the plane
+        A = c - X * 0.5f - Y * 0.5f;
+
+        std::cout<<X<<std::endl;
+	}*/
+
+	//boolean comparison: two rectangles are equal if center and both edge vectors are equal
+	bool operator==(const rect<T, N> & rhs)
+	{
+		return C == rhs.C && X == rhs.X && Y == rhs.Y;
+	}
+
+	/*******************************************
+	Return the normal for the rect
+	*******************************************/
+	CUDA_CALLABLE stim::vec<T, N> n()
+	{
+		return (X.cross(Y)).norm();
+	}
+
+	//given the two parameters a, b = [0 1], returns the position in world space
+	//  (0, 0) is the corner A = C - X/2 - Y/2 and (1, 1) the opposite corner A + X + Y
+	CUDA_CALLABLE stim::vec<T, N> p(T a, T b)
+	{
+		stim::vec<T, N> result;
+		vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
+		result = A + X * a + Y * b;
+
+		return result;
+	}
+
+	//shorthand for p(a, b)
+	CUDA_CALLABLE stim::vec<T, N> operator()(T a, T b)
+	{
+		return p(a, b);
+	}
+
+	//returns a text drawing of the rectangle labelled with its four corner positions
+	std::string str()
+	{
+		std::stringstream ss;
+		vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
+		ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
+		ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl;
+		ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X;
+
+		return ss.str();
+
+	}
+
+	//scales the plane by a scalar value (returns a scaled copy, this object is not modified)
+	CUDA_CALLABLE rect<T, N> operator*(T rhs)
+	{
+		//create the new rectangle
+		rect<T, N> result = *this;
+		result.scale(rhs);
+
+		return result;
+
+	}
+
+	//compute the distance between a point and this rect
+	CUDA_CALLABLE T dist(vec<T, N> p)
+	{
+		vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
+
+		//first break the rect up into two triangles
+		triangle<T, N> T0(A, A+X, A+Y);
+		triangle<T, N> T1(A+X+Y, A+X, A+Y);
+
+		//the rect is as close as the closer of its two triangles
+		T d0 = T0.dist(p);
+		T d1 = T1.dist(p);
+
+		if(d0 < d1)
+			return d0;
+		else
+			return d1;
+	}
+
+	//largest distance between the point p and any of the four corners of the rect
+	CUDA_CALLABLE T dist_max(vec<T, N> p)
+	{
+		vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
+		T da = (A - p).len();
+		T db = (A+X - p).len();
+		T dc = (A+Y - p).len();
+		T dd = (A+X+Y - p).len();
+
+		//plain comparisons instead of std::max: std::max is a host function and is not
+		//  guaranteed to be callable when this method is compiled for the device
+		T result = da;
+		if(db > result) result = db;
+		if(dc > result) result = dc;
+		if(dd > result) result = dd;
+
+		return result;
+	}
+};
+
+}	//end namespace stim
+
+//stream insertion for rectangles: writes the text produced by rect::str() to the stream
+template <typename T, int N>
+std::ostream& operator<<(std::ostream& os, stim::rect<T, N> R)
+{
+	return os<<R.str();
+}
+
+
+#endif
-#ifndef RTS_TRIANGLE_H
-#define RTS_TRIANGLE_H
-
-//enable CUDA_CALLABLE macro
-#include "../cuda/callable.h"
-#include "../math/vector.h"
-#include <iostream>
-
-namespace stim{
-
-template <class T, int N=3>
-struct triangle
-{
-    /*
-        A------>B
-        |      /
-        |     /
-        |    /
-        |   /
-        |  /
-        | /
-        C
-    */
-    private:
-
-    vec<T, N> A;
-    vec<T, N> B;
-    vec<T, N> C;
-
-    CUDA_CALLABLE vec<T, N> _p(T s, T t)
-    {
-        //This function returns the point specified by p = A + s(B-A) + t(C-A)
-        vec<T, N> E0 = B-A;
-        vec<T, N> E1 = C-A;
-
-        return A + s*E0 + t*E1;
-    }
-
-
-    public:
-
-
-
-    CUDA_CALLABLE triangle()
-	{
-
-	}
-
-	CUDA_CALLABLE triangle(vec<T, N> a, vec<T, N> b, vec<T, N> c)
-	{
-		A = a;
-		B = b;
-		C = c;
-	}
-
-	CUDA_CALLABLE stim::vec<T, N> operator()(T s, T t)
-	{
-        return _p(s, t);
-	}
-
-	CUDA_CALLABLE vec<T, N> nearest(vec<T, N> p)
-	{
-        //comptue the distance between a point and this triangle
-        //  This code is adapted from: http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf
-
-        vec<T, N> E0 = B-A;
-        vec<T, N> E1 = C-A;
-        vec<T, N> D = A - p;
-
-        T a = E0.dot(E0);
-        T b = E0.dot(E1);
-        T c = E1.dot(E1);
-        T d = E0.dot(D);
-        T e = E1.dot(D);
-        //T f = D.dot(D);
-
-        T det = a*c - b*b;
-        T s = b*e - c*d;
-        T t = b*d - a*e;
-
-        /*std::cout<<"E0: "<<E0<<std::endl;
-        std::cout<<"E1: "<<E1<<std::endl;
-        std::cout<<"a: "<<a<<std::endl;
-        std::cout<<"b: "<<b<<std::endl;
-        std::cout<<"c: "<<c<<std::endl;
-        std::cout<<"d: "<<d<<std::endl;
-        std::cout<<"e: "<<e<<std::endl;
-        std::cout<<"f: "<<f<<std::endl;
-        std::cout<<"det: "<<det<<std::endl;
-        std::cout<<"s: "<<s<<std::endl;
-        std::cout<<"t: "<<t<<std::endl;*/
-
-
-        if( s+t <= det)
-        {
-            if(s < 0)
-            {
-                if(t < 0)
-                {
-                    //region 4
-                    //std::cout<<"Region 4"<<std::endl;
-                    s = 0;
-                    t = 0;
-                    //done?
-                }
-                else
-                {
-                    //region 3
-                    //std::cout<<"Region 3"<<std::endl;
-                    s=0;
-                    t = ( e >= 0 ? 0 : ( -e >= c ? 1 : -e/c ) );
-                    //done
-                }
-            }
-            else if(t < 0)
-            {
-                //region 5
-                //std::cout<<"Region 5"<<std::endl;
-                s = ( d >= 0 ? 0 : ( -d >= a ? 1 : -d/a ) );
-                t = 0;
-                //done
-            }
-            else
-            {
-                //region 0
-                //std::cout<<"Region 0"<<std::endl;
-                T invDet = (T)1.0/det;
-                s *= invDet;
-                t *= invDet;
-                //done
-            }
-        }
-        else
-        {
-            if(s < 0)
-            {
-                //region 2
-                //std::cout<<"Region 2"<<std::endl;
-                s = 0;
-                t = 1;
-                //done?
-
-            }
-            else if(t < 0)
-            {
-                //region 6
-                //std::cout<<"Region 6"<<std::endl;
-                s = 1;
-                t = 0;
-                //done?
-            }
-            else
-            {
-                //region 1
-                //std::cout<<"Region 1"<<std::endl;
-                T numer = c + e - b - d;
-                if( numer <= 0 )
-                    s = 0;
-                else
-                {
-                    T denom = a - 2 * b + c;
-                    s = ( numer >= denom ? 1 : numer/denom );
-                }
-                t = 1 - s;
-                //done
-            }
-        }
-
-        //std::cout<<"s: "<<s<<std::endl;
-        //std::cout<<"t: "<<t<<std::endl;
-
-        //std::cout<<"p: "<<_p(s, t)<<std::endl;
-
-		return _p(s, t);
-
-	}
-
-	CUDA_CALLABLE T dist(vec<T, N> p)
-	{
-        vec<T, N> n = nearest(p);
-
-        return (p - n).len();
-	}
-};
-
-}
-
-#endif
+#ifndef RTS_TRIANGLE_H
+#define RTS_TRIANGLE_H
+
+//enable CUDA_CALLABLE macro
+#include "../cuda/callable.h"
+#include "../math/vector.h"
+#include <iostream>
+
+namespace stim{
+
+template <class T, int N=3>
+struct triangle
+{
+    /*
+        Triangle defined by three vertices. Points are addressed with two
+        parameters: p(s, t) = A + s(B-A) + t(C-A). The triangle itself is
+        the set s >= 0, t >= 0, s + t <= 1.
+
+        A------>B
+        |      /
+        |     /
+        |    /
+        |   /
+        |  /
+        | /
+        C
+    */
+    private:
+
+    vec<T, N> A;
+    vec<T, N> B;
+    vec<T, N> C;
+
+    //returns the point specified by p = A + s(B-A) + t(C-A)
+    CUDA_CALLABLE vec<T, N> _p(T s, T t)
+    {
+        vec<T, N> E0 = B-A;
+        vec<T, N> E1 = C-A;
+
+        return A + s*E0 + t*E1;
+    }
+
+
+    public:
+
+    //default constructor (vertices are left as vec's default constructor leaves them)
+    CUDA_CALLABLE triangle()
+	{
+
+	}
+
+	//construct a triangle from its three vertices
+	CUDA_CALLABLE triangle(vec<T, N> a, vec<T, N> b, vec<T, N> c)
+	{
+		A = a;
+		B = b;
+		C = c;
+	}
+
+	//evaluate the parametric point p(s, t) = A + s(B-A) + t(C-A)
+	CUDA_CALLABLE stim::vec<T, N> operator()(T s, T t)
+	{
+        return _p(s, t);
+	}
+
+	//returns the point on this triangle that is nearest to p
+	CUDA_CALLABLE vec<T, N> nearest(vec<T, N> p)
+	{
+        //This code is adapted from: http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf
+        //  The squared distance is the quadratic
+        //      Q(s, t) = a*s^2 + 2*b*s*t + c*t^2 + 2*d*s + 2*e*t + f
+        //  The (s, t) plane is split into 7 regions around the triangle and
+        //  Q is minimized on the triangle feature that faces each region.
+
+        vec<T, N> E0 = B-A;
+        vec<T, N> E1 = C-A;
+        vec<T, N> D = A - p;
+
+        T a = E0.dot(E0);
+        T b = E0.dot(E1);
+        T c = E1.dot(E1);
+        T d = E0.dot(D);
+        T e = E1.dot(D);
+
+        //s and t are the unconstrained minimizer scaled by det (division deferred to region 0)
+        T det = a*c - b*b;
+        T s = b*e - c*d;
+        T t = b*d - a*e;
+
+        if( s+t <= det)
+        {
+            if(s < 0)
+            {
+                if(t < 0)
+                {
+                    //region 4: beyond vertex A, the minimum lies on edge AB or on edge AC
+                    if(d < 0)
+                    {
+                        //Q decreases along +s at A, so minimize on the edge t = 0
+                        t = 0;
+                        s = ( -d >= a ? 1 : -d/a );
+                    }
+                    else
+                    {
+                        //otherwise minimize on the edge s = 0 (clamped to [0, 1])
+                        s = 0;
+                        t = ( e >= 0 ? 0 : ( -e >= c ? 1 : -e/c ) );
+                    }
+                }
+                else
+                {
+                    //region 3: minimize on the edge s = 0 (segment A-C)
+                    s = 0;
+                    t = ( e >= 0 ? 0 : ( -e >= c ? 1 : -e/c ) );
+                }
+            }
+            else if(t < 0)
+            {
+                //region 5: minimize on the edge t = 0 (segment A-B)
+                s = ( d >= 0 ? 0 : ( -d >= a ? 1 : -d/a ) );
+                t = 0;
+            }
+            else
+            {
+                //region 0: the projection of p falls inside the triangle
+                //  If det is zero, then s >= 0, t >= 0 and s+t <= det force
+                //  s = t = 0, so the division is skipped to avoid 0 * inf = NaN.
+                if(det > 0)
+                {
+                    T invDet = (T)1.0/det;
+                    s *= invDet;
+                    t *= invDet;
+                }
+            }
+        }
+        else
+        {
+            if(s < 0)
+            {
+                //region 2: beyond vertex C, the minimum lies on edge BC or on edge AC
+                T tmp0 = b + d;
+                T tmp1 = c + e;
+                if(tmp1 > tmp0)
+                {
+                    //minimize on the edge s + t = 1
+                    T numer = tmp1 - tmp0;
+                    T denom = a - 2 * b + c;
+                    s = ( numer >= denom ? 1 : numer/denom );
+                    t = 1 - s;
+                }
+                else
+                {
+                    //minimize on the edge s = 0
+                    s = 0;
+                    t = ( tmp1 <= 0 ? 1 : ( e >= 0 ? 0 : -e/c ) );
+                }
+            }
+            else if(t < 0)
+            {
+                //region 6: beyond vertex B, the minimum lies on edge BC or on edge AB
+                T tmp0 = b + e;
+                T tmp1 = a + d;
+                if(tmp1 > tmp0)
+                {
+                    //minimize on the edge s + t = 1
+                    T numer = tmp1 - tmp0;
+                    T denom = a - 2 * b + c;
+                    t = ( numer >= denom ? 1 : numer/denom );
+                    s = 1 - t;
+                }
+                else
+                {
+                    //minimize on the edge t = 0
+                    t = 0;
+                    s = ( tmp1 <= 0 ? 1 : ( d >= 0 ? 0 : -d/a ) );
+                }
+            }
+            else
+            {
+                //region 1: minimize on the edge s + t = 1 (segment B-C)
+                T numer = c + e - b - d;
+                if( numer <= 0 )
+                    s = 0;
+                else
+                {
+                    T denom = a - 2 * b + c;
+                    s = ( numer >= denom ? 1 : numer/denom );
+                }
+                t = 1 - s;
+            }
+        }
+
+		return _p(s, t);
+
+	}
+
+	//returns the distance between p and the nearest point on this triangle
+	CUDA_CALLABLE T dist(vec<T, N> p)
+	{
+        vec<T, N> n = nearest(p);
+
+        return (p - n).len();
+	}
+};
+
+}
+
+#endif
-#ifndef RTS_MATERIAL_H
-#define RTS_MATERIAL_H
-
-#include <vector>
-#include <ostream>
-#include <iostream>
-#include <fstream>
-#include <complex>
-#include <algorithm>
-#include <sstream>
-#include "../math/complex.h"
-#include "../math/constants.h"
-#include "../math/function.h"
-
-namespace stim{
-
-//Material class - default representation for the material property is the refractive index (RI)
-template<typename T>
-class material : public function< T, complex<T> >{
-
-public:
-    enum wave_property{microns, inverse_cm};
-    enum material_property{ri, absorbance};
-
-private:
-
-    using function< T, complex<T> >::X;
-    using function< T, complex<T> >::Y;
-    using function< T, complex<T> >::insert;
-    using function< T, complex<T> >::bounding;
-
-    std::string name;	//name for the material (defaults to file name)
-
-    void process_header(std::string str, wave_property& wp, material_property& mp){
-
-    	std::stringstream ss(str);	//create a stream from the data string
-    	std::string line;
-    	std::getline(ss, line);		//get the first line as a string
-		while(line[0] == '#'){		//continue looping while the line is a comment
-
-			std::stringstream lstream(line);	//create a stream from the line
-			lstream.ignore();					//ignore the first character ('#')
-
-			std::string prop;		//get the property name
-			lstream>>prop;
-
-			if(prop == "X"){
-				std::string wp_name;
-				lstream>>wp_name;
-				if(wp_name == "microns") wp = microns;
-				else if(wp_name == "inverse_cm") wp = inverse_cm;
-			}
-			else if(prop == "Y"){
-				std::string mp_name;
-				lstream>>mp_name;
-				if(mp_name == "ri") mp = ri;
-				else if(mp_name == "absorbance") mp = absorbance;
-			}
-
-			std::getline(ss, line);		//get the next line
-		}
-
-		function< T, stim::complex<T> >::process_string(str);
-	}
-
-    void from_inverse_cm(){
-    	//convert inverse centimeters to wavelength (in microns)
-    	for(unsigned int i=0; i<X.size(); i++)
-    		X[i] = 10000 / X[i];
-
-    	//reverse the function array
-    	std::reverse(X.begin(), X.end());
-    	std::reverse(Y.begin(), Y.end());
-
-    }
-
-    void init(){
-    	bounding[0] = bounding[1] = stim::complex<T>(1, 0);
-    }
-
-
-public:
-
-    material(std::string filename, wave_property wp, material_property mp){
-    	name = filename;
-    	load(filename, wp, mp);
-    }
-
-    material(std::string filename){
-    	name = filename;
-    	load(filename);
-    }
-
-    material(){
-    	init();
-    }
-
-    complex<T> getN(T lambda){
-    	return function< T, complex<T> >::linear(lambda);
-    }
-
-    void load(std::string filename, wave_property wp, material_property mp){
-
-    	//load the file as a function
-    	function< T, complex<T> >::load(filename);
-    }
-
-    void load(std::string filename){
-
-    	wave_property wp = inverse_cm;
-    	material_property mp = ri;
-    	//turn the file into a string
-    	std::ifstream t(filename.c_str());	//open the file as a stream
-
-    	if(!t){
-    		std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl;
-    		exit(1);
-    	}
-		std::string str((std::istreambuf_iterator<char>(t)),
-		std::istreambuf_iterator<char>());
-
-		//process the header information
-		process_header(str, wp, mp);
-
-		//convert units
-		if(wp == inverse_cm)
-			from_inverse_cm();
-		//set the bounding values
-		bounding[0] = Y[0];
-		bounding[1] = Y.back();
-    }
-    std::string str(){
-    	std::stringstream ss;
-    	ss<<name<<std::endl;
-    	ss<<function< T, complex<T> >::str();
-    	return ss.str();
-    }
-    std::string get_name(){
-    	return name;
-    }
-
-    void set_name(std::string str){
-    	name = str;
-    }
-
-};
-
-}
-
-
-
-
-#endif
+#ifndef RTS_MATERIAL_H
+#define RTS_MATERIAL_H
+
+#include <vector>
+#include <ostream>
+#include <iostream>
+#include <fstream>
+#include <complex>
+#include <algorithm>
+#include <sstream>
+#include "../math/complex.h"
+#include "../math/constants.h"
+#include "../math/function.h"
+
+namespace stim{
+
+//Material class - default representation for the material property is the refractive index (RI)
+template<typename T>
+class material : public function< T, complex<T> >{
+
+public:
+    //units that a material file may declare for its X column
+    enum wave_property{microns, inverse_cm};
+    //quantity that a material file may declare for its Y column
+    enum material_property{ri, absorbance};
+
+private:
+
+    using function< T, complex<T> >::X;
+    using function< T, complex<T> >::Y;
+    using function< T, complex<T> >::insert;
+    using function< T, complex<T> >::bounding;
+
+    std::string name;	//name for the material (defaults to file name)
+
+    //Reads the leading '#' comment lines of a material file and then hands
+    //  the whole string to function::process_string().
+    //  str = complete contents of the material file
+    //  wp  = set when a "# X microns" or "# X inverse_cm" line is found
+    //  mp  = set when a "# Y ri" or "# Y absorbance" line is found
+    //  wp and mp keep the caller's values when no matching header line exists.
+    void process_header(std::string str, wave_property& wp, material_property& mp){
+
+    	std::stringstream ss(str);	//create a stream from the data string
+    	std::string line;
+
+    	//Loop while lines can still be read AND the line is a comment.
+    	//  Testing the getline() result matters: once the stream is exhausted
+    	//  getline() leaves "line" untouched, so a file ending in a header
+    	//  line without a trailing newline would otherwise loop forever.
+		while(std::getline(ss, line) && !line.empty() && line[0] == '#'){
+
+			std::stringstream lstream(line);	//create a stream from the line
+			lstream.ignore();					//ignore the first character ('#')
+
+			std::string prop;		//get the property name
+			lstream>>prop;
+
+			if(prop == "X"){
+				std::string wp_name;
+				lstream>>wp_name;
+				if(wp_name == "microns") wp = microns;
+				else if(wp_name == "inverse_cm") wp = inverse_cm;
+			}
+			else if(prop == "Y"){
+				std::string mp_name;
+				lstream>>mp_name;
+				if(mp_name == "ri") mp = ri;
+				else if(mp_name == "absorbance") mp = absorbance;
+			}
+		}
+
+		function< T, stim::complex<T> >::process_string(str);
+	}
+
+    //converts every X value with X = 10000 / X (inverse centimeters to microns)
+    //  and reverses both arrays so that the sample order is flipped to match
+    void from_inverse_cm(){
+    	//convert inverse centimeters to wavelength (in microns)
+    	for(unsigned int i=0; i<X.size(); i++)
+    		X[i] = 10000 / X[i];
+
+    	//reverse the function array
+    	std::reverse(X.begin(), X.end());
+    	std::reverse(Y.begin(), Y.end());
+
+    }
+
+    //sets both bounding values to 1 + 0i
+    void init(){
+    	bounding[0] = bounding[1] = stim::complex<T>(1, 0);
+    }
+
+
+public:
+
+    //load a material from a file, naming it after the file
+    //  NOTE(review): wp and mp are forwarded to load(), which currently ignores them
+    material(std::string filename, wave_property wp, material_property mp){
+    	name = filename;
+    	load(filename, wp, mp);
+    }
+
+    //load a material from a file (units are taken from the file header), naming it after the file
+    material(std::string filename){
+    	name = filename;
+    	load(filename);
+    }
+
+    //create an empty material with both bounding values set to 1 + 0i
+    material(){
+    	init();
+    }
+
+    //returns the material value at lambda using function::linear()
+    complex<T> getN(T lambda){
+    	return function< T, complex<T> >::linear(lambda);
+    }
+
+    //load the file through function::load()
+    //  NOTE(review): wp and mp are unused here, so no unit conversion is
+    //  applied and the bounding values are not updated - confirm whether
+    //  that is intended.
+    void load(std::string filename, wave_property wp, material_property mp){
+
+    	//load the file as a function
+    	function< T, complex<T> >::load(filename);
+    }
+
+    //Load a material file, honoring its '#' header lines.
+    //  X defaults to inverse_cm and Y defaults to ri when the header is silent.
+    //  Prints an error and exits the program if the file cannot be opened.
+    //  NOTE(review): mp is parsed but never used after process_header().
+    void load(std::string filename){
+
+    	wave_property wp = inverse_cm;
+    	material_property mp = ri;
+    	//turn the file into a string
+    	std::ifstream t(filename.c_str());	//open the file as a stream
+
+    	if(!t){
+    		std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl;
+    		exit(1);
+    	}
+		std::string str((std::istreambuf_iterator<char>(t)),
+		std::istreambuf_iterator<char>());
+
+		//process the header information
+		process_header(str, wp, mp);
+
+		//convert units
+		if(wp == inverse_cm)
+			from_inverse_cm();
+
+		//set the bounding values from the first and last samples
+		//  A file without data rows leaves Y empty; reading Y[0] would be
+		//  undefined, so fall back to the defaults used by material().
+		if(Y.begin() != Y.end()){
+			bounding[0] = Y[0];
+			bounding[1] = Y.back();
+		}
+		else
+			init();
+    }
+
+    //returns the material name followed by the output of function::str()
+    std::string str(){
+    	std::stringstream ss;
+    	ss<<name<<std::endl;
+    	ss<<function< T, complex<T> >::str();
+    	return ss.str();
+    }
+
+    //returns the name of the material
+    std::string get_name(){
+    	return name;
+    }
+
+    //sets the name of the material
+    void set_name(std::string str){
+    	name = str;
+    }
+
+};
+
+}
+
+
+
+
+#endif
-#include "../optics/material.h"
-#include "../math/complexfield.cuh"
-#include "../math/constants.h"
-//#include "../envi/bil.h"
-
-#include "cufft.h"
-
-#include <vector>
-#include <sstream>
-
-namespace stim{
-
-//this function writes a sinc function to "dest" such that an iFFT produces a slab
-template<typename T>
-__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri, 
-									  T* src, T* zf, 
-									  T w, unsigned int zR, unsigned int nuR){
-	//dest = complex field representing the sample
-	//ri = refractive indices for each wavelength
-	//src = intensity of the light source for each wavelength
-	//zf = z position of the slab interface for each wavelength (accounting for optical path length)
-	//w = width of the slab (in pixels)
-	//zR = number of z-axis samples
-	//nuR = number of wavelengths
-
-    //get the current coordinate in the plane slice
-	int ifz = blockIdx.x * blockDim.x + threadIdx.x;
-	int inu = blockIdx.y * blockDim.y + threadIdx.y;
-
-	//make sure that the thread indices are in-bounds
-	if(inu >= nuR || ifz >= zR) return;
-
-	int i = inu * zR + ifz;
-
-    T fz;
-    if(ifz < zR/2)
-        fz = ifz / (T)zR;
-    else
-        fz = -(zR - ifz) / (T)zR;
-
-    //if the slab starts outside of the simulation domain, just return
-    if(zf[inu] >= zR) return;
-
-	//fill the array along z with a sinc function representing the Fourier transform of the layer
-
-	T opl = w * ri[inu].real();			//optical path length
-
-	//handle the case where the slab goes outside the simulation domain
-	if(zf[inu] + opl >= zR)
-		opl = zR - zf[inu];
-
-	if(opl == 0) return;
-
-	//T l = w * ri[inu].real();
-	//complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0));
-	complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));
-
-	complex<T> eta = ri[inu] * ri[inu] - 1;
-
-	//dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz);
-	if(ifz == 0)
-        dest[i] += opl * exp(e) * eta * src[inu];
-    else
-        dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
-}
-
-template<typename T>
-__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){
-	//zf = current z depth (optical path length) in pixels
-	//ri = refractive index of the material
-	//w = actual width of the layer (in pixels)
-
-
-	//compute the index for this thread
-	int i = blockIdx.x * blockDim.x + threadIdx.x;
-	if(i >= S) return;
-
-	if(ri == NULL)
-		zf[i] += w;
-	else
-		zf[i] += ri[i].real() * w;
-}
-
-//apply the 1D MIRST filter to an existing sample (overwriting the sample)
-template<typename T>
-__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda, 
-								 T dFz,
-								 T inNA, T outNA, 
-								 unsigned int lambdaR, unsigned int zR, 
-								 T sigma = 0){
-	//sampleFFT = the sample in the Fourier domain (will be overwritten)
-	//lambda = list of wavelengths
-	//dFz = delta along the Fz axis in the frequency domain
-	//inNA = NA of the internal obscuration
-	//outNA = NA of the objective
-	//zR = number of pixels along the Fz axis (same as the z-axis)
-	//lambdaR = number of wavelengths
-	//sigma = width of the Gaussian source
-	int ifz = blockIdx.x * blockDim.x + threadIdx.x;
-	int inu = blockIdx.y * blockDim.y + threadIdx.y;
-
-	if(inu >= lambdaR || ifz >= zR) return;
-
-	//calculate the index into the sample FT
-	int i = inu * zR + ifz;
-
-	//compute the frequency (and set all negative spatial frequencies to zero)
-	T fz;
-	if(ifz < zR / 2)
-	    fz = ifz * dFz;
-	//if the spatial frequency is negative, set it to zero and exit
-	else{
-	    sampleFFT[i] = 0;
-	    return;
-	}
-
-	//compute the frequency in inverse microns
-	T nu = 1/lambda[inu];
-
-	//determine the radius of the integration circle
-	T nu_sq = nu * nu;
-	T fz_sq = (fz * fz) / 4;
-
-	//cut off frequencies above the diffraction limit
-	T r;
-	if(fz_sq < nu_sq)
-	    r = sqrt(nu_sq - fz_sq);
-	else
-	    r = 0;
-
-	//account for the optics
-	T Q = 0;
-	if(r > nu * inNA && r < nu * outNA)
-	    Q = 1;
-
-	//account for the source
-	//T sigma = 30.0;
-	T s = exp( - (r*r * sigma*sigma) / 2 );
-	//T s=1;
-
-	//compute the final filter
-	T mirst = 0;
-	if(fz != 0)
-	    mirst = 2 * stimPI * r * s * Q * (1/fz);
-
-	sampleFFT[i] *= mirst;
-
-}
-
-/*This object performs a 1-dimensional (layered) MIRST simulation
-*/
-template<typename T>
-class mirst1d{
-
-private:
-	unsigned int Z;	//z-axis resolution
-	unsigned int pad;	//pixel padding on either side of the sample
-
-	std::vector< material<T> > matlist;	//list of materials
-	std::vector< T > layers;				//list of layer thicknesses
-
-	std::vector< T > lambdas;		//list of wavelengths that are being simulated
-	unsigned int S;					//number of wavelengths (size of "lambdas")
-
-	T NA[2];						//numerical aperature (central obscuration and outer diameter)
-
-	function<T, T> source_profile;	//profile (spectrum) of the source (expressed in inverse centimeters)
-
-	complexfield<T, 1> scratch;		//scratch GPU memory used to build samples, transforms, etc.
-
-	void fft(int direction = CUFFT_FORWARD){
-
-		unsigned padZ = Z + pad;
-		
-		//create cuFFT handles
-		cufftHandle plan;
-		cufftResult result;
-		
-		if(sizeof(T) == 4)
-			result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size());	//single precision
-		else
-			result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size());	//double precision
-
-		//check for Plan 1D errors
-		if(result != CUFFT_SUCCESS){
-			std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;
-			CufftError(result);
-			exit(1);
-		}
-
-		if(sizeof(T) == 4)
-			result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);
-		else
-			result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);
-
-		//check for FFT errors
-		if(result != CUFFT_SUCCESS){
-			std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;
-			CufftError(result);
-			exit(1);
-		}
-
-		cufftDestroy(plan);
-	}
-
-
-	//initialize the scratch memory
-	void init_scratch(){
-		scratch = complexfield<T, 1>(Z + pad , lambdas.size());
-		scratch = 0;
-	}
-
-	//get the list of scattering efficiency (eta) values for a specified layer
-	std::vector< complex<T> > layer_etas(unsigned int l){
-
-		std::vector< complex<T> > etas;
-
-		//fill the list of etas
-		for(unsigned int i=0; i<lambdas.size(); i++)
-			etas.push_back( matlist[l].eta(lambdas[i]) );
-		return etas;
-	}
-
-	//calculates the optimal block and grid sizes using information from the GPU
-	void cuda_params(dim3& grids, dim3& blocks){
-		int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
-		int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
-
-		//create one thread for each detector pixel
-		blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
-		grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);
-	}
-
-	//add the fourier transform of layer n to the scratch space
-	void build_layer_fft(unsigned int n, T* zf){
-		unsigned int paddedZ = Z + pad;
-
-		T wpx = layers[n] / dz();	//calculate the width of the layer in pixels
-
-		//allocate memory for the refractive index
-		complex<T>* gpuRi;
-		HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));
-
-		//allocate memory for the source profile
-		T* gpuSrc;
-		HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));
-
-		complex<T> ri;
-		T source;
-		//store the refractive index and source profile in a CPU array
-		for(int inu=0; inu<S; inu++){
-			//save the refractive index to the GPU
-			ri = matlist[n].getN(lambdas[inu]);
-			HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice ));
-
-			//save the source profile to the GPU
-			source = source_profile(10000 / lambdas[inu]);
-			HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice ));
-
-		}
-
-		//create one thread for each pixel of the field slice
-		dim3 gridDim, blockDim;
-		cuda_params(gridDim, blockDim);
-		stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);
-
-		int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size
-		int linGrid = S / linBlock + 1;
-		stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);
-
-		//free memory
-		HANDLE_ERROR(cudaFree(gpuRi));
-		HANDLE_ERROR(cudaFree(gpuSrc));
-	}
-
-	void build_sample(){
-		init_scratch();		//initialize the GPU scratch space
-		//build_layer(1);
-
-		T* zf;
-		HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));
-		HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));
-
-		//render each layer of the sample
-		for(unsigned int l=0; l<layers.size(); l++){
-			build_layer_fft(l, zf);
-		}
-
-		HANDLE_ERROR(cudaFree(zf));
-	}
-
-	void apply_filter(){
-		dim3 gridDim, blockDim;
-		cuda_params(gridDim, blockDim);
-
-		unsigned int Zpad = Z + pad;
-
-		T sim_range = dz() * Zpad;
-    	T dFz = 1 / sim_range;
-
-		//copy the array of wavelengths to the GPU
-		T* gpuLambdas;
-		HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad));
-		HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice));
-		stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas, 
-								 dFz,
-								 NA[0], NA[1], 
-								 S, Zpad);
-	}
-
-	//crop the image to the sample thickness - keep in mind that sample thickness != optical path length
-	void crop(){
-
-		scratch = scratch.crop(Z, S);
-	}
-	
-	//save the scratch field as a binary file
-	void to_binary(std::string filename){
-
-	}
-
-
-public:
-
-	//constructor
-	mirst1d(unsigned int rZ = 100,
-			unsigned int padding = 0){
-		Z = rZ;
-		pad = padding;
-		NA[0] = 0;
-		NA[1] = 0.8;
-		S = 0;
-		source_profile = 1;
-	}
-
-	//add a layer, thickness = microns
-	void add_layer(material<T> mat, T thickness){
-		matlist.push_back(mat);
-		layers.push_back(thickness);
-	}
-
-	void add_layer(std::string filename, T thickness){
-		add_layer(material<T>(filename), thickness);
-	}
-
-	//adds a profile spectrum for the light source
-	void set_source(std::string filename){
-		source_profile.load(filename);
-	}
-
-	//adds a block of wavenumbers (cm^-1) to the simulation parameters
-	void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){
-		unsigned int nu = start;
-		while(nu <= stop){
-			lambdas.push_back((T)10000 / nu);
-			nu += step;
-		}
-		S = lambdas.size();		//increment the number of wavelengths (shorthand for later)
-	}
-
-	T thickness(){
-		T t = 0;
-		for(unsigned int l=0; l<layers.size(); l++)
-			t += layers[l];
-		return t;
-	}
-
-	void padding(unsigned int padding = 0){
-		pad = padding;
-	}
-
-	T dz(){
-		return thickness() / Z;		//calculate the z-axis step size
-	}
-
-	void na(T in, T out){
-		NA[0] = in;
-		NA[1] = out;
-	}
-
-	void na(T out){
-		na(0, out);
-	}
-
-	stim::function<T, T> get_source(){
-		return source_profile;
-	}
-
-	void save_sample(std::string filename){
-		//create a sample and save the magnitude as an image
-		build_sample();
-		fft(CUFFT_INVERSE);
-		scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
-	}
-
-	void save_mirst(std::string filename, bool binary = true){
-		//apply the MIRST filter to a sample and save the image
-
-		//build the sample in the Fourier domain
-		build_sample();
-
-		//apply the MIRST filter
-		apply_filter();
-
-		//apply an inverse FFT to bring the results back into the spatial domain
-		fft(CUFFT_INVERSE);
-
-		crop();
-
-		//save the image
-		if(binary)
-			to_binary(filename);
-		else
-			scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
-	}
-
-
-
-
-	std::string str(){
-
-		stringstream ss;
-		ss<<"1D MIRST Simulation========================="<<std::endl;
-		ss<<"z-axis resolution: "<<Z<<std::endl;
-		ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;
-		ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;
-		ss<<"padding: "<<pad<<std::endl;
-		ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;
-		ss<<"dz: "<<dz()<<" um"<<std::endl;
-		ss<<std::endl;
-		ss<<layers.size()<<" layers-------------"<<std::endl;
-		for(unsigned int l=0; l<layers.size(); l++)
-			ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;
-
-		ss<<"source profile-----------"<<std::endl;
-		ss<<get_source().str()<<std::endl;
-
-		return ss.str();
-
-
-	}
-
-
-
-};
-
-}
+#include "../optics/material.h"
+#include "../math/complexfield.cuh"
+#include "../math/constants.h"
+//#include "../envi/bil.h"
+
+#include "cufft.h"
+
+#include <vector>
+#include <sstream>
+
+namespace stim{
+
+//this function writes a sinc function to "dest" such that an iFFT produces a slab
+template<typename T>
+__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri, 
+									  T* src, T* zf, 
+									  T w, unsigned int zR, unsigned int nuR){
+	//Adds the Fourier transform of one slab to "dest" (accumulates with +=).
+	//  Launch layout: 2D, x indexes the z-frequency sample and y indexes the wavelength.
+	//dest = complex field representing the sample
+	//ri = refractive indices for each wavelength
+	//src = intensity of the light source for each wavelength
+	//zf = z position of the slab interface for each wavelength (accounting for optical path length)
+	//w = width of the slab (in pixels)
+	//zR = number of z-axis samples
+	//nuR = number of wavelengths
+
+    //get the current coordinate in the plane slice
+	int ifz = blockIdx.x * blockDim.x + threadIdx.x;
+	int inu = blockIdx.y * blockDim.y + threadIdx.y;
+
+	//make sure that the thread indices are in-bounds
+	if(inu >= nuR || ifz >= zR) return;
+
+	int i = inu * zR + ifz;
+
+    //frequency of this sample: the upper half of the array holds the negative frequencies
+    T fz;
+    if(ifz < zR/2)
+        fz = ifz / (T)zR;
+    else
+        //convert to T BEFORE negating: (zR - ifz) is an unsigned int, and
+        //  negating an unsigned value wraps to a huge positive number
+        fz = -(T)(zR - ifz) / (T)zR;
+
+    //if the slab starts outside of the simulation domain, just return
+    if(zf[inu] >= zR) return;
+
+	//fill the array along z with a sinc function representing the Fourier transform of the layer
+
+	T opl = w * ri[inu].real();			//optical path length
+
+	//handle the case where the slab goes outside the simulation domain
+	if(zf[inu] + opl >= zR)
+		opl = zR - zf[inu];
+
+	if(opl == 0) return;
+
+	//phase term that shifts the slab so that it is centered at zf + opl/2
+	complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));
+
+	complex<T> eta = ri[inu] * ri[inu] - 1;
+
+	//the sinc factor is 1 at zero frequency, so it is skipped there to avoid 0/0
+	if(ifz == 0)
+        dest[i] += opl * exp(e) * eta * src[inu];
+    else
+        dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
+}
+
+template<typename T>
+__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){
+	//Advances the depth stored for each of the S entries of zf by one layer.
+	//  Launch layout: 1D, one thread per entry of zf.
+	//zf = current z depth (optical path length) in pixels
+	//ri = refractive index of the material (may be NULL)
+	//w = actual width of the layer (in pixels)
+	//S = number of entries in zf
+
+	//entry handled by this thread
+	const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+
+	//only threads that map to a valid entry do any work
+	if(idx < S){
+		//without refractive indices the depth grows by the raw width,
+		//  otherwise by the width scaled by the real part of the index
+		const T step = (ri == NULL) ? w : ri[idx].real() * w;
+		zf[idx] += step;
+	}
+}
+
+//apply the 1D MIRST filter to an existing sample (overwriting the sample)
+template<typename T>
+__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda, 
+								 T dFz,
+								 T inNA, T outNA, 
+								 unsigned int lambdaR, unsigned int zR, 
+								 T sigma = 0){
+	//Multiplies every element of sampleFFT by the 1D MIRST filter weight.
+	//  Launch layout: 2D, x indexes the Fz sample and y indexes the wavelength.
+	//sampleFFT = the sample in the Fourier domain (will be overwritten)
+	//lambda = list of wavelengths
+	//dFz = delta along the Fz axis in the frequency domain
+	//inNA = NA of the internal obscuration
+	//outNA = NA of the objective
+	//lambdaR = number of wavelengths
+	//zR = number of pixels along the Fz axis (same as the z-axis)
+	//sigma = width of the Gaussian source
+	const int col = blockIdx.x * blockDim.x + threadIdx.x;
+	const int row = blockIdx.y * blockDim.y + threadIdx.y;
+
+	//threads outside of the field have nothing to do
+	if(row >= lambdaR || col >= zR) return;
+
+	//linear index into the sample FT
+	const int idx = row * zR + col;
+
+	//the upper half of each row holds the negative spatial frequencies: zero them and exit
+	if(!(col < zR / 2)){
+	    sampleFFT[idx] = 0;
+	    return;
+	}
+
+	//spatial frequency of this sample
+	const T fz = col * dFz;
+
+	//frequency of the light in inverse microns
+	const T nu = 1/lambda[row];
+
+	//radius of the integration circle (zero above the diffraction limit)
+	const T nu_sq = nu * nu;
+	const T fz_sq = (fz * fz) / 4;
+	const T r = (fz_sq < nu_sq) ? (T)sqrt(nu_sq - fz_sq) : (T)0;
+
+	//optics: pass only radii between the inner obscuration and the objective NA
+	const T Q = (r > nu * inNA && r < nu * outNA) ? (T)1 : (T)0;
+
+	//source: Gaussian weighting (equal to 1 when sigma is 0)
+	const T s = exp( - (r*r * sigma*sigma) / 2 );
+
+	//final filter weight; it stays at zero for the zero-frequency sample
+	T weight = 0;
+	if(!(fz == 0))
+	    weight = 2 * stimPI * r * s * Q * (1/fz);
+
+	sampleFFT[idx] *= weight;
+
+}
+
+/*This object performs a 1-dimensional (layered) MIRST simulation
+*/
+template<typename T>
+class mirst1d{
+
+private:
+	unsigned int Z;	//z-axis resolution
+	unsigned int pad;	//pixel padding on either side of the sample
+
+	std::vector< material<T> > matlist;	//list of materials
+	std::vector< T > layers;				//list of layer thicknesses
+
+	std::vector< T > lambdas;		//list of wavelengths that are being simulated
+	unsigned int S;					//number of wavelengths (size of "lambdas")
+
+	T NA[2];						//numerical aperture (central obscuration and outer diameter)
+
+	function<T, T> source_profile;	//profile (spectrum) of the source (expressed in inverse centimeters)
+
+	complexfield<T, 1> scratch;		//scratch GPU memory used to build samples, transforms, etc.
+
+	//Runs an in-place batched 1D FFT over the scratch field: one transform of
+	//  length Z + pad per wavelength. "direction" is passed through to cuFFT.
+	//  Any cuFFT failure is reported on std::cout and terminates the program.
+	void fft(int direction = CUFFT_FORWARD){
+
+		const unsigned paddedLength = Z + pad;
+
+		//a 4-byte T selects the single precision transforms, anything else double precision
+		const bool singlePrecision = (sizeof(T) == 4);
+
+		//build the plan for the matching precision
+		cufftHandle plan;
+		cufftResult status = cufftPlan1d(&plan, paddedLength,
+										 singlePrecision ? CUFFT_C2C : CUFFT_Z2Z,
+										 lambdas.size());
+
+		//the plan could not be created
+		if(status != CUFFT_SUCCESS){
+			std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;
+			CufftError(status);
+			exit(1);
+		}
+
+		//execute the transform in place
+		if(singlePrecision){
+			status = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);
+		}
+		else{
+			status = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);
+		}
+
+		//the transform itself failed
+		if(status != CUFFT_SUCCESS){
+			std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;
+			CufftError(status);
+			exit(1);
+		}
+
+		//release the plan
+		cufftDestroy(plan);
+	}
+
+
+	//initialize the scratch memory
+	void init_scratch(){
+		//one row of Z + pad samples for every wavelength
+		const unsigned int samplesPerRow = Z + pad;
+		const size_t rowCount = lambdas.size();
+
+		//replace the scratch field with a new one of that size, then assign 0 to it
+		scratch = complexfield<T, 1>(samplesPerRow, rowCount);
+		scratch = 0;
+	}
+
+	//get the list of scattering efficiency (eta) values for a specified layer
+	std::vector< complex<T> > layer_etas(unsigned int l){
+		//returns one eta value per simulated wavelength for layer l, in the order of "lambdas"
+		//  NOTE(review): material<T> does not declare eta() in optics/material.h -
+		//  confirm that the function<> base class provides it before calling this.
+
+		std::vector< complex<T> > result;
+
+		//query the layer's material at every wavelength
+		typename std::vector< T >::const_iterator wavelength = lambdas.begin();
+		while(wavelength != lambdas.end()){
+			result.push_back( matlist[l].eta(*wavelength) );
+			++wavelength;
+		}
+
+		return result;
+	}
+
+	//calculates the optimal block and grid sizes using information from the GPU
+	//  grids  = receives the number of blocks needed to cover (Z + pad) x S threads
+	//  blocks = receives a square block of floor(sqrt(maxThreadsPerBlock)) threads per side
+	void cuda_params(dim3& grids, dim3& blocks){
+		int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
+		int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
+
+		//The scratch field and both kernels launched with these parameters
+		//  use Z + pad samples along z, so the grid covers exactly that many
+		//  (covering Z + 2 * pad only launched blocks that returned immediately).
+		unsigned int paddedZ = Z + pad;
+
+		//create one thread for each sample of the field (rounding up to whole blocks)
+		blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
+		grids = dim3((paddedZ + SQRT_BLOCK - 1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);
+	}
+
+	//add the fourier transform of layer n to the scratch space
+	//  n  = index of the layer (into "layers" and "matlist")
+	//  zf = device array of S depths; it is advanced past this layer before returning
+	void build_layer_fft(unsigned int n, T* zf){
+
+		//nothing to render without wavelengths (this also avoids a launch with an empty grid)
+		if(S == 0) return;
+
+		unsigned int paddedZ = Z + pad;
+
+		T wpx = layers[n] / dz();	//calculate the width of the layer in pixels
+
+		//evaluate the refractive index and source profile on the host first,
+		//  so that each array reaches the GPU in a single transfer
+		std::vector< complex<T> > hostRi(S);
+		std::vector< T > hostSrc(S);
+		for(unsigned int inu=0; inu<S; inu++){
+			hostRi[inu] = matlist[n].getN(lambdas[inu]);
+
+			//the source profile is sampled at 10000 / lambda
+			hostSrc[inu] = source_profile(10000 / lambdas[inu]);
+		}
+
+		//allocate memory for the refractive index and copy it to the GPU
+		complex<T>* gpuRi;
+		HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));
+		HANDLE_ERROR(cudaMemcpy( gpuRi, &hostRi[0], sizeof(complex<T>) * S, cudaMemcpyHostToDevice ));
+
+		//allocate memory for the source profile and copy it to the GPU
+		T* gpuSrc;
+		HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));
+		HANDLE_ERROR(cudaMemcpy( gpuSrc, &hostSrc[0], sizeof(T) * S, cudaMemcpyHostToDevice ));
+
+		//create one thread for each pixel of the field slice
+		dim3 gridDim, blockDim;
+		cuda_params(gridDim, blockDim);
+		stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);
+		HANDLE_ERROR(cudaGetLastError());	//a bad launch configuration is only reported here
+
+		//advance the depth of every wavelength past this layer (one thread per wavelength)
+		int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size
+		int linGrid = S / linBlock + 1;
+		stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);
+		HANDLE_ERROR(cudaGetLastError());
+
+		//free memory
+		HANDLE_ERROR(cudaFree(gpuRi));
+		HANDLE_ERROR(cudaFree(gpuSrc));
+	}
+
+	//render every layer of the sample into the scratch field (one build_layer_fft call per layer)
+	void build_sample(){
+		//start from a freshly initialized GPU scratch space
+		init_scratch();
+
+		//device array of S values, zeroed, handed to build_layer_fft for every layer
+		const size_t zfBytes = sizeof(T) * S;
+		T* zf;
+		HANDLE_ERROR(cudaMalloc(&zf, zfBytes));
+		HANDLE_ERROR(cudaMemset(zf, 0, zfBytes));
+
+		//process the layers in index order (the order in which add_layer stored them)
+		unsigned int idx = 0;
+		while(idx < layers.size()){
+			build_layer_fft(idx, zf);
+			++idx;
+		}
+
+		HANDLE_ERROR(cudaFree(zf));
+	}
+
+	//apply the MIRST filter to the field held in scratch
+	//	launches gpu_mirst1d_apply_filter with the launch configuration from cuda_params()
+	void apply_filter(){
+		//nothing to filter without wavelengths (also keeps &lambdas[0] valid below)
+		if(lambdas.empty()) return;
+
+		dim3 gridDim, blockDim;
+		cuda_params(gridDim, blockDim);
+
+		unsigned int Zpad = Z + pad;
+
+		T sim_range = dz() * Zpad;
+		T dFz = 1 / sim_range;
+
+		//copy the array of wavelengths to the GPU
+		//	the host array holds lambdas.size() entries, so the copy must be sized by that count;
+		//	sizing it by Zpad read past the end of the host array whenever Zpad > lambdas.size()
+		//NOTE(review): assumes the kernel indexes gpuLambdas by wavelength (< S) - confirm against the kernel
+		size_t lambdaBytes = sizeof(T) * lambdas.size();
+		T* gpuLambdas;
+		HANDLE_ERROR(cudaMalloc(&gpuLambdas, lambdaBytes));
+		HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], lambdaBytes, cudaMemcpyHostToDevice));
+		stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas, 
+								 dFz,
+								 NA[0], NA[1], 
+								 S, Zpad);
+		HANDLE_ERROR(cudaGetLastError());	//a bad launch configuration is only reported here
+
+		//release the wavelength buffer (previously leaked on every call)
+		HANDLE_ERROR(cudaFree(gpuLambdas));
+	}
+
+	//crop the image to the sample thickness - keep in mind that sample thickness != optical path length
+	void crop(){
+
+		scratch = scratch.crop(Z, S);	//replace scratch with the result of scratch.crop(Z, S); presumably this drops the pad samples along z - confirm against complexfield::crop
+	}
+	
+	//NOT IMPLEMENTED: intended to save the scratch field as a binary file, but the body is empty, so a call writes nothing (save_mirst takes this path by default)
+	void to_binary(std::string filename){
+
+	}
+
+
+public:
+
+	//construct a simulation
+	//	rZ      = z-axis resolution (stored in Z)
+	//	padding = amount of padding (stored in pad)
+	//defaults for the remaining state: no wavelengths (S = 0), NA = {0, 0.8},
+	//	source profile assigned the constant 1
+	mirst1d(unsigned int rZ = 100, unsigned int padding = 0){
+		//values supplied by the caller
+		pad = padding;
+		Z = rZ;
+
+		//fixed defaults
+		S = 0;
+		NA[1] = 0.8;
+		NA[0] = 0;
+		source_profile = 1;
+	}
+
+	//append a layer to the sample: mat is stored in matlist and thickness (microns) in layers, so both share the same index
+	void add_layer(material<T> mat, T thickness){
+		matlist.push_back(mat);
+		layers.push_back(thickness);
+	}
+
+	void add_layer(std::string filename, T thickness){	//convenience overload: construct a material<T> from filename and add it as a layer
+		add_layer(material<T>(filename), thickness);
+	}
+
+	//load the profile spectrum for the light source from filename (delegates to source_profile.load)
+	void set_source(std::string filename){
+		source_profile.load(filename);
+	}
+
+	//adds a block of wavenumbers (cm^-1) to the simulation parameters
+	//	start = first wavenumber added
+	//	stop  = inclusive upper bound
+	//	step  = increment between consecutive wavenumbers (must be greater than zero)
+	//each wavenumber nu is stored in lambdas as the wavelength 10000 / nu
+	void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){
+		//a zero step never advances past stop, so the loop below would never terminate
+		if(step == 0){
+			std::cout<<"ERROR - wavenumber step must be greater than zero."<<std::endl;
+			exit(1);
+		}
+
+		unsigned int nu = start;
+		while(nu <= stop){
+			lambdas.push_back((T)10000 / nu);
+			//leave before nu + step could exceed stop (this also avoids unsigned wrap-around near the type's maximum)
+			if(stop - nu < step) break;
+			nu += step;
+		}
+		S = lambdas.size();		//update the number of wavelengths (shorthand for later)
+	}
+
+	//total thickness of the sample: the sum of all layer thicknesses, accumulated in layer order
+	T thickness(){
+		T total = 0;
+		unsigned int idx = 0;
+		while(idx < layers.size()){
+			total += layers[idx];
+			++idx;
+		}
+		return total;
+	}
+
+	void padding(unsigned int padding = 0){	//set pad, the amount added to Z to form the padded z-axis size used elsewhere in this class
+		pad = padding;
+	}
+
+	T dz(){	//z-axis step size: total sample thickness divided by the z-axis resolution Z (Z == 0 is not checked)
+		return thickness() / Z;		//calculate the z-axis step size
+	}
+
+	void na(T in, T out){	//set both NA values: NA[0] = in, NA[1] = out
+		NA[0] = in;
+		NA[1] = out;
+	}
+
+	void na(T out){	//set NA[1] = out and reset NA[0] to 0
+		na(0, out);
+	}
+
+	stim::function<T, T> get_source(){	//return a copy of the source profile function
+		return source_profile;
+	}
+
+	void save_sample(std::string filename){
+		//build the sample in scratch, apply an inverse FFT, and save the magnitude as an image (no filter or crop is applied here)
+		build_sample();
+		fft(CUFFT_INVERSE);
+		scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
+	}
+
+	//apply the MIRST filter to a sample and save the result to filename
+	//	binary = true  -> to_binary(filename)
+	//	binary = false -> magnitude image via scratch.toImage
+	void save_mirst(std::string filename, bool binary = true){
+		build_sample();			//build the sample in the Fourier domain
+		apply_filter();			//apply the MIRST filter
+		fft(CUFFT_INVERSE);		//inverse FFT brings the results back into the spatial domain
+		crop();
+
+		//write the image in the requested format
+		if(!binary){
+			scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
+			return;
+		}
+		to_binary(filename);
+	}
+
+
+
+
+	//build a human-readable summary of the simulation: resolution, wavelength domain,
+	//	padding, thickness, step size, every layer (with its material), and the source profile
+	std::string str(){
+
+		std::stringstream ss;
+		ss<<"1D MIRST Simulation========================="<<std::endl;
+		ss<<"z-axis resolution: "<<Z<<std::endl;
+		//lambdas[0] and lambdas.back() are only valid once at least one wavelength has been added
+		if(lambdas.empty())
+			ss<<"simulation domain: []"<<std::endl;
+		else
+			ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;
+		ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;
+		ss<<"padding: "<<pad<<std::endl;
+		ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;
+		ss<<"dz: "<<dz()<<" um"<<std::endl;
+		ss<<std::endl;
+		ss<<layers.size()<<" layers-------------"<<std::endl;
+		for(unsigned int l=0; l<layers.size(); l++)
+			ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;
+
+		ss<<"source profile-----------"<<std::endl;
+		ss<<get_source().str()<<std::endl;
+
+		return ss.str();
+	}
+
+
+
+};
+
+}
@@ -15,7 +15,7 @@
 namespace stim{
-	class argument
+	class cmd_option
 	{
 	private:
 		bool ansi;
@@ -59,8 +59,8 @@ namespace stim{
 	public:
 		void set_ansi(bool b){ ansi = b; }
-        //create an argument with a given name, description, and default value
-		argument(std::string _name, std::string _desc, std::string _default = "", std::string _range = "")
+        //create an option with a given name, description, and default value
+		cmd_option(std::string _name, std::string _desc, std::string _default = "", std::string _range = "")
 		{
 			name = _name;
 			parse_desc(_desc);
@@ -81,12 +81,12 @@ namespace stim{
             return vals.size();
         }
-		//return the value of a text argument
+		//return the value of a text option
 		std::string as_string(unsigned int n = 0)
 		{
             if(!flag)
             {
-                std::cout<<"ERROR - Argument requested without being set: "<<name<<std::endl;
+                std::cout<<"ERROR - Option requested without being set: "<<name<<std::endl;
                 exit(1);
             }
@@ -96,12 +96,12 @@ namespace stim{
             else return "";
 		}
-        //return the value of a floating point argument
+        //return the value of a floating point option
 		float as_float(unsigned int n = 0)
 		{
             if(!flag)
             {
-                std::cout<<"ERROR - Argument requested without being set: "<<name<<std::endl;
+                std::cout<<"ERROR - option requested without being set: "<<name<<std::endl;
                 exit(1);
             }
@@ -115,12 +115,12 @@ namespace stim{
             else return 0;
 		}
-		//return the value of an integer argument
+		//return the value of an integer option
 		int as_int(unsigned int n = 0)
 		{
             if(!flag)
             {
-                std::cout<<"ERROR - Argument requested without being set: "<<name<<std::endl;
+                std::cout<<"ERROR - option requested without being set: "<<name<<std::endl;
                 exit(1);
             }
@@ -138,7 +138,7 @@ namespace stim{
 		int col_width()
 		{
             int n = 3;
-            //add the length of the argument name
+            //add the length of the option name
             n += name.size();
             //if there are any default parameters
@@ -147,7 +147,7 @@ namespace stim{
                 //padding (parenthesis, =, etc.)
                 n += 6;
-                //for each default argument value
+                //for each default option value
                 for(unsigned int v=0; v<vals.size(); v++)
                     n += vals[v].size() + 1;
             }
@@ -209,13 +209,13 @@ namespace stim{
 			return ss.str();
 		}
-		//compare the name of the argument to a string
+		//compare the name of the option to a string
 		bool operator==(std::string rhs)
 		{
             return (name == rhs);
 		}
-		//set the argument to a given value
+		//set the option to a given value
 		void set(std::string _value)
 		{
             parse_val(_value);
@@ -242,10 +242,11 @@ namespace stim{
     private:
 		bool ansi;
-		//vector of arguments
-        std::vector<argument> args;
+		//vector of options
+        std::vector<cmd_option> opts;
+        std::vector<std::string> args;
-		//column width of the longest argument
+		//column width of the longest option
         int col_width;
 		//list of sections
@@ -261,28 +262,28 @@ namespace stim{
 		void set_ansi(bool b)
 		{
 			ansi = b;
-			for(unsigned int i=0; i<args.size(); i++)
-				args[i].set_ansi(ansi);
+			for(unsigned int i=0; i<opts.size(); i++)
+				opts[i].set_ansi(ansi);
 		}
         void add(std::string _name, std::string _desc, std::string _default = "", std::string _range = "")
         {
-            argument arg(_name, _desc, _default, _range);
-			arg.set_ansi(ansi);
-            args.push_back(arg);
+            cmd_option opt(_name, _desc, _default, _range);
+			opt.set_ansi(ansi);
+            opts.push_back(opt);
-            col_width = std::max<int>(col_width, arg.col_width());
+            col_width = std::max<int>(col_width, opt.col_width());
         }
 		void section(std::string _name)
 		{
 			argsection s;
 			s.name = _name;
-			s.index = args.size();
+			s.index = opts.size();
 			sections.push_back(s);
 		}
-        //output the arguments (generally in response to --help)
+        //output the options (generally in response to --help)
         std::string str()
         {
             std::stringstream ss;
@@ -292,8 +293,8 @@ namespace stim{
             if(sections.size() > 0)
                 si = 0;
-            //for each argument
-            for(unsigned int a=0; a<args.size(); a++)
+            //for each option
+            for(unsigned int a=0; a<opts.size(); a++)
             {
                 if(si != -1 && a == sections[si].index)
                 {
@@ -305,7 +306,7 @@ namespace stim{
                     if(si == (int)sections.size()) si = -1;
                 }
-                ss<<args[a].toStr(col_width)<<std::endl;
+                ss<<opts[a].toStr(col_width)<<std::endl;
             }
             return ss.str();
@@ -313,9 +314,9 @@ namespace stim{
         int index(std::string _name)
         {
-        	unsigned int i = find(args.begin(), args.end(), _name) - args.begin();
+        	unsigned int i = find(opts.begin(), opts.end(), _name) - opts.begin();
-            if(i >= args.size())
+            if(i >= opts.size())
                 return -1;
             return (int)i;
@@ -327,52 +328,57 @@ namespace stim{
             if(i != -1)
             {
-                args[i].set(_value);
+            	opts[i].set(_value);
                 //adjust the column width if necessary
-                col_width = (std::max)(col_width, args[i].col_width());
+                col_width = (std::max)(col_width, opts[i].col_width());
             }
             else
-                std::cout<<"ERROR - Argument not recognized: "<<_name<<std::endl;
+                std::cout<<"ERROR - option not recognized: "<<_name<<std::endl;
         }
         //parse a parameter string
         void parse(int argc, char* argv[])
         {
-            //if the number of arguments is 1, we're done
+            //if the number of options is 1, we're done
             if(argc <= 1) return;
             std::string name;
             std::string params;
+            bool args_done = false;		//create a flag that turns true when the first option is encountered
+
             for(int i=1; i<argc; i++)
             {
-                //if the argument is a parameter name
+                //if the argument is an option
                 if(argv[i][0] == '-' && argv[i][1] == '-')
                 {
-                    //add any previous arguments
+                	args_done = true;				//arguments for the executable are done, all options now
+                    //add any previous options
                     if(name != "")
                         set(name, params);
-                    //set the current argument to this name
+                    //set the current option to this name
                     name = argv[i]+2;
                     //clear the parameters list
                     params = "";
                 }
-                else
-                {
+                else if(!args_done){
+                	args.push_back(argv[i]);
+                }
+                else{	//everything else is an arg for the most recent option
 					if(params != "")
 						params += " ";
                     params += argv[i];
                 }
             }
-            //set the last argument
+            //set the last option
             set(name, params);
         }
         //determine if a parameter has been set (either specified by the user or with a default value)
         bool operator()(std::string _name)
         {
-            int i = find(args.begin(), args.end(), _name) - args.begin();
+            int i = find(opts.begin(), opts.end(), _name) - opts.begin();
             if(i < 0)
             {
@@ -380,12 +386,13 @@ namespace stim{
                 exit(1);
             }
-            return args[i].is_set();
+            return opts[i].is_set();
         }
-        int nargs(std::string _name)
+        //number of arguments in a specified option
+        unsigned int nargs(std::string _name)
         {
-            int i = find(args.begin(), args.end(), _name) - args.begin();
+            int i = find(opts.begin(), opts.end(), _name) - opts.begin();
             if(i < 0)
             {
@@ -393,12 +400,22 @@ namespace stim{
                 exit(1);
             }
-            return args[i].nargs();
+            return opts[i].nargs();
+        }
+
+        //number of arguments for the executable
+        unsigned int nargs(){
+        	return args.size();
+        }
+
+        //return the a'th executable argument (a is zero-based and must be less than nargs())
+        std::string arg(unsigned int a){
+        	//an out-of-range index is reported and terminates the program, like the other
+        	//	failed lookups in this file
+        	if(a >= args.size()){
+        		std::cout<<"ERROR - executable argument index out of range: "<<a<<std::endl;
+        		exit(1);
+        	}
+        	return args[a];
         }
-        argument operator[](std::string _name)
+        cmd_option operator[](std::string _name)
         {
-            int i = find(args.begin(), args.end(), _name) - args.begin();
+            int i = find(opts.begin(), opts.end(), _name) - opts.begin();
             if(i < 0)
             {
@@ -406,7 +423,7 @@ namespace stim{
                 exit(1);
             }
-            return args[i];
+            return opts[i];
         }