reformat of directory structure

David Mayerich
1 parent 0174d823
Showing 26 changed files with 2254 additions and 350 deletions Show diff stats
rts/biology/fibernet.h
rts/cuda/glbind.h
rts/cuda/memory.h
rts/cuda/threads.h
rts/envi/envi.h
rts/envi/envi_header.h
rts/gl/error.h
rts/gl/rtsSourceCode.h
rts/gl/rts_glShaderObject.h
rts/gl/rts_glShaderProgram.h
rts/gl/rts_glShaderUniform.h
rts/gl/rts_glUtilities.h
rts/gl/texture.h
rts/math/complex.h
rts/math/function.h
rts/math/matrix.h
rts/math/point.h
rts/math/quad.h
rts/math/quaternion.h
rts/math/spherical_bessel.h
+#ifndef RTS_GL_BIND_H
+#define RTS_GL_BIND_H
+
+#include <GL/glew.h>
+#include <GL/gl.h>
+
+#include <stdio.h>
+#include <cstring>
+
+#include <cudaHandleError.h>
+#include "cuda_gl_interop.h"
+#include "rts/gl/error.h"
+
+
+/// Initializes the GLEW extension loader and reports the GLEW version in use.
+/// If initialization fails, the reason reported by GLEW is printed; the
+/// version line is printed in either case.
+static void rtsInitGLEW()
+{
+	//Initialize the GLEW toolkit
+	GLenum err = glewInit();
+	if(GLEW_OK != err)
+	{
+		//include the reason and end the line so the status message below starts on its own line
+		printf("Error starting GLEW: %s\n", glewGetErrorString(err));
+	}
+	fprintf(stdout, "Status: Using GLEW %s\n", glewGetString(GLEW_VERSION));
+}
+
+/// Picks a CUDA device matching the requested compute capability and selects
+/// it for OpenGL interoperability.
+///
+/// @param major  requested compute-capability major version (default 1)
+/// @param minor  requested compute-capability minor version (default 3)
+///
+/// Every CUDA call is wrapped in HANDLE_ERROR.
+static void rts_cudaSetDevice(int major = 1, int minor = 3)
+{
+	//report how many CUDA devices the runtime can see
+	int deviceCount;
+	HANDLE_ERROR(cudaGetDeviceCount(&deviceCount));
+	printf("Number of CUDA devices detected: %d\n", deviceCount);
+
+	//describe the device we want: everything zeroed except the compute capability
+	cudaDeviceProp wanted;
+	memset(&wanted, 0, sizeof(wanted));
+	wanted.major = major;
+	wanted.minor = minor;
+
+	//let the runtime choose a device for that description
+	int chosen;
+	HANDLE_ERROR(cudaChooseDevice(&chosen, &wanted));
+
+	//query the chosen device's properties (the result is not used any further here)
+	HANDLE_ERROR(cudaGetDeviceProperties(&wanted, chosen));
+
+	//make the chosen device the one used together with OpenGL
+	HANDLE_ERROR(cudaGLSetGLDevice(chosen));
+}
+
+/// Maps an already-registered CUDA graphics resource and returns the pointer
+/// through which its memory can be accessed.
+///
+/// @param cudaBufferResource  the registered resource to map
+/// @return the mapped pointer reported by CUDA
+static void* rts_cudaMapResource(cudaGraphicsResource* cudaBufferResource)
+{
+	//map the single resource (NULL is passed as the stream argument)
+	HANDLE_ERROR(cudaGraphicsMapResources(1, &cudaBufferResource, NULL));
+
+	//fetch the mapped pointer; the mapped size is retrieved as well but not used
+	void* mappedPtr;
+	size_t mappedBytes;
+	HANDLE_ERROR(cudaGraphicsResourceGetMappedPointer(&mappedPtr, &mappedBytes, cudaBufferResource));
+
+	return mappedPtr;
+}
+/// Unmaps a CUDA graphics resource so that OpenGL can use it again.
+///
+/// @param resource  the resource to unmap
+static void rts_cudaUnmapResource(cudaGraphicsResource* resource)
+{
+	//the API takes an array of resources; hand it our single one
+	cudaGraphicsResource** toUnmap = &resource;
+	HANDLE_ERROR(cudaGraphicsUnmapResources(1, toUnmap, NULL));
+}
+
+/// (Re)creates an OpenGL pixel buffer of resX * resY uchar3 elements and
+/// registers it with CUDA.
+///
+/// @param glBufferName        in: name of a previous buffer to delete, or 0;
+///                            out: name of the newly generated buffer
+/// @param cudaBufferResource  in: previously registered resource to unregister, or 0;
+///                            out: resource registered for the new buffer
+/// @param resX                buffer width in pixels
+/// @param resY                buffer height in pixels
+///
+/// The new buffer is left bound to GL_PIXEL_UNPACK_BUFFER when the function returns.
+static void rts_cudaCreateRenderBuffer(GLuint &glBufferName, cudaGraphicsResource* &cudaBufferResource, int resX, int resY)
+{
+	//delete the previous buffer name and resource, and forget the stale handles
+	if(cudaBufferResource != 0)
+	{
+		HANDLE_ERROR(cudaGraphicsUnregisterResource(cudaBufferResource));
+		cudaBufferResource = 0;
+	}
+	if(glBufferName != 0)
+	{
+		glDeleteBuffers(1, &glBufferName);
+		glBufferName = 0;
+	}
+
+	//generate an OpenGL offscreen buffer
+	glGenBuffers(1, &glBufferName);
+
+	//bind the buffer - directs all calls to this buffer
+	glBindBuffer(GL_PIXEL_UNPACK_BUFFER, glBufferName);
+
+	//compute the size in size_t so that large resolutions do not overflow int arithmetic
+	size_t numBytes = (size_t)resX * (size_t)resY * sizeof(uchar3);
+	glBufferData(GL_PIXEL_UNPACK_BUFFER, numBytes, NULL, GL_DYNAMIC_DRAW_ARB);
+	CHECK_OPENGL_ERROR
+
+	//make the buffer accessible from CUDA
+	HANDLE_ERROR(cudaGraphicsGLRegisterBuffer(&cudaBufferResource, glBufferName, cudaGraphicsMapFlagsNone));
+}
+
+#endif
+#include <cuda.h>
+
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+#include "rts/cuda/callable.h"
+
+#ifndef CUDA_THREADS_H
+#define CUDA_THREADS_H
+
+//largest grid size GenGrid1D() will use along x; larger block counts spill over into the y dimension
+#define MAX_GRID        65535
+
+/// Returns the flat index of the calling thread for a launch that uses 1D
+/// blocks on a grid with x and y extents, such as the one built by GenGrid1D().
+/// Blocks are numbered row by row (blockIdx.y * gridDim.x + blockIdx.x).
+/// The arithmetic is done in unsigned int.
+///
+/// Declared inline because this definition lives in a header: without it,
+/// including the header from several translation units yields duplicate
+/// symbols when relocatable device code is linked.
+__device__ inline unsigned int ThreadIndex1D()
+{
+    //flat block number, then offset of the thread inside its block
+    unsigned int block = blockIdx.y * gridDim.x + blockIdx.x;
+    return block * blockDim.x + threadIdx.x;
+}
+
+/// Builds a grid with enough blocks of `blocksize` threads to cover N elements.
+///
+/// @param N          number of elements to cover
+/// @param blocksize  threads per block (default 128); must be non-zero
+/// @return a grid with z = 1; x is capped at MAX_GRID and any excess blocks
+///         are folded into y
+///
+/// The grid can hold more threads than N (rounding happens per block and
+/// again per grid row), so a kernel has to compare its flat index against N.
+///
+/// Declared inline because the definition lives in a header; otherwise every
+/// translation unit including it would emit its own copy and the link fails.
+inline dim3 GenGrid1D(unsigned int N, unsigned int blocksize = 128)
+{
+    dim3 dimgrid;
+
+    //ceil(N / blocksize), written so it cannot wrap around when N is close to UINT_MAX
+    dimgrid.x = N / blocksize + (N % blocksize != 0 ? 1 : 0);
+    dimgrid.y = 1;
+    dimgrid.z = 1;
+
+    //too many blocks for one grid row: spread them over several rows
+    if(dimgrid.x > MAX_GRID)
+    {
+        dimgrid.y = (dimgrid.x + MAX_GRID - 1) / MAX_GRID;
+        dimgrid.x = MAX_GRID;
+    }
+
+    return dimgrid;
+}
+
+
+#endif
@@ -173,7 +173,7 @@ class EnviFile
             exit(1);
         }
  
-		float r, v0, v1;
+		float v0, v1;
         for(int n=0; n<N; n++)
         {
 			v0 = ((float*)A)[n];
@@ -72,7 +72,7 @@ struct EnviHeader
 	std::string trim(std::string line)
 	{
 		//trims whitespace from the beginning and end of line
-		int start_i, end_i;
+		unsigned int start_i, end_i;
 		for(start_i=0; start_i < line.length(); start_i++)
 			if(line[start_i] != 32)
 			{
@@ -188,16 +188,12 @@ struct EnviHeader
 	{
 		//this function returns a sequence of comma-delimited strings
 		std::vector<double> result;
-
-		double fentry;
-
 		std::string entry;
 		size_t i;
 		do
 		{
 			i = sequence.find_first_of(',');
 			entry = sequence.substr(0, i);
-			fentry = atof(entry.c_str());
 			sequence = sequence.substr(i+1);
 			result.push_back(atof(entry.c_str()));
 			//std::cout<<entry<<"   ";
@@ -356,7 +352,7 @@ struct EnviHeader
 		if(band_names.size() > 0)
 		{
 			outfile<<"band names = {"<<std::endl;
-			for(int i=0; i<band_names.size(); i++)
+			for(unsigned int i=0; i<band_names.size(); i++)
 			{
 				outfile<<band_names[i];
 				if(i < band_names.size() - 1)
@@ -365,7 +361,7 @@ struct EnviHeader
 			outfile<<"}"<<std::endl;
 		}
 		outfile<<"wavelength = {"<<std::endl;
-			for(int i=0; i<wavelength.size()-1; i++)
+			for(unsigned int i=0; i<wavelength.size()-1; i++)
 				outfile<<wavelength[i]<<", ";
 			outfile<<wavelength.back()<<"}"<<std::endl;
  
+#ifndef RTS_OPENGL_ERROR
+#define RTS_OPENGL_ERROR
+
+#include <stdio.h>
+#include <GL/gl.h>
+#include <GL/glu.h>
+
+#define CHECK_OPENGL_ERROR \
+{ GLenum error; \
+   while ( (error = glGetError()) != GL_NO_ERROR) { \
+   printf( "OpenGL ERROR: %s\nCHECK POINT: %s (line %d)\n", gluErrorString(error), __FILE__, __LINE__ ); \
+   } \
+}
+
+#endif
 \ No newline at end of file
+#ifndef RTSSOURCECODE_H
+#define RTSSOURCECODE_H
+
+#include <string>
+#include <fstream>
+#include <vector>
+#include <iostream>
+
+using namespace std;
+
+///This class defines generic source code that can be loaded from text files.  It is primarily used by the rts_glShaderProgram class for GLSL programming.
+
+class rtsSourceCode
+{
+public:
+	std::vector<std::string> source;	//the actual source code; lines read from a file each end in '\n'
+
+	///Clears any current source code from the class.
+	void clear()
+	{
+		source.clear();
+	}
+
+	///Loads source code from a specified file.
+	///If the file cannot be opened the current contents are left untouched;
+	///otherwise they are replaced by the lines of the file.
+	void LoadSource(const char* filename)
+	{
+		std::ifstream infile(filename);	//open the specified file
+
+		if(!infile.is_open())			//if the file is not open, exit
+			return;
+
+		source.clear();					//remove any previous code
+
+		//loop on the result of getline() rather than on eof(): testing eof()
+		//first appends one extra blank line after the last real line
+		std::string current_line;
+		while(std::getline(infile, current_line))
+		{
+			current_line += '\n';
+			source.push_back(current_line);
+		}
+	}
+
+	///Constructor creates the class and loads source code from the specified file.
+	rtsSourceCode(const char* filename)
+	{
+		LoadSource(filename);
+	}
+
+	///Constructor creates a blank class.
+	rtsSourceCode(){}
+
+	///Appends all lines of another source object to this one.
+	rtsSourceCode& operator+=(const rtsSourceCode& rhs)
+	{
+		//take the count up front so that appending an object to itself terminates
+		const size_t lines = rhs.source.size();
+		for(size_t l = 0; l < lines; l++)
+			source.push_back(rhs.source[l]);
+		return *this;
+	}
+
+	///Appends a single entry exactly as given (no newline is added).
+	rtsSourceCode& operator+=(const std::string& rhs)
+	{
+		source.push_back(rhs);
+		return *this;
+	}
+
+	///Sends the source code to the standard output, each entry prefixed by its index.
+	void ConsoleOut()
+	{
+		const size_t lines = source.size();
+		for(size_t l = 0; l < lines; l++)
+			std::cout<<l<<":  "<<source[l];
+	}
+};
+
+#endif
 \ No newline at end of file
+#ifndef RTS_GLSHADERS
+#define RTS_GLSHADERS
+
+#include <GL/glew.h>
+//#include "windows.h"
+#include <GL/gl.h>
+#include "rtsSourceCode.h"
+
+///Wraps a single OpenGL shader object: its type, its source code, the OpenGL
+///id and the most recent error/compile log.
+///The class has no destructor; call Clean() to delete the OpenGL shader.
+class rts_glShaderObject
+{
+private:
+	//puts the object into its "no shader yet" state
+	void init()
+	{
+		id = 0;
+		compiled = false;
+		type = GL_FRAGMENT_SHADER;
+	}
+public:
+	bool compiled;			//true once Compile() has succeeded
+	GLenum type;			//shader type passed to glCreateShader
+	rtsSourceCode source;	//source code that UploadSource() sends to OpenGL
+	GLuint id;				//OpenGL shader id, 0 if there is none
+	std::string log;		//last error message or compile log
+
+	///Creates a shader of the given type and loads its source from a file.
+	rts_glShaderObject(GLenum type, const char* filename)
+	{
+		init();					//initialize the shader
+		SetType(type);			//set the shader type
+		LoadSource(filename);	//load the source code
+	}
+	///Creates a shader of the given type from source code already in memory.
+	rts_glShaderObject(GLenum type, rtsSourceCode sourceCode)
+	{
+		init();					//initialize the shader
+		SetType(type);			//set the shader type
+		source = sourceCode;
+	}
+	///Creates an empty object; no OpenGL shader is created until SetType() is called.
+	rts_glShaderObject()
+	{
+		init();
+	}
+	///Creates a shader of the given type without any source code.
+	rts_glShaderObject(GLenum type)
+	{
+		init();
+		SetType(type);
+	}
+	///Replaces the stored source code with the contents of a file.
+	void LoadSource(const char* filename)
+	{
+		source = rtsSourceCode(filename);	//get the shader source code
+	}
+	///Deletes any existing OpenGL shader and creates a new one of the given type.
+	///On failure id stays 0 and a message is stored in log.
+	void SetType(GLenum type)
+	{
+		if(id != 0)					//if a shader currently exists, delete it
+		{
+			glDeleteShader(id);
+			id = 0;
+		}
+		this->type = type;			//the parameter shadows the member, so qualify the member explicitly
+		compiled = false;			//a freshly created shader object has not been compiled
+		id = glCreateShader(type);	//create a shader object
+		if(id == 0)					//if a shader was not created, log an error
+			log = "Error getting shader ID from OpenGL";
+	}
+	///Sends the stored source lines to OpenGL.
+	void UploadSource()
+	{
+		//glShaderSource takes explicit lengths, so it can read straight from the
+		//stored strings; no temporary copies have to be allocated (or freed)
+		const size_t count = source.source.size();
+		std::vector<const GLchar*> code_string(count);
+		std::vector<GLint> length(count);
+		for(size_t l = 0; l < count; l++)	//for each line of code
+		{
+			code_string[l] = source.source[l].data();
+			length[l] = (GLint)source.source[l].size();
+		}
+		glShaderSource(id, (GLsizei)count,
+					   count ? &code_string[0] : NULL,
+					   count ? &length[0] : NULL);		//attach the shader source
+	}
+	///Uploads and compiles the source code, sets the compiled flag and, on
+	///failure, stores the compiler output (up to the buffer size) in log.
+	void Compile()
+	{
+		//send the source code to the GPU
+		UploadSource();
+
+		//compile the shader
+		glCompileShader(id);
+		GLint compile_status = GL_FALSE;
+		glGetShaderiv(id, GL_COMPILE_STATUS, &compile_status);	//get the compile status
+		if(compile_status != GL_TRUE)	//if there was an error
+		{
+			GLchar buffer[1000];		//create a log buffer
+			buffer[0] = 0;				//stay a valid string even if nothing is written
+			GLsizei length = 0;
+			glGetShaderInfoLog(id, 1000, &length, buffer);	//get the log
+			log = buffer;
+			compiled = false;
+		}
+		else
+			compiled = true;
+	}
+	///Prints the log, followed by a newline if it is not empty.
+	void PrintLog()
+	{
+		std::cout<<log;
+		if(log.size() != 0) std::cout<<std::endl;
+	}
+	///Deletes the OpenGL shader, if any, and resets the id so that a second
+	///call (or a later SetType) does not delete it again.
+	void Clean()
+	{
+		if(id != 0)
+		{
+			glDeleteShader(id);
+			id = 0;
+			compiled = false;
+		}
+	}
+};
+
+
+
+#endif
+#ifndef RTS_GLSHADERPROGRAM_H
+#define RTS_GLSHADERPROGRAM_H
+
+/*********************************************************
+//create a shader program
+	rts_glShaderProgram myProgram;
+//initialize
+	myProgram.Init();
+//Attach shaders
+	myProgram.AttachShader(GL_FRAGMENT_SHADER, "filename.glsl");
+//Compile and link
+	myProgram.Compile();
+	myProgram.Link();
+	myProgram.PrintLog();
+//attach uniform variables
+	myProgram.AttachTextureMap("texture", texture);
+	myProgram.AttachGlobalUniform("light_intensity", &intensity);
+
+//use the program
+	myProgram.BeginProgram();
+	//render
+	myProgram.EndProgram();
+**********************************************************/
+
+
+#include "rts_glShaderObject.h"
+#include "rts_glShaderUniform.h"
+#include "rts_glTextureMap.h"
+#include <algorithm>
+
+using namespace std;
+
+///Manages an OpenGL shader program: the attached shader objects, the list of
+///active uniform variables and the texture maps bound while the program is in use.
+///The class has no destructor; call Clean() to release the OpenGL objects.
+class rts_glShaderProgram
+{
+private:
+	//Queries OpenGL for the active uniforms of the program and appends one
+	//rts_glShaderUniform per variable to uniform_list.
+	//NOTE(review): the list is not cleared first, so linking the same program
+	//twice appends a second set of entries - confirm whether that is intended.
+	void get_uniforms()
+	{
+		GLint num_uniforms = 0;
+		glGetProgramiv(id, GL_ACTIVE_UNIFORMS, &num_uniforms);		//get the number of uniform variables
+		GLint max_name_length = 0;
+		glGetProgramiv(id, GL_ACTIVE_UNIFORM_MAX_LENGTH, &max_name_length);	//get the maximum uniform name length
+
+		//name buffer owned by a vector so that it is released on every path
+		std::vector<GLchar> name_buffer((size_t)(max_name_length > 0 ? max_name_length : 1), GLchar(0));
+		GLsizei length = 0;					//returned by OpenGL but not used
+		GLint size = 0;						//returned by OpenGL but not used
+		GLenum type = 0;					//variable's data type
+		for(int i=0; i<num_uniforms; i++)		//create an rts_glShaderUniform structure for each variable
+		{
+			glGetActiveUniform(id, i, (GLsizei)name_buffer.size(), &length, &size, &type, &name_buffer[0]);	//get the uniform information
+
+			//create the rts_glShaderUniform structure
+			rts_glShaderUniform current;
+			current.location = glGetUniformLocation(id, &name_buffer[0]);	//GPU location of the variable
+			current.name = &name_buffer[0];
+			current.type = type;
+			current.p_value = NULL;
+
+			uniform_list.push_back(current);
+		}
+	}
+	//Returns the index of the first uniform with the given name, or -1 if there is none.
+	int get_index(const char* name)
+	{
+		unsigned int size = uniform_list.size();
+		for(unsigned int i=0; i<size; i++)
+		{
+			if(uniform_list[i].name == name)
+				return i;
+		}
+		return -1;
+	}
+	string log;		//last error message or link log
+public:
+	GLuint id;			//OpenGL program id, 0 if there is none
+	bool linked;		//true once Link() has succeeded
+	vector<rts_glShaderObject> shader_list;	//list of opengl shaders
+	vector<rts_glShaderUniform> uniform_list;	//list of active uniform variables
+	vector<rts_glTextureMap> texture_list;		//list of texture maps
+
+	///Creates an empty object; the OpenGL program is created by Init() or by the first AttachShader().
+	rts_glShaderProgram()
+	{
+		linked = false;
+		id = 0;
+	}
+	///Attaches a shader object to the program, creating the program first if necessary.
+	///A shader whose id is 0 is rejected and a message is stored in the log.
+	void AttachShader(rts_glShaderObject shader)
+	{
+		if(id == 0)
+		{
+			Init();
+		}
+		if(shader.id == 0)	//if the shader is invalid
+		{
+			log = "Shader is invalid";
+			return;
+		}
+
+		//attach the shader to the program
+		glAttachShader(id, shader.id);			//attach the shader to the program in OpenGL
+		CHECK_OPENGL_ERROR
+		shader_list.push_back(shader);			//push the shader onto our list for later access
+	}
+	///Creates a shader from a file and attaches it.
+	///type = GL_FRAGMENT_SHADER or GL_VERTEX_SHADER
+	void AttachShader(GLenum type, const char* filename)
+	{
+		rts_glShaderObject shader(type, filename);
+		AttachShader(shader);
+	}
+	///Creates a shader from source code in memory and attaches it.
+	void AttachShader(GLenum type, rtsSourceCode source)
+	{
+		rts_glShaderObject shader(type, source);
+		AttachShader(shader);
+	}
+	///Prints the log, followed by a newline if it is not empty.
+	void PrintLog()
+	{
+		cout<<log;
+
+		if(log.size() != 0) cout<<endl;
+	}
+	///Compiles every attached shader. The per-shader logs stay in the shader objects.
+	void Compile()
+	{
+		if(shader_list.size() == 0)
+		{
+			log = "No shaders to compile";
+			return;
+		}
+
+		vector<rts_glShaderObject>::iterator iter;
+		for(iter = shader_list.begin(); iter != shader_list.end(); iter++)
+		{
+			(*iter).Compile();
+			//(*iter).PrintLog();
+		}
+	}
+	///Links the program, records the result in `linked`, stores the link log
+	///and builds the list of active uniform variables.
+	void Link()
+	{
+		if(id == 0)				//nothing to link; avoid reading a log that OpenGL never wrote
+		{
+			linked = false;
+			log = "Invalid program, cannot link.";
+			return;
+		}
+
+		glLinkProgram(id);				//link the current shader program
+		GLint link_status = GL_FALSE;	//test to see if the link went alright
+		glGetProgramiv(id, GL_LINK_STATUS, &link_status);
+		linked = (link_status == GL_TRUE);
+
+		GLsizei length = 0;
+		GLchar buffer[1000];
+		buffer[0] = 0;					//stay a valid string even if nothing is written
+		glGetProgramInfoLog(id, 1000, &length, buffer);
+		log = buffer;
+
+		get_uniforms();			//create the list of active uniform variables
+	}
+	///Binds texture i of texture_list to texture unit i and makes the program current.
+	///Using an unlinked program only prints a warning.
+	void BeginProgram()
+	{
+		CHECK_OPENGL_ERROR
+		if(id == 0)				//if the program is invalid, return
+		{
+			log = "Invalid program, cannot use.";
+			return;
+		}
+		if(!linked)
+		{
+			cout<<"Shader Program used without being linked."<<endl;
+			//exit(1);
+		}
+
+		//set up all of the texture maps
+		int num_textures = texture_list.size();
+
+		for(int t=0; t<num_textures; t++)
+		{
+			glActiveTexture(GL_TEXTURE0 + t);
+			CHECK_OPENGL_ERROR
+			//glEnable(texture_list[t].texture_type);
+			//CHECK_OPENGL_ERROR
+			glBindTexture(texture_list[t].texture_type, texture_list[t].name);
+			CHECK_OPENGL_ERROR
+		}
+
+		glUseProgram(id);
+		CHECK_OPENGL_ERROR
+	}
+	///Disables the texture targets used by the program, reselects texture
+	///unit 0 and switches back to program 0.
+	void EndProgram()
+	{
+		CHECK_OPENGL_ERROR
+		//return standard functionality
+		int num_textures = texture_list.size();
+
+		//disable all texture units
+		for(int t=0; t<num_textures; t++)
+		{
+			glActiveTexture(GL_TEXTURE0 + t);
+			glDisable(texture_list[t].texture_type);
+			CHECK_OPENGL_ERROR
+		}
+		//make sure that the single default texture unit is active
+		if(num_textures > 0)
+			glActiveTexture(GL_TEXTURE0);
+		CHECK_OPENGL_ERROR
+
+		//return to OpenGL default shading
+		glUseProgram(0);
+		CHECK_OPENGL_ERROR
+	}
+	///Prints the index, name and location of every known uniform variable.
+	void PrintUniforms()
+	{
+		cout<<"Shader Uniforms: "<<endl;
+		unsigned int i;
+		for(i=0; i<uniform_list.size(); i++)
+		{
+			cout<<i<<":  "<<uniform_list[i].name<<"          "<<uniform_list[i].location<<endl;
+		}
+	}
+	///Attaches a global variable to the indexed uniform parameter.
+	///Only the pointer is stored; it is read when the uniforms are submitted,
+	///so the variable has to stay alive until then.
+	void AttachGlobalUniform(unsigned int index, void* param)
+	{
+		if(index >= uniform_list.size())	//do not write past the end of the list
+		{
+			cout<<"Error attaching uniform: index "<<index<<" is out of range."<<endl;
+			return;
+		}
+		uniform_list[index].p_value = param;
+	}
+	///Attaches a global variable to the uniform with the given name.
+	///Prints an error if no such uniform is known.
+	void AttachGlobalUniform(const char* name, void* param)
+	{
+		//find the index of the uniform
+		int index = get_index(name);
+		if(index != -1)
+			AttachGlobalUniform((unsigned int)index, param);
+		else
+		{
+			string strError = "Error finding uniform variable: ";
+			strError += name;
+			cout<<strError<<endl;
+		}
+	}
+	///Attaches a texture map to the indexed uniform.
+	///The uniform's value becomes the position of the texture in texture_list.
+	///NOTE(review): the small integer allocated here is not released by any
+	///code visible in this class.
+	void AttachTextureMap(unsigned int index, rts_glTextureMap texture)
+	{
+		if(index >= uniform_list.size())	//do not index past the end of the list
+		{
+			cout<<"Error attaching texture: index "<<index<<" is out of range."<<endl;
+			return;
+		}
+
+		//if there is not a texture map assigned to the variable
+		if(uniform_list[index].p_value == NULL)
+		{
+			uniform_list[index].p_value = new unsigned int[1];
+			((unsigned int*)uniform_list[index].p_value)[0] = texture_list.size();		//set the parameter value to the index of the texture
+			texture_list.push_back(texture);						//add the texture to the texture list
+		}
+		//if there is a texture map assigned, replace it
+		else
+		{
+			unsigned int slot = ((unsigned int*)(uniform_list[index].p_value))[0];
+			if(slot < texture_list.size())
+				texture_list[slot] = texture;
+			else
+				cout<<"Error attaching texture: uniform does not refer to a texture slot."<<endl;
+		}
+	}
+	///Attaches a texture map to the uniform with the given name.
+	void AttachTextureMap(const char* name, rts_glTextureMap texture)
+	{
+		int index = get_index(name);
+		if(index != -1)		//make sure that the uniform index is valid
+			AttachTextureMap((unsigned int)index, texture);
+		else
+			cout<<"Error finding texture index.  Try linking."<<endl;
+	}
+	///Sends updated uniform information to the GPU.
+	///The program is made current for the duration of the update.
+	void UpdateGlobalUniforms()
+	{
+		CHECK_OPENGL_ERROR
+		BeginProgram();
+		CHECK_OPENGL_ERROR
+		unsigned int num = uniform_list.size();
+		for(unsigned int i=0; i<num; i++)
+			uniform_list[i].submit_to_gpu();
+		EndProgram();
+	}
+	///Initialize the shader program: releases any existing program and creates a new one.
+	void Init()
+	{
+		CHECK_OPENGL_ERROR
+		if(id != 0)
+			Clean();
+		id = glCreateProgram();
+		if(id == 0)
+			log = "Error getting program ID from OpenGL";
+		CHECK_OPENGL_ERROR
+	}
+	///Deletes the OpenGL program and all attached shaders and empties all lists.
+	void Clean()
+	{
+		if(id != 0)
+			glDeleteProgram(id);
+		id = 0;
+		linked = false;			//there is no program left that could be linked
+
+		//these are allocated outside the object and can just be cleared
+		uniform_list.clear();
+		texture_list.clear();
+
+		//delete each shader from OpenGL
+		int num_shad = shader_list.size();
+		for(int i=0; i<num_shad; i++)
+			shader_list[i].Clean();
+		//clear the list
+		shader_list.clear();
+	}
+};
+
+#endif
+#ifndef RTS_GLSHADERUNIFORM_H
+#define RTS_GLSHADERUNIFORM_H
+
+#include "CHECK_OPENGL_ERROR.h"
+#include <GL/glew.h>
+#include <string>
+
+using namespace std;
+
+enum rtsUniformEnum {RTS_FLOAT, RTS_INT, RTS_BOOL, RTS_FLOAT_MATRIX};
+
+///This class stores a single uniform variable for GLSL and is designed to be used by the rts_glShaderProgram class.
+struct rts_glShaderUniform
+{
+public:
+	string name;		//name of the uniform variable
+	GLint location;		//location of the variable in the program (negative: nothing is submitted)
+	void* p_value;		//address of the data in main memory that holds the value
+	GLenum type;		//OpenGL type of the variable (GL_FLOAT, GL_INT_VEC2, ...)
+	string log;
+
+	//A new uniform has no location and no value attached.
+	rts_glShaderUniform() : location(-1), p_value(NULL)
+	{
+	}
+
+	//Sends the value that p_value points at to the uniform's location, using
+	//the glUniform* call that matches `type`. Nothing is sent when the
+	//location is negative; a missing value pointer is reported and skipped.
+	void submit_to_gpu()
+	{
+		if(location < 0)
+			return;
+		if(p_value == NULL)
+		{
+			cout<<"Error in uniform address: "<<name<<endl;
+			return;
+		}
+
+		//the two views of the value used by the calls below
+		float* as_float = (float*)p_value;
+		int* as_int = (int*)p_value;
+
+		CHECK_OPENGL_ERROR
+		switch(type)
+		{
+		//floating point scalars and vectors
+		case GL_FLOAT:
+			glUniform1fv(location, 1, as_float);
+			break;
+		case GL_FLOAT_VEC2:
+			glUniform2fv(location, 1, as_float);
+			break;
+		case GL_FLOAT_VEC3:
+			glUniform3fv(location, 1, as_float);
+			break;
+		case GL_FLOAT_VEC4:
+			glUniform4fv(location, 1, as_float);
+			break;
+
+		//integer and boolean vectors are submitted the same way
+		case GL_INT_VEC2:
+		case GL_BOOL_VEC2:
+			glUniform2iv(location, 1, as_int);
+			break;
+		case GL_INT_VEC3:
+		case GL_BOOL_VEC3:
+			glUniform3iv(location, 1, as_int);
+			break;
+		case GL_INT_VEC4:
+		case GL_BOOL_VEC4:
+			glUniform4iv(location, 1, as_int);
+			break;
+
+		//square float matrices, passed without transposition
+		case GL_FLOAT_MAT2:
+			glUniformMatrix2fv(location, 1, GL_FALSE, as_float);
+			break;
+		case GL_FLOAT_MAT3:
+			glUniformMatrix3fv(location, 1, GL_FALSE, as_float);
+			break;
+		case GL_FLOAT_MAT4:
+			glUniformMatrix4fv(location, 1, GL_FALSE, as_float);
+			break;
+
+		//single integers, booleans, samplers and anything unrecognized
+		case GL_INT:
+		case GL_BOOL:
+		case GL_SAMPLER_1D:
+		case GL_SAMPLER_2D:
+		case GL_SAMPLER_3D:
+		case GL_SAMPLER_CUBE:
+		case GL_SAMPLER_1D_SHADOW:
+		case GL_SAMPLER_2D_SHADOW:
+		default:
+			glUniform1iv(location, 1, as_int);
+			break;
+		}
+		CHECK_OPENGL_ERROR
+	}
+};
+
+
+
+#endif
 \ No newline at end of file
+#ifndef RTS_GLUTILITIES_H
+#define RTS_GLUTILITIES_H
+
+
+#define CHECK_OPENGL_ERROR \
+{ GLenum error; \
+   while ( (error = glGetError()) != GL_NO_ERROR) { \
+   printf( "OpenGL ERROR: %s\nCHECK POINT: %s (line %d)\n", gluErrorString(error), __FILE__, __LINE__ ); \
+   } \
+} 
+
+#endif
 \ No newline at end of file
+#ifndef RTS_GLTEXTUREMAP_H
+#define RTS_GLTEXTUREMAP_H
+
+//#include <GL/glew.h>
+#include "rts/math/vector.h"
+#include "rts/gl/error.h"
+#include <stdlib.h>
+
+namespace rts{
+
+///This class stores an OpenGL texture map: the texture name, its target,
+///its dimensions and the formats used to upload and read back its data.
+///The class has no destructor; call Clean() to delete the OpenGL texture.
+class glTexture
+{
+private:
+	//guesses the texture type based on the size (currently not called by init())
+	void get_type()
+	{
+		if(size[1] == 0)
+			texture_type = GL_TEXTURE_1D;
+		else if(size[2] == 0)
+			texture_type = GL_TEXTURE_2D;
+		else
+			texture_type = GL_TEXTURE_3D;
+	}
+	//set the texture wrapping based on the dimensions
+	void set_wrapping()
+	{
+		CHECK_OPENGL_ERROR
+		switch(texture_type)
+		{
+		//the first three cases fall through on purpose:
+		//a 3D texture also sets T and S, a 2D texture also sets S
+		case GL_TEXTURE_3D:
+			glTexParameteri(texture_type, GL_TEXTURE_WRAP_R_EXT, GL_REPEAT);
+		case GL_TEXTURE_2D:
+			glTexParameteri(texture_type, GL_TEXTURE_WRAP_T, GL_MIRRORED_REPEAT);
+		case GL_TEXTURE_1D:
+			glTexParameteri(texture_type, GL_TEXTURE_WRAP_S, GL_REPEAT);
+			break;
+		case GL_TEXTURE_RECTANGLE_ARB:
+			glTexParameteri(texture_type, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+			glTexParameteri(texture_type, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+			break;
+
+		default:
+			break;
+		}
+		CHECK_OPENGL_ERROR
+	}
+public:
+	vector<GLsizei, 3> size;		//vector representing the size of the texture
+	GLuint name;				//texture name assigned by OpenGL (0 = no texture)
+	GLenum texture_type;				//1D, 2D, 3D
+	GLint internal_format;	//number of components (ex. 4 for RGBA)
+	GLenum pixel_format;		//type of data (RGBA, LUMINANCE)
+	GLenum data_type;			//data type of the bits (float, int, etc.)
+
+	///Creates an object without an OpenGL texture; the remaining members get
+	///the same defaults that init() uses for its parameters.
+	glTexture()
+	{
+		name = 0;
+		texture_type = GL_TEXTURE_2D;
+		internal_format = 1;
+		pixel_format = GL_LUMINANCE;
+		data_type = GL_UNSIGNED_BYTE;
+	}
+	///Creates the object and immediately builds the OpenGL texture; see init().
+	glTexture(GLvoid *bits,
+			   GLenum type = GL_TEXTURE_2D,
+			   GLsizei width = 256,
+			   GLsizei height = 256,
+			   GLsizei depth = 0,
+			   GLint internalformat = 1,
+			   GLenum format = GL_LUMINANCE,
+			   GLenum datatype = GL_UNSIGNED_BYTE,
+			   GLint interpolation = GL_LINEAR)
+    {
+        //init() deletes the texture called `name` if it is non-zero, so the
+        //member must not be left uninitialized here
+        name = 0;
+        init(bits, type, width, height, depth, internalformat, format, datatype, interpolation);
+    }
+
+	///Enables the texture target and binds this texture.
+	void begin()
+	{
+		glEnable(texture_type);
+		CHECK_OPENGL_ERROR
+		glBindTexture(texture_type, name);
+		CHECK_OPENGL_ERROR
+	}
+	///Disables the texture target.
+	void end()
+	{
+		glDisable(texture_type);
+		CHECK_OPENGL_ERROR
+	}
+
+	///Creates an OpenGL texture map. This function requires basic information about the texture map as well as a pointer to the bit data describing the texture.
+	///Any texture previously held by the object is deleted first.
+	///@param bits            pixel data handed to OpenGL
+	///@param type            texture target (GL_TEXTURE_1D/2D/3D, GL_TEXTURE_RECTANGLE_ARB)
+	///@param width           size along the first dimension
+	///@param height          size along the second dimension
+	///@param depth           size along the third dimension
+	///@param internalformat  internal format passed to glTexImage*
+	///@param format          format of the pixel data
+	///@param datatype        data type of the pixel data
+	///@param interpolation   filter used for both minification and magnification
+	void init(GLvoid *bits,
+			   GLenum type = GL_TEXTURE_2D,
+			   GLsizei width = 256,
+			   GLsizei height = 256,
+			   GLsizei depth = 0,
+			   GLint internalformat = 1,
+			   GLenum format = GL_LUMINANCE,
+			   GLenum datatype = GL_UNSIGNED_BYTE,
+			   GLint interpolation = GL_LINEAR)
+	{
+		CHECK_OPENGL_ERROR
+		if(name != 0)
+			glDeleteTextures(1, &name);
+
+		CHECK_OPENGL_ERROR
+		if(datatype == GL_FLOAT)
+		{
+			//row alignment, in bytes, that OpenGL assumes when it writes to
+			//(PACK) or reads from (UNPACK) client memory
+			glPixelStorei(GL_PACK_ALIGNMENT, 4);
+			glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
+		}
+		//for GL_UNSIGNED_BYTE and GL_UNSIGNED_SHORT the current alignment is left unchanged
+		CHECK_OPENGL_ERROR
+		glGenTextures(1, &name);							//get the texture name from OpenGL
+		CHECK_OPENGL_ERROR
+		size = vector<GLsizei, 3>(width, height, depth);		//assign the texture size
+		texture_type = type;						//set the type of texture
+		glEnable(texture_type);						//enable the texture map
+		CHECK_OPENGL_ERROR
+		glBindTexture(texture_type, name);							//bind the texture for editing
+		CHECK_OPENGL_ERROR
+		set_wrapping();										//set the texture wrapping parameters
+		CHECK_OPENGL_ERROR
+		glTexParameteri(texture_type, GL_TEXTURE_MAG_FILTER, interpolation);		//set filtering
+		CHECK_OPENGL_ERROR
+		glTexParameteri(texture_type, GL_TEXTURE_MIN_FILTER, interpolation);
+		CHECK_OPENGL_ERROR
+		internal_format = internalformat;					//set the number of components per pixel
+		pixel_format = format;								//set the pixel format
+		data_type = datatype;									//set the data type
+		SetBits(bits);										//send the bits to the OpenGL driver
+		glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);	//replace the specified vertex color
+		CHECK_OPENGL_ERROR
+		glDisable(texture_type);
+
+		//NOTE(review): these calls act on whatever texture is bound to
+		//GL_TEXTURE_3D. For a 3D texture that is this one, which overrides the
+		//wrapping chosen in set_wrapping(); for any other type it is some
+		//other texture - confirm that this is intended.
+		glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+		glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+		glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
+	}
+	///Deletes the OpenGL texture, if there is one.
+	void Clean()
+	{
+		if(name != 0)
+		{
+			glDeleteTextures(1, &name);
+			CHECK_OPENGL_ERROR
+			name = 0;
+		}
+	}
+	///Uploads pixel data, (re)defining the texture image with the stored size
+	///and formats. The texture target is left enabled and the texture bound.
+	void SetBits(GLvoid *bits)
+	{
+		glEnable(texture_type);						//enable the texture map
+		CHECK_OPENGL_ERROR
+		glBindTexture(texture_type, name);
+		CHECK_OPENGL_ERROR
+
+		switch(texture_type)
+		{
+		case GL_TEXTURE_3D:
+			glTexImage3D(texture_type, 0, internal_format, size[0], size[1], size[2], 0, pixel_format, data_type, bits);
+			CHECK_OPENGL_ERROR
+			break;
+		case GL_TEXTURE_2D:
+		case GL_TEXTURE_RECTANGLE_ARB:
+			glTexImage2D(texture_type, 0, internal_format, size[0], size[1], 0, pixel_format, data_type, bits);
+			CHECK_OPENGL_ERROR
+			break;
+		case GL_TEXTURE_1D:
+			glTexImage1D(texture_type, 0, internal_format, size[0], 0, pixel_format, data_type, bits);
+			CHECK_OPENGL_ERROR
+			break;
+		default:
+			//other texture types are not uploaded
+			break;
+		}
+		CHECK_OPENGL_ERROR
+	}
+	///Replaces the pixel data of an existing texture without redefining it.
+	///Only 2D and rectangle textures are updated; for every other type the
+	///call just binds the texture and disables the target again.
+	void ResetBits(GLvoid *bits)
+	{
+		glEnable(texture_type);						//enable the texture map
+		CHECK_OPENGL_ERROR
+		glBindTexture(texture_type, name);
+		CHECK_OPENGL_ERROR
+
+		switch(texture_type)
+		{
+		case GL_TEXTURE_2D:
+		case GL_TEXTURE_RECTANGLE_ARB:
+			glTexSubImage2D(texture_type, 0, 0, 0, size[0], size[1], pixel_format, data_type, bits);
+			CHECK_OPENGL_ERROR
+			break;
+		case GL_TEXTURE_3D:
+		case GL_TEXTURE_1D:
+		default:
+			//not implemented for these texture types
+			break;
+		}
+		glDisable(texture_type);
+		CHECK_OPENGL_ERROR
+	}
+	///Reads the texture data back from OpenGL.
+	///@param format  requested pixel format
+	///@param type    requested data type
+	///@return a buffer allocated with malloc() that the caller has to free(),
+	///        or NULL if the format or type is not supported here, the texture
+	///        is empty, or the allocation fails
+	void* GetBits(GLenum format, GLenum type)
+	{
+		//number of components per pixel; 0 marks an unsupported format
+		int components = 0;
+		switch(format)
+		{
+		case GL_RED:
+		case GL_GREEN:
+		case GL_BLUE:
+		case GL_ALPHA:
+		case GL_LUMINANCE:
+			components = 1;
+			break;
+		case GL_LUMINANCE_ALPHA:
+			components = 2;
+			break;
+		case GL_RGB:
+		case GL_BGR:
+			components = 3;
+			break;
+		case GL_RGBA:
+		case GL_BGRA:
+			components = 4;
+			break;
+		default:
+			break;
+		}
+
+		//size of one component in bytes; 0 marks an unsupported type
+		int type_size = 0;
+		switch(type)
+		{
+		case GL_UNSIGNED_BYTE:
+		case GL_BYTE:
+			type_size = sizeof(char);
+			break;
+		case GL_UNSIGNED_SHORT:
+		case GL_SHORT:
+			type_size = sizeof(short);
+			break;
+		case GL_UNSIGNED_INT:
+		case GL_INT:
+			type_size = sizeof(int);
+			break;
+		case GL_FLOAT:
+			type_size = sizeof(float);
+			break;
+		default:
+			break;
+		}
+
+		//without a known pixel size the buffer cannot be sized safely
+		if(components == 0 || type_size == 0)
+			return NULL;
+
+		//number of texels, using only the dimensions the texture type has
+		size_t texels;
+		switch(texture_type)
+		{
+		case GL_TEXTURE_1D:
+			texels = (size_t)size[0];
+			break;
+		case GL_TEXTURE_3D:
+			texels = (size_t)size[0] * (size_t)size[1] * (size_t)size[2];
+			break;
+		default:
+			texels = (size_t)size[0] * (size_t)size[1];
+			break;
+		}
+		if(texels == 0)
+			return NULL;
+
+		//allocate memory for the texture
+		//NOTE(review): rows are assumed to be tightly packed; with a
+		//GL_PACK_ALIGNMENT larger than the row size in bytes OpenGL pads
+		//each row and needs more room than this - confirm against callers
+		void* result = malloc((size_t)components * (size_t)type_size * texels);
+		if(result == NULL)
+			return NULL;
+
+		begin();
+		glGetTexImage(texture_type, 0, format, type, result);
+
+		CHECK_OPENGL_ERROR
+		end();
+
+		return result;
+	}
+};
+
+}
+
+//generic "unknown" value; not referenced by the texture class above - NOTE(review): confirm where it is used
+#define RTS_UNKNOWN				0
+
+#endif
@@ -15,12 +15,12 @@ namespace rts
 {
  
 template <class T>
-struct rtsComplex
+struct complex
 {
     T r, i;
  
     //default constructor
-    CUDA_CALLABLE rtsComplex()
+    CUDA_CALLABLE complex()
     {
         r = 0.0;
 		i = 0.0;
@@ -49,16 +49,16 @@ struct rtsComplex
 	}
  
     //constructor when given real and imaginary values
-    CUDA_CALLABLE rtsComplex(T r, T i)
+    CUDA_CALLABLE complex(T r, T i)
     {
         this->r = r;
         this->i = i;
     }
  
     //return the current value multiplied by i
-    CUDA_CALLABLE rtsComplex<T> imul()
+    CUDA_CALLABLE complex<T> imul()
     {
-        rtsComplex<T> result;
+        complex<T> result;
         result.r = -i;
         result.i = r;
  
@@ -68,70 +68,70 @@ struct rtsComplex
 	//ARITHMETIC OPERATORS--------------------
  
     //binary + operator (returns the result of adding two complex values)
-    CUDA_CALLABLE rtsComplex<T> operator+ (const rtsComplex<T> rhs)
+    CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs)
     {
-        rtsComplex<T> result;
+        complex<T> result;
         result.r = r + rhs.r;
         result.i = i + rhs.i;
         return result;
     }
  
-	CUDA_CALLABLE rtsComplex<T> operator+ (const T rhs)
+	CUDA_CALLABLE complex<T> operator+ (const T rhs)
     {
-        rtsComplex<T> result;
+        complex<T> result;
         result.r = r + rhs;
         result.i = i;
         return result;
     }
  
     //binary - operator (returns the result of adding two complex values)
-    CUDA_CALLABLE rtsComplex<T> operator- (const rtsComplex<T> rhs)
+    CUDA_CALLABLE complex<T> operator- (const complex<T> rhs)
     {
-        rtsComplex<T> result;
+        complex<T> result;
         result.r = r - rhs.r;
         result.i = i - rhs.i;
         return result;
     }
  
     //binary - operator (returns the result of adding two complex values)
-    CUDA_CALLABLE rtsComplex<T> operator- (const T rhs)
+    CUDA_CALLABLE complex<T> operator- (const T rhs)
     {
-        rtsComplex<T> result;
+        complex<T> result;
         result.r = r - rhs;
         result.i = i;
         return result;
     }
  
     //binary MULTIPLICATION operators (returns the result of multiplying complex values)
-    CUDA_CALLABLE rtsComplex<T> operator* (const rtsComplex<T> rhs)
+    CUDA_CALLABLE complex<T> operator* (const complex<T> rhs)
     {
-        rtsComplex<T> result;
+        complex<T> result;
         result.r = r * rhs.r - i * rhs.i;
         result.i = r * rhs.i + i * rhs.r;
         return result;
     }
-    CUDA_CALLABLE rtsComplex<T> operator* (const T rhs)
+    CUDA_CALLABLE complex<T> operator* (const T rhs)
     {
-        return rtsComplex<T>(r * rhs, i * rhs);
+        return complex<T>(r * rhs, i * rhs);
     }
  
     //binary DIVISION operators (returns the result of dividing complex values)
-    CUDA_CALLABLE rtsComplex<T> operator/ (const rtsComplex<T> rhs)
+    CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs)
     {
-        rtsComplex<T> result;
+        complex<T> result;
         T denom = rhs.r * rhs.r + rhs.i * rhs.i;
         result.r = (r * rhs.r + i * rhs.i) / denom;
         result.i = (- r * rhs.i + i * rhs.r) / denom;
  
         return result;
     }
-    CUDA_CALLABLE rtsComplex<T> operator/ (const T rhs)
+    CUDA_CALLABLE complex<T> operator/ (const T rhs)
     {
-        return rtsComplex<T>(r / rhs, i / rhs);
+        return complex<T>(r / rhs, i / rhs);
     }
  
     //ASSIGNMENT operators-----------------------------------
-    CUDA_CALLABLE rtsComplex<T> & operator=(const rtsComplex<T> &rhs)
+    CUDA_CALLABLE complex<T> & operator=(const complex<T> &rhs)
     {
         //check for self-assignment
         if(this != &rhs)
@@ -141,7 +141,7 @@ struct rtsComplex
         }
         return *this;
     }
-    CUDA_CALLABLE rtsComplex<T> & operator=(const T &rhs)
+    CUDA_CALLABLE complex<T> & operator=(const T &rhs)
     {
         this->r = rhs;
         this->i = 0;
@@ -150,34 +150,34 @@ struct rtsComplex
     }
  
     //arithmetic assignment operators
-    CUDA_CALLABLE rtsComplex<T> operator+=(const rtsComplex<T> &rhs)
+    CUDA_CALLABLE complex<T> operator+=(const complex<T> &rhs)
     {
 		*this = *this + rhs;
         return *this;
     }
-    CUDA_CALLABLE rtsComplex<T> operator+=(const T &rhs)
+    CUDA_CALLABLE complex<T> operator+=(const T &rhs)
     {
 		*this = *this + rhs;
         return *this;
     }
  
-    CUDA_CALLABLE rtsComplex<T> operator*=(const rtsComplex<T> &rhs)
+    CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs)
     {
 		*this = *this * rhs;
         return *this;
     }
-	CUDA_CALLABLE rtsComplex<T> operator*=(const T &rhs)
+	CUDA_CALLABLE complex<T> operator*=(const T &rhs)
     {
 		*this = *this * rhs;
         return *this;
     }
 	//divide and assign
-	CUDA_CALLABLE rtsComplex<T> operator/=(const rtsComplex<T> &rhs)
+	CUDA_CALLABLE complex<T> operator/=(const complex<T> &rhs)
     {
 		*this = *this / rhs;
         return *this;
     }
-    CUDA_CALLABLE rtsComplex<T> operator/=(const T &rhs)
+    CUDA_CALLABLE complex<T> operator/=(const T &rhs)
     {
 		*this = *this / rhs;
         return *this;
@@ -189,9 +189,9 @@ struct rtsComplex
 		return std::sqrt(r * r + i * i);
 	}
  
-	CUDA_CALLABLE rtsComplex<T> log()
+	CUDA_CALLABLE complex<T> log()
 	{
-        rtsComplex<T> result;
+        complex<T> result;
         result.r = std::log(std::sqrt(r * r + i * i));
         result.i = std::atan2(i, r);
  
@@ -199,9 +199,9 @@ struct rtsComplex
         return result;
 	}
  
-	CUDA_CALLABLE rtsComplex<T> exp()
+	CUDA_CALLABLE complex<T> exp()
 	{
-        rtsComplex<T> result;
+        complex<T> result;
  
         T e_r = std::exp(r);
         result.r = e_r * std::cos(i);
@@ -216,18 +216,18 @@ struct rtsComplex
         return pow((double)y);
 	}*/
  
-	CUDA_CALLABLE rtsComplex<T> pow(T y)
+	CUDA_CALLABLE complex<T> pow(T y)
 	{
-        rtsComplex<T> result;
+        complex<T> result;
  
         result = log() * y;
  
         return result.exp();
 	}
  
-	CUDA_CALLABLE rtsComplex<T> sqrt()
+	CUDA_CALLABLE complex<T> sqrt()
 	{
-		rtsComplex<T> result;
+		complex<T> result;
  
 		//convert to polar coordinates
 		T a = std::sqrt(r*r + i*i);
@@ -253,7 +253,7 @@ struct rtsComplex
 	}
  
 	//COMPARISON operators
-	CUDA_CALLABLE bool operator==(rtsComplex<T> rhs)
+	CUDA_CALLABLE bool operator==(complex<T> rhs)
 	{
         if(r == rhs.r && i == rhs.i)
             return true;
@@ -267,72 +267,44 @@ struct rtsComplex
         return false;
     }
  
-	/*//FRIEND functions
-    //unary minus operator (for negating the complex number)
-	template<class A> CUDA_CALLABLE friend complex<A> operator-(const complex<A> &rhs);
-
-	//multiplication by T values when the complex number isn't on the left hand side
-	template<class A> CUDA_CALLABLE friend complex<A> operator*(const A a, const complex<A> b);
-
-	//division by T values when the complex number isn't on the left hand side
-	template<class A> CUDA_CALLABLE friend complex<A> operator/(const A a, const complex<A> b);
-
-	//POW function
-	//template<class A> CUDA_CALLABLE friend complex<A> pow(const complex<A> x, T y);
-	template<class A> CUDA_CALLABLE friend complex<A> pow(const complex<A> x, int y);
-
-	//log function
-	template<class A> CUDA_CALLABLE friend complex<A> log(complex<A> x);
-
-	//exp function
-	template<class A> CUDA_CALLABLE friend complex<A> exp(complex<A> x);
-
-	//sqrt function
-	template<class A> CUDA_CALLABLE friend complex<A> sqrt(complex<A> x);
-
-	//trigonometric functions
-	template<class A> CUDA_CALLABLE friend complex<A> sin(complex<A> x);
-
-	template<class A> CUDA_CALLABLE friend complex<A> cos(complex<A> x);*/
-
 };
  
 }	//end RTS namespace
  
 //addition
 template<typename T>
-CUDA_CALLABLE static rts::rtsComplex<T> operator+(const double a, const rts::rtsComplex<T> b)
+CUDA_CALLABLE static rts::complex<T> operator+(const double a, const rts::complex<T> b)
 {
-    return rts::rtsComplex<T>(a + b.r, b.i);
+    return rts::complex<T>(a + b.r, b.i);
 }
  
 //subtraction with a real value
 template<typename T>
-CUDA_CALLABLE static rts::rtsComplex<T> operator-(const double a, const rts::rtsComplex<T> b)
+CUDA_CALLABLE static rts::complex<T> operator-(const double a, const rts::complex<T> b)
 {
-    return rts::rtsComplex<T>(a - b.r, -b.i);
+    return rts::complex<T>(a - b.r, -b.i);
 }
  
 //minus sign
 template<typename T>
-CUDA_CALLABLE static rts::rtsComplex<T> operator-(const rts::rtsComplex<T> &rhs)
+CUDA_CALLABLE static rts::complex<T> operator-(const rts::complex<T> &rhs)
 {
-    return rts::rtsComplex<T>(-rhs.r, -rhs.i);
+    return rts::complex<T>(-rhs.r, -rhs.i);
 }
  
 //multiply a T value by a complex value
 template<typename T>
-CUDA_CALLABLE static rts::rtsComplex<T> operator*(const double a, const rts::rtsComplex<T> b)
+CUDA_CALLABLE static rts::complex<T> operator*(const double a, const rts::complex<T> b)
 {
-    return rts::rtsComplex<T>(a * b.r, a * b.i);
+    return rts::complex<T>((T)a * b.r, (T)a * b.i);
 }
  
 //divide a T value by a complex value
 template<typename T>
-CUDA_CALLABLE static rts::rtsComplex<T> operator/(const double a, const rts::rtsComplex<T> b)
+CUDA_CALLABLE static rts::complex<T> operator/(const double a, const rts::complex<T> b)
 {
     //return complex<T>(a * b.r, a * b.i);
-    rts::rtsComplex<T> result;
+    rts::complex<T> result;
  
     T denom = b.r * b.r + b.i * b.i;
  
@@ -350,41 +322,41 @@ CUDA_CALLABLE static complex&lt;T&gt; pow(complex&lt;T&gt; x, int y)
 }*/
  
 template<typename T>
-CUDA_CALLABLE static rts::rtsComplex<T> pow(rts::rtsComplex<T> x, T y)
+CUDA_CALLABLE static rts::complex<T> pow(rts::complex<T> x, T y)
 {
 	return x.pow(y);
 }
  
 //log function
 template<typename T>
-CUDA_CALLABLE static rts::rtsComplex<T> log(rts::rtsComplex<T> x)
+CUDA_CALLABLE static rts::complex<T> log(rts::complex<T> x)
 {
 	return x.log();
 }
  
 //exp function
 template<typename T>
-CUDA_CALLABLE static rts::rtsComplex<T> exp(rts::rtsComplex<T> x)
+CUDA_CALLABLE static rts::complex<T> exp(rts::complex<T> x)
 {
 	return x.exp();
 }
  
 //sqrt function
 template<typename T>
-CUDA_CALLABLE static rts::rtsComplex<T> sqrt(rts::rtsComplex<T> x)
+CUDA_CALLABLE static rts::complex<T> sqrt(rts::complex<T> x)
 {
 	return x.sqrt();
 }
  
  
 template <typename T>
-CUDA_CALLABLE static T abs(rts::rtsComplex<T> a)
+CUDA_CALLABLE static T abs(rts::complex<T> a)
 {
     return a.abs();
 }
  
 template <typename T>
-CUDA_CALLABLE static T real(rts::rtsComplex<T> a)
+CUDA_CALLABLE static T real(rts::complex<T> a)
 {
     return a.r;
 }
@@ -396,16 +368,16 @@ CUDA_CALLABLE static float real(float a)
 }
  
 template <typename T>
-CUDA_CALLABLE static T imag(rts::rtsComplex<T> a)
+CUDA_CALLABLE static T imag(rts::complex<T> a)
 {
     return a.i;
 }
  
 //trigonometric functions
 template<class A>
-CUDA_CALLABLE rts::rtsComplex<A> sin(const rts::rtsComplex<A> x)
+CUDA_CALLABLE rts::complex<A> sin(const rts::complex<A> x)
 {
-	rts::rtsComplex<A> result;
+	rts::complex<A> result;
 	result.r = std::sin(x.r) * std::cosh(x.i);
 	result.i = std::cos(x.r) * std::sinh(x.i);
  
@@ -413,9 +385,9 @@ CUDA_CALLABLE rts::rtsComplex&lt;A&gt; sin(const rts::rtsComplex&lt;A&gt; x)
 }
  
 template<class A>
-CUDA_CALLABLE rts::rtsComplex<A> cos(const rts::rtsComplex<A> x)
+CUDA_CALLABLE rts::complex<A> cos(const rts::complex<A> x)
 {
-	rts::rtsComplex<A> result;
+	rts::complex<A> result;
 	result.r = std::cos(x.r) * std::cosh(x.i);
 	result.i = -(std::sin(x.r) * std::sinh(x.i));
  
@@ -424,12 +396,16 @@ CUDA_CALLABLE rts::rtsComplex&lt;A&gt; cos(const rts::rtsComplex&lt;A&gt; x)
  
  
 template<class A>
-std::ostream& operator<<(std::ostream& os, rts::rtsComplex<A> x)
+std::ostream& operator<<(std::ostream& os, rts::complex<A> x)
 {
     os<<x.toStr();
     return os;
 }
  
+#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
+template<class T> using rtsComplex = rts::complex<T>;
+#endif
+
  
  
 #endif
+#ifndef RTS_FUNCTION_H
+#define RTS_FUNCTION_H
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+namespace rts{
+
+template <class X, class Y>
+class function
+{
+	//a single sample of the function: the value y stored at position x
+	struct dataPoint
+	{
+		X x;
+		Y y;
+	};
+
+	//sample list; insert() places every new sample so that positions stay in ascending order
+	std::vector<dataPoint> f;
+
+	//ordering used by the binary search: compares two samples by position only
+	static bool lessX(const dataPoint& a, const dataPoint& b)
+	{
+		return (a.x < b.x);
+	}
+
+	//returns an iterator to the first sample whose position is strictly greater than x,
+	//	or f.end() when no such sample exists
+	typename std::vector< dataPoint >::iterator ceiling(X x)
+	{
+		dataPoint s;
+		s.x = x;
+
+		return std::upper_bound(f.begin(), f.end(), s, &function<X, Y>::lessX);
+	}
+
+
+public:
+	//Evaluates the function at x using linear interpolation between the two
+	//	neighboring samples. Positions past either end of the sample list return
+	//	the value of the nearest end sample; an empty function returns Y().
+	Y linear(X x)
+	{
+		//nothing to interpolate (front()/back() are undefined on an empty vector)
+		if(f.empty())
+			return Y();
+
+		typename std::vector< dataPoint >::iterator it = ceiling(x);
+
+		//if x is at or past the last sample, return the back
+		if(it == f.end())
+			return f.back().y;
+		//if x is before the first sample, return the front
+		if(it == f.begin())
+			return f.front().y;
+
+		//otherwise interpolate between the samples on either side of x
+		X xMax = (*it).x;
+		X xMin = (*(it - 1)).x;
+
+		//a = 0 at the lower sample and approaches 1 at the upper sample
+		X a = (x - xMin) / (xMax - xMin);
+
+		//local, non-const copies so that Y types with non-const operators can be used
+		Y yMin = (*(it - 1)).y;
+		Y yMax = (*it).y;
+		Y interp;
+		interp = yMin * (1.0 - a) + yMax * a;
+		return interp;
+	}
+
+	//Adds the sample (x, y), keeping the list ordered by position. A sample whose
+	//	position equals an existing one is placed after it.
+	void insert(X x, Y y)
+	{
+		dataPoint s;
+		s.x = x;
+		s.y = y;
+
+		//inserting at f.end() appends, so no special case is needed
+		f.insert(ceiling(x), s);
+	}
+
+	//returns the position of sample i (no bounds checking)
+	X getX(unsigned int i)
+	{
+		return f[i].x;
+	}
+
+	//returns the value of sample i (no bounds checking)
+	Y getY(unsigned int i)
+	{
+		return f[i].y;
+	}
+
+	//returns the number of samples
+	unsigned int getN()
+	{
+		return (unsigned int)f.size();
+	}
+
+	//returns a copy of sample i (no bounds checking)
+	dataPoint operator[](int i)
+	{
+		return f[i];
+	}
+
+	//returns a new function in which r has been added to the value of every sample
+	function<X, Y> operator+(Y r)
+	{
+		function<X, Y> result;
+		result.f = f;
+
+		//add r to every point in the copy
+		for(typename std::vector< dataPoint >::size_type i = 0; i < result.f.size(); i++)
+			result.f[i].y += r;
+
+		return result;
+	}
+
+
+};
+
+}	//end namespace rts
+
+
+#endif
@@ -9,12 +9,12 @@ namespace rts
 {
  
 template <class T, int N>
-struct rtsMatrix
+struct matrix
 {
 	//the matrix will be stored in column-major order (compatible with OpenGL)
 	T M[N*N];
  
-	rtsMatrix()
+	matrix()
 	{
 		for(int r=0; r<N; r++)
 			for(int c=0; c<N; c++)
@@ -29,7 +29,7 @@ struct rtsMatrix
 		return M[col * N + row];
 	}
  
-	rtsMatrix<T, N> operator=(T rhs)
+	matrix<T, N> operator=(T rhs)
 	{
 		int Nsq = N*N;
 		for(int i=0; i<Nsq; i++)
@@ -38,7 +38,7 @@ struct rtsMatrix
 		return *this;
 	}
  
-	/*rtsMatrix<T, N> operator=(rtsMatrix<T, N> rhs)
+	/*matrix<T, N> operator=(matrix<T, N> rhs)
 	{
 		for(int i=0; i<N; i++)
 			M[i] = rhs.M[i];
@@ -46,9 +46,9 @@ struct rtsMatrix
 		return *this;
 	}*/
  
-	rtsVector<T, N> operator*(rtsVector<T, N> rhs)
+	vector<T, N> operator*(vector<T, N> rhs)
 	{
-		rtsVector<T, N> result;
+		vector<T, N> result;
  
 		for(int r=0; r<N; r++)
 			for(int c=0; c<N; c++)
@@ -82,10 +82,14 @@ struct rtsMatrix
 }	//end namespace rts
  
 template <typename T, int N>
-std::ostream& operator<<(std::ostream& os, rts::rtsMatrix<T, N> M)
+std::ostream& operator<<(std::ostream& os, rts::matrix<T, N> M)
 {
     os<<M.toStr();
     return os;
 }
  
+#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
+template<class T, int N> using rtsMatrix = rts::matrix<T, N>;
+#endif
+
 #endif
 #ifndef RTS_rtsPoint_H
 #define RTS_rtsPoint_H
  
-#include "rts/math/vector.h"
+#include "rts/math/vector.h"
 #include <string.h>
 #include "rts/cuda/callable.h"
  
@@ -9,17 +9,17 @@ namespace rts
 {
  
 template <class T, int N>
-struct rtsPoint
+struct point
 {
 	T p[N];
  
-	CUDA_CALLABLE rtsPoint()
-	{
+	CUDA_CALLABLE point()
+	{
  
 	}
  
 	//efficiency constructor, makes construction easier for 1D-4D vectors
-	CUDA_CALLABLE rtsPoint(T x, T y = (T)0.0, T z = (T)0.0, T w = (T)0.0)
+	CUDA_CALLABLE point(T x, T y = (T)0.0, T z = (T)0.0, T w = (T)0.0)
 	{
 		if(N >= 1)
 			p[0] = x;
@@ -29,51 +29,51 @@ struct rtsPoint
 			p[2] = z;
 		if(N >= 4)
 			p[3] = w;
-	}
-
-	//arithmetic operators
-	CUDA_CALLABLE rts::rtsPoint<T, N> operator+(rts::rtsVector<T, N> v)
-	{
-        rts::rtsPoint<T, N> r;
-
-        //calculate the position of the resulting rtsPoint
-        for(int i=0; i<N; i++)
-            r.p[i] = p[i] + v.v[i];
-
-        return r;
-	}
-	CUDA_CALLABLE rts::rtsPoint<T, N> operator-(rts::rtsVector<T, N> v)
-	{
-        rts::rtsPoint<T, N> r;
-
-        //calculate the position of the resulting rtsPoint
-        for(int i=0; i<N; i++)
-            r.p[i] = p[i] - v.v[i];
-
-        return r;
-	}
-	CUDA_CALLABLE rts::rtsVector<T, N> operator-(rts::rtsPoint<T, N> rhs)
-	{
-        rts::rtsVector<T, N> r;
-
-        //calculate the position of the resulting rtsPoint
-        for(int i=0; i<N; i++)
-            r.v[i] = p[i] - rhs.p[i];
-
-        return r;
-	}
-	CUDA_CALLABLE rts::rtsPoint<T, N> operator*(T rhs)
-	{
-        rts::rtsPoint<T, N> r;
-
-        //calculate the position of the resulting rtsPoint
-        for(int i=0; i<N; i++)
-            r.p[i] = p[i] * rhs;
-
-        return r;
 	}
  
-	CUDA_CALLABLE rtsPoint(const T(&data)[N])
+	//arithmetic operators
+	CUDA_CALLABLE rts::point<T, N> operator+(vector<T, N> v)
+	{
+        rts::point<T, N> r;
+
+        //calculate the position of the resulting point
+        for(int i=0; i<N; i++)
+            r.p[i] = p[i] + v.v[i];
+
+        return r;
+	}
+	CUDA_CALLABLE rts::point<T, N> operator-(vector<T, N> v)
+	{
+        rts::point<T, N> r;
+
+        //calculate the position of the resulting point
+        for(int i=0; i<N; i++)
+            r.p[i] = p[i] - v.v[i];
+
+        return r;
+	}
+	CUDA_CALLABLE vector<T, N> operator-(point<T, N> rhs)
+	{
+        vector<T, N> r;
+
+        //calculate the position of the resulting point
+        for(int i=0; i<N; i++)
+            r.v[i] = p[i] - rhs.p[i];
+
+        return r;
+	}
+	CUDA_CALLABLE rts::point<T, N> operator*(T rhs)
+	{
+        rts::point<T, N> r;
+
+        //calculate the position of the resulting point
+        for(int i=0; i<N; i++)
+            r.p[i] = p[i] * rhs;
+
+        return r;
+	}
+
+	CUDA_CALLABLE point(const T(&data)[N])
 	{
 		memcpy(p, data, sizeof(T) * N);
 	}
@@ -92,34 +92,36 @@ struct rtsPoint
 		ss<<")";
  
 		return ss.str();
-	}
-
-	//bracket operator
-	CUDA_CALLABLE T& operator[](int i)
+	}
+
+	//bracket operator
+	CUDA_CALLABLE T& operator[](int i)
 	{
-        return p[i];
+        return p[i];
     }
  
-};
-
-}	//end namespace rts
-
-template <typename T, int N>
-std::ostream& operator<<(std::ostream& os, rts::rtsPoint<T, N> p)
-{
-    os<<p.toStr();
-    return os;
-}
-
-//arithmetic
-template <typename T, int N>
-CUDA_CALLABLE rts::rtsPoint<T, N> operator*(T lhs, rts::rtsPoint<T, N> rhs)
-{
-    rts::rtsPoint<T, N> r;
-
-    return rhs * lhs;
-}
-
-
-
-#endif
+};
+
+}	//end namespace rts
+
+template <typename T, int N>
+std::ostream& operator<<(std::ostream& os, rts::point<T, N> p)
+{
+    os<<p.toStr();
+    return os;
+}
+
+//arithmetic
+template <typename T, int N>
+CUDA_CALLABLE rts::point<T, N> operator*(T lhs, rts::point<T, N> rhs)
+{
+    rts::point<T, N> r;
+
+    return rhs * lhs;
+}
+
+#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
+template<class T, int N> using rtsPoint = rts::point<T, N>;
+#endif
+
+#endif
@@ -6,13 +6,14 @@
 #include "rts/math/vector.h"
 #include "rts/math/point.h"
 #include "rts/math/triangle.h"
+#include "rts/math/quaternion.h"
 #include <iostream>
  
 namespace rts{
  
-//template for a rtsQuadangle class in ND space
+//template for a quadangle class in ND space
 template <class T, int N>
-struct rtsQuad
+struct quad
 {
 	/*
 		C------------------>O
@@ -28,17 +29,17 @@ struct rtsQuad
 	T B[N];
 	T C[N];*/
  
-	rts::rtsPoint<T, N> A;
-	rts::rtsVector<T, N> X;
-	rts::rtsVector<T, N> Y;
+	rts::point<T, N> A;
+	rts::vector<T, N> X;
+	rts::vector<T, N> Y;
  
  
-	CUDA_CALLABLE rtsQuad()
+	CUDA_CALLABLE quad()
 	{
  
 	}
  
-	CUDA_CALLABLE rtsQuad(rtsPoint<T, N> a, rtsPoint<T, N> b, rtsPoint<T, N> c)
+	CUDA_CALLABLE quad(point<T, N> a, point<T, N> b, point<T, N> c)
 	{
  
 		A = a;
@@ -47,48 +48,90 @@ struct rtsQuad
  
 	}
  
-	CUDA_CALLABLE rtsQuad(rts::rtsPoint<T, N> pMin, rts::rtsPoint<T, N> pMax, rts::rtsVector<T, N> normal)
+    /****************************************************************
+    Constructor - create a quad from two points and a normal
+    ****************************************************************/
+	CUDA_CALLABLE quad(rts::point<T, N> pMin, rts::point<T, N> pMax, rts::vector<T, N> normal)
 	{
  
-        //assign the corner rtsPoint
+        //assign the corner point
         A = pMin;
  
         //compute the vector from pMin to pMax
-        rts::rtsVector<T, 3> v0;
+        rts::vector<T, 3> v0;
         v0 = pMax - pMin;
  
         //compute the cross product of A and the plane normal
-        rts::rtsVector<T, 3> v1;
+        rts::vector<T, 3> v1;
         v1 = v0.cross(normal);
  
  
-        //calculate rtsPoint B
-        rts::rtsPoint<T, 3> B;
+        //calculate point B
+        rts::point<T, 3> B;
         B = A + v0 * 0.5 + v1 * 0.5;
  
         //calculate rtsPoint C
-        rts::rtsPoint<T, 3> C;
+        rts::point<T, 3> C;
         C = A  + v0 * 0.5 - v1 * 0.5;
  
         //calculate X and Y
         X = B - A;
         Y = C - A;
+	}
+
+	/*******************************************************************
+	Constructor - create a quad from a position, normal, and rotation
+	*******************************************************************/
+	CUDA_CALLABLE quad(rts::point<T, N> c, rts::vector<T, N> normal, T width, T height, T theta)
+	{
+
+        //compute the X direction - start along world-space X
+        Y = rts::vector<T, N>(0, 1, 0);
+        if(Y == normal)
+            Y = rts::vector<T, N>(0, 0, 1);
+
+        X = Y.cross(normal).norm();
+
+        std::cout<<X<<std::endl;
+
+        //rotate the X axis by theta radians
+        rts::quaternion<T> q;
+        q.CreateRotation(theta, normal);
+        X = q.toMatrix3() * X;
+        Y = normal.cross(X);
  
+        //normalize everything
+        X = X.norm();
+        Y = Y.norm();
  
+        //scale to match the quad width and height
+        X = X * width;
+        Y = Y * height;
  
+        //set the corner of the plane
+        A = c - X * 0.5 - Y * 0.5;
  
+        std::cout<<X<<std::endl;
 	}
  
-	CUDA_CALLABLE rts::rtsPoint<T, N> p(T a, T b)
+	/*******************************************
+	Return the normal for the quad
+	*******************************************/
+	CUDA_CALLABLE rts::vector<T, N> n()
 	{
-		rts::rtsPoint<T, N> result;
+        return (X.cross(Y)).norm();
+	}
+
+	CUDA_CALLABLE rts::point<T, N> p(T a, T b)
+	{
+		rts::point<T, N> result;
 		//given the two parameters a, b = [0 1], returns the position in world space
 		result = A + X * a + Y * b;
  
 		return result;
 	}
  
-	CUDA_CALLABLE rts::rtsPoint<T, N> operator()(T a, T b)
+	CUDA_CALLABLE rts::point<T, N> operator()(T a, T b)
 	{
 		return p(a, b);
 	}
@@ -106,15 +149,15 @@ struct rtsQuad
  
 	}
  
-	CUDA_CALLABLE rtsQuad<T, N> operator*(T rhs)
+	CUDA_CALLABLE quad<T, N> operator*(T rhs)
 	{
 		//scales the plane by a scalar value
  
-		//compute the center rtsPoint
-		rts::rtsPoint<T, N> c = A + X*0.5 + Y*0.5;
+		//compute the center point
+		rts::point<T, N> c = A + X*0.5 + Y*0.5;
  
-		//create the new rtsQuadangle
-		rtsQuad<T, N> result;
+		//create the new quadangle
+		quad<T, N> result;
 		result.X = X * rhs;
 		result.Y = Y * rhs;
 		result.A = c - result.X*0.5 - result.Y*0.5;
@@ -123,13 +166,13 @@ struct rtsQuad
  
 	}
  
-	CUDA_CALLABLE T dist(rtsPoint<T, N> p)
+	CUDA_CALLABLE T dist(point<T, N> p)
 	{
         //compute the distance between a point and this quad
  
         //first break the quad up into two triangles
-        rtsTriangle<T, N> T0(A, A+X, A+Y);
-        rtsTriangle<T, N> T1(A+X+Y, A+X, A+Y);
+        triangle<T, N> T0(A, A+X, A+Y);
+        triangle<T, N> T1(A+X+Y, A+X, A+Y);
  
  
         ptype d0 = T0.dist(p);
@@ -140,12 +183,22 @@ struct rtsQuad
         else
             return d1;
 	}
+
+	CUDA_CALLABLE T dist_max(point<T, N> p)
+	{
+        T da = (A - p).len();
+        T db = (A+X - p).len();
+        T dc = (A+Y - p).len();
+        T dd = (A+X+Y - p).len();
+
+        return max( da, max(db, max(dc, dd) ) );
+	}
 };
  
 }	//end namespace rts
  
 template <typename T, int N>
-std::ostream& operator<<(std::ostream& os, rts::rtsQuad<T, N> R)
+std::ostream& operator<<(std::ostream& os, rts::quad<T, N> R)
 {
     os<<R.toStr();
     return os;
@@ -6,7 +6,7 @@
 namespace rts{
  
 template<typename T>
-class rtsQuaternion
+class quaternion
 {
 public:
 	T w;
@@ -16,18 +16,19 @@ public:
  
 	void normalize();
 	void CreateRotation(T theta, T axis_x, T axis_y, T axis_z);
-	void CreateRotation(T theta, rtsVector<T, 3> axis);
-	rtsQuaternion<T> operator*(rtsQuaternion<T> &rhs);
-	rtsMatrix<T, 3> toMatrix();
+	void CreateRotation(T theta, vector<T, 3> axis);
+	quaternion<T> operator*(quaternion<T> &rhs);
+	matrix<T, 3> toMatrix3();
+	matrix<T, 4> toMatrix4();
  
  
-	rtsQuaternion();
-	rtsQuaternion(T w, T x, T y, T z);
+	quaternion();
+	quaternion(T w, T x, T y, T z);
  
 };
  
 template<typename T>
-void rtsQuaternion<T>::normalize()
+void quaternion<T>::normalize()
 {
 	double length=sqrt(w*w + x*x + y*y + z*z);
 	w=w/length;
@@ -37,23 +38,23 @@ void rtsQuaternion&lt;T&gt;::normalize()
 }
  
 template<typename T>
-void rtsQuaternion<T>::CreateRotation(T theta, T axis_x, T axis_y, T axis_z)
+void quaternion<T>::CreateRotation(T theta, T axis_x, T axis_y, T axis_z)
 {
 	//assign the given Euler rotation to this quaternion
-	w = cos(theta/2.0);
-	x = axis_x*sin(theta/2.0);
-	y = axis_y*sin(theta/2.0);
-	z = axis_z*sin(theta/2.0);
+	w = (T)cos(theta/2.0);
+	x = axis_x*(T)sin(theta/2.0);
+	y = axis_y*(T)sin(theta/2.0);
+	z = axis_z*(T)sin(theta/2.0);
 }
  
 template<typename T>
-void rtsQuaternion<T>::CreateRotation(T theta, rtsVector<T, 3> axis)
+void quaternion<T>::CreateRotation(T theta, vector<T, 3> axis)
 {
 	CreateRotation(theta, axis[0], axis[1], axis[2]);
 }
  
 template<typename T>
-rtsQuaternion<T> rtsQuaternion<T>::operator *(rtsQuaternion<T> &param)
+quaternion<T> quaternion<T>::operator *(quaternion<T> &param)
 {
 	float A, B, C, D, E, F, G, H;
  
@@ -67,7 +68,7 @@ rtsQuaternion&lt;T&gt; rtsQuaternion&lt;T&gt;::operator *(rtsQuaternion&lt;T&gt; &amp;param)
 	G = (w + y)*(param.w - param.z);
 	H = (w - y)*(param.w + param.z);
  
-	rtsQuaternion<T> result;
+	quaternion<T> result;
 	result.w = B + (-E - F + G + H) /2;
 	result.x = A - (E + F + G + H)/2;
 	result.y = C + (E - F + G - H)/2;
@@ -77,12 +78,12 @@ rtsQuaternion&lt;T&gt; rtsQuaternion&lt;T&gt;::operator *(rtsQuaternion&lt;T&gt; &amp;param)
 }
  
 template<typename T>
-rtsMatrix<T, 3> rtsQuaternion<T>::toMatrix()
+matrix<T, 3> quaternion<T>::toMatrix3()
 {
-	rtsMatrix<T, 3> result;
+	matrix<T, 3> result;
  
  
-    double wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
+    T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
  
  
     // calculate coefficients
@@ -92,62 +93,76 @@ rtsMatrix&lt;T, 3&gt; rtsQuaternion&lt;T&gt;::toMatrix()
     yy = y * y2; yz = y * z2; zz = z * z2;
     wx = w * x2; wy = w * y2; wz = w * z2;
  
-	result(0, 0) = 1.0 - (yy + zz);
+	result(0, 0) = (T)1.0 - (yy + zz);
 	result(0, 1) = xy - wz;
-    //m[0][0] = 1.0 - (yy + zz); m[1][0] = xy - wz;
  
 	result(0, 2) = xz + wy;
-	//result(0, 3) = 0.0;
-    //m[2][0] = xz + wy; m[3][0] = 0.0;
  
 	result(1, 0) = xy + wz;
-	result(1, 1) = 1.0 - (xx + zz);
-    //m[0][1] = xy + wz; m[1][1] = 1.0 - (xx + zz);
+	result(1, 1) = (T)1.0 - (xx + zz);
  
 	result(1, 2) = yz - wx;
-	//result(1, 3) = 0.0;
-    //m[2][1] = yz - wx; m[3][1] = 0.0;
  
 	result(2, 0) = xz - wy;
 	result(2, 1) = yz + wx;
-    //m[0][2] = xz - wy; m[1][2] = yz + wx;
  
-	result(2, 2) = 1.0 - (xx + yy);
-	//result(3, 2) = 0.0;
-    //m[2][2] = 1.0 - (xx + yy); m[3][2] = 0.0;
+	result(2, 2) = (T)1.0 - (xx + yy);
+
+	return result;
+}
+
+template<typename T>
+matrix<T, 4> quaternion<T>::toMatrix4()
+{
+	matrix<T, 4> result;
+
+
+    T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
  
-	/*result(3, 0) = 0.0;
-	result(3, 1) = 0.0;
-	result(3, 2) = 0.0;
-	result(3, 3) = 1.0;*/
-    //m[0][3] = 0; m[1][3] = 0;
-    //m[2][3] = 0; m[3][3] = 1;
-	/*
-	double* orientationmatrix=(double*)m;
-	char c;
  
+    // calculate coefficients
+    x2 = x + x; y2 = y + y;
+    z2 = z + z;
+    xx = x * x2; xy = x * y2; xz = x * z2;
+    yy = y * y2; yz = y * z2; zz = z * z2;
+    wx = w * x2; wy = w * y2; wz = w * z2;
+
+	result(0, 0) = (T)1.0 - (yy + zz);
+	result(0, 1) = xy - wz;
+
+	result(0, 2) = xz + wy;
+
+	result(1, 0) = xy + wz;
+	result(1, 1) = (T)1.0 - (xx + zz);
+
+	result(1, 2) = yz - wx;
+
+	result(2, 0) = xz - wy;
+	result(2, 1) = yz + wx;
  
-	double* result=new double[16];
-	double* array=(double*)m;
-	for(int i=0; i<16; i++)
-		result[i]=array[i];
-	*/
+	result(2, 2) = (T)1.0 - (xx + yy);
+
+	result(3, 3) = (T)1.0;
  
 	return result;
 }
  
 template<typename T>
-rtsQuaternion<T>::rtsQuaternion()
+quaternion<T>::quaternion()
 {
 	w=0.0; x=0.0; y=0.0; z=0.0;
 }
  
 template<typename T>
-rtsQuaternion<T>::rtsQuaternion(T c, T i, T j, T k)
+quaternion<T>::quaternion(T c, T i, T j, T k)
 {
 	w=c;  x=i;  y=j;  z=k;
 }
  
-}	//end rts namespace
+}	//end rts namespace
+
+#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
+template<class T> using rtsQuaternion = rts::quaternion<T>;
+#endif
  
 #endif
@@ -10,7 +10,7 @@ namespace rts{
 #define RTS_BESSEL_MAXIMUM_FLOAT		-1e33
  
 template <typename T>
-CUDA_CALLABLE void sbesselj(int n, rtsComplex<T> x, rtsComplex<T>* j)
+CUDA_CALLABLE void sbesselj(int n, complex<T> x, complex<T>* j)
 {
     //compute the first bessel function
     if(n >= 0)
@@ -36,7 +36,7 @@ CUDA_CALLABLE void sbesselj(int n, rtsComplex&lt;T&gt; x, rtsComplex&lt;T&gt;* j)
 }
  
 template <typename T>
-CUDA_CALLABLE void sbessely(int n, rtsComplex<T> x, rtsComplex<T>* y)
+CUDA_CALLABLE void sbessely(int n, complex<T> x, complex<T>* y)
 {
     //compute the first bessel function
     if(n >= 0)
@@ -56,14 +56,14 @@ CUDA_CALLABLE void sbessely(int n, rtsComplex&lt;T&gt; x, rtsComplex&lt;T&gt;* y)
  
 //spherical Hankel functions of the first kind
 template <typename T>
-CUDA_CALLABLE void sbesselh1(int n, rtsComplex<T> x, rtsComplex<T>* h)
+CUDA_CALLABLE void sbesselh1(int n, complex<T> x, complex<T>* h)
 {
     //compute j_0 and j_1
-    rtsComplex<T> j[2];
+    complex<T> j[2];
     sbesselj(1, x, j);
  
     //compute y_0 and y_1
-    rtsComplex<T> y[2];
+    complex<T> y[2];
     sbessely(1, x, y);
  
     //compute the first-order Hhankel function
+#ifndef RTS_TRIANGLE_H
+#define RTS_TRIANGLE_H
+
+//enable CUDA_CALLABLE macro
+#include "rts/cuda/callable.h"
+#include "rts/math/vector.h"
+#include "rts/math/point.h"
+#include <iostream>
+
+namespace rts{
+
+template <class T, int N>
+struct triangle
+{
+    /*
+        A------>B
+        |      /
+        |     /
+        |    /
+        |   /
+        |  /
+        | /
+        C
+    */
+    private:
+
+    //the three vertices of the triangle
+    point<T, N> A;
+    point<T, N> B;
+    point<T, N> C;
+
+    //returns the point specified by p = A + s(B-A) + t(C-A)
+    CUDA_CALLABLE point<T, N> _p(T s, T t)
+    {
+        vector<T, N> E0 = B-A;
+        vector<T, N> E1 = C-A;
+
+        return A + s*E0 + t*E1;
+    }
+
+
+    public:
+
+    //default constructor: the vertices are default-constructed points
+    CUDA_CALLABLE triangle()
+    {
+
+    }
+
+    //creates a triangle from the three vertices a, b, and c
+    CUDA_CALLABLE triangle(point<T, N> a, point<T, N> b, point<T, N> c)
+    {
+        A = a;
+        B = b;
+        C = c;
+    }
+
+    //returns the point A + s(B-A) + t(C-A)
+    CUDA_CALLABLE rts::point<T, N> operator()(T s, T t)
+    {
+        return _p(s, t);
+    }
+
+    //Returns the point on this triangle that is nearest to p.
+    //  This code is adapted from: http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf
+    //
+    //  The squared distance from p to A + s*E0 + t*E1 is a quadratic in (s, t). Its
+    //  unconstrained minimum is classified into one of seven regions of the (s, t) plane;
+    //  region 0 is the triangle interior (s >= 0, t >= 0, s + t <= 1) and the other six
+    //  lie outside it, where the minimum is moved onto the nearest edge or vertex.
+    CUDA_CALLABLE point<T, N> nearest(point<T, N> p)
+    {
+        vector<T, N> E0 = B-A;
+        vector<T, N> E1 = C-A;
+        vector<T, N> D = A - p;
+
+        //coefficients of the quadratic
+        T a = E0.dot(E0);
+        T b = E0.dot(E1);
+        T c = E1.dot(E1);
+        T d = E0.dot(D);
+        T e = E1.dot(D);
+
+        //s and t are left scaled by det until the region is known, which avoids
+        //  a division in every region other than region 0
+        T det = a*c - b*b;
+        T s = b*e - c*d;
+        T t = b*d - a*e;
+
+        if( s+t <= det)
+        {
+            if(s < 0)
+            {
+                if(t < 0)
+                {
+                    //region 4: beyond vertex A, the minimum lies on edge AB or edge AC
+                    if(d < 0)
+                    {
+                        //minimum on edge t = 0 (AB)
+                        t = 0;
+                        s = ( -d >= a ? 1 : -d/a );
+                    }
+                    else
+                    {
+                        //minimum on edge s = 0 (AC)
+                        s = 0;
+                        t = ( e >= 0 ? 0 : ( -e >= c ? 1 : -e/c ) );
+                    }
+                }
+                else
+                {
+                    //region 3: minimum on edge s = 0 (AC)
+                    s = 0;
+                    t = ( e >= 0 ? 0 : ( -e >= c ? 1 : -e/c ) );
+                }
+            }
+            else if(t < 0)
+            {
+                //region 5: minimum on edge t = 0 (AB)
+                s = ( d >= 0 ? 0 : ( -d >= a ? 1 : -d/a ) );
+                t = 0;
+            }
+            else
+            {
+                //region 0: the minimum is inside the triangle
+                //  (det == 0 is only reachable here with s == t == 0, so a
+                //  degenerate triangle skips the division and returns A)
+                if(det != 0)
+                {
+                    T invDet = (T)1.0/det;
+                    s *= invDet;
+                    t *= invDet;
+                }
+            }
+        }
+        else
+        {
+            if(s < 0)
+            {
+                //region 2: beyond vertex C, the minimum lies on edge BC or edge AC
+                T tmp0 = b + d;
+                T tmp1 = c + e;
+                if(tmp1 > tmp0)
+                {
+                    //minimum on edge s + t = 1 (BC)
+                    T numer = tmp1 - tmp0;
+                    T denom = a - 2 * b + c;
+                    s = ( numer >= denom ? 1 : numer/denom );
+                    t = 1 - s;
+                }
+                else
+                {
+                    //minimum on edge s = 0 (AC)
+                    s = 0;
+                    t = ( tmp1 <= 0 ? 1 : ( e >= 0 ? 0 : -e/c ) );
+                }
+            }
+            else if(t < 0)
+            {
+                //region 6: beyond vertex B, the minimum lies on edge BC or edge AB
+                T tmp0 = b + e;
+                T tmp1 = a + d;
+                if(tmp1 > tmp0)
+                {
+                    //minimum on edge s + t = 1 (BC)
+                    T numer = tmp1 - tmp0;
+                    T denom = a - 2 * b + c;
+                    t = ( numer >= denom ? 1 : numer/denom );
+                    s = 1 - t;
+                }
+                else
+                {
+                    //minimum on edge t = 0 (AB)
+                    t = 0;
+                    s = ( tmp1 <= 0 ? 1 : ( d >= 0 ? 0 : -d/a ) );
+                }
+            }
+            else
+            {
+                //region 1: minimum on edge s + t = 1 (BC)
+                T numer = c + e - b - d;
+                if( numer <= 0 )
+                    s = 0;
+                else
+                {
+                    T denom = a - 2 * b + c;
+                    s = ( numer >= denom ? 1 : numer/denom );
+                }
+                t = 1 - s;
+            }
+        }
+
+        return _p(s, t);
+
+    }
+
+    //returns the distance between p and the nearest point on this triangle
+    CUDA_CALLABLE T dist(point<T, N> p)
+    {
+        point<T, N> n = nearest(p);
+
+        return (p - n).len();
+    }
+};
+
+}
+
+#endif
 #ifndef RTS_VECTOR_H
 #define RTS_VECTOR_H
  
-#include <iostream>
-#include <cmath>
+#include <iostream>
+#include <cmath>
 #include <sstream>
 //#include "rts/point.h"
 #include "rts/cuda/callable.h"
@@ -13,11 +13,11 @@ namespace rts
  
  
 template <class T, int N>
-struct rtsVector
+struct vector
 {
 	T v[N];
  
-	CUDA_CALLABLE rtsVector()
+	CUDA_CALLABLE vector()
 	{
 		//memset(v, 0, sizeof(T) * N);
 		for(int i=0; i<N; i++)
@@ -25,7 +25,7 @@ struct rtsVector
 	}
  
 	//efficiency constructor, makes construction easier for 1D-4D vectors
-	CUDA_CALLABLE rtsVector(T x, T y = (T)0.0, T z = (T)0.0, T w = (T)0.0)
+	CUDA_CALLABLE vector(T x, T y = (T)0.0, T z = (T)0.0, T w = (T)0.0)
 	{
 		if(N >= 1)
 			v[0] = x;
@@ -37,7 +37,7 @@ struct rtsVector
 			v[3] = w;
 	}
  
-	CUDA_CALLABLE rtsVector(const T(&data)[N])
+	CUDA_CALLABLE vector(const T(&data)[N])
 	{
 		memcpy(v, data, sizeof(T) * N);
 	}
@@ -54,25 +54,25 @@ struct rtsVector
  
 	}
  
-	CUDA_CALLABLE rtsVector<T, N> cart2sph()
+	CUDA_CALLABLE vector<T, N> cart2sph()
 	{
 		//convert the vector from cartesian to spherical coordinates
 		//x, y, z -> r, theta, phi (where theta = 0 to 2*pi)
  
-		rtsVector<T, N> sph;
-		sph[0] = std::sqrt(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
-		sph[1] = std::atan2(v[1], v[0]);
+		vector<T, N> sph;
+		sph[0] = std::sqrt(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
+		sph[1] = std::atan2(v[1], v[0]);
 		sph[2] = std::acos(v[2] / sph[0]);
  
 		return sph;
 	}
  
-	CUDA_CALLABLE rtsVector<T, N> sph2cart()
+	CUDA_CALLABLE vector<T, N> sph2cart()
 	{
 		//convert the vector from cartesian to spherical coordinates
 		//r, theta, phi -> x, y, z (where theta = 0 to 2*pi)
  
-		rtsVector<T, N> cart;
+		vector<T, N> cart;
 		cart[0] = v[0] * std::cos(v[1]) * std::sin(v[2]);
 		cart[1] = v[0] * std::sin(v[1]) * std::sin(v[2]);
 		cart[2] = v[0] * std::cos(v[2]);
@@ -80,10 +80,10 @@ struct rtsVector
 		return cart;
 	}
  
-	CUDA_CALLABLE rtsVector<T, N> norm()
+	CUDA_CALLABLE vector<T, N> norm()
 	{
         //compute and return the vector norm
-        rtsVector<T, N> result;
+        vector<T, N> result;
  
         //compute the vector length
         T l = len();
@@ -97,9 +97,9 @@ struct rtsVector
         return result;
 	}
  
-	CUDA_CALLABLE rtsVector<T, 3> cross(rtsVector<T, 3> rhs)
+	CUDA_CALLABLE vector<T, 3> cross(vector<T, 3> rhs)
 	{
-		rtsVector<T, 3> result;
+		vector<T, 3> result;
  
 		//compute the cross product (only valid for 3D vectors)
 		result[0] = v[1] * rhs[2] - v[2] * rhs[1];
@@ -109,7 +109,7 @@ struct rtsVector
 		return result;
 	}
  
-    CUDA_CALLABLE T dot(rtsVector<T, N> rhs)
+    CUDA_CALLABLE T dot(vector<T, N> rhs)
     {
         T result = (T)0;
  
@@ -121,41 +121,49 @@ struct rtsVector
     }
  
 	//arithmetic
-	CUDA_CALLABLE rtsVector<T, N> operator+(rtsVector<T, N> rhs)
+	CUDA_CALLABLE vector<T, N> operator+(vector<T, N> rhs)
 	{
-        rtsVector<T, N> result;
+        vector<T, N> result;
  
         for(int i=0; i<N; i++)
             result.v[i] = v[i] + rhs.v[i];
  
         return result;
 	}
-	CUDA_CALLABLE rtsVector<T, N> operator-(rtsVector<T, N> rhs)
+	CUDA_CALLABLE vector<T, N> operator-(vector<T, N> rhs)
 	{
-        rtsVector<T, N> result;
+        vector<T, N> result;
  
         for(int i=0; i<N; i++)
             result.v[i] = v[i] - rhs.v[i];
  
         return result;
 	}
-	CUDA_CALLABLE rtsVector<T, N> operator*(T rhs)
+	CUDA_CALLABLE vector<T, N> operator*(T rhs)
 	{
-        rtsVector<T, N> result;
+        vector<T, N> result;
  
         for(int i=0; i<N; i++)
             result.v[i] = v[i] * rhs;
  
         return result;
 	}
-	CUDA_CALLABLE rtsVector<T, N> operator/(T rhs)
+	CUDA_CALLABLE vector<T, N> operator/(T rhs)
 	{
-        rtsVector<T, N> result;
+        vector<T, N> result;
  
         for(int i=0; i<N; i++)
             result.v[i] = v[i] / rhs;
  
         return result;
+	}
+
+	//exact element-wise equality: true only when all N components match
+	//	(the previous version always compared components 0..2, which reads past
+	//	the end of v for N < 3 and ignores the trailing components for N > 3)
+	CUDA_CALLABLE bool operator==(vector<T, N> rhs)
+	{
+        for(int i=0; i<N; i++)
+        {
+            if(rhs.v[i] != v[i])
+                return false;
+        }
+
+        return true;
 	}
  
 	std::string toStr()
@@ -186,7 +194,7 @@ struct rtsVector
 }	//end namespace rts
  
 template <typename T, int N>
-std::ostream& operator<<(std::ostream& os, rts::rtsVector<T, N> v)
+std::ostream& operator<<(std::ostream& os, rts::vector<T, N> v)
 {
     os<<v.toStr();
     return os;
@@ -194,9 +202,9 @@ std::ostream&amp; operator&lt;&lt;(std::ostream&amp; os, rts::rtsVector&lt;T, N&gt; v)
  
 //arithmetic operators
 template <typename T, int N>
-CUDA_CALLABLE rts::rtsVector<T, N> operator-(rts::rtsVector<T, N> v)
+CUDA_CALLABLE rts::vector<T, N> operator-(rts::vector<T, N> v)
 {
-    rts::rtsVector<T, N> r;
+    rts::vector<T, N> r;
  
     //negate the vector
     for(int i=0; i<N; i++)
@@ -206,11 +214,15 @@ CUDA_CALLABLE rts::rtsVector&lt;T, N&gt; operator-(rts::rtsVector&lt;T, N&gt; v)
 }
  
 template <typename T, int N>
-CUDA_CALLABLE rts::rtsVector<T, N> operator*(T lhs, rts::rtsVector<T, N> rhs)
+CUDA_CALLABLE rts::vector<T, N> operator*(T lhs, rts::vector<T, N> rhs)
 {
-    rts::rtsVector<T, N> r;
+    rts::vector<T, N> r;
  
     return rhs * lhs;
 }
  
+//backward-compatible alias for code that still uses the old rtsVector name
+//	the alias is enabled for GCC newer than 4.7; the major and minor versions are
+//	tested together so that releases such as 5.0 (minor version 0) are not excluded
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 7)
+template<class T, int N> using rtsVector = rts::vector<T, N>;
+#endif
+
 #endif
@@ -9,6 +9,7 @@
 #include <algorithm>
 #include <sstream>
 #include "rts/math/complex.h"
+#include "rts/math/function.h"
  
 #define PI  3.14159
  
@@ -49,7 +50,7 @@ namespace rts{
     {
         //wavelength (in microns)
         T lambda;
-        rtsComplex<T> n;
+        complex<T> n;
     };
  
     template <class T>
@@ -111,10 +112,10 @@ namespace rts{
  
  
             //read the entry from an input string
-            for(int i=0; i<valueList.size(); i++)
+            for(unsigned int i=0; i<valueList.size(); i++)
             {
  
-                char c;
+
                 while(ss.peek() < '0' || ss.peek() > '9')
                 {
                     //cout<<"bad char"<<endl;
@@ -195,25 +196,17 @@ namespace rts{
     {
 		std::string name;
         //dispersion (refractive index as a function of wavelength)
-        std::vector< refIndex<T> > dispersion;
+        //std::vector< refIndex<T> > dispersion;
+		function< T, complex<T> > dispersion;
  
         //average refractive index (approximately 1.4)
         T n0;
  
-        void add(refIndex<T> ri)
+        /*void add(refIndex<T> ri)
         {
             //refIndex<T> converted = convert(ri, measurement);
             dispersion.push_back(ri);
-        }
-
-        void add(T lambda, rtsComplex<T> n)
-        {
-            refIndex<T> ri;
-            ri.lambda = lambda;
-            ri.n = n;
-
-            dispersion.push_back(ri);
-        }
+        }*/
  
         //comparison function for sorting
         static bool compare(refIndex<T> a, refIndex<T> b)
@@ -222,12 +215,17 @@ namespace rts{
         }
  
         //comparison function for searching lambda
-        static bool findCeiling(refIndex<T> a, refIndex<T> b)
+        /*static bool findCeiling(refIndex<T> a, refIndex<T> b)
         {
             return (a.lambda > b.lambda);
-        }
+        }*/
  
 	public:
+		//inserts one sample into the dispersion function: the complex value n
+		//	stored at wavelength lambda
+		void add(T lambda, complex<T> n)
+        {
+            dispersion.insert(lambda, n);
+        }
+
 		std::string getName()
 		{
 			return name;
@@ -244,6 +242,11 @@ namespace rts{
 		{
             n0 = n;
 		}
+
+		//replaces the stored dispersion function with m
+		void setM(function< T, complex<T> > m)
+		{
+			dispersion = m;
+		}
         unsigned int nSamples()
         {
             return dispersion.size();
@@ -268,9 +271,9 @@ namespace rts{
  
 #ifdef FFTW_AVAILABLE
 			//allocate memory for the FFT
-			rtsComplex<T>* Chi2 = (rtsComplex<T>*)fftw_malloc(sizeof(rtsComplex<T>) * N * pf);
-			rtsComplex<T>* Chi2FFT = (rtsComplex<T>*)fftw_malloc(sizeof(rtsComplex<T>) * N * pf);
-			rtsComplex<T>* Chi1 = (rtsComplex<T>*)fftw_malloc(sizeof(rtsComplex<T>) * N * pf);
+			complex<T>* Chi2 = (complex<T>*)fftw_malloc(sizeof(complex<T>) * N * pf);
+			complex<T>* Chi2FFT = (complex<T>*)fftw_malloc(sizeof(complex<T>) * N * pf);
+			complex<T>* Chi1 = (complex<T>*)fftw_malloc(sizeof(complex<T>) * N * pf);
  
 			//create an FFT plan for the forward and inverse transforms
 			fftw_plan planForward, planInverse;
@@ -301,8 +304,8 @@ namespace rts{
 			}
  
 			//use linear interpolation between the start and end points to pad the spectrum
-			//rtsComplex<T> nMin = dispersion.back();
-			//rtsComplex<T> nMax = dispersion.front();
+			//complex<T> nMin = dispersion.back();
+			//complex<T> nMax = dispersion.front();
 			T a;
 			for(int i=N; i<N*pf; i++)
 			{
@@ -316,7 +319,7 @@ namespace rts{
 			fftw_execute(planForward);
  
 			//perform the Hilbert transform in the Fourier domain
-			rtsComplex<T> j(0, 1);
+			complex<T> j(0, 1);
 			for(int i=0; i<N*pf; i++)
 			{
                 //if w = 0, set the DC component to zero
@@ -400,13 +403,14 @@ namespace rts{
  
         material(T lambda = 1.0, T n = 1.4, T k = 0.0)
         {
-            //create a default refractive index
+			dispersion.insert(lambda, complex<T>(0.0, k));
+            /*//create a default refractive index
             refIndex<T> def;
             def.lambda = lambda;
             def.n.real(n);
             def.n.imag(k);
             add(def);
-
+			*/
             //set n0
             n0 = n;
         }
@@ -419,9 +423,9 @@ namespace rts{
  
         void fromFile(std::string filename, std::string format = "", T scaleA = 1.0)
         {
-			name = filename;
+            name = filename;
             //clear any previous values
-            dispersion.clear();
+            dispersion = rts::function< T, complex<T> >();
  
             //load the file into a string
             std::ifstream ifs(filename.c_str());
@@ -437,7 +441,6 @@ namespace rts{
             //process the file as a string
             std::string instr((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
             fromStr(instr, format, scaleA);
-
         }
  
         void fromStr(std::string str, std::string format = "", T scaleA = 1.0)
@@ -480,7 +483,6 @@ namespace rts{
  
             //std::cout<<"Loading material with format: "<<format<<std::endl;
  
-            T lambda, n, k;
             while(!ss.eof())
             {
                 //read a line from the string
@@ -490,14 +492,14 @@ namespace rts{
                 if(line[0] != '#')
                 {
                     //load the entry and add it to the dispersion list
-                    add(entry.inputEntry(line, scaleA));
+                    add(entry.inputEntry(line, scaleA).lambda, entry.inputEntry(line, scaleA).n);
                 }
                 //generally have to peek to trigger the eof flag
                 ss.peek();
             }
  
             //sort the vector by lambda
-            sort(dispersion.begin(), dispersion.end(), &material<T>::compare);
+            //sort(dispersion.begin(), dispersion.end(), &material<T>::compare);
         }
  
         //convert the material to a string
@@ -555,51 +557,30 @@ namespace rts{
  
 		}
  
-		rtsComplex<T> getN(T l)
+		complex<T> getN(T l)
 		{
-            //declare an iterator
-            typename std::vector< refIndex<T> >::iterator it;
-
-            refIndex<T> r;
-            r.lambda = l;
-
-            it = search(dispersion.begin(), dispersion.end(), &r, &r + 1, &material<T>::findCeiling);
-
-            //if the wavelength is past the end of the list, return the back
-            if(it == dispersion.end())
-                return dispersion.back().n;
-            //if the wavelength is before the beginning of the list, return the front
-            else if(it == dispersion.begin())
-                return dispersion.front().n;
-            //otherwise interpolate
-            else
-            {
-                T lMax = (*it).lambda;
-                T lMin = (*(it - 1)).lambda;
-                //std::cout<<lMin<<"----------"<<lMax<<std::endl;
-
-                T a = (l - lMin) / (lMax - lMin);
-                rtsComplex<T> riMin = (*(it - 1)).n;
-                rtsComplex<T> riMax = (*it).n;
-                rtsComplex<T> interp;
-                interp = rtsComplex<T>(a, 0.0) * riMin + rtsComplex<T>(1.0 - a, 0.0) * riMax;
-                return interp;
-            }
+			//return complex<T>(1.0, 0.0);
+			complex<T> ri = dispersion.linear(l) + n0;
+			return ri;
+		}
  
+		//returns the dispersion function with n0 added as a purely real offset
+		//	NOTE(review): presumably function::operator+ applies the offset to every
+		//	sample, mirroring what getN() does for a single wavelength - confirm
+		function<T, complex<T> > getF()
+		{
+			return dispersion + complex<T>(n0, 0.0);
 		}
  
 		//returns the scattering efficiency at wavelength l
-		rtsComplex<T> eta(T l)
+		complex<T> eta(T l)
 		{
             //get the refractive index
-            rtsComplex<T> ri = getN(l);
+            complex<T> ri = getN(l);
  
             //convert the refractive index to scattering efficiency
             return ri*ri - 1.0;
  
 		}
         //interpolate the given lambda value and return the index of refraction
-        rtsComplex<T> operator()(T l)
+        complex<T> operator()(T l)
         {
             return getN(l);
         }
+#include <qimage.h>
+#include <qcolor.h>
+#include <iostream>
+
+//Saves a buffer of interleaved 8-bit R, G, B values as an image file using Qt.
+//	buffer		x_size * y_size pixels stored row by row, three bytes (r, g, b) per pixel
+//	filename	destination file name, passed to QImage::save()
+//	x_size		image width in pixels
+//	y_size		image height in pixels
+//Errors are reported on std::cout and the function returns without saving.
+void qt_buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size)
+{
+	//a missing source buffer would otherwise be dereferenced in the copy loop
+	if(!buffer)
+	{
+        std::cout<<"Error: NULL buffer passed to qt_buffer2image."<<std::endl;
+        return;
+    }
+
+	//create an image object
+	QImage image(x_size, y_size, QImage::Format_RGB32);
+	if(image.isNull())
+	{
+        std::cout<<"Error creating QImage."<<std::endl;
+        return;
+    }
+
+	//walk the source with a pointer instead of computing (y * x_size + x) * 3 in an int,
+	//	which can overflow for very large images
+	const unsigned char* px = buffer;
+	for(unsigned int y=0; y<y_size; y++)
+		for(unsigned int x=0; x<x_size; x++)
+		{
+			//set the image pixel from the current (r, g, b) triple
+			QColor color(px[0], px[1], px[2]);
+			image.setPixel(x, y, color.rgb());
+
+			//advance to the next pixel
+			px += 3;
+		}
+
+	if(!image.save(filename.c_str()))
+        std::cout<<"Error saving QImage."<<std::endl;
+}
+#include <rts/math/vector.h>
+#include <rts/math/point.h>
+#include <rts/math/quaternion.h>
+#include <rts/math/matrix.h>
+
+#include <ostream>
+
+#ifndef RTS_CAMERA_H
+#define RTS_CAMERA_H
+
+namespace rts{
+
+//Look-at style camera described by a position, a viewing direction, an "up" vector,
+//	a focal distance and a field of view. The focal point is always p + focus*d.
+class camera
+{
+	vector<float, 3> d;	//direction that the camera is pointing
+	point<float, 3> p;	//position of the camera
+	vector<float, 3> up;	//"up" direction
+	float focus;		//focal length of the camera
+	float fov;		//field of view (Zoom() keeps it within [0.5, 180])
+
+	//private function makes sure that the up vector is orthogonal to the direction vector and both are normalized
+	void stabalize()
+	{
+		//rebuild up from the side vector so that it is orthogonal to d
+		vector<float, 3> side = up.cross(d);
+		up = d.cross(side);
+		up = up.norm();
+		d = d.norm();
+	}
+
+public:
+	//moves the camera to pos without changing its orientation
+	void setPosition(point<float, 3> pos)
+	{
+		p = pos;
+	}
+	void setPosition(float x, float y, float z){setPosition(point<float, 3>(x, y, z));}
+
+	void setFocalDistance(float distance){focus = distance;}
+	void setFOV(float field_of_view){fov = field_of_view;}
+
+	//points the camera at pos; the focal distance becomes the distance to pos
+	void LookAt(point<float, 3> pos)
+	{
+		//find the new direction
+		d = pos - p;
+
+		//find the distance from the look-at point to the current position
+		//	(measured before stabalize() normalizes d)
+		focus = d.len();
+
+		//stabalize the camera
+		stabalize();
+	}
+	void LookAt(float px, float py, float pz){LookAt(point<float, 3>(px, py, pz));}
+	void LookAt(point<float, 3> pos, vector<float, 3> new_up){up = new_up; LookAt(pos);}
+	void LookAt(float px, float py, float pz, float ux, float uy, float uz){LookAt(point<float, 3>(px, py, pz), vector<float, 3>(ux, uy, uz));}
+
+	//translates the camera so that its focal point lands on (lx, ly, lz); orientation is unchanged
+	void LookAtDolly(float lx, float ly, float lz)
+	{
+		//find the current focus point
+		point<float, 3> f = p + focus*d;
+		vector<float, 3> T = point<float, 3>(lx, ly, lz) - f;
+		p = p + T;
+	}
+
+	//translates the camera position by the given vector
+	void Dolly(vector<float, 3> direction)
+	{
+		p = p+direction;
+	}
+	void Dolly(float x, float y, float z){Dolly(vector<float, 3>(x, y, z));}
+
+	//moves the camera along its viewing direction towards the focal point
+	void Push(float delta)
+	{
+		//never move past the focal point
+		if(delta > focus)
+			delta = focus;
+
+		focus -= delta;
+
+		Dolly(d*delta);
+	}
+
+	//rotates the camera in place about its up (x), side (y) and direction (z) axes
+	void Pan(float theta_x, float theta_y, float theta_z)
+	{
+		//x rotation is around the up axis
+		quaternion<float> qx;
+		qx.CreateRotation(theta_x, up[0], up[1], up[2]);
+
+		//y rotation is around the side axis
+		vector<float, 3> side = up.cross(d);
+		quaternion<float> qy;
+		qy.CreateRotation(theta_y, side[0], side[1], side[2]);
+
+		//z rotation is around the direction vector
+		quaternion<float> qz;
+		qz.CreateRotation(theta_z, d[0], d[1], d[2]);
+
+		//combine the rotations in x, y, z order
+		quaternion<float> final = qz*qy*qx;
+
+		//get the rotation matrix
+		matrix<float, 3> rot_matrix = final.toMatrix3();
+
+		//apply the rotation
+		d = rot_matrix*d;
+		up = rot_matrix*up;
+
+		//stabalize the camera
+		stabalize();
+
+	}
+	void Pan(float theta_x){Pan(theta_x, 0, 0);}
+	void Tilt(float theta_y){Pan(0, theta_y, 0);}
+	void Twist(float theta_z){Pan(0, 0, theta_z);}
+
+	//narrows (positive delta) or widens (negative delta) the field of view
+	void Zoom(float delta)
+	{
+		fov -= delta;
+
+		//keep the field of view inside [0.5, 180]
+		if(fov < 0.5)
+			fov = 0.5;
+		if(fov > 180)
+			fov = 180;
+	}
+
+	//rotates the camera position around the focal point and keeps looking at it
+	void OrbitFocus(float theta_x, float theta_y)
+	{
+		//find the focal point
+		point<float, 3> focal_point = p + focus*d;
+
+		//center the coordinate system on the focal point
+		point<float, 3> centered = p - (focal_point - point<float, 3>(0, 0, 0));
+
+		//create the x rotation (around the up vector)
+		quaternion<float> qx;
+		qx.CreateRotation(theta_x, up[0], up[1], up[2]);
+		centered = point<float, 3>(0, 0, 0) + qx.toMatrix3()*(centered - point<float, 3>(0, 0, 0));
+
+		//get a side vector for theta_y rotation
+		vector<float, 3> side = up.cross((point<float, 3>(0, 0, 0) - centered).norm());
+
+		quaternion<float> qy;
+		qy.CreateRotation(theta_y, side[0], side[1], side[2]);
+		centered = point<float, 3>(0, 0, 0) + qy.toMatrix3()*(centered - point<float, 3>(0, 0, 0));
+
+		//perform the rotation on the centered camera position
+		//centered = final.toMatrix()*centered;
+
+		//re-position the camera
+		p = centered + (focal_point - point<float, 3>(0, 0, 0));
+
+		//make sure we are looking at the focal point
+		LookAt(focal_point);
+
+		//stabalize the camera
+		stabalize();
+
+	}
+
+	//translates the camera by u along its side vector and v along its up vector
+	void Slide(float u, float v)
+	{
+		vector<float, 3> V = up.norm();
+		vector<float, 3> U = up.cross(d).norm();
+
+		p = p + (V * v) + (U * u);
+	}
+
+	//accessor methods
+	point<float, 3> getPosition(){return p;}
+	vector<float, 3> getUp(){return up;}
+	vector<float, 3> getDirection(){return d;}
+	point<float, 3> getLookAt(){return p + focus*d;}
+	float getFOV(){return fov;}
+
+	//output the camera settings (only the position is written)
+	const void print(std::ostream& output)
+	{
+		output<<"Position: "<<p<<std::endl;
+
+	}
+
+	//writes the position, direction, up vector and focal distance, one per line
+	friend std::ostream& operator<<(std::ostream& out, const camera& c)
+	{
+		out<<"Position: "<<c.p<<std::endl;
+		out<<"Direction: "<<c.d<<std::endl;
+		out<<"Up: "<<c.up<<std::endl;
+		out<<"Focal Distance: "<<c.focus<<std::endl;
+		return out;
+	}
+
+	//constructor: camera at the origin looking down +z with +y as up
+	camera()
+	{
+		p = point<float, 3>(0, 0, 0);
+		d = vector<float, 3>(0, 0, 1);
+		up = vector<float, 3>(0, 1, 0);
+		focus = 1;
+
+		//fov used to be left uninitialized, so Zoom() and getFOV() read an
+		//	indeterminate value unless setFOV() had been called first
+		//	NOTE(review): 60 is an arbitrary default inside the [0.5, 180] range
+		//	enforced by Zoom() - adjust if the project expects another value
+		fov = 60;
+	}
+};
+
+}
+
+
+
+#endif
+#ifndef RTS_COLORMAP_H
+#define RTS_COLORMAP_H
+
+#include <string>
+#include <stdlib.h>
+#include "rts/cuda/error.h"
+
+
+#define BREWER_CTRL_PTS 11
+
+void qt_buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size);
+
+static float  BREWERCP[BREWER_CTRL_PTS*4] = {0.192157f, 0.211765f, 0.584314f, 1.0f,
+                                      0.270588f, 0.458824f, 0.705882f, 1.0f,
+                                      0.454902f, 0.678431f, 0.819608f, 1.0f,
+                                      0.670588f, 0.85098f, 0.913725f, 1.0f,
+                                      0.878431f, 0.952941f, 0.972549f, 1.0f,
+                                      1.0f, 1.0f, 0.74902f, 1.0f,
+                                      0.996078f, 0.878431f, 0.564706f, 1.0f,
+                                      0.992157f, 0.682353f, 0.380392f, 1.0f,
+                                      0.956863f, 0.427451f, 0.262745f, 1.0f,
+                                      0.843137f, 0.188235f, 0.152941f, 1.0f,
+                                      0.647059f, 0.0f, 0.14902f, 1.0f};
+
+
+#ifdef __CUDACC__
+texture<float4, cudaTextureType1D> cudaTexBrewer;
+static cudaArray* gpuBrewer;
+#endif
+
+
+
+namespace rts{
+
+enum colormapType {cmBrewer, cmGrayscale};
+
+//writes an x_size by y_size color buffer to the file named filename
+//	thin wrapper that forwards all arguments unchanged to qt_buffer2image()
+static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size)
+{
+    qt_buffer2image(buffer, filename, x_size, y_size);
+}
+
+#ifdef __CUDACC__
+//Uploads the Brewer colormap control points to a CUDA array (gpuBrewer) and binds
+//	it to the cudaTexBrewer texture with clamped addressing, linear filtering and
+//	normalized coordinates. Pair every call with destroyBrewer().
+static void initBrewer()
+{
+	//the control points are taken directly from BREWERCP, the table that the CPU
+	//	path (cpuApplyBrewer) also uses, so both paths always share the same colors
+	//	BREWERCP stores 4 floats per control point, the same layout as a float4 texel
+
+	int width = BREWER_CTRL_PTS;
+	int height = 0;		//a height of 0 requests a 1D array
+
+
+	// allocate array and copy colormap data
+	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);
+
+	HANDLE_ERROR(cudaMallocArray(&gpuBrewer, &channelDesc, width, height));
+
+	HANDLE_ERROR(cudaMemcpyToArray(gpuBrewer, 0, 0, BREWERCP, sizeof(float)*4*width, cudaMemcpyHostToDevice));
+
+	// set texture parameters
+    cudaTexBrewer.addressMode[0] = cudaAddressModeClamp;
+	//texBrewer.addressMode[1] = cudaAddressModeClamp;
+    cudaTexBrewer.filterMode = cudaFilterModeLinear;
+    cudaTexBrewer.normalized = true;  // access with normalized texture coordinates
+
+	// Bind the array to the texture
+    HANDLE_ERROR(cudaBindTextureToArray( cudaTexBrewer, gpuBrewer, channelDesc));
+
+}
+
+//Releases the resources created by initBrewer().
+static void destroyBrewer()
+{
+    //detach the texture first so that it never refers to a freed array
+    HANDLE_ERROR(cudaUnbindTexture(cudaTexBrewer));
+
+    HANDLE_ERROR(cudaFreeArray(gpuBrewer));
+
+    //forget the stale handle so that a repeated call frees nothing
+    gpuBrewer = NULL;
+
+}
+
+//Kernel: maps N scalar values to RGB bytes through the Brewer colormap texture.
+//	gpuSource	N input values (device memory)
+//	gpuDest		3*N output bytes, written as interleaved r, g, b (device memory)
+//	minVal, maxVal	input range mapped to the first and last control points
+//Launch layout: 1D blocks in a 1D or 2D grid, one thread per value (see gpu2gpu).
+//Requires initBrewer() to have bound cudaTexBrewer.
+template<class T>
+__global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1)
+{
+
+	//unsigned index: it is compared against the unsigned element count
+	unsigned int i = blockIdx.y * gridDim.x * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
+    if(i >= N) return;
+
+	//compute the normalized value on [minVal maxVal]
+	float a = (gpuSource[i] - minVal) / (maxVal - minVal);
+
+	//clamp to [0, 1], matching cpuApplyBrewer
+	a = fminf(fmaxf(a, 0.0f), 1.0f);
+
+	//lookup the color
+	//	with normalized coordinates and linear filtering, control point k sits at
+	//	(k + 0.5) / BREWER_CTRL_PTS, so a in [0, 1] has to be scaled onto
+	//	k in [0, BREWER_CTRL_PTS - 1] before adding the half-texel shift
+	//	(the previous a + shift reached the last color already at a = 10/11)
+	float coord = (a * (float)(BREWER_CTRL_PTS - 1) + 0.5f) / (float)BREWER_CTRL_PTS;
+	float4 color = tex1D(cudaTexBrewer, coord);
+
+	//widen the offset before multiplying so that 3*i cannot wrap for large N
+	size_t o = (size_t)i * 3;
+	gpuDest[o + 0] = 255 * color.x;
+	gpuDest[o + 1] = 255 * color.y;
+	gpuDest[o + 2] = 255 * color.z;
+}
+
+//Kernel: maps N scalar values to gray RGB bytes.
+//	gpuSource	N input values (device memory)
+//	gpuDest		3*N output bytes; r, g and b all receive the same gray level
+//	minVal, maxVal	input range mapped to black and white
+//Launch layout: 1D blocks in a 1D or 2D grid, one thread per value (see gpu2gpu).
+template<class T>
+__global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1)
+{
+    //unsigned index: it is compared against the unsigned element count
+    unsigned int i = blockIdx.y * gridDim.x * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
+    if(i >= N) return;
+
+	//compute the normalized value on [minVal maxVal]
+	float a = (gpuSource[i] - minVal) / (maxVal - minVal);
+
+	//threshold (float literals keep the comparison in single precision)
+	if(a > 1.0f)
+        a = 1.0f;
+    if(a < 0.0f)
+        a = 0.0f;
+
+	//all three channels share the same gray level
+	unsigned char gray = 255 * a;
+
+	//widen the offset before multiplying so that 3*i cannot wrap for large N
+	size_t o = (size_t)i * 3;
+	gpuDest[o + 0] = gray;
+	gpuDest[o + 1] = gray;
+	gpuDest[o + 2] = gray;
+}
+
+//Converts a scalar field on the GPU to a color image on the GPU.
+//	gpuSource	nVals input values (device memory)
+//	gpuDest		3*nVals output bytes (device memory)
+//	minVal, maxVal	input range mapped onto the colormap
+//	cm		colormap to apply
+//	blockDim	number of threads per block
+template<class T>
+static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T minVal = 0, T maxVal = 1, colormapType cm = cmGrayscale, int blockDim = 128)
+{
+	//nothing to convert; a grid with zero blocks is an invalid launch configuration
+	if(nVals == 0) return;
+
+	//one thread per value; the grid spills into y when x would exceed 65535 blocks
+	int gridX = (nVals + blockDim - 1)/blockDim;
+	int gridY = 1;
+    if(gridX > 65535)
+    {
+        gridY = (gridX + 65535 - 1) / 65535;
+        gridX = 65535;
+    }
+    dim3 dimGrid(gridX, gridY);
+	//int gridDim = (nVals + blockDim - 1)/blockDim;
+	if(cm == cmGrayscale)
+	{
+		applyGrayscale<<<dimGrid, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal);
+
+		//a launch does not return an error itself; query it explicitly
+		HANDLE_ERROR(cudaGetLastError());
+	}
+	else if(cm == cmBrewer)
+	{
+		initBrewer();
+		applyBrewer<<<dimGrid, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal);
+		HANDLE_ERROR(cudaGetLastError());
+
+		//the launch is asynchronous: wait for the kernel to finish before the
+		//	colormap texture and array are released
+		HANDLE_ERROR(cudaDeviceSynchronize());
+		//HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3));
+		destroyBrewer();
+	}
+
+}
+
+//Converts a scalar field on the GPU to a color image in host memory.
+//	gpuSource	nVals input values (device memory)
+//	cpuDest		3*nVals output bytes (host memory)
+//	minVal, maxVal	input range mapped onto the colormap
+//	cm		colormap to apply
+template<class T>
+static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T minVal, T maxVal, colormapType cm = cmGrayscale)
+{
+    //size of the color image: three bytes per input value
+    const size_t rgbBytes = sizeof(unsigned char) * nVals * 3;
+
+    //temporary device buffer that receives the color image
+    unsigned char* gpuRGB;
+    HANDLE_ERROR(cudaMalloc( (void**)&gpuRGB, rgbBytes ));
+
+    //build the color image on the device...
+    gpu2gpu(gpuSource, gpuRGB, nVals, minVal, maxVal, cm);
+
+    //...and bring it back to the host
+    HANDLE_ERROR(cudaMemcpy(cpuDest, gpuRGB, rgbBytes, cudaMemcpyDeviceToHost));
+
+    //the temporary device buffer is no longer needed
+    HANDLE_ERROR(cudaFree( gpuRGB ));
+
+}
+
+//Colormaps an x_size by y_size scalar field stored on the GPU and saves it as an image.
+//	gpuSource	x_size * y_size input values (device memory)
+//	fileDest	name of the image file to write
+//	valMin, valMax	input range mapped onto the colormap
+//	cm		colormap to apply
+template<typename T>
+static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale)
+{
+	//host-side RGB buffer, three bytes per pixel
+	unsigned char* rgb = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size);
+
+	//colormap on the device and download the result
+	const unsigned int nPixels = x_size * y_size;
+	gpu2cpu<T>(gpuSource, rgb, nPixels, valMin, valMax, cm);
+
+	//write the RGB buffer to disk
+	buffer2image(rgb, fileDest, x_size, y_size);
+
+	free(rgb);
+}
+
+#endif
+
+//Maps N scalar values in host memory to RGB bytes by linearly interpolating the
+//	Brewer control points stored in BREWERCP.
+//	cpuSource	N input values
+//	cpuDest		3*N output bytes, written as interleaved r, g, b
+//	minVal, maxVal	input range mapped to the first and last control points
+template<class T>
+static void cpuApplyBrewer(T* cpuSource, unsigned char* cpuDest, unsigned int N, T minVal = 0, T maxVal = 1)
+{
+    for(unsigned int i=0; i<N; i++)
+    {
+        //compute the normalized value on [minVal maxVal]
+        float a = (cpuSource[i] - minVal) / (maxVal - minVal);
+
+        //clamp to [0, 1]; the first test is written so that NaN (e.g. minVal == maxVal)
+        //	also falls to 0 instead of producing an out-of-range table index below
+        if(!(a > 0)) a = 0;
+        if(a > 1) a = 1;
+
+        //position of a along the control points: lower point and blend weight
+        float c = a * (float)(BREWER_CTRL_PTS-1);
+        int ptLow = (int)c;
+        float m = c - (float)ptLow;
+        //std::cout<<m<<std::endl;
+
+        float r, g, b;
+        if(ptLow == BREWER_CTRL_PTS - 1)
+        {
+            //exactly on the last control point: there is no upper neighbor to blend with
+            r = BREWERCP[ptLow * 4 + 0];
+            g = BREWERCP[ptLow * 4 + 1];
+            b = BREWERCP[ptLow * 4 + 2];
+        }
+        else
+        {
+            r = BREWERCP[ptLow * 4 + 0] * (1.0-m) + BREWERCP[ (ptLow+1) * 4 + 0] * m;
+            g = BREWERCP[ptLow * 4 + 1] * (1.0-m) + BREWERCP[ (ptLow+1) * 4 + 1] * m;
+            b = BREWERCP[ptLow * 4 + 2] * (1.0-m) + BREWERCP[ (ptLow+1) * 4 + 2] * m;
+        }
+
+
+        //widen the offset before multiplying so that 3*i cannot wrap for large N
+        size_t o = (size_t)i * 3;
+        cpuDest[o + 0] = 255 * r;
+        cpuDest[o + 1] = 255 * g;
+        cpuDest[o + 2] = 255 * b;
+
+    }
+}
+
+//Converts a scalar field in host memory to a color image in host memory.
+//	cpuSource	nVals input values
+//	cpuDest		3*nVals output bytes, written as interleaved r, g, b
+//	valMin, valMax	input range mapped onto the colormap
+//	cm		colormap to apply
+template<class T>
+static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T valMin, T valMax, colormapType cm = cmGrayscale)
+{
+
+    if(cm == cmBrewer)
+        cpuApplyBrewer(cpuSource, cpuDest, nVals, valMin, valMax);
+    else if(cm == cmGrayscale)
+    {
+        float range = valMax - valMin;
+        for(unsigned int i = 0; i<nVals; i++)
+        {
+            //normalize to the range [valMin valMax]
+            float a = (cpuSource[i] - valMin) / range;
+
+            //clamp to [0, 1]; the first test is written so that NaN (range == 0)
+            //	also falls to 0 before the conversion to a byte
+            if(!(a > 0)) a = 0.0;
+            if(a > 1) a = 1.0;
+
+            //all three channels share the same gray level
+            unsigned char gray = 255 * a;
+
+            //widen the offset before multiplying so that 3*i cannot wrap for large nVals
+            size_t o = (size_t)i * 3;
+            cpuDest[o + 0] = gray;
+            cpuDest[o + 1] = gray;
+            cpuDest[o + 2] = gray;
+        }
+    }
+}
+
+//Converts a scalar field in host memory to a color image, choosing the range
+//	automatically from the largest magnitude M found in the data:
+//	[0, M] when positive is true, [-M, M] otherwise.
+//	cpuSource	nVals input values
+//	cpuDest		3*nVals output bytes
+//	cm		colormap to apply
+template<class T>
+static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, colormapType cm = cmGrayscale, bool positive = false)
+{
+    //computes the max and min range automatically
+
+    //an empty field has no first element to seed the search with and nothing to convert
+    if(nVals == 0) return;
+
+    //find the largest magnitude value
+    T maxVal = fabs(cpuSource[0]);
+    for(unsigned int i=1; i<nVals; i++)
+	{
+        if(fabs(cpuSource[i]) > maxVal)
+            maxVal = fabs(cpuSource[i]);
+	}
+
+    if(positive)
+        cpu2cpu(cpuSource, cpuDest, nVals, (T)0.0, maxVal, cm);
+    else
+        cpu2cpu(cpuSource, cpuDest, nVals, -maxVal, maxVal, cm);
+
+}
+
+
+
+//Colormaps an x_size by y_size scalar field in host memory over the range
+//	[valMin, valMax] and saves the result as an image.
+//	cpuSource	x_size * y_size input values
+//	fileDest	name of the image file to write
+//	cm		colormap to apply
+template<typename T>
+static void cpu2image(T* cpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale)
+{
+	//RGB buffer, three bytes per pixel
+	unsigned char* rgb = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size);
+
+	//apply the colormap over the requested range
+	const unsigned int nPixels = x_size * y_size;
+	cpu2cpu<T>(cpuSource, rgb, nPixels, valMin, valMax, cm);
+
+	//write the RGB buffer to disk
+	buffer2image(rgb, fileDest, x_size, y_size);
+
+	free(rgb);
+
+}
+
+//Colormaps an x_size by y_size scalar field in host memory, letting cpu2cpu pick
+//	the value range from the data, and saves the result as an image.
+//	cpuSource	x_size * y_size input values
+//	fileDest	name of the image file to write
+//	cm		colormap to apply
+//	positive	forwarded to cpu2cpu to select the automatic range
+template<typename T>
+static void cpu2image(T* cpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, colormapType cm = cmGrayscale, bool positive = false)
+{
+	//RGB buffer, three bytes per pixel
+	unsigned char* rgb = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size);
+
+	//apply the colormap with an automatically chosen range
+	const unsigned int nPixels = x_size * y_size;
+	cpu2cpu<T>(cpuSource, rgb, nPixels, cm, positive);
+
+	//write the RGB buffer to disk
+	buffer2image(rgb, fileDest, x_size, y_size);
+
+	free(rgb);
+
+}
+
+}	//end namespace colormap and rts
+
+#endif
+