diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..60063dc
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,52 @@
+#Specify the version being used aswell as the language
+cmake_minimum_required(VERSION 2.8)
+#Name your project here
+project(bimsym)
+
+
+
+#set the module directory
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}")
+
+#set up CUDA
+find_package(CUDA)
+
+#find OpenCV
+find_package(OpenCV REQUIRED )
+
+find_package(Boost REQUIRED)
+
+#find the STIM library
+find_package(STIM)
+
+#build the executable in the binary directory on MS Visual Studio
+if ( MSVC )
+	SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}")
+	SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
+endif ( MSVC )
+
+#set the include directories
+include_directories(
+	${CMAKE_CURRENT_BINARY_DIR}
+	${CMAKE_CURRENT_SOURCE_DIR}
+	${STIM_INCLUDE_DIRS}
+	${Boost_INCLUDE_DIRS}
+)
+
+#SET(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS};/maxregcount=60)
+
+#create an executable
+cuda_add_executable(bimsym 
+                    bimsym.cu
+                    sources.h
+)
+
+#set the link libraries
+target_link_libraries(bimsym
+					  ${CUDA_cufft_LIBRARY}
+					  ${CUDA_cublas_LIBRARY}
+					  ${OpenCV_LIBS}
+					  )
+
+
+
diff --git a/FindSTIM.cmake b/FindSTIM.cmake
new file mode 100644
index 0000000..bf5762a
--- /dev/null
+++ b/FindSTIM.cmake
@@ -0,0 +1,9 @@
+include(FindPackageHandleStandardArgs)
+
+set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH})
+
+find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)
+
+if(STIM_FOUND)
+    set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR})
+endif()
diff --git a/bimsym.cu b/bimsym.cu
new file mode 100644
index 0000000..8046786
--- /dev/null
+++ b/bimsym.cu
@@ -0,0 +1,411 @@
+#define CUDA_FOUND
+#define USING_OPENCV
+
+#include <iostream>
+
+#include <stim/visualization/colormap.h>
+
+#include <stim/parser/arguments.h>
+#include <stim/math/vec3.h>
+#include <stim/optics/scalarbeam.h>
+#include <stim/optics/scalarmie.h>
+#include <stim/parser/filename.h>
+#include <stim/ui/progressbar.h>
+#include <stim/parser/table.h>
+
+#include <stim/cuda/cudatools.h>
+
+#include <sources.h>						//default source types
+
+#include <chrono>
+#include <thread>
+
+//field parameters
+size_t g_R_nf;								//near-field resolution along X and Y
+double g_S_nf;								//size of the near field simulation
+double g_Z_nf;								//z position of the field slice
+std::vector<stim::vec3<float>> g_sources;	//point source position list
+
+bool use_cuda = true;
+int cuda_device;
+int cuda_major = 3;
+int cuda_minor = 0;
+bool use_backprop = true;
+
+//incident beam parameters
+double g_NA_cond[2];							//condenser numerical aperture, [internal, external]
+double g_lambda;								//incident wavelength
+size_t g_Nl;									//number of orders used to calculate the incident field
+double g_A;										//amplitude of the incident field
+
+//scattering parameters
+stim::scalarcluster<float> spheres;
+//std::vector<double> g_radius(1);				//radius of the scattering sphere
+//std::vector< stim::complex<double> > g_n(1);	//refractive index of the scattering sphere
+size_t g_MC;									//number of monte-carlo samples used to calculate the scattered field
+//std::vector< stim::vec3<float> > g_c(1);		//center of the sphere
+
+//far field parameters
+double g_NA_obj[2];								//objective numerical aperture
+size_t g_padding;								//padding applied to the near-field calculation in order to improve the quality of the band-pass
+double g_backprop = 0;							//number of units that the field has to be backpropagated by in order to approximate a near-field slice passing through a sphere
+size_t g_R_ff;									//number of samples along X and Y in the far field image
+double g_S_ff;									//size (in units) of the far field image
+double g_Z_ff;									//position of the near-field plane projected into the far field (position of the back-propagated plane)
+
+//debugging
+bool g_debug_images = false;
+bool g_verbose = false;
+
+stim::filename outfile("out.bmp");						//output file name
+
+stim::arglist args;							//input arguments
+
+template<typename T>
+__global__ void cuda_absorbance(T* out, T* I, T* I0, size_t N){
+
+	size_t i = blockIdx.x * blockDim.x + threadIdx.x;
+
+	if(i >= N) return;
+
+	out[i] = -log10(I[i] / I0[i]);
+}
+
+/// Crops a 2D image composed of elements of type T
+/// @param dest is a device pointer to memory of size dx*dy that will store the cropped image
+/// @param src is a device pointer to memory of size sx*sy that stores the original image
+/// @param sx is the size of the source image along x
+/// @param sy is the size of the source image along y
+/// @param x is the x-coordinate of the start position of the cropped region within the source image
+/// @param y is the y-coordinate of the start position of the cropped region within the source image
+/// @param dx is the size of the destination image along x
+/// @param dy is the size of the destination image along y
+template<typename T>
+void gpu_absorbance(T* A, T* I, T* I0, size_t N){
+	int threads = stim::maxThreadsPerBlock();												//get the maximum number of threads per block for the CUDA device
+	
+	dim3 blocks( N / threads + 1 );															//calculate the optimal number of blocks
+	cuda_absorbance<T> <<< blocks, threads >>>(A, I, I0, N);
+}
+
+//function to display a progress bar
+void progressbar_thread(double* e){
+
+	unsigned int p = 0;
+	while(p != 100){
+		if(*e > p){
+			p = (unsigned int)(*e);
+			rtsProgressBar(p);
+		}
+	}
+	std::cout<<std::endl;				//put a newline after the completed progress bar
+}
+
+void advertise(){
+	//output advertisement
+		std::cout<<std::endl<<std::endl;
+		std::cout<<"========================================================================="<<std::endl;
+
+		std::cout<<"========================================================================="<<std::endl<<std::endl;
+
+		std::cout<<std::endl<<std::endl;
+
+		std::cout<<args.str();
+}
+
+void set_arguments(int argc, char* argv[]){
+	args.add("help", "prints this help text");
+
+	args.section("Field Slice Parameters");
+	args.add("res", "field resolution (number of samples along X and Y)", "256", "nonzero positive integer");
+	args.add("size", "size of the calculated field", "10", "nonzero positive value");
+	args.add("z", "position of the field slice along the z axis", "0", "any real value");
+
+	args.section("Incoherent Sources");
+	args.add("simage", "image of the source at the focal plane (z=0)", "", "any standard image format");
+	args.add("sgrid", "grid of n x n point sources", "", "n");
+
+	args.section("Incident Field Parameters");
+	args.add("f", "position of a single focal point", "0 0", "x y position (z = 0)");
+	args.add("condenser", "condenser numerical aperture (NA) and center obscuration", "1", "'sin(a1)' or 'sin(a1) sin(a0)', where a0 = center obscuration");
+	args.add("lambda", "incident wavelength", "1", "any real positive value");
+	args.add("wavenumber", "incident wavelength specified in cm^(-1)", "", "any real positive value, sets all units to microns");
+	args.add("order", "order used to calculate the incident field", "20", "any nonzero integer");
+
+	args.section("Mie Scattering Parameters");
+	args.add("c", "center of the scattering sphere", "0 0 0", "any real 3D coordinate");
+	args.add("radius", "radius of the scattering sphere", "0.5", "any real positive value");
+	args.add("n", "complex refractive index of the scattering sphere", "1.4 0.1", "[r] or [r i]");
+	args.add("samples", "number of Monte-Carlo samples used to calculate the scattered field", "1000", "any real positive integer");
+	args.add("spheres", "specify a file defining several spheres", "", "(1/line: radius, real RI, imag RI, x, y, z)");
+
+	args.section("Far Field Parameters");
+	args.add("objective", "objective numerical aperture (NA) and center obscuration", "1", "'sin(b1)' or 'sin(b1) sin(b0)', where b0 = center obscuration");
+	args.add("padding", "padding for the near field calculation (improves far-field calculation)", "1", "any real positive integer");
+	args.add("nobackprop", "don't use back-propagation to deal with sphere intersections");
+
+	args.section("Debugging");
+	args.add("cuda", "CUDA device ID of device to use (-1 disables CUDA)", "0", "integer value");
+	args.add("debug", "outputs a debug image at several important intermediate steps of the Mie scattering calculation");
+	args.add("verbose", "outputs additional information regarding the Mie calculations as they are performed");
+	args.parse(argc, argv);
+
+	if(args["help"].is_set()){
+		advertise();
+		exit(1);
+	}
+}
+
+void output_simulation(){
+	std::cout<<"far field slice-------"<<std::endl;
+	std::cout<<"R: "<<g_R_ff<<" x "<<g_R_ff<<" pixels"<<std::endl;
+	std::cout<<"S: "<<g_S_ff<<" x "<<g_S_ff<<" units"<<std::endl;
+	std::cout<<"Z: "<<g_Z_ff<<" units"<<std::endl;
+
+	std::cout<<"objective NA: "<<g_NA_obj[1];
+	if(g_NA_obj[0] != 0) std::cout<<" (internal = "<<g_NA_obj[0]<<")";
+	std::cout<<std::endl;
+	std::cout<<"padding:      "<<g_padding<<std::endl;
+	std::cout<<"backprop:     "<<g_backprop<<" units"<<std::endl;
+	std::cout<<std::endl;
+
+	std::cout<<"near field slice------"<<std::endl;
+	std::cout<<"R: "<<g_R_nf<<" x "<<g_R_nf<<" pixels"<<std::endl;
+	std::cout<<"S: "<<g_S_nf<<" x "<<g_S_nf<<" units"<<std::endl;
+	std::cout<<"Z: "<<g_Z_nf<<" units"<<std::endl;
+	std::cout<<std::endl;
+
+	std::cout<<"incident field--------"<<std::endl;
+	std::cout<<"condenser NA: "<<g_NA_cond[1];
+	if(g_NA_cond[0] != 0) std::cout<<" (internal = "<<g_NA_cond[0]<<")";
+	std::cout<<std::endl;
+	std::cout<<"wavelength:   "<<g_lambda<<" units"<<std::endl;
+	std::cout<<"order:        "<<g_Nl<<std::endl;
+	std::cout << "# sources:    " << g_sources.size()<<" ";
+	if (g_sources.size() == 1)
+		std::cout << g_sources[0].str();
+	std::cout << std::endl;
+	std::cout<<std::endl;
+
+	std::cout<<"mie scattering-------"<<std::endl;
+	if (spheres.size() == 1) {
+		std::cout << "center:       " << spheres[0].c << " units" << std::endl;
+		std::cout << "radius:       " << spheres[0].radius << " units" << std::endl;
+		std::cout << "ref. index:   " << spheres[0].n << std::endl;
+	}
+	else {
+		for (size_t si = 0; si < spheres.size(); si++) {
+			std::cout << "sphere [" << si << "]: r = " << spheres[si].radius << " n = " << spheres[si].n << " c = " << spheres[si].c << std::endl;
+		}
+	}
+	std::cout << "MC samples:   " << g_MC << std::endl;
+	std::cout<<std::endl;
+}
+
+void load_spheres(std::string filename) {
+	stim::table t;
+	t.read_ascii(filename, ' ');
+
+	std::cout << t.str() << std::endl;
+	size_t ns = t.rows();
+	spheres.resize(ns);
+	
+	std::vector< std::vector< double > > s = t.get_vector< double >();
+
+	
+	for (size_t si = 0; si < ns; si++) {							//for each sphere in the file
+		spheres[si] = stim::scalarmie<float>(s[si][0],				//generate a corresponding scalar mie object in the cluster
+			stim::complex<float>(s[si][1], s[si][2]),
+			stim::vec3<float>(s[si][3], s[si][4], s[si][5]));
+	}
+}
+
+void set_simulation(){
+
+	if(args.nargs() > 0) outfile = args.arg(0);								//the first argument is the filename (if any)
+
+	if (args["cuda"].as_int() == -1) use_cuda = false;
+	if (use_cuda) cuda_device = args["cuda"].as_int();
+	if (!stim::testDevice(cuda_device, cuda_major, cuda_minor)) {			//make sure the device supports the necessary compute capability
+		std::cout << "ERROR: CUDA device doesn't support compute capability " << cuda_major << "." << cuda_minor << std::endl;
+		exit(1);
+	}
+	if(args["debug"].is_set()) g_debug_images = true;						//set debug flags
+	if(args["verbose"].is_set()) g_verbose = true;
+
+	//set the condenser NA
+	g_NA_cond[1] = args["condenser"].as_float(0);							//set the external condenser NA
+	if(args["condenser"].nargs() == 2) g_NA_cond[0] = args["condenser"].as_float(1);	//if the internal NA is specified, use it
+	else                          g_NA_cond[0] = 0;							//otherwise set the internal NA to zero
+
+	//set the incident wavelength
+	if(args["wavenumber"].is_set())											//if the wavelength is given in terms of wavenumber
+		g_lambda = 10000.0 / args["wavenumber"].as_float(0);				//convert to wavelength (assume microns)
+	else
+		g_lambda = args["lambda"].as_float(0);								//otherwise assume the wavelength is specified in microns
+
+	g_Nl = args["order"].as_int(0);											//set the incident field order
+
+	//set the mie scattering parameters
+	g_MC = args["samples"].as_int();										//get the number of monte-carlo samples
+	if (args["spheres"]) {
+		load_spheres(args["spheres"].as_string());
+	}
+	else {																	//otherwise store the sphere specified at the command line
+		float radius = args["radius"].as_float(0);									//set the radius of the sphere
+		stim::complex<float> n;
+		n.real(args["n"].as_float(0));										//set the real part of the refractive index
+		if (args["n"].nargs() == 2)												//if an imaginary part is specified
+			n.imag(args["n"].as_float(1));									//set the imaginary part
+		else n.imag(0);														//otherwise assume the imaginary part is zero
+		stim::vec3<float> c = stim::vec3<float>(args["c"].as_float(0), args["c"].as_float(1), args["c"].as_float(2));
+		spheres.resize(1);
+		spheres[0] = stim::scalarmie<float>(radius, n, c);
+	}
+
+	g_padding = args["padding"].as_int();								//get the near field padding factor
+	g_R_ff = args["res"].as_int(0);
+	g_R_nf = g_R_ff * (2 * g_padding + 1);				//set the field resolution to the first resolution argument
+	g_S_ff = args["size"].as_int(0);
+	g_S_nf = g_S_ff * (2 * g_padding + 1);			//get the size of the field slice (assume square at first)
+	g_Z_ff = args["z"].as_float();										//get the z-axis position for the desired far-field image
+
+	if (args["nobackprop"]) use_backprop = false;
+	if(use_backprop && (abs(g_Z_ff) < spheres[0].radius)){											//if the near field slice is cutting through the sphere
+		g_backprop = g_Z_ff - spheres[0].radius;									//calculate the number of units that the field has to be back-propagated
+		g_Z_nf = spheres[0].radius;
+	}
+	else {
+		g_Z_nf = g_Z_ff;
+	}
+
+	g_NA_obj[1] = args["objective"].as_float(0);								//set the external condenser NA
+	if(args["objective"].nargs() == 2) g_NA_obj[0] = args["objective"].as_float(1);	//if the internal NA is specified, use it
+	else                          g_NA_obj[0] = 0;							//otherwise set the internal NA to zero
+
+	if (args["simage"].is_set())											//if a source image is provided
+		g_sources = simage<float>(g_S_ff, args["simage"].as_string());		//convert the image to a list of sources
+	else if (args["sgrid"]) {
+		if (args["sgrid"].nargs() == 1)
+			g_sources = sgrid<float>(g_S_ff, args["sgrid"].as_int());
+		else
+			g_sources = sgrid<float>(g_S_ff, args["sgrid"].as_int(0), args["sgrid"].as_int(1));
+	}
+	else
+		g_sources.push_back( stim::vec3<float>(args["f"].as_float(0), args["f"].as_float(1), 0) );			//otherwise just put a single point source at the origin
+}
+
+int main(int argc, char* argv[]){
+	set_arguments(argc, argv);								//set the input arguments
+	set_simulation();										//set all simulation parameters
+
+	if(g_verbose) output_simulation();						//output all simulation parameters
+
+	stim::vec3<float> d(0, 0, 1);							//beam direction
+
+	size_t I_bytes = sizeof(float) * g_R_ff * g_R_ff;								//number of bytes in the final intensity images
+
+	//background intensity image
+	float* I0;												//allocate space for the background intensity image
+	float* I;
+		
+	stim::scalarfield<float> E0(g_R_nf, g_R_nf, (float)g_S_nf, (float)g_Z_nf + (float)g_backprop);	//incident nearfield
+	stim::scalarfield<float> E(g_R_nf, g_R_nf, (float)g_S_nf, (float)g_Z_nf);						//create the scattered field
+	stim::scalarfield<float> Eff(g_R_ff, g_R_ff, (float)g_S_ff, (float)g_Z_ff);						//create the far field
+
+	float* X = (float*) malloc( E0.size() * sizeof(float) );
+	float* Y = (float*) malloc( E0.size() * sizeof(float) );
+	float* Z = (float*) malloc( E0.size() * sizeof(float) );
+	E.meshgrid(X, Y, Z, stim::CPUmem);								//create a meshgrid for the scattering plane (different from E0 b/c we can't propagate the internal field)
+	
+
+	E0.meshgrid();													//create meshgrids in the field object for field calculations
+	E.meshgrid();
+
+	if (use_cuda) {
+		cudaSetDevice(cuda_device);
+		HANDLE_ERROR(cudaMalloc(&I0, I_bytes));
+		HANDLE_ERROR(cudaMemset(I0, 0, I_bytes));			//create an array to store the image intensity
+		HANDLE_ERROR(cudaMalloc(&I, I_bytes));
+		HANDLE_ERROR(cudaMemset(I, 0, I_bytes));
+		E0.to_gpu();
+		E.to_gpu();
+	}
+	else {
+		I0 = (float*)calloc(I_bytes, 1);
+		I = (float*)calloc(I_bytes, 1);
+	}
+
+	std::chrono::high_resolution_clock::time_point t_begin = std::chrono::high_resolution_clock::now();
+	double P = 0;
+	std::thread t1(progressbar_thread, &P);							//start the progress bar thread
+	for(size_t s = 0; s < g_sources.size(); s++){
+		stim::scalarbeam<float> beam((float)g_lambda, 1, g_sources[s], d, (float)g_NA_cond[1], (float)g_NA_cond[0]);		//create a focused beam
+		
+		beam.eval(E0, g_Nl);										//evaluate the incident field
+
+		if(g_debug_images && s == 0){
+			stim::filename incident_file = outfile.prefix(outfile.prefix() + "_0_nfi");
+			E0.image(incident_file.str(), stim::complexMag);
+		}
+		E0.crop(g_padding, Eff);
+		Eff.intensity(I0);
+
+		spheres.eval(E, beam, g_Nl, g_MC);
+
+		if(g_debug_images && s == 0){
+			stim::filename nf_file = outfile.prefix(outfile.prefix() + "_1_nfs");
+			E.image(nf_file.str(), stim::complexMag);
+		}
+
+		E.propagate(g_backprop, stim::TAU / g_lambda);					//propagate the field back to the origin
+
+		if(g_debug_images && s == 0){
+			stim::filename propagate_file = outfile.prefix(outfile.prefix()+"_2_nfprop");
+			E.image(propagate_file.str(), stim::complexMag);
+		}
+
+		E.crop(g_padding, Eff);											//crop out the far field slice
+		Eff.intensity(I);												//calculate the far field intensity and add it to the single-beam image
+
+		if(g_debug_images && s == 0){
+			stim::filename cropped_file = outfile.prefix(outfile.prefix() + "_3_cropped");
+			Eff.image(cropped_file.str(), stim::complexMag);
+		}
+		P = (double)(s + 1) / (double)g_sources.size() * 100;
+
+	}
+	t1.join();
+
+	std::chrono::high_resolution_clock::time_point t_end = std::chrono::high_resolution_clock::now();
+	std::chrono::duration<double> t_total = (t_end - t_begin);
+	std::cout<<"Time: "<<t_total.count()<<" s"<<std::endl;
+
+	//save the near-field image
+	if (g_debug_images) {
+		stim::filename nearfield_real_file = outfile.prefix(outfile.prefix() + "_3_detector_real");
+		E.image(nearfield_real_file.str(), stim::complexReal);
+		stim::filename nearfield_imag_file = outfile.prefix(outfile.prefix() + "_3_detector_imag");
+		E.image(nearfield_imag_file.str(), stim::complexImaginary);
+	}
+	
+	//save the background and single-beam images
+	stim::filename background_file = outfile.prefix(outfile.prefix() + "background");
+	stim::filename intensity_file = outfile.prefix(outfile.prefix() + "intensity");
+
+	if (use_cuda) {
+		stim::gpu2image<float>(I0, background_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
+		stim::gpu2image<float>(I, intensity_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
+	}
+	else {
+		stim::cpu2image<float>(I0, background_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
+		stim::cpu2image<float>(I, intensity_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
+	}
+
+	
+	float* A;
+	HANDLE_ERROR( cudaMalloc(&A, sizeof(float) * g_R_ff * g_R_ff) );
+	gpu_absorbance(A, I, I0, g_R_ff * g_R_ff);
+	stim::filename absorbance_file = outfile.prefix(outfile.prefix() + "absorbance");
+	stim::gpu2image<float>(A, absorbance_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
+}
\ No newline at end of file
diff --git a/sources.h b/sources.h
new file mode 100644
index 0000000..0a4aa7a
--- /dev/null
+++ b/sources.h
@@ -0,0 +1,48 @@
+template<typename T>
+std::vector< stim::vec3<T> > simage(T size, std::string imagefile) {
+	std::vector< stim::vec3<T> > sources;
+
+	stim::image<unsigned char> src(imagefile);
+
+	float xmin = -size / 2;
+	float ymin = -size / 2;
+
+	size_t xi, yi;
+	float xa, ya;
+	for (yi = 0; yi < src.height(); yi++) {
+		for (xi = 0; xi < src.width(); xi++) {
+			if (src(xi, yi)) {
+				xa = (float)xi / (float)(src.width() - 1);
+				ya = (float)yi / (float)(src.height() - 1);
+
+				sources.push_back(stim::vec3<float>(xa * size + xmin, ya * size + ymin, 0));
+
+			}
+		}
+	}
+
+	return sources;
+}
+
+template<typename T>
+std::vector< stim::vec3<T> > sgrid(T size, unsigned int n, unsigned int m = 0) {
+	std::vector< stim::vec3<T> > sources;
+
+	if (m == 0) m = n;
+
+	float xmin = -size / 2 + size/(n + 1);
+	float ymin = -size / 2 + size/(m + 1);
+
+	size_t xi, yi;
+	float xa, ya;
+	for (yi = 0; yi < m; yi++) {
+		for (xi = 0; xi < n; xi++) {
+				xa = (float)xi / (float)(n + 1);
+				ya = (float)yi / (float)(m + 1);
+
+				sources.push_back(stim::vec3<float>(xa * size + xmin, ya * size + ymin, 0));
+		}
+	}
+
+	return sources;
+}
\ No newline at end of file
--
libgit2 0.21.4