updated optics files and FindSTIM to support new CMake standards

David Mayerich
1 parent 59b0e5f5
Showing 7 changed files with 129 additions and 79 deletions Show diff stats
cmake/FindSTIM.cmake
stim/envi/envi.h
stim/math/complex.h
stim/optics/scalarbeam.h
stim/optics/scalarfield.h
stim/optics/scalarmie.h
stim/parser/filename.h
@@ -3,20 +3,25 @@
 include(FindPackageHandleStandardArgs)
-set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH})
+set(STIM_ROOT $ENV{STIM_ROOT})
-find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)
+IF(NOT UNIX)
+    IF(NOT STIM_ROOT)
+        MESSAGE("ERROR: STIM_ROOT environment variable must be set!")
+    ENDIF(NOT STIM_ROOT)
+
+    FIND_PATH(STIM_INCLUDE_DIRS DOC "Path to GLFW include directory."
+            NAMES stim/image/image.h
+            PATHS ${STIM_ROOT})
+ENDIF(NOT UNIX)
+
+find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIRS)
 if(STIM_FOUND)
-    set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR})
+    set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIRS})
 elseif(STIM_FOUND)
-	#if the STIM library isn't found, download it
-	#file(REMOVE_RECURSE ${CMAKE_BINARY_DIR}/stimlib)	#remove the stimlib directory if it exists
-	#set(STIM_GIT "https://git.stim.ee.uh.edu/codebase/stimlib.git")
-	#execute_process(COMMAND git clone --depth 1 ${STIM_GIT} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
-	#set(STIM_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/stimlib" CACHE TYPE PATH)
-	message("STIM library not found. Set the STIMLIB_PATH environment variable to the STIMLIB location.")
+	message("STIM library not found. Set the STIM_ROOT environment variable to the STIM location.")
 	message("STIMLIB can be found here: https://git.stim.ee.uh.edu/codebase/stimlib")
 endif(STIM_FOUND)
-find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)
+find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIRS)
@@ -128,7 +128,10 @@ public:
 			return false;
 		size_t targetBytes = H.data_bytes();			//get the number of bytes that SHOULD be in the data file
 		size_t bytes = stim::file_size(fname);
-		if(bytes != targetBytes) return false;			//if the data doesn't match the header, return false
+		if (bytes != targetBytes) {
+			std::cout << "ERROR: File size mismatch. Based on the header, a " << targetBytes << " byte file was expected. The data file contains " << bytes << " bytes." << std::endl;
+			return false;			//if the data doesn't match the header, return false
+		}
 		return true;									//otherwise everything looks fine
 	}
@@ -11,7 +11,7 @@
 namespace stim
 {
-    enum complexComponentType {complexReal, complexImaginary, complexMag, complexIntensity};
+    enum complexComponentType {complexFull, complexReal, complexImaginary, complexMag, complexIntensity};
 template <class T>
 struct complex
@@ -415,8 +415,8 @@ public:
 		A = amplitude;
 		f = focal_point;
 		d = direction.norm();					//make sure that the direction vector is normalized (makes calculations more efficient later on)
-		NA[0] = numerical_aperture;
-		NA[1] = center_obsc;
+		NA[0] = center_obsc;
+		NA[1] = numerical_aperture;
 	}
 	///Numerical Aperature functions
@@ -425,21 +425,32 @@ public:
 		NA[0] = (T)0;
 		NA[1] = na;
 	}
-	void setNA(T na0, T na1)
+	void setNA(T na_in, T na_out)
 	{
-		NA[0] = na0;
-		NA[1] = na1;
+		NA[0] = na_in;
+		NA[1] = na_out;
 	}
 	//Monte-Carlo decomposition into plane waves
 	std::vector< scalarwave<T> > mc(size_t N = 100000) const{
-		std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]);	//generate a random set of N vectors forming a focus
-		std::vector< scalarwave<T> > samples(N);											//create a vector of plane waves
 		T kmag = (T)stim::TAU / lambda;								//calculate the wavenumber
+		stim::vec3<T> kpw;											//declare the new k-vector based on the focused plane wave direction
 		stim::complex<T> apw;										//allocate space for the amplitude at the focal point
+
+		//deal with the degenerative case where the outer NA is 0, in which case the beam will be specified as a single plane wave
+		if (NA[1] == 0) {
+			std::vector< scalarwave<T> > samples(1);				//create a vector containing 1 sample
+			kpw = d * kmag;											//calculate the k-vector for the plane wave (beam direction scaled by wavenumber)
+			apw = A * exp(stim::complex<T>(0, kpw.dot(-f)));		//calculate the amplitude of the plane wave
+			samples[0] = scalarwave<T>(kpw, apw);					//create a plane wave based on the direction
+			return samples;
+		}
+
+		//otherwise, evaluate the system using N monte-carlo samples
+		std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]);	//generate a random set of N vectors forming a focus
+		std::vector< scalarwave<T> > samples(N);											//create a vector of plane waves		
 		T a = (T)(stim::TAU * ( (1 - cos(asin(NA[0]))) - (1 - cos(asin(NA[1])))) / (double)N);			//constant value weights plane waves based on the aperture and number of samples (N)
-		stim::vec3<T> kpw;											//declare the new k-vector based on the focused plane wave direction
 		for(size_t i=0; i<N; i++){									//for each sample
 			kpw = dirs[i] * kmag;									//calculate the k-vector for the new plane wave
 			apw = a * A * exp(stim::complex<T>(0, kpw.dot(-f)));	//calculate the amplitude for the new plane wave
@@ -449,17 +460,18 @@ public:
 	}
 	void eval(stim::scalarfield<T>& E, T* X, T* Y, T* Z, int order = 500){
-		cpu_scalar_psf_cart<T>(E.ptr(), E.size(), X, Y, Z, lambda, A, f, d, NA[0], NA[1], order, E.spacing());
+		cpu_scalar_psf_cart<T>(E.ptr(), E.size(), X, Y, Z, lambda, A, f, d, NA[1], NA[0], order, E.spacing());
 	}
 	/// Evaluate the beam to a scalar field using Debye focusing
 	void eval(stim::scalarfield<T>& E, int order = 500){
-		E.meshgrid();								//calculate a meshgrid if one isn't created
+		E.meshgrid();															//calculate a meshgrid if one isn't created
+
 		if(E.gpu())
-			gpu_scalar_psf_cart<T>(E.ptr(), E.size(), E.x(), E.y(), E.z(), lambda, A, f, d, NA[0], NA[1], order, E.spacing());
+			gpu_scalar_psf_cart<T>(E.ptr(), E.size(), E.x(), E.y(), E.z(), lambda, A, f, d, NA[1], NA[0], order, E.spacing());
 		else
-			cpu_scalar_psf_cart<T>(E.ptr(), E.size(), E.x(), E.y(), E.z(), lambda, A, f, d, NA[0], NA[1], order, E.spacing());
-		//eval(E, E.x(), E.y(), E.z(), order);
+			cpu_scalar_psf_cart<T>(E.ptr(), E.size(), E.x(), E.y(), E.z(), lambda, A, f, d, NA[1], NA[0], order, E.spacing());
+		
 	}
 	/// Calculate the field at a given point
@@ -538,14 +538,31 @@ public:
 			memset(E, 0, grid_bytes());
 	}
-	void image(std::string filename, stim::complexComponentType type = complexMag, stim::colormapType cmap = stim::cmBrewer, T minval = 0, T maxval = 0){
+	//write the field as a raw image to disk
+	void image_raw(std::string filename) {
+		if (loc == GPUmem) {
+			T* cpu_field = (T*)malloc(sizeof(T) * 2 * size());												//allocate temporary space on the CPU to store the image
+			HANDLE_ERROR(cudaMemcpy(cpu_field, E, sizeof(T) * 2 * size(), cudaMemcpyDeviceToHost));			//copy the field data from the GPU to the CPU	
+			std::ofstream outfile(filename, std::ios::binary);												//open a binary file for writing
+			outfile.write((const char*)cpu_field, sizeof(T) * 2 * size());									//write the raw field to disk
+			free(cpu_field);																				//free memory
+		}
+		//if the data is stored on the CPU, no need to cut it - just save it to disk
+		else {
+			std::ofstream outfile(filename, std::ios::binary);						//open a binary file for writing
+			outfile.write((const char*)E, sizeof(T) * 2 * size());					//write the raw field to disk
+		}
+	}
+	void image(std::string filename, stim::complexComponentType type = complexMag, stim::colormapType cmap = stim::cmBrewer, T minval = 0, T maxval = 0){
+		
 		if(loc == GPUmem){
 			T* image;
 			HANDLE_ERROR( cudaMalloc(&image, sizeof(T) * size()) );
 			int threads = stim::maxThreadsPerBlock();												//get the maximum number of threads per block for the CUDA device
 			dim3 blocks( R[0] * R[1] / threads + 1 );												//create a 1D array of blocks
+			// if the data is located on the GPU, execute a kernel that converts the image to the requested data type
 			switch(type){
 			case complexMag:
 				cuda_abs<T><<< blocks, threads >>>(image, E, size());
@@ -559,6 +576,9 @@ public:
 			case complexIntensity:
 				cuda_intensity<T><<< blocks, threads >>>(image, E, size());
 				break;
+			default:
+				std::cout << "ERROR: invalid complex component specified." << std::endl;
+				exit(1);
 			}
 			if (minval == maxval)
 				stim::gpu2image<T>(image, filename, R[0], R[1], cmap);
@@ -88,11 +88,28 @@ void A_coefficients(stim::complex&lt;T&gt;* A, T a, T k, stim::complex&lt;T&gt; n, int Nl){
 }
 #define LOCAL_NL	16
+
+/// CUDA kernel for calculating the Mie scattering solution given a set of points (x, y, z), a list of plane waves, and a look-up table for Bl*hl
+/// @param E (GPU) is the N x N destination scalar field
+/// @param N is the number of sample points to evaluate
+/// @param x (GPU) is the grid of X coordinates for each point in E
+/// @param y (GPU) is the grid of Y coordinates for each point in E
+/// @param z (GPU) is the grid of Z coordinates for each point in E
+/// @param W (GPU) is an array of coherent scalar plane waves incident on the Mie scatterer
+/// @param nW is the number of plane waves to evaluate (sum)
+/// @param a is the radius of the Mie scatterer
+/// @param n is the complex refractive index of the Mie scatterer
+/// @param c is the position of the sphere in (x, y, z)
+/// @param hB (GPU) is a look-up table of Hankel functions (equally spaced in distance from the sphere) pre-multiplied with scattering coefficients
+/// @param kr_min is the minimum kr value in the hB look-up table (corresponding to the closest point to the sphere)
+/// @param dkr is the spacing (in kr) between samples of the hB look-up table
+/// @param N_hB is the number of samples in hB
+/// @param Nl is the order of the calculation (number of Hankel function orders)
 template<typename T>
 __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::vec3<T> c, stim::complex<T>* hB, T r_min, T dr, size_t N_hB, int Nl){
-	extern __shared__ stim::complex<T> shared_hB[];		//declare the list of waves in shared memory
+	extern __shared__ stim::complex<T> shared_hB[];						//declare the list of waves in shared memory
-	size_t i = blockIdx.x * blockDim.x + threadIdx.x;				//get the index into the array
+	size_t i = blockIdx.x * blockDim.x + threadIdx.x;					//get the index into the sample array (sample point associated with this thread)
 	if(i >= N) return;													//exit if this thread is outside the array
 	stim::vec3<T> p;
 	(x == NULL) ? p[0] = 0 : p[0] = x[i];								// test for NULL values and set positions
@@ -101,14 +118,14 @@ __global__ void cuda_scalar_mie_scatter(stim::complex&lt;T&gt;* E, size_t N, T* x, T* 
 	p = p - c;
 	T r = p.len();														//calculate the distance from the sphere
 	if(r < a) return;													//exit if the point is inside the sphere (we only calculate the internal field)
-	T fij = (r - r_min)/dr;											//FP index into the spherical bessel LUT
+	T fij = (r - r_min)/dr;												//FP index into the spherical bessel LUT
 	size_t ij = (size_t) fij;											//convert to an integral index
 	T alpha = fij - ij;													//calculate the fractional portion of the index
-	size_t n0j = ij * (Nl + 1);												//start of the first entry in the LUT
-	size_t n1j = (ij+1) * (Nl + 1);											//start of the second entry in the LUT
+	size_t n0j = ij * (Nl + 1);											//start of the first entry in the LUT
+	size_t n1j = (ij+1) * (Nl + 1);										//start of the second entry in the LUT
 	T cos_phi;	
-	T Pl_2, Pl_1, Pl;														//declare registers to store the previous two Legendre polynomials
+	T Pl_2, Pl_1, Pl;													//declare registers to store the previous two Legendre polynomials
 	stim::complex<T> hBl;
 	stim::complex<T> Ei = 0;											//create a register to store the result
@@ -154,25 +171,28 @@ __global__ void cuda_scalar_mie_scatter(stim::complex&lt;T&gt;* E, size_t N, T* x, T* 
 	E[i] += Ei;															//copy the result to device memory
 }
-///Calculate the scalar Mie scattered field on the GPU
+///Calculate the scalar Mie scattered field on the GPU when a list of GPU-based pre-multiplied Hankel functions are available
 /// @param E (GPU) is the N x N destination scalar field
 /// @param N is the number fo elements of the scalar field in each direction
 /// @param x (GPU) is the grid of X coordinates for each point in E
 /// @param y (GPU) is the grid of Y coordinates for each point in E
 /// @param z (GPU) is the grid of Z coordinates for each point in E
-/// @param W (CPU) is an array of coherent scalar plane waves incident on the Mie scatterer
+/// @param W (GPU) is an array of coherent scalar plane waves incident on the Mie scatterer
+/// @param nW is the number of plane waves to evaluate (sum)
 /// @param a is the radius of the Mie scatterer
 /// @param n is the complex refractive index of the Mie scatterer
-/// @param r_spacing is the minimum distance between r values of the sample points in E (used to calculate look-up tables)
+/// @param c is the position of the sphere in (x, y, z)
+/// @param hB (GPU) is a look-up table of Hankel functions (equally spaced in distance from the sphere) pre-multiplied with scattering coefficients
+/// @param kr_min is the minimum kr value in the hB look-up table (corresponding to the closest point to the sphere)
+/// @param dkr is the spacing (in kr) between samples of the hB look-up table
+/// @param N_hB is the number of samples in hB
+/// @param Nl is the order of the calculation (number of Hankel function orders)
 template<typename T>
 void gpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::vec3<T> c, stim::complex<T>* hB, T kr_min, T dkr, size_t N_hB, size_t Nl){
-	size_t max_shared_mem = stim::sharedMemPerBlock();	
-	size_t hBl_array = sizeof(stim::complex<T>) * (Nl + 1);
-	//std::cout<<"hl*Bl array size:  "<<hBl_array<<std::endl;
-	//std::cout<<"shared memory:     "<<max_shared_mem<<std::endl;
-	int threads = (int)((max_shared_mem / hBl_array) / 32 * 32);
-	//std::cout<<"threads per block: "<<threads<<std::endl;
+	size_t max_shared_mem = stim::sharedMemPerBlock();								//get the amount of shared memory per block
+	size_t hBl_array = sizeof(stim::complex<T>) * (Nl + 1);							//calculate the number of bytes required to store the LUT corresponding to a single sample in shared memory
+	int threads = (int)((max_shared_mem / hBl_array) / 32 * 32);					//calculate the optimal number of threads per block (make sure it's divisible by the number of warps - 32)
 	dim3 blocks((unsigned)(N / threads + 1));										//calculate the optimal number of blocks
 	size_t shared_mem;
@@ -197,7 +217,7 @@ __global__ void cuda_dist(T* r, T* x, T* y, T* z, size_t N, stim::vec3&lt;T&gt; c = st
 ///Calculate the scalar Mie scattered field on the GPU
 /// @param E (GPU) is the N x N destination scalar field
-/// @param N is the number fo elements of the scalar field in each direction
+/// @param N is the number of sample points of the scalar field
 /// @param x (GPU) is the grid of X coordinates for each point in E
 /// @param y (GPU) is the grid of Y coordinates for each point in E
 /// @param z (GPU) is the grid of Z coordinates for each point in E
@@ -211,29 +231,28 @@ void gpu_scalar_mie_scatter(stim::complex&lt;T&gt;* E, size_t N, T* x, T* y, T* z, std
 	//calculate the necessary number of orders required to represent the scattered field
 	T k = W[0].kmag();
-	int Nl = (int)ceil(k*a + 4 * cbrt( k * a ) + 2);
+	int Nl = (int)ceil(k*a + 4 * cbrt( k * a ) + 2);			//calculate the number of orders required to represent the sphere
 	if(Nl < LOCAL_NL) Nl = LOCAL_NL;							//always do at least the minimum number of local operations (kernel optimization)
-	//std::cout<<"Nl: "<<Nl<<std::endl;
 	//calculate the scattering coefficients for the sphere
 	stim::complex<T>* B = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * (Nl + 1) );	//allocate space for the scattering coefficients
-	B_coefficients(B, a, k, n, Nl);	
+	B_coefficients(B, a, k, n, Nl);																//calculate the scattering coefficients	
 	//	PLANE WAVES
-	stim::scalarwave<T>* dev_W;																//allocate space and copy plane waves
+	stim::scalarwave<T>* dev_W;																	//allocate space and copy plane waves
 	HANDLE_ERROR( cudaMalloc(&dev_W, sizeof(stim::scalarwave<T>) * W.size()) );
 	HANDLE_ERROR( cudaMemcpy(dev_W, &W[0], sizeof(stim::scalarwave<T>) * W.size(), cudaMemcpyHostToDevice) );
 	// BESSEL FUNCTION LOOK-UP TABLE
-	//calculate the distance from the sphere center
-	T* dev_r;
-	HANDLE_ERROR( cudaMalloc(&dev_r, sizeof(T) * N) );
+	//calculate the distance from the sphere center at each sample point and store the result in dev_r
+	T* dev_r;																					//declare the device pointer to hold the distance from the sphere center
+	HANDLE_ERROR( cudaMalloc(&dev_r, sizeof(T) * N) );											//allocate space for the array of distances
-	int threads = stim::maxThreadsPerBlock();
-	dim3 blocks((unsigned)(N / threads + 1));
-	cuda_dist<T> <<< blocks, threads >>>(dev_r, x, y, z, N, c);
+	int threads = stim::maxThreadsPerBlock();													//query the device to find the maximum number of threads per block
+	dim3 blocks((unsigned)(N / threads + 1));													//calculate the number of blocks necessary to evaluate the total number of sample points N
+	cuda_dist<T> <<< blocks, threads >>>(dev_r, x, y, z, N, c);									//calculate the distance
-	//Find the minimum and maximum values of r
+	//Use the cuBLAS library to find the minimum and maximum distances from the sphere center. This will be used to create a look-up table for the Hankel functions
     cublasStatus_t stat;
     cublasHandle_t handle;
@@ -261,49 +280,40 @@ void gpu_scalar_mie_scatter(stim::complex&lt;T&gt;* E, size_t N, T* x, T* y, T* z, std
 	HANDLE_ERROR( cudaMemcpy(&r_min, dev_r + i_min, sizeof(T), cudaMemcpyDeviceToHost) );		//copy the min and max values from the device to the CPU
 	HANDLE_ERROR( cudaMemcpy(&r_max, dev_r + i_max, sizeof(T), cudaMemcpyDeviceToHost) );
-	r_min = max(r_min, a);									//if the radius of the sphere is larger than r_min, change r_min to a (the scattered field doesn't exist inside the sphere)
-
-	//size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1);			//number of values in the look-up table based on the user-specified spacing along r
-	size_t N_hB_lut = (size_t)((r_max - r_min) / r_spacing + 1);
+	r_min = max(r_min, a);												//if the radius of the sphere is larger than r_min, change r_min to a (the scattered field doesn't exist inside the sphere)
-	//T kr_min = k * r_min;
-	//T kr_max = k * r_max;
+	size_t N_hB_lut = (size_t)((r_max - r_min) / r_spacing + 1);		//number of values in the look-up table based on the user-specified spacing along r
-	//temporary variables
-	double vm;															//allocate space to store the return values for the bessel function calculation
+	//Declare and evaluate variables used to calculate the spherical Bessel functions and store them temporarily on the CPU
+	double vm;																	//allocate space to store the return values for the bessel function calculation
 	double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
 	double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
 	double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
 	double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
-	size_t hB_bytes = sizeof(stim::complex<T>) * (Nl+1) * N_hB_lut;
-	stim::complex<T>* hB_lut = (stim::complex<T>*) malloc(hB_bytes);													//pointer to the look-up table
-	T dr = (r_max - r_min) / (N_hB_lut-1);												//distance between values in the LUT
-	//std::cout<<"LUT jl bytes:  "<<hB_bytes<<std::endl;
-	stim::complex<T> hl;
-	for(size_t ri = 0; ri < N_hB_lut; ri++){													//for each value in the LUT
+	size_t hB_bytes = sizeof(stim::complex<T>) * (Nl+1) * N_hB_lut;						//calculate the number of bytes necessary to store the Hankel function LUT
+	stim::complex<T>* hB_lut = (stim::complex<T>*) malloc(hB_bytes);					//pointer to the look-up table
+	T dr = (r_max - r_min) / (N_hB_lut-1);												//calculate the optimal distance between values in the LUT
+	stim::complex<T> hl;																//declare a complex value for the Hankel function result
+	for(size_t ri = 0; ri < N_hB_lut; ri++){											//for each value in the LUT
 		stim::bessjyv_sph<double>(Nl, k * (r_min + ri * dr), vm, jv, yv, djv, dyv);		//compute the list of spherical bessel functions from [0 Nl]
-		for(size_t l = 0; l <= Nl; l++){													//for each order
-			hl.r = (T)jv[l];
+		for(size_t l = 0; l <= Nl; l++){												//for each order
+			hl.r = (T)jv[l];															//generate the spherical Hankel function from the Bessel functions
 			hl.i = (T)yv[l];
-			hB_lut[ri * (Nl + 1) + l] = hl * B[l];										//store the bessel function result
-			//std::cout<<hB_lut[ri * (Nl + 1) + l]<<std::endl;
+			hB_lut[ri * (Nl + 1) + l] = hl * B[l];										//pre-multiply the Hankel function by the scattering coefficients
 		}
 	}
-	//T* real_lut = (T*) malloc(hB_bytes/2);
-	//stim::real(real_lut, hB_lut, N_hB_lut);
-	//stim::cpu2image<T>(real_lut, "hankel_B.bmp", Nl+1, N_hB_lut, stim::cmBrewer);
-	//Allocate device memory and copy everything to the GPU
+	//Copy the pre-multiplied Hankel function look-up table to the GPU - this LUT gives a list of uniformly spaced Hankel function values pre-multiplied by scattering coefficients
 	stim::complex<T>* dev_hB_lut;
 	HANDLE_ERROR( cudaMalloc(&dev_hB_lut, hB_bytes) );
 	HANDLE_ERROR( cudaMemcpy(dev_hB_lut, hB_lut, hB_bytes, cudaMemcpyHostToDevice) );
-	//std::cout << "r_min: " << r_min << std::endl;
-	//std::cout << "dr: " << dr << std::endl;
+
+	//calculate the Mie scattering solution on the GPU
 	gpu_scalar_mie_scatter<T>(E, N, x, y, z, dev_W, W.size(), a, n, c, dev_hB_lut, r_min, dr, N_hB_lut, Nl);
-	HANDLE_ERROR(cudaMemcpy(E, E, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost));			//copy the field from device memory
+	//HANDLE_ERROR(cudaMemcpy(E, E, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost));			//copy the field from device memory
 	HANDLE_ERROR(cudaFree(dev_hB_lut));
 	HANDLE_ERROR(cudaFree(dev_r));
@@ -238,7 +238,7 @@ public:
 	}
 	/// Create a matching file locator with a prefix s
-	stim::filename prefix(std::string s){
+	stim::filename with_prefix(std::string s){
 		stim::filename result = *this;
 		result._prefix = s;
 		return result;