Commit 6a53ac0c677590fee03b1fc26a0f56f8e07c5055

Authored by David Mayerich
1 parent 59b0e5f5

updated optics files and FindSTIM to support new CMake standards

cmake/FindSTIM.cmake
... ... @@ -3,20 +3,25 @@
3 3  
4 4 include(FindPackageHandleStandardArgs)
5 5  
6   -set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH})
  6 +set(STIM_ROOT $ENV{STIM_ROOT})
7 7  
8   -find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)
  8 +IF(NOT UNIX)
  9 + IF(NOT STIM_ROOT)
  10 + MESSAGE("ERROR: STIM_ROOT environment variable must be set!")
  11 + ENDIF(NOT STIM_ROOT)
  12 +
  13 + FIND_PATH(STIM_INCLUDE_DIRS DOC "Path to GLFW include directory."
  14 + NAMES stim/image/image.h
  15 + PATHS ${STIM_ROOT})
  16 +ENDIF(NOT UNIX)
  17 +
  18 +find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIRS)
9 19  
10 20 if(STIM_FOUND)
11   - set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR})
  21 + set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIRS})
12 22 elseif(STIM_FOUND)
13   - #if the STIM library isn't found, download it
14   - #file(REMOVE_RECURSE ${CMAKE_BINARY_DIR}/stimlib) #remove the stimlib directory if it exists
15   - #set(STIM_GIT "https://git.stim.ee.uh.edu/codebase/stimlib.git")
16   - #execute_process(COMMAND git clone --depth 1 ${STIM_GIT} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
17   - #set(STIM_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/stimlib" CACHE TYPE PATH)
18   - message("STIM library not found. Set the STIMLIB_PATH environment variable to the STIMLIB location.")
  23 + message("STIM library not found. Set the STIM_ROOT environment variable to the STIM location.")
19 24 message("STIMLIB can be found here: https://git.stim.ee.uh.edu/codebase/stimlib")
20 25 endif(STIM_FOUND)
21 26  
22   -find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)
  27 +find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIRS)
... ...
stim/envi/envi.h
... ... @@ -128,7 +128,10 @@ public:
128 128 return false;
129 129 size_t targetBytes = H.data_bytes(); //get the number of bytes that SHOULD be in the data file
130 130 size_t bytes = stim::file_size(fname);
131   - if(bytes != targetBytes) return false; //if the data doesn't match the header, return false
  131 + if (bytes != targetBytes) {
  132 + std::cout << "ERROR: File size mismatch. Based on the header, a " << targetBytes << " byte file was expected. The data file contains " << bytes << " bytes." << std::endl;
  133 + return false; //if the data doesn't match the header, return false
  134 + }
132 135 return true; //otherwise everything looks fine
133 136  
134 137 }
... ...
stim/math/complex.h
... ... @@ -11,7 +11,7 @@
11 11  
12 12 namespace stim
13 13 {
14   - enum complexComponentType {complexReal, complexImaginary, complexMag, complexIntensity};
  14 + enum complexComponentType {complexFull, complexReal, complexImaginary, complexMag, complexIntensity};
15 15  
16 16 template <class T>
17 17 struct complex
... ...
stim/optics/scalarbeam.h
... ... @@ -415,8 +415,8 @@ public:
415 415 A = amplitude;
416 416 f = focal_point;
417 417 d = direction.norm(); //make sure that the direction vector is normalized (makes calculations more efficient later on)
418   - NA[0] = numerical_aperture;
419   - NA[1] = center_obsc;
  418 + NA[0] = center_obsc;
  419 + NA[1] = numerical_aperture;
420 420 }
421 421  
422 422 ///Numerical Aperature functions
... ... @@ -425,21 +425,32 @@ public:
425 425 NA[0] = (T)0;
426 426 NA[1] = na;
427 427 }
428   - void setNA(T na0, T na1)
  428 + void setNA(T na_in, T na_out)
429 429 {
430   - NA[0] = na0;
431   - NA[1] = na1;
  430 + NA[0] = na_in;
  431 + NA[1] = na_out;
432 432 }
433 433  
434 434 //Monte-Carlo decomposition into plane waves
435 435 std::vector< scalarwave<T> > mc(size_t N = 100000) const{
436 436  
437   - std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]); //generate a random set of N vectors forming a focus
438   - std::vector< scalarwave<T> > samples(N); //create a vector of plane waves
439 437 T kmag = (T)stim::TAU / lambda; //calculate the wavenumber
  438 + stim::vec3<T> kpw; //declare the new k-vector based on the focused plane wave direction
440 439 stim::complex<T> apw; //allocate space for the amplitude at the focal point
  440 +
  441 + //deal with the degenerative case where the outer NA is 0, in which case the beam will be specified as a single plane wave
  442 + if (NA[1] == 0) {
  443 + std::vector< scalarwave<T> > samples(1); //create a vector containing 1 sample
  444 + kpw = d * kmag; //calculate the k-vector for the plane wave (beam direction scaled by wavenumber)
  445 + apw = A * exp(stim::complex<T>(0, kpw.dot(-f))); //calculate the amplitude of the plane wave
  446 + samples[0] = scalarwave<T>(kpw, apw); //create a plane wave based on the direction
  447 + return samples;
  448 + }
  449 +
  450 + //otherwise, evaluate the system using N monte-carlo samples
  451 + std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]); //generate a random set of N vectors forming a focus
  452 + std::vector< scalarwave<T> > samples(N); //create a vector of plane waves
441 453 T a = (T)(stim::TAU * ( (1 - cos(asin(NA[0]))) - (1 - cos(asin(NA[1])))) / (double)N); //constant value weights plane waves based on the aperture and number of samples (N)
442   - stim::vec3<T> kpw; //declare the new k-vector based on the focused plane wave direction
443 454 for(size_t i=0; i<N; i++){ //for each sample
444 455 kpw = dirs[i] * kmag; //calculate the k-vector for the new plane wave
445 456 apw = a * A * exp(stim::complex<T>(0, kpw.dot(-f))); //calculate the amplitude for the new plane wave
... ... @@ -449,17 +460,18 @@ public:
449 460 }
450 461  
451 462 void eval(stim::scalarfield<T>& E, T* X, T* Y, T* Z, int order = 500){
452   - cpu_scalar_psf_cart<T>(E.ptr(), E.size(), X, Y, Z, lambda, A, f, d, NA[0], NA[1], order, E.spacing());
  463 + cpu_scalar_psf_cart<T>(E.ptr(), E.size(), X, Y, Z, lambda, A, f, d, NA[1], NA[0], order, E.spacing());
453 464 }
454 465  
455 466 /// Evaluate the beam to a scalar field using Debye focusing
456 467 void eval(stim::scalarfield<T>& E, int order = 500){
457   - E.meshgrid(); //calculate a meshgrid if one isn't created
  468 + E.meshgrid(); //calculate a meshgrid if one isn't created
  469 +
458 470 if(E.gpu())
459   - gpu_scalar_psf_cart<T>(E.ptr(), E.size(), E.x(), E.y(), E.z(), lambda, A, f, d, NA[0], NA[1], order, E.spacing());
  471 + gpu_scalar_psf_cart<T>(E.ptr(), E.size(), E.x(), E.y(), E.z(), lambda, A, f, d, NA[1], NA[0], order, E.spacing());
460 472 else
461   - cpu_scalar_psf_cart<T>(E.ptr(), E.size(), E.x(), E.y(), E.z(), lambda, A, f, d, NA[0], NA[1], order, E.spacing());
462   - //eval(E, E.x(), E.y(), E.z(), order);
  473 + cpu_scalar_psf_cart<T>(E.ptr(), E.size(), E.x(), E.y(), E.z(), lambda, A, f, d, NA[1], NA[0], order, E.spacing());
  474 +
463 475 }
464 476  
465 477 /// Calculate the field at a given point
... ...
stim/optics/scalarfield.h
... ... @@ -538,14 +538,31 @@ public:
538 538 memset(E, 0, grid_bytes());
539 539 }
540 540  
541   - void image(std::string filename, stim::complexComponentType type = complexMag, stim::colormapType cmap = stim::cmBrewer, T minval = 0, T maxval = 0){
  541 + //write the field as a raw image to disk
  542 + void image_raw(std::string filename) {
  543 + if (loc == GPUmem) {
  544 + T* cpu_field = (T*)malloc(sizeof(T) * 2 * size()); //allocate temporary space on the CPU to store the image
  545 + HANDLE_ERROR(cudaMemcpy(cpu_field, E, sizeof(T) * 2 * size(), cudaMemcpyDeviceToHost)); //copy the field data from the GPU to the CPU
  546 + std::ofstream outfile(filename, std::ios::binary); //open a binary file for writing
  547 + outfile.write((const char*)cpu_field, sizeof(T) * 2 * size()); //write the raw field to disk
  548 + free(cpu_field); //free memory
  549 + }
  550 + //if the data is stored on the CPU, no need to cut it - just save it to disk
  551 + else {
  552 + std::ofstream outfile(filename, std::ios::binary); //open a binary file for writing
  553 + outfile.write((const char*)E, sizeof(T) * 2 * size()); //write the raw field to disk
  554 + }
  555 + }
542 556  
  557 + void image(std::string filename, stim::complexComponentType type = complexMag, stim::colormapType cmap = stim::cmBrewer, T minval = 0, T maxval = 0){
  558 +
543 559 if(loc == GPUmem){
544 560 T* image;
545 561 HANDLE_ERROR( cudaMalloc(&image, sizeof(T) * size()) );
546 562 int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
547 563 dim3 blocks( R[0] * R[1] / threads + 1 ); //create a 1D array of blocks
548 564  
  565 + // if the data is located on the GPU, execute a kernel that converts the image to the requested data type
549 566 switch(type){
550 567 case complexMag:
551 568 cuda_abs<T><<< blocks, threads >>>(image, E, size());
... ... @@ -559,6 +576,9 @@ public:
559 576 case complexIntensity:
560 577 cuda_intensity<T><<< blocks, threads >>>(image, E, size());
561 578 break;
  579 + default:
  580 + std::cout << "ERROR: invalid complex component specified." << std::endl;
  581 + exit(1);
562 582 }
563 583 if (minval == maxval)
564 584 stim::gpu2image<T>(image, filename, R[0], R[1], cmap);
... ...
stim/optics/scalarmie.h
... ... @@ -88,11 +88,28 @@ void A_coefficients(stim::complex&lt;T&gt;* A, T a, T k, stim::complex&lt;T&gt; n, int Nl){
88 88 }
89 89  
90 90 #define LOCAL_NL 16
  91 +
  92 +/// CUDA kernel for calculating the Mie scattering solution given a set of points (x, y, z), a list of plane waves, and a look-up table for Bl*hl
  93 +/// @param E (GPU) is the N x N destination scalar field
  94 +/// @param N is the number of sample points to evaluate
  95 +/// @param x (GPU) is the grid of X coordinates for each point in E
  96 +/// @param y (GPU) is the grid of Y coordinates for each point in E
  97 +/// @param z (GPU) is the grid of Z coordinates for each point in E
  98 +/// @param W (GPU) is an array of coherent scalar plane waves incident on the Mie scatterer
  99 +/// @param nW is the number of plane waves to evaluate (sum)
  100 +/// @param a is the radius of the Mie scatterer
  101 +/// @param n is the complex refractive index of the Mie scatterer
  102 +/// @param c is the position of the sphere in (x, y, z)
  103 +/// @param hB (GPU) is a look-up table of Hankel functions (equally spaced in distance from the sphere) pre-multiplied with scattering coefficients
  104 +/// @param kr_min is the minimum kr value in the hB look-up table (corresponding to the closest point to the sphere)
  105 +/// @param dkr is the spacing (in kr) between samples of the hB look-up table
  106 +/// @param N_hB is the number of samples in hB
  107 +/// @param Nl is the order of the calculation (number of Hankel function orders)
91 108 template<typename T>
92 109 __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::vec3<T> c, stim::complex<T>* hB, T r_min, T dr, size_t N_hB, int Nl){
93   - extern __shared__ stim::complex<T> shared_hB[]; //declare the list of waves in shared memory
  110 + extern __shared__ stim::complex<T> shared_hB[]; //declare the list of waves in shared memory
94 111  
95   - size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  112 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the sample array (sample point associated with this thread)
96 113 if(i >= N) return; //exit if this thread is outside the array
97 114 stim::vec3<T> p;
98 115 (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
... ... @@ -101,14 +118,14 @@ __global__ void cuda_scalar_mie_scatter(stim::complex&lt;T&gt;* E, size_t N, T* x, T*
101 118 p = p - c;
102 119 T r = p.len(); //calculate the distance from the sphere
103 120 if(r < a) return; //exit if the point is inside the sphere (we only calculate the internal field)
104   - T fij = (r - r_min)/dr; //FP index into the spherical bessel LUT
  121 + T fij = (r - r_min)/dr; //FP index into the spherical bessel LUT
105 122 size_t ij = (size_t) fij; //convert to an integral index
106 123 T alpha = fij - ij; //calculate the fractional portion of the index
107   - size_t n0j = ij * (Nl + 1); //start of the first entry in the LUT
108   - size_t n1j = (ij+1) * (Nl + 1); //start of the second entry in the LUT
  124 + size_t n0j = ij * (Nl + 1); //start of the first entry in the LUT
  125 + size_t n1j = (ij+1) * (Nl + 1); //start of the second entry in the LUT
109 126  
110 127 T cos_phi;
111   - T Pl_2, Pl_1, Pl; //declare registers to store the previous two Legendre polynomials
  128 + T Pl_2, Pl_1, Pl; //declare registers to store the previous two Legendre polynomials
112 129  
113 130 stim::complex<T> hBl;
114 131 stim::complex<T> Ei = 0; //create a register to store the result
... ... @@ -154,25 +171,28 @@ __global__ void cuda_scalar_mie_scatter(stim::complex&lt;T&gt;* E, size_t N, T* x, T*
154 171 E[i] += Ei; //copy the result to device memory
155 172 }
156 173  
157   -///Calculate the scalar Mie scattered field on the GPU
  174 +///Calculate the scalar Mie scattered field on the GPU when a list of GPU-based pre-multiplied Hankel functions are available
158 175 /// @param E (GPU) is the N x N destination scalar field
159 176 /// @param N is the number fo elements of the scalar field in each direction
160 177 /// @param x (GPU) is the grid of X coordinates for each point in E
161 178 /// @param y (GPU) is the grid of Y coordinates for each point in E
162 179 /// @param z (GPU) is the grid of Z coordinates for each point in E
163   -/// @param W (CPU) is an array of coherent scalar plane waves incident on the Mie scatterer
  180 +/// @param W (GPU) is an array of coherent scalar plane waves incident on the Mie scatterer
  181 +/// @param nW is the number of plane waves to evaluate (sum)
164 182 /// @param a is the radius of the Mie scatterer
165 183 /// @param n is the complex refractive index of the Mie scatterer
166   -/// @param r_spacing is the minimum distance between r values of the sample points in E (used to calculate look-up tables)
  184 +/// @param c is the position of the sphere in (x, y, z)
  185 +/// @param hB (GPU) is a look-up table of Hankel functions (equally spaced in distance from the sphere) pre-multiplied with scattering coefficients
  186 +/// @param kr_min is the minimum kr value in the hB look-up table (corresponding to the closest point to the sphere)
  187 +/// @param dkr is the spacing (in kr) between samples of the hB look-up table
  188 +/// @param N_hB is the number of samples in hB
  189 +/// @param Nl is the order of the calculation (number of Hankel function orders)
167 190 template<typename T>
168 191 void gpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::vec3<T> c, stim::complex<T>* hB, T kr_min, T dkr, size_t N_hB, size_t Nl){
169 192  
170   - size_t max_shared_mem = stim::sharedMemPerBlock();
171   - size_t hBl_array = sizeof(stim::complex<T>) * (Nl + 1);
172   - //std::cout<<"hl*Bl array size: "<<hBl_array<<std::endl;
173   - //std::cout<<"shared memory: "<<max_shared_mem<<std::endl;
174   - int threads = (int)((max_shared_mem / hBl_array) / 32 * 32);
175   - //std::cout<<"threads per block: "<<threads<<std::endl;
  193 + size_t max_shared_mem = stim::sharedMemPerBlock(); //get the amount of shared memory per block
  194 + size_t hBl_array = sizeof(stim::complex<T>) * (Nl + 1); //calculate the number of bytes required to store the LUT corresponding to a single sample in shared memory
  195 + int threads = (int)((max_shared_mem / hBl_array) / 32 * 32); //calculate the optimal number of threads per block (make sure it's divisible by the number of warps - 32)
176 196 dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks
177 197  
178 198 size_t shared_mem;
... ... @@ -197,7 +217,7 @@ __global__ void cuda_dist(T* r, T* x, T* y, T* z, size_t N, stim::vec3&lt;T&gt; c = st
197 217  
198 218 ///Calculate the scalar Mie scattered field on the GPU
199 219 /// @param E (GPU) is the N x N destination scalar field
200   -/// @param N is the number fo elements of the scalar field in each direction
  220 +/// @param N is the number of sample points of the scalar field
201 221 /// @param x (GPU) is the grid of X coordinates for each point in E
202 222 /// @param y (GPU) is the grid of Y coordinates for each point in E
203 223 /// @param z (GPU) is the grid of Z coordinates for each point in E
... ... @@ -211,29 +231,28 @@ void gpu_scalar_mie_scatter(stim::complex&lt;T&gt;* E, size_t N, T* x, T* y, T* z, std
211 231 //calculate the necessary number of orders required to represent the scattered field
212 232 T k = W[0].kmag();
213 233  
214   - int Nl = (int)ceil(k*a + 4 * cbrt( k * a ) + 2);
  234 + int Nl = (int)ceil(k*a + 4 * cbrt( k * a ) + 2); //calculate the number of orders required to represent the sphere
215 235 if(Nl < LOCAL_NL) Nl = LOCAL_NL; //always do at least the minimum number of local operations (kernel optimization)
216   - //std::cout<<"Nl: "<<Nl<<std::endl;
217 236  
218 237 //calculate the scattering coefficients for the sphere
219 238 stim::complex<T>* B = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * (Nl + 1) ); //allocate space for the scattering coefficients
220   - B_coefficients(B, a, k, n, Nl);
  239 + B_coefficients(B, a, k, n, Nl); //calculate the scattering coefficients
221 240  
222 241 // PLANE WAVES
223   - stim::scalarwave<T>* dev_W; //allocate space and copy plane waves
  242 + stim::scalarwave<T>* dev_W; //allocate space and copy plane waves
224 243 HANDLE_ERROR( cudaMalloc(&dev_W, sizeof(stim::scalarwave<T>) * W.size()) );
225 244 HANDLE_ERROR( cudaMemcpy(dev_W, &W[0], sizeof(stim::scalarwave<T>) * W.size(), cudaMemcpyHostToDevice) );
226 245  
227 246 // BESSEL FUNCTION LOOK-UP TABLE
228   - //calculate the distance from the sphere center
229   - T* dev_r;
230   - HANDLE_ERROR( cudaMalloc(&dev_r, sizeof(T) * N) );
  247 + //calculate the distance from the sphere center at each sample point and store the result in dev_r
  248 + T* dev_r; //declare the device pointer to hold the distance from the sphere center
  249 + HANDLE_ERROR( cudaMalloc(&dev_r, sizeof(T) * N) ); //allocate space for the array of distances
231 250  
232   - int threads = stim::maxThreadsPerBlock();
233   - dim3 blocks((unsigned)(N / threads + 1));
234   - cuda_dist<T> <<< blocks, threads >>>(dev_r, x, y, z, N, c);
  251 + int threads = stim::maxThreadsPerBlock(); //query the device to find the maximum number of threads per block
  252 + dim3 blocks((unsigned)(N / threads + 1)); //calculate the number of blocks necessary to evaluate the total number of sample points N
  253 + cuda_dist<T> <<< blocks, threads >>>(dev_r, x, y, z, N, c); //calculate the distance
235 254  
236   - //Find the minimum and maximum values of r
  255 + //Use the cuBLAS library to find the minimum and maximum distances from the sphere center. This will be used to create a look-up table for the Hankel functions
237 256 cublasStatus_t stat;
238 257 cublasHandle_t handle;
239 258  
... ... @@ -261,49 +280,40 @@ void gpu_scalar_mie_scatter(stim::complex&lt;T&gt;* E, size_t N, T* x, T* y, T* z, std
261 280 HANDLE_ERROR( cudaMemcpy(&r_min, dev_r + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU
262 281 HANDLE_ERROR( cudaMemcpy(&r_max, dev_r + i_max, sizeof(T), cudaMemcpyDeviceToHost) );
263 282  
264   - r_min = max(r_min, a); //if the radius of the sphere is larger than r_min, change r_min to a (the scattered field doesn't exist inside the sphere)
265   -
266   - //size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1); //number of values in the look-up table based on the user-specified spacing along r
267   - size_t N_hB_lut = (size_t)((r_max - r_min) / r_spacing + 1);
  283 + r_min = max(r_min, a); //if the radius of the sphere is larger than r_min, change r_min to a (the scattered field doesn't exist inside the sphere)
268 284  
269   - //T kr_min = k * r_min;
270   - //T kr_max = k * r_max;
  285 + size_t N_hB_lut = (size_t)((r_max - r_min) / r_spacing + 1); //number of values in the look-up table based on the user-specified spacing along r
271 286  
272   - //temporary variables
273   - double vm; //allocate space to store the return values for the bessel function calculation
  287 + //Declare and evaluate variables used to calculate the spherical Bessel functions and store them temporarily on the CPU
  288 + double vm; //allocate space to store the return values for the bessel function calculation
274 289 double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
275 290 double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
276 291 double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
277 292 double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
278 293  
279   - size_t hB_bytes = sizeof(stim::complex<T>) * (Nl+1) * N_hB_lut;
280   - stim::complex<T>* hB_lut = (stim::complex<T>*) malloc(hB_bytes); //pointer to the look-up table
281   - T dr = (r_max - r_min) / (N_hB_lut-1); //distance between values in the LUT
282   - //std::cout<<"LUT jl bytes: "<<hB_bytes<<std::endl;
283   - stim::complex<T> hl;
284   - for(size_t ri = 0; ri < N_hB_lut; ri++){ //for each value in the LUT
  294 + size_t hB_bytes = sizeof(stim::complex<T>) * (Nl+1) * N_hB_lut; //calculate the number of bytes necessary to store the Hankel function LUT
  295 + stim::complex<T>* hB_lut = (stim::complex<T>*) malloc(hB_bytes); //pointer to the look-up table
  296 + T dr = (r_max - r_min) / (N_hB_lut-1); //calculate the optimal distance between values in the LUT
  297 + stim::complex<T> hl; //declare a complex value for the Hankel function result
  298 + for(size_t ri = 0; ri < N_hB_lut; ri++){ //for each value in the LUT
285 299 stim::bessjyv_sph<double>(Nl, k * (r_min + ri * dr), vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl]
286   - for(size_t l = 0; l <= Nl; l++){ //for each order
287   - hl.r = (T)jv[l];
  300 + for(size_t l = 0; l <= Nl; l++){ //for each order
  301 + hl.r = (T)jv[l]; //generate the spherical Hankel function from the Bessel functions
288 302 hl.i = (T)yv[l];
289 303  
290   - hB_lut[ri * (Nl + 1) + l] = hl * B[l]; //store the bessel function result
291   - //std::cout<<hB_lut[ri * (Nl + 1) + l]<<std::endl;
  304 + hB_lut[ri * (Nl + 1) + l] = hl * B[l]; //pre-multiply the Hankel function by the scattering coefficients
292 305 }
293 306 }
294   - //T* real_lut = (T*) malloc(hB_bytes/2);
295   - //stim::real(real_lut, hB_lut, N_hB_lut);
296   - //stim::cpu2image<T>(real_lut, "hankel_B.bmp", Nl+1, N_hB_lut, stim::cmBrewer);
297 307  
298   - //Allocate device memory and copy everything to the GPU
  308 + //Copy the pre-multiplied Hankel function look-up table to the GPU - this LUT gives a list of uniformly spaced Hankel function values pre-multiplied by scattering coefficients
299 309 stim::complex<T>* dev_hB_lut;
300 310 HANDLE_ERROR( cudaMalloc(&dev_hB_lut, hB_bytes) );
301 311 HANDLE_ERROR( cudaMemcpy(dev_hB_lut, hB_lut, hB_bytes, cudaMemcpyHostToDevice) );
302   - //std::cout << "r_min: " << r_min << std::endl;
303   - //std::cout << "dr: " << dr << std::endl;
  312 +
  313 + //calculate the Mie scattering solution on the GPU
304 314 gpu_scalar_mie_scatter<T>(E, N, x, y, z, dev_W, W.size(), a, n, c, dev_hB_lut, r_min, dr, N_hB_lut, Nl);
305 315  
306   - HANDLE_ERROR(cudaMemcpy(E, E, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost)); //copy the field from device memory
  316 + //HANDLE_ERROR(cudaMemcpy(E, E, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost)); //copy the field from device memory
307 317  
308 318 HANDLE_ERROR(cudaFree(dev_hB_lut));
309 319 HANDLE_ERROR(cudaFree(dev_r));
... ...
stim/parser/filename.h
... ... @@ -238,7 +238,7 @@ public:
238 238 }
239 239  
240 240 /// Create a matching file locator with a prefix s
241   - stim::filename prefix(std::string s){
  241 + stim::filename with_prefix(std::string s){
242 242 stim::filename result = *this;
243 243 result._prefix = s;
244 244 return result;
... ...