Commit 963d0676112d2df99897697051c8b8f482f677ca

Authored by David Mayerich
1 parent ca99f951

bug fixes related to the development of bimsym

@@ -1021,12 +1021,13 @@ public: @@ -1021,12 +1021,13 @@ public:
1021 //for each band 1021 //for each band
1022 for (unsigned long long z = b0; z < b1; z++) 1022 for (unsigned long long z = b0; z < b1; z++)
1023 { 1023 {
  1024 + //std::cout<<z<<std::endl;
1024 for (unsigned long long y = 0; y < lines; y++) 1025 for (unsigned long long y = 0; y < lines; y++)
1025 { 1026 {
1026 file.read((char *)(temp + y * samples), sizeof(T) * samples); 1027 file.read((char *)(temp + y * samples), sizeof(T) * samples);
1027 file.seekg(jumpl, std::ios::cur); //go to the next band 1028 file.seekg(jumpl, std::ios::cur); //go to the next band
1028 1029
1029 - if(PROGRESS) progress = (double)((z+1) * lines + y + 1) / ((b1 - b0) * lines) * 100; 1030 + if(PROGRESS) progress = (double)((z - b0 + 1) * lines + y + 1) / ((b1 - b0) * lines) * 100;
1030 } 1031 }
1031 out.write(reinterpret_cast<const char*>(temp), L); //write slice data into target file 1032 out.write(reinterpret_cast<const char*>(temp), L); //write slice data into target file
1032 file.seekg(jumpb, std::ios::cur); 1033 file.seekg(jumpb, std::ios::cur);
@@ -3,7 +3,7 @@ @@ -3,7 +3,7 @@
3 3
4 namespace stim{ 4 namespace stim{
5 5
6 - template<class T> 6 + /*template<class T>
7 void circshift(T *out, const T *in, size_t xdim, size_t ydim, size_t xshift, size_t yshift){ 7 void circshift(T *out, const T *in, size_t xdim, size_t ydim, size_t xshift, size_t yshift){
8 size_t i, j, ii, jj; 8 size_t i, j, ii, jj;
9 for (i =0; i < xdim; i++) { 9 for (i =0; i < xdim; i++) {
@@ -13,16 +13,31 @@ namespace stim{ @@ -13,16 +13,31 @@ namespace stim{
13 out[ii * ydim + jj] = in[i * ydim + j]; 13 out[ii * ydim + jj] = in[i * ydim + j];
14 } 14 }
15 } 15 }
  16 + }*/
  17 +
  18 + template<typename T>
  19 + void circshift(T *out, const T *in, int xdim, int ydim, int xshift, int yshift)
  20 + {
  21 + for (int i =0; i < xdim; i++) {
  22 + int ii = (i + xshift) % xdim;
  23 + if (ii<0) ii = xdim + ii;
  24 + for (int j = 0; j < ydim; j++) {
  25 + int jj = (j + yshift) % ydim;
  26 + if (jj<0) jj = ydim + jj;
  27 + //out[ii * ydim + jj] = in[i * ydim + j];
  28 + out[jj * xdim + ii] = in[j * xdim + i];
  29 + }
  30 + }
16 } 31 }
17 32
18 template<typename T> 33 template<typename T>
19 void cpu_fftshift(T* out, T* in, size_t xdim, size_t ydim){ 34 void cpu_fftshift(T* out, T* in, size_t xdim, size_t ydim){
20 - circshift(out, in, xdim, ydim, xdim/2, ydim/2); 35 + circshift(out, in, xdim, ydim, std::floor(xdim/2), std::floor(ydim/2));
21 } 36 }
22 37
23 template<typename T> 38 template<typename T>
24 void cpu_ifftshift(T* out, T* in, size_t xdim, size_t ydim){ 39 void cpu_ifftshift(T* out, T* in, size_t xdim, size_t ydim){
25 - circshift(out, in, xdim, ydim, xdim/2, ydim/2); 40 + circshift(out, in, xdim, ydim, std::ceil(xdim/2), std::ceil(ydim/2));
26 } 41 }
27 42
28 43
stim/optics/scalarbeam.h
@@ -213,13 +213,13 @@ void gpu_scalar_psf_local(stim::complex&lt;T&gt;* E, size_t N, T* r, T* phi, T lambda, @@ -213,13 +213,13 @@ void gpu_scalar_psf_local(stim::complex&lt;T&gt;* E, size_t N, T* r, T* phi, T lambda,
213 T dr = (r_max - r_min) / (Nlut_j-1); //distance between values in the LUT 213 T dr = (r_max - r_min) / (Nlut_j-1); //distance between values in the LUT
214 T jl; 214 T jl;
215 for(size_t ri = 0; ri < Nlut_j; ri++){ //for each value in the LUT 215 for(size_t ri = 0; ri < Nlut_j; ri++){ //for each value in the LUT
216 - for(size_t l = 0; l <= Nl; l++){ //for each order 216 + for(unsigned l = 0; l <= (unsigned)Nl; l++){ //for each order
217 jl = boost::math::sph_bessel<T>(l, k*(r_min + ri * dr)); //use boost to calculate the spherical bessel function 217 jl = boost::math::sph_bessel<T>(l, k*(r_min + ri * dr)); //use boost to calculate the spherical bessel function
218 j_lut[ri * (Nl + 1) + l] = jl; //store the bessel function result 218 j_lut[ri * (Nl + 1) + l] = jl; //store the bessel function result
219 } 219 }
220 } 220 }
221 221
222 - stim::cpu2image<T>(j_lut, "j_lut.bmp", Nl+1, Nlut_j, stim::cmBrewer); 222 + //stim::cpu2image<T>(j_lut, "j_lut.bmp", Nl+1, Nlut_j, stim::cmBrewer);
223 //Allocate device memory and copy everything to the GPU 223 //Allocate device memory and copy everything to the GPU
224 224
225 T* gpu_C; 225 T* gpu_C;
@@ -316,6 +316,7 @@ void gpu_scalar_psf_cart(stim::complex&lt;T&gt;* E, size_t N, T* x, T* y, T* z, T lamb @@ -316,6 +316,7 @@ void gpu_scalar_psf_cart(stim::complex&lt;T&gt;* E, size_t N, T* x, T* y, T* z, T lamb
316 316
317 stim::quaternion<T> q; //create a quaternion 317 stim::quaternion<T> q; //create a quaternion
318 q.CreateRotation(d, stim::vec3<T>(0, 0, 1)); //create a mapping from the propagation direction to the PSF space 318 q.CreateRotation(d, stim::vec3<T>(0, 0, 1)); //create a mapping from the propagation direction to the PSF space
  319 + stim::matrix<T, 3> rot = q.toMatrix3();
319 int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device 320 int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
320 dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks 321 dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks
321 cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q); //call the CUDA kernel to move the cartesian coordinates to PSF space 322 cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q); //call the CUDA kernel to move the cartesian coordinates to PSF space
@@ -442,15 +443,19 @@ public: @@ -442,15 +443,19 @@ public:
442 return samples; 443 return samples;
443 } 444 }
444 445
  446 + void eval(stim::scalarfield<T>& E, T* X, T* Y, T* Z, int order = 500){
  447 + cpu_scalar_psf_cart<T>(E.ptr(), E.size(), X, Y, Z, lambda, A, f, d, NA[0], NA[1], order, E.spacing());
  448 + }
  449 +
445 /// Evaluate the beam to a scalar field using Debye focusing 450 /// Evaluate the beam to a scalar field using Debye focusing
446 - void eval(stim::scalarfield<T>& E, size_t order = 500){ 451 + void eval(stim::scalarfield<T>& E, int order = 500){
447 size_t array_size = E.grid_bytes(); 452 size_t array_size = E.grid_bytes();
448 T* X = (T*) malloc( array_size ); //allocate space for the coordinate meshes 453 T* X = (T*) malloc( array_size ); //allocate space for the coordinate meshes
449 T* Y = (T*) malloc( array_size ); 454 T* Y = (T*) malloc( array_size );
450 T* Z = (T*) malloc( array_size ); 455 T* Z = (T*) malloc( array_size );
451 456
452 E.meshgrid(X, Y, Z, stim::CPUmem); //calculate the coordinate meshes 457 E.meshgrid(X, Y, Z, stim::CPUmem); //calculate the coordinate meshes
453 - cpu_scalar_psf_cart<T>(E.ptr(), E.size(), X, Y, Z, lambda, A, f, d, NA[0], NA[1], order, E.spacing()); 458 + eval(E, X, Y, Z, order);
454 459
455 free(X); //free the coordinate meshes 460 free(X); //free the coordinate meshes
456 free(Y); 461 free(Y);
stim/optics/scalarfield.h
1 #ifndef STIM_SCALARFIELD_H 1 #ifndef STIM_SCALARFIELD_H
2 #define STIM_SCALARFIELD_H 2 #define STIM_SCALARFIELD_H
3 3
  4 +
4 #include "../math/rect.h" 5 #include "../math/rect.h"
5 #include "../math/complex.h" 6 #include "../math/complex.h"
  7 +#include "../math/fft.h"
6 8
7 namespace stim{ 9 namespace stim{
8 10
  11 + /// Perform a k-space transform of a scalar field (FFT). The given field has a width of x and the calculated momentum space has a
  12 + /// width of kx (in radians).
  13 + /// @param K is a pointer to the output array of all plane waves in the field
  14 + /// @param kx is the width of the frame in momentum space
  15 + /// @param ky is the height of the frame in momentum space
  16 + /// @param E is the field to be transformed
  17 + /// @param x is the width of the field in the spatial domain
  18 + /// @param y is the height of the field in the spatial domain
  19 + /// @param nx is the number of pixels representing the field in the x (and kx) direction
  20 + /// @param ny is the number of pixels representing the field in the y (and ky) direction
  21 + template<typename T>
  22 + void cpu_scalar_to_kspace(stim::complex<T>* K, T& kx, T& ky, stim::complex<T>* E, T x, T y, size_t nx, size_t ny){
  23 +
  24 + kx = stim::TAU * nx / x; //calculate the width of the momentum space
  25 + ky = stim::TAU * ny / y;
  26 +
  27 + stim::complex<T>* dev_FFT;
  28 + HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array
  29 +
  30 + stim::complex<T>* dev_E;
  31 + HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field
  32 + HANDLE_ERROR( cudaMemcpy(dev_E, E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory
  33 +
  34 + cufftResult result;
  35 + cufftHandle plan;
  36 + result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C);
  37 + if(result != CUFFT_SUCCESS){
  38 + std::cout<<"Error creating cuFFT plan."<<std::endl;
  39 + exit(1);
  40 + }
  41 +
  42 + result = cufftExecC2C(plan, (cufftComplex*)dev_E, (cufftComplex*)dev_FFT, CUFFT_FORWARD);
  43 + if(result != CUFFT_SUCCESS){
  44 + std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl;
  45 + exit(1);
  46 + }
  47 +
  48 + stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny);
  49 + HANDLE_ERROR( cudaMemcpy(fft, dev_FFT, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) );
  50 +
  51 + stim::cpu_fftshift(K, fft, nx, ny);
  52 + //memcpy(K, fft, sizeof(stim::complex<T>) * nx * ny);
  53 + }
  54 +
  55 + template<typename T>
  56 + void cpu_scalar_from_kspace(stim::complex<T>* E, T& x, T& y, stim::complex<T>* K, T kx, T ky, size_t nx, size_t ny){
  57 +
  58 + x = stim::TAU * nx / kx; //calculate the width of the momentum space
  59 + y = stim::TAU * ny / ky;
  60 +
  61 + stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny);
  62 + stim::cpu_ifftshift(fft, K, nx, ny);
  63 + //memcpy(fft, K, sizeof(stim::complex<T>) * nx * ny);
  64 +
  65 + stim::complex<T>* dev_FFT;
  66 + HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array
  67 + HANDLE_ERROR( cudaMemcpy(dev_FFT, fft, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory
  68 +
  69 + stim::complex<T>* dev_E;
  70 + HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field
  71 +
  72 + cufftResult result;
  73 + cufftHandle plan;
  74 + result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C);
  75 + if(result != CUFFT_SUCCESS){
  76 + std::cout<<"Error creating cuFFT plan."<<std::endl;
  77 + exit(1);
  78 + }
  79 +
  80 + result = cufftExecC2C(plan, (cufftComplex*)dev_FFT, (cufftComplex*)dev_E, CUFFT_INVERSE);
  81 + if(result != CUFFT_SUCCESS){
  82 + std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl;
  83 + exit(1);
  84 + }
  85 +
  86 + HANDLE_ERROR( cudaMemcpy(E, dev_E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) );
  87 +
  88 +
  89 + }
  90 +
  91 + /// Propagate a field slice along its orthogonal direction by a given distance z
  92 + /// @param Enew is the resulting propogated field
  93 + /// @param E is the field to be propogated
  94 + /// @param sx is the size of the field in the lateral x direction
  95 + /// @param sy is the size of the field in the lateral y direction
  96 + /// @param z is the distance to be propagated
  97 + /// @param k is the wavenumber 2*pi/lambda
  98 + /// @param nx is the number of samples in the field along the lateral x direction
  99 + /// @param ny is the number of samples in the field along the lateral y direction
  100 + template<typename T>
  101 + void cpu_scalar_propagate(stim::complex<T>* Enew, stim::complex<T>* E, T sx, T sy, T z, T k, size_t nx, size_t ny){
  102 +
  103 + stim::complex<T>* K = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * nx * ny );
  104 +
  105 + T Kx, Ky; //width and height in k space
  106 + cpu_scalar_to_kspace(K, Kx, Ky, E ,sx, sy, nx, ny);
  107 +
  108 + T* mag = (T*) malloc( sizeof(T) * nx * ny );
  109 + stim::abs(mag, K, nx * ny);
  110 + stim::cpu2image<float>(mag, "kspace_pre_shift.bmp", nx, ny, stim::cmBrewer);
  111 +
  112 + size_t kxi, kyi;
  113 + size_t i;
  114 + T kx, kx_sq, ky, ky_sq, k_sq;
  115 + T kz;
  116 + stim::complex<T> shift;
  117 + T min_kx = -Kx / 2;
  118 + T dkx = Kx / (nx);
  119 +
  120 + T min_ky = -Ky / 2;
  121 + T dky = Ky / (ny);
  122 +
  123 + for(kyi = 0; kyi < ny; kyi++){ //for each plane wave in the ky direction
  124 + for(kxi = 0; kxi < nx; kxi++){ //for each plane wave in the ky direction
  125 + i = kyi * nx + kxi;
  126 +
  127 + kx = min_kx + kxi * dkx; //calculate the position of the current plane wave
  128 + ky = min_ky + kyi * dky;
  129 +
  130 + kx_sq = kx * kx;
  131 + ky_sq = ky * ky;
  132 + k_sq = k*k;
  133 +
  134 + if(kx_sq + ky_sq < k_sq){
  135 + kz = sqrt(k_sq - kx_sq - ky_sq); //estimate kz using the Fresnel approximation
  136 + shift = -exp(stim::complex<T>(0, kz * z));
  137 + K[i] *= shift;
  138 + K[i] /= (nx*ny); //normalize the DFT
  139 + }
  140 + else{
  141 + K[i] = 0;
  142 + }
  143 + }
  144 + }
  145 +
  146 + stim::abs(mag, K, nx * ny);
  147 + stim::cpu2image<float>(mag, "kspace_post_shift.bmp", nx, ny, stim::cmBrewer);
  148 +
  149 + cpu_scalar_from_kspace(Enew, sx, sy, K, Kx, Ky, nx, ny);
  150 + }
  151 +
  152 + /// Apply a lowpass filter to a field slice
  153 + /// @param Enew is the resulting propogated field
  154 + /// @param E is the field to be propogated
  155 + /// @param sx is the size of the field in the lateral x direction
  156 + /// @param sy is the size of the field in the lateral y direction
  157 + /// @param highest is the highest spatial frequency that can pass through the filter
  158 + /// @param nx is the number of samples in the field along the lateral x direction
  159 + /// @param ny is the number of samples in the field along the lateral y direction
  160 + template<typename T>
  161 + void cpu_scalar_lowpass(stim::complex<T>* Enew, stim::complex<T>* E, T sx, T sy, T highest, size_t nx, size_t ny){
  162 +
  163 + stim::complex<T>* K = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * nx * ny );
  164 +
  165 + T Kx, Ky; //width and height in k space
  166 + cpu_scalar_to_kspace(K, Kx, Ky, E ,sx, sy, nx, ny);
  167 +
  168 + T* mag = (T*) malloc( sizeof(T) * nx * ny );
  169 + stim::abs(mag, K, nx * ny);
  170 + stim::cpu2image<float>(mag, "kspace_pre_lowpass.bmp", nx, ny, stim::cmBrewer);
  171 +
  172 + size_t kxi, kyi;
  173 + size_t i;
  174 + T kx, kx_sq, ky, ky_sq, k_sq;
  175 + T kz;
  176 + stim::complex<T> shift;
  177 + T min_kx = -Kx / 2;
  178 + T dkx = Kx / (nx);
  179 +
  180 + T min_ky = -Ky / 2;
  181 + T dky = Ky / (ny);
  182 +
  183 + T highest_sq = highest * highest;
  184 +
  185 + for(kyi = 0; kyi < ny; kyi++){ //for each plane wave in the ky direction
  186 + for(kxi = 0; kxi < nx; kxi++){ //for each plane wave in the ky direction
  187 + i = kyi * nx + kxi;
  188 +
  189 + kx = min_kx + kxi * dkx; //calculate the position of the current plane wave
  190 + ky = min_ky + kyi * dky;
  191 +
  192 + kx_sq = kx * kx;
  193 + ky_sq = ky * ky;
  194 +
  195 + if(kx_sq + ky_sq > highest_sq){
  196 + K[i] = 0;
  197 + }
  198 + else
  199 + K[i] /= nx * ny; //normalize the DFT
  200 + }
  201 + }
  202 +
  203 + stim::abs(mag, K, nx * ny);
  204 + stim::cpu2image<float>(mag, "kspace_post_lowpass.bmp", nx, ny, stim::cmBrewer);
  205 +
  206 + cpu_scalar_from_kspace(Enew, sx, sy, K, Kx, Ky, nx, ny);
  207 + }
  208 +
9 enum locationType {CPUmem, GPUmem}; 209 enum locationType {CPUmem, GPUmem};
10 210
11 /// Class represents a scalar optical field. 211 /// Class represents a scalar optical field.
@@ -22,23 +222,20 @@ protected: @@ -22,23 +222,20 @@ protected:
22 size_t R[2]; 222 size_t R[2];
23 locationType loc; 223 locationType loc;
24 224
25 -  
26 -  
27 -public:  
28 -  
29 - CUDA_CALLABLE scalarfield(size_t X, size_t Y, T size = 1, T z_pos = 0) : rect<T>::rect(size, z_pos){  
30 - R[0] = X; //set the field resolution  
31 - R[1] = Y;  
32 -  
33 - E = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * R[0] * R[1]); //allocate in CPU memory  
34 - loc = CPUmem; 225 + /// Convert the field to a k-space representation (do an FFT)
  226 + void to_kspace(T& kx, T& ky){
  227 + cpu_scalar_to_kspace(E, kx, ky, E, X.len(), Y.len(), R[0], R[1]);
35 } 228 }
36 229
37 - CUDA_CALLABLE ~scalarfield(){  
38 - if(loc == CPUmem) free(E);  
39 - else cudaFree(E); 230 + void from_kspace(){
  231 + kx = stim::TAU * R[0] / X.len(); //calculate the width of the momentum space
  232 + ky = stim::TAU * R[1] / Y.len();
  233 + T x, y;
  234 + cpu_scalar_from_kspace(E, x, y, E, kx, ky, R[0], R[1]);
40 } 235 }
41 236
  237 +public:
  238 +
42 /// Returns the number of values in the field 239 /// Returns the number of values in the field
43 CUDA_CALLABLE size_t size(){ 240 CUDA_CALLABLE size_t size(){
44 return R[0] * R[1]; 241 return R[0] * R[1];
@@ -48,6 +245,20 @@ public: @@ -48,6 +245,20 @@ public:
48 return sizeof(stim::complex<T>) * R[0] * R[1]; 245 return sizeof(stim::complex<T>) * R[0] * R[1];
49 } 246 }
50 247
  248 + scalarfield(size_t X, size_t Y, T size = 1, T z_pos = 0) : rect<T>::rect(size, z_pos){
  249 + R[0] = X; //set the field resolution
  250 + R[1] = Y;
  251 +
  252 + E = (stim::complex<T>*) malloc(grid_bytes()); //allocate in CPU memory
  253 + memset(E, 0, grid_bytes());
  254 + loc = CPUmem;
  255 + }
  256 +
  257 + ~scalarfield(){
  258 + if(loc == CPUmem) free(E);
  259 + else cudaFree(E);
  260 + }
  261 +
51 /// Calculates the distance between points on the grid 262 /// Calculates the distance between points on the grid
52 T spacing(){ 263 T spacing(){
53 T du = rect<T>::X.len() / R[0]; 264 T du = rect<T>::X.len() / R[0];
@@ -78,6 +289,16 @@ public: @@ -78,6 +289,16 @@ public:
78 } 289 }
79 } 290 }
80 291
  292 + /// Propagate the field along its orthogonal direction by a distance d
  293 + void propagate(T d, T k){
  294 + cpu_scalar_propagate(E, E, X.len(), Y.len(), d, k, R[0], R[1]);
  295 + }
  296 +
  297 + /// Propagate the field along its orthogonal direction by a distance d
  298 + void lowpass(T highest){
  299 + cpu_scalar_lowpass(E, E, X.len(), Y.len(), highest, R[0], R[1]);
  300 + }
  301 +
81 std::string str(){ 302 std::string str(){
82 std::stringstream ss; 303 std::stringstream ss;
83 ss<<rect<T>::str()<<std::endl; 304 ss<<rect<T>::str()<<std::endl;
@@ -96,11 +317,11 @@ public: @@ -96,11 +317,11 @@ public:
96 317
97 /// Evaluate the cartesian coordinates of each point in the field. The resulting arrays are allocated in the same memory where the field is stored. 318 /// Evaluate the cartesian coordinates of each point in the field. The resulting arrays are allocated in the same memory where the field is stored.
98 void meshgrid(T* X, T* Y, T* Z, locationType location){ 319 void meshgrid(T* X, T* Y, T* Z, locationType location){
99 - size_t array_size = sizeof(T) * R[0] * R[1]; 320 + //size_t array_size = sizeof(T) * R[0] * R[1];
100 if(location == CPUmem){ 321 if(location == CPUmem){
101 322
102 - T du = 1.0 / (R[0] - 1); //calculate the spacing between points in the grid  
103 - T dv = 1.0 / (R[1] - 1); 323 + T du = (T)1.0 / (R[0] - 1); //calculate the spacing between points in the grid
  324 + T dv = (T)1.0 / (R[1] - 1);
104 325
105 size_t ui, vi, i; 326 size_t ui, vi, i;
106 stim::vec3<T> p; 327 stim::vec3<T> p;
@@ -114,9 +335,9 @@ public: @@ -114,9 +335,9 @@ public:
114 i++; 335 i++;
115 } 336 }
116 } 337 }
117 - stim::cpu2image(X, "X.bmp", R[0], R[1], stim::cmBrewer);  
118 - stim::cpu2image(Y, "Y.bmp", R[0], R[1], stim::cmBrewer);  
119 - stim::cpu2image(Z, "Z.bmp", R[0], R[1], stim::cmBrewer); 338 + //stim::cpu2image(X, "X.bmp", R[0], R[1], stim::cmBrewer);
  339 + //stim::cpu2image(Y, "Y.bmp", R[0], R[1], stim::cmBrewer);
  340 + //stim::cpu2image(Z, "Z.bmp", R[0], R[1], stim::cmBrewer);
120 } 341 }
121 else{ 342 else{
122 std::cout<<"GPU allocation of a meshgrid isn't supported yet. You'll have to write kernels to do the calculation."; 343 std::cout<<"GPU allocation of a meshgrid isn't supported yet. You'll have to write kernels to do the calculation.";
@@ -124,6 +345,14 @@ public: @@ -124,6 +345,14 @@ public:
124 } 345 }
125 } 346 }
126 347
  348 + //clear the field, setting all values to zero
  349 + void clear(){
  350 + if(loc == GPUmem)
  351 + HANDLE_ERROR(cudaMemset(E, 0, grid_bytes()));
  352 + else
  353 + memset(E, 0, grid_bytes());
  354 + }
  355 +
127 void image(std::string filename, stim::complexComponentType type = complexMag, stim::colormapType cmap = stim::cmBrewer){ 356 void image(std::string filename, stim::complexComponentType type = complexMag, stim::colormapType cmap = stim::cmBrewer){
128 357
129 if(loc == GPUmem) to_cpu(); //if the field is in the GPU, move it to the CPU 358 if(loc == GPUmem) to_cpu(); //if the field is in the GPU, move it to the CPU
stim/optics/mie.h renamed to stim/optics/scalarmie.h
@@ -366,7 +366,7 @@ __global__ void cuda_scalar_mie_internal(stim::complex&lt;T&gt;* E, size_t N, T* x, T* @@ -366,7 +366,7 @@ __global__ void cuda_scalar_mie_internal(stim::complex&lt;T&gt;* E, size_t N, T* x, T*
366 (z == NULL) ? p[2] = 0 : p[2] = z[i]; 366 (z == NULL) ? p[2] = 0 : p[2] = z[i];
367 367
368 T r = p.len(); //calculate the distance from the sphere 368 T r = p.len(); //calculate the distance from the sphere
369 - if(r > a) return; //exit if the point is inside the sphere (we only calculate the internal field) 369 + if(r >= a) return; //exit if the point is inside the sphere (we only calculate the internal field)
370 T fij = (r - r_min)/dr; //FP index into the spherical bessel LUT 370 T fij = (r - r_min)/dr; //FP index into the spherical bessel LUT
371 size_t ij = (size_t) fij; //convert to an integral index 371 size_t ij = (size_t) fij; //convert to an integral index
372 T alpha = fij - ij; //calculate the fractional portion of the index 372 T alpha = fij - ij; //calculate the fractional portion of the index
@@ -608,6 +608,39 @@ void cpu_scalar_mie_internal(stim::complex&lt;T&gt;* E, size_t N, T* x, T* y, T* z, st @@ -608,6 +608,39 @@ void cpu_scalar_mie_internal(stim::complex&lt;T&gt;* E, size_t N, T* x, T* y, T* z, st
608 cpu_scalar_mie_internal(E, N, x, y, z, W, a, n, r_spacing); 608 cpu_scalar_mie_internal(E, N, x, y, z, W, a, n, r_spacing);
609 } 609 }
610 610
611 -} 611 +
  612 +/// Class stim::scalarmie represents a scalar Mie scattering model that can be used to calculate the fields produced by a scattering sphere.
  613 +template<typename T>
  614 +class scalarmie
  615 +{
  616 +private:
  617 + T radius; //radius of the scattering sphere
  618 + stim::complex<T> n; //refractive index of the scattering sphere
  619 +
  620 +public:
  621 +
  622 + scalarmie(T r, stim::complex<T> ri){
  623 + radius = r;
  624 + n = ri;
  625 + }
  626 +
  627 + void eval(stim::scalarfield<T>& E, stim::scalarbeam<T> b, int order = 500, int samples = 1000){
  628 +
  629 + size_t array_size = E.grid_bytes(); //calculate the number of bytes in the scalar grid
  630 + float* X = (float*) malloc( array_size ); //allocate space for the coordinate meshes
  631 + float* Y = (float*) malloc( array_size );
  632 + float* Z = (float*) malloc( array_size );
  633 + E.meshgrid(X, Y, Z, stim::CPUmem); //calculate the coordinate meshes
  634 +
  635 + b.eval(E, X, Y, Z, order); //evaluate the incident field using a plane wave expansion
  636 +
  637 + std::vector< stim::scalarwave<float> > wave_array = b.mc(samples); //decompose the beam into an array of plane waves
  638 + stim::cpu_scalar_mie_scatter<float>(E.ptr(), E.size(), X, Y, Z, wave_array, radius, n, E.spacing());
  639 + stim::cpu_scalar_mie_internal<float>(E.ptr(), E.size(), X, Y, Z, wave_array, radius, n, E.spacing());
  640 + }
  641 +
  642 +}; //end stim::scalarmie
  643 +
  644 +} //end namespace stim
612 645
613 #endif 646 #endif
614 \ No newline at end of file 647 \ No newline at end of file
stim/optics/scalarwave.h
@@ -210,7 +210,7 @@ template&lt;typename T&gt; @@ -210,7 +210,7 @@ template&lt;typename T&gt;
210 __global__ void cuda_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t n_waves){ 210 __global__ void cuda_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t n_waves){
211 extern __shared__ stim::scalarwave<T> shared_W[]; //declare the list of waves in shared memory 211 extern __shared__ stim::scalarwave<T> shared_W[]; //declare the list of waves in shared memory
212 212
213 - stim::cuda::sharedMemcpy(shared_W, W, n_waves, threadIdx.x, blockDim.x); //copy the plane waves into shared memory for faster access 213 + stim::cuda::threadedMemcpy(shared_W, W, n_waves, threadIdx.x, blockDim.x); //copy the plane waves into shared memory for faster access
214 __syncthreads(); //synchronize threads to insure all data is copied 214 __syncthreads(); //synchronize threads to insure all data is copied
215 215
216 size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array 216 size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
stim/parser/arguments.h
@@ -42,11 +42,13 @@ namespace stim{ @@ -42,11 +42,13 @@ namespace stim{
42 bool char_numerical(char c){ 42 bool char_numerical(char c){
43 if( (c >= '0' && c <= '9') || c == '-' || c == '.') 43 if( (c >= '0' && c <= '9') || c == '-' || c == '.')
44 return true; 44 return true;
  45 + return false;
45 } 46 }
46 47
47 bool char_integral(char c){ 48 bool char_integral(char c){
48 if( (c >= '0' && c <= '9') || c == '-') 49 if( (c >= '0' && c <= '9') || c == '-')
49 return true; 50 return true;
  51 + return false;
50 } 52 }
51 53
52 /// Test if a given string contains a numerical (real) value 54 /// Test if a given string contains a numerical (real) value
@@ -475,31 +477,31 @@ namespace stim{ @@ -475,31 +477,31 @@ namespace stim{
475 ///Determines of a parameter has been set and returns true if it has 477 ///Determines of a parameter has been set and returns true if it has
476 478
477 /// @param _name is the name of the argument 479 /// @param _name is the name of the argument
478 - bool operator()(std::string _name)  
479 - {  
480 - size_t i = find(opts.begin(), opts.end(), _name) - opts.begin(); 480 + bool operator()(std::string _name){
  481 + std::vector<cmd_option>::iterator it;
  482 + it = std::find(opts.begin(), opts.end(), _name);// - opts.begin();
481 483
482 - if(i < 0){ 484 + if(it == opts.end()){
483 std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl; 485 std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl;
484 exit(1); 486 exit(1);
485 } 487 }
486 488
487 - return opts[i].is_set(); 489 + return it->is_set();
488 } 490 }
489 491
490 ///Returns the number of parameters for a specified argument 492 ///Returns the number of parameters for a specified argument
491 493
492 /// @param _name is the name of the argument whose parameter number will be returned 494 /// @param _name is the name of the argument whose parameter number will be returned
493 - size_t nargs(std::string _name)  
494 - {  
495 - size_t i = find(opts.begin(), opts.end(), _name) - opts.begin(); 495 + size_t nargs(std::string _name){
  496 + std::vector<cmd_option>::iterator it;
  497 + it = find(opts.begin(), opts.end(), _name);// - opts.begin();
496 498
497 - if(i < 0){ 499 + if(it == opts.end()){
498 std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl; 500 std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl;
499 exit(1); 501 exit(1);
500 } 502 }
501 503
502 - return opts[i].nargs(); 504 + return it->nargs();
503 } 505 }
504 506
505 ///Returns the number of arguments that have been set 507 ///Returns the number of arguments that have been set
@@ -525,17 +527,16 @@ namespace stim{ @@ -525,17 +527,16 @@ namespace stim{
525 ///Returns an object describing the argument 527 ///Returns an object describing the argument
526 528
527 /// @param _name is the name of the requested argument 529 /// @param _name is the name of the requested argument
528 - cmd_option operator[](std::string _name)  
529 - {  
530 - size_t i = find(opts.begin(), opts.end(), _name) - opts.begin(); 530 + cmd_option operator[](std::string _name){
  531 + std::vector<cmd_option>::iterator it;
  532 + it = find(opts.begin(), opts.end(), _name);// - opts.begin();
531 533
532 - if(i < 0 || i >= opts.size())  
533 - { 534 + if(it == opts.end()){
534 std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl; 535 std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl;
535 exit(1); 536 exit(1);
536 } 537 }
537 538
538 - return opts[i]; 539 + return *it;
539 } 540 }
540 541
541 542
stim/parser/filename.h
@@ -68,6 +68,7 @@ protected: @@ -68,6 +68,7 @@ protected:
68 68
69 //parse a file locator string 69 //parse a file locator string
70 void parse(std::string loc){ 70 void parse(std::string loc){
  71 + if(loc.size() == 0) return;
71 72
72 loc = unix_div(loc); 73 loc = unix_div(loc);
73 74
@@ -307,6 +308,13 @@ public: @@ -307,6 +308,13 @@ public:
307 parse_name(fname); 308 parse_name(fname);
308 } 309 }
309 310
  311 + //append a string to the filename and return a new filename
  312 + stim::filename append(std::string s){
  313 + stim::filename result = *this; //create a new filename, copy the current filename
  314 + result.prefix = prefix + s; //append the string to the filename
  315 + return result;
  316 + }
  317 +
310 //get a path relative to the current one 318 //get a path relative to the current one
311 stim::filename get_relative(std::string rel){ 319 stim::filename get_relative(std::string rel){
312 320