Commit 963d0676112d2df99897697051c8b8f482f677ca
1 parent
ca99f951
bug fixes related to the development of bimsym
Showing
8 changed files
with
338 additions
and
46 deletions
Show diff stats
stim/envi/bsq.h
@@ -1021,12 +1021,13 @@ public: | @@ -1021,12 +1021,13 @@ public: | ||
1021 | //for each band | 1021 | //for each band |
1022 | for (unsigned long long z = b0; z < b1; z++) | 1022 | for (unsigned long long z = b0; z < b1; z++) |
1023 | { | 1023 | { |
1024 | + //std::cout<<z<<std::endl; | ||
1024 | for (unsigned long long y = 0; y < lines; y++) | 1025 | for (unsigned long long y = 0; y < lines; y++) |
1025 | { | 1026 | { |
1026 | file.read((char *)(temp + y * samples), sizeof(T) * samples); | 1027 | file.read((char *)(temp + y * samples), sizeof(T) * samples); |
1027 | file.seekg(jumpl, std::ios::cur); //go to the next band | 1028 | file.seekg(jumpl, std::ios::cur); //go to the next band |
1028 | 1029 | ||
1029 | - if(PROGRESS) progress = (double)((z+1) * lines + y + 1) / ((b1 - b0) * lines) * 100; | 1030 | + if(PROGRESS) progress = (double)((z - b0 + 1) * lines + y + 1) / ((b1 - b0) * lines) * 100; |
1030 | } | 1031 | } |
1031 | out.write(reinterpret_cast<const char*>(temp), L); //write slice data into target file | 1032 | out.write(reinterpret_cast<const char*>(temp), L); //write slice data into target file |
1032 | file.seekg(jumpb, std::ios::cur); | 1033 | file.seekg(jumpb, std::ios::cur); |
stim/math/fft.h
@@ -3,7 +3,7 @@ | @@ -3,7 +3,7 @@ | ||
3 | 3 | ||
4 | namespace stim{ | 4 | namespace stim{ |
5 | 5 | ||
6 | - template<class T> | 6 | + /*template<class T> |
7 | void circshift(T *out, const T *in, size_t xdim, size_t ydim, size_t xshift, size_t yshift){ | 7 | void circshift(T *out, const T *in, size_t xdim, size_t ydim, size_t xshift, size_t yshift){ |
8 | size_t i, j, ii, jj; | 8 | size_t i, j, ii, jj; |
9 | for (i =0; i < xdim; i++) { | 9 | for (i =0; i < xdim; i++) { |
@@ -13,16 +13,31 @@ namespace stim{ | @@ -13,16 +13,31 @@ namespace stim{ | ||
13 | out[ii * ydim + jj] = in[i * ydim + j]; | 13 | out[ii * ydim + jj] = in[i * ydim + j]; |
14 | } | 14 | } |
15 | } | 15 | } |
16 | + }*/ | ||
17 | + | ||
18 | + template<typename T> | ||
19 | + void circshift(T *out, const T *in, int xdim, int ydim, int xshift, int yshift) | ||
20 | + { | ||
21 | + for (int i =0; i < xdim; i++) { | ||
22 | + int ii = (i + xshift) % xdim; | ||
23 | + if (ii<0) ii = xdim + ii; | ||
24 | + for (int j = 0; j < ydim; j++) { | ||
25 | + int jj = (j + yshift) % ydim; | ||
26 | + if (jj<0) jj = ydim + jj; | ||
27 | + //out[ii * ydim + jj] = in[i * ydim + j]; | ||
28 | + out[jj * xdim + ii] = in[j * xdim + i]; | ||
29 | + } | ||
30 | + } | ||
16 | } | 31 | } |
17 | 32 | ||
18 | template<typename T> | 33 | template<typename T> |
19 | void cpu_fftshift(T* out, T* in, size_t xdim, size_t ydim){ | 34 | void cpu_fftshift(T* out, T* in, size_t xdim, size_t ydim){ |
20 | - circshift(out, in, xdim, ydim, xdim/2, ydim/2); | 35 | + circshift(out, in, xdim, ydim, std::floor(xdim/2), std::floor(ydim/2)); |
21 | } | 36 | } |
22 | 37 | ||
23 | template<typename T> | 38 | template<typename T> |
24 | void cpu_ifftshift(T* out, T* in, size_t xdim, size_t ydim){ | 39 | void cpu_ifftshift(T* out, T* in, size_t xdim, size_t ydim){ |
25 | - circshift(out, in, xdim, ydim, xdim/2, ydim/2); | 40 | + circshift(out, in, xdim, ydim, std::ceil(xdim/2), std::ceil(ydim/2)); |
26 | } | 41 | } |
27 | 42 | ||
28 | 43 |
stim/optics/scalarbeam.h
@@ -213,13 +213,13 @@ void gpu_scalar_psf_local(stim::complex<T>* E, size_t N, T* r, T* phi, T lambda, | @@ -213,13 +213,13 @@ void gpu_scalar_psf_local(stim::complex<T>* E, size_t N, T* r, T* phi, T lambda, | ||
213 | T dr = (r_max - r_min) / (Nlut_j-1); //distance between values in the LUT | 213 | T dr = (r_max - r_min) / (Nlut_j-1); //distance between values in the LUT |
214 | T jl; | 214 | T jl; |
215 | for(size_t ri = 0; ri < Nlut_j; ri++){ //for each value in the LUT | 215 | for(size_t ri = 0; ri < Nlut_j; ri++){ //for each value in the LUT |
216 | - for(size_t l = 0; l <= Nl; l++){ //for each order | 216 | + for(unsigned l = 0; l <= (unsigned)Nl; l++){ //for each order |
217 | jl = boost::math::sph_bessel<T>(l, k*(r_min + ri * dr)); //use boost to calculate the spherical bessel function | 217 | jl = boost::math::sph_bessel<T>(l, k*(r_min + ri * dr)); //use boost to calculate the spherical bessel function |
218 | j_lut[ri * (Nl + 1) + l] = jl; //store the bessel function result | 218 | j_lut[ri * (Nl + 1) + l] = jl; //store the bessel function result |
219 | } | 219 | } |
220 | } | 220 | } |
221 | 221 | ||
222 | - stim::cpu2image<T>(j_lut, "j_lut.bmp", Nl+1, Nlut_j, stim::cmBrewer); | 222 | + //stim::cpu2image<T>(j_lut, "j_lut.bmp", Nl+1, Nlut_j, stim::cmBrewer); |
223 | //Allocate device memory and copy everything to the GPU | 223 | //Allocate device memory and copy everything to the GPU |
224 | 224 | ||
225 | T* gpu_C; | 225 | T* gpu_C; |
@@ -316,6 +316,7 @@ void gpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lamb | @@ -316,6 +316,7 @@ void gpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lamb | ||
316 | 316 | ||
317 | stim::quaternion<T> q; //create a quaternion | 317 | stim::quaternion<T> q; //create a quaternion |
318 | q.CreateRotation(d, stim::vec3<T>(0, 0, 1)); //create a mapping from the propagation direction to the PSF space | 318 | q.CreateRotation(d, stim::vec3<T>(0, 0, 1)); //create a mapping from the propagation direction to the PSF space |
319 | + stim::matrix<T, 3> rot = q.toMatrix3(); | ||
319 | int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device | 320 | int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device |
320 | dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks | 321 | dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks |
321 | cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q); //call the CUDA kernel to move the cartesian coordinates to PSF space | 322 | cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q); //call the CUDA kernel to move the cartesian coordinates to PSF space |
@@ -442,15 +443,19 @@ public: | @@ -442,15 +443,19 @@ public: | ||
442 | return samples; | 443 | return samples; |
443 | } | 444 | } |
444 | 445 | ||
446 | + void eval(stim::scalarfield<T>& E, T* X, T* Y, T* Z, int order = 500){ | ||
447 | + cpu_scalar_psf_cart<T>(E.ptr(), E.size(), X, Y, Z, lambda, A, f, d, NA[0], NA[1], order, E.spacing()); | ||
448 | + } | ||
449 | + | ||
445 | /// Evaluate the beam to a scalar field using Debye focusing | 450 | /// Evaluate the beam to a scalar field using Debye focusing |
446 | - void eval(stim::scalarfield<T>& E, size_t order = 500){ | 451 | + void eval(stim::scalarfield<T>& E, int order = 500){ |
447 | size_t array_size = E.grid_bytes(); | 452 | size_t array_size = E.grid_bytes(); |
448 | T* X = (T*) malloc( array_size ); //allocate space for the coordinate meshes | 453 | T* X = (T*) malloc( array_size ); //allocate space for the coordinate meshes |
449 | T* Y = (T*) malloc( array_size ); | 454 | T* Y = (T*) malloc( array_size ); |
450 | T* Z = (T*) malloc( array_size ); | 455 | T* Z = (T*) malloc( array_size ); |
451 | 456 | ||
452 | E.meshgrid(X, Y, Z, stim::CPUmem); //calculate the coordinate meshes | 457 | E.meshgrid(X, Y, Z, stim::CPUmem); //calculate the coordinate meshes |
453 | - cpu_scalar_psf_cart<T>(E.ptr(), E.size(), X, Y, Z, lambda, A, f, d, NA[0], NA[1], order, E.spacing()); | 458 | + eval(E, X, Y, Z, order); |
454 | 459 | ||
455 | free(X); //free the coordinate meshes | 460 | free(X); //free the coordinate meshes |
456 | free(Y); | 461 | free(Y); |
stim/optics/scalarfield.h
1 | #ifndef STIM_SCALARFIELD_H | 1 | #ifndef STIM_SCALARFIELD_H |
2 | #define STIM_SCALARFIELD_H | 2 | #define STIM_SCALARFIELD_H |
3 | 3 | ||
4 | + | ||
4 | #include "../math/rect.h" | 5 | #include "../math/rect.h" |
5 | #include "../math/complex.h" | 6 | #include "../math/complex.h" |
7 | +#include "../math/fft.h" | ||
6 | 8 | ||
7 | namespace stim{ | 9 | namespace stim{ |
8 | 10 | ||
11 | + /// Perform a k-space transform of a scalar field (FFT). The given field has a width of x and the calculated momentum space has a | ||
12 | + /// width of kx (in radians). | ||
13 | + /// @param K is a pointer to the output array of all plane waves in the field | ||
14 | + /// @param kx is the width of the frame in momentum space | ||
15 | + /// @param ky is the height of the frame in momentum space | ||
16 | + /// @param E is the field to be transformed | ||
17 | + /// @param x is the width of the field in the spatial domain | ||
18 | + /// @param y is the height of the field in the spatial domain | ||
19 | + /// @param nx is the number of pixels representing the field in the x (and kx) direction | ||
20 | + /// @param ny is the number of pixels representing the field in the y (and ky) direction | ||
21 | + template<typename T> | ||
22 | + void cpu_scalar_to_kspace(stim::complex<T>* K, T& kx, T& ky, stim::complex<T>* E, T x, T y, size_t nx, size_t ny){ | ||
23 | + | ||
24 | + kx = stim::TAU * nx / x; //calculate the width of the momentum space | ||
25 | + ky = stim::TAU * ny / y; | ||
26 | + | ||
27 | + stim::complex<T>* dev_FFT; | ||
28 | + HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array | ||
29 | + | ||
30 | + stim::complex<T>* dev_E; | ||
31 | + HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field | ||
32 | + HANDLE_ERROR( cudaMemcpy(dev_E, E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory | ||
33 | + | ||
34 | + cufftResult result; | ||
35 | + cufftHandle plan; | ||
36 | + result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C); | ||
37 | + if(result != CUFFT_SUCCESS){ | ||
38 | + std::cout<<"Error creating cuFFT plan."<<std::endl; | ||
39 | + exit(1); | ||
40 | + } | ||
41 | + | ||
42 | + result = cufftExecC2C(plan, (cufftComplex*)dev_E, (cufftComplex*)dev_FFT, CUFFT_FORWARD); | ||
43 | + if(result != CUFFT_SUCCESS){ | ||
44 | + std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl; | ||
45 | + exit(1); | ||
46 | + } | ||
47 | + | ||
48 | + stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny); | ||
49 | + HANDLE_ERROR( cudaMemcpy(fft, dev_FFT, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) ); | ||
50 | + | ||
51 | + stim::cpu_fftshift(K, fft, nx, ny); | ||
52 | + //memcpy(K, fft, sizeof(stim::complex<T>) * nx * ny); | ||
53 | + } | ||
54 | + | ||
55 | + template<typename T> | ||
56 | + void cpu_scalar_from_kspace(stim::complex<T>* E, T& x, T& y, stim::complex<T>* K, T kx, T ky, size_t nx, size_t ny){ | ||
57 | + | ||
58 | + x = stim::TAU * nx / kx; //calculate the width of the momentum space | ||
59 | + y = stim::TAU * ny / ky; | ||
60 | + | ||
61 | + stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny); | ||
62 | + stim::cpu_ifftshift(fft, K, nx, ny); | ||
63 | + //memcpy(fft, K, sizeof(stim::complex<T>) * nx * ny); | ||
64 | + | ||
65 | + stim::complex<T>* dev_FFT; | ||
66 | + HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array | ||
67 | + HANDLE_ERROR( cudaMemcpy(dev_FFT, fft, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory | ||
68 | + | ||
69 | + stim::complex<T>* dev_E; | ||
70 | + HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field | ||
71 | + | ||
72 | + cufftResult result; | ||
73 | + cufftHandle plan; | ||
74 | + result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C); | ||
75 | + if(result != CUFFT_SUCCESS){ | ||
76 | + std::cout<<"Error creating cuFFT plan."<<std::endl; | ||
77 | + exit(1); | ||
78 | + } | ||
79 | + | ||
80 | + result = cufftExecC2C(plan, (cufftComplex*)dev_FFT, (cufftComplex*)dev_E, CUFFT_INVERSE); | ||
81 | + if(result != CUFFT_SUCCESS){ | ||
82 | + std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl; | ||
83 | + exit(1); | ||
84 | + } | ||
85 | + | ||
86 | + HANDLE_ERROR( cudaMemcpy(E, dev_E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) ); | ||
87 | + | ||
88 | + | ||
89 | + } | ||
90 | + | ||
91 | + /// Propagate a field slice along its orthogonal direction by a given distance z | ||
92 | + /// @param Enew is the resulting propogated field | ||
93 | + /// @param E is the field to be propogated | ||
94 | + /// @param sx is the size of the field in the lateral x direction | ||
95 | + /// @param sy is the size of the field in the lateral y direction | ||
96 | + /// @param z is the distance to be propagated | ||
97 | + /// @param k is the wavenumber 2*pi/lambda | ||
98 | + /// @param nx is the number of samples in the field along the lateral x direction | ||
99 | + /// @param ny is the number of samples in the field along the lateral y direction | ||
100 | + template<typename T> | ||
101 | + void cpu_scalar_propagate(stim::complex<T>* Enew, stim::complex<T>* E, T sx, T sy, T z, T k, size_t nx, size_t ny){ | ||
102 | + | ||
103 | + stim::complex<T>* K = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * nx * ny ); | ||
104 | + | ||
105 | + T Kx, Ky; //width and height in k space | ||
106 | + cpu_scalar_to_kspace(K, Kx, Ky, E ,sx, sy, nx, ny); | ||
107 | + | ||
108 | + T* mag = (T*) malloc( sizeof(T) * nx * ny ); | ||
109 | + stim::abs(mag, K, nx * ny); | ||
110 | + stim::cpu2image<float>(mag, "kspace_pre_shift.bmp", nx, ny, stim::cmBrewer); | ||
111 | + | ||
112 | + size_t kxi, kyi; | ||
113 | + size_t i; | ||
114 | + T kx, kx_sq, ky, ky_sq, k_sq; | ||
115 | + T kz; | ||
116 | + stim::complex<T> shift; | ||
117 | + T min_kx = -Kx / 2; | ||
118 | + T dkx = Kx / (nx); | ||
119 | + | ||
120 | + T min_ky = -Ky / 2; | ||
121 | + T dky = Ky / (ny); | ||
122 | + | ||
123 | + for(kyi = 0; kyi < ny; kyi++){ //for each plane wave in the ky direction | ||
124 | + for(kxi = 0; kxi < nx; kxi++){ //for each plane wave in the ky direction | ||
125 | + i = kyi * nx + kxi; | ||
126 | + | ||
127 | + kx = min_kx + kxi * dkx; //calculate the position of the current plane wave | ||
128 | + ky = min_ky + kyi * dky; | ||
129 | + | ||
130 | + kx_sq = kx * kx; | ||
131 | + ky_sq = ky * ky; | ||
132 | + k_sq = k*k; | ||
133 | + | ||
134 | + if(kx_sq + ky_sq < k_sq){ | ||
135 | + kz = sqrt(k_sq - kx_sq - ky_sq); //estimate kz using the Fresnel approximation | ||
136 | + shift = -exp(stim::complex<T>(0, kz * z)); | ||
137 | + K[i] *= shift; | ||
138 | + K[i] /= (nx*ny); //normalize the DFT | ||
139 | + } | ||
140 | + else{ | ||
141 | + K[i] = 0; | ||
142 | + } | ||
143 | + } | ||
144 | + } | ||
145 | + | ||
146 | + stim::abs(mag, K, nx * ny); | ||
147 | + stim::cpu2image<float>(mag, "kspace_post_shift.bmp", nx, ny, stim::cmBrewer); | ||
148 | + | ||
149 | + cpu_scalar_from_kspace(Enew, sx, sy, K, Kx, Ky, nx, ny); | ||
150 | + } | ||
151 | + | ||
152 | + /// Apply a lowpass filter to a field slice | ||
153 | + /// @param Enew is the resulting propogated field | ||
154 | + /// @param E is the field to be propogated | ||
155 | + /// @param sx is the size of the field in the lateral x direction | ||
156 | + /// @param sy is the size of the field in the lateral y direction | ||
157 | + /// @param highest is the highest spatial frequency that can pass through the filter | ||
158 | + /// @param nx is the number of samples in the field along the lateral x direction | ||
159 | + /// @param ny is the number of samples in the field along the lateral y direction | ||
160 | + template<typename T> | ||
161 | + void cpu_scalar_lowpass(stim::complex<T>* Enew, stim::complex<T>* E, T sx, T sy, T highest, size_t nx, size_t ny){ | ||
162 | + | ||
163 | + stim::complex<T>* K = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * nx * ny ); | ||
164 | + | ||
165 | + T Kx, Ky; //width and height in k space | ||
166 | + cpu_scalar_to_kspace(K, Kx, Ky, E ,sx, sy, nx, ny); | ||
167 | + | ||
168 | + T* mag = (T*) malloc( sizeof(T) * nx * ny ); | ||
169 | + stim::abs(mag, K, nx * ny); | ||
170 | + stim::cpu2image<float>(mag, "kspace_pre_lowpass.bmp", nx, ny, stim::cmBrewer); | ||
171 | + | ||
172 | + size_t kxi, kyi; | ||
173 | + size_t i; | ||
174 | + T kx, kx_sq, ky, ky_sq, k_sq; | ||
175 | + T kz; | ||
176 | + stim::complex<T> shift; | ||
177 | + T min_kx = -Kx / 2; | ||
178 | + T dkx = Kx / (nx); | ||
179 | + | ||
180 | + T min_ky = -Ky / 2; | ||
181 | + T dky = Ky / (ny); | ||
182 | + | ||
183 | + T highest_sq = highest * highest; | ||
184 | + | ||
185 | + for(kyi = 0; kyi < ny; kyi++){ //for each plane wave in the ky direction | ||
186 | + for(kxi = 0; kxi < nx; kxi++){ //for each plane wave in the ky direction | ||
187 | + i = kyi * nx + kxi; | ||
188 | + | ||
189 | + kx = min_kx + kxi * dkx; //calculate the position of the current plane wave | ||
190 | + ky = min_ky + kyi * dky; | ||
191 | + | ||
192 | + kx_sq = kx * kx; | ||
193 | + ky_sq = ky * ky; | ||
194 | + | ||
195 | + if(kx_sq + ky_sq > highest_sq){ | ||
196 | + K[i] = 0; | ||
197 | + } | ||
198 | + else | ||
199 | + K[i] /= nx * ny; //normalize the DFT | ||
200 | + } | ||
201 | + } | ||
202 | + | ||
203 | + stim::abs(mag, K, nx * ny); | ||
204 | + stim::cpu2image<float>(mag, "kspace_post_lowpass.bmp", nx, ny, stim::cmBrewer); | ||
205 | + | ||
206 | + cpu_scalar_from_kspace(Enew, sx, sy, K, Kx, Ky, nx, ny); | ||
207 | + } | ||
208 | + | ||
9 | enum locationType {CPUmem, GPUmem}; | 209 | enum locationType {CPUmem, GPUmem}; |
10 | 210 | ||
11 | /// Class represents a scalar optical field. | 211 | /// Class represents a scalar optical field. |
@@ -22,23 +222,20 @@ protected: | @@ -22,23 +222,20 @@ protected: | ||
22 | size_t R[2]; | 222 | size_t R[2]; |
23 | locationType loc; | 223 | locationType loc; |
24 | 224 | ||
25 | - | ||
26 | - | ||
27 | -public: | ||
28 | - | ||
29 | - CUDA_CALLABLE scalarfield(size_t X, size_t Y, T size = 1, T z_pos = 0) : rect<T>::rect(size, z_pos){ | ||
30 | - R[0] = X; //set the field resolution | ||
31 | - R[1] = Y; | ||
32 | - | ||
33 | - E = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * R[0] * R[1]); //allocate in CPU memory | ||
34 | - loc = CPUmem; | 225 | + /// Convert the field to a k-space representation (do an FFT) |
226 | + void to_kspace(T& kx, T& ky){ | ||
227 | + cpu_scalar_to_kspace(E, kx, ky, E, X.len(), Y.len(), R[0], R[1]); | ||
35 | } | 228 | } |
36 | 229 | ||
37 | - CUDA_CALLABLE ~scalarfield(){ | ||
38 | - if(loc == CPUmem) free(E); | ||
39 | - else cudaFree(E); | 230 | + void from_kspace(){ |
231 | + kx = stim::TAU * R[0] / X.len(); //calculate the width of the momentum space | ||
232 | + ky = stim::TAU * R[1] / Y.len(); | ||
233 | + T x, y; | ||
234 | + cpu_scalar_from_kspace(E, x, y, E, kx, ky, R[0], R[1]); | ||
40 | } | 235 | } |
41 | 236 | ||
237 | +public: | ||
238 | + | ||
42 | /// Returns the number of values in the field | 239 | /// Returns the number of values in the field |
43 | CUDA_CALLABLE size_t size(){ | 240 | CUDA_CALLABLE size_t size(){ |
44 | return R[0] * R[1]; | 241 | return R[0] * R[1]; |
@@ -48,6 +245,20 @@ public: | @@ -48,6 +245,20 @@ public: | ||
48 | return sizeof(stim::complex<T>) * R[0] * R[1]; | 245 | return sizeof(stim::complex<T>) * R[0] * R[1]; |
49 | } | 246 | } |
50 | 247 | ||
248 | + scalarfield(size_t X, size_t Y, T size = 1, T z_pos = 0) : rect<T>::rect(size, z_pos){ | ||
249 | + R[0] = X; //set the field resolution | ||
250 | + R[1] = Y; | ||
251 | + | ||
252 | + E = (stim::complex<T>*) malloc(grid_bytes()); //allocate in CPU memory | ||
253 | + memset(E, 0, grid_bytes()); | ||
254 | + loc = CPUmem; | ||
255 | + } | ||
256 | + | ||
257 | + ~scalarfield(){ | ||
258 | + if(loc == CPUmem) free(E); | ||
259 | + else cudaFree(E); | ||
260 | + } | ||
261 | + | ||
51 | /// Calculates the distance between points on the grid | 262 | /// Calculates the distance between points on the grid |
52 | T spacing(){ | 263 | T spacing(){ |
53 | T du = rect<T>::X.len() / R[0]; | 264 | T du = rect<T>::X.len() / R[0]; |
@@ -78,6 +289,16 @@ public: | @@ -78,6 +289,16 @@ public: | ||
78 | } | 289 | } |
79 | } | 290 | } |
80 | 291 | ||
292 | + /// Propagate the field along its orthogonal direction by a distance d | ||
293 | + void propagate(T d, T k){ | ||
294 | + cpu_scalar_propagate(E, E, X.len(), Y.len(), d, k, R[0], R[1]); | ||
295 | + } | ||
296 | + | ||
297 | + /// Propagate the field along its orthogonal direction by a distance d | ||
298 | + void lowpass(T highest){ | ||
299 | + cpu_scalar_lowpass(E, E, X.len(), Y.len(), highest, R[0], R[1]); | ||
300 | + } | ||
301 | + | ||
81 | std::string str(){ | 302 | std::string str(){ |
82 | std::stringstream ss; | 303 | std::stringstream ss; |
83 | ss<<rect<T>::str()<<std::endl; | 304 | ss<<rect<T>::str()<<std::endl; |
@@ -96,11 +317,11 @@ public: | @@ -96,11 +317,11 @@ public: | ||
96 | 317 | ||
97 | /// Evaluate the cartesian coordinates of each point in the field. The resulting arrays are allocated in the same memory where the field is stored. | 318 | /// Evaluate the cartesian coordinates of each point in the field. The resulting arrays are allocated in the same memory where the field is stored. |
98 | void meshgrid(T* X, T* Y, T* Z, locationType location){ | 319 | void meshgrid(T* X, T* Y, T* Z, locationType location){ |
99 | - size_t array_size = sizeof(T) * R[0] * R[1]; | 320 | + //size_t array_size = sizeof(T) * R[0] * R[1]; |
100 | if(location == CPUmem){ | 321 | if(location == CPUmem){ |
101 | 322 | ||
102 | - T du = 1.0 / (R[0] - 1); //calculate the spacing between points in the grid | ||
103 | - T dv = 1.0 / (R[1] - 1); | 323 | + T du = (T)1.0 / (R[0] - 1); //calculate the spacing between points in the grid |
324 | + T dv = (T)1.0 / (R[1] - 1); | ||
104 | 325 | ||
105 | size_t ui, vi, i; | 326 | size_t ui, vi, i; |
106 | stim::vec3<T> p; | 327 | stim::vec3<T> p; |
@@ -114,9 +335,9 @@ public: | @@ -114,9 +335,9 @@ public: | ||
114 | i++; | 335 | i++; |
115 | } | 336 | } |
116 | } | 337 | } |
117 | - stim::cpu2image(X, "X.bmp", R[0], R[1], stim::cmBrewer); | ||
118 | - stim::cpu2image(Y, "Y.bmp", R[0], R[1], stim::cmBrewer); | ||
119 | - stim::cpu2image(Z, "Z.bmp", R[0], R[1], stim::cmBrewer); | 338 | + //stim::cpu2image(X, "X.bmp", R[0], R[1], stim::cmBrewer); |
339 | + //stim::cpu2image(Y, "Y.bmp", R[0], R[1], stim::cmBrewer); | ||
340 | + //stim::cpu2image(Z, "Z.bmp", R[0], R[1], stim::cmBrewer); | ||
120 | } | 341 | } |
121 | else{ | 342 | else{ |
122 | std::cout<<"GPU allocation of a meshgrid isn't supported yet. You'll have to write kernels to do the calculation."; | 343 | std::cout<<"GPU allocation of a meshgrid isn't supported yet. You'll have to write kernels to do the calculation."; |
@@ -124,6 +345,14 @@ public: | @@ -124,6 +345,14 @@ public: | ||
124 | } | 345 | } |
125 | } | 346 | } |
126 | 347 | ||
348 | + //clear the field, setting all values to zero | ||
349 | + void clear(){ | ||
350 | + if(loc == GPUmem) | ||
351 | + HANDLE_ERROR(cudaMemset(E, 0, grid_bytes())); | ||
352 | + else | ||
353 | + memset(E, 0, grid_bytes()); | ||
354 | + } | ||
355 | + | ||
127 | void image(std::string filename, stim::complexComponentType type = complexMag, stim::colormapType cmap = stim::cmBrewer){ | 356 | void image(std::string filename, stim::complexComponentType type = complexMag, stim::colormapType cmap = stim::cmBrewer){ |
128 | 357 | ||
129 | if(loc == GPUmem) to_cpu(); //if the field is in the GPU, move it to the CPU | 358 | if(loc == GPUmem) to_cpu(); //if the field is in the GPU, move it to the CPU |
stim/optics/mie.h renamed to stim/optics/scalarmie.h
@@ -366,7 +366,7 @@ __global__ void cuda_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* | @@ -366,7 +366,7 @@ __global__ void cuda_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* | ||
366 | (z == NULL) ? p[2] = 0 : p[2] = z[i]; | 366 | (z == NULL) ? p[2] = 0 : p[2] = z[i]; |
367 | 367 | ||
368 | T r = p.len(); //calculate the distance from the sphere | 368 | T r = p.len(); //calculate the distance from the sphere |
369 | - if(r > a) return; //exit if the point is inside the sphere (we only calculate the internal field) | 369 | + if(r >= a) return; //exit if the point is inside the sphere (we only calculate the internal field) |
370 | T fij = (r - r_min)/dr; //FP index into the spherical bessel LUT | 370 | T fij = (r - r_min)/dr; //FP index into the spherical bessel LUT |
371 | size_t ij = (size_t) fij; //convert to an integral index | 371 | size_t ij = (size_t) fij; //convert to an integral index |
372 | T alpha = fij - ij; //calculate the fractional portion of the index | 372 | T alpha = fij - ij; //calculate the fractional portion of the index |
@@ -608,6 +608,39 @@ void cpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, st | @@ -608,6 +608,39 @@ void cpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, st | ||
608 | cpu_scalar_mie_internal(E, N, x, y, z, W, a, n, r_spacing); | 608 | cpu_scalar_mie_internal(E, N, x, y, z, W, a, n, r_spacing); |
609 | } | 609 | } |
610 | 610 | ||
611 | -} | 611 | + |
612 | +/// Class stim::scalarmie represents a scalar Mie scattering model that can be used to calculate the fields produced by a scattering sphere. | ||
613 | +template<typename T> | ||
614 | +class scalarmie | ||
615 | +{ | ||
616 | +private: | ||
617 | + T radius; //radius of the scattering sphere | ||
618 | + stim::complex<T> n; //refractive index of the scattering sphere | ||
619 | + | ||
620 | +public: | ||
621 | + | ||
622 | + scalarmie(T r, stim::complex<T> ri){ | ||
623 | + radius = r; | ||
624 | + n = ri; | ||
625 | + } | ||
626 | + | ||
627 | + void eval(stim::scalarfield<T>& E, stim::scalarbeam<T> b, int order = 500, int samples = 1000){ | ||
628 | + | ||
629 | + size_t array_size = E.grid_bytes(); //calculate the number of bytes in the scalar grid | ||
630 | + float* X = (float*) malloc( array_size ); //allocate space for the coordinate meshes | ||
631 | + float* Y = (float*) malloc( array_size ); | ||
632 | + float* Z = (float*) malloc( array_size ); | ||
633 | + E.meshgrid(X, Y, Z, stim::CPUmem); //calculate the coordinate meshes | ||
634 | + | ||
635 | + b.eval(E, X, Y, Z, order); //evaluate the incident field using a plane wave expansion | ||
636 | + | ||
637 | + std::vector< stim::scalarwave<float> > wave_array = b.mc(samples); //decompose the beam into an array of plane waves | ||
638 | + stim::cpu_scalar_mie_scatter<float>(E.ptr(), E.size(), X, Y, Z, wave_array, radius, n, E.spacing()); | ||
639 | + stim::cpu_scalar_mie_internal<float>(E.ptr(), E.size(), X, Y, Z, wave_array, radius, n, E.spacing()); | ||
640 | + } | ||
641 | + | ||
642 | +}; //end stim::scalarmie | ||
643 | + | ||
644 | +} //end namespace stim | ||
612 | 645 | ||
613 | #endif | 646 | #endif |
614 | \ No newline at end of file | 647 | \ No newline at end of file |
stim/optics/scalarwave.h
@@ -210,7 +210,7 @@ template<typename T> | @@ -210,7 +210,7 @@ template<typename T> | ||
210 | __global__ void cuda_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t n_waves){ | 210 | __global__ void cuda_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t n_waves){ |
211 | extern __shared__ stim::scalarwave<T> shared_W[]; //declare the list of waves in shared memory | 211 | extern __shared__ stim::scalarwave<T> shared_W[]; //declare the list of waves in shared memory |
212 | 212 | ||
213 | - stim::cuda::sharedMemcpy(shared_W, W, n_waves, threadIdx.x, blockDim.x); //copy the plane waves into shared memory for faster access | 213 | + stim::cuda::threadedMemcpy(shared_W, W, n_waves, threadIdx.x, blockDim.x); //copy the plane waves into shared memory for faster access |
214 | __syncthreads(); //synchronize threads to insure all data is copied | 214 | __syncthreads(); //synchronize threads to insure all data is copied |
215 | 215 | ||
216 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array | 216 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array |
stim/parser/arguments.h
@@ -42,11 +42,13 @@ namespace stim{ | @@ -42,11 +42,13 @@ namespace stim{ | ||
42 | bool char_numerical(char c){ | 42 | bool char_numerical(char c){ |
43 | if( (c >= '0' && c <= '9') || c == '-' || c == '.') | 43 | if( (c >= '0' && c <= '9') || c == '-' || c == '.') |
44 | return true; | 44 | return true; |
45 | + return false; | ||
45 | } | 46 | } |
46 | 47 | ||
47 | bool char_integral(char c){ | 48 | bool char_integral(char c){ |
48 | if( (c >= '0' && c <= '9') || c == '-') | 49 | if( (c >= '0' && c <= '9') || c == '-') |
49 | return true; | 50 | return true; |
51 | + return false; | ||
50 | } | 52 | } |
51 | 53 | ||
52 | /// Test if a given string contains a numerical (real) value | 54 | /// Test if a given string contains a numerical (real) value |
@@ -475,31 +477,31 @@ namespace stim{ | @@ -475,31 +477,31 @@ namespace stim{ | ||
475 | ///Determines of a parameter has been set and returns true if it has | 477 | ///Determines of a parameter has been set and returns true if it has |
476 | 478 | ||
477 | /// @param _name is the name of the argument | 479 | /// @param _name is the name of the argument |
478 | - bool operator()(std::string _name) | ||
479 | - { | ||
480 | - size_t i = find(opts.begin(), opts.end(), _name) - opts.begin(); | 480 | + bool operator()(std::string _name){ |
481 | + std::vector<cmd_option>::iterator it; | ||
482 | + it = std::find(opts.begin(), opts.end(), _name);// - opts.begin(); | ||
481 | 483 | ||
482 | - if(i < 0){ | 484 | + if(it == opts.end()){ |
483 | std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl; | 485 | std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl; |
484 | exit(1); | 486 | exit(1); |
485 | } | 487 | } |
486 | 488 | ||
487 | - return opts[i].is_set(); | 489 | + return it->is_set(); |
488 | } | 490 | } |
489 | 491 | ||
490 | ///Returns the number of parameters for a specified argument | 492 | ///Returns the number of parameters for a specified argument |
491 | 493 | ||
492 | /// @param _name is the name of the argument whose parameter number will be returned | 494 | /// @param _name is the name of the argument whose parameter number will be returned |
493 | - size_t nargs(std::string _name) | ||
494 | - { | ||
495 | - size_t i = find(opts.begin(), opts.end(), _name) - opts.begin(); | 495 | + size_t nargs(std::string _name){ |
496 | + std::vector<cmd_option>::iterator it; | ||
497 | + it = find(opts.begin(), opts.end(), _name);// - opts.begin(); | ||
496 | 498 | ||
497 | - if(i < 0){ | 499 | + if(it == opts.end()){ |
498 | std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl; | 500 | std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl; |
499 | exit(1); | 501 | exit(1); |
500 | } | 502 | } |
501 | 503 | ||
502 | - return opts[i].nargs(); | 504 | + return it->nargs(); |
503 | } | 505 | } |
504 | 506 | ||
505 | ///Returns the number of arguments that have been set | 507 | ///Returns the number of arguments that have been set |
@@ -525,17 +527,16 @@ namespace stim{ | @@ -525,17 +527,16 @@ namespace stim{ | ||
525 | ///Returns an object describing the argument | 527 | ///Returns an object describing the argument |
526 | 528 | ||
527 | /// @param _name is the name of the requested argument | 529 | /// @param _name is the name of the requested argument |
528 | - cmd_option operator[](std::string _name) | ||
529 | - { | ||
530 | - size_t i = find(opts.begin(), opts.end(), _name) - opts.begin(); | 530 | + cmd_option operator[](std::string _name){ |
531 | + std::vector<cmd_option>::iterator it; | ||
532 | + it = find(opts.begin(), opts.end(), _name);// - opts.begin(); | ||
531 | 533 | ||
532 | - if(i < 0 || i >= opts.size()) | ||
533 | - { | 534 | + if(it == opts.end()){ |
534 | std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl; | 535 | std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl; |
535 | exit(1); | 536 | exit(1); |
536 | } | 537 | } |
537 | 538 | ||
538 | - return opts[i]; | 539 | + return *it; |
539 | } | 540 | } |
540 | 541 | ||
541 | 542 |
stim/parser/filename.h
@@ -68,6 +68,7 @@ protected: | @@ -68,6 +68,7 @@ protected: | ||
68 | 68 | ||
69 | //parse a file locator string | 69 | //parse a file locator string |
70 | void parse(std::string loc){ | 70 | void parse(std::string loc){ |
71 | + if(loc.size() == 0) return; | ||
71 | 72 | ||
72 | loc = unix_div(loc); | 73 | loc = unix_div(loc); |
73 | 74 | ||
@@ -307,6 +308,13 @@ public: | @@ -307,6 +308,13 @@ public: | ||
307 | parse_name(fname); | 308 | parse_name(fname); |
308 | } | 309 | } |
309 | 310 | ||
311 | + //append a string to the filename and return a new filename | ||
312 | + stim::filename append(std::string s){ | ||
313 | + stim::filename result = *this; //create a new filename, copy the current filename | ||
314 | + result.prefix = prefix + s; //append the string to the filename | ||
315 | + return result; | ||
316 | + } | ||
317 | + | ||
310 | //get a path relative to the current one | 318 | //get a path relative to the current one |
311 | stim::filename get_relative(std::string rel){ | 319 | stim::filename get_relative(std::string rel){ |
312 | 320 |