Commit 7a2d001231356d6903d068e28fb914a9af0071c0
1 parent
24aab7c9
mirst1d updates
Showing
7 changed files
with
618 additions
and
103 deletions
Show diff stats
math/complexfield.cuh
@@ -6,16 +6,36 @@ | @@ -6,16 +6,36 @@ | ||
6 | 6 | ||
7 | #include "../math/field.cuh" | 7 | #include "../math/field.cuh" |
8 | #include "../math/complex.h" | 8 | #include "../math/complex.h" |
9 | +#include "../math/realfield.cuh" | ||
9 | 10 | ||
10 | namespace rts{ | 11 | namespace rts{ |
11 | 12 | ||
13 | +template<typename T> | ||
14 | +__global__ void gpu_complexfield_mag(T* dest, complex<T>* source, unsigned int r0, unsigned int r1){ | ||
15 | + | ||
16 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
17 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
18 | + | ||
19 | + //make sure that the thread indices are in-bounds | ||
20 | + if(iu >= r0 || iv >= r1) return; | ||
21 | + | ||
22 | + //compute the index into the field | ||
23 | + int i = iv*r0 + iu; | ||
24 | + | ||
25 | + //calculate and store the result | ||
26 | + dest[i] = source[i].abs(); | ||
27 | +} | ||
28 | + | ||
12 | /*This class stores functions for saving images of complex fields | 29 | /*This class stores functions for saving images of complex fields |
13 | */ | 30 | */ |
14 | -template<typename T, unsigned int D> | 31 | +template<typename T, unsigned int D = 1> |
15 | class complexfield : public field< rts::complex<T>, D >{ | 32 | class complexfield : public field< rts::complex<T>, D >{ |
16 | using field< rts::complex<T>, D >::R; | 33 | using field< rts::complex<T>, D >::R; |
17 | using field< rts::complex<T>, D >::X; | 34 | using field< rts::complex<T>, D >::X; |
18 | using field< rts::complex<T>, D >::shape; | 35 | using field< rts::complex<T>, D >::shape; |
36 | + using field< rts::complex<T>, D >::cuda_params; | ||
37 | + | ||
38 | + | ||
19 | 39 | ||
20 | public: | 40 | public: |
21 | 41 | ||
@@ -56,12 +76,20 @@ public: | @@ -56,12 +76,20 @@ public: | ||
56 | 76 | ||
57 | public: | 77 | public: |
58 | 78 | ||
79 | + enum attribute {magnitude, real, imaginary}; | ||
80 | + | ||
59 | //constructor (no parameters) | 81 | //constructor (no parameters) |
60 | complexfield() : field<rts::complex<T>, D>(){}; | 82 | complexfield() : field<rts::complex<T>, D>(){}; |
61 | 83 | ||
62 | //constructor (resolution specified) | 84 | //constructor (resolution specified) |
63 | complexfield(unsigned int r0, unsigned int r1) : field<rts::complex<T>, D>(r0, r1){}; | 85 | complexfield(unsigned int r0, unsigned int r1) : field<rts::complex<T>, D>(r0, r1){}; |
64 | 86 | ||
87 | + //assignment from a field of complex values | ||
88 | + complexfield & operator=(const field< rts::complex<T>, D > rhs){ | ||
89 | + field< complex<T>, D >::operator=(rhs); | ||
90 | + return *this; | ||
91 | + } | ||
92 | + | ||
65 | //assignment operator (scalar value) | 93 | //assignment operator (scalar value) |
66 | complexfield & operator= (const complex<T> rhs){ | 94 | complexfield & operator= (const complex<T> rhs){ |
67 | 95 | ||
@@ -76,7 +104,26 @@ public: | @@ -76,7 +104,26 @@ public: | ||
76 | return *this; | 104 | return *this; |
77 | } | 105 | } |
78 | 106 | ||
79 | - void toImage(std::string filename, unsigned int n){ | 107 | + //cropping |
108 | + complexfield crop(unsigned int width, unsigned int height){ | ||
109 | + | ||
110 | + complexfield<T, D> result; | ||
111 | + result = field< complex<T>, D>::crop(width, height); | ||
112 | + return result; | ||
113 | + } | ||
114 | + | ||
115 | + void toImage(std::string filename, attribute type = magnitude, unsigned int n=0){ | ||
116 | + | ||
117 | + realfield<T, 1, true> rf(R[0], R[1]); | ||
118 | + | ||
119 | + //get cuda parameters | ||
120 | + dim3 blocks, grids; | ||
121 | + cuda_params(grids, blocks); | ||
122 | + | ||
123 | + if(type == magnitude){ | ||
124 | + gpu_complexfield_mag <<<grids, blocks>>> (rf.ptr(), X[n], R[0], R[1]); | ||
125 | + rf.toImages(filename, 0); | ||
126 | + } | ||
80 | 127 | ||
81 | } | 128 | } |
82 | 129 |
math/field.cuh
@@ -47,7 +47,27 @@ __global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1 | @@ -47,7 +47,27 @@ __global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1 | ||
47 | ptr[i] = val; | 47 | ptr[i] = val; |
48 | } | 48 | } |
49 | 49 | ||
50 | -template<typename T, unsigned int D> | 50 | +//crop the field to the new dimensions (width x height) |
51 | +template<typename T> | ||
52 | +__global__ void gpu_field_crop(T* dest, T* source, | ||
53 | + unsigned int r0, unsigned int r1, | ||
54 | + unsigned int width, unsigned int height){ | ||
55 | + | ||
56 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
57 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
58 | + | ||
59 | + //make sure that the thread indices are in-bounds | ||
60 | + if(iu >= width || iv >= height) return; | ||
61 | + | ||
62 | + //compute the index into the field | ||
63 | + int is = iv*r0 + iu; | ||
64 | + int id = iv*width + iu; | ||
65 | + | ||
66 | + //calculate and store the result | ||
67 | + dest[id] = source[is]; | ||
68 | +} | ||
69 | + | ||
70 | +template<typename T, unsigned int D = 1> | ||
51 | class field{ | 71 | class field{ |
52 | 72 | ||
53 | protected: | 73 | protected: |
@@ -56,6 +76,16 @@ protected: | @@ -56,6 +76,16 @@ protected: | ||
56 | unsigned int R[2]; //field resolution | 76 | unsigned int R[2]; //field resolution |
57 | rts::rect<T> shape; //position and shape of the field slice | 77 | rts::rect<T> shape; //position and shape of the field slice |
58 | 78 | ||
79 | + //calculates the optimal block and grid sizes using information from the GPU | ||
80 | + void cuda_params(dim3& grids, dim3& blocks){ | ||
81 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | ||
82 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
83 | + | ||
84 | + //create one thread for each detector pixel | ||
85 | + blocks = dim3(SQRT_BLOCK, SQRT_BLOCK); | ||
86 | + grids = dim3((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
87 | + } | ||
88 | + | ||
59 | public: | 89 | public: |
60 | 90 | ||
61 | //returns a list of file names given an input string with wild cards | 91 | //returns a list of file names given an input string with wild cards |
@@ -101,7 +131,6 @@ public: | @@ -101,7 +131,6 @@ public: | ||
101 | HANDLE_ERROR(cudaFree(X[n])); | 131 | HANDLE_ERROR(cudaFree(X[n])); |
102 | } | 132 | } |
103 | 133 | ||
104 | - | ||
105 | public: | 134 | public: |
106 | //field constructor | 135 | //field constructor |
107 | field(){ | 136 | field(){ |
@@ -241,6 +270,25 @@ public: | @@ -241,6 +270,25 @@ public: | ||
241 | HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1])); | 270 | HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1])); |
242 | } | 271 | } |
243 | 272 | ||
273 | + //crop the field | ||
274 | + field<T, D> crop(unsigned int width, unsigned int height){ | ||
275 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | ||
276 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
277 | + | ||
278 | + //create one thread for each detector pixel | ||
279 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
280 | + dim3 dimGrid((width + SQRT_BLOCK -1)/SQRT_BLOCK, (height + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
281 | + | ||
282 | + //create a scalar field to store the result | ||
283 | + field<T, D> result(width, height); | ||
284 | + | ||
285 | + for(int n=0; n<D; n++) | ||
286 | + rts::gpu_field_crop <<<dimGrid, dimBlock>>> (result.X[n], X[n], R[0], R[1], width, height); | ||
287 | + | ||
288 | + return result; | ||
289 | + | ||
290 | + } | ||
291 | + | ||
244 | }; | 292 | }; |
245 | 293 | ||
246 | } //end namespace rts | 294 | } //end namespace rts |
math/realfield.cuh
@@ -37,7 +37,7 @@ namespace rts{ | @@ -37,7 +37,7 @@ namespace rts{ | ||
37 | 37 | ||
38 | //multiply R = X * Y | 38 | //multiply R = X * Y |
39 | template<typename T> | 39 | template<typename T> |
40 | -__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){ | 40 | +__global__ void gpu_realfield_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){ |
41 | 41 | ||
42 | int iu = blockIdx.x * blockDim.x + threadIdx.x; | 42 | int iu = blockIdx.x * blockDim.x + threadIdx.x; |
43 | int iv = blockIdx.y * blockDim.y + threadIdx.y; | 43 | int iv = blockIdx.y * blockDim.y + threadIdx.y; |
@@ -244,7 +244,7 @@ public: | @@ -244,7 +244,7 @@ public: | ||
244 | realfield<P, N, positive> result(R[0], R[1]); | 244 | realfield<P, N, positive> result(R[0], R[1]); |
245 | 245 | ||
246 | for(int n=0; n<N; n++) | 246 | for(int n=0; n<N; n++) |
247 | - rts::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]); | 247 | + rts::gpu_realfield_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]); |
248 | 248 | ||
249 | return result; | 249 | return result; |
250 | } | 250 | } |
optics/material.h
@@ -403,9 +403,9 @@ namespace rts{ | @@ -403,9 +403,9 @@ namespace rts{ | ||
403 | #endif | 403 | #endif |
404 | } | 404 | } |
405 | 405 | ||
406 | - material(T lambda = 1.0, T n = 1.4, T k = 0.0) | 406 | + material(T lambda = 1, T n = 1, T k = 0) |
407 | { | 407 | { |
408 | - dispersion.insert(lambda, complex<T>(0.0, k)); | 408 | + dispersion.insert(lambda, complex<T>(n, k)); |
409 | /*//create a default refractive index | 409 | /*//create a default refractive index |
410 | refIndex<T> def; | 410 | refIndex<T> def; |
411 | def.lambda = lambda; | 411 | def.lambda = lambda; |
@@ -414,7 +414,7 @@ namespace rts{ | @@ -414,7 +414,7 @@ namespace rts{ | ||
414 | add(def); | 414 | add(def); |
415 | */ | 415 | */ |
416 | //set n0 | 416 | //set n0 |
417 | - n0 = n; | 417 | + n0 = 0; |
418 | } | 418 | } |
419 | 419 | ||
420 | material(std::string filename, std::string format = "", T scaleA = 1.0) | 420 | material(std::string filename, std::string format = "", T scaleA = 1.0) |
1 | +#include "../optics/material.h" | ||
2 | +#include "../math/complexfield.cuh" | ||
3 | + | ||
4 | +#include "cufft.h" | ||
5 | + | ||
6 | +#include <vector> | ||
7 | +#include <sstream> | ||
8 | + | ||
9 | +namespace rts{ | ||
10 | + | ||
11 | +//this function writes a sinc function to "dest" such that an iFFT produces a slab | ||
12 | +template<typename T> | ||
13 | +__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri, | ||
14 | + T* src, T* zf, | ||
15 | + T w, unsigned int zR, unsigned int nuR){ | ||
16 | + //dest = complex field representing the sample | ||
17 | + //ri = refractive indices for each wavelength | ||
18 | + //src = intensity of the light source for each wavelength | ||
19 | + //zf = z position of the slab interface for each wavelength (accounting for optical path length) | ||
20 | + //w = width of the slab (in pixels) | ||
21 | + //zR = number of z-axis samples | ||
22 | + //nuR = number of wavelengths | ||
23 | + | ||
24 | + //get the current coordinate in the plane slice | ||
25 | + int ifz = blockIdx.x * blockDim.x + threadIdx.x; | ||
26 | + int inu = blockIdx.y * blockDim.y + threadIdx.y; | ||
27 | + | ||
28 | + //make sure that the thread indices are in-bounds | ||
29 | + if(inu >= nuR || ifz >= zR) return; | ||
30 | + | ||
31 | + int i = inu * zR + ifz; | ||
32 | + | ||
33 | + T fz; | ||
34 | + if(ifz < zR/2) | ||
35 | + fz = ifz / (T)zR; | ||
36 | + else | ||
37 | + fz = -(zR - ifz) / (T)zR; | ||
38 | + | ||
39 | + //if the slab starts outside of the simulation domain, just return | ||
40 | + if(zf[inu] >= zR) return; | ||
41 | + | ||
42 | + //fill the array along z with a sinc function representing the Fourier transform of the layer | ||
43 | + | ||
44 | + T opl = w * ri[inu].real(); //optical path length | ||
45 | + | ||
46 | + //handle the case where the slab goes outside the simulation domain | ||
47 | + if(zf[inu] + opl >= zR) | ||
48 | + opl = zR - zf[inu]; | ||
49 | + | ||
50 | + if(opl == 0) return; | ||
51 | + | ||
52 | + //T l = w * ri[inu].real(); | ||
53 | + //complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0)); | ||
54 | + complex<T> e(0, -2 * PI * fz * (zf[inu] + opl/2)); | ||
55 | + | ||
56 | + complex<T> eta = ri[inu] * ri[inu] - 1; | ||
57 | + | ||
58 | + //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz); | ||
59 | + if(ifz == 0) | ||
60 | + dest[i] += opl * exp(e) * eta * src[inu]; | ||
61 | + else | ||
62 | + dest[i] += opl * exp(e) * eta * src[inu] * sin(PI * fz * opl) / (PI * fz * opl); | ||
63 | +} | ||
64 | + | ||
65 | +template<typename T> | ||
66 | +__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){ | ||
67 | + //zf = current z depth (optical path length) in pixels | ||
68 | + //ri = refractive index of the material | ||
69 | + //w = actual width of the layer (in pixels) | ||
70 | + | ||
71 | + | ||
72 | + //compute the index for this thread | ||
73 | + int i = blockIdx.x * blockDim.x + threadIdx.x; | ||
74 | + if(i >= S) return; | ||
75 | + | ||
76 | + if(ri == NULL) | ||
77 | + zf[i] += w; | ||
78 | + else | ||
79 | + zf[i] += ri[i].real() * w; | ||
80 | +} | ||
81 | + | ||
82 | +//apply the 1D MIRST filter to an existing sample (overwriting the sample) | ||
83 | +template<typename T> | ||
84 | +__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda, | ||
85 | + T dFz, | ||
86 | + T inNA, T outNA, | ||
87 | + unsigned int lambdaR, unsigned int zR, | ||
88 | + T sigma = 0){ | ||
89 | + //sampleFFT = the sample in the Fourier domain (will be overwritten) | ||
90 | + //lambda = list of wavelengths | ||
91 | + //dFz = delta along the Fz axis in the frequency domain | ||
92 | + //inNA = NA of the internal obscuration | ||
93 | + //outNA = NA of the objective | ||
94 | + //zR = number of pixels along the Fz axis (same as the z-axis) | ||
95 | + //lambdaR = number of wavelengths | ||
96 | + //sigma = width of the Gaussian source | ||
97 | + int ifz = blockIdx.x * blockDim.x + threadIdx.x; | ||
98 | + int inu = blockIdx.y * blockDim.y + threadIdx.y; | ||
99 | + | ||
100 | + if(inu >= lambdaR || ifz >= zR) return; | ||
101 | + | ||
102 | + //calculate the index into the sample FT | ||
103 | + int i = inu * zR + ifz; | ||
104 | + | ||
105 | + //compute the frequency (and set all negative spatial frequencies to zero) | ||
106 | + T fz; | ||
107 | + if(ifz < zR / 2) | ||
108 | + fz = ifz * dFz; | ||
109 | + //if the spatial frequency is negative, set it to zero and exit | ||
110 | + else{ | ||
111 | + sampleFFT[i] = 0; | ||
112 | + return; | ||
113 | + } | ||
114 | + | ||
115 | + //compute the frequency in inverse microns | ||
116 | + T nu = 1/lambda[inu]; | ||
117 | + | ||
118 | + //determine the radius of the integration circle | ||
119 | + T nu_sq = nu * nu; | ||
120 | + T fz_sq = (fz * fz) / 4; | ||
121 | + | ||
122 | + //cut off frequencies above the diffraction limit | ||
123 | + T r; | ||
124 | + if(fz_sq < nu_sq) | ||
125 | + r = sqrt(nu_sq - fz_sq); | ||
126 | + else | ||
127 | + r = 0; | ||
128 | + | ||
129 | + //account for the optics | ||
130 | + T Q = 0; | ||
131 | + if(r > nu * inNA && r < nu * outNA) | ||
132 | + Q = 1; | ||
133 | + | ||
134 | + //account for the source | ||
135 | + //T sigma = 30.0; | ||
136 | + T s = exp( - (r*r * sigma*sigma) / 2 ); | ||
137 | + //T s=1; | ||
138 | + | ||
139 | + //compute the final filter | ||
140 | + T mirst = 0; | ||
141 | + if(fz != 0) | ||
142 | + mirst = 2 * PI * r * s * Q * (1/fz); | ||
143 | + | ||
144 | + sampleFFT[i] *= mirst; | ||
145 | + | ||
146 | +} | ||
147 | + | ||
148 | +/*This object performs a 1-dimensional (layered) MIRST simulation | ||
149 | +*/ | ||
150 | +template<typename T> | ||
151 | +class mirst1d{ | ||
152 | + | ||
153 | +private: | ||
154 | + unsigned int Z; //z-axis resolution | ||
155 | + unsigned int pad; //pixel padding on either side of the sample | ||
156 | + | ||
157 | + std::vector< material<T> > matlist; //list of materials | ||
158 | + std::vector< T > layers; //list of layer thicknesses | ||
159 | + | ||
160 | + std::vector< T > lambdas; //list of wavelengths that are being simulated | ||
161 | + unsigned int S; //number of wavelengths (size of "lambdas") | ||
162 | + | ||
163 | + T NA[2]; //numerical aperature (central obscuration and outer diameter) | ||
164 | + | ||
165 | + complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc. | ||
166 | + | ||
167 | + void fft(int direction = CUFFT_FORWARD){ | ||
168 | + | ||
169 | + unsigned padZ = Z + pad; | ||
170 | + | ||
171 | + //create cuFFT handles | ||
172 | + cufftHandle plan; | ||
173 | + cufftResult result; | ||
174 | + | ||
175 | + if(sizeof(T) == 4) | ||
176 | + result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision | ||
177 | + else | ||
178 | + result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision | ||
179 | + | ||
180 | + //check for Plan 1D errors | ||
181 | + if(result != CUFFT_SUCCESS){ | ||
182 | + std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl; | ||
183 | + CufftError(result); | ||
184 | + exit(1); | ||
185 | + } | ||
186 | + | ||
187 | + if(sizeof(T) == 4) | ||
188 | + result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction); | ||
189 | + else | ||
190 | + result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction); | ||
191 | + | ||
192 | + //check for FFT errors | ||
193 | + if(result != CUFFT_SUCCESS){ | ||
194 | + std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl; | ||
195 | + CufftError(result); | ||
196 | + exit(1); | ||
197 | + } | ||
198 | + | ||
199 | + cufftDestroy(plan); | ||
200 | + } | ||
201 | + | ||
202 | + | ||
203 | + //initialize the scratch memory | ||
204 | + void init_scratch(){ | ||
205 | + scratch = complexfield<T, 1>(Z + pad , lambdas.size()); | ||
206 | + scratch = 0; | ||
207 | + } | ||
208 | + | ||
209 | + //get the list of scattering efficiency (eta) values for a specified layer | ||
210 | + std::vector< complex<T> > layer_etas(unsigned int l){ | ||
211 | + | ||
212 | + std::vector< complex<T> > etas; | ||
213 | + | ||
214 | + //fill the list of etas | ||
215 | + for(unsigned int i=0; i<lambdas.size(); i++) | ||
216 | + etas.push_back( matlist[l].eta(lambdas[i]) ); | ||
217 | + return etas; | ||
218 | + } | ||
219 | + | ||
220 | + //calculates the optimal block and grid sizes using information from the GPU | ||
221 | + void cuda_params(dim3& grids, dim3& blocks){ | ||
222 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | ||
223 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
224 | + | ||
225 | + //create one thread for each detector pixel | ||
226 | + blocks = dim3(SQRT_BLOCK, SQRT_BLOCK); | ||
227 | + grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
228 | + } | ||
229 | + | ||
230 | + //add the fourier transform of layer n to the scratch space | ||
231 | + void build_layer_fft(unsigned int n, T* zf){ | ||
232 | + unsigned int paddedZ = Z + pad; | ||
233 | + | ||
234 | + T wpx = layers[n] / dz(); //calculate the width of the layer in pixels | ||
235 | + | ||
236 | + //allocate memory for the refractive index | ||
237 | + complex<T>* gpuRi; | ||
238 | + HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S)); | ||
239 | + | ||
240 | + //allocate memory for the source profile | ||
241 | + T* gpuSrc; | ||
242 | + HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S)); | ||
243 | + | ||
244 | + complex<T> ri; | ||
245 | + T source; | ||
246 | + //store the refractive index and source profile in a CPU array | ||
247 | + for(int inu=0; inu<S; inu++){ | ||
248 | + //save the refractive index to the GPU | ||
249 | + ri = matlist[n].getN(lambdas[inu]); | ||
250 | + HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice )); | ||
251 | + | ||
252 | + //save the source profile to the GPU | ||
253 | + source = 1; | ||
254 | + HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice )); | ||
255 | + | ||
256 | + } | ||
257 | + | ||
258 | + //create one thread for each pixel of the field slice | ||
259 | + dim3 gridDim, blockDim; | ||
260 | + cuda_params(gridDim, blockDim); | ||
261 | + rts::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S); | ||
262 | + | ||
263 | + int linBlock = rts::maxThreadsPerBlock(); //compute the optimal block size | ||
264 | + int linGrid = S / linBlock + 1; | ||
265 | + rts::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S); | ||
266 | + | ||
267 | + //free memory | ||
268 | + HANDLE_ERROR(cudaFree(gpuRi)); | ||
269 | + HANDLE_ERROR(cudaFree(gpuSrc)); | ||
270 | + } | ||
271 | + | ||
272 | + void build_sample(){ | ||
273 | + init_scratch(); //initialize the GPU scratch space | ||
274 | + //build_layer(1); | ||
275 | + | ||
276 | + T* zf; | ||
277 | + HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S)); | ||
278 | + HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S)); | ||
279 | + | ||
280 | + //render each layer of the sample | ||
281 | + for(unsigned int l=0; l<layers.size(); l++){ | ||
282 | + build_layer_fft(l, zf); | ||
283 | + } | ||
284 | + | ||
285 | + HANDLE_ERROR(cudaFree(zf)); | ||
286 | + } | ||
287 | + | ||
288 | + void apply_filter(){ | ||
289 | + dim3 gridDim, blockDim; | ||
290 | + cuda_params(gridDim, blockDim); | ||
291 | + | ||
292 | + unsigned int Zpad = Z + pad; | ||
293 | + | ||
294 | + T sim_range = dz() * Zpad; | ||
295 | + T dFz = 1 / sim_range; | ||
296 | + | ||
297 | + //copy the array of wavelengths to the GPU | ||
298 | + T* gpuLambdas; | ||
299 | + HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad)); | ||
300 | + HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice)); | ||
301 | + rts::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas, | ||
302 | + dFz, | ||
303 | + NA[0], NA[1], | ||
304 | + S, Zpad); | ||
305 | + } | ||
306 | + | ||
307 | + //crop the image to the sample thickness - keep in mind that sample thickness != optical path length | ||
308 | + void crop(){ | ||
309 | + | ||
310 | + scratch = scratch.crop(Z, S); | ||
311 | + } | ||
312 | + | ||
313 | + | ||
314 | +public: | ||
315 | + | ||
316 | + //constructor | ||
317 | + mirst1d(unsigned int rZ = 100, | ||
318 | + unsigned int padding = 0){ | ||
319 | + Z = rZ; | ||
320 | + pad = padding; | ||
321 | + NA[0] = 0; | ||
322 | + NA[1] = 0.8; | ||
323 | + } | ||
324 | + | ||
325 | + //add a layer, thickness = microns | ||
326 | + void add_layer(material<T> mat, T thickness){ | ||
327 | + matlist.push_back(mat); | ||
328 | + layers.push_back(thickness); | ||
329 | + } | ||
330 | + | ||
331 | + void add_layer(std::string filename, T thickness){ | ||
332 | + add_layer(material<T>(filename), thickness); | ||
333 | + } | ||
334 | + | ||
335 | + //adds a block of wavenumbers (cm^-1) to the simulation parameters | ||
336 | + void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){ | ||
337 | + unsigned int nu = start; | ||
338 | + while(nu <= stop){ | ||
339 | + lambdas.push_back((T)10000 / nu); | ||
340 | + nu += step; | ||
341 | + } | ||
342 | + S = lambdas.size(); //increment the number of wavelengths (shorthand for later) | ||
343 | + } | ||
344 | + | ||
345 | + T thickness(){ | ||
346 | + T t = 0; | ||
347 | + for(unsigned int l=0; l<layers.size(); l++) | ||
348 | + t += layers[l]; | ||
349 | + return t; | ||
350 | + } | ||
351 | + | ||
352 | + void padding(unsigned int padding = 0){ | ||
353 | + pad = padding; | ||
354 | + } | ||
355 | + | ||
356 | + T dz(){ | ||
357 | + return thickness() / Z; //calculate the z-axis step size | ||
358 | + } | ||
359 | + | ||
360 | + void na(T in, T out){ | ||
361 | + NA[0] = in; | ||
362 | + NA[1] = out; | ||
363 | + } | ||
364 | + | ||
365 | + void na(T out){ | ||
366 | + na(0, out); | ||
367 | + } | ||
368 | + | ||
369 | + void save_sample(std::string filename){ | ||
370 | + //create a sample and save the magnitude as an image | ||
371 | + build_sample(); | ||
372 | + fft(CUFFT_INVERSE); | ||
373 | + scratch.toImage(filename, rts::complexfield<T, 1>::magnitude); | ||
374 | + } | ||
375 | + | ||
376 | + void save_mirst(std::string filename){ | ||
377 | + //apply the MIRST filter to a sample and save the image | ||
378 | + | ||
379 | + //build the sample in the Fourier domain | ||
380 | + build_sample(); | ||
381 | + | ||
382 | + //apply the MIRST filter | ||
383 | + apply_filter(); | ||
384 | + | ||
385 | + //apply an inverse FFT to bring the results back into the spatial domain | ||
386 | + fft(CUFFT_INVERSE); | ||
387 | + | ||
388 | + crop(); | ||
389 | + | ||
390 | + //save the image | ||
391 | + scratch.toImage(filename, rts::complexfield<T, 1>::magnitude); | ||
392 | + } | ||
393 | + | ||
394 | + | ||
395 | + | ||
396 | + | ||
397 | + std::string str(){ | ||
398 | + | ||
399 | + stringstream ss; | ||
400 | + | ||
401 | + ss<<"z-axis resolution: "<<Z<<std::endl; | ||
402 | + ss<<"number of wavelengths: "<<lambdas.size()<<std::endl; | ||
403 | + ss<<"padding: "<<pad<<std::endl; | ||
404 | + ss<<"sample thickness: "<<thickness()<<" um"<<std::endl; | ||
405 | + ss<<"dz: "<<dz()<<" um"<<std::endl; | ||
406 | + ss<<std::endl; | ||
407 | + ss<<"layers: "<<layers.size()<<" -----------------"<<std::endl; | ||
408 | + for(unsigned int l=0; l<layers.size(); l++) | ||
409 | + ss<<"layer "<<l<<": "<<layers[l]<<" um"<<" ("<<matlist[l].getName()<<")"<<std::endl; | ||
410 | + | ||
411 | + return ss.str(); | ||
412 | + | ||
413 | + | ||
414 | + } | ||
415 | + | ||
416 | + | ||
417 | + | ||
418 | +}; | ||
419 | + | ||
420 | +} | ||
0 | \ No newline at end of file | 421 | \ No newline at end of file |
tools/arguments.h renamed to ui/arguments.h
1 | -#ifndef RTS_ARGUMENTS | ||
2 | -#define RTS_ARGUMENTS | ||
3 | - | ||
4 | -#include <string> | ||
5 | -#include <vector> | ||
6 | -#include <iostream> | ||
7 | -#include <iomanip> | ||
8 | -#include <sstream> | ||
9 | -#include <iterator> | ||
10 | -#include <algorithm> | ||
11 | - | ||
12 | -#ifdef _WIN32 | ||
13 | -#include <Windows.h> | ||
14 | -#endif | ||
15 | - | ||
16 | -namespace rts{ | ||
17 | - | ||
18 | - class argument | ||
19 | - { | ||
20 | - private: | ||
21 | - bool ansi; | ||
22 | - | ||
23 | - //argument name | ||
24 | - std::string name; | ||
25 | - | ||
26 | - //description of the argument | ||
27 | - std::vector<std::string> desc; | ||
28 | - | ||
29 | - //argument values | 1 | +#ifndef RTS_ARGUMENTS |
2 | +#define RTS_ARGUMENTS | ||
3 | + | ||
4 | +#include <string> | ||
5 | +#include <vector> | ||
6 | +#include <iostream> | ||
7 | +#include <iomanip> | ||
8 | +#include <sstream> | ||
9 | +#include <iterator> | ||
10 | +#include <algorithm> | ||
11 | + | ||
12 | +#ifdef _WIN32 | ||
13 | +#include <Windows.h> | ||
14 | +#endif | ||
15 | + | ||
16 | +namespace rts{ | ||
17 | + | ||
18 | + class argument | ||
19 | + { | ||
20 | + private: | ||
21 | + bool ansi; | ||
22 | + | ||
23 | + //argument name | ||
24 | + std::string name; | ||
25 | + | ||
26 | + //description of the argument | ||
27 | + std::vector<std::string> desc; | ||
28 | + | ||
29 | + //argument values | ||
30 | std::vector<std::string> vals; | 30 | std::vector<std::string> vals; |
31 | 31 | ||
32 | //range or example | 32 | //range or example |
33 | std::string range; | 33 | std::string range; |
34 | 34 | ||
35 | //flag is true when the argument is user-specified | 35 | //flag is true when the argument is user-specified |
36 | - bool flag; | ||
37 | - | ||
38 | - void parse_val(const std::string &s){ | ||
39 | - | 36 | + bool flag; |
37 | + | ||
38 | + void parse_val(const std::string &s){ | ||
39 | + | ||
40 | vals.clear(); | 40 | vals.clear(); |
41 | 41 | ||
42 | - std::stringstream ss(s); | ||
43 | - std::string item; | ||
44 | - while (std::getline(ss, item, ' ')) { | ||
45 | - vals.push_back(item); | ||
46 | - } | ||
47 | - } | ||
48 | - | ||
49 | - void parse_desc(const std::string &s){ | ||
50 | - | ||
51 | - desc.clear(); | ||
52 | - | ||
53 | - std::stringstream ss(s); | ||
54 | - std::string item; | ||
55 | - while (std::getline(ss, item, '\n')) { | ||
56 | - desc.push_back(item); | ||
57 | - } | ||
58 | - } | ||
59 | - | 42 | + std::stringstream ss(s); |
43 | + std::string item; | ||
44 | + while (std::getline(ss, item, ' ')) { | ||
45 | + vals.push_back(item); | ||
46 | + } | ||
47 | + } | ||
48 | + | ||
49 | + void parse_desc(const std::string &s){ | ||
50 | + | ||
51 | + desc.clear(); | ||
52 | + | ||
53 | + std::stringstream ss(s); | ||
54 | + std::string item; | ||
55 | + while (std::getline(ss, item, '\n')) { | ||
56 | + desc.push_back(item); | ||
57 | + } | ||
58 | + } | ||
59 | + | ||
60 | public: | 60 | public: |
61 | void set_ansi(bool b){ ansi = b; } | 61 | void set_ansi(bool b){ ansi = b; } |
62 | - //create an argument with a given name, description, and default value | ||
63 | - argument(std::string _name, std::string _desc, std::string _default = "", std::string _range = "") | ||
64 | - { | ||
65 | - name = _name; | ||
66 | - parse_desc(_desc); | 62 | + //create an argument with a given name, description, and default value |
63 | + argument(std::string _name, std::string _desc, std::string _default = "", std::string _range = "") | ||
64 | + { | ||
65 | + name = _name; | ||
66 | + parse_desc(_desc); | ||
67 | parse_val(_default); | 67 | parse_val(_default); |
68 | 68 | ||
69 | //if a default value is provided, set the flag | 69 | //if a default value is provided, set the flag |
@@ -73,7 +73,7 @@ namespace rts{ | @@ -73,7 +73,7 @@ namespace rts{ | ||
73 | 73 | ||
74 | range = _range; | 74 | range = _range; |
75 | 75 | ||
76 | - | 76 | + |
77 | } | 77 | } |
78 | 78 | ||
79 | int nargs() | 79 | int nargs() |
@@ -156,26 +156,26 @@ namespace rts{ | @@ -156,26 +156,26 @@ namespace rts{ | ||
156 | n += 4; | 156 | n += 4; |
157 | 157 | ||
158 | return n; | 158 | return n; |
159 | - } | ||
160 | - | ||
161 | - | ||
162 | - //string output | ||
163 | - std::string toStr(int width = 0) | ||
164 | - { | 159 | + } |
160 | + | ||
161 | + | ||
162 | + //string output | ||
163 | + std::string toStr(int width = 0) | ||
164 | + { | ||
165 | std::stringstream ss; | 165 | std::stringstream ss; |
166 | 166 | ||
167 | int color_size = 0; | 167 | int color_size = 0; |
168 | 168 | ||
169 | - | ||
170 | - //create the left column | ||
171 | - std::string left_part = std::string(" --") + name; | ||
172 | - if(vals.size() != 0) | 169 | + |
170 | + //create the left column | ||
171 | + std::string left_part = std::string(" --") + name; | ||
172 | + if(vals.size() != 0) | ||
173 | { | 173 | { |
174 | if(ansi) | 174 | if(ansi) |
175 | left_part += "\033[1;32m"; | 175 | left_part += "\033[1;32m"; |
176 | - left_part += " ( = "; | ||
177 | - for(int v=0; v<vals.size(); v++) | ||
178 | - left_part += vals[v] + std::string(" "); | 176 | + left_part += " ( = "; |
177 | + for(int v=0; v<vals.size(); v++) | ||
178 | + left_part += vals[v] + std::string(" "); | ||
179 | left_part += ")"; | 179 | left_part += ")"; |
180 | if(ansi) | 180 | if(ansi) |
181 | left_part += "\033[0m"; | 181 | left_part += "\033[0m"; |
@@ -183,30 +183,30 @@ namespace rts{ | @@ -183,30 +183,30 @@ namespace rts{ | ||
183 | color_size = 11; | 183 | color_size = 11; |
184 | } | 184 | } |
185 | else | 185 | else |
186 | - color_size = 0; | ||
187 | - | ||
188 | - //if no width is passed, put 4 spaces between left and right columns | 186 | + color_size = 0; |
187 | + | ||
188 | + //if no width is passed, put 4 spaces between left and right columns | ||
189 | if(width == 0) width = col_width(); | 189 | if(width == 0) width = col_width(); |
190 | - | ||
191 | - ss<<std::left<<std::setw(width + color_size)<<left_part; | ||
192 | - | ||
193 | - //output right column | ||
194 | - for(int d=0; d<desc.size(); d++) | ||
195 | - { | ||
196 | - if(d == 0) | ||
197 | - ss<<desc[0]; | 190 | + |
191 | + ss<<std::left<<std::setw(width + color_size)<<left_part; | ||
192 | + | ||
193 | + //output right column | ||
194 | + for(int d=0; d<desc.size(); d++) | ||
195 | + { | ||
196 | + if(d == 0) | ||
197 | + ss<<desc[0]; | ||
198 | else | 198 | else |
199 | ss<<std::endl<<setfill(' ')<<setw(width)<<" "<<desc[d]; | 199 | ss<<std::endl<<setfill(' ')<<setw(width)<<" "<<desc[d]; |
200 | - | 200 | + |
201 | } | 201 | } |
202 | 202 | ||
203 | //output the range in the right column | 203 | //output the range in the right column |
204 | if(range != "" && ansi) | 204 | if(range != "" && ansi) |
205 | - ss<<std::endl<<setfill(' ')<<setw(width)<<" "<<" "<<std::string("\033[1;36m") + range + "\033[0m"; | ||
206 | - else if(range != "") | ||
207 | - ss<<std::endl<<setfill(' ')<<setw(width)<<" "<<" "<<range; | ||
208 | - | ||
209 | - return ss.str(); | 205 | + ss<<std::endl<<setfill(' ')<<setw(width)<<" "<<" "<<std::string("\033[1;36m") + range + "\033[0m"; |
206 | + else if(range != "") | ||
207 | + ss<<std::endl<<setfill(' ')<<setw(width)<<" "<<" "<<range; | ||
208 | + | ||
209 | + return ss.str(); | ||
210 | } | 210 | } |
211 | 211 | ||
212 | //compare the name of the argument to a string | 212 | //compare the name of the argument to a string |
@@ -227,8 +227,8 @@ namespace rts{ | @@ -227,8 +227,8 @@ namespace rts{ | ||
227 | bool is_set() | 227 | bool is_set() |
228 | { | 228 | { |
229 | return flag; | 229 | return flag; |
230 | - } | ||
231 | - | 230 | + } |
231 | + | ||
232 | }; | 232 | }; |
233 | 233 | ||
234 | struct argsection | 234 | struct argsection |
@@ -280,7 +280,7 @@ namespace rts{ | @@ -280,7 +280,7 @@ namespace rts{ | ||
280 | } | 280 | } |
281 | 281 | ||
282 | //output the arguments (generally in response to --help) | 282 | //output the arguments (generally in response to --help) |
283 | - std::string toStr() | 283 | + std::string str() |
284 | { | 284 | { |
285 | std::stringstream ss; | 285 | std::stringstream ss; |
286 | 286 | ||
@@ -407,10 +407,10 @@ namespace rts{ | @@ -407,10 +407,10 @@ namespace rts{ | ||
407 | } | 407 | } |
408 | 408 | ||
409 | 409 | ||
410 | - }; | ||
411 | - | ||
412 | - | ||
413 | - | 410 | + }; |
411 | + | ||
412 | + | ||
413 | + | ||
414 | } //end namespace rts | 414 | } //end namespace rts |
415 | 415 | ||
416 | #endif | 416 | #endif |
tools/progressbar.h renamed to ui/progressbar.h