mirst-1d.cuh
11.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
#include "../optics/material.h"
#include "../math/complexfield.cuh"
#include "../math/constants.h"
//#include "../envi/bil.h"
#include "cufft.h"
#include <vector>
#include <sstream>
namespace stim{
//this function writes a sinc function to "dest" such that an iFFT produces a slab
template<typename T>
__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri,
T* src, T* zf,
T w, unsigned int zR, unsigned int nuR){
//dest = complex field representing the sample
//ri = refractive indices for each wavelength
//src = intensity of the light source for each wavelength
//zf = z position of the slab interface for each wavelength (accounting for optical path length)
//w = width of the slab (in pixels)
//zR = number of z-axis samples
//nuR = number of wavelengths
//get the current coordinate in the plane slice
int ifz = blockIdx.x * blockDim.x + threadIdx.x;
int inu = blockIdx.y * blockDim.y + threadIdx.y;
//make sure that the thread indices are in-bounds
if(inu >= nuR || ifz >= zR) return;
int i = inu * zR + ifz;
T fz;
if(ifz < zR/2)
fz = ifz / (T)zR;
else
fz = -(zR - ifz) / (T)zR;
//if the slab starts outside of the simulation domain, just return
if(zf[inu] >= zR) return;
//fill the array along z with a sinc function representing the Fourier transform of the layer
T opl = w * ri[inu].real(); //optical path length
//handle the case where the slab goes outside the simulation domain
if(zf[inu] + opl >= zR)
opl = zR - zf[inu];
if(opl == 0) return;
//T l = w * ri[inu].real();
//complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0));
complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));
complex<T> eta = ri[inu] * ri[inu] - 1;
//dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz);
if(ifz == 0)
dest[i] += opl * exp(e) * eta * src[inu];
else
dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
}
template<typename T>
__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){
//zf = current z depth (optical path length) in pixels
//ri = refractive index of the material
//w = actual width of the layer (in pixels)
//compute the index for this thread
int i = blockIdx.x * blockDim.x + threadIdx.x;
if(i >= S) return;
if(ri == NULL)
zf[i] += w;
else
zf[i] += ri[i].real() * w;
}
//apply the 1D MIRST filter to an existing sample (overwriting the sample)
template<typename T>
__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda,
T dFz,
T inNA, T outNA,
unsigned int lambdaR, unsigned int zR,
T sigma = 0){
//sampleFFT = the sample in the Fourier domain (will be overwritten)
//lambda = list of wavelengths
//dFz = delta along the Fz axis in the frequency domain
//inNA = NA of the internal obscuration
//outNA = NA of the objective
//zR = number of pixels along the Fz axis (same as the z-axis)
//lambdaR = number of wavelengths
//sigma = width of the Gaussian source
int ifz = blockIdx.x * blockDim.x + threadIdx.x;
int inu = blockIdx.y * blockDim.y + threadIdx.y;
if(inu >= lambdaR || ifz >= zR) return;
//calculate the index into the sample FT
int i = inu * zR + ifz;
//compute the frequency (and set all negative spatial frequencies to zero)
T fz;
if(ifz < zR / 2)
fz = ifz * dFz;
//if the spatial frequency is negative, set it to zero and exit
else{
sampleFFT[i] = 0;
return;
}
//compute the frequency in inverse microns
T nu = 1/lambda[inu];
//determine the radius of the integration circle
T nu_sq = nu * nu;
T fz_sq = (fz * fz) / 4;
//cut off frequencies above the diffraction limit
T r;
if(fz_sq < nu_sq)
r = sqrt(nu_sq - fz_sq);
else
r = 0;
//account for the optics
T Q = 0;
if(r > nu * inNA && r < nu * outNA)
Q = 1;
//account for the source
//T sigma = 30.0;
T s = exp( - (r*r * sigma*sigma) / 2 );
//T s=1;
//compute the final filter
T mirst = 0;
if(fz != 0)
mirst = 2 * stimPI * r * s * Q * (1/fz);
sampleFFT[i] *= mirst;
}
/*This object performs a 1-dimensional (layered) MIRST simulation
*/
template<typename T>
class mirst1d{
private:
unsigned int Z; //z-axis resolution
unsigned int pad; //pixel padding on either side of the sample
std::vector< material<T> > matlist; //list of materials
std::vector< T > layers; //list of layer thicknesses
std::vector< T > lambdas; //list of wavelengths that are being simulated
unsigned int S; //number of wavelengths (size of "lambdas")
T NA[2]; //numerical aperature (central obscuration and outer diameter)
function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters)
complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc.
void fft(int direction = CUFFT_FORWARD){
unsigned padZ = Z + pad;
//create cuFFT handles
cufftHandle plan;
cufftResult result;
if(sizeof(T) == 4)
result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision
else
result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision
//check for Plan 1D errors
if(result != CUFFT_SUCCESS){
std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;
CufftError(result);
exit(1);
}
if(sizeof(T) == 4)
result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);
else
result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);
//check for FFT errors
if(result != CUFFT_SUCCESS){
std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;
CufftError(result);
exit(1);
}
cufftDestroy(plan);
}
//initialize the scratch memory
void init_scratch(){
scratch = complexfield<T, 1>(Z + pad , lambdas.size());
scratch = 0;
}
//get the list of scattering efficiency (eta) values for a specified layer
std::vector< complex<T> > layer_etas(unsigned int l){
std::vector< complex<T> > etas;
//fill the list of etas
for(unsigned int i=0; i<lambdas.size(); i++)
etas.push_back( matlist[l].eta(lambdas[i]) );
return etas;
}
//calculates the optimal block and grid sizes using information from the GPU
void cuda_params(dim3& grids, dim3& blocks){
int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
//create one thread for each detector pixel
blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);
}
//add the fourier transform of layer n to the scratch space
void build_layer_fft(unsigned int n, T* zf){
unsigned int paddedZ = Z + pad;
T wpx = layers[n] / dz(); //calculate the width of the layer in pixels
//allocate memory for the refractive index
complex<T>* gpuRi;
HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));
//allocate memory for the source profile
T* gpuSrc;
HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));
complex<T> ri;
T source;
//store the refractive index and source profile in a CPU array
for(int inu=0; inu<S; inu++){
//save the refractive index to the GPU
ri = matlist[n].getN(lambdas[inu]);
HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice ));
//save the source profile to the GPU
source = source_profile(10000 / lambdas[inu]);
HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice ));
}
//create one thread for each pixel of the field slice
dim3 gridDim, blockDim;
cuda_params(gridDim, blockDim);
stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);
int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size
int linGrid = S / linBlock + 1;
stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);
//free memory
HANDLE_ERROR(cudaFree(gpuRi));
HANDLE_ERROR(cudaFree(gpuSrc));
}
void build_sample(){
init_scratch(); //initialize the GPU scratch space
//build_layer(1);
T* zf;
HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));
HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));
//render each layer of the sample
for(unsigned int l=0; l<layers.size(); l++){
build_layer_fft(l, zf);
}
HANDLE_ERROR(cudaFree(zf));
}
void apply_filter(){
dim3 gridDim, blockDim;
cuda_params(gridDim, blockDim);
unsigned int Zpad = Z + pad;
T sim_range = dz() * Zpad;
T dFz = 1 / sim_range;
//copy the array of wavelengths to the GPU
T* gpuLambdas;
HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad));
HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice));
stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas,
dFz,
NA[0], NA[1],
S, Zpad);
}
//crop the image to the sample thickness - keep in mind that sample thickness != optical path length
void crop(){
scratch = scratch.crop(Z, S);
}
//save the scratch field as a binary file
void to_binary(std::string filename){
}
public:
//constructor
mirst1d(unsigned int rZ = 100,
unsigned int padding = 0){
Z = rZ;
pad = padding;
NA[0] = 0;
NA[1] = 0.8;
S = 0;
source_profile = 1;
}
//add a layer, thickness = microns
void add_layer(material<T> mat, T thickness){
matlist.push_back(mat);
layers.push_back(thickness);
}
void add_layer(std::string filename, T thickness){
add_layer(material<T>(filename), thickness);
}
//adds a profile spectrum for the light source
void set_source(std::string filename){
source_profile.load(filename);
}
//adds a block of wavenumbers (cm^-1) to the simulation parameters
void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){
unsigned int nu = start;
while(nu <= stop){
lambdas.push_back((T)10000 / nu);
nu += step;
}
S = lambdas.size(); //increment the number of wavelengths (shorthand for later)
}
T thickness(){
T t = 0;
for(unsigned int l=0; l<layers.size(); l++)
t += layers[l];
return t;
}
void padding(unsigned int padding = 0){
pad = padding;
}
T dz(){
return thickness() / Z; //calculate the z-axis step size
}
void na(T in, T out){
NA[0] = in;
NA[1] = out;
}
void na(T out){
na(0, out);
}
stim::function<T, T> get_source(){
return source_profile;
}
void save_sample(std::string filename){
//create a sample and save the magnitude as an image
build_sample();
fft(CUFFT_INVERSE);
scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
}
void save_mirst(std::string filename, bool binary = true){
//apply the MIRST filter to a sample and save the image
//build the sample in the Fourier domain
build_sample();
//apply the MIRST filter
apply_filter();
//apply an inverse FFT to bring the results back into the spatial domain
fft(CUFFT_INVERSE);
crop();
//save the image
if(binary)
to_binary(filename);
else
scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
}
std::string str(){
stringstream ss;
ss<<"1D MIRST Simulation========================="<<std::endl;
ss<<"z-axis resolution: "<<Z<<std::endl;
ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;
ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;
ss<<"padding: "<<pad<<std::endl;
ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;
ss<<"dz: "<<dz()<<" um"<<std::endl;
ss<<std::endl;
ss<<layers.size()<<" layers-------------"<<std::endl;
for(unsigned int l=0; l<layers.size(); l++)
ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;
ss<<"source profile-----------"<<std::endl;
ss<<get_source().str()<<std::endl;
return ss.str();
}
};
}