Blame view

stim/optics/mirst-1d.cuh 12.2 KB
81e0d221   David Mayerich   separated executa...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
  #include "../optics/material.h"

  #include "../math/complexfield.cuh"

  #include "../math/constants.h"

  //#include "../envi/bil.h"

  

  #include "cufft.h"

  

  #include <vector>

  #include <sstream>

  

  namespace stim{

  

  //this function writes a sinc function to "dest" such that an iFFT produces a slab

  template<typename T>

  __global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri, 

  									  T* src, T* zf, 

  									  T w, unsigned int zR, unsigned int nuR){

  	//dest = complex field representing the sample

  	//ri = refractive indices for each wavelength

  	//src = intensity of the light source for each wavelength

  	//zf = z position of the slab interface for each wavelength (accounting for optical path length)

  	//w = width of the slab (in pixels)

  	//zR = number of z-axis samples

  	//nuR = number of wavelengths

  

      //get the current coordinate in the plane slice

  	int ifz = blockIdx.x * blockDim.x + threadIdx.x;

  	int inu = blockIdx.y * blockDim.y + threadIdx.y;

  

  	//make sure that the thread indices are in-bounds

  	if(inu >= nuR || ifz >= zR) return;

  

  	int i = inu * zR + ifz;

  

      T fz;

      if(ifz < zR/2)

          fz = ifz / (T)zR;

      else

          fz = -(zR - ifz) / (T)zR;

  

      //if the slab starts outside of the simulation domain, just return

      if(zf[inu] >= zR) return;

  

  	//fill the array along z with a sinc function representing the Fourier transform of the layer

  

  	T opl = w * ri[inu].real();			//optical path length

  

  	//handle the case where the slab goes outside the simulation domain

  	if(zf[inu] + opl >= zR)

  		opl = zR - zf[inu];

  

  	if(opl == 0) return;

  

  	//T l = w * ri[inu].real();

  	//complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0));

  	complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));

  

  	complex<T> eta = ri[inu] * ri[inu] - 1;

  

  	//dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz);

  	if(ifz == 0)

          dest[i] += opl * exp(e) * eta * src[inu];

      else

          dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);

  }

  

  template<typename T>

  __global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){

  	//zf = current z depth (optical path length) in pixels

  	//ri = refractive index of the material

  	//w = actual width of the layer (in pixels)

  

  

  	//compute the index for this thread

  	int i = blockIdx.x * blockDim.x + threadIdx.x;

  	if(i >= S) return;

  

  	if(ri == NULL)

  		zf[i] += w;

  	else

  		zf[i] += ri[i].real() * w;

  }

  

  //apply the 1D MIRST filter to an existing sample (overwriting the sample)

  template<typename T>

  __global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda, 

  								 T dFz,

  								 T inNA, T outNA, 

  								 unsigned int lambdaR, unsigned int zR, 

  								 T sigma = 0){

  	//sampleFFT = the sample in the Fourier domain (will be overwritten)

  	//lambda = list of wavelengths

  	//dFz = delta along the Fz axis in the frequency domain

  	//inNA = NA of the internal obscuration

  	//outNA = NA of the objective

  	//zR = number of pixels along the Fz axis (same as the z-axis)

  	//lambdaR = number of wavelengths

  	//sigma = width of the Gaussian source

  	int ifz = blockIdx.x * blockDim.x + threadIdx.x;

  	int inu = blockIdx.y * blockDim.y + threadIdx.y;

  

  	if(inu >= lambdaR || ifz >= zR) return;

  

  	//calculate the index into the sample FT

  	int i = inu * zR + ifz;

  

  	//compute the frequency (and set all negative spatial frequencies to zero)

  	T fz;

  	if(ifz < zR / 2)

  	    fz = ifz * dFz;

  	//if the spatial frequency is negative, set it to zero and exit

  	else{

  	    sampleFFT[i] = 0;

  	    return;

  	}

  

  	//compute the frequency in inverse microns

  	T nu = 1/lambda[inu];

  

  	//determine the radius of the integration circle

  	T nu_sq = nu * nu;

  	T fz_sq = (fz * fz) / 4;

  

  	//cut off frequencies above the diffraction limit

  	T r;

  	if(fz_sq < nu_sq)

  	    r = sqrt(nu_sq - fz_sq);

  	else

  	    r = 0;

  

  	//account for the optics

  	T Q = 0;

  	if(r > nu * inNA && r < nu * outNA)

  	    Q = 1;

  

  	//account for the source

  	//T sigma = 30.0;

  	T s = exp( - (r*r * sigma*sigma) / 2 );

  	//T s=1;

  

  	//compute the final filter

  	T mirst = 0;

  	if(fz != 0)

  	    mirst = 2 * stimPI * r * s * Q * (1/fz);

  

  	sampleFFT[i] *= mirst;

  

  }

  

  /*This object performs a 1-dimensional (layered) MIRST simulation

  */

  template<typename T>

  class mirst1d{

  

  private:

  	unsigned int Z;	//z-axis resolution

  	unsigned int pad;	//pixel padding on either side of the sample

  

  	std::vector< material<T> > matlist;	//list of materials

  	std::vector< T > layers;				//list of layer thicknesses

  

  	std::vector< T > lambdas;		//list of wavelengths that are being simulated

  	unsigned int S;					//number of wavelengths (size of "lambdas")

  

  	T NA[2];						//numerical aperature (central obscuration and outer diameter)

  

  	function<T, T> source_profile;	//profile (spectrum) of the source (expressed in inverse centimeters)

  

  	complexfield<T, 1> scratch;		//scratch GPU memory used to build samples, transforms, etc.

  

  	void fft(int direction = CUFFT_FORWARD){

  

  		unsigned padZ = Z + pad;

  		

  		//create cuFFT handles

  		cufftHandle plan;

  		cufftResult result;

  		

  		if(sizeof(T) == 4)

  			result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size());	//single precision

  		else

  			result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size());	//double precision

  

  		//check for Plan 1D errors

  		if(result != CUFFT_SUCCESS){

  			std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;

  			CufftError(result);

  			exit(1);

  		}

  

  		if(sizeof(T) == 4)

  			result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);

  		else

  			result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);

  

  		//check for FFT errors

  		if(result != CUFFT_SUCCESS){

  			std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;

  			CufftError(result);

  			exit(1);

  		}

  

  		cufftDestroy(plan);

  	}

  

  

  	//initialize the scratch memory

  	void init_scratch(){

  		scratch = complexfield<T, 1>(Z + pad , lambdas.size());

  		scratch = 0;

  	}

  

  	//get the list of scattering efficiency (eta) values for a specified layer

  	std::vector< complex<T> > layer_etas(unsigned int l){

  

  		std::vector< complex<T> > etas;

  

  		//fill the list of etas

  		for(unsigned int i=0; i<lambdas.size(); i++)

  			etas.push_back( matlist[l].eta(lambdas[i]) );

  		return etas;

  	}

  

  	//calculates the optimal block and grid sizes using information from the GPU

  	void cuda_params(dim3& grids, dim3& blocks){

  		int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size

  		int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);

  

  		//create one thread for each detector pixel

  		blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);

  		grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);

  	}

  

  	//add the fourier transform of layer n to the scratch space

  	void build_layer_fft(unsigned int n, T* zf){

  		unsigned int paddedZ = Z + pad;

  

  		T wpx = layers[n] / dz();	//calculate the width of the layer in pixels

  

  		//allocate memory for the refractive index

  		complex<T>* gpuRi;

  		HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));

  

  		//allocate memory for the source profile

  		T* gpuSrc;

  		HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));

  

  		complex<T> ri;

  		T source;

  		//store the refractive index and source profile in a CPU array

  		for(int inu=0; inu<S; inu++){

  			//save the refractive index to the GPU

  			ri = matlist[n].getN(lambdas[inu]);

  			HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice ));

  

  			//save the source profile to the GPU

  			source = source_profile(10000 / lambdas[inu]);

  			HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice ));

  

  		}

  

  		//create one thread for each pixel of the field slice

  		dim3 gridDim, blockDim;

  		cuda_params(gridDim, blockDim);

  		stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);

  

  		int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size

  		int linGrid = S / linBlock + 1;

  		stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);

  

  		//free memory

  		HANDLE_ERROR(cudaFree(gpuRi));

  		HANDLE_ERROR(cudaFree(gpuSrc));

  	}

  

  	void build_sample(){

  		init_scratch();		//initialize the GPU scratch space

  		//build_layer(1);

  

  		T* zf;

  		HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));

  		HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));

  

  		//render each layer of the sample

  		for(unsigned int l=0; l<layers.size(); l++){

  			build_layer_fft(l, zf);

  		}

  

  		HANDLE_ERROR(cudaFree(zf));

  	}

  

  	void apply_filter(){

  		dim3 gridDim, blockDim;

  		cuda_params(gridDim, blockDim);

  

  		unsigned int Zpad = Z + pad;

  

  		T sim_range = dz() * Zpad;

      	T dFz = 1 / sim_range;

  

  		//copy the array of wavelengths to the GPU

  		T* gpuLambdas;

  		HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad));

  		HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice));

  		stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas, 

  								 dFz,

  								 NA[0], NA[1], 

  								 S, Zpad);

  	}

  

  	//crop the image to the sample thickness - keep in mind that sample thickness != optical path length

  	void crop(){

  

  		scratch = scratch.crop(Z, S);

  	}

  	

  	//save the scratch field as a binary file

  	void to_binary(std::string filename){

  

  	}

  

  

  public:

  

  	//constructor

  	mirst1d(unsigned int rZ = 100,

  			unsigned int padding = 0){

  		Z = rZ;

  		pad = padding;

  		NA[0] = 0;

  		NA[1] = 0.8;

  		S = 0;

  		source_profile = 1;

  	}

  

  	//add a layer, thickness = microns

  	void add_layer(material<T> mat, T thickness){

  		matlist.push_back(mat);

  		layers.push_back(thickness);

  	}

  

  	void add_layer(std::string filename, T thickness){

  		add_layer(material<T>(filename), thickness);

  	}

  

  	//adds a profile spectrum for the light source

  	void set_source(std::string filename){

  		source_profile.load(filename);

  	}

  

  	//adds a block of wavenumbers (cm^-1) to the simulation parameters

  	void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){

  		unsigned int nu = start;

  		while(nu <= stop){

  			lambdas.push_back((T)10000 / nu);

  			nu += step;

  		}

  		S = lambdas.size();		//increment the number of wavelengths (shorthand for later)

  	}

  

  	T thickness(){

  		T t = 0;

  		for(unsigned int l=0; l<layers.size(); l++)

  			t += layers[l];

  		return t;

  	}

  

  	void padding(unsigned int padding = 0){

  		pad = padding;

  	}

  

  	T dz(){

  		return thickness() / Z;		//calculate the z-axis step size

  	}

  

  	void na(T in, T out){

  		NA[0] = in;

  		NA[1] = out;

  	}

  

  	void na(T out){

  		na(0, out);

  	}

  

  	stim::function<T, T> get_source(){

  		return source_profile;

  	}

  

  	void save_sample(std::string filename){

  		//create a sample and save the magnitude as an image

  		build_sample();

  		fft(CUFFT_INVERSE);

  		scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);

  	}

  

  	void save_mirst(std::string filename, bool binary = true){

  		//apply the MIRST filter to a sample and save the image

  

  		//build the sample in the Fourier domain

  		build_sample();

  

  		//apply the MIRST filter

  		apply_filter();

  

  		//apply an inverse FFT to bring the results back into the spatial domain

  		fft(CUFFT_INVERSE);

  

  		crop();

  

  		//save the image

  		if(binary)

  			to_binary(filename);

  		else

  			scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);

  	}

  

  

  

  

  	std::string str(){

  

  		stringstream ss;

  		ss<<"1D MIRST Simulation========================="<<std::endl;

  		ss<<"z-axis resolution: "<<Z<<std::endl;

  		ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;

  		ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;

  		ss<<"padding: "<<pad<<std::endl;

  		ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;

  		ss<<"dz: "<<dz()<<" um"<<std::endl;

  		ss<<std::endl;

  		ss<<layers.size()<<" layers-------------"<<std::endl;

  		for(unsigned int l=0; l<layers.size(); l++)

  			ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;

  

  		ss<<"source profile-----------"<<std::endl;

  		ss<<get_source().str()<<std::endl;

  

  		return ss.str();

  

  

  	}

  

  

  

  };

  

  }