Blame view

stim/optics_old/mirst-1d.cuh 11.8 KB
8e4f8364   David Mayerich   started a new opt...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
  #include "../optics/material.h"
  #include "../math/complexfield.cuh"
  #include "../math/constants.h"
  //#include "../envi/bil.h"
  
  #include "cufft.h"
  
  #include <vector>
  #include <sstream>
  
  namespace stim{
  
  //this function writes a sinc function to "dest" such that an iFFT produces a slab
  template<typename T>
  __global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri, 
  									  T* src, T* zf, 
  									  T w, unsigned int zR, unsigned int nuR){
  	//dest = complex field representing the sample
  	//ri = refractive indices for each wavelength
  	//src = intensity of the light source for each wavelength
  	//zf = z position of the slab interface for each wavelength (accounting for optical path length)
  	//w = width of the slab (in pixels)
  	//zR = number of z-axis samples
  	//nuR = number of wavelengths
  
      //get the current coordinate in the plane slice
  	int ifz = blockIdx.x * blockDim.x + threadIdx.x;
  	int inu = blockIdx.y * blockDim.y + threadIdx.y;
  
  	//make sure that the thread indices are in-bounds
  	if(inu >= nuR || ifz >= zR) return;
  
  	int i = inu * zR + ifz;
  
      T fz;
      if(ifz < zR/2)
          fz = ifz / (T)zR;
      else
          fz = -(zR - ifz) / (T)zR;
  
      //if the slab starts outside of the simulation domain, just return
      if(zf[inu] >= zR) return;
  
  	//fill the array along z with a sinc function representing the Fourier transform of the layer
  
  	T opl = w * ri[inu].real();			//optical path length
  
  	//handle the case where the slab goes outside the simulation domain
  	if(zf[inu] + opl >= zR)
  		opl = zR - zf[inu];
  
  	if(opl == 0) return;
  
  	//T l = w * ri[inu].real();
  	//complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0));
  	complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));
  
  	complex<T> eta = ri[inu] * ri[inu] - 1;
  
  	//dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz);
  	if(ifz == 0)
          dest[i] += opl * exp(e) * eta * src[inu];
      else
          dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
  }
  
  template<typename T>
  __global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){
  	//zf = current z depth (optical path length) in pixels
  	//ri = refractive index of the material
  	//w = actual width of the layer (in pixels)
  
  
  	//compute the index for this thread
  	int i = blockIdx.x * blockDim.x + threadIdx.x;
  	if(i >= S) return;
  
  	if(ri == NULL)
  		zf[i] += w;
  	else
  		zf[i] += ri[i].real() * w;
  }
  
  //apply the 1D MIRST filter to an existing sample (overwriting the sample)
  template<typename T>
  __global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda, 
  								 T dFz,
  								 T inNA, T outNA, 
  								 unsigned int lambdaR, unsigned int zR, 
  								 T sigma = 0){
  	//sampleFFT = the sample in the Fourier domain (will be overwritten)
  	//lambda = list of wavelengths
  	//dFz = delta along the Fz axis in the frequency domain
  	//inNA = NA of the internal obscuration
  	//outNA = NA of the objective
  	//zR = number of pixels along the Fz axis (same as the z-axis)
  	//lambdaR = number of wavelengths
  	//sigma = width of the Gaussian source
  	int ifz = blockIdx.x * blockDim.x + threadIdx.x;
  	int inu = blockIdx.y * blockDim.y + threadIdx.y;
  
  	if(inu >= lambdaR || ifz >= zR) return;
  
  	//calculate the index into the sample FT
  	int i = inu * zR + ifz;
  
  	//compute the frequency (and set all negative spatial frequencies to zero)
  	T fz;
  	if(ifz < zR / 2)
  	    fz = ifz * dFz;
  	//if the spatial frequency is negative, set it to zero and exit
  	else{
  	    sampleFFT[i] = 0;
  	    return;
  	}
  
  	//compute the frequency in inverse microns
  	T nu = 1/lambda[inu];
  
  	//determine the radius of the integration circle
  	T nu_sq = nu * nu;
  	T fz_sq = (fz * fz) / 4;
  
  	//cut off frequencies above the diffraction limit
  	T r;
  	if(fz_sq < nu_sq)
  	    r = sqrt(nu_sq - fz_sq);
  	else
  	    r = 0;
  
  	//account for the optics
  	T Q = 0;
  	if(r > nu * inNA && r < nu * outNA)
  	    Q = 1;
  
  	//account for the source
  	//T sigma = 30.0;
  	T s = exp( - (r*r * sigma*sigma) / 2 );
  	//T s=1;
  
  	//compute the final filter
  	T mirst = 0;
  	if(fz != 0)
  	    mirst = 2 * stimPI * r * s * Q * (1/fz);
  
  	sampleFFT[i] *= mirst;
  
  }
  
  /*This object performs a 1-dimensional (layered) MIRST simulation
  */
  template<typename T>
  class mirst1d{
  
  private:
  	unsigned int Z;	//z-axis resolution
  	unsigned int pad;	//pixel padding on either side of the sample
  
  	std::vector< material<T> > matlist;	//list of materials
  	std::vector< T > layers;				//list of layer thicknesses
  
  	std::vector< T > lambdas;		//list of wavelengths that are being simulated
  	unsigned int S;					//number of wavelengths (size of "lambdas")
  
  	T NA[2];						//numerical aperature (central obscuration and outer diameter)
  
  	function<T, T> source_profile;	//profile (spectrum) of the source (expressed in inverse centimeters)
  
  	complexfield<T, 1> scratch;		//scratch GPU memory used to build samples, transforms, etc.
  
  	void fft(int direction = CUFFT_FORWARD){
  
  		unsigned padZ = Z + pad;
  		
  		//create cuFFT handles
  		cufftHandle plan;
  		cufftResult result;
  		
  		if(sizeof(T) == 4)
  			result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size());	//single precision
  		else
  			result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size());	//double precision
  
  		//check for Plan 1D errors
  		if(result != CUFFT_SUCCESS){
  			std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;
  			CufftError(result);
  			exit(1);
  		}
  
  		if(sizeof(T) == 4)
  			result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);
  		else
  			result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);
  
  		//check for FFT errors
  		if(result != CUFFT_SUCCESS){
  			std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;
  			CufftError(result);
  			exit(1);
  		}
  
  		cufftDestroy(plan);
  	}
  
  
  	//initialize the scratch memory
  	void init_scratch(){
  		scratch = complexfield<T, 1>(Z + pad , lambdas.size());
  		scratch = 0;
  	}
  
  	//get the list of scattering efficiency (eta) values for a specified layer
  	std::vector< complex<T> > layer_etas(unsigned int l){
  
  		std::vector< complex<T> > etas;
  
  		//fill the list of etas
  		for(unsigned int i=0; i<lambdas.size(); i++)
  			etas.push_back( matlist[l].eta(lambdas[i]) );
  		return etas;
  	}
  
  	//calculates the optimal block and grid sizes using information from the GPU
  	void cuda_params(dim3& grids, dim3& blocks){
  		int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
  		int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  
  		//create one thread for each detector pixel
  		blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
  		grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);
  	}
  
  	//add the fourier transform of layer n to the scratch space
  	void build_layer_fft(unsigned int n, T* zf){
  		unsigned int paddedZ = Z + pad;
  
  		T wpx = layers[n] / dz();	//calculate the width of the layer in pixels
  
  		//allocate memory for the refractive index
  		complex<T>* gpuRi;
  		HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));
  
  		//allocate memory for the source profile
  		T* gpuSrc;
  		HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));
  
  		complex<T> ri;
  		T source;
  		//store the refractive index and source profile in a CPU array
  		for(int inu=0; inu<S; inu++){
  			//save the refractive index to the GPU
  			ri = matlist[n].getN(lambdas[inu]);
  			HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice ));
  
  			//save the source profile to the GPU
  			source = source_profile(10000 / lambdas[inu]);
  			HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice ));
  
  		}
  
  		//create one thread for each pixel of the field slice
  		dim3 gridDim, blockDim;
  		cuda_params(gridDim, blockDim);
  		stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);
  
  		int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size
  		int linGrid = S / linBlock + 1;
  		stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);
  
  		//free memory
  		HANDLE_ERROR(cudaFree(gpuRi));
  		HANDLE_ERROR(cudaFree(gpuSrc));
  	}
  
  	void build_sample(){
  		init_scratch();		//initialize the GPU scratch space
  		//build_layer(1);
  
  		T* zf;
  		HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));
  		HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));
  
  		//render each layer of the sample
  		for(unsigned int l=0; l<layers.size(); l++){
  			build_layer_fft(l, zf);
  		}
  
  		HANDLE_ERROR(cudaFree(zf));
  	}
  
  	void apply_filter(){
  		dim3 gridDim, blockDim;
  		cuda_params(gridDim, blockDim);
  
  		unsigned int Zpad = Z + pad;
  
  		T sim_range = dz() * Zpad;
      	T dFz = 1 / sim_range;
  
  		//copy the array of wavelengths to the GPU
  		T* gpuLambdas;
  		HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad));
  		HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice));
  		stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas, 
  								 dFz,
  								 NA[0], NA[1], 
  								 S, Zpad);
  	}
  
  	//crop the image to the sample thickness - keep in mind that sample thickness != optical path length
  	void crop(){
  
  		scratch = scratch.crop(Z, S);
  	}
  	
  	//save the scratch field as a binary file
  	void to_binary(std::string filename){
  
  	}
  
  
  public:
  
  	//constructor
  	mirst1d(unsigned int rZ = 100,
  			unsigned int padding = 0){
  		Z = rZ;
  		pad = padding;
  		NA[0] = 0;
  		NA[1] = 0.8;
  		S = 0;
  		source_profile = 1;
  	}
  
  	//add a layer, thickness = microns
  	void add_layer(material<T> mat, T thickness){
  		matlist.push_back(mat);
  		layers.push_back(thickness);
  	}
  
  	void add_layer(std::string filename, T thickness){
  		add_layer(material<T>(filename), thickness);
  	}
  
  	//adds a profile spectrum for the light source
  	void set_source(std::string filename){
  		source_profile.load(filename);
  	}
  
  	//adds a block of wavenumbers (cm^-1) to the simulation parameters
  	void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){
  		unsigned int nu = start;
  		while(nu <= stop){
  			lambdas.push_back((T)10000 / nu);
  			nu += step;
  		}
  		S = lambdas.size();		//increment the number of wavelengths (shorthand for later)
  	}
  
  	T thickness(){
  		T t = 0;
  		for(unsigned int l=0; l<layers.size(); l++)
  			t += layers[l];
  		return t;
  	}
  
  	void padding(unsigned int padding = 0){
  		pad = padding;
  	}
  
  	T dz(){
  		return thickness() / Z;		//calculate the z-axis step size
  	}
  
  	void na(T in, T out){
  		NA[0] = in;
  		NA[1] = out;
  	}
  
  	void na(T out){
  		na(0, out);
  	}
  
  	stim::function<T, T> get_source(){
  		return source_profile;
  	}
  
  	void save_sample(std::string filename){
  		//create a sample and save the magnitude as an image
  		build_sample();
  		fft(CUFFT_INVERSE);
  		scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
  	}
  
  	void save_mirst(std::string filename, bool binary = true){
  		//apply the MIRST filter to a sample and save the image
  
  		//build the sample in the Fourier domain
  		build_sample();
  
  		//apply the MIRST filter
  		apply_filter();
  
  		//apply an inverse FFT to bring the results back into the spatial domain
  		fft(CUFFT_INVERSE);
  
  		crop();
  
  		//save the image
  		if(binary)
  			to_binary(filename);
  		else
  			scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
  	}
  
  
  
  
  	std::string str(){
  
  		stringstream ss;
  		ss<<"1D MIRST Simulation========================="<<std::endl;
  		ss<<"z-axis resolution: "<<Z<<std::endl;
  		ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;
  		ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;
  		ss<<"padding: "<<pad<<std::endl;
  		ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;
  		ss<<"dz: "<<dz()<<" um"<<std::endl;
  		ss<<std::endl;
  		ss<<layers.size()<<" layers-------------"<<std::endl;
  		for(unsigned int l=0; l<layers.size(); l++)
  			ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;
  
  		ss<<"source profile-----------"<<std::endl;
  		ss<<get_source().str()<<std::endl;
  
  		return ss.str();
  
  
  	}
  
  
  
  };
  
  }