Blame view

bimsym.cu 17.2 KB
8d8c3ebd   David Mayerich   initial commit tr...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
  #define CUDA_FOUND
  #define USING_OPENCV
  
  #include <iostream>
  
  #include <stim/visualization/colormap.h>
  
  #include <stim/parser/arguments.h>
  #include <stim/math/vec3.h>
  #include <stim/optics/scalarbeam.h>
  #include <stim/optics/scalarmie.h>
  #include <stim/parser/filename.h>
  #include <stim/ui/progressbar.h>
  #include <stim/parser/table.h>
  
  #include <stim/cuda/cudatools.h>
  
  #include <sources.h>						//default source types
  
  #include <chrono>
  #include <thread>
  
  //field parameters
  size_t g_R_nf;								//near-field resolution along X and Y
  double g_S_nf;								//size of the near field simulation
  double g_Z_nf;								//z position of the field slice
  std::vector<stim::vec3<float>> g_sources;	//point source position list
  
  bool use_cuda = true;
  int cuda_device;
  int cuda_major = 3;
  int cuda_minor = 0;
  bool use_backprop = true;
  
  //incident beam parameters
  double g_NA_cond[2];							//condenser numerical aperture, [internal, external]
  double g_lambda;								//incident wavelength
  size_t g_Nl;									//number of orders used to calculate the incident field
  double g_A;										//amplitude of the incident field
  
  //scattering parameters
  stim::scalarcluster<float> spheres;
  //std::vector<double> g_radius(1);				//radius of the scattering sphere
  //std::vector< stim::complex<double> > g_n(1);	//refractive index of the scattering sphere
  size_t g_MC;									//number of monte-carlo samples used to calculate the scattered field
  //std::vector< stim::vec3<float> > g_c(1);		//center of the sphere
  
  //far field parameters
  double g_NA_obj[2];								//objective numerical aperture
  size_t g_padding;								//padding applied to the near-field calculation in order to improve the quality of the band-pass
  double g_backprop = 0;							//number of units that the field has to be backpropagated by in order to approximate a near-field slice passing through a sphere
  size_t g_R_ff;									//number of samples along X and Y in the far field image
  double g_S_ff;									//size (in units) of the far field image
  double g_Z_ff;									//position of the near-field plane projected into the far field (position of the back-propagated plane)
  
  //debugging
  bool g_debug_images = false;
  bool g_verbose = false;
  
  stim::filename outfile("out.bmp");						//output file name
  
  stim::arglist args;							//input arguments
  
  template<typename T>
  __global__ void cuda_absorbance(T* out, T* I, T* I0, size_t N){
  
  	size_t i = blockIdx.x * blockDim.x + threadIdx.x;
  
  	if(i >= N) return;
  
  	out[i] = -log10(I[i] / I0[i]);
  }
  
  /// Crops a 2D image composed of elements of type T
  /// @param dest is a device pointer to memory of size dx*dy that will store the cropped image
  /// @param src is a device pointer to memory of size sx*sy that stores the original image
  /// @param sx is the size of the source image along x
  /// @param sy is the size of the source image along y
  /// @param x is the x-coordinate of the start position of the cropped region within the source image
  /// @param y is the y-coordinate of the start position of the cropped region within the source image
  /// @param dx is the size of the destination image along x
  /// @param dy is the size of the destination image along y
  template<typename T>
  void gpu_absorbance(T* A, T* I, T* I0, size_t N){
  	int threads = stim::maxThreadsPerBlock();												//get the maximum number of threads per block for the CUDA device
  	
  	dim3 blocks( N / threads + 1 );															//calculate the optimal number of blocks
  	cuda_absorbance<T> <<< blocks, threads >>>(A, I, I0, N);
  }
  
  //function to display a progress bar
  void progressbar_thread(double* e){
  
  	unsigned int p = 0;
  	while(p != 100){
  		if(*e > p){
  			p = (unsigned int)(*e);
  			rtsProgressBar(p);
  		}
  	}
  	std::cout<<std::endl;				//put a newline after the completed progress bar
  }
  
  void advertise(){
  	//output advertisement
  		std::cout<<std::endl<<std::endl;
  		std::cout<<"========================================================================="<<std::endl;
  
  		std::cout<<"========================================================================="<<std::endl<<std::endl;
  
  		std::cout<<std::endl<<std::endl;
  
  		std::cout<<args.str();
  }
  
  void set_arguments(int argc, char* argv[]){
  	args.add("help", "prints this help text");
  
  	args.section("Field Slice Parameters");
  	args.add("res", "field resolution (number of samples along X and Y)", "256", "nonzero positive integer");
  	args.add("size", "size of the calculated field", "10", "nonzero positive value");
  	args.add("z", "position of the field slice along the z axis", "0", "any real value");
  
  	args.section("Incoherent Sources");
  	args.add("simage", "image of the source at the focal plane (z=0)", "", "any standard image format");
  	args.add("sgrid", "grid of n x n point sources", "", "n");
  
  	args.section("Incident Field Parameters");
  	args.add("f", "position of a single focal point", "0 0", "x y position (z = 0)");
  	args.add("condenser", "condenser numerical aperture (NA) and center obscuration", "1", "'sin(a1)' or 'sin(a1) sin(a0)', where a0 = center obscuration");
  	args.add("lambda", "incident wavelength", "1", "any real positive value");
  	args.add("wavenumber", "incident wavelength specified in cm^(-1)", "", "any real positive value, sets all units to microns");
  	args.add("order", "order used to calculate the incident field", "20", "any nonzero integer");
  
  	args.section("Mie Scattering Parameters");
  	args.add("c", "center of the scattering sphere", "0 0 0", "any real 3D coordinate");
  	args.add("radius", "radius of the scattering sphere", "0.5", "any real positive value");
  	args.add("n", "complex refractive index of the scattering sphere", "1.4 0.1", "[r] or [r i]");
  	args.add("samples", "number of Monte-Carlo samples used to calculate the scattered field", "1000", "any real positive integer");
  	args.add("spheres", "specify a file defining several spheres", "", "(1/line: radius, real RI, imag RI, x, y, z)");
  
  	args.section("Far Field Parameters");
  	args.add("objective", "objective numerical aperture (NA) and center obscuration", "1", "'sin(b1)' or 'sin(b1) sin(b0)', where b0 = center obscuration");
  	args.add("padding", "padding for the near field calculation (improves far-field calculation)", "1", "any real positive integer");
  	args.add("nobackprop", "don't use back-propagation to deal with sphere intersections");
  
  	args.section("Debugging");
  	args.add("cuda", "CUDA device ID of device to use (-1 disables CUDA)", "0", "integer value");
  	args.add("debug", "outputs a debug image at several important intermediate steps of the Mie scattering calculation");
  	args.add("verbose", "outputs additional information regarding the Mie calculations as they are performed");
  	args.parse(argc, argv);
  
  	if(args["help"].is_set()){
  		advertise();
  		exit(1);
  	}
  }
  
  void output_simulation(){
  	std::cout<<"far field slice-------"<<std::endl;
  	std::cout<<"R: "<<g_R_ff<<" x "<<g_R_ff<<" pixels"<<std::endl;
  	std::cout<<"S: "<<g_S_ff<<" x "<<g_S_ff<<" units"<<std::endl;
  	std::cout<<"Z: "<<g_Z_ff<<" units"<<std::endl;
  
  	std::cout<<"objective NA: "<<g_NA_obj[1];
  	if(g_NA_obj[0] != 0) std::cout<<" (internal = "<<g_NA_obj[0]<<")";
  	std::cout<<std::endl;
  	std::cout<<"padding:      "<<g_padding<<std::endl;
  	std::cout<<"backprop:     "<<g_backprop<<" units"<<std::endl;
  	std::cout<<std::endl;
  
  	std::cout<<"near field slice------"<<std::endl;
  	std::cout<<"R: "<<g_R_nf<<" x "<<g_R_nf<<" pixels"<<std::endl;
  	std::cout<<"S: "<<g_S_nf<<" x "<<g_S_nf<<" units"<<std::endl;
  	std::cout<<"Z: "<<g_Z_nf<<" units"<<std::endl;
  	std::cout<<std::endl;
  
  	std::cout<<"incident field--------"<<std::endl;
  	std::cout<<"condenser NA: "<<g_NA_cond[1];
  	if(g_NA_cond[0] != 0) std::cout<<" (internal = "<<g_NA_cond[0]<<")";
  	std::cout<<std::endl;
  	std::cout<<"wavelength:   "<<g_lambda<<" units"<<std::endl;
  	std::cout<<"order:        "<<g_Nl<<std::endl;
  	std::cout << "# sources:    " << g_sources.size()<<" ";
  	if (g_sources.size() == 1)
  		std::cout << g_sources[0].str();
  	std::cout << std::endl;
  	std::cout<<std::endl;
  
  	std::cout<<"mie scattering-------"<<std::endl;
  	if (spheres.size() == 1) {
  		std::cout << "center:       " << spheres[0].c << " units" << std::endl;
  		std::cout << "radius:       " << spheres[0].radius << " units" << std::endl;
  		std::cout << "ref. index:   " << spheres[0].n << std::endl;
  	}
  	else {
  		for (size_t si = 0; si < spheres.size(); si++) {
  			std::cout << "sphere [" << si << "]: r = " << spheres[si].radius << " n = " << spheres[si].n << " c = " << spheres[si].c << std::endl;
  		}
  	}
  	std::cout << "MC samples:   " << g_MC << std::endl;
  	std::cout<<std::endl;
  }
  
  void load_spheres(std::string filename) {
  	stim::table t;
  	t.read_ascii(filename, ' ');
  
  	std::cout << t.str() << std::endl;
  	size_t ns = t.rows();
  	spheres.resize(ns);
  	
  	std::vector< std::vector< double > > s = t.get_vector< double >();
  
  	
  	for (size_t si = 0; si < ns; si++) {							//for each sphere in the file
  		spheres[si] = stim::scalarmie<float>(s[si][0],				//generate a corresponding scalar mie object in the cluster
  			stim::complex<float>(s[si][1], s[si][2]),
  			stim::vec3<float>(s[si][3], s[si][4], s[si][5]));
  	}
  }
  
  void set_simulation(){
  
  	if(args.nargs() > 0) outfile = args.arg(0);								//the first argument is the filename (if any)
  
  	if (args["cuda"].as_int() == -1) use_cuda = false;
  	if (use_cuda) cuda_device = args["cuda"].as_int();
  	if (!stim::testDevice(cuda_device, cuda_major, cuda_minor)) {			//make sure the device supports the necessary compute capability
  		std::cout << "ERROR: CUDA device doesn't support compute capability " << cuda_major << "." << cuda_minor << std::endl;
  		exit(1);
  	}
  	if(args["debug"].is_set()) g_debug_images = true;						//set debug flags
  	if(args["verbose"].is_set()) g_verbose = true;
  
  	//set the condenser NA
  	g_NA_cond[1] = args["condenser"].as_float(0);							//set the external condenser NA
  	if(args["condenser"].nargs() == 2) g_NA_cond[0] = args["condenser"].as_float(1);	//if the internal NA is specified, use it
  	else                          g_NA_cond[0] = 0;							//otherwise set the internal NA to zero
  
  	//set the incident wavelength
  	if(args["wavenumber"].is_set())											//if the wavelength is given in terms of wavenumber
  		g_lambda = 10000.0 / args["wavenumber"].as_float(0);				//convert to wavelength (assume microns)
  	else
  		g_lambda = args["lambda"].as_float(0);								//otherwise assume the wavelength is specified in microns
  
  	g_Nl = args["order"].as_int(0);											//set the incident field order
  
  	//set the mie scattering parameters
  	g_MC = args["samples"].as_int();										//get the number of monte-carlo samples
  	if (args["spheres"]) {
  		load_spheres(args["spheres"].as_string());
  	}
  	else {																	//otherwise store the sphere specified at the command line
  		float radius = args["radius"].as_float(0);									//set the radius of the sphere
  		stim::complex<float> n;
  		n.real(args["n"].as_float(0));										//set the real part of the refractive index
  		if (args["n"].nargs() == 2)												//if an imaginary part is specified
  			n.imag(args["n"].as_float(1));									//set the imaginary part
  		else n.imag(0);														//otherwise assume the imaginary part is zero
  		stim::vec3<float> c = stim::vec3<float>(args["c"].as_float(0), args["c"].as_float(1), args["c"].as_float(2));
  		spheres.resize(1);
  		spheres[0] = stim::scalarmie<float>(radius, n, c);
  	}
  
  	g_padding = args["padding"].as_int();								//get the near field padding factor
  	g_R_ff = args["res"].as_int(0);
  	g_R_nf = g_R_ff * (2 * g_padding + 1);				//set the field resolution to the first resolution argument
  	g_S_ff = args["size"].as_int(0);
  	g_S_nf = g_S_ff * (2 * g_padding + 1);			//get the size of the field slice (assume square at first)
  	g_Z_ff = args["z"].as_float();										//get the z-axis position for the desired far-field image
  
  	if (args["nobackprop"]) use_backprop = false;
  	if(use_backprop && (abs(g_Z_ff) < spheres[0].radius)){											//if the near field slice is cutting through the sphere
  		g_backprop = g_Z_ff - spheres[0].radius;									//calculate the number of units that the field has to be back-propagated
  		g_Z_nf = spheres[0].radius;
  	}
  	else {
  		g_Z_nf = g_Z_ff;
  	}
  
  	g_NA_obj[1] = args["objective"].as_float(0);								//set the external condenser NA
  	if(args["objective"].nargs() == 2) g_NA_obj[0] = args["objective"].as_float(1);	//if the internal NA is specified, use it
  	else                          g_NA_obj[0] = 0;							//otherwise set the internal NA to zero
  
  	if (args["simage"].is_set())											//if a source image is provided
  		g_sources = simage<float>(g_S_ff, args["simage"].as_string());		//convert the image to a list of sources
  	else if (args["sgrid"]) {
  		if (args["sgrid"].nargs() == 1)
  			g_sources = sgrid<float>(g_S_ff, args["sgrid"].as_int());
  		else
  			g_sources = sgrid<float>(g_S_ff, args["sgrid"].as_int(0), args["sgrid"].as_int(1));
  	}
  	else
  		g_sources.push_back( stim::vec3<float>(args["f"].as_float(0), args["f"].as_float(1), 0) );			//otherwise just put a single point source at the origin
  }
  
  int main(int argc, char* argv[]){
  	set_arguments(argc, argv);								//set the input arguments
  	set_simulation();										//set all simulation parameters
  
  	if(g_verbose) output_simulation();						//output all simulation parameters
  
  	stim::vec3<float> d(0, 0, 1);							//beam direction
  
  	size_t I_bytes = sizeof(float) * g_R_ff * g_R_ff;								//number of bytes in the final intensity images
  
  	//background intensity image
  	float* I0;												//allocate space for the background intensity image
  	float* I;
  		
  	stim::scalarfield<float> E0(g_R_nf, g_R_nf, (float)g_S_nf, (float)g_Z_nf + (float)g_backprop);	//incident nearfield
  	stim::scalarfield<float> E(g_R_nf, g_R_nf, (float)g_S_nf, (float)g_Z_nf);						//create the scattered field
  	stim::scalarfield<float> Eff(g_R_ff, g_R_ff, (float)g_S_ff, (float)g_Z_ff);						//create the far field
  
  	float* X = (float*) malloc( E0.size() * sizeof(float) );
  	float* Y = (float*) malloc( E0.size() * sizeof(float) );
  	float* Z = (float*) malloc( E0.size() * sizeof(float) );
  	E.meshgrid(X, Y, Z, stim::CPUmem);								//create a meshgrid for the scattering plane (different from E0 b/c we can't propagate the internal field)
  	
  
  	E0.meshgrid();													//create meshgrids in the field object for field calculations
  	E.meshgrid();
  
  	if (use_cuda) {
  		cudaSetDevice(cuda_device);
  		HANDLE_ERROR(cudaMalloc(&I0, I_bytes));
  		HANDLE_ERROR(cudaMemset(I0, 0, I_bytes));			//create an array to store the image intensity
  		HANDLE_ERROR(cudaMalloc(&I, I_bytes));
  		HANDLE_ERROR(cudaMemset(I, 0, I_bytes));
  		E0.to_gpu();
  		E.to_gpu();
  	}
  	else {
  		I0 = (float*)calloc(I_bytes, 1);
  		I = (float*)calloc(I_bytes, 1);
  	}
  
  	std::chrono::high_resolution_clock::time_point t_begin = std::chrono::high_resolution_clock::now();
  	double P = 0;
  	std::thread t1(progressbar_thread, &P);							//start the progress bar thread
  	for(size_t s = 0; s < g_sources.size(); s++){
  		stim::scalarbeam<float> beam((float)g_lambda, 1, g_sources[s], d, (float)g_NA_cond[1], (float)g_NA_cond[0]);		//create a focused beam
  		
  		beam.eval(E0, g_Nl);										//evaluate the incident field
  
  		if(g_debug_images && s == 0){
  			stim::filename incident_file = outfile.prefix(outfile.prefix() + "_0_nfi");
  			E0.image(incident_file.str(), stim::complexMag);
  		}
  		E0.crop(g_padding, Eff);
  		Eff.intensity(I0);
  
  		spheres.eval(E, beam, g_Nl, g_MC);
  
  		if(g_debug_images && s == 0){
  			stim::filename nf_file = outfile.prefix(outfile.prefix() + "_1_nfs");
  			E.image(nf_file.str(), stim::complexMag);
  		}
  
  		E.propagate(g_backprop, stim::TAU / g_lambda);					//propagate the field back to the origin
  
  		if(g_debug_images && s == 0){
  			stim::filename propagate_file = outfile.prefix(outfile.prefix()+"_2_nfprop");
  			E.image(propagate_file.str(), stim::complexMag);
  		}
  
  		E.crop(g_padding, Eff);											//crop out the far field slice
  		Eff.intensity(I);												//calculate the far field intensity and add it to the single-beam image
  
  		if(g_debug_images && s == 0){
  			stim::filename cropped_file = outfile.prefix(outfile.prefix() + "_3_cropped");
  			Eff.image(cropped_file.str(), stim::complexMag);
  		}
  		P = (double)(s + 1) / (double)g_sources.size() * 100;
  
  	}
  	t1.join();
  
  	std::chrono::high_resolution_clock::time_point t_end = std::chrono::high_resolution_clock::now();
  	std::chrono::duration<double> t_total = (t_end - t_begin);
  	std::cout<<"Time: "<<t_total.count()<<" s"<<std::endl;
  
  	//save the near-field image
  	if (g_debug_images) {
  		stim::filename nearfield_real_file = outfile.prefix(outfile.prefix() + "_3_detector_real");
  		E.image(nearfield_real_file.str(), stim::complexReal);
  		stim::filename nearfield_imag_file = outfile.prefix(outfile.prefix() + "_3_detector_imag");
  		E.image(nearfield_imag_file.str(), stim::complexImaginary);
  	}
  	
  	//save the background and single-beam images
  	stim::filename background_file = outfile.prefix(outfile.prefix() + "background");
  	stim::filename intensity_file = outfile.prefix(outfile.prefix() + "intensity");
  
  	if (use_cuda) {
  		stim::gpu2image<float>(I0, background_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
  		stim::gpu2image<float>(I, intensity_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
  	}
  	else {
  		stim::cpu2image<float>(I0, background_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
  		stim::cpu2image<float>(I, intensity_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
  	}
  
  	
  	float* A;
  	HANDLE_ERROR( cudaMalloc(&A, sizeof(float) * g_R_ff * g_R_ff) );
  	gpu_absorbance(A, I, I0, g_R_ff * g_R_ff);
  	stim::filename absorbance_file = outfile.prefix(outfile.prefix() + "absorbance");
  	stim::gpu2image<float>(A, absorbance_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
  }