bimsim.cu
17.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
#define CUDA_FOUND
#define USING_OPENCV
#include <iostream>
#include <stim/visualization/colormap.h>
#include <stim/parser/arguments.h>
#include <stim/math/vec3.h>
#include <stim/optics/scalarbeam.h>
#include <stim/optics/scalarmie.h>
#include <stim/parser/filename.h>
#include <stim/ui/progressbar.h>
#include <stim/parser/table.h>
#include <stim/cuda/cudatools.h>
#include <sources.h> //default source types
#include <chrono>
#include <thread>
//field parameters
size_t g_R_nf; //near-field resolution along X and Y
double g_S_nf; //size of the near field simulation
double g_Z_nf; //z position of the field slice
std::vector<stim::vec3<float>> g_sources; //point source position list
bool use_cuda = true;
int cuda_device;
int cuda_major = 3;
int cuda_minor = 0;
bool use_backprop = true;
//incident beam parameters
double g_NA_cond[2]; //condenser numerical aperture, [internal, external]
double g_lambda; //incident wavelength
size_t g_Nl; //number of orders used to calculate the incident field
double g_A; //amplitude of the incident field
//scattering parameters
stim::scalarcluster<float> spheres;
//std::vector<double> g_radius(1); //radius of the scattering sphere
//std::vector< stim::complex<double> > g_n(1); //refractive index of the scattering sphere
size_t g_MC; //number of monte-carlo samples used to calculate the scattered field
//std::vector< stim::vec3<float> > g_c(1); //center of the sphere
//far field parameters
double g_NA_obj[2]; //objective numerical aperture
size_t g_padding; //padding applied to the near-field calculation in order to improve the quality of the band-pass
double g_backprop = 0; //number of units that the field has to be backpropagated by in order to approximate a near-field slice passing through a sphere
size_t g_R_ff; //number of samples along X and Y in the far field image
double g_S_ff; //size (in units) of the far field image
double g_Z_ff; //position of the near-field plane projected into the far field (position of the back-propagated plane)
//debugging
bool g_debug_images = false;
bool g_verbose = false;
stim::filename outfile("out.bmp"); //output file name
stim::arglist args; //input arguments
template<typename T>
__global__ void cuda_absorbance(T* out, T* I, T* I0, size_t N){
size_t i = blockIdx.x * blockDim.x + threadIdx.x;
if(i >= N) return;
out[i] = -log10(I[i] / I0[i]);
}
/// Crops a 2D image composed of elements of type T
/// @param dest is a device pointer to memory of size dx*dy that will store the cropped image
/// @param src is a device pointer to memory of size sx*sy that stores the original image
/// @param sx is the size of the source image along x
/// @param sy is the size of the source image along y
/// @param x is the x-coordinate of the start position of the cropped region within the source image
/// @param y is the y-coordinate of the start position of the cropped region within the source image
/// @param dx is the size of the destination image along x
/// @param dy is the size of the destination image along y
template<typename T>
void gpu_absorbance(T* A, T* I, T* I0, size_t N){
int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
dim3 blocks( N / threads + 1 ); //calculate the optimal number of blocks
cuda_absorbance<T> <<< blocks, threads >>>(A, I, I0, N);
}
//function to display a progress bar
void progressbar_thread(double* e){
unsigned int p = 0;
while(p != 100){
if(*e > p){
p = (unsigned int)(*e);
rtsProgressBar(p);
}
}
std::cout<<std::endl; //put a newline after the completed progress bar
}
void advertise(){
//output advertisement
std::cout<<std::endl<<std::endl;
std::cout<<"========================================================================="<<std::endl;
std::cout<<"========================================================================="<<std::endl<<std::endl;
std::cout<<std::endl<<std::endl;
std::cout<<args.str();
}
void set_arguments(int argc, char* argv[]){
args.add("help", "prints this help text");
args.section("Field Slice Parameters");
args.add("res", "field resolution (number of samples along X and Y)", "256", "nonzero positive integer");
args.add("size", "size of the calculated field", "10", "nonzero positive value");
args.add("z", "position of the field slice along the z axis", "0", "any real value");
args.section("Incoherent Sources");
args.add("simage", "image of the source at the focal plane (z=0)", "", "any standard image format");
args.add("sgrid", "grid of n x n point sources", "", "n");
args.section("Incident Field Parameters");
args.add("f", "position of a single focal point", "0 0", "x y position (z = 0)");
args.add("condenser", "condenser numerical aperture (NA) and center obscuration", "1", "'sin(a1)' or 'sin(a1) sin(a0)', where a0 = center obscuration");
args.add("lambda", "incident wavelength", "1", "any real positive value");
args.add("wavenumber", "incident wavelength specified in cm^(-1)", "", "any real positive value, sets all units to microns");
args.add("order", "order used to calculate the incident field", "20", "any nonzero integer");
args.section("Mie Scattering Parameters");
args.add("c", "center of the scattering sphere", "0 0 0", "any real 3D coordinate");
args.add("radius", "radius of the scattering sphere", "0.5", "any real positive value");
args.add("n", "complex refractive index of the scattering sphere", "1.4 0.1", "[r] or [r i]");
args.add("samples", "number of Monte-Carlo samples used to calculate the scattered field", "1000", "any real positive integer");
args.add("spheres", "specify a file defining several spheres", "", "(1/line: radius, real RI, imag RI, x, y, z)");
args.section("Far Field Parameters");
args.add("objective", "objective numerical aperture (NA) and center obscuration", "1", "'sin(b1)' or 'sin(b1) sin(b0)', where b0 = center obscuration");
args.add("padding", "padding for the near field calculation (improves far-field calculation)", "1", "any real positive integer");
args.add("nobackprop", "don't use back-propagation to deal with sphere intersections");
args.section("Debugging");
args.add("cuda", "CUDA device ID of device to use (-1 disables CUDA)", "0", "integer value");
args.add("debug", "outputs a debug image at several important intermediate steps of the Mie scattering calculation");
args.add("verbose", "outputs additional information regarding the Mie calculations as they are performed");
args.parse(argc, argv);
if(args["help"].is_set()){
advertise();
exit(1);
}
}
void output_simulation(){
std::cout<<"far field slice-------"<<std::endl;
std::cout<<"R: "<<g_R_ff<<" x "<<g_R_ff<<" pixels"<<std::endl;
std::cout<<"S: "<<g_S_ff<<" x "<<g_S_ff<<" units"<<std::endl;
std::cout<<"Z: "<<g_Z_ff<<" units"<<std::endl;
std::cout<<"objective NA: "<<g_NA_obj[1];
if(g_NA_obj[0] != 0) std::cout<<" (internal = "<<g_NA_obj[0]<<")";
std::cout<<std::endl;
std::cout<<"padding: "<<g_padding<<std::endl;
std::cout<<"backprop: "<<g_backprop<<" units"<<std::endl;
std::cout<<std::endl;
std::cout<<"near field slice------"<<std::endl;
std::cout<<"R: "<<g_R_nf<<" x "<<g_R_nf<<" pixels"<<std::endl;
std::cout<<"S: "<<g_S_nf<<" x "<<g_S_nf<<" units"<<std::endl;
std::cout<<"Z: "<<g_Z_nf<<" units"<<std::endl;
std::cout<<std::endl;
std::cout<<"incident field--------"<<std::endl;
std::cout<<"condenser NA: "<<g_NA_cond[1];
if(g_NA_cond[0] != 0) std::cout<<" (internal = "<<g_NA_cond[0]<<")";
std::cout<<std::endl;
std::cout<<"wavelength: "<<g_lambda<<" units"<<std::endl;
std::cout<<"order: "<<g_Nl<<std::endl;
std::cout << "# sources: " << g_sources.size()<<" ";
if (g_sources.size() == 1)
std::cout << g_sources[0].str();
std::cout << std::endl;
std::cout<<std::endl;
std::cout<<"mie scattering-------"<<std::endl;
if (spheres.size() == 1) {
std::cout << "center: " << spheres[0].c << " units" << std::endl;
std::cout << "radius: " << spheres[0].radius << " units" << std::endl;
std::cout << "ref. index: " << spheres[0].n << std::endl;
}
else {
for (size_t si = 0; si < spheres.size(); si++) {
std::cout << "sphere [" << si << "]: r = " << spheres[si].radius << " n = " << spheres[si].n << " c = " << spheres[si].c << std::endl;
}
}
std::cout << "MC samples: " << g_MC << std::endl;
std::cout<<std::endl;
}
void load_spheres(std::string filename) {
stim::table t;
t.read_ascii(filename, ' ');
std::cout << t.str() << std::endl;
size_t ns = t.rows();
spheres.resize(ns);
std::vector< std::vector< double > > s = t.get_vector< double >();
for (size_t si = 0; si < ns; si++) { //for each sphere in the file
spheres[si] = stim::scalarmie<float>(s[si][0], //generate a corresponding scalar mie object in the cluster
stim::complex<float>(s[si][1], s[si][2]),
stim::vec3<float>(s[si][3], s[si][4], s[si][5]));
}
}
void set_simulation(){
if(args.nargs() > 0) outfile = args.arg(0); //the first argument is the filename (if any)
if (args["cuda"].as_int() == -1) use_cuda = false;
if (use_cuda) cuda_device = args["cuda"].as_int();
if (!stim::testDevice(cuda_device, cuda_major, cuda_minor)) { //make sure the device supports the necessary compute capability
std::cout << "ERROR: CUDA device doesn't support compute capability " << cuda_major << "." << cuda_minor << std::endl;
exit(1);
}
if(args["debug"].is_set()) g_debug_images = true; //set debug flags
if(args["verbose"].is_set()) g_verbose = true;
//set the condenser NA
g_NA_cond[1] = args["condenser"].as_float(0); //set the external condenser NA
if(args["condenser"].nargs() == 2) g_NA_cond[0] = args["condenser"].as_float(1); //if the internal NA is specified, use it
else g_NA_cond[0] = 0; //otherwise set the internal NA to zero
//set the incident wavelength
if(args["wavenumber"].is_set()) //if the wavelength is given in terms of wavenumber
g_lambda = 10000.0 / args["wavenumber"].as_float(0); //convert to wavelength (assume microns)
else
g_lambda = args["lambda"].as_float(0); //otherwise assume the wavelength is specified in microns
g_Nl = args["order"].as_int(0); //set the incident field order
//set the mie scattering parameters
g_MC = args["samples"].as_int(); //get the number of monte-carlo samples
if (args["spheres"]) {
load_spheres(args["spheres"].as_string());
}
else { //otherwise store the sphere specified at the command line
float radius = args["radius"].as_float(0); //set the radius of the sphere
stim::complex<float> n;
n.real(args["n"].as_float(0)); //set the real part of the refractive index
if (args["n"].nargs() == 2) //if an imaginary part is specified
n.imag(args["n"].as_float(1)); //set the imaginary part
else n.imag(0); //otherwise assume the imaginary part is zero
stim::vec3<float> c = stim::vec3<float>(args["c"].as_float(0), args["c"].as_float(1), args["c"].as_float(2));
spheres.resize(1);
spheres[0] = stim::scalarmie<float>(radius, n, c);
}
g_padding = args["padding"].as_int(); //get the near field padding factor
g_R_ff = args["res"].as_int(0);
g_R_nf = g_R_ff * (2 * g_padding + 1); //set the field resolution to the first resolution argument
g_S_ff = args["size"].as_int(0);
g_S_nf = g_S_ff * (2 * g_padding + 1); //get the size of the field slice (assume square at first)
g_Z_ff = args["z"].as_float(); //get the z-axis position for the desired far-field image
if (args["nobackprop"]) use_backprop = false;
if(use_backprop && (abs(g_Z_ff) < spheres[0].radius)){ //if the near field slice is cutting through the sphere
g_backprop = g_Z_ff - spheres[0].radius; //calculate the number of units that the field has to be back-propagated
g_Z_nf = spheres[0].radius;
}
else {
g_Z_nf = g_Z_ff;
}
g_NA_obj[1] = args["objective"].as_float(0); //set the external condenser NA
if(args["objective"].nargs() == 2) g_NA_obj[0] = args["objective"].as_float(1); //if the internal NA is specified, use it
else g_NA_obj[0] = 0; //otherwise set the internal NA to zero
if (args["simage"].is_set()) //if a source image is provided
g_sources = simage<float>(g_S_ff, args["simage"].as_string()); //convert the image to a list of sources
else if (args["sgrid"]) {
if (args["sgrid"].nargs() == 1)
g_sources = sgrid<float>(g_S_ff, args["sgrid"].as_int());
else
g_sources = sgrid<float>(g_S_ff, args["sgrid"].as_int(0), args["sgrid"].as_int(1));
}
else
g_sources.push_back( stim::vec3<float>(args["f"].as_float(0), args["f"].as_float(1), 0) ); //otherwise just put a single point source at the origin
}
int main(int argc, char* argv[]){
set_arguments(argc, argv); //set the input arguments
set_simulation(); //set all simulation parameters
if(g_verbose) output_simulation(); //output all simulation parameters
stim::vec3<float> d(0, 0, 1); //beam direction
size_t I_bytes = sizeof(float) * g_R_ff * g_R_ff; //number of bytes in the final intensity images
//background intensity image
float* I0; //allocate space for the background intensity image
float* I;
stim::scalarfield<float> E0(g_R_nf, g_R_nf, (float)g_S_nf, (float)g_Z_nf + (float)g_backprop); //incident nearfield
stim::scalarfield<float> E(g_R_nf, g_R_nf, (float)g_S_nf, (float)g_Z_nf); //create the scattered field
stim::scalarfield<float> Eff(g_R_ff, g_R_ff, (float)g_S_ff, (float)g_Z_ff); //create the far field
float* X = (float*) malloc( E0.size() * sizeof(float) );
float* Y = (float*) malloc( E0.size() * sizeof(float) );
float* Z = (float*) malloc( E0.size() * sizeof(float) );
E.meshgrid(X, Y, Z, stim::CPUmem); //create a meshgrid for the scattering plane (different from E0 b/c we can't propagate the internal field)
E0.meshgrid(); //create meshgrids in the field object for field calculations
E.meshgrid();
if (use_cuda) {
cudaSetDevice(cuda_device);
HANDLE_ERROR(cudaMalloc(&I0, I_bytes));
HANDLE_ERROR(cudaMemset(I0, 0, I_bytes)); //create an array to store the image intensity
HANDLE_ERROR(cudaMalloc(&I, I_bytes));
HANDLE_ERROR(cudaMemset(I, 0, I_bytes));
E0.to_gpu();
E.to_gpu();
}
else {
I0 = (float*)calloc(I_bytes, 1);
I = (float*)calloc(I_bytes, 1);
}
std::chrono::high_resolution_clock::time_point t_begin = std::chrono::high_resolution_clock::now();
double P = 0;
std::thread t1(progressbar_thread, &P); //start the progress bar thread
for(size_t s = 0; s < g_sources.size(); s++){
stim::scalarbeam<float> beam((float)g_lambda, 1, g_sources[s], d, (float)g_NA_cond[1], (float)g_NA_cond[0]); //create a focused beam
beam.eval(E0, g_Nl); //evaluate the incident field
if(g_debug_images && s == 0){
stim::filename incident_file = outfile.prefix(outfile.prefix() + "_0_nfi");
E0.image(incident_file.str(), stim::complexMag);
}
E0.crop(g_padding, Eff);
Eff.intensity(I0);
spheres.eval(E, beam, g_Nl, g_MC);
if(g_debug_images && s == 0){
stim::filename nf_file = outfile.prefix(outfile.prefix() + "_1_nfs");
E.image(nf_file.str(), stim::complexMag);
}
E.propagate(g_backprop, stim::TAU / g_lambda); //propagate the field back to the origin
if(g_debug_images && s == 0){
stim::filename propagate_file = outfile.prefix(outfile.prefix()+"_2_nfprop");
E.image(propagate_file.str(), stim::complexMag);
}
E.crop(g_padding, Eff); //crop out the far field slice
Eff.intensity(I); //calculate the far field intensity and add it to the single-beam image
if(g_debug_images && s == 0){
stim::filename cropped_file = outfile.prefix(outfile.prefix() + "_3_cropped");
Eff.image(cropped_file.str(), stim::complexMag);
}
P = (double)(s + 1) / (double)g_sources.size() * 100;
}
t1.join();
std::chrono::high_resolution_clock::time_point t_end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> t_total = (t_end - t_begin);
std::cout<<"Time: "<<t_total.count()<<" s"<<std::endl;
//save the near-field image
if (g_debug_images) {
stim::filename nearfield_real_file = outfile.prefix(outfile.prefix() + "_3_detector_real");
E.image(nearfield_real_file.str(), stim::complexReal);
stim::filename nearfield_imag_file = outfile.prefix(outfile.prefix() + "_3_detector_imag");
E.image(nearfield_imag_file.str(), stim::complexImaginary);
}
//save the background and single-beam images
stim::filename background_file = outfile.prefix(outfile.prefix() + "background");
stim::filename intensity_file = outfile.prefix(outfile.prefix() + "intensity");
if (use_cuda) {
stim::gpu2image<float>(I0, background_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
stim::gpu2image<float>(I, intensity_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
}
else {
stim::cpu2image<float>(I0, background_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
stim::cpu2image<float>(I, intensity_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
}
float* A;
HANDLE_ERROR( cudaMalloc(&A, sizeof(float) * g_R_ff * g_R_ff) );
gpu_absorbance(A, I, I0, g_R_ff * g_R_ff);
stim::filename absorbance_file = outfile.prefix(outfile.prefix() + "absorbance");
stim::gpu2image<float>(A, absorbance_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
}