#define CUDA_FOUND #define USING_OPENCV #include #include #include #include #include #include #include #include #include #include #include //default source types #include #include //field parameters size_t g_R_nf; //near-field resolution along X and Y double g_S_nf; //size of the near field simulation double g_Z_nf; //z position of the field slice std::vector> g_sources; //point source position list bool use_cuda = true; int cuda_device; int cuda_major = 3; int cuda_minor = 0; bool use_backprop = true; //incident beam parameters double g_NA_cond[2]; //condenser numerical aperture, [internal, external] double g_lambda; //incident wavelength size_t g_Nl; //number of orders used to calculate the incident field double g_A; //amplitude of the incident field //scattering parameters stim::scalarcluster spheres; //std::vector g_radius(1); //radius of the scattering sphere //std::vector< stim::complex > g_n(1); //refractive index of the scattering sphere size_t g_MC; //number of monte-carlo samples used to calculate the scattered field //std::vector< stim::vec3 > g_c(1); //center of the sphere //far field parameters double g_NA_obj[2]; //objective numerical aperture size_t g_padding; //padding applied to the near-field calculation in order to improve the quality of the band-pass double g_backprop = 0; //number of units that the field has to be backpropagated by in order to approximate a near-field slice passing through a sphere size_t g_R_ff; //number of samples along X and Y in the far field image double g_S_ff; //size (in units) of the far field image double g_Z_ff; //position of the near-field plane projected into the far field (position of the back-propagated plane) //debugging bool g_debug_images = false; bool g_verbose = false; stim::filename outfile("out.bmp"); //output file name stim::arglist args; //input arguments template __global__ void cuda_absorbance(T* out, T* I, T* I0, size_t N){ size_t i = blockIdx.x * blockDim.x + threadIdx.x; if(i >= N) return; out[i] = -log10(I[i] / I0[i]); } /// Crops a 2D image composed of elements of type T /// @param dest is a device pointer to memory of size dx*dy that will store the cropped image /// @param src is a device pointer to memory of size sx*sy that stores the original image /// @param sx is the size of the source image along x /// @param sy is the size of the source image along y /// @param x is the x-coordinate of the start position of the cropped region within the source image /// @param y is the y-coordinate of the start position of the cropped region within the source image /// @param dx is the size of the destination image along x /// @param dy is the size of the destination image along y template void gpu_absorbance(T* A, T* I, T* I0, size_t N){ int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device dim3 blocks( N / threads + 1 ); //calculate the optimal number of blocks cuda_absorbance <<< blocks, threads >>>(A, I, I0, N); } //function to display a progress bar void progressbar_thread(double* e){ unsigned int p = 0; while(p != 100){ if(*e > p){ p = (unsigned int)(*e); rtsProgressBar(p); } } std::cout< > s = t.get_vector< double >(); for (size_t si = 0; si < ns; si++) { //for each sphere in the file spheres[si] = stim::scalarmie(s[si][0], //generate a corresponding scalar mie object in the cluster stim::complex(s[si][1], s[si][2]), stim::vec3(s[si][3], s[si][4], s[si][5])); } } void set_simulation(){ if(args.nargs() > 0) outfile = args.arg(0); //the first argument is the filename (if any) if (args["cuda"].as_int() == -1) use_cuda = false; if (use_cuda) cuda_device = args["cuda"].as_int(); if (!stim::testDevice(cuda_device, cuda_major, cuda_minor)) { //make sure the device supports the necessary compute capability std::cout << "ERROR: CUDA device doesn't support compute capability " << cuda_major << "." << cuda_minor << std::endl; exit(1); } if(args["debug"].is_set()) g_debug_images = true; //set debug flags if(args["verbose"].is_set()) g_verbose = true; //set the condenser NA g_NA_cond[1] = args["condenser"].as_float(0); //set the external condenser NA if(args["condenser"].nargs() == 2) g_NA_cond[0] = args["condenser"].as_float(1); //if the internal NA is specified, use it else g_NA_cond[0] = 0; //otherwise set the internal NA to zero //set the incident wavelength if(args["wavenumber"].is_set()) //if the wavelength is given in terms of wavenumber g_lambda = 10000.0 / args["wavenumber"].as_float(0); //convert to wavelength (assume microns) else g_lambda = args["lambda"].as_float(0); //otherwise assume the wavelength is specified in microns g_Nl = args["order"].as_int(0); //set the incident field order //set the mie scattering parameters g_MC = args["samples"].as_int(); //get the number of monte-carlo samples if (args["spheres"]) { load_spheres(args["spheres"].as_string()); } else { //otherwise store the sphere specified at the command line float radius = args["radius"].as_float(0); //set the radius of the sphere stim::complex n; n.real(args["n"].as_float(0)); //set the real part of the refractive index if (args["n"].nargs() == 2) //if an imaginary part is specified n.imag(args["n"].as_float(1)); //set the imaginary part else n.imag(0); //otherwise assume the imaginary part is zero stim::vec3 c = stim::vec3(args["c"].as_float(0), args["c"].as_float(1), args["c"].as_float(2)); spheres.resize(1); spheres[0] = stim::scalarmie(radius, n, c); } g_padding = args["padding"].as_int(); //get the near field padding factor g_R_ff = args["res"].as_int(0); g_R_nf = g_R_ff * (2 * g_padding + 1); //set the field resolution to the first resolution argument g_S_ff = args["size"].as_int(0); g_S_nf = g_S_ff * (2 * g_padding + 1); //get the size of the field slice (assume square at first) g_Z_ff = args["z"].as_float(); //get the z-axis position for the desired far-field image if (args["nobackprop"]) use_backprop = false; if(use_backprop && (abs(g_Z_ff) < spheres[0].radius)){ //if the near field slice is cutting through the sphere g_backprop = g_Z_ff - spheres[0].radius; //calculate the number of units that the field has to be back-propagated g_Z_nf = spheres[0].radius; } else { g_Z_nf = g_Z_ff; } g_NA_obj[1] = args["objective"].as_float(0); //set the external condenser NA if(args["objective"].nargs() == 2) g_NA_obj[0] = args["objective"].as_float(1); //if the internal NA is specified, use it else g_NA_obj[0] = 0; //otherwise set the internal NA to zero if (args["simage"].is_set()) //if a source image is provided g_sources = simage(g_S_ff, args["simage"].as_string()); //convert the image to a list of sources else if (args["sgrid"]) { if (args["sgrid"].nargs() == 1) g_sources = sgrid(g_S_ff, args["sgrid"].as_int()); else g_sources = sgrid(g_S_ff, args["sgrid"].as_int(0), args["sgrid"].as_int(1)); } else g_sources.push_back( stim::vec3(args["f"].as_float(0), args["f"].as_float(1), 0) ); //otherwise just put a single point source at the origin } int main(int argc, char* argv[]){ set_arguments(argc, argv); //set the input arguments set_simulation(); //set all simulation parameters if(g_verbose) output_simulation(); //output all simulation parameters stim::vec3 d(0, 0, 1); //beam direction size_t I_bytes = sizeof(float) * g_R_ff * g_R_ff; //number of bytes in the final intensity images //background intensity image float* I0; //allocate space for the background intensity image float* I; stim::scalarfield E0(g_R_nf, g_R_nf, (float)g_S_nf, (float)g_Z_nf + (float)g_backprop); //incident nearfield stim::scalarfield E(g_R_nf, g_R_nf, (float)g_S_nf, (float)g_Z_nf); //create the scattered field stim::scalarfield Eff(g_R_ff, g_R_ff, (float)g_S_ff, (float)g_Z_ff); //create the far field float* X = (float*) malloc( E0.size() * sizeof(float) ); float* Y = (float*) malloc( E0.size() * sizeof(float) ); float* Z = (float*) malloc( E0.size() * sizeof(float) ); E.meshgrid(X, Y, Z, stim::CPUmem); //create a meshgrid for the scattering plane (different from E0 b/c we can't propagate the internal field) E0.meshgrid(); //create meshgrids in the field object for field calculations E.meshgrid(); if (use_cuda) { cudaSetDevice(cuda_device); HANDLE_ERROR(cudaMalloc(&I0, I_bytes)); HANDLE_ERROR(cudaMemset(I0, 0, I_bytes)); //create an array to store the image intensity HANDLE_ERROR(cudaMalloc(&I, I_bytes)); HANDLE_ERROR(cudaMemset(I, 0, I_bytes)); E0.to_gpu(); E.to_gpu(); } else { I0 = (float*)calloc(I_bytes, 1); I = (float*)calloc(I_bytes, 1); } std::chrono::high_resolution_clock::time_point t_begin = std::chrono::high_resolution_clock::now(); double P = 0; std::thread t1(progressbar_thread, &P); //start the progress bar thread for(size_t s = 0; s < g_sources.size(); s++){ stim::scalarbeam beam((float)g_lambda, 1, g_sources[s], d, (float)g_NA_cond[1], (float)g_NA_cond[0]); //create a focused beam beam.eval(E0, g_Nl); //evaluate the incident field if(g_debug_images && s == 0){ stim::filename incident_file = outfile.prefix(outfile.prefix() + "_0_nfi"); E0.image(incident_file.str(), stim::complexMag); } E0.crop(g_padding, Eff); Eff.intensity(I0); spheres.eval(E, beam, g_Nl, g_MC); if(g_debug_images && s == 0){ stim::filename nf_file = outfile.prefix(outfile.prefix() + "_1_nfs"); E.image(nf_file.str(), stim::complexMag); } E.propagate(g_backprop, stim::TAU / g_lambda); //propagate the field back to the origin if(g_debug_images && s == 0){ stim::filename propagate_file = outfile.prefix(outfile.prefix()+"_2_nfprop"); E.image(propagate_file.str(), stim::complexMag); } E.crop(g_padding, Eff); //crop out the far field slice Eff.intensity(I); //calculate the far field intensity and add it to the single-beam image if(g_debug_images && s == 0){ stim::filename cropped_file = outfile.prefix(outfile.prefix() + "_3_cropped"); Eff.image(cropped_file.str(), stim::complexMag); } P = (double)(s + 1) / (double)g_sources.size() * 100; } t1.join(); std::chrono::high_resolution_clock::time_point t_end = std::chrono::high_resolution_clock::now(); std::chrono::duration t_total = (t_end - t_begin); std::cout<<"Time: "<(I0, background_file.str(), g_R_ff, g_R_ff, stim::cmBrewer); stim::gpu2image(I, intensity_file.str(), g_R_ff, g_R_ff, stim::cmBrewer); } else { stim::cpu2image(I0, background_file.str(), g_R_ff, g_R_ff, stim::cmBrewer); stim::cpu2image(I, intensity_file.str(), g_R_ff, g_R_ff, stim::cmBrewer); } float* A; HANDLE_ERROR( cudaMalloc(&A, sizeof(float) * g_R_ff * g_R_ff) ); gpu_absorbance(A, I, I0, g_R_ff * g_R_ff); stim::filename absorbance_file = outfile.prefix(outfile.prefix() + "absorbance"); stim::gpu2image(A, absorbance_file.str(), g_R_ff, g_R_ff, stim::cmBrewer); }