bimsym.cu

#define CUDA_FOUND
#define USING_OPENCV
#include <iostream>
#include <stim/visualization/colormap.h>
#include <stim/parser/arguments.h>
#include <stim/math/vec3.h>
#include <stim/optics/scalarbeam.h>
#include <stim/optics/scalarmie.h>
#include <stim/parser/filename.h>
#include <stim/ui/progressbar.h>
#include <stim/parser/table.h>
#include <stim/cuda/cudatools.h>
#include <sources.h>						//default source types
#include <chrono>
#include <thread>
//field parameters
size_t g_R_nf;								//near-field resolution along X and Y
double g_S_nf;								//size of the near field simulation
double g_Z_nf;								//z position of the field slice
std::vector<stim::vec3<float>> g_sources;	//point source position list
bool use_cuda = true;
int cuda_device;
int cuda_major = 3;
int cuda_minor = 0;
bool use_backprop = true;
//incident beam parameters
double g_NA_cond[2];							//condenser numerical aperture, [internal, external]
double g_lambda;								//incident wavelength
size_t g_Nl;									//number of orders used to calculate the incident field
double g_A;										//amplitude of the incident field
//scattering parameters
stim::scalarcluster<float> spheres;
//std::vector<double> g_radius(1);				//radius of the scattering sphere
//std::vector< stim::complex<double> > g_n(1);	//refractive index of the scattering sphere
size_t g_MC;									//number of monte-carlo samples used to calculate the scattered field
//std::vector< stim::vec3<float> > g_c(1);		//center of the sphere
//far field parameters
double g_NA_obj[2];								//objective numerical aperture
size_t g_padding;								//padding applied to the near-field calculation in order to improve the quality of the band-pass
double g_backprop = 0;							//number of units that the field has to be backpropagated by in order to approximate a near-field slice passing through a sphere
size_t g_R_ff;									//number of samples along X and Y in the far field image
double g_S_ff;									//size (in units) of the far field image
double g_Z_ff;									//position of the near-field plane projected into the far field (position of the back-propagated plane)
//debugging
bool g_debug_images = false;
bool g_verbose = false;
stim::filename outfile("out.bmp");						//output file name
stim::arglist args;							//input arguments
template<typename T>
__global__ void cuda_absorbance(T* out, T* I, T* I0, size_t N){
	size_t i = blockIdx.x * blockDim.x + threadIdx.x;
	if(i >= N) return;
	out[i] = -log10(I[i] / I0[i]);
}
/// Crops a 2D image composed of elements of type T
/// @param dest is a device pointer to memory of size dx*dy that will store the cropped image
/// @param src is a device pointer to memory of size sx*sy that stores the original image
/// @param sx is the size of the source image along x
/// @param sy is the size of the source image along y
/// @param x is the x-coordinate of the start position of the cropped region within the source image
/// @param y is the y-coordinate of the start position of the cropped region within the source image
/// @param dx is the size of the destination image along x
/// @param dy is the size of the destination image along y
template<typename T>
void gpu_absorbance(T* A, T* I, T* I0, size_t N){
	int threads = stim::maxThreadsPerBlock();												//get the maximum number of threads per block for the CUDA device
	
	dim3 blocks( N / threads + 1 );															//calculate the optimal number of blocks
	cuda_absorbance<T> <<< blocks, threads >>>(A, I, I0, N);
}
//function to display a progress bar
void progressbar_thread(double* e){
	unsigned int p = 0;
	while(p != 100){
		if(*e > p){
			p = (unsigned int)(*e);
			rtsProgressBar(p);
		}
	}
	std::cout<<std::endl;				//put a newline after the completed progress bar
}
void advertise(){
	//output advertisement
		std::cout<<std::endl<<std::endl;
		std::cout<<"========================================================================="<<std::endl;
		std::cout<<"========================================================================="<<std::endl<<std::endl;
		std::cout<<std::endl<<std::endl;
		std::cout<<args.str();
}
void set_arguments(int argc, char* argv[]){
	args.add("help", "prints this help text");
	args.section("Field Slice Parameters");
	args.add("res", "field resolution (number of samples along X and Y)", "256", "nonzero positive integer");
	args.add("size", "size of the calculated field", "10", "nonzero positive value");
	args.add("z", "position of the field slice along the z axis", "0", "any real value");
	args.section("Incoherent Sources");
	args.add("simage", "image of the source at the focal plane (z=0)", "", "any standard image format");
	args.add("sgrid", "grid of n x n point sources", "", "n");
	args.section("Incident Field Parameters");
	args.add("f", "position of a single focal point", "0 0", "x y position (z = 0)");
	args.add("condenser", "condenser numerical aperture (NA) and center obscuration", "1", "'sin(a1)' or 'sin(a1) sin(a0)', where a0 = center obscuration");
	args.add("lambda", "incident wavelength", "1", "any real positive value");
	args.add("wavenumber", "incident wavelength specified in cm^(-1)", "", "any real positive value, sets all units to microns");
	args.add("order", "order used to calculate the incident field", "20", "any nonzero integer");
	args.section("Mie Scattering Parameters");
	args.add("c", "center of the scattering sphere", "0 0 0", "any real 3D coordinate");
	args.add("radius", "radius of the scattering sphere", "0.5", "any real positive value");
	args.add("n", "complex refractive index of the scattering sphere", "1.4 0.1", "[r] or [r i]");
	args.add("samples", "number of Monte-Carlo samples used to calculate the scattered field", "1000", "any real positive integer");
	args.add("spheres", "specify a file defining several spheres", "", "(1/line: radius, real RI, imag RI, x, y, z)");
	args.section("Far Field Parameters");
	args.add("objective", "objective numerical aperture (NA) and center obscuration", "1", "'sin(b1)' or 'sin(b1) sin(b0)', where b0 = center obscuration");
	args.add("padding", "padding for the near field calculation (improves far-field calculation)", "1", "any real positive integer");
	args.add("nobackprop", "don't use back-propagation to deal with sphere intersections");
	args.section("Debugging");
	args.add("cuda", "CUDA device ID of device to use (-1 disables CUDA)", "0", "integer value");
	args.add("debug", "outputs a debug image at several important intermediate steps of the Mie scattering calculation");
	args.add("verbose", "outputs additional information regarding the Mie calculations as they are performed");
	args.parse(argc, argv);
	if(args["help"].is_set()){
		advertise();
		exit(1);
	}
}
void output_simulation(){
	std::cout<<"far field slice-------"<<std::endl;
	std::cout<<"R: "<<g_R_ff<<" x "<<g_R_ff<<" pixels"<<std::endl;
	std::cout<<"S: "<<g_S_ff<<" x "<<g_S_ff<<" units"<<std::endl;
	std::cout<<"Z: "<<g_Z_ff<<" units"<<std::endl;
	std::cout<<"objective NA: "<<g_NA_obj[1];
	if(g_NA_obj[0] != 0) std::cout<<" (internal = "<<g_NA_obj[0]<<")";
	std::cout<<std::endl;
	std::cout<<"padding:      "<<g_padding<<std::endl;
	std::cout<<"backprop:     "<<g_backprop<<" units"<<std::endl;
	std::cout<<std::endl;
	std::cout<<"near field slice------"<<std::endl;
	std::cout<<"R: "<<g_R_nf<<" x "<<g_R_nf<<" pixels"<<std::endl;
	std::cout<<"S: "<<g_S_nf<<" x "<<g_S_nf<<" units"<<std::endl;
	std::cout<<"Z: "<<g_Z_nf<<" units"<<std::endl;
	std::cout<<std::endl;
	std::cout<<"incident field--------"<<std::endl;
	std::cout<<"condenser NA: "<<g_NA_cond[1];
	if(g_NA_cond[0] != 0) std::cout<<" (internal = "<<g_NA_cond[0]<<")";
	std::cout<<std::endl;
	std::cout<<"wavelength:   "<<g_lambda<<" units"<<std::endl;
	std::cout<<"order:        "<<g_Nl<<std::endl;
	std::cout << "# sources:    " << g_sources.size()<<" ";
	if (g_sources.size() == 1)
		std::cout << g_sources[0].str();
	std::cout << std::endl;
	std::cout<<std::endl;
	std::cout<<"mie scattering-------"<<std::endl;
	if (spheres.size() == 1) {
		std::cout << "center:       " << spheres[0].c << " units" << std::endl;
		std::cout << "radius:       " << spheres[0].radius << " units" << std::endl;
		std::cout << "ref. index:   " << spheres[0].n << std::endl;
	}
	else {
		for (size_t si = 0; si < spheres.size(); si++) {
			std::cout << "sphere [" << si << "]: r = " << spheres[si].radius << " n = " << spheres[si].n << " c = " << spheres[si].c << std::endl;
		}
	}
	std::cout << "MC samples:   " << g_MC << std::endl;
	std::cout<<std::endl;
}
void load_spheres(std::string filename) {
	stim::table t;
	t.read_ascii(filename, ' ');
	std::cout << t.str() << std::endl;
	size_t ns = t.rows();
	spheres.resize(ns);
	
	std::vector< std::vector< double > > s = t.get_vector< double >();
	
	for (size_t si = 0; si < ns; si++) {							//for each sphere in the file
		spheres[si] = stim::scalarmie<float>(s[si][0],				//generate a corresponding scalar mie object in the cluster
			stim::complex<float>(s[si][1], s[si][2]),
			stim::vec3<float>(s[si][3], s[si][4], s[si][5]));
	}
}
void set_simulation(){
	if(args.nargs() > 0) outfile = args.arg(0);								//the first argument is the filename (if any)
	if (args["cuda"].as_int() == -1) use_cuda = false;
	if (use_cuda) cuda_device = args["cuda"].as_int();
	if (!stim::testDevice(cuda_device, cuda_major, cuda_minor)) {			//make sure the device supports the necessary compute capability
		std::cout << "ERROR: CUDA device doesn't support compute capability " << cuda_major << "." << cuda_minor << std::endl;
		exit(1);
	}
	if(args["debug"].is_set()) g_debug_images = true;						//set debug flags
	if(args["verbose"].is_set()) g_verbose = true;
	//set the condenser NA
	g_NA_cond[1] = args["condenser"].as_float(0);							//set the external condenser NA
	if(args["condenser"].nargs() == 2) g_NA_cond[0] = args["condenser"].as_float(1);	//if the internal NA is specified, use it
	else                          g_NA_cond[0] = 0;							//otherwise set the internal NA to zero
	//set the incident wavelength
	if(args["wavenumber"].is_set())											//if the wavelength is given in terms of wavenumber
		g_lambda = 10000.0 / args["wavenumber"].as_float(0);				//convert to wavelength (assume microns)
	else
		g_lambda = args["lambda"].as_float(0);								//otherwise assume the wavelength is specified in microns
	g_Nl = args["order"].as_int(0);											//set the incident field order
	//set the mie scattering parameters
	g_MC = args["samples"].as_int();										//get the number of monte-carlo samples
	if (args["spheres"]) {
		load_spheres(args["spheres"].as_string());
	}
	else {																	//otherwise store the sphere specified at the command line
		float radius = args["radius"].as_float(0);									//set the radius of the sphere
		stim::complex<float> n;
		n.real(args["n"].as_float(0));										//set the real part of the refractive index
		if (args["n"].nargs() == 2)												//if an imaginary part is specified
			n.imag(args["n"].as_float(1));									//set the imaginary part
		else n.imag(0);														//otherwise assume the imaginary part is zero
		stim::vec3<float> c = stim::vec3<float>(args["c"].as_float(0), args["c"].as_float(1), args["c"].as_float(2));
		spheres.resize(1);
		spheres[0] = stim::scalarmie<float>(radius, n, c);
	}
	g_padding = args["padding"].as_int();								//get the near field padding factor
	g_R_ff = args["res"].as_int(0);
	g_R_nf = g_R_ff * (2 * g_padding + 1);				//set the field resolution to the first resolution argument
	g_S_ff = args["size"].as_int(0);
	g_S_nf = g_S_ff * (2 * g_padding + 1);			//get the size of the field slice (assume square at first)
	g_Z_ff = args["z"].as_float();										//get the z-axis position for the desired far-field image
	if (args["nobackprop"]) use_backprop = false;
	if(use_backprop && (abs(g_Z_ff) < spheres[0].radius)){											//if the near field slice is cutting through the sphere
		g_backprop = g_Z_ff - spheres[0].radius;									//calculate the number of units that the field has to be back-propagated
		g_Z_nf = spheres[0].radius;
	}
	else {
		g_Z_nf = g_Z_ff;
	}
	g_NA_obj[1] = args["objective"].as_float(0);								//set the external condenser NA
	if(args["objective"].nargs() == 2) g_NA_obj[0] = args["objective"].as_float(1);	//if the internal NA is specified, use it
	else                          g_NA_obj[0] = 0;							//otherwise set the internal NA to zero
	if (args["simage"].is_set())											//if a source image is provided
		g_sources = simage<float>(g_S_ff, args["simage"].as_string());		//convert the image to a list of sources
	else if (args["sgrid"]) {
		if (args["sgrid"].nargs() == 1)
			g_sources = sgrid<float>(g_S_ff, args["sgrid"].as_int());
		else
			g_sources = sgrid<float>(g_S_ff, args["sgrid"].as_int(0), args["sgrid"].as_int(1));
	}
	else
		g_sources.push_back( stim::vec3<float>(args["f"].as_float(0), args["f"].as_float(1), 0) );			//otherwise just put a single point source at the origin
}
int main(int argc, char* argv[]){
	set_arguments(argc, argv);								//set the input arguments
	set_simulation();										//set all simulation parameters
	if(g_verbose) output_simulation();						//output all simulation parameters
	stim::vec3<float> d(0, 0, 1);							//beam direction
	size_t I_bytes = sizeof(float) * g_R_ff * g_R_ff;								//number of bytes in the final intensity images
	//background intensity image
	float* I0;												//allocate space for the background intensity image
	float* I;
		
	stim::scalarfield<float> E0(g_R_nf, g_R_nf, (float)g_S_nf, (float)g_Z_nf + (float)g_backprop);	//incident nearfield
	stim::scalarfield<float> E(g_R_nf, g_R_nf, (float)g_S_nf, (float)g_Z_nf);						//create the scattered field
	stim::scalarfield<float> Eff(g_R_ff, g_R_ff, (float)g_S_ff, (float)g_Z_ff);						//create the far field
	float* X = (float*) malloc( E0.size() * sizeof(float) );
	float* Y = (float*) malloc( E0.size() * sizeof(float) );
	float* Z = (float*) malloc( E0.size() * sizeof(float) );
	E.meshgrid(X, Y, Z, stim::CPUmem);								//create a meshgrid for the scattering plane (different from E0 b/c we can't propagate the internal field)
	
	E0.meshgrid();													//create meshgrids in the field object for field calculations
	E.meshgrid();
	if (use_cuda) {
		cudaSetDevice(cuda_device);
		HANDLE_ERROR(cudaMalloc(&I0, I_bytes));
		HANDLE_ERROR(cudaMemset(I0, 0, I_bytes));			//create an array to store the image intensity
		HANDLE_ERROR(cudaMalloc(&I, I_bytes));
		HANDLE_ERROR(cudaMemset(I, 0, I_bytes));
		E0.to_gpu();
		E.to_gpu();
	}
	else {
		I0 = (float*)calloc(I_bytes, 1);
		I = (float*)calloc(I_bytes, 1);
	}
	std::chrono::high_resolution_clock::time_point t_begin = std::chrono::high_resolution_clock::now();
	double P = 0;
	std::thread t1(progressbar_thread, &P);							//start the progress bar thread
	for(size_t s = 0; s < g_sources.size(); s++){
		stim::scalarbeam<float> beam((float)g_lambda, 1, g_sources[s], d, (float)g_NA_cond[1], (float)g_NA_cond[0]);		//create a focused beam
		
		beam.eval(E0, g_Nl);										//evaluate the incident field
		if(g_debug_images && s == 0){
			stim::filename incident_file = outfile.prefix(outfile.prefix() + "_0_nfi");
			E0.image(incident_file.str(), stim::complexMag);
		}
		E0.crop(g_padding, Eff);
		Eff.intensity(I0);
		spheres.eval(E, beam, g_Nl, g_MC);
		if(g_debug_images && s == 0){
			stim::filename nf_file = outfile.prefix(outfile.prefix() + "_1_nfs");
			E.image(nf_file.str(), stim::complexMag);
		}
		E.propagate(g_backprop, stim::TAU / g_lambda);					//propagate the field back to the origin
		if(g_debug_images && s == 0){
			stim::filename propagate_file = outfile.prefix(outfile.prefix()+"_2_nfprop");
			E.image(propagate_file.str(), stim::complexMag);
		}
		E.crop(g_padding, Eff);											//crop out the far field slice
		Eff.intensity(I);												//calculate the far field intensity and add it to the single-beam image
		if(g_debug_images && s == 0){
			stim::filename cropped_file = outfile.prefix(outfile.prefix() + "_3_cropped");
			Eff.image(cropped_file.str(), stim::complexMag);
		}
		P = (double)(s + 1) / (double)g_sources.size() * 100;
	}
	t1.join();
	std::chrono::high_resolution_clock::time_point t_end = std::chrono::high_resolution_clock::now();
	std::chrono::duration<double> t_total = (t_end - t_begin);
	std::cout<<"Time: "<<t_total.count()<<" s"<<std::endl;
	//save the near-field image
	if (g_debug_images) {
		stim::filename nearfield_real_file = outfile.prefix(outfile.prefix() + "_3_detector_real");
		E.image(nearfield_real_file.str(), stim::complexReal);
		stim::filename nearfield_imag_file = outfile.prefix(outfile.prefix() + "_3_detector_imag");
		E.image(nearfield_imag_file.str(), stim::complexImaginary);
	}
	
	//save the background and single-beam images
	stim::filename background_file = outfile.prefix(outfile.prefix() + "background");
	stim::filename intensity_file = outfile.prefix(outfile.prefix() + "intensity");
	if (use_cuda) {
		stim::gpu2image<float>(I0, background_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
		stim::gpu2image<float>(I, intensity_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
	}
	else {
		stim::cpu2image<float>(I0, background_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
		stim::cpu2image<float>(I, intensity_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
	}
	
	float* A;
	HANDLE_ERROR( cudaMalloc(&A, sizeof(float) * g_R_ff * g_R_ff) );
	gpu_absorbance(A, I, I0, g_R_ff * g_R_ff);
	stim::filename absorbance_file = outfile.prefix(outfile.prefix() + "absorbance");
	stim::gpu2image<float>(A, absorbance_file.str(), g_R_ff, g_R_ff, stim::cmBrewer);
}