diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..60063dc --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,52 @@ +#Specify the version being used aswell as the language +cmake_minimum_required(VERSION 2.8) +#Name your project here +project(bimsym) + + + +#set the module directory +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}") + +#set up CUDA +find_package(CUDA) + +#find OpenCV +find_package(OpenCV REQUIRED ) + +find_package(Boost REQUIRED) + +#find the STIM library +find_package(STIM) + +#build the executable in the binary directory on MS Visual Studio +if ( MSVC ) + SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}") + SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}") +endif ( MSVC ) + +#set the include directories +include_directories( + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR} + ${STIM_INCLUDE_DIRS} + ${Boost_INCLUDE_DIRS} +) + +#SET(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS};/maxregcount=60) + +#create an executable +cuda_add_executable(bimsym + bimsym.cu + sources.h +) + +#set the link libraries +target_link_libraries(bimsym + ${CUDA_cufft_LIBRARY} + ${CUDA_cublas_LIBRARY} + ${OpenCV_LIBS} + ) + + + diff --git a/FindSTIM.cmake b/FindSTIM.cmake new file mode 100644 index 0000000..bf5762a --- /dev/null +++ b/FindSTIM.cmake @@ -0,0 +1,9 @@ +include(FindPackageHandleStandardArgs) + +set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH}) + +find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR) + +if(STIM_FOUND) + set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR}) +endif() diff --git a/bimsym.cu b/bimsym.cu new file mode 100644 index 0000000..8046786 --- /dev/null +++ b/bimsym.cu @@ -0,0 +1,411 @@ +#define CUDA_FOUND +#define USING_OPENCV + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include //default source types + +#include +#include + +//field parameters +size_t g_R_nf; //near-field resolution along X and Y +double g_S_nf; //size of the near field simulation +double g_Z_nf; //z position of the field slice +std::vector> g_sources; //point source position list + +bool use_cuda = true; +int cuda_device; +int cuda_major = 3; +int cuda_minor = 0; +bool use_backprop = true; + +//incident beam parameters +double g_NA_cond[2]; //condenser numerical aperture, [internal, external] +double g_lambda; //incident wavelength +size_t g_Nl; //number of orders used to calculate the incident field +double g_A; //amplitude of the incident field + +//scattering parameters +stim::scalarcluster spheres; +//std::vector g_radius(1); //radius of the scattering sphere +//std::vector< stim::complex > g_n(1); //refractive index of the scattering sphere +size_t g_MC; //number of monte-carlo samples used to calculate the scattered field +//std::vector< stim::vec3 > g_c(1); //center of the sphere + +//far field parameters +double g_NA_obj[2]; //objective numerical aperture +size_t g_padding; //padding applied to the near-field calculation in order to improve the quality of the band-pass +double g_backprop = 0; //number of units that the field has to be backpropagated by in order to approximate a near-field slice passing through a sphere +size_t g_R_ff; //number of samples along X and Y in the far field image +double g_S_ff; //size (in units) of the far field image +double g_Z_ff; //position of the near-field plane projected into the far field (position of the back-propagated plane) + +//debugging +bool g_debug_images = false; +bool g_verbose = false; + +stim::filename outfile("out.bmp"); //output file name + +stim::arglist args; //input arguments + +template +__global__ void cuda_absorbance(T* out, T* I, T* I0, size_t N){ + + size_t i = blockIdx.x * blockDim.x + threadIdx.x; + + if(i >= N) return; + + out[i] = -log10(I[i] / I0[i]); +} + +/// Crops a 2D image composed of elements of type T +/// @param dest is a device pointer to memory of size dx*dy that will store the cropped image +/// @param src is a device pointer to memory of size sx*sy that stores the original image +/// @param sx is the size of the source image along x +/// @param sy is the size of the source image along y +/// @param x is the x-coordinate of the start position of the cropped region within the source image +/// @param y is the y-coordinate of the start position of the cropped region within the source image +/// @param dx is the size of the destination image along x +/// @param dy is the size of the destination image along y +template +void gpu_absorbance(T* A, T* I, T* I0, size_t N){ + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device + + dim3 blocks( N / threads + 1 ); //calculate the optimal number of blocks + cuda_absorbance <<< blocks, threads >>>(A, I, I0, N); +} + +//function to display a progress bar +void progressbar_thread(double* e){ + + unsigned int p = 0; + while(p != 100){ + if(*e > p){ + p = (unsigned int)(*e); + rtsProgressBar(p); + } + } + std::cout< > s = t.get_vector< double >(); + + + for (size_t si = 0; si < ns; si++) { //for each sphere in the file + spheres[si] = stim::scalarmie(s[si][0], //generate a corresponding scalar mie object in the cluster + stim::complex(s[si][1], s[si][2]), + stim::vec3(s[si][3], s[si][4], s[si][5])); + } +} + +void set_simulation(){ + + if(args.nargs() > 0) outfile = args.arg(0); //the first argument is the filename (if any) + + if (args["cuda"].as_int() == -1) use_cuda = false; + if (use_cuda) cuda_device = args["cuda"].as_int(); + if (!stim::testDevice(cuda_device, cuda_major, cuda_minor)) { //make sure the device supports the necessary compute capability + std::cout << "ERROR: CUDA device doesn't support compute capability " << cuda_major << "." << cuda_minor << std::endl; + exit(1); + } + if(args["debug"].is_set()) g_debug_images = true; //set debug flags + if(args["verbose"].is_set()) g_verbose = true; + + //set the condenser NA + g_NA_cond[1] = args["condenser"].as_float(0); //set the external condenser NA + if(args["condenser"].nargs() == 2) g_NA_cond[0] = args["condenser"].as_float(1); //if the internal NA is specified, use it + else g_NA_cond[0] = 0; //otherwise set the internal NA to zero + + //set the incident wavelength + if(args["wavenumber"].is_set()) //if the wavelength is given in terms of wavenumber + g_lambda = 10000.0 / args["wavenumber"].as_float(0); //convert to wavelength (assume microns) + else + g_lambda = args["lambda"].as_float(0); //otherwise assume the wavelength is specified in microns + + g_Nl = args["order"].as_int(0); //set the incident field order + + //set the mie scattering parameters + g_MC = args["samples"].as_int(); //get the number of monte-carlo samples + if (args["spheres"]) { + load_spheres(args["spheres"].as_string()); + } + else { //otherwise store the sphere specified at the command line + float radius = args["radius"].as_float(0); //set the radius of the sphere + stim::complex n; + n.real(args["n"].as_float(0)); //set the real part of the refractive index + if (args["n"].nargs() == 2) //if an imaginary part is specified + n.imag(args["n"].as_float(1)); //set the imaginary part + else n.imag(0); //otherwise assume the imaginary part is zero + stim::vec3 c = stim::vec3(args["c"].as_float(0), args["c"].as_float(1), args["c"].as_float(2)); + spheres.resize(1); + spheres[0] = stim::scalarmie(radius, n, c); + } + + g_padding = args["padding"].as_int(); //get the near field padding factor + g_R_ff = args["res"].as_int(0); + g_R_nf = g_R_ff * (2 * g_padding + 1); //set the field resolution to the first resolution argument + g_S_ff = args["size"].as_int(0); + g_S_nf = g_S_ff * (2 * g_padding + 1); //get the size of the field slice (assume square at first) + g_Z_ff = args["z"].as_float(); //get the z-axis position for the desired far-field image + + if (args["nobackprop"]) use_backprop = false; + if(use_backprop && (abs(g_Z_ff) < spheres[0].radius)){ //if the near field slice is cutting through the sphere + g_backprop = g_Z_ff - spheres[0].radius; //calculate the number of units that the field has to be back-propagated + g_Z_nf = spheres[0].radius; + } + else { + g_Z_nf = g_Z_ff; + } + + g_NA_obj[1] = args["objective"].as_float(0); //set the external condenser NA + if(args["objective"].nargs() == 2) g_NA_obj[0] = args["objective"].as_float(1); //if the internal NA is specified, use it + else g_NA_obj[0] = 0; //otherwise set the internal NA to zero + + if (args["simage"].is_set()) //if a source image is provided + g_sources = simage(g_S_ff, args["simage"].as_string()); //convert the image to a list of sources + else if (args["sgrid"]) { + if (args["sgrid"].nargs() == 1) + g_sources = sgrid(g_S_ff, args["sgrid"].as_int()); + else + g_sources = sgrid(g_S_ff, args["sgrid"].as_int(0), args["sgrid"].as_int(1)); + } + else + g_sources.push_back( stim::vec3(args["f"].as_float(0), args["f"].as_float(1), 0) ); //otherwise just put a single point source at the origin +} + +int main(int argc, char* argv[]){ + set_arguments(argc, argv); //set the input arguments + set_simulation(); //set all simulation parameters + + if(g_verbose) output_simulation(); //output all simulation parameters + + stim::vec3 d(0, 0, 1); //beam direction + + size_t I_bytes = sizeof(float) * g_R_ff * g_R_ff; //number of bytes in the final intensity images + + //background intensity image + float* I0; //allocate space for the background intensity image + float* I; + + stim::scalarfield E0(g_R_nf, g_R_nf, (float)g_S_nf, (float)g_Z_nf + (float)g_backprop); //incident nearfield + stim::scalarfield E(g_R_nf, g_R_nf, (float)g_S_nf, (float)g_Z_nf); //create the scattered field + stim::scalarfield Eff(g_R_ff, g_R_ff, (float)g_S_ff, (float)g_Z_ff); //create the far field + + float* X = (float*) malloc( E0.size() * sizeof(float) ); + float* Y = (float*) malloc( E0.size() * sizeof(float) ); + float* Z = (float*) malloc( E0.size() * sizeof(float) ); + E.meshgrid(X, Y, Z, stim::CPUmem); //create a meshgrid for the scattering plane (different from E0 b/c we can't propagate the internal field) + + + E0.meshgrid(); //create meshgrids in the field object for field calculations + E.meshgrid(); + + if (use_cuda) { + cudaSetDevice(cuda_device); + HANDLE_ERROR(cudaMalloc(&I0, I_bytes)); + HANDLE_ERROR(cudaMemset(I0, 0, I_bytes)); //create an array to store the image intensity + HANDLE_ERROR(cudaMalloc(&I, I_bytes)); + HANDLE_ERROR(cudaMemset(I, 0, I_bytes)); + E0.to_gpu(); + E.to_gpu(); + } + else { + I0 = (float*)calloc(I_bytes, 1); + I = (float*)calloc(I_bytes, 1); + } + + std::chrono::high_resolution_clock::time_point t_begin = std::chrono::high_resolution_clock::now(); + double P = 0; + std::thread t1(progressbar_thread, &P); //start the progress bar thread + for(size_t s = 0; s < g_sources.size(); s++){ + stim::scalarbeam beam((float)g_lambda, 1, g_sources[s], d, (float)g_NA_cond[1], (float)g_NA_cond[0]); //create a focused beam + + beam.eval(E0, g_Nl); //evaluate the incident field + + if(g_debug_images && s == 0){ + stim::filename incident_file = outfile.prefix(outfile.prefix() + "_0_nfi"); + E0.image(incident_file.str(), stim::complexMag); + } + E0.crop(g_padding, Eff); + Eff.intensity(I0); + + spheres.eval(E, beam, g_Nl, g_MC); + + if(g_debug_images && s == 0){ + stim::filename nf_file = outfile.prefix(outfile.prefix() + "_1_nfs"); + E.image(nf_file.str(), stim::complexMag); + } + + E.propagate(g_backprop, stim::TAU / g_lambda); //propagate the field back to the origin + + if(g_debug_images && s == 0){ + stim::filename propagate_file = outfile.prefix(outfile.prefix()+"_2_nfprop"); + E.image(propagate_file.str(), stim::complexMag); + } + + E.crop(g_padding, Eff); //crop out the far field slice + Eff.intensity(I); //calculate the far field intensity and add it to the single-beam image + + if(g_debug_images && s == 0){ + stim::filename cropped_file = outfile.prefix(outfile.prefix() + "_3_cropped"); + Eff.image(cropped_file.str(), stim::complexMag); + } + P = (double)(s + 1) / (double)g_sources.size() * 100; + + } + t1.join(); + + std::chrono::high_resolution_clock::time_point t_end = std::chrono::high_resolution_clock::now(); + std::chrono::duration t_total = (t_end - t_begin); + std::cout<<"Time: "<(I0, background_file.str(), g_R_ff, g_R_ff, stim::cmBrewer); + stim::gpu2image(I, intensity_file.str(), g_R_ff, g_R_ff, stim::cmBrewer); + } + else { + stim::cpu2image(I0, background_file.str(), g_R_ff, g_R_ff, stim::cmBrewer); + stim::cpu2image(I, intensity_file.str(), g_R_ff, g_R_ff, stim::cmBrewer); + } + + + float* A; + HANDLE_ERROR( cudaMalloc(&A, sizeof(float) * g_R_ff * g_R_ff) ); + gpu_absorbance(A, I, I0, g_R_ff * g_R_ff); + stim::filename absorbance_file = outfile.prefix(outfile.prefix() + "absorbance"); + stim::gpu2image(A, absorbance_file.str(), g_R_ff, g_R_ff, stim::cmBrewer); +} \ No newline at end of file diff --git a/sources.h b/sources.h new file mode 100644 index 0000000..0a4aa7a --- /dev/null +++ b/sources.h @@ -0,0 +1,48 @@ +template +std::vector< stim::vec3 > simage(T size, std::string imagefile) { + std::vector< stim::vec3 > sources; + + stim::image src(imagefile); + + float xmin = -size / 2; + float ymin = -size / 2; + + size_t xi, yi; + float xa, ya; + for (yi = 0; yi < src.height(); yi++) { + for (xi = 0; xi < src.width(); xi++) { + if (src(xi, yi)) { + xa = (float)xi / (float)(src.width() - 1); + ya = (float)yi / (float)(src.height() - 1); + + sources.push_back(stim::vec3(xa * size + xmin, ya * size + ymin, 0)); + + } + } + } + + return sources; +} + +template +std::vector< stim::vec3 > sgrid(T size, unsigned int n, unsigned int m = 0) { + std::vector< stim::vec3 > sources; + + if (m == 0) m = n; + + float xmin = -size / 2 + size/(n + 1); + float ymin = -size / 2 + size/(m + 1); + + size_t xi, yi; + float xa, ya; + for (yi = 0; yi < m; yi++) { + for (xi = 0; xi < n; xi++) { + xa = (float)xi / (float)(n + 1); + ya = (float)yi / (float)(m + 1); + + sources.push_back(stim::vec3(xa * size + xmin, ya * size + ymin, 0)); + } + } + + return sources; +} \ No newline at end of file -- libgit2 0.21.4