diff --git a/bessjy.cpp b/bessjy.cpp index 2dc98af..e72ff28 100644 --- a/bessjy.cpp +++ b/bessjy.cpp @@ -13,7 +13,9 @@ // #define _USE_MATH_DEFINES #include -#include "bessel.h" +#include "bessel.h" + +#define PI 3.14159 double gamma(double x); // @@ -426,7 +428,7 @@ int bessjynb(int n,double x,int &nm,double *jn,double *yn, 0.2775764465332031, -1.993531733751297, 2.724882731126854e1}; - + int i,k,m; nm = n; if ((x < 0.0) || (n < 0)) return 1; @@ -702,5 +704,26 @@ int bessjyv(double v,double x,double &vm,double *jv,double *yv, } vm = n + v0; return 0; +} + +int bessjyv_sph(int v, double z, double &vm, double* cjv, + double* cyv, double* cjvp, double* cyvp) +{ + //first, compute the bessel functions of fractional order + bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); + + //iterate through each and scale + for(int n = 0; n<=v; n++) + { + + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); + cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); + + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(PI / (z * 2.0)); + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(PI / (z * 2.0)); + } + + return 0; + } - + diff --git a/cbessjy.cpp b/cbessjy.cpp index 517dc77..8710800 100644 --- a/cbessjy.cpp +++ b/cbessjy.cpp @@ -724,6 +724,7 @@ int cbessjyva_sph(int v,complex z,double &vm,complex*cjv, //iterate through each and scale for(int n = 0; n<=v; n++) { + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); diff --git a/colormap.h b/colormap.h deleted file mode 100644 index 17b7e34..0000000 --- a/colormap.h +++ /dev/null @@ -1,229 +0,0 @@ -#ifndef RTS_COLORMAP_H -#define RTS_COLORMAP_H - -#include -#include -#include -#include "rts/cuda/error.h" - - -#define BREWER_CTRL_PTS 11 - -#ifdef __CUDACC__ -texture cudaTexBrewer; -static cudaArray* gpuBrewer; -#endif - - - -namespace rts{ - namespace colormap{ - -enum colormapType {cmBrewer, cmGrayscale}; - -static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size) -{ - //create an image object - QImage image(x_size, y_size, QImage::Format_RGB32); - - int i; - unsigned char r, g, b; - unsigned int x, y; - for(y=0; y -__global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) -{ - int i = blockIdx.x * blockDim.x + threadIdx.x; - if(i >= N) return; - - //compute the normalized value on [minVal maxVal] - float a = (gpuSource[i] - minVal) / (maxVal - minVal); - - //lookup the color - float shift = 1.0/BREWER_CTRL_PTS; - float4 color = tex1D(cudaTexBrewer, a+shift); - - gpuDest[i * 3 + 0] = 255 * color.x; - gpuDest[i * 3 + 1] = 255 * color.y; - gpuDest[i * 3 + 2] = 255 * color.z; -} - -template -__global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) -{ - int i = blockIdx.x * blockDim.x + threadIdx.x; - if(i >= N) return; - - //compute the normalized value on [minVal maxVal] - float a = (gpuSource[i] - minVal) / (maxVal - minVal); - - gpuDest[i * 3 + 0] = 255 * a; - gpuDest[i * 3 + 1] = 255 * a; - gpuDest[i * 3 + 2] = 255 * a; -} - -template -static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T minVal = 0, T maxVal = 1, colormapType cm = cmGrayscale, int blockDim = 128) -{ - //This function converts a scalar field on the GPU to a color image on the GPU - int gridDim = (nVals + blockDim - 1)/blockDim; - if(cm == cmGrayscale) - applyGrayscale<<>>(gpuSource, gpuDest, nVals, minVal, maxVal); - else if(cm == cmBrewer) - { - initBrewer(); - applyBrewer<<>>(gpuSource, gpuDest, nVals, minVal, maxVal); - destroyBrewer(); - } - -} - -template -static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T minVal, T maxVal, colormapType cm = cmGrayscale) -{ - //this function converts a scalar field on the GPU to a color image on the CPU - - //first create the color image on the GPU - - //allocate GPU memory for the color image - unsigned char* gpuDest; - HANDLE_ERROR(cudaMalloc( (void**)&gpuDest, sizeof(unsigned char) * nVals * 3 )); - - //HANDLE_ERROR(cudaMemset(gpuSource, 0, sizeof(T) * nVals)); - - //create the image on the gpu - gpu2gpu(gpuSource, gpuDest, nVals, minVal, maxVal, cm); - - //HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3)); - - //copy the image from the GPU to the CPU - HANDLE_ERROR(cudaMemcpy(cpuDest, gpuDest, sizeof(unsigned char) * nVals * 3, cudaMemcpyDeviceToHost)); - - HANDLE_ERROR(cudaFree( gpuDest )); - -} - -template -static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) -{ - //allocate a color buffer - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); - - //do the mapping - gpu2cpu(gpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); - - //copy the buffer to an image - buffer2image(cpuBuffer, fileDest, x_size, y_size); - - free(cpuBuffer); -} - -#endif - -template -static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T valMin, T valMax, colormapType cm = cmGrayscale) -{ - int i; - float a; - float range = valMax - valMin; - for(i = 0; i -static void cpu2image(T* cpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) -{ - //allocate a color buffer - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); - - //do the mapping - cpu2cpu(cpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); - - //copy the buffer to an image - buffer2image(cpuBuffer, fileDest, x_size, y_size); - - free(cpuBuffer); - -} - -}} //end namespace colormap and rts - -#endif - diff --git a/dataTypes.h b/dataTypes.h index 9515714..ee2efdf 100644 --- a/dataTypes.h +++ b/dataTypes.h @@ -24,6 +24,8 @@ typedef double ptype; typedef ptype fieldPoint; +extern bool verbose; + //hybrid GPU/CPU complex data typ #include "rts/math/complex.h" #include "rts/math/vector.h" diff --git a/defaults.h b/defaults.h index 5ed9204..f0dd9d5 100644 --- a/defaults.h +++ b/defaults.h @@ -15,14 +15,14 @@ #define DEFAULT_FOCUS_X 0 #define DEFAULT_FOCUS_Y 0 #define DEFAULT_FOCUS_Z 0 -#define DEFAULT_INCIDENT_ORDER 100 +//#define DEFAULT_INCIDENT_ORDER 20 #define DEFAULT_STABILITY_PARM 1.4 //optics -#define DEFAULT_CONDENSER_MIN 0.0 +#define DEFAULT_CONDENSER_MIN 0 #define DEFAULT_CONDENSER_MAX 1 -#define DEFAULT_OBJECTIVE_MIN 0.0 +#define DEFAULT_OBJECTIVE_MIN 0 #define DEFAULT_OBJECTIVE_MAX 1 //incident light direction @@ -36,17 +36,20 @@ //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective -#define DEFAULT_SLICE_MIN_X -5 -#define DEFAULT_SLICE_MIN_Y 0 -#define DEFAULT_SLICE_MIN_Z -5 +#define DEFAULT_PLANE_MIN_X -5 +#define DEFAULT_PLANE_MIN_Y 0 +#define DEFAULT_PLANE_MIN_Z -5 -#define DEFAULT_SLICE_MAX_X 5 -#define DEFAULT_SLICE_MAX_Y 0 -#define DEFAULT_SLICE_MAX_Z 5 +#define DEFAULT_PLANE_MAX_X 5 +#define DEFAULT_PLANE_MAX_Y 0 +#define DEFAULT_PLANE_MAX_Z 5 -#define DEFAULT_SLICE_NORM_X 0 -#define DEFAULT_SLICE_NORM_Y 1 -#define DEFAULT_SLICE_NORM_Z 0 +#define DEFAULT_PLANE_NORM_X 0 +#define DEFAULT_PLANE_NORM_Y 1 +#define DEFAULT_PLANE_NORM_Z 0 + +#define DEFAULT_PLANE_SIZE 40 +#define DEFAULT_PLANE_POSITION 0 /* @@ -64,21 +67,23 @@ */ -#define DEFAULT_FIELD_ORDER 200 +#define DEFAULT_FIELD_ORDER 10 -#define DEFAULT_SAMPLES 200 +#define DEFAULT_SAMPLES 400 #define DEFAULT_SLICE_RES 256 +#define DEFAULT_SPHERE_THETA_R 1000 + #define DEFAULT_PADDING 1 #define DEFAULT_SUPERSAMPLE 1 -#define DEFAULT_INTENSITY_FILE "testappend" +#define DEFAULT_INTENSITY_FILE "out_i.bmp" #define DEFAULT_TRANSMITTANCE_FILE "" -#define DEFAULT_ABSORBANCE_FILE "out_a" +#define DEFAULT_ABSORBANCE_FILE "out_a.bmp" #define DEFAULT_NEAR_FILE "out_n.bmp" #define DEFAULT_FAR_FILE "out_f.bmp" -#define DEFAULT_EXTENDED_SOURCE "einstein_small.jpg" +#define DEFAULT_EXTENDED_SOURCE "" #define DEFAULT_FIELD_TYPE "magnitude" #define DEFAULT_FORMAT fileoutStruct::formatImage #define DEFAULT_COLORMAP "brewer" diff --git a/fieldslice.cpp b/fieldslice.cpp index bf63766..882603d 100644 --- a/fieldslice.cpp +++ b/fieldslice.cpp @@ -8,14 +8,16 @@ using namespace std; fieldslice::fieldslice(unsigned int x_size, unsigned int y_size) -{ +{ + x_hat = y_hat = z_hat = NULL; + //save the slice resolution R[0] = x_size; R[1] = x_size; scalarField = true; - //init_gpu(); + init_gpu(); } @@ -101,5 +103,5 @@ fieldslice::fieldslice() fieldslice::~fieldslice() { - //kill_gpu(); + kill_gpu(); } diff --git a/fieldslice.cu b/fieldslice.cu index 1b02998..856a91b 100644 --- a/fieldslice.cu +++ b/fieldslice.cu @@ -1,12 +1,15 @@ #include "fieldslice.h" #include "dataTypes.h" -#include "rts/cuda/error.h" +#include "rts/cuda/error.h" +#include "rts/cuda/threads.h" __global__ void field_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* I, unsigned int N) { //compute the index for this thread - int i = blockIdx.x * blockDim.x + threadIdx.x; + //int i = blockIdx.x * blockDim.x + threadIdx.x; + int i = ThreadIndex1D(); + if(i >= N) return; ptype xm = x[i].abs(); @@ -66,7 +69,8 @@ __global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, pty __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) { //compute the index for this thread - int i = blockIdx.x * blockDim.x + threadIdx.x; + //int i = blockIdx.x * blockDim.x + threadIdx.x; + int i = ThreadIndex1D(); if(i >= N) return; V[i] = field_component[i].real(); @@ -75,7 +79,8 @@ __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N) { //compute the index for this thread - int i = blockIdx.x * blockDim.x + threadIdx.x; + //int i = blockIdx.x * blockDim.x + threadIdx.x; + int i = ThreadIndex1D(); if(i >= N) return; V[i] = field_component[i].imag(); @@ -84,7 +89,8 @@ __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned i __global__ void field_sqrt(ptype* input, ptype* output, unsigned int N) { //compute the index for this thread - int i = blockIdx.x * blockDim.x + threadIdx.x; + //int i = blockIdx.x * blockDim.x + threadIdx.x; + int i = ThreadIndex1D(); if(i >= N) return; output[i] = sqrt(input[i]); @@ -115,7 +121,8 @@ scalarslice fieldslice::Mag() //compute the total number of values in the slice unsigned int N = R[0] * R[1]; - int gridDim = (N+BLOCK-1)/BLOCK; + //int gridDim = (N+BLOCK-1)/BLOCK; + dim3 gridDim = GenGrid1D(N, BLOCK); field_intensity<<>>(x_hat, y_hat, z_hat, result->S, N); field_sqrt<<>>(result->S, result->S, N); @@ -132,7 +139,8 @@ scalarslice fieldslice::Real() //compute the total number of values in the slice unsigned int N = R[0] * R[1]; - int gridDim = (N+BLOCK-1)/BLOCK; + //int gridDim = (N+BLOCK-1)/BLOCK; + dim3 gridDim = GenGrid1D(N, BLOCK); field_real<<>>(x_hat, result->S, N); @@ -148,7 +156,8 @@ scalarslice fieldslice::Imag() //compute the total number of values in the slice unsigned int N = R[0] * R[1]; - int gridDim = (N+BLOCK-1)/BLOCK; + //int gridDim = (N+BLOCK-1)/BLOCK; + dim3 gridDim = GenGrid1D(N, BLOCK); field_imaginary<<>>(x_hat, result->S, N); @@ -192,7 +201,6 @@ void fieldslice::ScaleField(ptype v) //compute the total number of values in the slice unsigned int N = R[0] * R[1]; - //cout<<"Size of mag field: "<>>(x_hat, y_hat, z_hat, N, v); @@ -200,19 +208,23 @@ void fieldslice::ScaleField(ptype v) } void fieldslice::init_gpu() -{ +{ + //if the field has no size, return + if(R[0] == 0 || R[1] == 0) + return; + + //free any previous memory allocations + if(x_hat) + HANDLE_ERROR(cudaFree(x_hat)); + if(y_hat) + HANDLE_ERROR(cudaFree(y_hat)); + if(z_hat) + HANDLE_ERROR(cudaFree(z_hat)); + //allocate space on the GPU for the field slice HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex))); - //HANDLE_ERROR(cudaMemset(x_hat, 0, R[0] * R[1] * sizeof(bsComplex))); - //if the field is scalar, y_hat and z_hat are unused - if(scalarField) - { - y_hat = NULL; - z_hat = NULL; - - } - else + if(!scalarField) { HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex))); //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex))); @@ -233,6 +245,8 @@ void fieldslice::kill_gpu() if(z_hat != NULL) HANDLE_ERROR(cudaFree(z_hat)); + x_hat = y_hat = z_hat = NULL; + } void fieldslice::clear_gpu() @@ -275,7 +289,7 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) result.scalarField = scalarField; //allocate space for the new field - result.init_gpu(); + //result.init_gpu(); //create one thread for each pixel of the field slice dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); @@ -291,3 +305,57 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) return result; } + +fieldslice::fieldslice(const fieldslice& rhs) +{ + R[0] = rhs.R[0]; + R[1] = rhs.R[1]; + scalarField = rhs.scalarField; + + x_hat = y_hat = z_hat = NULL; + + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1]; + if(rhs.x_hat != NULL) + { + HANDLE_ERROR(cudaMalloc( (void**)&x_hat, bytes)); + HANDLE_ERROR(cudaMemcpy( x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice)); + } + if(rhs.y_hat != NULL) + { + HANDLE_ERROR(cudaMalloc( (void**)&y_hat, bytes)); + HANDLE_ERROR(cudaMemcpy( y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice)); + } + if(rhs.z_hat != NULL) + { + HANDLE_ERROR(cudaMalloc( (void**)&z_hat, bytes)); + HANDLE_ERROR(cudaMemcpy( z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice)); + } + +} + +fieldslice& fieldslice::operator=(const fieldslice& rhs) +{ + //make sure this isn't a self-allocation + if(this != &rhs) + { + //make a shallow copy + R[0] = rhs.R[0]; + R[1] = rhs.R[1]; + scalarField = rhs.scalarField; + + //initialize to new parameters + init_gpu(); + + //make a deep copy + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1]; + if(x_hat != NULL) + HANDLE_ERROR(cudaMemcpy(x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice)); + if(y_hat != NULL) + HANDLE_ERROR(cudaMemcpy(y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice)); + if(z_hat != NULL) + HANDLE_ERROR(cudaMemcpy(z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice)); + } + + return *this; + +} diff --git a/fieldslice.h b/fieldslice.h index d5c7bbd..43fdef4 100644 --- a/fieldslice.h +++ b/fieldslice.h @@ -31,6 +31,9 @@ struct fieldslice ~fieldslice(); + //copy constructor + fieldslice(const fieldslice& rhs); + //void setPos(bsPoint pMin, bsPoint pMax, bsVector N); scalarslice Mag(); @@ -47,6 +50,7 @@ struct fieldslice //crop a region from the field fieldslice crop(int u, int v, int su, int sv); + fieldslice& operator=(const fieldslice& rhs); void init_gpu(); void kill_gpu(); diff --git a/fileout.cu b/fileout.cu index 31e51c7..37e86b3 100644 --- a/fileout.cu +++ b/fileout.cu @@ -186,11 +186,21 @@ void fileoutStruct::Save(microscopeStruct* scope) //save images of the fields in the microscope //if the user specifies an extended source - if(scope->focalPoints.size() > 1) + if(scope->focalPoints.size() > 0) { //simulate the extended source and output the detector image scope->SimulateExtendedSource(); + //saveNearField(&scope->nf); + saveFarField(scope); + + //save the detector images + saveDetector(scope); + + //simulate scattering for the last point (so that you have a near field image) + scope->SimulateScattering(); + saveNearField(&scope->nf); + } else { @@ -203,12 +213,15 @@ void fileoutStruct::Save(microscopeStruct* scope) //run the far-field simulation scope->SimulateImaging(); + //saveNearField(&scope->nf); saveFarField(scope); + //save the detector images + saveDetector(scope); + } - //save the detector images - saveDetector(scope); + } diff --git a/fileout.h b/fileout.h index c1d4e58..96d0c70 100644 --- a/fileout.h +++ b/fileout.h @@ -5,7 +5,7 @@ //#include "defaults.h" #include "dataTypes.h" -#include "colormap.h" +#include "rts/graphics/colormap.h" #include "fieldslice.h" #include "nearfield.h" #include "microscope.h" @@ -34,7 +34,7 @@ struct fileoutStruct{ //image_source source; //color map info - rts::colormap::colormapType colormap; + rts::colormapType colormap; ptype colorMax; void Save(microscopeStruct* scope); diff --git a/main.cpp b/main.cpp index 4773cc0..9440b8f 100644 --- a/main.cpp +++ b/main.cpp @@ -24,6 +24,7 @@ microscopeStruct* SCOPE; #include "warnings.h" fileoutStruct gFileOut; +bool verbose = false; using namespace std; int cbessjyva(double v,complex z,double &vm,complex*cjv, @@ -31,32 +32,19 @@ int cbessjyva(double v,complex z,double &vm,complex*cjv, int main(int argc, char *argv[]) { - //test Envi loading and saving - //EnviFile envi("testenvi", "w"); - - //float* data = (float*)malloc(sizeof(float) * 100 * 100); - //envi.addBand(data, 100, 100, 100); - - //envi.close(); - - //return 0; SCOPE = new microscopeStruct(); - cout<nf.Uf.R[0]<init(); - OutputOptions(); - gFileOut.Save(SCOPE); - //NF->destroy(); + if(verbose) + OutputOptions(); + SCOPE->destroy(); diff --git a/microscope.cu b/microscope.cu index 21b5253..3c2dfb2 100644 --- a/microscope.cu +++ b/microscope.cu @@ -4,7 +4,7 @@ #include "rts/tools/progressbar.h" #include "rts/cuda/timer.h" #include "dataTypes.h" -#include "colormap.h" +#include "rts/graphics/colormap.h" #include @@ -112,8 +112,8 @@ void microscopeStruct::getFarField() //Compute the Far Field image of the focal plane //clear the memory from previous detector fields - Ud.kill_gpu(); - Ufd.kill_gpu(); + //Ud.kill_gpu(); + //Ufd.kill_gpu(); //first crop the filtered near-field image of the source and scattered fields Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]); @@ -261,9 +261,14 @@ void microscopeStruct::SimulateExtendedSource() t += gpuStopTimer(); rtsProgressBar((double)(i+1)/(double)npts * 100); + //unsigned char c; + //cin>>c; } - cout< +#include + +#ifdef _WIN32 +#define isnan(x) _isnan(x) +#define isinf(x) (!_finite(x)) +#endif + +int bessjyv_sph(int v, double z, double &vm, double* cjv, + double* cyv, double* cjvp, double* cyvp); nearfieldStruct::nearfieldStruct() { scalarSim = true; planeWave = false; + lut_us = true; + lut_uf = false; nWaves = 0; } @@ -46,6 +58,8 @@ std::string nearfieldStruct::toStr() ss<<"Condenser NA: "< n = mVector[imat](lambda); - //std::cout<<"Sphere refractive index: "<>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); - } - - //float t = gpuStopTimer(); - //std::cout<<"Scalar Uf Time: "< texJ; + +__global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR); + +__global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoint f, bsVector k, ptype A, ptype cosAlpha, ptype cosBeta, int nl, ptype dmin, ptype dmax, int dR) +{ + /*This function computes the focused field for a 2D slice + + Uf = destination field slice + ABCD = plane representing the field slice in world space + uR, vR = resolution of the Uf field + f = focal point of the condenser + k = direction of the incident light + A = amplitude of the incident field + cosAlpha= cosine of the solid angle subtended by the condenser obscuration + cosBeta = cosine of the solid angle subtended by the condenser aperature + nl = number of orders used to compute the field + dR = number of Bessel function values in the look-up texture + + */ + + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= uR || iv >= vR) return; + + //compute the index (easier access to the scalar field array) + int i = iv*uR + iu; + + //compute the parameters for u and v + ptype u = (ptype)iu / (uR); + ptype v = (ptype)iv / (vR); + + + + //get the rtsPoint in world space and then the r vector + bsPoint p = ABCD(u, v); + bsVector r = p - f; + ptype d = r.len(); + + if(d == 0) + { + Uf[i] = A * 2 * PI * (cosAlpha - cosBeta); + return; + } + + //get info for the light direction and frequency + r = r.norm(); + + //compute the imaginary factor i^l + bsComplex im = bsComplex(0, 1); + bsComplex il = bsComplex(1, 0); + + //Legendre functions are computed dynamically to save memory + //initialize the Legendre functions + + ptype P[2]; + //get the angle between k and r (light direction and position vector) + ptype cosTheta; + cosTheta = k.dot(r); + + rts::init_legendre(cosTheta, P[0], P[1]); + + //initialize legendre functions for the cassegrain angles + ptype Palpha[3]; + rts::init_legendre(cosAlpha, Palpha[0], Palpha[1]); + Palpha[2] = 1; + + ptype Pbeta[3]; + rts::init_legendre(cosBeta, Pbeta[0], Pbeta[1]); + Pbeta[2] = 1; + + //for each order l + bsComplex sumUf(0.0, 0.0); + ptype jl = 0.0; + ptype Pl; + ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1); + for(int l = 0; l<=nl; l++) + { + jl = tex2D(texJ, l + 0.5, di + 0.5); + if(l==0) + Pl = P[0]; + else if(l==1) + { + Pl = P[1]; + + //adjust the cassegrain Legendre function + Palpha[2] = Palpha[0]; + rts::shift_legendre(l+1, cosAlpha, Palpha[0], Palpha[1]); + Pbeta[2] = Pbeta[0]; + rts::shift_legendre(l+1, cosBeta, Pbeta[0], Pbeta[1]); + } + else + { + rts::shift_legendre(l, cosTheta, P[0], P[1]); + + Pl = P[1]; + + //adjust the cassegrain outer Legendre function + Palpha[2] = Palpha[0]; + rts::shift_legendre(l+1, cosAlpha, Palpha[0], Palpha[1]); + Pbeta[2] = Pbeta[0]; + rts::shift_legendre(l+1, cosBeta, Pbeta[0], Pbeta[1]); + } + + sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); + //sumUf += jl; + + il *= im; + } + + Uf[i] = sumUf * 2 * PI * A; + //Uf[i] = u; + //return; +} + +void nearfieldStruct::scalarUfLut() +{ + gpuStartTimer(); + + //calculate the minimum and maximum points in the focused field + d_min = pos.dist(focus); + d_max = pos.dist_max(focus); + + //allocate space for the Bessel function + int dR = 2 * max(Uf.R[0], Uf.R[1]); + ptype* j = NULL; + j = (ptype*) malloc(sizeof(ptype) * dR * (m+1)); + + //calculate Bessel function LUT + calcBesselLut(j, d_min, d_max, dR); + + //create a CUDA array structure and specify the format description + cudaArray* arrayJ; + cudaChannelFormatDesc channelDesc = + cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); + + //allocate memory + HANDLE_ERROR(cudaMallocArray(&arrayJ, &channelDesc, m+1, dR)); + + //specify texture properties + texJ.addressMode[0] = cudaAddressModeMirror; + texJ.addressMode[1] = cudaAddressModeMirror; + texJ.filterMode = cudaFilterModeLinear; + texJ.normalized = false; + + //bind the texture to the array + HANDLE_ERROR(cudaBindTextureToArray(texJ, arrayJ, channelDesc)); + + //copy the CPU Bessel LUT to the GPU-based array + HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice)); + + //----------------Compute the focused field + //create one thread for each pixel of the field slice + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + //if we are computing a plane wave, call the gpuScalarUfp function + if(planeWave) + { + gpuScalarUfp<<>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]); + } + //otherwise compute the condenser info and create a focused field + else + { + //pre-compute the cosine of the obscuration and objective angles + ptype cosAlpha = cos(asin(condenser[0])); + ptype cosBeta = cos(asin(condenser[1])); + //compute the scalar Uf field (this will be in the x_hat channel of Uf) + gpuScalarUfLut<<>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR); + } + + + //free everything + free(j); + + HANDLE_ERROR(cudaFreeArray(arrayJ)); + + t_Uf = gpuStopTimer(); +} diff --git a/nfScalarUpLut.cu b/nfScalarUpLut.cu new file mode 100644 index 0000000..9a2c0a1 --- /dev/null +++ b/nfScalarUpLut.cu @@ -0,0 +1,192 @@ +#include "nearfield.h" +#include "rts/math/spherical_bessel.h" +#include "rts/math/legendre.h" +#include +#include "rts/cuda/error.h" +#include "rts/cuda/timer.h" + +texture texUsp; +texture texUip; + +__global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR) +{ + /*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field + produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field. + + Us = final scattered field + k = list of incoming plane waves (Monte-Carlo samples) + nk = number of incoming MC samples + kmag= magnitude of the incoming field 2pi/lambda + dmin= minimum distance of the Usp texture + dmax= maximum distance of the Usp texture + f = position of the focus + ps = position of the sphere + A = total amplitude of the incident field arriving at the focal spot + ABCD= rectangle representing the field slice + uR = resolution of the field slice in the u direction + vR = resolution of the field slice in the v direction + dR = resolution of the Usp texture in the d direction + thetaR= resolution of the Usp texture in the theta direction + */ + + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= uR || iv >= vR) return; + + //compute the index (easier access to the scalar field array) + int i = iv*uR + iu; + + //compute the parameters for u and v + ptype u = (ptype)iu / (uR); + ptype v = (ptype)iv / (vR); + + //get the rtsPoint in world space and then the r vector + bsPoint p = ABCD(u, v); + bsVector r = p - ps; + ptype d = r.len(); + float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1); + float ai = ( (d - dmin)/(a - dmin)) * (aR - 1); + + bsComplex sumUs(0, 0); + //for each plane wave in the wave list + for(int iw = 0; iw < nk; iw++) + { + //normalize the direction vectors and find their inner product + r = r.norm(); + ptype cos_theta = k[iw].dot(r); + if(cos_theta < -1) + cos_theta = -1; + if(cos_theta > 1) + cos_theta = 1; + float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1); + + //compute the phase factor for spheres that are not at the origin + bsVector c = ps - f; + bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c))); + + //compute the internal field if we are inside a sphere + if(d < a) + { + float2 Uip = tex2D(texUip, ai + 0.5, thetai + 0.5); + sumUs += (1.0/nk) * A * phase * bsComplex(Uip.x, Uip.y); + } + //otherwise compute the scattered field + else + { + float2 Usp = tex2D(texUsp, di + 0.5, thetai + 0.5); + sumUs += (1.0/nk) * A * phase * bsComplex(Usp.x, Usp.y); + } + + } + + Us[i] += sumUs; +} + +void nearfieldStruct::scalarUpLut() +{ + //get the number of spheres + int nSpheres = sVector.size(); + + //if there are no spheres, nothing to do here + if(nSpheres == 0) + return; + + //time the calculation of the focused field + gpuStartTimer(); + + //clear the scattered field + U.clear_gpu(); + + //create one thread for each pixel of the field slice + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + //copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff) + bsVector* gpuk; + int nWaves; + ptype subA; + if(planeWave) + { + nWaves = 1; + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) ); + HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice)); + subA = A; + } + else + { + nWaves = inWaves.size(); + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nWaves ) ); + HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nWaves, cudaMemcpyHostToDevice)); + //compute the amplitude that makes it through the condenser + subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) ); + } + + //for each sphere + for(int s = 0; s>>(U.x_hat, + gpuk, + nWaves, + 2 * PI / lambda, + sVector[s].a, + sVector[s].d_min, + sVector[s].d_max, + focus, + sVector[s].p, + subA, + pos, + U.R[0], + U.R[1], + dR, + aR, + thetaR); + + cudaFreeArray(arrayUsp); + cudaFreeArray(arrayUip); + + } + + + //store the time to compute the scattered field + t_Us = gpuStopTimer(); + + //free monte-carlo samples + cudaFree(gpuk); + +} diff --git a/nfScalarUs.cu b/nfScalarUs.cu index d1a79fc..813fb0b 100644 --- a/nfScalarUs.cu +++ b/nfScalarUs.cu @@ -163,7 +163,7 @@ void nearfieldStruct::scalarUs() return; //time the calculation of the focused field - //gpuStartTimer(); + gpuStartTimer(); //clear the scattered field U.clear_gpu(); @@ -251,9 +251,8 @@ void nearfieldStruct::scalarUs() } + //store the time to compute the scattered field + t_Us = gpuStopTimer(); - //float t = gpuStopTimer(); - //std::cout<<"Scalar Us Time: "< namespace po = boost::program_options; -static void loadSpheres(string sphereList) +extern bool verbose; + + + +static void lNearfield(po::variables_map vm) +{ + //test to see if we are simulating a plane wave + bool planeWave = DEFAULT_PLANEWAVE; + if(vm.count("plane-wave")) + planeWave = !planeWave; + SCOPE->nf.planeWave = planeWave; + + //get the incident field amplitude + SCOPE->nf.A = vm["amplitude"].as(); + + //get the condenser parameters + SCOPE->nf.condenser[0] = DEFAULT_CONDENSER_MIN; + SCOPE->nf.condenser[1] = DEFAULT_CONDENSER_MAX; + + if(vm.count("condenser")) + { + vector cparams = vm["condenser"].as< vector >(); + + if(cparams.size() == 1) + SCOPE->nf.condenser[1] = cparams[0]; + else + { + SCOPE->nf.condenser[0] = cparams[0]; + SCOPE->nf.condenser[1] = cparams[1]; + } + } + + + //get the focal rtsPoint position + SCOPE->nf.focus[0] = DEFAULT_FOCUS_X; + SCOPE->nf.focus[1] = DEFAULT_FOCUS_Y; + SCOPE->nf.focus[2] = DEFAULT_FOCUS_Z; + if(vm.count("focus")) + { + vector fpos = vm["focus"].as< vector >(); + if(fpos.size() != 3) + { + cout<<"BIMSIM Error - the incident focal point is incorrectly specified; it must have three components."<nf.focus[0] = fpos[0]; + SCOPE->nf.focus[1] = fpos[1]; + SCOPE->nf.focus[2] = fpos[2]; + } + + //get the incident light direction (k-vector) + bsVector spherical(1, 0, 0); + + //if a k-vector is specified + if(vm.count("k")) + { + vector kvec = vm["k"].as< vector >(); + if(kvec.size() != 2) + { + cout<<"BIMSIM Error - k-vector is not specified correctly: it must contain two elements"<nf.k = spherical.sph2cart(); + + + //incident field order + SCOPE->nf.m = vm["field-order"].as(); + + //number of Monte-Carlo samples + SCOPE->nf.nWaves = vm["samples"].as(); + + //random number seed for Monte-Carlo samples + if(vm.count("seed")) + srand(vm["seed"].as()); + + + +} + + +static void loadOutputParams(po::variables_map vm) +{ + //append simulation results to previous binary files + gFileOut.append = DEFAULT_APPEND; + if(vm.count("append")) + gFileOut.append = true; + + //image parameters + //component of the field to be saved + std::string fieldStr; + fieldStr = vm["output-type"].as(); + + if(fieldStr == "magnitude") + gFileOut.field = fileoutStruct::fieldMag; + else if(fieldStr == "intensity") + gFileOut.field = fileoutStruct::fieldIntensity; + else if(fieldStr == "polarization") + gFileOut.field = fileoutStruct::fieldPolar; + else if(fieldStr == "imaginary") + gFileOut.field = fileoutStruct::fieldImag; + else if(fieldStr == "real") + gFileOut.field = fileoutStruct::fieldReal; + else if(fieldStr == "angular-spectrum") + gFileOut.field = fileoutStruct::fieldAngularSpectrum; + + + //image file names + gFileOut.intFile = vm["intensity"].as(); + gFileOut.absFile = vm["absorbance"].as(); + gFileOut.transFile = vm["transmittance"].as(); + gFileOut.nearFile = vm["near-field"].as(); + gFileOut.farFile = vm["far-field"].as(); + + //colormap + std::string cmapStr; + cmapStr = vm["colormap"].as(); + if(cmapStr == "brewer") + gFileOut.colormap = rts::cmBrewer; + else if(cmapStr == "gray") + gFileOut.colormap = rts::cmGrayscale; + else + cout<<"color-map value not recognized (using default): "<nf.lut_us = false; + SCOPE->nf.lut_uf = false; + } + else if(vm.count("recursive-us")) + { + SCOPE->nf.lut_us = false; + } + else if(vm.count("lut-uf")) + { + SCOPE->nf.lut_uf = true; + } +} + +void lWavelength(po::variables_map vm) +{ + //load the wavelength + if(vm.count("nu")) + { + //wavelength is given in wavenumber - transform and flag + SCOPE->nf.lambda = 10000/vm["nu"].as(); + gFileOut.wavenumber = true; + } + //otherwise we are using lambda = wavelength + else + { + SCOPE->nf.lambda = vm["lambda"].as(); + gFileOut.wavenumber = false; + } +} + +static void lSpheres(string sphereList) { /*This function loads a list of sphere given in the string sphereList The format is: @@ -58,17 +230,60 @@ static void loadSpheres(string sphereList) //check out the next element (this should set the EOF error flag) ss.peek(); } +} +void lSpheres(po::variables_map vm) +{ + //if a sphere is specified at the command line + if(vm.count("spheres")) + { + //convert the sphere to a string + vector sdesc = vm["spheres"].as< vector >(); + //compute the number of spheres specified + unsigned int nS; + if(sdesc.size() <= 5) + nS = 1; + else + { + //if the number of parameters is divisible by 4, compute the number of spheres + if(sdesc.size() % 5 == 0) + nS = sdesc.size() / 5; + else + { + cout<<"BIMSIM Error: Invalid number of sphere parameters."< filenames = vm["sphere-file"].as< vector >(); //load each file for(int iS=0; iS(ifs)), std::istreambuf_iterator()); //load the list of spheres from a string - loadSpheres(instr); + lSpheres(instr); } } - //load the sphere from the command line - if(vm.count("sx") || vm.count("sy") || vm.count("sz") || vm.count("s")) - { - //create a new sphere - sphere newS; - - //set defaults - if(vm.count("sx")) - newS.p[0] = vm["sx"].as(); - else - newS.p[0] = DEFAULT_SPHERE_X; - - - if(vm.count("sy")) - newS.p[1] = vm["sy"].as(); - else - newS.p[1] = DEFAULT_SPHERE_Y; - - if(vm.count("sz")) - newS.p[2] = vm["sz"].as(); - else - newS.p[2] = DEFAULT_SPHERE_Z; - - if(vm.count("radius")) - newS.a = vm["radius"].as(); - else - newS.a = DEFAULT_SPHERE_A; - - //add the sphere to the sphere vector - SCOPE->nf.sVector.push_back(newS); + //make sure the appropriate materials are loaded + unsigned int nS = SCOPE->nf.sVector.size(); + //for each sphere + for(unsigned int s = 0; snf.sVector[s].iMaterial + 1 > SCOPE->nf.mVector.size()) + { + //otherwise output an error + cout<<"BIMSIM Error - A material is not loaded for sphere "< matVec = vm["materials"].as< vector >(); - if(matVec.size() %2 != 0) + if(matVec.size() == 1) + { + rts::material newM(SCOPE->nf.lambda, matVec[0], 0); + SCOPE->nf.mVector.push_back(newM); + } + else if(matVec.size() %2 != 0) { cout<<"BIMSim Error: materials must be specified in n, k pairs"< newM(SCOPE->nf.lambda, matVec[i], matVec[i+1]); - SCOPE->nf.mVector.push_back(newM); + for(int i=0; i newM(SCOPE->nf.lambda, matVec[i], matVec[i+1]); + SCOPE->nf.mVector.push_back(newM); + } } } - else - { - //add the command line material as the default (material 0) - rts::material newM(SCOPE->nf.lambda, vm["n"].as(), vm["k"].as()); - SCOPE->nf.mVector.push_back(newM); - } //if file names are specified, load the materials if(vm.count("material-file")) @@ -169,57 +366,109 @@ static void loadMaterials(po::variables_map vm) } -static void loadNearfieldParams(po::variables_map vm) +static void lOptics(po::variables_map vm) { - //test to see if we are simulating a plane wave - bool planeWave = DEFAULT_PLANEWAVE; - if(vm.count("plane-wave")) - planeWave = !planeWave; - SCOPE->nf.planeWave = planeWave; - - //get the wavelength - //SCOPE->nf.lambda = vm["lambda"].as(); - - //get the incident field amplitude - SCOPE->nf.A = vm["amplitude"].as(); - - //get the condenser parameters - SCOPE->nf.condenser[0] = vm["condenser-min"].as(); - SCOPE->nf.condenser[1] = vm["condenser-max"].as(); - - - //get the focal rtsPoint position - SCOPE->nf.focus[0] = vm["fx"].as(); - SCOPE->nf.focus[1] = vm["fy"].as(); - SCOPE->nf.focus[2] = vm["fz"].as(); - - //get the incident light direction (k-vector) - bsVector spherical; - spherical[0] = 1.0; - spherical[1] = vm["theta"].as(); - spherical[2] = vm["phi"].as(); - SCOPE->nf.k = spherical.sph2cart(); - - - //incident field order - SCOPE->nf.m = vm["field-order"].as(); - - //number of Monte-Carlo samples - SCOPE->nf.nWaves = vm["samples"].as(); - - + SCOPE->objective[0] = DEFAULT_OBJECTIVE_MIN; + SCOPE->objective[1] = DEFAULT_OBJECTIVE_MAX; + if(vm.count("objective")) + { + vector oparams = vm["objective"].as< vector >(); + if(oparams.size() == 1) + SCOPE->objective[1] = oparams[0]; + else + { + SCOPE->objective[0] = oparams[0]; + SCOPE->objective[1] = oparams[1]; + } + } } -static void loadSliceParams(po::variables_map vm) +static void lImagePlane(po::variables_map vm) { - //parameters for the sample plane - + bsPoint pMin(DEFAULT_PLANE_MIN_X, DEFAULT_PLANE_MIN_Y, DEFAULT_PLANE_MIN_Z); + bsPoint pMax(DEFAULT_PLANE_MAX_X, DEFAULT_PLANE_MAX_Y, DEFAULT_PLANE_MAX_Z); + bsVector normal(DEFAULT_PLANE_NORM_X, DEFAULT_PLANE_NORM_Y, DEFAULT_PLANE_NORM_Z); //set the default values for the slice position and orientation - bsPoint pMin(vm["plane-min-x"].as(), vm["plane-min-y"].as(), vm["plane-min-z"].as()); - bsPoint pMax(vm["plane-max-x"].as(), vm["plane-max-y"].as(), vm["plane-max-z"].as()); - bsVector normal(vm["plane-norm-x"].as(), vm["plane-norm-y"].as(), vm["plane-norm-z"].as()); + if(vm.count("plane-lower-left") && vm.count("plane-upper-right") && vm.count("plane-normal")) + { + vector ll = vm["plane-lower-left"].as< vector >(); + if(ll.size() != 3) + { + cout<<"BIMSIM Error - The lower-left corner of the image plane is incorrectly specified."< ur = vm["plane-lower-left"].as< vector >(); + if(ur.size() != 3) + { + cout<<"BIMSIM Error - The upper-right corner of the image plane is incorrectly specified."< norm = vm["plane-lower-left"].as< vector >(); + if(norm.size() != 3) + { + cout<<"BIMSIM Error - The normal of the image plane is incorrectly specified."< xy = vm["xy"].as< vector >(); + if(xy.size() >= 1) + s = xy[0]; + if(xy.size() >= 2) + pos = xy[1]; + + //calculate the plane corners and normal based on the size and position + pMin = bsPoint(-s/2, -s/2, pos); + pMax = bsPoint(s/2, s/2, pos); + normal = bsVector(0, 0, 1); + } + else if(vm.count("xz")) + { + //default plane size in microns + ptype size = DEFAULT_PLANE_SIZE; + ptype pos = DEFAULT_PLANE_POSITION; + + vector xz = vm["xz"].as< vector >(); + if(xz.size() >= 1) + size = xz[0]; + if(xz.size() >= 2) + pos = xz[1]; + + //calculate the plane corners and normal based on the size and position + pMin = bsPoint(-size/2, pos, -size/2); + pMax = bsPoint(size/2, pos, size/2); + normal = bsVector(0, -1, 0); + } + else if(vm.count("yz")) + { + //default plane size in microns + ptype size = DEFAULT_PLANE_SIZE; + ptype pos = DEFAULT_PLANE_POSITION; + + vector yz = vm["yz"].as< vector >(); + if(yz.size() >= 1) + size = yz[0]; + if(yz.size() >= 2) + pos = yz[1]; + + //calculate the plane corners and normal based on the size and position + pMin = bsPoint(pos, -size/2, -size/2); + pMax = bsPoint(pos, size/2, size/2); + normal = bsVector(1, 0, 0); + } SCOPE->setPos(pMin, pMax, normal); //resolution @@ -233,175 +482,111 @@ static void loadSliceParams(po::variables_map vm) SCOPE->setNearfield(); - - - -} - -static void loadMicroscopeParams(po::variables_map vm) -{ - //objective - SCOPE->objective[0] = vm["objective-min"].as(); - SCOPE->objective[1] = vm["objective-max"].as(); - - - - - -} - -static void loadOutputParams(po::variables_map vm) -{ - //append simulation results to previous binary files - gFileOut.append = DEFAULT_APPEND; - if(vm.count("append")) - gFileOut.append = true; - - //image parameters - //component of the field to be saved - std::string fieldStr; - fieldStr = vm["output-type"].as(); - - if(fieldStr == "magnitude") - gFileOut.field = fileoutStruct::fieldMag; - else if(fieldStr == "intensity") - gFileOut.field = fileoutStruct::fieldIntensity; - else if(fieldStr == "polarization") - gFileOut.field = fileoutStruct::fieldPolar; - else if(fieldStr == "imaginary") - gFileOut.field = fileoutStruct::fieldImag; - else if(fieldStr == "real") - gFileOut.field = fileoutStruct::fieldReal; - else if(fieldStr == "angular-spectrum") - gFileOut.field = fileoutStruct::fieldAngularSpectrum; - - - //image file names - gFileOut.intFile = vm["intensity"].as(); - gFileOut.absFile = vm["absorbance"].as(); - gFileOut.transFile = vm["transmittance"].as(); - gFileOut.nearFile = vm["near-field"].as(); - gFileOut.farFile = vm["far-field"].as(); - - //colormap - std::string cmapStr; - cmapStr = vm["colormap"].as(); - if(cmapStr == "brewer") - gFileOut.colormap = rts::colormap::cmBrewer; - else if(cmapStr == "gray") - gFileOut.colormap = rts::colormap::cmGrayscale; - else - cout<<"color-map value not recognized (using default): "<nf.toStr(); + cout<toStr(); cout<<"# of source points: "<focalPoints.size()< test; static void SetOptions(po::options_description &desc) { desc.add_options() - ("help,h", "prints this help") - ("plane-wave,P", "simulates an incident plane wave") - ("intensity,I", po::value()->default_value(DEFAULT_INTENSITY_FILE), "output measured intensity (filename)") - ("absorbance,A", po::value()->default_value(DEFAULT_ABSORBANCE_FILE), "output measured absorbance (filename)") - ("transmittance,T", po::value()->default_value(DEFAULT_TRANSMITTANCE_FILE), "output measured transmittance (filename)") - ("far-field,F", po::value()->default_value(DEFAULT_FAR_FILE), "output far-field at detector (filename)") - ("near-field,N", po::value()->default_value(DEFAULT_NEAR_FILE), "output field at focal plane (filename)") - ("extended-source,X", po::value()->default_value(DEFAULT_EXTENDED_SOURCE), "image of source at focus (filename)") - //("sx,x", po::value()->default_value(DEFAULT_SPHERE_X), "sphere coordinates") - //("sy,y", po::value()->default_value(DEFAULT_SPHERE_Y)) - //("sz,z", po::value()->default_value(DEFAULT_SPHERE_Z)) - ("sx,x", po::value(), "sphere coordinates") - ("sy,y", po::value()) - ("sz,z", po::value()) - ("radius,r", po::value()->default_value(DEFAULT_SPHERE_A), "sphere radius") - ("samples,s", po::value()->default_value(DEFAULT_SAMPLES), "Monte-Carlo samples used to compute Us") - ("sphere-file,S", po::value< vector >()->multitoken(), "sphere file:\n [x y z radius material]") - ("amplitude,a", po::value()->default_value(DEFAULT_AMPLITUDE), "incident field amplitude") - ("n,n", po::value()->default_value(DEFAULT_N, "1.4"), "sphere phase speed") - ("k,k", po::value()->default_value(DEFAULT_K), "sphere absorption coefficient") - ("material-file,M", po::value< vector >()->multitoken(), "material file:\n [lambda n k]") - ("materials", po::value< vector >()->multitoken(), "materials specified using n, k pairs:\n ex. --materials n1 k1 n2 k2\n (if used --n and --k are ignored)") - ("lambda,l", po::value()->default_value(DEFAULT_LAMBDA), "incident wavelength") + ("help", "prints this help") + ("verbose", "verbose output\n") + + ("intensity", po::value()->default_value(DEFAULT_INTENSITY_FILE), "output measured intensity (filename)") + ("absorbance", po::value()->default_value(DEFAULT_ABSORBANCE_FILE), "output measured absorbance (filename)") + ("transmittance", po::value()->default_value(DEFAULT_TRANSMITTANCE_FILE), "output measured transmittance (filename)") + ("far-field", po::value()->default_value(DEFAULT_FAR_FILE), "output far-field at detector (filename)") + ("near-field", po::value()->default_value(DEFAULT_NEAR_FILE), "output field at focal plane (filename)") + ("extended-source", po::value()->default_value(DEFAULT_EXTENDED_SOURCE), "image of source at focus (filename)\n") + + ("spheres", po::value< vector >()->multitoken(), "sphere position: x y z a m") + ("sphere-file", po::value< vector >()->multitoken(), "sphere file:\n [x y z radius material]") + ("materials", po::value< vector >()->multitoken(), "refractive indices as n, k pairs:\n ex. -m n0 k0 n1 k1 n2 k2") + ("material-file", po::value< vector >()->multitoken(), "material file:\n [lambda n k]\n") + + ("lambda", po::value()->default_value(DEFAULT_LAMBDA), "incident wavelength") ("nu", po::value(), "incident frequency (in cm^-1)\n(if specified, lambda is ignored)") - ("theta,t", po::value()->default_value(DEFAULT_K_THETA), "light direction (polar coords)") - ("phi,p", po::value()->default_value(DEFAULT_K_PHI)) - ("fx", po::value()->default_value(DEFAULT_FOCUS_X), "incident focal point") - ("fy", po::value()->default_value(DEFAULT_FOCUS_Y)) - ("fz", po::value()->default_value(DEFAULT_FOCUS_Z)) - ("condenser-max,C", po::value()->default_value(DEFAULT_CONDENSER_MAX), "condenser numerical aperature") - ("condenser-min,c", po::value()->default_value(DEFAULT_CONDENSER_MIN), "condenser obscuration NA") - ("objective-max,O", po::value()->default_value(DEFAULT_OBJECTIVE_MAX), "objective numerical aperature") - ("objective-min,o", po::value()->default_value(DEFAULT_OBJECTIVE_MIN), "objective obscuration NA") - ("field-order", po::value()->default_value(DEFAULT_FIELD_ORDER), "order of the incident field") - ("output-type,f", po::value()->default_value(DEFAULT_FIELD_TYPE), "output field value:\n magnitude, polarization, real, imaginary, angular-spectrum") - ("resolution,R", po::value()->default_value(DEFAULT_SLICE_RES), "resolution of the detector") - ("padding,d", po::value()->default_value(DEFAULT_PADDING), "FFT padding for the objective bandpass") + ("k", po::value< vector >()->multitoken(), "k-vector direction: -k theta phi\n theta = [0 2*pi], phi = [0 pi]") + ("amplitude", po::value()->default_value(DEFAULT_AMPLITUDE), "incident field amplitude") + ("condenser", po::value< vector >()->multitoken(), "condenser numerical aperature\nA pair of values can be used to specify an inner obscuration: -c NAin NAout") + ("objective", po::value< vector >()->multitoken(), "objective numerical aperature\nA pair of values can be used to specify an inner obscuration: -c NAin NAout") + ("focus", po::value< vector >()->multitoken(), "focal position for the incident point source\n (default = --focus 0 0 0)") + ("plane-wave", "simulates an incident plane wave\n") + + ("resolution", po::value()->default_value(DEFAULT_SLICE_RES), "resolution of the detector") + ("plane-lower-left", po::value< vector >()->multitoken(), "lower-left position of the image plane") + ("plane-upper-right", po::value< vector >()->multitoken(), "upper-right position of the image plane") + ("plane-normal", po::value< vector >()->multitoken(), "normal for the image plane") + ("xy", po::value< vector >()->multitoken(), "specify an x-y image plane\n (standard microscope)") + ("xz", po::value< vector >()->multitoken(), "specify a x-z image plane\n (cross-section of the focal volume)") + ("yz", po::value< vector >()->multitoken(), "specify a y-z image plane\n (cross-section of the focal volume)\n") + + ("samples", po::value()->default_value(DEFAULT_SAMPLES), "Monte-Carlo samples used to compute Us") + ("padding", po::value()->default_value(DEFAULT_PADDING), "FFT padding for the objective bandpass") ("supersample", po::value()->default_value(DEFAULT_SUPERSAMPLE), "super-sampling rate for the detector field") + ("field-order", po::value()->default_value(DEFAULT_FIELD_ORDER), "order of the incident field") + ("seed", po::value(), "seed for the Monte-Carlo random number generator") + ("recursive", "evaluate all Bessel functions recursively\n") + ("recursive-us", "evaluate scattered-field Bessel functions recursively\n") + ("lut-uf", "evaluate the focused-field using a look-up table\n") + + ("output-type", po::value()->default_value(DEFAULT_FIELD_TYPE), "output field value:\n magnitude, polarization, real, imaginary, angular-spectrum") ("colormap", po::value()->default_value(DEFAULT_COLORMAP), "colormap: gray, brewer") ("append", "append result to an existing file\n (binary files only)") - ("plane-min-x,u", po::value()->default_value(DEFAULT_SLICE_MIN_X), "lower-left corner of the field slice") - ("plane-min-y,v", po::value()->default_value(DEFAULT_SLICE_MIN_Y)) - ("plane-min-z,w", po::value()->default_value(DEFAULT_SLICE_MIN_Z)) - ("plane-max-x,U", po::value()->default_value(DEFAULT_SLICE_MAX_X), "upper-right corner of the field slice") - ("plane-max-y,V", po::value()->default_value(DEFAULT_SLICE_MAX_Y)) - ("plane-max-z,W", po::value()->default_value(DEFAULT_SLICE_MAX_Z)) - ("plane-norm-x", po::value()->default_value(DEFAULT_SLICE_NORM_X), "field slice normal") - ("plane-norm-y", po::value()->default_value(DEFAULT_SLICE_NORM_Y)) - ("plane-norm-z", po::value()->default_value(DEFAULT_SLICE_NORM_Z)); + ; } static void LoadParameters(int argc, char *argv[]) { //create an option description - po::options_description desc("Allowed options"); + po::options_description desc("BimSim arguments"); //fill it with options SetOptions(desc); po::variables_map vm; - po::store(po::parse_command_line(argc, argv, desc), vm); + po::store(po::parse_command_line(argc, argv, desc, po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm); po::notify(vm); - //display help and exit - if(vm.count("help")) - { - cout<nf.lambda = 10000/vm["nu"].as(); - gFileOut.wavenumber = true; - } - //otherwise we are using lambda = wavelength - else - { - SCOPE->nf.lambda = vm["lambda"].as(); - gFileOut.wavenumber = false; - } + //load flags (help, verbose output) + lFlags(vm, desc); + + //load the wavelength + lWavelength(vm); + + //load materials + //loadMaterials(vm); + lMaterials(vm); + + //load the sphere data + lSpheres(vm); + + //load the optics + lOptics(vm); + + //load the position and orientation of the image plane + lImagePlane(vm); //load spheres - loadSpheres(vm); + //loadSpheres(vm); + - //load materials - loadMaterials(vm); - loadNearfieldParams(vm); + lNearfield(vm); loadOutputParams(vm); - loadMicroscopeParams(vm); + //loadMicroscopeParams(vm); - loadSliceParams(vm); + //loadSliceParams(vm); //if an extended source will be used if(vm["extended-source"].as() != "") diff --git a/scalarslice.cu b/scalarslice.cu index daa609d..c6f215a 100644 --- a/scalarslice.cu +++ b/scalarslice.cu @@ -22,16 +22,17 @@ scalarslice::scalarslice() scalarslice::~scalarslice() { - HANDLE_ERROR(cudaFree(S)); + if(S != NULL) + HANDLE_ERROR(cudaFree(S)); S = NULL; } -void scalarslice::toImage(std::string filename, ptype vmin, ptype vmax, rts::colormap::colormapType cmap) +void scalarslice::toImage(std::string filename, ptype vmin, ptype vmax, rts::colormapType cmap) { - rts::colormap::gpu2image(S, filename, R[0], R[1], vmin, vmax, cmap); + rts::gpu2image(S, filename, R[0], R[1], vmin, vmax, cmap); } -void scalarslice::toImage(std::string filename, bool positive, rts::colormap::colormapType cmap) +void scalarslice::toImage(std::string filename, bool positive, rts::colormapType cmap) { cublasStatus_t stat; cublasHandle_t handle; @@ -62,7 +63,7 @@ void scalarslice::toImage(std::string filename, bool positive, rts::colormap::co exit(1); } - //std::cout<<"Maximum index: "< #include +#include using namespace rts; using namespace std; @@ -13,6 +15,9 @@ int cbessjyva(double v,complex z,double &vm,complex*cjv, int cbessjyva_sph(int v,complex z,double &vm,complex*cjv, complex*cyv,complex*cjvp,complex*cyvp); +int bessjyv_sph(int v, double z, double &vm, double* cjv, + double* cyv, double* cjvp, double* cyvp); + void sphere::calcCoeff(ptype lambda, rtsComplex ri) { /* These calculations are done at high-precision on the CPU @@ -59,12 +64,6 @@ void sphere::calcCoeff(ptype lambda, rtsComplex ri) cbessjyva_sph(Nl, ka, vm, cjv_ka, cyv_ka, cjvp_ka, cyvp_ka); cbessjyva_sph(Nl, kna, vm, cjv_kna, cyv_kna, cjvp_kna, cyvp_kna); - - //cout<<"Begin Sphere---------"< ri) //calculate B and add it to the list Bn = (2.0 * l + 1.0) * pow(i, l) * (c / d); B.push_back(bsComplex(Bn.real(), Bn.imag())); - //cout<<"B: "<) * (Nl + 1); + complex* cjv_knr = (complex*)malloc(bytes); + complex* cyv_knr = (complex*)malloc(bytes); + complex* cjvp_knr = (complex*)malloc(bytes); + complex* cyvp_knr = (complex*)malloc(bytes); + + //compute the bessel functions using the CPU-based algorithm + double vm; + + //for each sample along r + ptype dr = a / (aR - 1); + ptype r; + for(int ir = 0; ir < aR; ir++) + { + r = ir * dr; + complex knr( (k*n*r).real(), (k*n*r).imag() ); + cbessjyva_sph(Nl, knr, vm, cjv_knr, cyv_knr, cjvp_knr, cyvp_knr); + + //copy the double data to the bsComplex array + for(int l=0; l<=Nl; l++) + { + //deal with the NaN case at the origin + if(ir == 0) + { + if(l == 0) + j[ir * (Nl+1)] = 1; + else + j[ir * (Nl+1) + l] = 0; + } + else + j[ir * (Nl+1) + l] = bsComplex(cjv_knr[l].real(), cjv_knr[l].imag()); + } + } + + /*ofstream outfile("besselout.txt"); + for(int ir = 0; ir < aR; ir++) + { + for(int l = 0; l nfPlane, unsigned int R) +{ + //calculate the parameters of the lookup table + + //first find the distance to the closest and furthest points on the nearfield plane + d_min = nfPlane.dist(p); + d_max = nfPlane.dist_max(p); + + //compute the radius of the cross-section of the sphere with the plane + ptype a_inter = 0; + if(d_min < a) + a_inter = sqrt(a - d_min); + + + //calculate the resolution of the Usp and Uip lookup tables + int aR = 1 + 2 * R * a_inter / (nfPlane(0, 0) - nfPlane(1, 1)).len(); + int dR = 2 * R; + int thetaR = DEFAULT_SPHERE_THETA_R; + + //allocate space for the bessel function LUTs + bsComplex* j = (bsComplex*)malloc(sizeof(bsComplex) * (Nl + 1) * aR); + bsComplex* h = (bsComplex*)malloc(sizeof(bsComplex) * (Nl + 1) * dR); + + calcLut(j, h, lambda, n, aR, dR); + + //allocate space for the Usp lookup texture + Usp.R[0] = dR; + Usp.R[1] = thetaR; + Usp.init_gpu(); + + //allocate space for the Uip lookup texture + Uip.R[0] = aR; + Uip.R[1] = thetaR; + Uip.init_gpu(); + + + + scalarUsp(h, dR, thetaR); + scalarUip(j, aR, thetaR); + + scalarslice UspMag = Usp.Mag(); + UspMag.toImage("Usp.bmp", true); + + scalarslice UipMag = Uip.Mag(); + UipMag.toImage("Uip.bmp", true); + + //free memory + free(j); + free(h); + +} + +sphere& sphere::operator=(const sphere &rhs) +{ + p = rhs.p; + a = rhs.a; + iMaterial = rhs.iMaterial; + Nl = rhs.Nl; + n = rhs.n; + B = rhs.B; + A = rhs.A; + + return *this; +} + +sphere::sphere(const sphere &rhs) +{ + p = rhs.p; + a = rhs.a; + iMaterial = rhs.iMaterial; + Nl = rhs.Nl; + n = rhs.n; + B = rhs.B; + A = rhs.A; } diff --git a/sphere.cu b/sphere.cu new file mode 100644 index 0000000..8819936 --- /dev/null +++ b/sphere.cu @@ -0,0 +1,149 @@ +#include "sphere.h" +#include "rts/math/legendre.h" + +__global__ void gpuScalarUsp(bsComplex* Usp, bsComplex* h, bsComplex* B, int Nl, int rR, int thetaR) +{ + //get the current coordinate in the plane slice + int ir = blockIdx.x * blockDim.x + threadIdx.x; + int itheta = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(itheta >= thetaR || ir >= rR) return; + + int i = itheta * rR + ir; + + //ptype dr = (rmax - a) / (rR - 1); + ptype dtheta = (PI) / (thetaR - 1); + + //comptue the current angle and distance + //ptype r = dr * ir + a; + ptype theta = dtheta * itheta; + ptype cos_theta = cos(theta); + + //initialize the Legendre polynomial + ptype P[2]; + rts::init_legendre(cos_theta, P[0], P[1]); + + //initialize the result + bsComplex Us((ptype)0, (ptype)0); + + //for each order l + for(int l=0; l <= Nl; l++) + { + if(l == 0) + { + Us += B[l] * h[ir * (Nl+1) + l] * P[0]; + //Us += P[0]; + } + else + { + if(l > 1) + { + rts::shift_legendre(l, cos_theta, P[0], P[1]); + } + Us += B[l] * h[ir * (Nl+1) + l] * P[1]; + //Us += P[1]; + } + + + } + Usp[i] = Us; + //Usp[i] = h[ir * (Nl+1)]; + //Usp[i] = ir; + +} + +__global__ void gpuScalarUip(bsComplex* Uip, bsComplex* j, bsComplex* A, int Nl, int aR, int thetaR) +{ + //get the current coordinate in the plane slice + int ia = blockIdx.x * blockDim.x + threadIdx.x; + int itheta = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(itheta >= thetaR || ia >= aR) return; + + int i = itheta * aR + ia; + + ptype dtheta = (PI) / (thetaR - 1); + + //comptue the current angle and distance + ptype theta = dtheta * itheta; + ptype cos_theta = cos(theta); + + //initialize the Legendre polynomial + ptype P[2]; + rts::init_legendre(cos_theta, P[0], P[1]); + + //initialize the result + bsComplex Ui((ptype)0, (ptype)0); + + //for each order l + for(int l=0; l <= Nl; l++) + { + if(l == 0) + { + Ui += A[l] * j[ia * (Nl+1) + l] * P[0]; + } + else + { + if(l > 1) + { + rts::shift_legendre(l, cos_theta, P[0], P[1]); + } + Ui += A[l] * j[ia * (Nl+1) + l] * P[1]; + } + + + } + Uip[i] = Ui; +} + +void sphere::scalarUsp(bsComplex* h, int rR, int thetaR) +{ + //copy the hankel function to the GPU + bsComplex* gpu_h; + HANDLE_ERROR( cudaMalloc( (void**)&gpu_h, sizeof(bsComplex) * (Nl + 1) * rR ) ); + HANDLE_ERROR( cudaMemcpy( gpu_h, h, sizeof(bsComplex) * (Nl + 1) * rR, cudaMemcpyHostToDevice ) ); + + //allocate memory for the scattering coefficients + bsComplex* gpuB; + HANDLE_ERROR(cudaMalloc((void**) &gpuB, (Nl+1) * sizeof(bsComplex))); + //copy the scattering coefficients to the GPU + HANDLE_ERROR(cudaMemcpy(gpuB, &B[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice)); + + //create one thread for each pixel of the field slice + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((Usp.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Usp.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + gpuScalarUsp<<>>(Usp.x_hat, gpu_h, gpuB, Nl, rR, thetaR); + + //free memory + cudaFree(gpu_h); + cudaFree(gpuB); + +} + +void sphere::scalarUip(bsComplex* j, int rR, int thetaR) +{ + //copy the bessel and hankel LUTs to the GPU + bsComplex* gpu_j; + HANDLE_ERROR( cudaMalloc( (void**)&gpu_j, sizeof(bsComplex) * (Nl + 1) * rR ) ); + HANDLE_ERROR( cudaMemcpy( gpu_j, j, sizeof(bsComplex) * (Nl + 1) * rR, cudaMemcpyHostToDevice ) ); + + //allocate memory for the scattering coefficients + bsComplex* gpuA; + HANDLE_ERROR(cudaMalloc((void**) &gpuA, (Nl+1) * sizeof(bsComplex))); + //copy the scattering coefficients to the GPU + HANDLE_ERROR(cudaMemcpy(gpuA, &A[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice)); + + //create one thread for each pixel of the field slice + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((Uip.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uip.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + gpuScalarUip<<>>(Uip.x_hat, gpu_j, gpuA, Nl, rR, thetaR); + + //free memory + cudaFree(gpu_j); + cudaFree(gpuA); + +} diff --git a/sphere.h b/sphere.h index c0b685d..7723c8c 100644 --- a/sphere.h +++ b/sphere.h @@ -22,12 +22,12 @@ struct sphere //sphere material index int iMaterial; - //rtsPointer to the scattered field produced by a plane wave + //GPU pointer to the scattered field produced by a plane wave // this is a function of cos(theta) and |r| (distance from sphere center) - //fieldslice surface; - - //resolution of the scattered field - int thetaR, rR; + fieldslice Usp; + fieldslice Uip; + ptype d_min; + ptype d_max; //sphere order int Nl; @@ -50,6 +50,12 @@ struct sphere //surface = fieldslice(ang, ang/2); } + //assignment operator + sphere & operator=(const sphere &rhs); + + //copy constructor + sphere(const sphere &rhs); + std::string toStr() { std::stringstream ss; @@ -66,8 +72,19 @@ struct sphere Nl = ceil( (2 * PI * a) / lambda + 4 * pow( (2 * PI * a) / lambda, 1.0/3.0) + 2); } - void calcCoeff(ptype lambda, rts::rtsComplex n); + //compute the scattering coefficients + void calcCoeff(ptype lambda, bsComplex n); + + //compute the bessel function look-up tables + void calcLut(bsComplex* j, bsComplex* h, ptype lambda, bsComplex n, int aR, int rR); + void calcBesselLut(bsComplex* j, ptype k, bsComplex n, int aR); + void calcHankelLut(bsComplex* h, ptype k, int rR); + + //calculate the scattering domain Us(theta, r) + void calcUp(ptype lambda, bsComplex n, rts::rtsQuad nfPlane, unsigned int R); + void scalarUsp(bsComplex* h, int rR, int thetaR); + void scalarUip(bsComplex* j, int aR, int thetaR); -- libgit2 0.21.4