Authored by dmayerich
1 parent b6179de6

Showing 27 changed files with 1518 additions and 588 deletions
bessjy.cpp

 ... ... @@ -13,7 +13,9 @@ 13 13 // 14 14 #define _USE_MATH_DEFINES 15 15 #include 16 -#include "bessel.h" 16 +#include "bessel.h" 17 + 18 +#define PI 3.14159 17 19 18 20 double gamma(double x); 19 21 // ... ... @@ -426,7 +428,7 @@ int bessjynb(int n,double x,int &nm,double *jn,double *yn, 426 428 0.2775764465332031, 427 429 -1.993531733751297, 428 430 2.724882731126854e1}; 429 - 431 + 430 432 int i,k,m; 431 433 nm = n; 432 434 if ((x < 0.0) || (n < 0)) return 1; ... ... @@ -702,5 +704,26 @@ int bessjyv(double v,double x,double &vm,double *jv,double *yv, 702 704 } 703 705 vm = n + v0; 704 706 return 0; 707 +} 708 + 709 +int bessjyv_sph(int v, double z, double &vm, double* cjv, 710 + double* cyv, double* cjvp, double* cyvp) 711 +{ 712 + //first, compute the bessel functions of fractional order 713 + bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); 714 + 715 + //iterate through each and scale 716 + for(int n = 0; n<=v; n++) 717 + { 718 + 719 + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); 720 + cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); 721 + 722 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(PI / (z * 2.0)); 723 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(PI / (z * 2.0)); 724 + } 725 + 726 + return 0; 727 + 705 728 } 706 - 729 + ... ...
cbessjy.cpp

 ... ... @@ -724,6 +724,7 @@ int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv, 724 724 //iterate through each and scale 725 725 for(int n = 0; n<=v; n++) 726 726 { 727 + 727 728 cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); 728 729 cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); 729 730 ... ...
colormap.h deleted
 1 -#ifndef RTS_COLORMAP_H 2 -#define RTS_COLORMAP_H 3 - 4 -#include 5 -#include 6 -#include 7 -#include "rts/cuda/error.h" 8 - 9 - 10 -#define BREWER_CTRL_PTS 11 11 - 12 -#ifdef __CUDACC__ 13 -texture cudaTexBrewer; 14 -static cudaArray* gpuBrewer; 15 -#endif 16 - 17 - 18 - 19 -namespace rts{ 20 - namespace colormap{ 21 - 22 -enum colormapType {cmBrewer, cmGrayscale}; 23 - 24 -static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size) 25 -{ 26 - //create an image object 27 - QImage image(x_size, y_size, QImage::Format_RGB32); 28 - 29 - int i; 30 - unsigned char r, g, b; 31 - unsigned int x, y; 32 - for(y=0; y 101 -__global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) 102 -{ 103 - int i = blockIdx.x * blockDim.x + threadIdx.x; 104 - if(i >= N) return; 105 - 106 - //compute the normalized value on [minVal maxVal] 107 - float a = (gpuSource[i] - minVal) / (maxVal - minVal); 108 - 109 - //lookup the color 110 - float shift = 1.0/BREWER_CTRL_PTS; 111 - float4 color = tex1D(cudaTexBrewer, a+shift); 112 - 113 - gpuDest[i * 3 + 0] = 255 * color.x; 114 - gpuDest[i * 3 + 1] = 255 * color.y; 115 - gpuDest[i * 3 + 2] = 255 * color.z; 116 -} 117 - 118 -template 119 -__global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) 120 -{ 121 - int i = blockIdx.x * blockDim.x + threadIdx.x; 122 - if(i >= N) return; 123 - 124 - //compute the normalized value on [minVal maxVal] 125 - float a = (gpuSource[i] - minVal) / (maxVal - minVal); 126 - 127 - gpuDest[i * 3 + 0] = 255 * a; 128 - gpuDest[i * 3 + 1] = 255 * a; 129 - gpuDest[i * 3 + 2] = 255 * a; 130 -} 131 - 132 -template 133 -static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T minVal = 0, T maxVal = 1, colormapType cm = cmGrayscale, int blockDim = 128) 134 -{ 135 - //This function converts a scalar field on the GPU to a color 
image on the GPU 136 - int gridDim = (nVals + blockDim - 1)/blockDim; 137 - if(cm == cmGrayscale) 138 - applyGrayscale<<>>(gpuSource, gpuDest, nVals, minVal, maxVal); 139 - else if(cm == cmBrewer) 140 - { 141 - initBrewer(); 142 - applyBrewer<<>>(gpuSource, gpuDest, nVals, minVal, maxVal); 143 - destroyBrewer(); 144 - } 145 - 146 -} 147 - 148 -template 149 -static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T minVal, T maxVal, colormapType cm = cmGrayscale) 150 -{ 151 - //this function converts a scalar field on the GPU to a color image on the CPU 152 - 153 - //first create the color image on the GPU 154 - 155 - //allocate GPU memory for the color image 156 - unsigned char* gpuDest; 157 - HANDLE_ERROR(cudaMalloc( (void**)&gpuDest, sizeof(unsigned char) * nVals * 3 )); 158 - 159 - //HANDLE_ERROR(cudaMemset(gpuSource, 0, sizeof(T) * nVals)); 160 - 161 - //create the image on the gpu 162 - gpu2gpu(gpuSource, gpuDest, nVals, minVal, maxVal, cm); 163 - 164 - //HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3)); 165 - 166 - //copy the image from the GPU to the CPU 167 - HANDLE_ERROR(cudaMemcpy(cpuDest, gpuDest, sizeof(unsigned char) * nVals * 3, cudaMemcpyDeviceToHost)); 168 - 169 - HANDLE_ERROR(cudaFree( gpuDest )); 170 - 171 -} 172 - 173 -template 174 -static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) 175 -{ 176 - //allocate a color buffer 177 - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); 178 - 179 - //do the mapping 180 - gpu2cpu(gpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); 181 - 182 - //copy the buffer to an image 183 - buffer2image(cpuBuffer, fileDest, x_size, y_size); 184 - 185 - free(cpuBuffer); 186 -} 187 - 188 -#endif 189 - 190 -template 191 -static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T valMin, T valMax, 
colormapType cm = cmGrayscale) 192 -{ 193 - int i; 194 - float a; 195 - float range = valMax - valMin; 196 - for(i = 0; i 211 -static void cpu2image(T* cpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) 212 -{ 213 - //allocate a color buffer 214 - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); 215 - 216 - //do the mapping 217 - cpu2cpu(cpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); 218 - 219 - //copy the buffer to an image 220 - buffer2image(cpuBuffer, fileDest, x_size, y_size); 221 - 222 - free(cpuBuffer); 223 - 224 -} 225 - 226 -}} //end namespace colormap and rts 227 - 228 -#endif 229 -
dataTypes.h

 ... ... @@ -24,6 +24,8 @@ typedef double ptype; 24 24 25 25 typedef ptype fieldPoint; 26 26 27 +extern bool verbose; 28 + 27 29 //hybrid GPU/CPU complex data typ 28 30 #include "rts/math/complex.h" 29 31 #include "rts/math/vector.h" ... ...
defaults.h

 ... ... @@ -15,14 +15,14 @@ 15 15 #define DEFAULT_FOCUS_X 0 16 16 #define DEFAULT_FOCUS_Y 0 17 17 #define DEFAULT_FOCUS_Z 0 18 -#define DEFAULT_INCIDENT_ORDER 100 18 +//#define DEFAULT_INCIDENT_ORDER 20 19 19 #define DEFAULT_STABILITY_PARM 1.4 20 20 21 21 //optics 22 -#define DEFAULT_CONDENSER_MIN 0.0 22 +#define DEFAULT_CONDENSER_MIN 0 23 23 #define DEFAULT_CONDENSER_MAX 1 24 24 25 -#define DEFAULT_OBJECTIVE_MIN 0.0 25 +#define DEFAULT_OBJECTIVE_MIN 0 26 26 #define DEFAULT_OBJECTIVE_MAX 1 27 27 28 28 //incident light direction ... ... @@ -36,17 +36,20 @@ 36 36 //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective 37 37 38 38 39 -#define DEFAULT_SLICE_MIN_X -5 40 -#define DEFAULT_SLICE_MIN_Y 0 41 -#define DEFAULT_SLICE_MIN_Z -5 39 +#define DEFAULT_PLANE_MIN_X -5 40 +#define DEFAULT_PLANE_MIN_Y 0 41 +#define DEFAULT_PLANE_MIN_Z -5 42 42 43 -#define DEFAULT_SLICE_MAX_X 5 44 -#define DEFAULT_SLICE_MAX_Y 0 45 -#define DEFAULT_SLICE_MAX_Z 5 43 +#define DEFAULT_PLANE_MAX_X 5 44 +#define DEFAULT_PLANE_MAX_Y 0 45 +#define DEFAULT_PLANE_MAX_Z 5 46 46 47 -#define DEFAULT_SLICE_NORM_X 0 48 -#define DEFAULT_SLICE_NORM_Y 1 49 -#define DEFAULT_SLICE_NORM_Z 0 47 +#define DEFAULT_PLANE_NORM_X 0 48 +#define DEFAULT_PLANE_NORM_Y 1 49 +#define DEFAULT_PLANE_NORM_Z 0 50 + 51 +#define DEFAULT_PLANE_SIZE 40 52 +#define DEFAULT_PLANE_POSITION 0 50 53 51 54 52 55 /* ... ... 
@@ -64,21 +67,23 @@ 64 67 */ 65 68 66 69 67 -#define DEFAULT_FIELD_ORDER 200 70 +#define DEFAULT_FIELD_ORDER 10 68 71 69 -#define DEFAULT_SAMPLES 200 72 +#define DEFAULT_SAMPLES 400 70 73 71 74 #define DEFAULT_SLICE_RES 256 72 75 76 +#define DEFAULT_SPHERE_THETA_R 1000 77 + 73 78 #define DEFAULT_PADDING 1 74 79 #define DEFAULT_SUPERSAMPLE 1 75 80 76 -#define DEFAULT_INTENSITY_FILE "testappend" 81 +#define DEFAULT_INTENSITY_FILE "out_i.bmp" 77 82 #define DEFAULT_TRANSMITTANCE_FILE "" 78 -#define DEFAULT_ABSORBANCE_FILE "out_a" 83 +#define DEFAULT_ABSORBANCE_FILE "out_a.bmp" 79 84 #define DEFAULT_NEAR_FILE "out_n.bmp" 80 85 #define DEFAULT_FAR_FILE "out_f.bmp" 81 -#define DEFAULT_EXTENDED_SOURCE "einstein_small.jpg" 86 +#define DEFAULT_EXTENDED_SOURCE "" 82 87 #define DEFAULT_FIELD_TYPE "magnitude" 83 88 #define DEFAULT_FORMAT fileoutStruct::formatImage 84 89 #define DEFAULT_COLORMAP "brewer" ... ...
fieldslice.cpp

 ... ... @@ -8,14 +8,16 @@ 8 8 using namespace std; 9 9 10 10 fieldslice::fieldslice(unsigned int x_size, unsigned int y_size) 11 -{ 11 +{ 12 + x_hat = y_hat = z_hat = NULL; 13 + 12 14 //save the slice resolution 13 15 R[0] = x_size; 14 16 R[1] = x_size; 15 17 16 18 scalarField = true; 17 19 18 - //init_gpu(); 20 + init_gpu(); 19 21 20 22 21 23 } ... ... @@ -101,5 +103,5 @@ fieldslice::fieldslice() 101 103 102 104 fieldslice::~fieldslice() 103 105 { 104 - //kill_gpu(); 106 + kill_gpu(); 105 107 } ... ...
fieldslice.cu

fieldslice.h

 ... ... @@ -31,6 +31,9 @@ struct fieldslice 31 31 32 32 ~fieldslice(); 33 33 34 + //copy constructor 35 + fieldslice(const fieldslice& rhs); 36 + 34 37 //void setPos(bsPoint pMin, bsPoint pMax, bsVector N); 35 38 36 39 scalarslice Mag(); ... ... @@ -47,6 +50,7 @@ struct fieldslice 47 50 48 51 //crop a region from the field 49 52 fieldslice crop(int u, int v, int su, int sv); 53 + fieldslice& operator=(const fieldslice& rhs); 50 54 51 55 void init_gpu(); 52 56 void kill_gpu(); ... ...
fileout.cu

 ... ... @@ -186,11 +186,21 @@ void fileoutStruct::Save(microscopeStruct* scope) 186 186 //save images of the fields in the microscope 187 187 188 188 //if the user specifies an extended source 189 - if(scope->focalPoints.size() > 1) 189 + if(scope->focalPoints.size() > 0) 190 190 { 191 191 //simulate the extended source and output the detector image 192 192 scope->SimulateExtendedSource(); 193 193 194 + //saveNearField(&scope->nf); 195 + saveFarField(scope); 196 + 197 + //save the detector images 198 + saveDetector(scope); 199 + 200 + //simulate scattering for the last point (so that you have a near field image) 201 + scope->SimulateScattering(); 202 + saveNearField(&scope->nf); 203 + 194 204 } 195 205 else 196 206 { ... ... @@ -203,12 +213,15 @@ void fileoutStruct::Save(microscopeStruct* scope) 203 213 //run the far-field simulation 204 214 scope->SimulateImaging(); 205 215 216 + //saveNearField(&scope->nf); 206 217 saveFarField(scope); 207 218 219 + //save the detector images 220 + saveDetector(scope); 221 + 208 222 } 209 223 210 - //save the detector images 211 - saveDetector(scope); 224 + 212 225 213 226 214 227 } ... ...
fileout.h

 ... ... @@ -5,7 +5,7 @@ 5 5 //#include "defaults.h" 6 6 #include "dataTypes.h" 7 7 8 -#include "colormap.h" 8 +#include "rts/graphics/colormap.h" 9 9 #include "fieldslice.h" 10 10 #include "nearfield.h" 11 11 #include "microscope.h" ... ... @@ -34,7 +34,7 @@ struct fileoutStruct{ 34 34 //image_source source; 35 35 36 36 //color map info 37 - rts::colormap::colormapType colormap; 37 + rts::colormapType colormap; 38 38 ptype colorMax; 39 39 40 40 void Save(microscopeStruct* scope); ... ...
main.cpp

 ... ... @@ -24,6 +24,7 @@ microscopeStruct* SCOPE; 24 24 #include "warnings.h" 25 25 26 26 fileoutStruct gFileOut; 27 +bool verbose = false; 27 28 using namespace std; 28 29 29 30 int cbessjyva(double v,complex z,double &vm,complex*cjv, ... ... @@ -31,32 +32,19 @@ int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, 31 32 32 33 int main(int argc, char *argv[]) 33 34 { 34 - //test Envi loading and saving 35 - //EnviFile envi("testenvi", "w"); 36 - 37 - //float* data = (float*)malloc(sizeof(float) * 100 * 100); 38 - //envi.addBand(data, 100, 100, 100); 39 - 40 - //envi.close(); 41 - 42 - //return 0; 43 35 44 36 SCOPE = new microscopeStruct(); 45 37 46 - cout<nf.Uf.R[0]<init(); 54 42 55 - OutputOptions(); 56 - 57 43 gFileOut.Save(SCOPE); 58 44 59 - //NF->destroy(); 45 + if(verbose) 46 + OutputOptions(); 47 + 60 48 SCOPE->destroy(); 61 49 62 50 ... ...
microscope.cu

 ... ... @@ -4,7 +4,7 @@ 4 4 #include "rts/tools/progressbar.h" 5 5 #include "rts/cuda/timer.h" 6 6 #include "dataTypes.h" 7 -#include "colormap.h" 7 +#include "rts/graphics/colormap.h" 8 8 9 9 #include 10 10 ... ... @@ -112,8 +112,8 @@ void microscopeStruct::getFarField() 112 112 //Compute the Far Field image of the focal plane 113 113 114 114 //clear the memory from previous detector fields 115 - Ud.kill_gpu(); 116 - Ufd.kill_gpu(); 115 + //Ud.kill_gpu(); 116 + //Ufd.kill_gpu(); 117 117 118 118 //first crop the filtered near-field image of the source and scattered fields 119 119 Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]); ... ... @@ -261,9 +261,14 @@ void microscopeStruct::SimulateExtendedSource() 261 261 t += gpuStopTimer(); 262 262 263 263 rtsProgressBar((double)(i+1)/(double)npts * 100); 264 + //unsigned char c; 265 + //cin>>c; 264 266 } 265 - cout<
microscope.h

 ... ... @@ -63,6 +63,8 @@ struct microscopeStruct 63 63 scalarslice getTransmittance(); 64 64 scalarslice getIntensity(); 65 65 66 + string toStr(); 67 + 66 68 67 69 68 70 }; ... ...
montecarlo.cpp

 ... ... @@ -35,18 +35,12 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) 35 35 ptype inPhi = asin(NAin); 36 36 ptype outPhi = asin(NAout); 37 37 38 - //cout<<"inPhi: "<
nearfield.cpp

 1 1 #include "nearfield.h" 2 +#include 3 +#include 4 + 5 +#ifdef _WIN32 6 +#define isnan(x) _isnan(x) 7 +#define isinf(x) (!_finite(x)) 8 +#endif 9 + 10 +int bessjyv_sph(int v, double z, double &vm, double* cjv, 11 + double* cyv, double* cjvp, double* cyvp); 2 12 3 13 nearfieldStruct::nearfieldStruct() 4 14 { 5 15 scalarSim = true; 6 16 planeWave = false; 17 + lut_us = true; 18 + lut_uf = false; 7 19 8 20 nWaves = 0; 9 21 } ... ... @@ -46,6 +58,8 @@ std::string nearfieldStruct::toStr() 46 58 ss<<"Condenser NA: "< n = mVector[imat](lambda); 99 - //std::cout<<"Sphere refractive index: "<
nearfield.h

 ... ... @@ -31,6 +31,8 @@ struct nearfieldStruct 31 31 32 32 //slices for the focused field 33 33 fieldslice Uf; 34 + ptype d_min, d_max; 35 + 34 36 // and total field: Uf + sum(Us) 35 37 fieldslice U; 36 38 ... ... @@ -43,6 +45,14 @@ struct nearfieldStruct 43 45 //flag for a plane wave 44 46 bool planeWave; 45 47 48 + //flag for using a LUT 49 + bool lut_uf; 50 + bool lut_us; 51 + 52 + //timings 53 + float t_Uf; 54 + float t_Us; 55 + 46 56 47 57 48 58 //---------Scatterers------------ ... ... @@ -78,10 +88,17 @@ struct nearfieldStruct 78 88 void setPos(bsPoint pMin, bsPoint pMax, bsVector normal); 79 89 80 90 //this function re-computes the focused field 91 + void calcUf(); 81 92 void scalarUf(); 93 + void scalarUfLut(); 94 + 95 + void calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR); 82 96 83 97 //compute the field scattered by all of the materials 98 + void calcUs(); 84 99 void scalarUs(); 100 + void scalarUpLut(); 101 + 85 102 86 103 //add the incident field to the sum of scattered fields 87 104 void sumUf(); ... ...
nfScalarUf.cu

 ... ... @@ -5,7 +5,7 @@ 5 5 #include "rts/cuda/error.h" 6 6 #include "rts/cuda/timer.h" 7 7 8 - 8 +//Incident field for a single plane wave 9 9 __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR) 10 10 { 11 11 /*Compute the scalar focused field using Debye focusing ... ... @@ -41,7 +41,8 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p 41 41 Uf[i] = exp(d) * A; 42 42 43 43 } 44 - 44 + 45 +//Incident field for a focused point source 45 46 __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4) 46 47 { 47 48 /*Compute the scalar focused field using Debye focusing ... ... @@ -151,7 +152,6 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt 151 152 } 152 153 153 154 sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); 154 - //sumUf += il * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); 155 155 156 156 il *= im; 157 157 } ... ... 
@@ -162,21 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt 162 162 163 163 void nearfieldStruct::scalarUf() 164 164 { 165 - //Compute the incident field via a scalar simulation 166 - //This method uses Debye focusing to approximate the field analytically 167 - 168 - //time the calculation of the focused field 169 - //gpuStartTimer(); 170 - 171 - //set the field slice to a scalar field 172 - //Uf.scalarField = true; 173 - 174 - //initialize the GPU arrays 175 - //Uf.init_gpu(); 165 + 166 + gpuStartTimer(); 176 167 177 168 //create one thread for each pixel of the field slice 178 169 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); 179 - dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); 170 + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); 180 171 181 172 //if we are computing a plane wave, call the gpuScalarUfp function 182 173 if(planeWave) ... ... @@ -191,10 +182,7 @@ void nearfieldStruct::scalarUf() 191 182 ptype cosBeta = cos(asin(condenser[1])); 192 183 //compute the scalar Uf field (this will be in the x_hat channel of Uf) 193 184 gpuScalarUf<<>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); 194 - } 195 - 196 - //float t = gpuStopTimer(); 197 - //std::cout<<"Scalar Uf Time: "<
nfScalarUfLut.cu 0 → 100644

nfScalarUpLut.cu 0 → 100644

 1 +#include "nearfield.h" 2 +#include "rts/math/spherical_bessel.h" 3 +#include "rts/math/legendre.h" 4 +#include 5 +#include "rts/cuda/error.h" 6 +#include "rts/cuda/timer.h" 7 + 8 +texture texUsp; 9 +texture texUip; 10 + 11 +__global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR) 12 +{ 13 + /*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field 14 + produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field. 15 + 16 + Us = final scattered field 17 + k = list of incoming plane waves (Monte-Carlo samples) 18 + nk = number of incoming MC samples 19 + kmag= magnitude of the incoming field 2pi/lambda 20 + dmin= minimum distance of the Usp texture 21 + dmax= maximum distance of the Usp texture 22 + f = position of the focus 23 + ps = position of the sphere 24 + A = total amplitude of the incident field arriving at the focal spot 25 + ABCD= rectangle representing the field slice 26 + uR = resolution of the field slice in the u direction 27 + vR = resolution of the field slice in the v direction 28 + dR = resolution of the Usp texture in the d direction 29 + thetaR= resolution of the Usp texture in the theta direction 30 + */ 31 + 32 + //get the current coordinate in the plane slice 33 + int iu = blockIdx.x * blockDim.x + threadIdx.x; 34 + int iv = blockIdx.y * blockDim.y + threadIdx.y; 35 + 36 + //make sure that the thread indices are in-bounds 37 + if(iu >= uR || iv >= vR) return; 38 + 39 + //compute the index (easier access to the scalar field array) 40 + int i = iv*uR + iu; 41 + 42 + //compute the parameters for u and v 43 + ptype u = (ptype)iu / (uR); 44 + ptype v = (ptype)iv / (vR); 45 + 46 + //get the rtsPoint in world space and then the r vector 47 + bsPoint p = ABCD(u, v); 48 + bsVector r = p - ps; 49 + ptype d = r.len(); 50 + 
float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1); 51 + float ai = ( (d - dmin)/(a - dmin)) * (aR - 1); 52 + 53 + bsComplex sumUs(0, 0); 54 + //for each plane wave in the wave list 55 + for(int iw = 0; iw < nk; iw++) 56 + { 57 + //normalize the direction vectors and find their inner product 58 + r = r.norm(); 59 + ptype cos_theta = k[iw].dot(r); 60 + if(cos_theta < -1) 61 + cos_theta = -1; 62 + if(cos_theta > 1) 63 + cos_theta = 1; 64 + float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1); 65 + 66 + //compute the phase factor for spheres that are not at the origin 67 + bsVector c = ps - f; 68 + bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c))); 69 + 70 + //compute the internal field if we are inside a sphere 71 + if(d < a) 72 + { 73 + float2 Uip = tex2D(texUip, ai + 0.5, thetai + 0.5); 74 + sumUs += (1.0/nk) * A * phase * bsComplex(Uip.x, Uip.y); 75 + } 76 + //otherwise compute the scattered field 77 + else 78 + { 79 + float2 Usp = tex2D(texUsp, di + 0.5, thetai + 0.5); 80 + sumUs += (1.0/nk) * A * phase * bsComplex(Usp.x, Usp.y); 81 + } 82 + 83 + } 84 + 85 + Us[i] += sumUs; 86 +} 87 + 88 +void nearfieldStruct::scalarUpLut() 89 +{ 90 + //get the number of spheres 91 + int nSpheres = sVector.size(); 92 + 93 + //if there are no spheres, nothing to do here 94 + if(nSpheres == 0) 95 + return; 96 + 97 + //time the calculation of the focused field 98 + gpuStartTimer(); 99 + 100 + //clear the scattered field 101 + U.clear_gpu(); 102 + 103 + //create one thread for each pixel of the field slice 104 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); 105 + dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); 106 + 107 + //copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff) 108 + bsVector* gpuk; 109 + int nWaves; 110 + ptype subA; 111 + if(planeWave) 112 + { 113 + nWaves = 1; 114 + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) ); 115 + HANDLE_ERROR(cudaMemcpy( 
gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice)); 116 + subA = A; 117 + } 118 + else 119 + { 120 + nWaves = inWaves.size(); 121 + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nWaves ) ); 122 + HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nWaves, cudaMemcpyHostToDevice)); 123 + //compute the amplitude that makes it through the condenser 124 + subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) ); 125 + } 126 + 127 + //for each sphere 128 + for(int s = 0; s>>(U.x_hat, 164 + gpuk, 165 + nWaves, 166 + 2 * PI / lambda, 167 + sVector[s].a, 168 + sVector[s].d_min, 169 + sVector[s].d_max, 170 + focus, 171 + sVector[s].p, 172 + subA, 173 + pos, 174 + U.R[0], 175 + U.R[1], 176 + dR, 177 + aR, 178 + thetaR); 179 + 180 + cudaFreeArray(arrayUsp); 181 + cudaFreeArray(arrayUip); 182 + 183 + } 184 + 185 + 186 + //store the time to compute the scattered field 187 + t_Us = gpuStopTimer(); 188 + 189 + //free monte-carlo samples 190 + cudaFree(gpuk); 191 + 192 +} ... ...
nfScalarUs.cu

 ... ... @@ -163,7 +163,7 @@ void nearfieldStruct::scalarUs() 163 163 return; 164 164 165 165 //time the calculation of the focused field 166 - //gpuStartTimer(); 166 + gpuStartTimer(); 167 167 168 168 //clear the scattered field 169 169 U.clear_gpu(); ... ... @@ -251,9 +251,8 @@ void nearfieldStruct::scalarUs() 251 251 } 252 252 253 253 254 + //store the time to compute the scattered field 255 + t_Us = gpuStopTimer(); 254 256 255 - //float t = gpuStopTimer(); 256 - //std::cout<<"Scalar Us Time: "<
nfSumUf.cu

 ... ... @@ -32,7 +32,7 @@ __global__ void gpuScalarUsp(bsComplex* Ufx, bsComplex* Ufy, bsComplex* Ufz, 32 32 { 33 33 r = p - ps[is]; 34 34 d = r.len(); 35 - if(d <= as[is]) 35 + if(d < as[is]) 36 36 return; 37 37 } 38 38 ... ... @@ -110,8 +110,5 @@ void nearfieldStruct::sumUf() 110 110 HANDLE_ERROR(cudaFree(gpu_p)); 111 111 HANDLE_ERROR(cudaFree(gpu_a)); 112 112 113 - //float t = gpuStopTimer(); 114 - //std::cout<<"Add Us Time: "<
options.h

 ... ... @@ -5,7 +5,7 @@ 5 5 6 6 #include "nearfield.h" 7 7 #include "microscope.h" 8 -#include "colormap.h" 8 +#include "rts/graphics/colormap.h" 9 9 #include "fileout.h" 10 10 //extern nearfieldStruct* NF; 11 11 extern microscopeStruct* SCOPE; ... ... @@ -23,7 +23,179 @@ using namespace std; 23 23 #include 24 24 namespace po = boost::program_options; 25 25 26 -static void loadSpheres(string sphereList) 26 +extern bool verbose; 27 + 28 + 29 + 30 +static void lNearfield(po::variables_map vm) 31 +{ 32 + //test to see if we are simulating a plane wave 33 + bool planeWave = DEFAULT_PLANEWAVE; 34 + if(vm.count("plane-wave")) 35 + planeWave = !planeWave; 36 + SCOPE->nf.planeWave = planeWave; 37 + 38 + //get the incident field amplitude 39 + SCOPE->nf.A = vm["amplitude"].as(); 40 + 41 + //get the condenser parameters 42 + SCOPE->nf.condenser[0] = DEFAULT_CONDENSER_MIN; 43 + SCOPE->nf.condenser[1] = DEFAULT_CONDENSER_MAX; 44 + 45 + if(vm.count("condenser")) 46 + { 47 + vector cparams = vm["condenser"].as< vector >(); 48 + 49 + if(cparams.size() == 1) 50 + SCOPE->nf.condenser[1] = cparams[0]; 51 + else 52 + { 53 + SCOPE->nf.condenser[0] = cparams[0]; 54 + SCOPE->nf.condenser[1] = cparams[1]; 55 + } 56 + } 57 + 58 + 59 + //get the focal rtsPoint position 60 + SCOPE->nf.focus[0] = DEFAULT_FOCUS_X; 61 + SCOPE->nf.focus[1] = DEFAULT_FOCUS_Y; 62 + SCOPE->nf.focus[2] = DEFAULT_FOCUS_Z; 63 + if(vm.count("focus")) 64 + { 65 + vector fpos = vm["focus"].as< vector >(); 66 + if(fpos.size() != 3) 67 + { 68 + cout<<"BIMSIM Error - the incident focal point is incorrectly specified; it must have three components."<nf.focus[0] = fpos[0]; 72 + SCOPE->nf.focus[1] = fpos[1]; 73 + SCOPE->nf.focus[2] = fpos[2]; 74 + } 75 + 76 + //get the incident light direction (k-vector) 77 + bsVector spherical(1, 0, 0); 78 + 79 + //if a k-vector is specified 80 + if(vm.count("k")) 81 + { 82 + vector kvec = vm["k"].as< vector >(); 83 + if(kvec.size() != 2) 84 + { 85 + cout<<"BIMSIM Error - k-vector is not 
specified correctly: it must contain two elements"<nf.k = spherical.sph2cart(); 92 + 93 + 94 + //incident field order 95 + SCOPE->nf.m = vm["field-order"].as(); 96 + 97 + //number of Monte-Carlo samples 98 + SCOPE->nf.nWaves = vm["samples"].as(); 99 + 100 + //random number seed for Monte-Carlo samples 101 + if(vm.count("seed")) 102 + srand(vm["seed"].as()); 103 + 104 + 105 + 106 +} 107 + 108 + 109 +static void loadOutputParams(po::variables_map vm) 110 +{ 111 + //append simulation results to previous binary files 112 + gFileOut.append = DEFAULT_APPEND; 113 + if(vm.count("append")) 114 + gFileOut.append = true; 115 + 116 + //image parameters 117 + //component of the field to be saved 118 + std::string fieldStr; 119 + fieldStr = vm["output-type"].as(); 120 + 121 + if(fieldStr == "magnitude") 122 + gFileOut.field = fileoutStruct::fieldMag; 123 + else if(fieldStr == "intensity") 124 + gFileOut.field = fileoutStruct::fieldIntensity; 125 + else if(fieldStr == "polarization") 126 + gFileOut.field = fileoutStruct::fieldPolar; 127 + else if(fieldStr == "imaginary") 128 + gFileOut.field = fileoutStruct::fieldImag; 129 + else if(fieldStr == "real") 130 + gFileOut.field = fileoutStruct::fieldReal; 131 + else if(fieldStr == "angular-spectrum") 132 + gFileOut.field = fileoutStruct::fieldAngularSpectrum; 133 + 134 + 135 + //image file names 136 + gFileOut.intFile = vm["intensity"].as(); 137 + gFileOut.absFile = vm["absorbance"].as(); 138 + gFileOut.transFile = vm["transmittance"].as(); 139 + gFileOut.nearFile = vm["near-field"].as(); 140 + gFileOut.farFile = vm["far-field"].as(); 141 + 142 + //colormap 143 + std::string cmapStr; 144 + cmapStr = vm["colormap"].as(); 145 + if(cmapStr == "brewer") 146 + gFileOut.colormap = rts::cmBrewer; 147 + else if(cmapStr == "gray") 148 + gFileOut.colormap = rts::cmGrayscale; 149 + else 150 + cout<<"color-map value not recognized (using default): "<nf.lut_us = false; 169 + SCOPE->nf.lut_uf = false; 170 + } 171 + else if(vm.count("recursive-us")) 
172 + { 173 + SCOPE->nf.lut_us = false; 174 + } 175 + else if(vm.count("lut-uf")) 176 + { 177 + SCOPE->nf.lut_uf = true; 178 + } 179 +} 180 + 181 +void lWavelength(po::variables_map vm) 182 +{ 183 + //load the wavelength 184 + if(vm.count("nu")) 185 + { 186 + //wavelength is given in wavenumber - transform and flag 187 + SCOPE->nf.lambda = 10000/vm["nu"].as(); 188 + gFileOut.wavenumber = true; 189 + } 190 + //otherwise we are using lambda = wavelength 191 + else 192 + { 193 + SCOPE->nf.lambda = vm["lambda"].as(); 194 + gFileOut.wavenumber = false; 195 + } 196 +} 197 + 198 +static void lSpheres(string sphereList) 27 199 { 28 200 /*This function loads a list of sphere given in the string sphereList 29 201 The format is: ... ... @@ -58,17 +230,60 @@ static void loadSpheres(string sphereList) 58 230 //check out the next element (this should set the EOF error flag) 59 231 ss.peek(); 60 232 } 233 +} 61 234 235 +void lSpheres(po::variables_map vm) 236 +{ 237 + //if a sphere is specified at the command line 238 + if(vm.count("spheres")) 239 + { 240 + //convert the sphere to a string 241 + vector sdesc = vm["spheres"].as< vector >(); 62 242 243 + //compute the number of spheres specified 244 + unsigned int nS; 245 + if(sdesc.size() <= 5) 246 + nS = 1; 247 + else 248 + { 249 + //if the number of parameters is divisible by 4, compute the number of spheres 250 + if(sdesc.size() % 5 == 0) 251 + nS = sdesc.size() / 5; 252 + else 253 + { 254 + cout<<"BIMSIM Error: Invalid number of sphere parameters."<