Commit 51b6469a3ee77583099edb0a57e1bb7859c28fd1

Authored by dmayerich
1 parent b6179de6

added look-up tables

bessjy.cpp
... ... @@ -13,7 +13,9 @@
13 13 //
14 14 #define _USE_MATH_DEFINES
15 15 #include <math.h>
16   -#include "bessel.h"
  16 +#include "bessel.h"
  17 +
  18 +#define PI 3.14159
17 19  
18 20 double gamma(double x);
19 21 //
... ... @@ -426,7 +428,7 @@ int bessjynb(int n,double x,int &amp;nm,double *jn,double *yn,
426 428 0.2775764465332031,
427 429 -1.993531733751297,
428 430 2.724882731126854e1};
429   -
  431 +
430 432 int i,k,m;
431 433 nm = n;
432 434 if ((x < 0.0) || (n < 0)) return 1;
... ... @@ -702,5 +704,26 @@ int bessjyv(double v,double x,double &amp;vm,double *jv,double *yv,
702 704 }
703 705 vm = n + v0;
704 706 return 0;
  707 +}
  708 +
  709 +int bessjyv_sph(int v, double z, double &vm, double* cjv,
  710 + double* cyv, double* cjvp, double* cyvp)
  711 +{
  712 + //first, compute the bessel functions of fractional order
  713 + bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
  714 +
  715 + //iterate through each and scale
  716 + for(int n = 0; n<=v; n++)
  717 + {
  718 +
  719 + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0));
  720 + cyv[n] = cyv[n] * sqrt(PI/(z * 2.0));
  721 +
  722 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(PI / (z * 2.0));
  723 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(PI / (z * 2.0));
  724 + }
  725 +
  726 + return 0;
  727 +
705 728 }
706   -
  729 +
... ...
cbessjy.cpp
... ... @@ -724,6 +724,7 @@ int cbessjyva_sph(int v,complex&lt;double&gt; z,double &amp;vm,complex&lt;double&gt;*cjv,
724 724 //iterate through each and scale
725 725 for(int n = 0; n<=v; n++)
726 726 {
  727 +
727 728 cjv[n] = cjv[n] * sqrt(PI/(z * 2.0));
728 729 cyv[n] = cyv[n] * sqrt(PI/(z * 2.0));
729 730  
... ...
colormap.h deleted
1   -#ifndef RTS_COLORMAP_H
2   -#define RTS_COLORMAP_H
3   -
4   -#include <string>
5   -#include <qimage.h>
6   -#include <qcolor.h>
7   -#include "rts/cuda/error.h"
8   -
9   -
10   -#define BREWER_CTRL_PTS 11
11   -
12   -#ifdef __CUDACC__
13   -texture<float4, cudaTextureType1D> cudaTexBrewer;
14   -static cudaArray* gpuBrewer;
15   -#endif
16   -
17   -
18   -
19   -namespace rts{
20   - namespace colormap{
21   -
22   -enum colormapType {cmBrewer, cmGrayscale};
23   -
24   -static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size)
25   -{
26   - //create an image object
27   - QImage image(x_size, y_size, QImage::Format_RGB32);
28   -
29   - int i;
30   - unsigned char r, g, b;
31   - unsigned int x, y;
32   - for(y=0; y<y_size; y++)
33   - for(x=0; x<x_size; x++)
34   - {
35   - //calculate the 1D index
36   - i = y * x_size + x;
37   -
38   - r = buffer[i * 3 + 0];
39   - g = buffer[i * 3 + 1];
40   - b = buffer[i * 3 + 2];
41   -
42   - //set the image pixel
43   - QColor color(r, g, b);
44   - image.setPixel(x, y, color.rgb());
45   - }
46   -
47   - image.save(filename.c_str());
48   -}
49   -
50   -#ifdef __CUDACC__
51   -static void initBrewer()
52   -{
53   - //initialize the Brewer colormap
54   -
55   - //allocate CPU space
56   - float4 cpuColorMap[BREWER_CTRL_PTS];
57   -
58   - //define control rtsPoints
59   - cpuColorMap[0] = make_float4(0.192157f, 0.211765f, 0.584314f, 1.0f);
60   - cpuColorMap[1] = make_float4(0.270588f, 0.458824f, 0.705882f, 1.0f);
61   - cpuColorMap[2] = make_float4(0.454902f, 0.678431f, 0.819608f, 1.0f);
62   - cpuColorMap[3] = make_float4(0.670588f, 0.85098f, 0.913725f, 1.0f);
63   - cpuColorMap[4] = make_float4(0.878431f, 0.952941f, 0.972549f, 1.0f);
64   - cpuColorMap[5] = make_float4(1.0f, 1.0f, 0.74902f, 1.0f);
65   - cpuColorMap[6] = make_float4(0.996078f, 0.878431f, 0.564706f, 1.0f);
66   - cpuColorMap[7] = make_float4(0.992157f, 0.682353f, 0.380392f, 1.0f);
67   - cpuColorMap[8] = make_float4(0.956863f, 0.427451f, 0.262745f, 1.0f);
68   - cpuColorMap[9] = make_float4(0.843137f, 0.188235f, 0.152941f, 1.0f);
69   - cpuColorMap[10] = make_float4(0.647059f, 0.0f, 0.14902f, 1.0f);
70   -
71   -
72   - int width = BREWER_CTRL_PTS;
73   - int height = 0;
74   -
75   -
76   - // allocate array and copy colormap data
77   - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);
78   -
79   - HANDLE_ERROR(cudaMallocArray(&gpuBrewer, &channelDesc, width, height));
80   -
81   - HANDLE_ERROR(cudaMemcpyToArray(gpuBrewer, 0, 0, cpuColorMap, sizeof(float4)*width, cudaMemcpyHostToDevice));
82   -
83   - // set texture parameters
84   - cudaTexBrewer.addressMode[0] = cudaAddressModeClamp;
85   - //texBrewer.addressMode[1] = cudaAddressModeClamp;
86   - cudaTexBrewer.filterMode = cudaFilterModeLinear;
87   - cudaTexBrewer.normalized = true; // access with normalized texture coordinates
88   -
89   - // Bind the array to the texture
90   - HANDLE_ERROR(cudaBindTextureToArray( cudaTexBrewer, gpuBrewer, channelDesc));
91   -
92   -}
93   -
94   -static void destroyBrewer()
95   -{
96   - HANDLE_ERROR(cudaFreeArray(gpuBrewer));
97   -
98   -}
99   -
100   -template<class T>
101   -__global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1)
102   -{
103   - int i = blockIdx.x * blockDim.x + threadIdx.x;
104   - if(i >= N) return;
105   -
106   - //compute the normalized value on [minVal maxVal]
107   - float a = (gpuSource[i] - minVal) / (maxVal - minVal);
108   -
109   - //lookup the color
110   - float shift = 1.0/BREWER_CTRL_PTS;
111   - float4 color = tex1D(cudaTexBrewer, a+shift);
112   -
113   - gpuDest[i * 3 + 0] = 255 * color.x;
114   - gpuDest[i * 3 + 1] = 255 * color.y;
115   - gpuDest[i * 3 + 2] = 255 * color.z;
116   -}
117   -
118   -template<class T>
119   -__global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1)
120   -{
121   - int i = blockIdx.x * blockDim.x + threadIdx.x;
122   - if(i >= N) return;
123   -
124   - //compute the normalized value on [minVal maxVal]
125   - float a = (gpuSource[i] - minVal) / (maxVal - minVal);
126   -
127   - gpuDest[i * 3 + 0] = 255 * a;
128   - gpuDest[i * 3 + 1] = 255 * a;
129   - gpuDest[i * 3 + 2] = 255 * a;
130   -}
131   -
132   -template<class T>
133   -static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T minVal = 0, T maxVal = 1, colormapType cm = cmGrayscale, int blockDim = 128)
134   -{
135   - //This function converts a scalar field on the GPU to a color image on the GPU
136   - int gridDim = (nVals + blockDim - 1)/blockDim;
137   - if(cm == cmGrayscale)
138   - applyGrayscale<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal);
139   - else if(cm == cmBrewer)
140   - {
141   - initBrewer();
142   - applyBrewer<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal);
143   - destroyBrewer();
144   - }
145   -
146   -}
147   -
148   -template<class T>
149   -static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T minVal, T maxVal, colormapType cm = cmGrayscale)
150   -{
151   - //this function converts a scalar field on the GPU to a color image on the CPU
152   -
153   - //first create the color image on the GPU
154   -
155   - //allocate GPU memory for the color image
156   - unsigned char* gpuDest;
157   - HANDLE_ERROR(cudaMalloc( (void**)&gpuDest, sizeof(unsigned char) * nVals * 3 ));
158   -
159   - //HANDLE_ERROR(cudaMemset(gpuSource, 0, sizeof(T) * nVals));
160   -
161   - //create the image on the gpu
162   - gpu2gpu(gpuSource, gpuDest, nVals, minVal, maxVal, cm);
163   -
164   - //HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3));
165   -
166   - //copy the image from the GPU to the CPU
167   - HANDLE_ERROR(cudaMemcpy(cpuDest, gpuDest, sizeof(unsigned char) * nVals * 3, cudaMemcpyDeviceToHost));
168   -
169   - HANDLE_ERROR(cudaFree( gpuDest ));
170   -
171   -}
172   -
173   -template<typename T>
174   -static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale)
175   -{
176   - //allocate a color buffer
177   - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size);
178   -
179   - //do the mapping
180   - gpu2cpu<T>(gpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm);
181   -
182   - //copy the buffer to an image
183   - buffer2image(cpuBuffer, fileDest, x_size, y_size);
184   -
185   - free(cpuBuffer);
186   -}
187   -
188   -#endif
189   -
190   -template<class T>
191   -static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T valMin, T valMax, colormapType cm = cmGrayscale)
192   -{
193   - int i;
194   - float a;
195   - float range = valMax - valMin;
196   - for(i = 0; i<nVals; i++)
197   - {
198   - //normalize to the range [valMin valMax]
199   - a = (cpuSource[i] - valMin) / range;
200   -
201   - cpuDest[i * 3 + 0] = 255 * a;
202   - cpuDest[i * 3 + 1] = 255 * a;
203   - cpuDest[i * 3 + 2] = 255 * a;
204   - }
205   -
206   -}
207   -
208   -
209   -
210   -template<typename T>
211   -static void cpu2image(T* cpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale)
212   -{
213   - //allocate a color buffer
214   - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size);
215   -
216   - //do the mapping
217   - cpu2cpu<T>(cpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm);
218   -
219   - //copy the buffer to an image
220   - buffer2image(cpuBuffer, fileDest, x_size, y_size);
221   -
222   - free(cpuBuffer);
223   -
224   -}
225   -
226   -}} //end namespace colormap and rts
227   -
228   -#endif
229   -
dataTypes.h
... ... @@ -24,6 +24,8 @@ typedef double ptype;
24 24  
25 25 typedef ptype fieldPoint;
26 26  
  27 +extern bool verbose;
  28 +
27 29 //hybrid GPU/CPU complex data typ
28 30 #include "rts/math/complex.h"
29 31 #include "rts/math/vector.h"
... ...
defaults.h
... ... @@ -15,14 +15,14 @@
15 15 #define DEFAULT_FOCUS_X 0
16 16 #define DEFAULT_FOCUS_Y 0
17 17 #define DEFAULT_FOCUS_Z 0
18   -#define DEFAULT_INCIDENT_ORDER 100
  18 +//#define DEFAULT_INCIDENT_ORDER 20
19 19 #define DEFAULT_STABILITY_PARM 1.4
20 20  
21 21 //optics
22   -#define DEFAULT_CONDENSER_MIN 0.0
  22 +#define DEFAULT_CONDENSER_MIN 0
23 23 #define DEFAULT_CONDENSER_MAX 1
24 24  
25   -#define DEFAULT_OBJECTIVE_MIN 0.0
  25 +#define DEFAULT_OBJECTIVE_MIN 0
26 26 #define DEFAULT_OBJECTIVE_MAX 1
27 27  
28 28 //incident light direction
... ... @@ -36,17 +36,20 @@
36 36 //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective
37 37  
38 38  
39   -#define DEFAULT_SLICE_MIN_X -5
40   -#define DEFAULT_SLICE_MIN_Y 0
41   -#define DEFAULT_SLICE_MIN_Z -5
  39 +#define DEFAULT_PLANE_MIN_X -5
  40 +#define DEFAULT_PLANE_MIN_Y 0
  41 +#define DEFAULT_PLANE_MIN_Z -5
42 42  
43   -#define DEFAULT_SLICE_MAX_X 5
44   -#define DEFAULT_SLICE_MAX_Y 0
45   -#define DEFAULT_SLICE_MAX_Z 5
  43 +#define DEFAULT_PLANE_MAX_X 5
  44 +#define DEFAULT_PLANE_MAX_Y 0
  45 +#define DEFAULT_PLANE_MAX_Z 5
46 46  
47   -#define DEFAULT_SLICE_NORM_X 0
48   -#define DEFAULT_SLICE_NORM_Y 1
49   -#define DEFAULT_SLICE_NORM_Z 0
  47 +#define DEFAULT_PLANE_NORM_X 0
  48 +#define DEFAULT_PLANE_NORM_Y 1
  49 +#define DEFAULT_PLANE_NORM_Z 0
  50 +
  51 +#define DEFAULT_PLANE_SIZE 40
  52 +#define DEFAULT_PLANE_POSITION 0
50 53  
51 54  
52 55 /*
... ... @@ -64,21 +67,23 @@
64 67 */
65 68  
66 69  
67   -#define DEFAULT_FIELD_ORDER 200
  70 +#define DEFAULT_FIELD_ORDER 10
68 71  
69   -#define DEFAULT_SAMPLES 200
  72 +#define DEFAULT_SAMPLES 400
70 73  
71 74 #define DEFAULT_SLICE_RES 256
72 75  
  76 +#define DEFAULT_SPHERE_THETA_R 1000
  77 +
73 78 #define DEFAULT_PADDING 1
74 79 #define DEFAULT_SUPERSAMPLE 1
75 80  
76   -#define DEFAULT_INTENSITY_FILE "testappend"
  81 +#define DEFAULT_INTENSITY_FILE "out_i.bmp"
77 82 #define DEFAULT_TRANSMITTANCE_FILE ""
78   -#define DEFAULT_ABSORBANCE_FILE "out_a"
  83 +#define DEFAULT_ABSORBANCE_FILE "out_a.bmp"
79 84 #define DEFAULT_NEAR_FILE "out_n.bmp"
80 85 #define DEFAULT_FAR_FILE "out_f.bmp"
81   -#define DEFAULT_EXTENDED_SOURCE "einstein_small.jpg"
  86 +#define DEFAULT_EXTENDED_SOURCE ""
82 87 #define DEFAULT_FIELD_TYPE "magnitude"
83 88 #define DEFAULT_FORMAT fileoutStruct::formatImage
84 89 #define DEFAULT_COLORMAP "brewer"
... ...
fieldslice.cpp
... ... @@ -8,14 +8,16 @@
8 8 using namespace std;
9 9  
10 10 fieldslice::fieldslice(unsigned int x_size, unsigned int y_size)
11   -{
  11 +{
  12 + x_hat = y_hat = z_hat = NULL;
  13 +
12 14 //save the slice resolution
13 15 R[0] = x_size;
14 16 R[1] = x_size;
15 17  
16 18 scalarField = true;
17 19  
18   - //init_gpu();
  20 + init_gpu();
19 21  
20 22  
21 23 }
... ... @@ -101,5 +103,5 @@ fieldslice::fieldslice()
101 103  
102 104 fieldslice::~fieldslice()
103 105 {
104   - //kill_gpu();
  106 + kill_gpu();
105 107 }
... ...
fieldslice.cu
1 1 #include "fieldslice.h"
2 2 #include "dataTypes.h"
3   -#include "rts/cuda/error.h"
  3 +#include "rts/cuda/error.h"
  4 +#include "rts/cuda/threads.h"
4 5  
5 6  
6 7 __global__ void field_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* I, unsigned int N)
7 8 {
8 9 //compute the index for this thread
9   - int i = blockIdx.x * blockDim.x + threadIdx.x;
  10 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  11 + int i = ThreadIndex1D();
  12 +
10 13 if(i >= N) return;
11 14  
12 15 ptype xm = x[i].abs();
... ... @@ -66,7 +69,8 @@ __global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, pty
66 69 __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N)
67 70 {
68 71 //compute the index for this thread
69   - int i = blockIdx.x * blockDim.x + threadIdx.x;
  72 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  73 + int i = ThreadIndex1D();
70 74 if(i >= N) return;
71 75  
72 76 V[i] = field_component[i].real();
... ... @@ -75,7 +79,8 @@ __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N)
75 79 __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N)
76 80 {
77 81 //compute the index for this thread
78   - int i = blockIdx.x * blockDim.x + threadIdx.x;
  82 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  83 + int i = ThreadIndex1D();
79 84 if(i >= N) return;
80 85  
81 86 V[i] = field_component[i].imag();
... ... @@ -84,7 +89,8 @@ __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned i
84 89 __global__ void field_sqrt(ptype* input, ptype* output, unsigned int N)
85 90 {
86 91 //compute the index for this thread
87   - int i = blockIdx.x * blockDim.x + threadIdx.x;
  92 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  93 + int i = ThreadIndex1D();
88 94 if(i >= N) return;
89 95  
90 96 output[i] = sqrt(input[i]);
... ... @@ -115,7 +121,8 @@ scalarslice fieldslice::Mag()
115 121  
116 122 //compute the total number of values in the slice
117 123 unsigned int N = R[0] * R[1];
118   - int gridDim = (N+BLOCK-1)/BLOCK;
  124 + //int gridDim = (N+BLOCK-1)/BLOCK;
  125 + dim3 gridDim = GenGrid1D(N, BLOCK);
119 126  
120 127 field_intensity<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, result->S, N);
121 128 field_sqrt<<<gridDim, BLOCK>>>(result->S, result->S, N);
... ... @@ -132,7 +139,8 @@ scalarslice fieldslice::Real()
132 139  
133 140 //compute the total number of values in the slice
134 141 unsigned int N = R[0] * R[1];
135   - int gridDim = (N+BLOCK-1)/BLOCK;
  142 + //int gridDim = (N+BLOCK-1)/BLOCK;
  143 + dim3 gridDim = GenGrid1D(N, BLOCK);
136 144  
137 145 field_real<<<gridDim, BLOCK>>>(x_hat, result->S, N);
138 146  
... ... @@ -148,7 +156,8 @@ scalarslice fieldslice::Imag()
148 156  
149 157 //compute the total number of values in the slice
150 158 unsigned int N = R[0] * R[1];
151   - int gridDim = (N+BLOCK-1)/BLOCK;
  159 + //int gridDim = (N+BLOCK-1)/BLOCK;
  160 + dim3 gridDim = GenGrid1D(N, BLOCK);
152 161  
153 162 field_imaginary<<<gridDim, BLOCK>>>(x_hat, result->S, N);
154 163  
... ... @@ -192,7 +201,6 @@ void fieldslice::ScaleField(ptype v)
192 201  
193 202 //compute the total number of values in the slice
194 203 unsigned int N = R[0] * R[1];
195   - //cout<<"Size of mag field: "<<N<<endl;
196 204 int gridDim = (N+BLOCK-1)/BLOCK;
197 205  
198 206 field_scale<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, N, v);
... ... @@ -200,19 +208,23 @@ void fieldslice::ScaleField(ptype v)
200 208 }
201 209  
202 210 void fieldslice::init_gpu()
203   -{
  211 +{
  212 + //if the field has no size, return
  213 + if(R[0] == 0 || R[1] == 0)
  214 + return;
  215 +
  216 + //free any previous memory allocations
  217 + if(x_hat)
  218 + HANDLE_ERROR(cudaFree(x_hat));
  219 + if(y_hat)
  220 + HANDLE_ERROR(cudaFree(y_hat));
  221 + if(z_hat)
  222 + HANDLE_ERROR(cudaFree(z_hat));
  223 +
204 224 //allocate space on the GPU for the field slice
205 225 HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex)));
206   - //HANDLE_ERROR(cudaMemset(x_hat, 0, R[0] * R[1] * sizeof(bsComplex)));
207 226  
208   - //if the field is scalar, y_hat and z_hat are unused
209   - if(scalarField)
210   - {
211   - y_hat = NULL;
212   - z_hat = NULL;
213   -
214   - }
215   - else
  227 + if(!scalarField)
216 228 {
217 229 HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex)));
218 230 //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex)));
... ... @@ -233,6 +245,8 @@ void fieldslice::kill_gpu()
233 245 if(z_hat != NULL)
234 246 HANDLE_ERROR(cudaFree(z_hat));
235 247  
  248 + x_hat = y_hat = z_hat = NULL;
  249 +
236 250 }
237 251  
238 252 void fieldslice::clear_gpu()
... ... @@ -275,7 +289,7 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv)
275 289 result.scalarField = scalarField;
276 290  
277 291 //allocate space for the new field
278   - result.init_gpu();
  292 + //result.init_gpu();
279 293  
280 294 //create one thread for each pixel of the field slice
281 295 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
... ... @@ -291,3 +305,57 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv)
291 305  
292 306 return result;
293 307 }
  308 +
  309 +fieldslice::fieldslice(const fieldslice& rhs)
  310 +{
  311 + R[0] = rhs.R[0];
  312 + R[1] = rhs.R[1];
  313 + scalarField = rhs.scalarField;
  314 +
  315 + x_hat = y_hat = z_hat = NULL;
  316 +
  317 + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1];
  318 + if(rhs.x_hat != NULL)
  319 + {
  320 + HANDLE_ERROR(cudaMalloc( (void**)&x_hat, bytes));
  321 + HANDLE_ERROR(cudaMemcpy( x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice));
  322 + }
  323 + if(rhs.y_hat != NULL)
  324 + {
  325 + HANDLE_ERROR(cudaMalloc( (void**)&y_hat, bytes));
  326 + HANDLE_ERROR(cudaMemcpy( y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice));
  327 + }
  328 + if(rhs.z_hat != NULL)
  329 + {
  330 + HANDLE_ERROR(cudaMalloc( (void**)&z_hat, bytes));
  331 + HANDLE_ERROR(cudaMemcpy( z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice));
  332 + }
  333 +
  334 +}
  335 +
  336 +fieldslice& fieldslice::operator=(const fieldslice& rhs)
  337 +{
  338 + //make sure this isn't a self-allocation
  339 + if(this != &rhs)
  340 + {
  341 + //make a shallow copy
  342 + R[0] = rhs.R[0];
  343 + R[1] = rhs.R[1];
  344 + scalarField = rhs.scalarField;
  345 +
  346 + //initialize to new parameters
  347 + init_gpu();
  348 +
  349 + //make a deep copy
  350 + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1];
  351 + if(x_hat != NULL)
  352 + HANDLE_ERROR(cudaMemcpy(x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice));
  353 + if(y_hat != NULL)
  354 + HANDLE_ERROR(cudaMemcpy(y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice));
  355 + if(z_hat != NULL)
  356 + HANDLE_ERROR(cudaMemcpy(z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice));
  357 + }
  358 +
  359 + return *this;
  360 +
  361 +}
... ...
fieldslice.h
... ... @@ -31,6 +31,9 @@ struct fieldslice
31 31  
32 32 ~fieldslice();
33 33  
  34 + //copy constructor
  35 + fieldslice(const fieldslice& rhs);
  36 +
34 37 //void setPos(bsPoint pMin, bsPoint pMax, bsVector N);
35 38  
36 39 scalarslice Mag();
... ... @@ -47,6 +50,7 @@ struct fieldslice
47 50  
48 51 //crop a region from the field
49 52 fieldslice crop(int u, int v, int su, int sv);
  53 + fieldslice& operator=(const fieldslice& rhs);
50 54  
51 55 void init_gpu();
52 56 void kill_gpu();
... ...
fileout.cu
... ... @@ -186,11 +186,21 @@ void fileoutStruct::Save(microscopeStruct* scope)
186 186 //save images of the fields in the microscope
187 187  
188 188 //if the user specifies an extended source
189   - if(scope->focalPoints.size() > 1)
  189 + if(scope->focalPoints.size() > 0)
190 190 {
191 191 //simulate the extended source and output the detector image
192 192 scope->SimulateExtendedSource();
193 193  
  194 + //saveNearField(&scope->nf);
  195 + saveFarField(scope);
  196 +
  197 + //save the detector images
  198 + saveDetector(scope);
  199 +
  200 + //simulate scattering for the last point (so that you have a near field image)
  201 + scope->SimulateScattering();
  202 + saveNearField(&scope->nf);
  203 +
194 204 }
195 205 else
196 206 {
... ... @@ -203,12 +213,15 @@ void fileoutStruct::Save(microscopeStruct* scope)
203 213 //run the far-field simulation
204 214 scope->SimulateImaging();
205 215  
  216 + //saveNearField(&scope->nf);
206 217 saveFarField(scope);
207 218  
  219 + //save the detector images
  220 + saveDetector(scope);
  221 +
208 222 }
209 223  
210   - //save the detector images
211   - saveDetector(scope);
  224 +
212 225  
213 226  
214 227 }
... ...
fileout.h
... ... @@ -5,7 +5,7 @@
5 5 //#include "defaults.h"
6 6 #include "dataTypes.h"
7 7  
8   -#include "colormap.h"
  8 +#include "rts/graphics/colormap.h"
9 9 #include "fieldslice.h"
10 10 #include "nearfield.h"
11 11 #include "microscope.h"
... ... @@ -34,7 +34,7 @@ struct fileoutStruct{
34 34 //image_source source;
35 35  
36 36 //color map info
37   - rts::colormap::colormapType colormap;
  37 + rts::colormapType colormap;
38 38 ptype colorMax;
39 39  
40 40 void Save(microscopeStruct* scope);
... ...
main.cpp
... ... @@ -24,6 +24,7 @@ microscopeStruct* SCOPE;
24 24 #include "warnings.h"
25 25  
26 26 fileoutStruct gFileOut;
  27 +bool verbose = false;
27 28 using namespace std;
28 29  
29 30 int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv,
... ... @@ -31,32 +32,19 @@ int cbessjyva(double v,complex&lt;double&gt; z,double &amp;vm,complex&lt;double&gt;*cjv,
31 32  
32 33 int main(int argc, char *argv[])
33 34 {
34   - //test Envi loading and saving
35   - //EnviFile envi("testenvi", "w");
36   -
37   - //float* data = (float*)malloc(sizeof(float) * 100 * 100);
38   - //envi.addBand(data, 100, 100, 100);
39   -
40   - //envi.close();
41   -
42   - //return 0;
43 35  
44 36 SCOPE = new microscopeStruct();
45 37  
46   - cout<<SCOPE->nf.Uf.R[0]<<endl;
47   -
48 38 LoadParameters(argc, argv);
49 39  
50   - //TestSimulation(NF, SCOPE, &gFileOut);
51   -
52 40 //initialize GPU memory for fields
53 41 SCOPE->init();
54 42  
55   - OutputOptions();
56   -
57 43 gFileOut.Save(SCOPE);
58 44  
59   - //NF->destroy();
  45 + if(verbose)
  46 + OutputOptions();
  47 +
60 48 SCOPE->destroy();
61 49  
62 50  
... ...
microscope.cu
... ... @@ -4,7 +4,7 @@
4 4 #include "rts/tools/progressbar.h"
5 5 #include "rts/cuda/timer.h"
6 6 #include "dataTypes.h"
7   -#include "colormap.h"
  7 +#include "rts/graphics/colormap.h"
8 8  
9 9 #include <QImage>
10 10  
... ... @@ -112,8 +112,8 @@ void microscopeStruct::getFarField()
112 112 //Compute the Far Field image of the focal plane
113 113  
114 114 //clear the memory from previous detector fields
115   - Ud.kill_gpu();
116   - Ufd.kill_gpu();
  115 + //Ud.kill_gpu();
  116 + //Ufd.kill_gpu();
117 117  
118 118 //first crop the filtered near-field image of the source and scattered fields
119 119 Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]);
... ... @@ -261,9 +261,14 @@ void microscopeStruct::SimulateExtendedSource()
261 261 t += gpuStopTimer();
262 262  
263 263 rtsProgressBar((double)(i+1)/(double)npts * 100);
  264 + //unsigned char c;
  265 + //cin>>c;
264 266 }
265   - cout<<endl;
266   - cout<<"Time per source: "<<t/npts<<"ms"<<endl;
  267 + if(verbose)
  268 + {
  269 + cout<<endl;
  270 + cout<<"Time per source: "<<t/npts<<"ms"<<endl;
  271 + }
267 272  
268 273 }
269 274  
... ... @@ -304,3 +309,15 @@ void microscopeStruct::LoadExtendedSource(std::string filename)
304 309 }
305 310 }
306 311 }
  312 +
  313 +std::string microscopeStruct::toStr()
  314 +{
  315 + stringstream ss;
  316 + ss<<nf.toStr();
  317 +
  318 + ss<<"----------Optics--------------"<<endl<<endl;
  319 + ss<<"Objective NA: "<<objective[0]<<" to "<<objective[1]<<endl;
  320 + return ss.str();
  321 +
  322 +
  323 +}
... ...
microscope.h
... ... @@ -63,6 +63,8 @@ struct microscopeStruct
63 63 scalarslice getTransmittance();
64 64 scalarslice getIntensity();
65 65  
  66 + string toStr();
  67 +
66 68  
67 69  
68 70 };
... ...
montecarlo.cpp
... ... @@ -35,18 +35,12 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout)
35 35 ptype inPhi = asin(NAin);
36 36 ptype outPhi = asin(NAout);
37 37  
38   - //cout<<"inPhi: "<<inPhi<<endl;
39   - //cout<<"outPhi: "<<outPhi<<endl;
40   -
41 38 //calculate the z-values associated with these angles
42 39 ptype inZ = cos(inPhi);
43 40 ptype outZ = cos(outPhi);
44 41  
45 42 ptype rangeZ = inZ - outZ;
46 43  
47   - //cout<<"inZ: "<<inZ<<endl;
48   - //cout<<"outZ: "<<outZ<<endl;
49   -
50 44 //draw a distribution of random phi, z values
51 45 ptype z, phi, theta;
52 46 for(int i=0; i<N; i++)
... ... @@ -58,7 +52,6 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout)
58 52 phi = acos(z);
59 53  
60 54 //compute and store cartesian coordinates
61   - //bsVector spherical(1, theta + kSph[1], phi + kSph[2]);
62 55 bsVector spherical(1, theta, phi);
63 56 bsVector cart = spherical.sph2cart();
64 57 samples[i] = rotation * cart;
... ...
nearfield.cpp
1 1 #include "nearfield.h"
  2 +#include <time.h>
  3 +#include <math.h>
  4 +
  5 +#ifdef _WIN32
  6 +#define isnan(x) _isnan(x)
  7 +#define isinf(x) (!_finite(x))
  8 +#endif
  9 +
  10 +int bessjyv_sph(int v, double z, double &vm, double* cjv,
  11 + double* cyv, double* cjvp, double* cyvp);
2 12  
3 13 nearfieldStruct::nearfieldStruct()
4 14 {
5 15 scalarSim = true;
6 16 planeWave = false;
  17 + lut_us = true;
  18 + lut_uf = false;
7 19  
8 20 nWaves = 0;
9 21 }
... ... @@ -46,6 +58,8 @@ std::string nearfieldStruct::toStr()
46 58 ss<<"Condenser NA: "<<condenser[0]<<" to "<<condenser[1]<<std::endl;
47 59 ss<<"Focal Point: "<<focus[0]<<", "<<focus[1]<<", "<<focus[2]<<std::endl;
48 60 ss<<"Field Slice: "<<std::endl;
  61 + if(lut_us)
  62 + ss<<"LUT Parameters --- min: "<<d_min<<" max: "<<d_max<<std::endl;
49 63 ss<<pos<<std::endl;
50 64  
51 65 ss<<std::endl<<"---------Materials-----------"<<std::endl;
... ... @@ -61,6 +75,10 @@ std::string nearfieldStruct::toStr()
61 75 for(unsigned int s=0; s<sVector.size(); s++)
62 76 ss<<sVector[s].toStr()<<std::endl;
63 77  
  78 + ss<<"---------Timings-------------"<<std::endl;
  79 + ss<<"Uf = "<<t_Uf<<"ms"<<std::endl;
  80 + ss<<"Us = "<<t_Us<<"ms"<<std::endl;
  81 +
64 82 return ss.str();
65 83 }
66 84  
... ... @@ -70,7 +88,8 @@ void nearfieldStruct::calcWaves()
70 88 inWaves.resize(nWaves);
71 89  
72 90 //re-seed the random number generator
73   - //srand(seed);
  91 + //srand(time(NULL));
  92 + srand(NULL);
74 93  
75 94 //calculate the monte-carlo samples
76 95 mcSampleNA(&inWaves[0], nWaves, k, condenser[0], condenser[1]);
... ... @@ -84,6 +103,8 @@ void nearfieldStruct::calcSpheres()
84 103 //calculate all of the constants necessary to evaluate the scattered field
85 104 //estimate the order required to represent the scattered field for each sphere
86 105  
  106 +
  107 +
87 108 //for each sphere
88 109 for(int i=0; i<sVector.size(); i++)
89 110 {
... ... @@ -91,12 +112,10 @@ void nearfieldStruct::calcSpheres()
91 112  
92 113 //calculate the required order
93 114 sVector[i].calcNl(lambda);
94   - //std::cout<<sVector[i].Nl<<std::endl;
95 115  
96 116 //set the refractive index for the sphere
97 117 int imat = sVector[i].iMaterial;
98 118 rts::rtsComplex<ptype> n = mVector[imat](lambda);
99   - //std::cout<<"Sphere refractive index: "<<n<<std::endl;
100 119  
101 120 //calculate the scattering coefficients
102 121 sVector[i].calcCoeff(lambda, n);
... ... @@ -104,18 +123,109 @@ void nearfieldStruct::calcSpheres()
104 123 //save the refractive index
105 124 sVector[i].n = n;
106 125  
  126 + //if the LUT is used, calculate Usp(theta, r)
  127 + if(lut_us)
  128 + {
  129 + sVector[i].calcUp(lambda, n, pos, max(U.R[0], U.R[1]));
  130 + }
  131 +
  132 +
107 133 }
108 134  
109 135 }
110 136  
  137 +void nearfieldStruct::calcUs()
  138 +{
  139 +
  140 +
  141 + if(lut_us)
  142 + scalarUpLut();
  143 + else
  144 + scalarUs();
  145 +}
  146 +
  147 +void nearfieldStruct::calcUf()
  148 +{
  149 + if(lut_uf)
  150 + scalarUfLut();
  151 + else
  152 + scalarUf();
  153 +}
  154 +
111 155 void nearfieldStruct::Simulate()
112 156 {
  157 + //initialize timings
  158 + t_Uf = 0;
  159 + t_Us = 0;
  160 +
113 161 //compute a set of plane waves for Monte-Carlo simulation
114 162 calcWaves();
115 163  
116 164 //the near field has to be simulated no matter what the output rtsPoint is
117   - scalarUf();
  165 + calcUf();
118 166 calcSpheres();
119   - scalarUs();
  167 + calcUs();
120 168 sumUf();
  169 +
  170 + //U.Mag().toImage("testU.bmp");
  171 +}
  172 +
  173 +void nearfieldStruct::calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR)
  174 +{
  175 + /*Compute the look-up-table for spherical bessel functions used for the incident field
  176 + j = (Nl + 1) x aR array of values
  177 + aR = resolution of j
  178 + */
  179 +
  180 + //compute the wavenumber
  181 + ptype k = 2 * PI / lambda;
  182 + unsigned int Nl = m;
  183 +
  184 + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored)
  185 + int bytes = sizeof(double) * (Nl + 1);
  186 + double* cjv_kd = (double*)malloc(bytes);
  187 + double* cyv_kd = (double*)malloc(bytes);
  188 + double* cjvp_kd = (double*)malloc(bytes);
  189 + double* cyvp_kd = (double*)malloc(bytes);
  190 +
  191 + //compute the bessel functions using the CPU-based algorithm
  192 + double vm;
  193 +
  194 + //for each sample along r
  195 + ptype dr = (d_max - d_min) / (dR - 1);
  196 + ptype d;
  197 + ptype jv;
  198 + for(int id = 0; id < dR; id++)
  199 + {
  200 + d = id * dr + d_min;
  201 + double kd = k*d;
  202 + bessjyv_sph(Nl, kd, vm, cjv_kd, cyv_kd, cjvp_kd, cyvp_kd);
  203 +
  204 + //copy the double data to the bsComplex array
  205 + for(int l=0; l<=Nl; l++)
  206 + {
  207 + jv = cjv_kd[l];
  208 + if(isnan(jv) || isinf(jv))
  209 + {
  210 + if(kd == 0 && l == 0)
  211 + jv = 1;
  212 + else
  213 + jv = 0;
  214 + }
  215 + j[id * (Nl+1) + l] = jv;
  216 + }
  217 + }
  218 +
  219 + /*ofstream outfile("uf_besselout.txt");
  220 + for(int ir = 0; ir < dR; ir++)
  221 + {
  222 + outfile<<ir*dr + d_min<<endl;
  223 + for(int l = 0; l<=Nl; l++)
  224 + {
  225 + outfile<<j[ir * (Nl+1) + l]<<" --";
  226 + }
  227 + outfile<<endl;
  228 + }
  229 + outfile.close();*/
  230 +
121 231 }
... ...
nearfield.h
... ... @@ -31,6 +31,8 @@ struct nearfieldStruct
31 31  
32 32 //slices for the focused field
33 33 fieldslice Uf;
  34 + ptype d_min, d_max;
  35 +
34 36 // and total field: Uf + sum(Us)
35 37 fieldslice U;
36 38  
... ... @@ -43,6 +45,14 @@ struct nearfieldStruct
43 45 //flag for a plane wave
44 46 bool planeWave;
45 47  
  48 + //flag for using a LUT
  49 + bool lut_uf;
  50 + bool lut_us;
  51 +
  52 + //timings
  53 + float t_Uf;
  54 + float t_Us;
  55 +
46 56  
47 57  
48 58 //---------Scatterers------------
... ... @@ -78,10 +88,17 @@ struct nearfieldStruct
78 88 void setPos(bsPoint pMin, bsPoint pMax, bsVector normal);
79 89  
80 90 //this function re-computes the focused field
  91 + void calcUf();
81 92 void scalarUf();
  93 + void scalarUfLut();
  94 +
  95 + void calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR);
82 96  
83 97 //compute the field scattered by all of the materials
  98 + void calcUs();
84 99 void scalarUs();
  100 + void scalarUpLut();
  101 +
85 102  
86 103 //add the incident field to the sum of scattered fields
87 104 void sumUf();
... ...
nfScalarUf.cu
... ... @@ -5,7 +5,7 @@
5 5 #include "rts/cuda/error.h"
6 6 #include "rts/cuda/timer.h"
7 7  
8   -
  8 +//Incident field for a single plane wave
9 9 __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR)
10 10 {
11 11 /*Compute the scalar focused field using Debye focusing
... ... @@ -41,7 +41,8 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p
41 41 Uf[i] = exp(d) * A;
42 42  
43 43 }
44   -
  44 +
  45 +//Incident field for a focused point source
45 46 __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4)
46 47 {
47 48 /*Compute the scalar focused field using Debye focusing
... ... @@ -151,7 +152,6 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt
151 152 }
152 153  
153 154 sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);
154   - //sumUf += il * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);
155 155  
156 156 il *= im;
157 157 }
... ... @@ -162,21 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt
162 162  
163 163 void nearfieldStruct::scalarUf()
164 164 {
165   - //Compute the incident field via a scalar simulation
166   - //This method uses Debye focusing to approximate the field analytically
167   -
168   - //time the calculation of the focused field
169   - //gpuStartTimer();
170   -
171   - //set the field slice to a scalar field
172   - //Uf.scalarField = true;
173   -
174   - //initialize the GPU arrays
175   - //Uf.init_gpu();
  165 +
  166 + gpuStartTimer();
176 167  
177 168 //create one thread for each pixel of the field slice
178 169 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
179   - dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  170 + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
180 171  
181 172 //if we are computing a plane wave, call the gpuScalarUfp function
182 173 if(planeWave)
... ... @@ -191,10 +182,7 @@ void nearfieldStruct::scalarUf()
191 182 ptype cosBeta = cos(asin(condenser[1]));
192 183 //compute the scalar Uf field (this will be in the x_hat channel of Uf)
193 184 gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m);
194   - }
195   -
196   - //float t = gpuStopTimer();
197   - //std::cout<<"Scalar Uf Time: "<<t<<"ms"<<std::endl;
198   - //std::cout<<focus<<std::endl;
199   -
  185 + }
  186 +
  187 + t_Uf = gpuStopTimer();
200 188 }
... ...
nfScalarUfLut.cu 0 โ†’ 100644
  1 +#include "nearfield.h"
  2 +
  3 +#include "rts/math/legendre.h"
  4 +#include "rts/cuda/error.h"
  5 +#include "rts/cuda/timer.h"
  6 +
  7 +texture<float, cudaTextureType2D> texJ;
  8 +
  9 +__global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR);
  10 +
  11 +__global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoint f, bsVector k, ptype A, ptype cosAlpha, ptype cosBeta, int nl, ptype dmin, ptype dmax, int dR)
  12 +{
  13 + /*This function computes the focused field for a 2D slice
  14 +
  15 + Uf = destination field slice
  16 + ABCD = plane representing the field slice in world space
  17 + uR, vR = resolution of the Uf field
  18 + f = focal point of the condenser
  19 + k = direction of the incident light
  20 + A = amplitude of the incident field
  21 + cosAlpha= cosine of the solid angle subtended by the condenser obscuration
  22 + cosBeta = cosine of the solid angle subtended by the condenser aperature
  23 + nl = number of orders used to compute the field
  24 + dR = number of Bessel function values in the look-up texture
  25 +
  26 + */
  27 +
  28 + //get the current coordinate in the plane slice
  29 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  30 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  31 +
  32 + //make sure that the thread indices are in-bounds
  33 + if(iu >= uR || iv >= vR) return;
  34 +
  35 + //compute the index (easier access to the scalar field array)
  36 + int i = iv*uR + iu;
  37 +
  38 + //compute the parameters for u and v
  39 + ptype u = (ptype)iu / (uR);
  40 + ptype v = (ptype)iv / (vR);
  41 +
  42 +
  43 +
  44 + //get the rtsPoint in world space and then the r vector
  45 + bsPoint p = ABCD(u, v);
  46 + bsVector r = p - f;
  47 + ptype d = r.len();
  48 +
  49 + if(d == 0)
  50 + {
  51 + Uf[i] = A * 2 * PI * (cosAlpha - cosBeta);
  52 + return;
  53 + }
  54 +
  55 + //get info for the light direction and frequency
  56 + r = r.norm();
  57 +
  58 + //compute the imaginary factor i^l
  59 + bsComplex im = bsComplex(0, 1);
  60 + bsComplex il = bsComplex(1, 0);
  61 +
  62 + //Legendre functions are computed dynamically to save memory
  63 + //initialize the Legendre functions
  64 +
  65 + ptype P[2];
  66 + //get the angle between k and r (light direction and position vector)
  67 + ptype cosTheta;
  68 + cosTheta = k.dot(r);
  69 +
  70 + rts::init_legendre<ptype>(cosTheta, P[0], P[1]);
  71 +
  72 + //initialize legendre functions for the cassegrain angles
  73 + ptype Palpha[3];
  74 + rts::init_legendre<ptype>(cosAlpha, Palpha[0], Palpha[1]);
  75 + Palpha[2] = 1;
  76 +
  77 + ptype Pbeta[3];
  78 + rts::init_legendre<ptype>(cosBeta, Pbeta[0], Pbeta[1]);
  79 + Pbeta[2] = 1;
  80 +
  81 + //for each order l
  82 + bsComplex sumUf(0.0, 0.0);
  83 + ptype jl = 0.0;
  84 + ptype Pl;
  85 + ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1);
  86 + for(int l = 0; l<=nl; l++)
  87 + {
  88 + jl = tex2D(texJ, l + 0.5, di + 0.5);
  89 + if(l==0)
  90 + Pl = P[0];
  91 + else if(l==1)
  92 + {
  93 + Pl = P[1];
  94 +
  95 + //adjust the cassegrain Legendre function
  96 + Palpha[2] = Palpha[0];
  97 + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
  98 + Pbeta[2] = Pbeta[0];
  99 + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
  100 + }
  101 + else
  102 + {
  103 + rts::shift_legendre<ptype>(l, cosTheta, P[0], P[1]);
  104 +
  105 + Pl = P[1];
  106 +
  107 + //adjust the cassegrain outer Legendre function
  108 + Palpha[2] = Palpha[0];
  109 + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
  110 + Pbeta[2] = Pbeta[0];
  111 + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
  112 + }
  113 +
  114 + sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);
  115 + //sumUf += jl;
  116 +
  117 + il *= im;
  118 + }
  119 +
  120 + Uf[i] = sumUf * 2 * PI * A;
  121 + //Uf[i] = u;
  122 + //return;
  123 +}
  124 +
  125 +void nearfieldStruct::scalarUfLut()
  126 +{
  127 + gpuStartTimer();
  128 +
  129 + //calculate the minimum and maximum points in the focused field
  130 + d_min = pos.dist(focus);
  131 + d_max = pos.dist_max(focus);
  132 +
  133 + //allocate space for the Bessel function
  134 + int dR = 2 * max(Uf.R[0], Uf.R[1]);
  135 + ptype* j = NULL;
  136 + j = (ptype*) malloc(sizeof(ptype) * dR * (m+1));
  137 +
  138 + //calculate Bessel function LUT
  139 + calcBesselLut(j, d_min, d_max, dR);
  140 +
  141 + //create a CUDA array structure and specify the format description
  142 + cudaArray* arrayJ;
  143 + cudaChannelFormatDesc channelDesc =
  144 + cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
  145 +
  146 + //allocate memory
  147 + HANDLE_ERROR(cudaMallocArray(&arrayJ, &channelDesc, m+1, dR));
  148 +
  149 + //specify texture properties
  150 + texJ.addressMode[0] = cudaAddressModeMirror;
  151 + texJ.addressMode[1] = cudaAddressModeMirror;
  152 + texJ.filterMode = cudaFilterModeLinear;
  153 + texJ.normalized = false;
  154 +
  155 + //bind the texture to the array
  156 + HANDLE_ERROR(cudaBindTextureToArray(texJ, arrayJ, channelDesc));
  157 +
  158 + //copy the CPU Bessel LUT to the GPU-based array
  159 + HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice));
  160 +
  161 + //----------------Compute the focused field
  162 + //create one thread for each pixel of the field slice
  163 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  164 + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  165 +
  166 + //if we are computing a plane wave, call the gpuScalarUfp function
  167 + if(planeWave)
  168 + {
  169 + gpuScalarUfp<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]);
  170 + }
  171 + //otherwise compute the condenser info and create a focused field
  172 + else
  173 + {
  174 + //pre-compute the cosine of the obscuration and objective angles
  175 + ptype cosAlpha = cos(asin(condenser[0]));
  176 + ptype cosBeta = cos(asin(condenser[1]));
  177 + //compute the scalar Uf field (this will be in the x_hat channel of Uf)
  178 + gpuScalarUfLut<<<dimGrid, dimBlock>>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR);
  179 + }
  180 +
  181 +
  182 + //free everything
  183 + free(j);
  184 +
  185 + HANDLE_ERROR(cudaFreeArray(arrayJ));
  186 +
  187 + t_Uf = gpuStopTimer();
  188 +}
... ...
nfScalarUpLut.cu 0 โ†’ 100644
  1 +#include "nearfield.h"
  2 +#include "rts/math/spherical_bessel.h"
  3 +#include "rts/math/legendre.h"
  4 +#include <stdlib.h>
  5 +#include "rts/cuda/error.h"
  6 +#include "rts/cuda/timer.h"
  7 +
  8 +texture<float2, cudaTextureType2D> texUsp;
  9 +texture<float2, cudaTextureType2D> texUip;
  10 +
  11 +__global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR)
  12 +{
  13 + /*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field
  14 + produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field.
  15 +
  16 + Us = final scattered field
  17 + k = list of incoming plane waves (Monte-Carlo samples)
  18 + nk = number of incoming MC samples
  19 + kmag= magnitude of the incoming field 2pi/lambda
  20 + dmin= minimum distance of the Usp texture
  21 + dmax= maximum distance of the Usp texture
  22 + f = position of the focus
  23 + ps = position of the sphere
  24 + A = total amplitude of the incident field arriving at the focal spot
  25 + ABCD= rectangle representing the field slice
  26 + uR = resolution of the field slice in the u direction
  27 + vR = resolution of the field slice in the v direction
  28 + dR = resolution of the Usp texture in the d direction
  29 + thetaR= resolution of the Usp texture in the theta direction
  30 + */
  31 +
  32 + //get the current coordinate in the plane slice
  33 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  34 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  35 +
  36 + //make sure that the thread indices are in-bounds
  37 + if(iu >= uR || iv >= vR) return;
  38 +
  39 + //compute the index (easier access to the scalar field array)
  40 + int i = iv*uR + iu;
  41 +
  42 + //compute the parameters for u and v
  43 + ptype u = (ptype)iu / (uR);
  44 + ptype v = (ptype)iv / (vR);
  45 +
  46 + //get the rtsPoint in world space and then the r vector
  47 + bsPoint p = ABCD(u, v);
  48 + bsVector r = p - ps;
  49 + ptype d = r.len();
  50 + float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1);
  51 + float ai = ( (d - dmin)/(a - dmin)) * (aR - 1);
  52 +
  53 + bsComplex sumUs(0, 0);
  54 + //for each plane wave in the wave list
  55 + for(int iw = 0; iw < nk; iw++)
  56 + {
  57 + //normalize the direction vectors and find their inner product
  58 + r = r.norm();
  59 + ptype cos_theta = k[iw].dot(r);
  60 + if(cos_theta < -1)
  61 + cos_theta = -1;
  62 + if(cos_theta > 1)
  63 + cos_theta = 1;
  64 + float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1);
  65 +
  66 + //compute the phase factor for spheres that are not at the origin
  67 + bsVector c = ps - f;
  68 + bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));
  69 +
  70 + //compute the internal field if we are inside a sphere
  71 + if(d < a)
  72 + {
  73 + float2 Uip = tex2D(texUip, ai + 0.5, thetai + 0.5);
  74 + sumUs += (1.0/nk) * A * phase * bsComplex(Uip.x, Uip.y);
  75 + }
  76 + //otherwise compute the scattered field
  77 + else
  78 + {
  79 + float2 Usp = tex2D(texUsp, di + 0.5, thetai + 0.5);
  80 + sumUs += (1.0/nk) * A * phase * bsComplex(Usp.x, Usp.y);
  81 + }
  82 +
  83 + }
  84 +
  85 + Us[i] += sumUs;
  86 +}
  87 +
  88 +void nearfieldStruct::scalarUpLut()
  89 +{
  90 + //get the number of spheres
  91 + int nSpheres = sVector.size();
  92 +
  93 + //if there are no spheres, nothing to do here
  94 + if(nSpheres == 0)
  95 + return;
  96 +
  97 + //time the calculation of the focused field
  98 + gpuStartTimer();
  99 +
  100 + //clear the scattered field
  101 + U.clear_gpu();
  102 +
  103 + //create one thread for each pixel of the field slice
  104 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  105 + dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  106 +
  107 + //copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff)
  108 + bsVector* gpuk;
  109 + int nWaves;
  110 + ptype subA;
  111 + if(planeWave)
  112 + {
  113 + nWaves = 1;
  114 + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) );
  115 + HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice));
  116 + subA = A;
  117 + }
  118 + else
  119 + {
  120 + nWaves = inWaves.size();
  121 + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nWaves ) );
  122 + HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nWaves, cudaMemcpyHostToDevice));
  123 + //compute the amplitude that makes it through the condenser
  124 + subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );
  125 + }
  126 +
  127 + //for each sphere
  128 + for(int s = 0; s<nSpheres; s++)
  129 + {
  130 + //get the current sphere
  131 + //sphere S = sVector[s];
  132 +
  133 + //allocate space for the Usp and Uip textures
  134 + //allocate the cuda array
  135 + cudaArray* arrayUsp;
  136 + cudaArray* arrayUip;
  137 + cudaChannelFormatDesc channelDescUsp =
  138 + cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);
  139 + cudaChannelFormatDesc channelDescUip =
  140 + cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);
  141 + int dR = sVector[s].Usp.R[0];
  142 + int thetaR = sVector[s].Usp.R[1];
  143 + int aR = sVector[s].Uip.R[0];
  144 + HANDLE_ERROR(cudaMallocArray(&arrayUsp, &channelDescUsp, dR, thetaR));
  145 + HANDLE_ERROR(cudaMallocArray(&arrayUip, &channelDescUip, aR, thetaR));
  146 +
  147 + texUsp.addressMode[0] = cudaAddressModeMirror;
  148 + texUsp.addressMode[1] = cudaAddressModeMirror;
  149 + texUsp.filterMode = cudaFilterModeLinear;
  150 + texUsp.normalized = false;
  151 +
  152 + texUip.addressMode[0] = cudaAddressModeMirror;
  153 + texUip.addressMode[1] = cudaAddressModeMirror;
  154 + texUip.filterMode = cudaFilterModeLinear;
  155 + texUip.normalized = false;
  156 + HANDLE_ERROR(cudaBindTextureToArray(texUsp, arrayUsp, channelDescUsp));
  157 + HANDLE_ERROR(cudaBindTextureToArray(texUip, arrayUip, channelDescUip));
  158 +
  159 + //copy the LUT to the Usp texture
  160 + HANDLE_ERROR( cudaMemcpy2DToArray(arrayUsp, 0, 0, sVector[s].Usp.x_hat, dR*sizeof(float2), dR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));
  161 + HANDLE_ERROR( cudaMemcpy2DToArray(arrayUip, 0, 0, sVector[s].Uip.x_hat, aR*sizeof(float2), aR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));
  162 +
  163 + gpuScalarUpLut<<<dimGrid, dimBlock>>>(U.x_hat,
  164 + gpuk,
  165 + nWaves,
  166 + 2 * PI / lambda,
  167 + sVector[s].a,
  168 + sVector[s].d_min,
  169 + sVector[s].d_max,
  170 + focus,
  171 + sVector[s].p,
  172 + subA,
  173 + pos,
  174 + U.R[0],
  175 + U.R[1],
  176 + dR,
  177 + aR,
  178 + thetaR);
  179 +
  180 + cudaFreeArray(arrayUsp);
  181 + cudaFreeArray(arrayUip);
  182 +
  183 + }
  184 +
  185 +
  186 + //store the time to compute the scattered field
  187 + t_Us = gpuStopTimer();
  188 +
  189 + //free monte-carlo samples
  190 + cudaFree(gpuk);
  191 +
  192 +}
... ...
nfScalarUs.cu
... ... @@ -163,7 +163,7 @@ void nearfieldStruct::scalarUs()
163 163 return;
164 164  
165 165 //time the calculation of the focused field
166   - //gpuStartTimer();
  166 + gpuStartTimer();
167 167  
168 168 //clear the scattered field
169 169 U.clear_gpu();
... ... @@ -251,9 +251,8 @@ void nearfieldStruct::scalarUs()
251 251 }
252 252  
253 253  
  254 + //store the time to compute the scattered field
  255 + t_Us = gpuStopTimer();
254 256  
255   - //float t = gpuStopTimer();
256   - //std::cout<<"Scalar Us Time: "<<t<<"ms"<<std::endl;
257   - //std::cout<<focus<<std::endl;
258 257  
259 258 }
... ...
nfSumUf.cu
... ... @@ -32,7 +32,7 @@ __global__ void gpuScalarUsp(bsComplex* Ufx, bsComplex* Ufy, bsComplex* Ufz,
32 32 {
33 33 r = p - ps[is];
34 34 d = r.len();
35   - if(d <= as[is])
  35 + if(d < as[is])
36 36 return;
37 37 }
38 38  
... ... @@ -110,8 +110,5 @@ void nearfieldStruct::sumUf()
110 110 HANDLE_ERROR(cudaFree(gpu_p));
111 111 HANDLE_ERROR(cudaFree(gpu_a));
112 112  
113   - //float t = gpuStopTimer();
114   - //std::cout<<"Add Us Time: "<<t<<"ms"<<std::endl;
115   - //std::cout<<focus<<std::endl;
116 113  
117 114 }
... ...
options.h
... ... @@ -5,7 +5,7 @@
5 5  
6 6 #include "nearfield.h"
7 7 #include "microscope.h"
8   -#include "colormap.h"
  8 +#include "rts/graphics/colormap.h"
9 9 #include "fileout.h"
10 10 //extern nearfieldStruct* NF;
11 11 extern microscopeStruct* SCOPE;
... ... @@ -23,7 +23,179 @@ using namespace std;
23 23 #include <boost/program_options.hpp>
24 24 namespace po = boost::program_options;
25 25  
26   -static void loadSpheres(string sphereList)
  26 +extern bool verbose;
  27 +
  28 +
  29 +
  30 +static void lNearfield(po::variables_map vm)
  31 +{
  32 + //test to see if we are simulating a plane wave
  33 + bool planeWave = DEFAULT_PLANEWAVE;
  34 + if(vm.count("plane-wave"))
  35 + planeWave = !planeWave;
  36 + SCOPE->nf.planeWave = planeWave;
  37 +
  38 + //get the incident field amplitude
  39 + SCOPE->nf.A = vm["amplitude"].as<ptype>();
  40 +
  41 + //get the condenser parameters
  42 + SCOPE->nf.condenser[0] = DEFAULT_CONDENSER_MIN;
  43 + SCOPE->nf.condenser[1] = DEFAULT_CONDENSER_MAX;
  44 +
  45 + if(vm.count("condenser"))
  46 + {
  47 + vector<ptype> cparams = vm["condenser"].as< vector<ptype> >();
  48 +
  49 + if(cparams.size() == 1)
  50 + SCOPE->nf.condenser[1] = cparams[0];
  51 + else
  52 + {
  53 + SCOPE->nf.condenser[0] = cparams[0];
  54 + SCOPE->nf.condenser[1] = cparams[1];
  55 + }
  56 + }
  57 +
  58 +
  59 + //get the focal rtsPoint position
  60 + SCOPE->nf.focus[0] = DEFAULT_FOCUS_X;
  61 + SCOPE->nf.focus[1] = DEFAULT_FOCUS_Y;
  62 + SCOPE->nf.focus[2] = DEFAULT_FOCUS_Z;
  63 + if(vm.count("focus"))
  64 + {
  65 + vector<ptype> fpos = vm["focus"].as< vector<ptype> >();
  66 + if(fpos.size() != 3)
  67 + {
  68 + cout<<"BIMSIM Error - the incident focal point is incorrectly specified; it must have three components."<<endl;
  69 + exit(1);
  70 + }
  71 + SCOPE->nf.focus[0] = fpos[0];
  72 + SCOPE->nf.focus[1] = fpos[1];
  73 + SCOPE->nf.focus[2] = fpos[2];
  74 + }
  75 +
  76 + //get the incident light direction (k-vector)
  77 + bsVector spherical(1, 0, 0);
  78 +
  79 + //if a k-vector is specified
  80 + if(vm.count("k"))
  81 + {
  82 + vector<ptype> kvec = vm["k"].as< vector<ptype> >();
  83 + if(kvec.size() != 2)
  84 + {
  85 + cout<<"BIMSIM Error - k-vector is not specified correctly: it must contain two elements"<<endl;
  86 + exit(1);
  87 + }
  88 + spherical[1] = kvec[0];
  89 + spherical[2] = kvec[1];
  90 + }
  91 + SCOPE->nf.k = spherical.sph2cart();
  92 +
  93 +
  94 + //incident field order
  95 + SCOPE->nf.m = vm["field-order"].as<int>();
  96 +
  97 + //number of Monte-Carlo samples
  98 + SCOPE->nf.nWaves = vm["samples"].as<int>();
  99 +
  100 + //random number seed for Monte-Carlo samples
  101 + if(vm.count("seed"))
  102 + srand(vm["seed"].as<unsigned int>());
  103 +
  104 +
  105 +
  106 +}
  107 +
  108 +
  109 +static void loadOutputParams(po::variables_map vm)
  110 +{
  111 + //append simulation results to previous binary files
  112 + gFileOut.append = DEFAULT_APPEND;
  113 + if(vm.count("append"))
  114 + gFileOut.append = true;
  115 +
  116 + //image parameters
  117 + //component of the field to be saved
  118 + std::string fieldStr;
  119 + fieldStr = vm["output-type"].as<string>();
  120 +
  121 + if(fieldStr == "magnitude")
  122 + gFileOut.field = fileoutStruct::fieldMag;
  123 + else if(fieldStr == "intensity")
  124 + gFileOut.field = fileoutStruct::fieldIntensity;
  125 + else if(fieldStr == "polarization")
  126 + gFileOut.field = fileoutStruct::fieldPolar;
  127 + else if(fieldStr == "imaginary")
  128 + gFileOut.field = fileoutStruct::fieldImag;
  129 + else if(fieldStr == "real")
  130 + gFileOut.field = fileoutStruct::fieldReal;
  131 + else if(fieldStr == "angular-spectrum")
  132 + gFileOut.field = fileoutStruct::fieldAngularSpectrum;
  133 +
  134 +
  135 + //image file names
  136 + gFileOut.intFile = vm["intensity"].as<string>();
  137 + gFileOut.absFile = vm["absorbance"].as<string>();
  138 + gFileOut.transFile = vm["transmittance"].as<string>();
  139 + gFileOut.nearFile = vm["near-field"].as<string>();
  140 + gFileOut.farFile = vm["far-field"].as<string>();
  141 +
  142 + //colormap
  143 + std::string cmapStr;
  144 + cmapStr = vm["colormap"].as<string>();
  145 + if(cmapStr == "brewer")
  146 + gFileOut.colormap = rts::cmBrewer;
  147 + else if(cmapStr == "gray")
  148 + gFileOut.colormap = rts::cmGrayscale;
  149 + else
  150 + cout<<"color-map value not recognized (using default): "<<cmapStr<<endl;
  151 +}
  152 +
  153 +void lFlags(po::variables_map vm, po::options_description desc)
  154 +{
  155 + //display help and exit
  156 + if(vm.count("help"))
  157 + {
  158 + cout<<desc<<endl;
  159 + exit(1);
  160 + }
  161 +
  162 + //flag for verbose output
  163 + if(vm.count("verbose"))
  164 + verbose = true;
  165 +
  166 + if(vm.count("recursive"))
  167 + {
  168 + SCOPE->nf.lut_us = false;
  169 + SCOPE->nf.lut_uf = false;
  170 + }
  171 + else if(vm.count("recursive-us"))
  172 + {
  173 + SCOPE->nf.lut_us = false;
  174 + }
  175 + else if(vm.count("lut-uf"))
  176 + {
  177 + SCOPE->nf.lut_uf = true;
  178 + }
  179 +}
  180 +
  181 +void lWavelength(po::variables_map vm)
  182 +{
  183 + //load the wavelength
  184 + if(vm.count("nu"))
  185 + {
  186 + //wavelength is given in wavenumber - transform and flag
  187 + SCOPE->nf.lambda = 10000/vm["nu"].as<ptype>();
  188 + gFileOut.wavenumber = true;
  189 + }
  190 + //otherwise we are using lambda = wavelength
  191 + else
  192 + {
  193 + SCOPE->nf.lambda = vm["lambda"].as<ptype>();
  194 + gFileOut.wavenumber = false;
  195 + }
  196 +}
  197 +
  198 +static void lSpheres(string sphereList)
27 199 {
28 200 /*This function loads a list of sphere given in the string sphereList
29 201 The format is:
... ... @@ -58,17 +230,60 @@ static void loadSpheres(string sphereList)
58 230 //check out the next element (this should set the EOF error flag)
59 231 ss.peek();
60 232 }
  233 +}
61 234  
  235 +void lSpheres(po::variables_map vm)
  236 +{
  237 + //if a sphere is specified at the command line
  238 + if(vm.count("spheres"))
  239 + {
  240 + //convert the sphere to a string
  241 + vector<ptype> sdesc = vm["spheres"].as< vector<ptype> >();
62 242  
  243 + //compute the number of spheres specified
  244 + unsigned int nS;
  245 + if(sdesc.size() <= 5)
  246 + nS = 1;
  247 + else
  248 + {
  249 + //if the number of parameters is divisible by 4, compute the number of spheres
  250 + if(sdesc.size() % 5 == 0)
  251 + nS = sdesc.size() / 5;
  252 + else
  253 + {
  254 + cout<<"BIMSIM Error: Invalid number of sphere parameters."<<endl;
  255 + exit(1);
  256 + }
  257 + }
63 258  
64   -}
  259 + stringstream ss;
  260 +
  261 + //for each sphere
  262 + for(unsigned int s=0; s<nS; s++)
  263 + {
  264 + //compute the number of sphere parameters
  265 + unsigned int nP;
  266 + if(nS == 1) nP = sdesc.size();
  267 + else nP = 5;
  268 +
  269 + //store each parameter as a string
  270 + for(unsigned int i=0; i<nP; i++)
  271 + {
  272 + ss<<sdesc[s*5 + i]<<" ";
  273 + }
  274 + ss<<endl;
  275 + }
  276 +
  277 +
  278 +
  279 + //convert the string to a sphere list
  280 + lSpheres(ss.str());
  281 + }
65 282  
66   -static void loadSpheres(po::variables_map vm)
67   -{
68 283 //if a files are specified
69 284 if(vm.count("sphere-file"))
70 285 {
71   - cout<<"Sphere files detected."<<endl;
  286 +
72 287 vector<string> filenames = vm["sphere-file"].as< vector<string> >();
73 288 //load each file
74 289 for(int iS=0; iS<filenames.size(); iS++)
... ... @@ -85,69 +300,51 @@ static void loadSpheres(po::variables_map vm)
85 300 std::string instr((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
86 301  
87 302 //load the list of spheres from a string
88   - loadSpheres(instr);
  303 + lSpheres(instr);
89 304 }
90 305 }
91 306  
92   - //load the sphere from the command line
93   - if(vm.count("sx") || vm.count("sy") || vm.count("sz") || vm.count("s"))
94   - {
95   - //create a new sphere
96   - sphere newS;
97   -
98   - //set defaults
99   - if(vm.count("sx"))
100   - newS.p[0] = vm["sx"].as<ptype>();
101   - else
102   - newS.p[0] = DEFAULT_SPHERE_X;
103   -
104   -
105   - if(vm.count("sy"))
106   - newS.p[1] = vm["sy"].as<ptype>();
107   - else
108   - newS.p[1] = DEFAULT_SPHERE_Y;
109   -
110   - if(vm.count("sz"))
111   - newS.p[2] = vm["sz"].as<ptype>();
112   - else
113   - newS.p[2] = DEFAULT_SPHERE_Z;
114   -
115   - if(vm.count("radius"))
116   - newS.a = vm["radius"].as<ptype>();
117   - else
118   - newS.a = DEFAULT_SPHERE_A;
119   -
120   - //add the sphere to the sphere vector
121   - SCOPE->nf.sVector.push_back(newS);
  307 + //make sure the appropriate materials are loaded
  308 + unsigned int nS = SCOPE->nf.sVector.size();
122 309  
  310 + //for each sphere
  311 + for(unsigned int s = 0; s<nS; s++)
  312 + {
  313 + //make sure the corresponding material exists
  314 + if(SCOPE->nf.sVector[s].iMaterial + 1 > SCOPE->nf.mVector.size())
  315 + {
  316 + //otherwise output an error
  317 + cout<<"BIMSIM Error - A material is not loaded for sphere "<<s+1<<"."<<endl;
  318 + exit(1);
  319 + }
123 320 }
124 321 }
125 322  
126   -static void loadMaterials(po::variables_map vm)
  323 +static void lMaterials(po::variables_map vm)
127 324 {
128 325 //if materials are specified at the command line
129 326 if(vm.count("materials"))
130 327 {
131 328 vector<ptype> matVec = vm["materials"].as< vector<ptype> >();
132   - if(matVec.size() %2 != 0)
  329 + if(matVec.size() == 1)
  330 + {
  331 + rts::material<ptype> newM(SCOPE->nf.lambda, matVec[0], 0);
  332 + SCOPE->nf.mVector.push_back(newM);
  333 + }
  334 + else if(matVec.size() %2 != 0)
133 335 {
134 336 cout<<"BIMSim Error: materials must be specified in n, k pairs"<<endl;
135 337 exit(1);
136 338 }
137   -
138   -
139   - for(int i=0; i<matVec.size(); i+=2)
  339 + else
140 340 {
141   - rts::material<ptype> newM(SCOPE->nf.lambda, matVec[i], matVec[i+1]);
142   - SCOPE->nf.mVector.push_back(newM);
  341 + for(int i=0; i<matVec.size(); i+=2)
  342 + {
  343 + rts::material<ptype> newM(SCOPE->nf.lambda, matVec[i], matVec[i+1]);
  344 + SCOPE->nf.mVector.push_back(newM);
  345 + }
143 346 }
144 347 }
145   - else
146   - {
147   - //add the command line material as the default (material 0)
148   - rts::material<ptype> newM(SCOPE->nf.lambda, vm["n"].as<ptype>(), vm["k"].as<ptype>());
149   - SCOPE->nf.mVector.push_back(newM);
150   - }
151 348  
152 349 //if file names are specified, load the materials
153 350 if(vm.count("material-file"))
... ... @@ -169,57 +366,109 @@ static void loadMaterials(po::variables_map vm)
169 366  
170 367 }
171 368  
172   -static void loadNearfieldParams(po::variables_map vm)
  369 +static void lOptics(po::variables_map vm)
173 370 {
174   - //test to see if we are simulating a plane wave
175   - bool planeWave = DEFAULT_PLANEWAVE;
176   - if(vm.count("plane-wave"))
177   - planeWave = !planeWave;
178   - SCOPE->nf.planeWave = planeWave;
179   -
180   - //get the wavelength
181   - //SCOPE->nf.lambda = vm["lambda"].as<ptype>();
182   -
183   - //get the incident field amplitude
184   - SCOPE->nf.A = vm["amplitude"].as<ptype>();
185   -
186   - //get the condenser parameters
187   - SCOPE->nf.condenser[0] = vm["condenser-min"].as<ptype>();
188   - SCOPE->nf.condenser[1] = vm["condenser-max"].as<ptype>();
189   -
190   -
191   - //get the focal rtsPoint position
192   - SCOPE->nf.focus[0] = vm["fx"].as<ptype>();
193   - SCOPE->nf.focus[1] = vm["fy"].as<ptype>();
194   - SCOPE->nf.focus[2] = vm["fz"].as<ptype>();
195   -
196   - //get the incident light direction (k-vector)
197   - bsVector spherical;
198   - spherical[0] = 1.0;
199   - spherical[1] = vm["theta"].as<ptype>();
200   - spherical[2] = vm["phi"].as<ptype>();
201   - SCOPE->nf.k = spherical.sph2cart();
202   -
203   -
204   - //incident field order
205   - SCOPE->nf.m = vm["field-order"].as<int>();
206   -
207   - //number of Monte-Carlo samples
208   - SCOPE->nf.nWaves = vm["samples"].as<int>();
209   -
210   -
  371 + SCOPE->objective[0] = DEFAULT_OBJECTIVE_MIN;
  372 + SCOPE->objective[1] = DEFAULT_OBJECTIVE_MAX;
  373 + if(vm.count("objective"))
  374 + {
  375 + vector<ptype> oparams = vm["objective"].as< vector<ptype> >();
211 376  
  377 + if(oparams.size() == 1)
  378 + SCOPE->objective[1] = oparams[0];
  379 + else
  380 + {
  381 + SCOPE->objective[0] = oparams[0];
  382 + SCOPE->objective[1] = oparams[1];
  383 + }
  384 + }
212 385 }
213 386  
214   -static void loadSliceParams(po::variables_map vm)
  387 +static void lImagePlane(po::variables_map vm)
215 388 {
216   - //parameters for the sample plane
217   -
  389 + bsPoint pMin(DEFAULT_PLANE_MIN_X, DEFAULT_PLANE_MIN_Y, DEFAULT_PLANE_MIN_Z);
  390 + bsPoint pMax(DEFAULT_PLANE_MAX_X, DEFAULT_PLANE_MAX_Y, DEFAULT_PLANE_MAX_Z);
  391 + bsVector normal(DEFAULT_PLANE_NORM_X, DEFAULT_PLANE_NORM_Y, DEFAULT_PLANE_NORM_Z);
218 392  
219 393 //set the default values for the slice position and orientation
220   - bsPoint pMin(vm["plane-min-x"].as<ptype>(), vm["plane-min-y"].as<ptype>(), vm["plane-min-z"].as<ptype>());
221   - bsPoint pMax(vm["plane-max-x"].as<ptype>(), vm["plane-max-y"].as<ptype>(), vm["plane-max-z"].as<ptype>());
222   - bsVector normal(vm["plane-norm-x"].as<ptype>(), vm["plane-norm-y"].as<ptype>(), vm["plane-norm-z"].as<ptype>());
  394 + if(vm.count("plane-lower-left") && vm.count("plane-upper-right") && vm.count("plane-normal"))
  395 + {
  396 + vector<ptype> ll = vm["plane-lower-left"].as< vector<ptype> >();
  397 + if(ll.size() != 3)
  398 + {
  399 + cout<<"BIMSIM Error - The lower-left corner of the image plane is incorrectly specified."<<endl;
  400 + exit(1);
  401 + }
  402 +
  403 + vector<ptype> ur = vm["plane-lower-left"].as< vector<ptype> >();
  404 + if(ur.size() != 3)
  405 + {
  406 + cout<<"BIMSIM Error - The upper-right corner of the image plane is incorrectly specified."<<endl;
  407 + exit(1);
  408 + }
  409 +
  410 + vector<ptype> norm = vm["plane-lower-left"].as< vector<ptype> >();
  411 + if(norm.size() != 3)
  412 + {
  413 + cout<<"BIMSIM Error - The normal of the image plane is incorrectly specified."<<endl;
  414 + exit(1);
  415 + }
  416 +
  417 + pMin = bsPoint(ll[0], ll[1], ll[2]);
  418 + pMax = bsPoint(ur[0], ur[1], ur[2]);
  419 + normal = bsVector(norm[0], norm[1], norm[2]);
  420 + }
  421 + else if(vm.count("xy"))
  422 + {
  423 + //default plane size in microns
  424 + ptype s = DEFAULT_PLANE_SIZE;
  425 + ptype pos = DEFAULT_PLANE_POSITION;
  426 +
  427 + vector<ptype> xy = vm["xy"].as< vector<ptype> >();
  428 + if(xy.size() >= 1)
  429 + s = xy[0];
  430 + if(xy.size() >= 2)
  431 + pos = xy[1];
  432 +
  433 + //calculate the plane corners and normal based on the size and position
  434 + pMin = bsPoint(-s/2, -s/2, pos);
  435 + pMax = bsPoint(s/2, s/2, pos);
  436 + normal = bsVector(0, 0, 1);
  437 + }
  438 + else if(vm.count("xz"))
  439 + {
  440 + //default plane size in microns
  441 + ptype size = DEFAULT_PLANE_SIZE;
  442 + ptype pos = DEFAULT_PLANE_POSITION;
  443 +
  444 + vector<ptype> xz = vm["xz"].as< vector<ptype> >();
  445 + if(xz.size() >= 1)
  446 + size = xz[0];
  447 + if(xz.size() >= 2)
  448 + pos = xz[1];
  449 +
  450 + //calculate the plane corners and normal based on the size and position
  451 + pMin = bsPoint(-size/2, pos, -size/2);
  452 + pMax = bsPoint(size/2, pos, size/2);
  453 + normal = bsVector(0, -1, 0);
  454 + }
  455 + else if(vm.count("yz"))
  456 + {
  457 + //default plane size in microns
  458 + ptype size = DEFAULT_PLANE_SIZE;
  459 + ptype pos = DEFAULT_PLANE_POSITION;
  460 +
  461 + vector<ptype> yz = vm["yz"].as< vector<ptype> >();
  462 + if(yz.size() >= 1)
  463 + size = yz[0];
  464 + if(yz.size() >= 2)
  465 + pos = yz[1];
  466 +
  467 + //calculate the plane corners and normal based on the size and position
  468 + pMin = bsPoint(pos, -size/2, -size/2);
  469 + pMax = bsPoint(pos, size/2, size/2);
  470 + normal = bsVector(1, 0, 0);
  471 + }
223 472 SCOPE->setPos(pMin, pMax, normal);
224 473  
225 474 //resolution
... ... @@ -233,175 +482,111 @@ static void loadSliceParams(po::variables_map vm)
233 482  
234 483  
235 484 SCOPE->setNearfield();
236   -
237   -
238   -
239   -}
240   -
241   -static void loadMicroscopeParams(po::variables_map vm)
242   -{
243   - //objective
244   - SCOPE->objective[0] = vm["objective-min"].as<ptype>();
245   - SCOPE->objective[1] = vm["objective-max"].as<ptype>();
246   -
247   -
248   -
249   -
250   -
251   -}
252   -
253   -static void loadOutputParams(po::variables_map vm)
254   -{
255   - //append simulation results to previous binary files
256   - gFileOut.append = DEFAULT_APPEND;
257   - if(vm.count("append"))
258   - gFileOut.append = true;
259   -
260   - //image parameters
261   - //component of the field to be saved
262   - std::string fieldStr;
263   - fieldStr = vm["output-type"].as<string>();
264   -
265   - if(fieldStr == "magnitude")
266   - gFileOut.field = fileoutStruct::fieldMag;
267   - else if(fieldStr == "intensity")
268   - gFileOut.field = fileoutStruct::fieldIntensity;
269   - else if(fieldStr == "polarization")
270   - gFileOut.field = fileoutStruct::fieldPolar;
271   - else if(fieldStr == "imaginary")
272   - gFileOut.field = fileoutStruct::fieldImag;
273   - else if(fieldStr == "real")
274   - gFileOut.field = fileoutStruct::fieldReal;
275   - else if(fieldStr == "angular-spectrum")
276   - gFileOut.field = fileoutStruct::fieldAngularSpectrum;
277   -
278   -
279   - //image file names
280   - gFileOut.intFile = vm["intensity"].as<string>();
281   - gFileOut.absFile = vm["absorbance"].as<string>();
282   - gFileOut.transFile = vm["transmittance"].as<string>();
283   - gFileOut.nearFile = vm["near-field"].as<string>();
284   - gFileOut.farFile = vm["far-field"].as<string>();
285   -
286   - //colormap
287   - std::string cmapStr;
288   - cmapStr = vm["colormap"].as<string>();
289   - if(cmapStr == "brewer")
290   - gFileOut.colormap = rts::colormap::cmBrewer;
291   - else if(cmapStr == "gray")
292   - gFileOut.colormap = rts::colormap::cmGrayscale;
293   - else
294   - cout<<"color-map value not recognized (using default): "<<cmapStr<<endl;
295 485 }
296 486  
297 487 static void OutputOptions()
298 488 {
299   - cout<<SCOPE->nf.toStr();
  489 + cout<<SCOPE->toStr();
300 490  
301 491 cout<<"# of source points: "<<SCOPE->focalPoints.size()<<endl;
302 492  
303 493 }
304 494  
  495 +vector<ptype> test;
305 496 static void SetOptions(po::options_description &desc)
306 497 {
307 498 desc.add_options()
308   - ("help,h", "prints this help")
309   - ("plane-wave,P", "simulates an incident plane wave")
310   - ("intensity,I", po::value<string>()->default_value(DEFAULT_INTENSITY_FILE), "output measured intensity (filename)")
311   - ("absorbance,A", po::value<string>()->default_value(DEFAULT_ABSORBANCE_FILE), "output measured absorbance (filename)")
312   - ("transmittance,T", po::value<string>()->default_value(DEFAULT_TRANSMITTANCE_FILE), "output measured transmittance (filename)")
313   - ("far-field,F", po::value<string>()->default_value(DEFAULT_FAR_FILE), "output far-field at detector (filename)")
314   - ("near-field,N", po::value<string>()->default_value(DEFAULT_NEAR_FILE), "output field at focal plane (filename)")
315   - ("extended-source,X", po::value<string>()->default_value(DEFAULT_EXTENDED_SOURCE), "image of source at focus (filename)")
316   - //("sx,x", po::value<ptype>()->default_value(DEFAULT_SPHERE_X), "sphere coordinates")
317   - //("sy,y", po::value<ptype>()->default_value(DEFAULT_SPHERE_Y))
318   - //("sz,z", po::value<ptype>()->default_value(DEFAULT_SPHERE_Z))
319   - ("sx,x", po::value<ptype>(), "sphere coordinates")
320   - ("sy,y", po::value<ptype>())
321   - ("sz,z", po::value<ptype>())
322   - ("radius,r", po::value<ptype>()->default_value(DEFAULT_SPHERE_A), "sphere radius")
323   - ("samples,s", po::value<int>()->default_value(DEFAULT_SAMPLES), "Monte-Carlo samples used to compute Us")
324   - ("sphere-file,S", po::value< vector<string> >()->multitoken(), "sphere file:\n [x y z radius material]")
325   - ("amplitude,a", po::value<ptype>()->default_value(DEFAULT_AMPLITUDE), "incident field amplitude")
326   - ("n,n", po::value<ptype>()->default_value(DEFAULT_N, "1.4"), "sphere phase speed")
327   - ("k,k", po::value<ptype>()->default_value(DEFAULT_K), "sphere absorption coefficient")
328   - ("material-file,M", po::value< vector<string> >()->multitoken(), "material file:\n [lambda n k]")
329   - ("materials", po::value< vector<ptype> >()->multitoken(), "materials specified using n, k pairs:\n ex. --materials n1 k1 n2 k2\n (if used --n and --k are ignored)")
330   - ("lambda,l", po::value<ptype>()->default_value(DEFAULT_LAMBDA), "incident wavelength")
  499 + ("help", "prints this help")
  500 + ("verbose", "verbose output\n")
  501 +
  502 + ("intensity", po::value<string>()->default_value(DEFAULT_INTENSITY_FILE), "output measured intensity (filename)")
  503 + ("absorbance", po::value<string>()->default_value(DEFAULT_ABSORBANCE_FILE), "output measured absorbance (filename)")
  504 + ("transmittance", po::value<string>()->default_value(DEFAULT_TRANSMITTANCE_FILE), "output measured transmittance (filename)")
  505 + ("far-field", po::value<string>()->default_value(DEFAULT_FAR_FILE), "output far-field at detector (filename)")
  506 + ("near-field", po::value<string>()->default_value(DEFAULT_NEAR_FILE), "output field at focal plane (filename)")
  507 + ("extended-source", po::value<string>()->default_value(DEFAULT_EXTENDED_SOURCE), "image of source at focus (filename)\n")
  508 +
  509 + ("spheres", po::value< vector<ptype> >()->multitoken(), "sphere position: x y z a m")
  510 + ("sphere-file", po::value< vector<string> >()->multitoken(), "sphere file:\n [x y z radius material]")
  511 + ("materials", po::value< vector<ptype> >()->multitoken(), "refractive indices as n, k pairs:\n ex. -m n0 k0 n1 k1 n2 k2")
  512 + ("material-file", po::value< vector<string> >()->multitoken(), "material file:\n [lambda n k]\n")
  513 +
  514 + ("lambda", po::value<ptype>()->default_value(DEFAULT_LAMBDA), "incident wavelength")
331 515 ("nu", po::value<ptype>(), "incident frequency (in cm^-1)\n(if specified, lambda is ignored)")
332   - ("theta,t", po::value<ptype>()->default_value(DEFAULT_K_THETA), "light direction (polar coords)")
333   - ("phi,p", po::value<ptype>()->default_value(DEFAULT_K_PHI))
334   - ("fx", po::value<ptype>()->default_value(DEFAULT_FOCUS_X), "incident focal point")
335   - ("fy", po::value<ptype>()->default_value(DEFAULT_FOCUS_Y))
336   - ("fz", po::value<ptype>()->default_value(DEFAULT_FOCUS_Z))
337   - ("condenser-max,C", po::value<ptype>()->default_value(DEFAULT_CONDENSER_MAX), "condenser numerical aperature")
338   - ("condenser-min,c", po::value<ptype>()->default_value(DEFAULT_CONDENSER_MIN), "condenser obscuration NA")
339   - ("objective-max,O", po::value<ptype>()->default_value(DEFAULT_OBJECTIVE_MAX), "objective numerical aperature")
340   - ("objective-min,o", po::value<ptype>()->default_value(DEFAULT_OBJECTIVE_MIN), "objective obscuration NA")
341   - ("field-order", po::value<int>()->default_value(DEFAULT_FIELD_ORDER), "order of the incident field")
342   - ("output-type,f", po::value<string>()->default_value(DEFAULT_FIELD_TYPE), "output field value:\n magnitude, polarization, real, imaginary, angular-spectrum")
343   - ("resolution,R", po::value<unsigned int>()->default_value(DEFAULT_SLICE_RES), "resolution of the detector")
344   - ("padding,d", po::value<unsigned int>()->default_value(DEFAULT_PADDING), "FFT padding for the objective bandpass")
  516 + ("k", po::value< vector<ptype> >()->multitoken(), "k-vector direction: -k theta phi\n theta = [0 2*pi], phi = [0 pi]")
  517 + ("amplitude", po::value<ptype>()->default_value(DEFAULT_AMPLITUDE), "incident field amplitude")
  518 + ("condenser", po::value< vector<ptype> >()->multitoken(), "condenser numerical aperature\nA pair of values can be used to specify an inner obscuration: -c NAin NAout")
  519 + ("objective", po::value< vector<ptype> >()->multitoken(), "objective numerical aperature\nA pair of values can be used to specify an inner obscuration: -c NAin NAout")
  520 + ("focus", po::value< vector<ptype> >()->multitoken(), "focal position for the incident point source\n (default = --focus 0 0 0)")
  521 + ("plane-wave", "simulates an incident plane wave\n")
  522 +
  523 + ("resolution", po::value<unsigned int>()->default_value(DEFAULT_SLICE_RES), "resolution of the detector")
  524 + ("plane-lower-left", po::value< vector<ptype> >()->multitoken(), "lower-left position of the image plane")
  525 + ("plane-upper-right", po::value< vector<ptype> >()->multitoken(), "upper-right position of the image plane")
  526 + ("plane-normal", po::value< vector<ptype> >()->multitoken(), "normal for the image plane")
  527 + ("xy", po::value< vector<ptype> >()->multitoken(), "specify an x-y image plane\n (standard microscope)")
  528 + ("xz", po::value< vector<ptype> >()->multitoken(), "specify a x-z image plane\n (cross-section of the focal volume)")
  529 + ("yz", po::value< vector<ptype> >()->multitoken(), "specify a y-z image plane\n (cross-section of the focal volume)\n")
  530 +
  531 + ("samples", po::value<int>()->default_value(DEFAULT_SAMPLES), "Monte-Carlo samples used to compute Us")
  532 + ("padding", po::value<unsigned int>()->default_value(DEFAULT_PADDING), "FFT padding for the objective bandpass")
345 533 ("supersample", po::value<unsigned int>()->default_value(DEFAULT_SUPERSAMPLE), "super-sampling rate for the detector field")
  534 + ("field-order", po::value<int>()->default_value(DEFAULT_FIELD_ORDER), "order of the incident field")
  535 + ("seed", po::value<unsigned int>(), "seed for the Monte-Carlo random number generator")
  536 + ("recursive", "evaluate all Bessel functions recursively\n")
  537 + ("recursive-us", "evaluate scattered-field Bessel functions recursively\n")
  538 + ("lut-uf", "evaluate the focused-field using a look-up table\n")
  539 +
  540 + ("output-type", po::value<string>()->default_value(DEFAULT_FIELD_TYPE), "output field value:\n magnitude, polarization, real, imaginary, angular-spectrum")
346 541 ("colormap", po::value<string>()->default_value(DEFAULT_COLORMAP), "colormap: gray, brewer")
347 542 ("append", "append result to an existing file\n (binary files only)")
348   - ("plane-min-x,u", po::value<ptype>()->default_value(DEFAULT_SLICE_MIN_X), "lower-left corner of the field slice")
349   - ("plane-min-y,v", po::value<ptype>()->default_value(DEFAULT_SLICE_MIN_Y))
350   - ("plane-min-z,w", po::value<ptype>()->default_value(DEFAULT_SLICE_MIN_Z))
351   - ("plane-max-x,U", po::value<ptype>()->default_value(DEFAULT_SLICE_MAX_X), "upper-right corner of the field slice")
352   - ("plane-max-y,V", po::value<ptype>()->default_value(DEFAULT_SLICE_MAX_Y))
353   - ("plane-max-z,W", po::value<ptype>()->default_value(DEFAULT_SLICE_MAX_Z))
354   - ("plane-norm-x", po::value<ptype>()->default_value(DEFAULT_SLICE_NORM_X), "field slice normal")
355   - ("plane-norm-y", po::value<ptype>()->default_value(DEFAULT_SLICE_NORM_Y))
356   - ("plane-norm-z", po::value<ptype>()->default_value(DEFAULT_SLICE_NORM_Z));
  543 + ;
357 544 }
358 545  
359 546 static void LoadParameters(int argc, char *argv[])
360 547 {
361 548 //create an option description
362   - po::options_description desc("Allowed options");
  549 + po::options_description desc("BimSim arguments");
363 550  
364 551 //fill it with options
365 552 SetOptions(desc);
366 553  
367 554 po::variables_map vm;
368   - po::store(po::parse_command_line(argc, argv, desc), vm);
  555 + po::store(po::parse_command_line(argc, argv, desc, po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm);
369 556 po::notify(vm);
370 557  
371   - //display help and exit
372   - if(vm.count("help"))
373   - {
374   - cout<<desc<<endl;
375   - exit(1);
376   - }
377 558  
378   - //load the wavelength
379   - if(vm.count("nu"))
380   - {
381   - //wavelength is given in wavenumber - transform and flag
382   - SCOPE->nf.lambda = 10000/vm["nu"].as<ptype>();
383   - gFileOut.wavenumber = true;
384   - }
385   - //otherwise we are using lambda = wavelength
386   - else
387   - {
388   - SCOPE->nf.lambda = vm["lambda"].as<ptype>();
389   - gFileOut.wavenumber = false;
390   - }
  559 + //load flags (help, verbose output)
  560 + lFlags(vm, desc);
  561 +
  562 + //load the wavelength
  563 + lWavelength(vm);
  564 +
  565 + //load materials
  566 + //loadMaterials(vm);
  567 + lMaterials(vm);
  568 +
  569 + //load the sphere data
  570 + lSpheres(vm);
  571 +
  572 + //load the optics
  573 + lOptics(vm);
  574 +
  575 + //load the position and orientation of the image plane
  576 + lImagePlane(vm);
391 577  
392 578 //load spheres
393   - loadSpheres(vm);
  579 + //loadSpheres(vm);
  580 +
394 581  
395   - //load materials
396   - loadMaterials(vm);
397 582  
398   - loadNearfieldParams(vm);
  583 + lNearfield(vm);
399 584  
400 585 loadOutputParams(vm);
401 586  
402   - loadMicroscopeParams(vm);
  587 + //loadMicroscopeParams(vm);
403 588  
404   - loadSliceParams(vm);
  589 + //loadSliceParams(vm);
405 590  
406 591 //if an extended source will be used
407 592 if(vm["extended-source"].as<string>() != "")
... ...
scalarslice.cu
... ... @@ -22,16 +22,17 @@ scalarslice::scalarslice()
22 22  
23 23 scalarslice::~scalarslice()
24 24 {
25   - HANDLE_ERROR(cudaFree(S));
  25 + if(S != NULL)
  26 + HANDLE_ERROR(cudaFree(S));
26 27 S = NULL;
27 28 }
28 29  
29   -void scalarslice::toImage(std::string filename, ptype vmin, ptype vmax, rts::colormap::colormapType cmap)
  30 +void scalarslice::toImage(std::string filename, ptype vmin, ptype vmax, rts::colormapType cmap)
30 31 {
31   - rts::colormap::gpu2image<ptype>(S, filename, R[0], R[1], vmin, vmax, cmap);
  32 + rts::gpu2image<ptype>(S, filename, R[0], R[1], vmin, vmax, cmap);
32 33 }
33 34  
34   -void scalarslice::toImage(std::string filename, bool positive, rts::colormap::colormapType cmap)
  35 +void scalarslice::toImage(std::string filename, bool positive, rts::colormapType cmap)
35 36 {
36 37 cublasStatus_t stat;
37 38 cublasHandle_t handle;
... ... @@ -62,7 +63,7 @@ void scalarslice::toImage(std::string filename, bool positive, rts::colormap::co
62 63 exit(1);
63 64 }
64 65  
65   - //std::cout<<"Maximum index: "<<result<<std::endl;
  66 +
66 67  
67 68 //retrieve the maximum value
68 69 ptype maxVal;
... ... @@ -75,7 +76,7 @@ void scalarslice::toImage(std::string filename, bool positive, rts::colormap::co
75 76 if(positive)
76 77 toImage(filename, 0, maxVal, cmap);
77 78 else
78   - toImage(filename, -maxVal, maxVal, cmap);
  79 + toImage(filename, -abs(maxVal), abs(maxVal), cmap);
79 80 }
80 81  
81 82 void scalarslice::toEnvi(std::string filename, ptype wavelength, bool append)
... ...
scalarslice.h
... ... @@ -2,7 +2,7 @@
2 2 #define RTS_SCALAR_SLICE
3 3  
4 4 #include "dataTypes.h"
5   -#include "colormap.h"
  5 +#include "rts/graphics/colormap.h"
6 6  
7 7 struct scalarslice
8 8 {
... ... @@ -17,8 +17,8 @@ struct scalarslice
17 17 ~scalarslice();
18 18 void clear();
19 19  
20   - void toImage(std::string filename, ptype vmin, ptype vmax, rts::colormap::colormapType cmap = rts::colormap::cmBrewer);
21   - void toImage(std::string filename, bool positive = true, rts::colormap::colormapType cmap = rts::colormap::cmBrewer);
  20 + void toImage(std::string filename, ptype vmin, ptype vmax, rts::colormapType cmap = rts::cmBrewer);
  21 + void toImage(std::string filename, bool positive = true, rts::colormapType cmap = rts::cmBrewer);
22 22 void toEnvi(std::string filename, ptype wavelength = 0, bool append = false);
23 23  
24 24 };
... ...
sphere.cpp
1 1 #include "sphere.h"
  2 +#include "defaults.h"
2 3  
3 4 #include "rts/math/complex.h"
4 5 #include <complex>
5 6 #include <stdlib.h>
  7 +#include <fstream>
6 8  
7 9 using namespace rts;
8 10 using namespace std;
... ... @@ -13,6 +15,9 @@ int cbessjyva(double v,complex&lt;double&gt; z,double &amp;vm,complex&lt;double&gt;*cjv,
13 15 int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv,
14 16 complex<double>*cyv,complex<double>*cjvp,complex<double>*cyvp);
15 17  
  18 +int bessjyv_sph(int v, double z, double &vm, double* cjv,
  19 + double* cyv, double* cjvp, double* cyvp);
  20 +
16 21 void sphere::calcCoeff(ptype lambda, rtsComplex<ptype> ri)
17 22 {
18 23 /* These calculations are done at high-precision on the CPU
... ... @@ -59,12 +64,6 @@ void sphere::calcCoeff(ptype lambda, rtsComplex&lt;ptype&gt; ri)
59 64 cbessjyva_sph(Nl, ka, vm, cjv_ka, cyv_ka, cjvp_ka, cyvp_ka);
60 65 cbessjyva_sph(Nl, kna, vm, cjv_kna, cyv_kna, cjvp_kna, cyvp_kna);
61 66  
62   -
63   - //cout<<"Begin Sphere---------"<<endl;
64   - //cout<<"Nl = "<<Nl<<endl;
65   - //cout<<"ka = "<<ka<<endl;
66   - //cout<<"kna = "<<kna<<endl;
67   -
68 67 //compute A for each order
69 68 complex<double> i(0, 1);
70 69 complex<double> a, b, c, d;
... ... @@ -83,7 +82,7 @@ void sphere::calcCoeff(ptype lambda, rtsComplex&lt;ptype&gt; ri)
83 82 //calculate A and add it to the list
84 83 An = (2.0 * l + 1.0) * pow(i, l) * (a / b);
85 84 A.push_back(bsComplex(An.real(), An.imag()));
86   - //cout<<"A: "<<An<<endl;
  85 +
87 86  
88 87 //Compute B (external scattering coefficient)
89 88 c = cjv_ka[l] * cjvp_kna[l] * nc - cjv_kna[l] * cjvp_ka[l];
... ... @@ -92,7 +91,206 @@ void sphere::calcCoeff(ptype lambda, rtsComplex&lt;ptype&gt; ri)
92 91 //calculate B and add it to the list
93 92 Bn = (2.0 * l + 1.0) * pow(i, l) * (c / d);
94 93 B.push_back(bsComplex(Bn.real(), Bn.imag()));
95   - //cout<<"B: "<<Bn<<endl;
96 94  
  95 +
  96 + }
  97 +}
  98 +
  99 +void sphere::calcBesselLut(bsComplex* j, ptype k, bsComplex n, int aR)
  100 +{
  101 + /*Compute the look-up-table for spherical bessel functions used inside of the sphere
  102 + j = (Nl + 1) x aR array of values
  103 + aR = resolution of j
  104 + */
  105 +
  106 + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored)
  107 + int bytes = sizeof(complex<double>) * (Nl + 1);
  108 + complex<double>* cjv_knr = (complex<double>*)malloc(bytes);
  109 + complex<double>* cyv_knr = (complex<double>*)malloc(bytes);
  110 + complex<double>* cjvp_knr = (complex<double>*)malloc(bytes);
  111 + complex<double>* cyvp_knr = (complex<double>*)malloc(bytes);
  112 +
  113 + //compute the bessel functions using the CPU-based algorithm
  114 + double vm;
  115 +
  116 + //for each sample along r
  117 + ptype dr = a / (aR - 1);
  118 + ptype r;
  119 + for(int ir = 0; ir < aR; ir++)
  120 + {
  121 + r = ir * dr;
  122 + complex<double> knr( (k*n*r).real(), (k*n*r).imag() );
  123 + cbessjyva_sph(Nl, knr, vm, cjv_knr, cyv_knr, cjvp_knr, cyvp_knr);
  124 +
  125 + //copy the double data to the bsComplex array
  126 + for(int l=0; l<=Nl; l++)
  127 + {
  128 + //deal with the NaN case at the origin
  129 + if(ir == 0)
  130 + {
  131 + if(l == 0)
  132 + j[ir * (Nl+1)] = 1;
  133 + else
  134 + j[ir * (Nl+1) + l] = 0;
  135 + }
  136 + else
  137 + j[ir * (Nl+1) + l] = bsComplex(cjv_knr[l].real(), cjv_knr[l].imag());
  138 + }
  139 + }
  140 +
  141 + /*ofstream outfile("besselout.txt");
  142 + for(int ir = 0; ir < aR; ir++)
  143 + {
  144 + for(int l = 0; l<Nl+1; l++)
  145 + {
  146 + outfile<<j[ir * (Nl+1) + l].real()<<" ";
  147 + }
  148 + outfile<<endl;
  149 + }
  150 + outfile.close();*/
  151 +
  152 +}
  153 +
  154 +void sphere::calcHankelLut(bsComplex* h, ptype k, int rR)
  155 +{
  156 + /*Compute the look-up-table for spherical bessel functions used inside of the sphere
  157 + h_out = (Nl + 1) x aR array of values
  158 + rmin = minimum value of r
  159 + d_max = maximum value of r
  160 + rR = resolution of h_out
  161 + */
  162 +
  163 + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored)
  164 + int bytes = sizeof(double) * (Nl + 1);
  165 + double* cjv_kr = (double*)malloc(bytes);
  166 + double* cyv_kr = (double*)malloc(bytes);
  167 + double* cjvp_kr = (double*)malloc(bytes);
  168 + double* cyvp_kr = (double*)malloc(bytes);
  169 +
  170 + //compute the bessel functions using the CPU-based algorithm
  171 + double vm;
  172 +
  173 +
  174 +
  175 + //for each sample along r
  176 + ptype dr = (d_max - max(a, d_min)) / (rR - 1);
  177 + ptype r;
  178 + for(int ir = 0; ir < rR; ir++)
  179 + {
  180 + r = ir * dr + max(a, d_min);
  181 + double kr = k*r;
  182 + bessjyv_sph(Nl, kr, vm, cjv_kr, cyv_kr, cjvp_kr, cyvp_kr);
  183 +
  184 + //copy the double data to the bsComplex array
  185 + for(int l=0; l<=Nl; l++)
  186 + {
  187 + //h[ir * (Nl+1) + l] = bsComplex(cjv_kr[l].real(), cyv_kr[l].real());
  188 + h[ir * (Nl+1) + l] = bsComplex(cjv_kr[l], cyv_kr[l]);
  189 + }
97 190 }
  191 +
  192 + /*ofstream outfile("hankelout.txt");
  193 + for(int ir = 0; ir < rR; ir++)
  194 + {
  195 + outfile<<ir*dr + max(a, d_min)<<" ";
  196 + for(int l = 0; l<=0; l++)
  197 + {
  198 + outfile<<h[ir * (Nl+1) + l].real()<<" "<<h[ir * (Nl+1) + l].imag()<<" ";
  199 + }
  200 + outfile<<endl;
  201 + }
  202 + outfile.close();*/
  203 +}
  204 +
  205 +void sphere::calcLut(bsComplex* j, bsComplex* h, ptype lambda, bsComplex n, int aR, int rR)
  206 +{
  207 + /*Compute the look-up-tables for spherical bessel functions used both inside and outside of the sphere.
  208 + j = (Nl + 1) x aR array of values
  209 + j = (Nl + 1) x rR array of values
  210 + d_max = maximum distance for the LUT
  211 + aR = resolution of j_in
  212 + rR = resolution of j_out
  213 + */
  214 +
  215 + //compute the magnitude of the k vector
  216 + double k = 2 * PI / lambda;
  217 +
  218 + calcBesselLut(j, k, n, aR);
  219 + calcHankelLut(h, k, rR);
  220 +}
  221 +
  222 +void sphere::calcUp(ptype lambda, bsComplex n, rts::rtsQuad<ptype, 3> nfPlane, unsigned int R)
  223 +{
  224 + //calculate the parameters of the lookup table
  225 +
  226 + //first find the distance to the closest and furthest points on the nearfield plane
  227 + d_min = nfPlane.dist(p);
  228 + d_max = nfPlane.dist_max(p);
  229 +
  230 + //compute the radius of the cross-section of the sphere with the plane
  231 + ptype a_inter = 0;
  232 + if(d_min < a)
  233 + a_inter = sqrt(a - d_min);
  234 +
  235 +
  236 + //calculate the resolution of the Usp and Uip lookup tables
  237 + int aR = 1 + 2 * R * a_inter / (nfPlane(0, 0) - nfPlane(1, 1)).len();
  238 + int dR = 2 * R;
  239 + int thetaR = DEFAULT_SPHERE_THETA_R;
  240 +
  241 + //allocate space for the bessel function LUTs
  242 + bsComplex* j = (bsComplex*)malloc(sizeof(bsComplex) * (Nl + 1) * aR);
  243 + bsComplex* h = (bsComplex*)malloc(sizeof(bsComplex) * (Nl + 1) * dR);
  244 +
  245 + calcLut(j, h, lambda, n, aR, dR);
  246 +
  247 + //allocate space for the Usp lookup texture
  248 + Usp.R[0] = dR;
  249 + Usp.R[1] = thetaR;
  250 + Usp.init_gpu();
  251 +
  252 + //allocate space for the Uip lookup texture
  253 + Uip.R[0] = aR;
  254 + Uip.R[1] = thetaR;
  255 + Uip.init_gpu();
  256 +
  257 +
  258 +
  259 + scalarUsp(h, dR, thetaR);
  260 + scalarUip(j, aR, thetaR);
  261 +
  262 + scalarslice UspMag = Usp.Mag();
  263 + UspMag.toImage("Usp.bmp", true);
  264 +
  265 + scalarslice UipMag = Uip.Mag();
  266 + UipMag.toImage("Uip.bmp", true);
  267 +
  268 + //free memory
  269 + free(j);
  270 + free(h);
  271 +
  272 +}
  273 +
  274 +sphere& sphere::operator=(const sphere &rhs)
  275 +{
  276 + p = rhs.p;
  277 + a = rhs.a;
  278 + iMaterial = rhs.iMaterial;
  279 + Nl = rhs.Nl;
  280 + n = rhs.n;
  281 + B = rhs.B;
  282 + A = rhs.A;
  283 +
  284 + return *this;
  285 +}
  286 +
  287 +sphere::sphere(const sphere &rhs)
  288 +{
  289 + p = rhs.p;
  290 + a = rhs.a;
  291 + iMaterial = rhs.iMaterial;
  292 + Nl = rhs.Nl;
  293 + n = rhs.n;
  294 + B = rhs.B;
  295 + A = rhs.A;
98 296 }
... ...
sphere.cu 0 โ†’ 100644
  1 +#include "sphere.h"
  2 +#include "rts/math/legendre.h"
  3 +
  4 +__global__ void gpuScalarUsp(bsComplex* Usp, bsComplex* h, bsComplex* B, int Nl, int rR, int thetaR)
  5 +{
  6 + //get the current coordinate in the plane slice
  7 + int ir = blockIdx.x * blockDim.x + threadIdx.x;
  8 + int itheta = blockIdx.y * blockDim.y + threadIdx.y;
  9 +
  10 + //make sure that the thread indices are in-bounds
  11 + if(itheta >= thetaR || ir >= rR) return;
  12 +
  13 + int i = itheta * rR + ir;
  14 +
  15 + //ptype dr = (rmax - a) / (rR - 1);
  16 + ptype dtheta = (PI) / (thetaR - 1);
  17 +
  18 + //comptue the current angle and distance
  19 + //ptype r = dr * ir + a;
  20 + ptype theta = dtheta * itheta;
  21 + ptype cos_theta = cos(theta);
  22 +
  23 + //initialize the Legendre polynomial
  24 + ptype P[2];
  25 + rts::init_legendre<ptype>(cos_theta, P[0], P[1]);
  26 +
  27 + //initialize the result
  28 + bsComplex Us((ptype)0, (ptype)0);
  29 +
  30 + //for each order l
  31 + for(int l=0; l <= Nl; l++)
  32 + {
  33 + if(l == 0)
  34 + {
  35 + Us += B[l] * h[ir * (Nl+1) + l] * P[0];
  36 + //Us += P[0];
  37 + }
  38 + else
  39 + {
  40 + if(l > 1)
  41 + {
  42 + rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);
  43 + }
  44 + Us += B[l] * h[ir * (Nl+1) + l] * P[1];
  45 + //Us += P[1];
  46 + }
  47 +
  48 +
  49 + }
  50 + Usp[i] = Us;
  51 + //Usp[i] = h[ir * (Nl+1)];
  52 + //Usp[i] = ir;
  53 +
  54 +}
  55 +
  56 +__global__ void gpuScalarUip(bsComplex* Uip, bsComplex* j, bsComplex* A, int Nl, int aR, int thetaR)
  57 +{
  58 + //get the current coordinate in the plane slice
  59 + int ia = blockIdx.x * blockDim.x + threadIdx.x;
  60 + int itheta = blockIdx.y * blockDim.y + threadIdx.y;
  61 +
  62 + //make sure that the thread indices are in-bounds
  63 + if(itheta >= thetaR || ia >= aR) return;
  64 +
  65 + int i = itheta * aR + ia;
  66 +
  67 + ptype dtheta = (PI) / (thetaR - 1);
  68 +
  69 + //comptue the current angle and distance
  70 + ptype theta = dtheta * itheta;
  71 + ptype cos_theta = cos(theta);
  72 +
  73 + //initialize the Legendre polynomial
  74 + ptype P[2];
  75 + rts::init_legendre<ptype>(cos_theta, P[0], P[1]);
  76 +
  77 + //initialize the result
  78 + bsComplex Ui((ptype)0, (ptype)0);
  79 +
  80 + //for each order l
  81 + for(int l=0; l <= Nl; l++)
  82 + {
  83 + if(l == 0)
  84 + {
  85 + Ui += A[l] * j[ia * (Nl+1) + l] * P[0];
  86 + }
  87 + else
  88 + {
  89 + if(l > 1)
  90 + {
  91 + rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);
  92 + }
  93 + Ui += A[l] * j[ia * (Nl+1) + l] * P[1];
  94 + }
  95 +
  96 +
  97 + }
  98 + Uip[i] = Ui;
  99 +}
  100 +
  101 +void sphere::scalarUsp(bsComplex* h, int rR, int thetaR)
  102 +{
  103 + //copy the hankel function to the GPU
  104 + bsComplex* gpu_h;
  105 + HANDLE_ERROR( cudaMalloc( (void**)&gpu_h, sizeof(bsComplex) * (Nl + 1) * rR ) );
  106 + HANDLE_ERROR( cudaMemcpy( gpu_h, h, sizeof(bsComplex) * (Nl + 1) * rR, cudaMemcpyHostToDevice ) );
  107 +
  108 + //allocate memory for the scattering coefficients
  109 + bsComplex* gpuB;
  110 + HANDLE_ERROR(cudaMalloc((void**) &gpuB, (Nl+1) * sizeof(bsComplex)));
  111 + //copy the scattering coefficients to the GPU
  112 + HANDLE_ERROR(cudaMemcpy(gpuB, &B[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice));
  113 +
  114 + //create one thread for each pixel of the field slice
  115 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  116 + dim3 dimGrid((Usp.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Usp.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  117 +
  118 + gpuScalarUsp<<<dimGrid, dimBlock>>>(Usp.x_hat, gpu_h, gpuB, Nl, rR, thetaR);
  119 +
  120 + //free memory
  121 + cudaFree(gpu_h);
  122 + cudaFree(gpuB);
  123 +
  124 +}
  125 +
  126 +void sphere::scalarUip(bsComplex* j, int rR, int thetaR)
  127 +{
  128 + //copy the bessel and hankel LUTs to the GPU
  129 + bsComplex* gpu_j;
  130 + HANDLE_ERROR( cudaMalloc( (void**)&gpu_j, sizeof(bsComplex) * (Nl + 1) * rR ) );
  131 + HANDLE_ERROR( cudaMemcpy( gpu_j, j, sizeof(bsComplex) * (Nl + 1) * rR, cudaMemcpyHostToDevice ) );
  132 +
  133 + //allocate memory for the scattering coefficients
  134 + bsComplex* gpuA;
  135 + HANDLE_ERROR(cudaMalloc((void**) &gpuA, (Nl+1) * sizeof(bsComplex)));
  136 + //copy the scattering coefficients to the GPU
  137 + HANDLE_ERROR(cudaMemcpy(gpuA, &A[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice));
  138 +
  139 + //create one thread for each pixel of the field slice
  140 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  141 + dim3 dimGrid((Uip.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uip.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  142 +
  143 + gpuScalarUip<<<dimGrid, dimBlock>>>(Uip.x_hat, gpu_j, gpuA, Nl, rR, thetaR);
  144 +
  145 + //free memory
  146 + cudaFree(gpu_j);
  147 + cudaFree(gpuA);
  148 +
  149 +}
... ...
sphere.h
... ... @@ -22,12 +22,12 @@ struct sphere
22 22 //sphere material index
23 23 int iMaterial;
24 24  
25   - //rtsPointer to the scattered field produced by a plane wave
  25 + //GPU pointer to the scattered field produced by a plane wave
26 26 // this is a function of cos(theta) and |r| (distance from sphere center)
27   - //fieldslice surface;
28   -
29   - //resolution of the scattered field
30   - int thetaR, rR;
  27 + fieldslice Usp;
  28 + fieldslice Uip;
  29 + ptype d_min;
  30 + ptype d_max;
31 31  
32 32 //sphere order
33 33 int Nl;
... ... @@ -50,6 +50,12 @@ struct sphere
50 50 //surface = fieldslice(ang, ang/2);
51 51 }
52 52  
  53 + //assignment operator
  54 + sphere & operator=(const sphere &rhs);
  55 +
  56 + //copy constructor
  57 + sphere(const sphere &rhs);
  58 +
53 59 std::string toStr()
54 60 {
55 61 std::stringstream ss;
... ... @@ -66,8 +72,19 @@ struct sphere
66 72 Nl = ceil( (2 * PI * a) / lambda + 4 * pow( (2 * PI * a) / lambda, 1.0/3.0) + 2);
67 73 }
68 74  
69   - void calcCoeff(ptype lambda, rts::rtsComplex<ptype> n);
  75 + //compute the scattering coefficients
  76 + void calcCoeff(ptype lambda, bsComplex n);
  77 +
  78 + //compute the bessel function look-up tables
  79 + void calcLut(bsComplex* j, bsComplex* h, ptype lambda, bsComplex n, int aR, int rR);
  80 + void calcBesselLut(bsComplex* j, ptype k, bsComplex n, int aR);
  81 + void calcHankelLut(bsComplex* h, ptype k, int rR);
  82 +
  83 + //calculate the scattering domain Us(theta, r)
  84 + void calcUp(ptype lambda, bsComplex n, rts::rtsQuad<ptype, 3> nfPlane, unsigned int R);
70 85  
  86 + void scalarUsp(bsComplex* h, int rR, int thetaR);
  87 + void scalarUip(bsComplex* j, int aR, int thetaR);
71 88  
72 89  
73 90  
... ...