Commit 51b6469a3ee77583099edb0a57e1bb7859c28fd1

Authored by dmayerich
1 parent b6179de6

added look-up tables

@@ -13,7 +13,9 @@ @@ -13,7 +13,9 @@
13 // 13 //
14 #define _USE_MATH_DEFINES 14 #define _USE_MATH_DEFINES
15 #include <math.h> 15 #include <math.h>
16 -#include "bessel.h" 16 +#include "bessel.h"
  17 +
  18 +#define PI 3.14159
17 19
18 double gamma(double x); 20 double gamma(double x);
19 // 21 //
@@ -426,7 +428,7 @@ int bessjynb(int n,double x,int &nm,double *jn,double *yn, @@ -426,7 +428,7 @@ int bessjynb(int n,double x,int &nm,double *jn,double *yn,
426 0.2775764465332031, 428 0.2775764465332031,
427 -1.993531733751297, 429 -1.993531733751297,
428 2.724882731126854e1}; 430 2.724882731126854e1};
429 - 431 +
430 int i,k,m; 432 int i,k,m;
431 nm = n; 433 nm = n;
432 if ((x < 0.0) || (n < 0)) return 1; 434 if ((x < 0.0) || (n < 0)) return 1;
@@ -702,5 +704,26 @@ int bessjyv(double v,double x,double &vm,double *jv,double *yv, @@ -702,5 +704,26 @@ int bessjyv(double v,double x,double &vm,double *jv,double *yv,
702 } 704 }
703 vm = n + v0; 705 vm = n + v0;
704 return 0; 706 return 0;
  707 +}
  708 +
  709 +int bessjyv_sph(int v, double z, double &vm, double* cjv,
  710 + double* cyv, double* cjvp, double* cyvp)
  711 +{
  712 + //first, compute the bessel functions of fractional order
  713 + bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
  714 +
  715 + //iterate through each and scale
  716 + for(int n = 0; n<=v; n++)
  717 + {
  718 +
  719 + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0));
  720 + cyv[n] = cyv[n] * sqrt(PI/(z * 2.0));
  721 +
  722 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(PI / (z * 2.0));
  723 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(PI / (z * 2.0));
  724 + }
  725 +
  726 + return 0;
  727 +
705 } 728 }
706 - 729 +
@@ -724,6 +724,7 @@ int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv, @@ -724,6 +724,7 @@ int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv,
724 //iterate through each and scale 724 //iterate through each and scale
725 for(int n = 0; n<=v; n++) 725 for(int n = 0; n<=v; n++)
726 { 726 {
  727 +
727 cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); 728 cjv[n] = cjv[n] * sqrt(PI/(z * 2.0));
728 cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); 729 cyv[n] = cyv[n] * sqrt(PI/(z * 2.0));
729 730
colormap.h deleted
1 -#ifndef RTS_COLORMAP_H  
2 -#define RTS_COLORMAP_H  
3 -  
4 -#include <string>  
5 -#include <qimage.h>  
6 -#include <qcolor.h>  
7 -#include "rts/cuda/error.h"  
8 -  
9 -  
10 -#define BREWER_CTRL_PTS 11  
11 -  
12 -#ifdef __CUDACC__  
13 -texture<float4, cudaTextureType1D> cudaTexBrewer;  
14 -static cudaArray* gpuBrewer;  
15 -#endif  
16 -  
17 -  
18 -  
19 -namespace rts{  
20 - namespace colormap{  
21 -  
22 -enum colormapType {cmBrewer, cmGrayscale};  
23 -  
24 -static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size)  
25 -{  
26 - //create an image object  
27 - QImage image(x_size, y_size, QImage::Format_RGB32);  
28 -  
29 - int i;  
30 - unsigned char r, g, b;  
31 - unsigned int x, y;  
32 - for(y=0; y<y_size; y++)  
33 - for(x=0; x<x_size; x++)  
34 - {  
35 - //calculate the 1D index  
36 - i = y * x_size + x;  
37 -  
38 - r = buffer[i * 3 + 0];  
39 - g = buffer[i * 3 + 1];  
40 - b = buffer[i * 3 + 2];  
41 -  
42 - //set the image pixel  
43 - QColor color(r, g, b);  
44 - image.setPixel(x, y, color.rgb());  
45 - }  
46 -  
47 - image.save(filename.c_str());  
48 -}  
49 -  
50 -#ifdef __CUDACC__  
51 -static void initBrewer()  
52 -{  
53 - //initialize the Brewer colormap  
54 -  
55 - //allocate CPU space  
56 - float4 cpuColorMap[BREWER_CTRL_PTS];  
57 -  
58 - //define control rtsPoints  
59 - cpuColorMap[0] = make_float4(0.192157f, 0.211765f, 0.584314f, 1.0f);  
60 - cpuColorMap[1] = make_float4(0.270588f, 0.458824f, 0.705882f, 1.0f);  
61 - cpuColorMap[2] = make_float4(0.454902f, 0.678431f, 0.819608f, 1.0f);  
62 - cpuColorMap[3] = make_float4(0.670588f, 0.85098f, 0.913725f, 1.0f);  
63 - cpuColorMap[4] = make_float4(0.878431f, 0.952941f, 0.972549f, 1.0f);  
64 - cpuColorMap[5] = make_float4(1.0f, 1.0f, 0.74902f, 1.0f);  
65 - cpuColorMap[6] = make_float4(0.996078f, 0.878431f, 0.564706f, 1.0f);  
66 - cpuColorMap[7] = make_float4(0.992157f, 0.682353f, 0.380392f, 1.0f);  
67 - cpuColorMap[8] = make_float4(0.956863f, 0.427451f, 0.262745f, 1.0f);  
68 - cpuColorMap[9] = make_float4(0.843137f, 0.188235f, 0.152941f, 1.0f);  
69 - cpuColorMap[10] = make_float4(0.647059f, 0.0f, 0.14902f, 1.0f);  
70 -  
71 -  
72 - int width = BREWER_CTRL_PTS;  
73 - int height = 0;  
74 -  
75 -  
76 - // allocate array and copy colormap data  
77 - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);  
78 -  
79 - HANDLE_ERROR(cudaMallocArray(&gpuBrewer, &channelDesc, width, height));  
80 -  
81 - HANDLE_ERROR(cudaMemcpyToArray(gpuBrewer, 0, 0, cpuColorMap, sizeof(float4)*width, cudaMemcpyHostToDevice));  
82 -  
83 - // set texture parameters  
84 - cudaTexBrewer.addressMode[0] = cudaAddressModeClamp;  
85 - //texBrewer.addressMode[1] = cudaAddressModeClamp;  
86 - cudaTexBrewer.filterMode = cudaFilterModeLinear;  
87 - cudaTexBrewer.normalized = true; // access with normalized texture coordinates  
88 -  
89 - // Bind the array to the texture  
90 - HANDLE_ERROR(cudaBindTextureToArray( cudaTexBrewer, gpuBrewer, channelDesc));  
91 -  
92 -}  
93 -  
94 -static void destroyBrewer()  
95 -{  
96 - HANDLE_ERROR(cudaFreeArray(gpuBrewer));  
97 -  
98 -}  
99 -  
100 -template<class T>  
101 -__global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1)  
102 -{  
103 - int i = blockIdx.x * blockDim.x + threadIdx.x;  
104 - if(i >= N) return;  
105 -  
106 - //compute the normalized value on [minVal maxVal]  
107 - float a = (gpuSource[i] - minVal) / (maxVal - minVal);  
108 -  
109 - //lookup the color  
110 - float shift = 1.0/BREWER_CTRL_PTS;  
111 - float4 color = tex1D(cudaTexBrewer, a+shift);  
112 -  
113 - gpuDest[i * 3 + 0] = 255 * color.x;  
114 - gpuDest[i * 3 + 1] = 255 * color.y;  
115 - gpuDest[i * 3 + 2] = 255 * color.z;  
116 -}  
117 -  
118 -template<class T>  
119 -__global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1)  
120 -{  
121 - int i = blockIdx.x * blockDim.x + threadIdx.x;  
122 - if(i >= N) return;  
123 -  
124 - //compute the normalized value on [minVal maxVal]  
125 - float a = (gpuSource[i] - minVal) / (maxVal - minVal);  
126 -  
127 - gpuDest[i * 3 + 0] = 255 * a;  
128 - gpuDest[i * 3 + 1] = 255 * a;  
129 - gpuDest[i * 3 + 2] = 255 * a;  
130 -}  
131 -  
132 -template<class T>  
133 -static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T minVal = 0, T maxVal = 1, colormapType cm = cmGrayscale, int blockDim = 128)  
134 -{  
135 - //This function converts a scalar field on the GPU to a color image on the GPU  
136 - int gridDim = (nVals + blockDim - 1)/blockDim;  
137 - if(cm == cmGrayscale)  
138 - applyGrayscale<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal);  
139 - else if(cm == cmBrewer)  
140 - {  
141 - initBrewer();  
142 - applyBrewer<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal);  
143 - destroyBrewer();  
144 - }  
145 -  
146 -}  
147 -  
148 -template<class T>  
149 -static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T minVal, T maxVal, colormapType cm = cmGrayscale)  
150 -{  
151 - //this function converts a scalar field on the GPU to a color image on the CPU  
152 -  
153 - //first create the color image on the GPU  
154 -  
155 - //allocate GPU memory for the color image  
156 - unsigned char* gpuDest;  
157 - HANDLE_ERROR(cudaMalloc( (void**)&gpuDest, sizeof(unsigned char) * nVals * 3 ));  
158 -  
159 - //HANDLE_ERROR(cudaMemset(gpuSource, 0, sizeof(T) * nVals));  
160 -  
161 - //create the image on the gpu  
162 - gpu2gpu(gpuSource, gpuDest, nVals, minVal, maxVal, cm);  
163 -  
164 - //HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3));  
165 -  
166 - //copy the image from the GPU to the CPU  
167 - HANDLE_ERROR(cudaMemcpy(cpuDest, gpuDest, sizeof(unsigned char) * nVals * 3, cudaMemcpyDeviceToHost));  
168 -  
169 - HANDLE_ERROR(cudaFree( gpuDest ));  
170 -  
171 -}  
172 -  
173 -template<typename T>  
174 -static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale)  
175 -{  
176 - //allocate a color buffer  
177 - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size);  
178 -  
179 - //do the mapping  
180 - gpu2cpu<T>(gpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm);  
181 -  
182 - //copy the buffer to an image  
183 - buffer2image(cpuBuffer, fileDest, x_size, y_size);  
184 -  
185 - free(cpuBuffer);  
186 -}  
187 -  
188 -#endif  
189 -  
190 -template<class T>  
191 -static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T valMin, T valMax, colormapType cm = cmGrayscale)  
192 -{  
193 - int i;  
194 - float a;  
195 - float range = valMax - valMin;  
196 - for(i = 0; i<nVals; i++)  
197 - {  
198 - //normalize to the range [valMin valMax]  
199 - a = (cpuSource[i] - valMin) / range;  
200 -  
201 - cpuDest[i * 3 + 0] = 255 * a;  
202 - cpuDest[i * 3 + 1] = 255 * a;  
203 - cpuDest[i * 3 + 2] = 255 * a;  
204 - }  
205 -  
206 -}  
207 -  
208 -  
209 -  
210 -template<typename T>  
211 -static void cpu2image(T* cpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale)  
212 -{  
213 - //allocate a color buffer  
214 - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size);  
215 -  
216 - //do the mapping  
217 - cpu2cpu<T>(cpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm);  
218 -  
219 - //copy the buffer to an image  
220 - buffer2image(cpuBuffer, fileDest, x_size, y_size);  
221 -  
222 - free(cpuBuffer);  
223 -  
224 -}  
225 -  
226 -}} //end namespace colormap and rts  
227 -  
228 -#endif  
229 -  
@@ -24,6 +24,8 @@ typedef double ptype; @@ -24,6 +24,8 @@ typedef double ptype;
24 24
25 typedef ptype fieldPoint; 25 typedef ptype fieldPoint;
26 26
  27 +extern bool verbose;
  28 +
27 //hybrid GPU/CPU complex data typ 29 //hybrid GPU/CPU complex data typ
28 #include "rts/math/complex.h" 30 #include "rts/math/complex.h"
29 #include "rts/math/vector.h" 31 #include "rts/math/vector.h"
@@ -15,14 +15,14 @@ @@ -15,14 +15,14 @@
15 #define DEFAULT_FOCUS_X 0 15 #define DEFAULT_FOCUS_X 0
16 #define DEFAULT_FOCUS_Y 0 16 #define DEFAULT_FOCUS_Y 0
17 #define DEFAULT_FOCUS_Z 0 17 #define DEFAULT_FOCUS_Z 0
18 -#define DEFAULT_INCIDENT_ORDER 100 18 +//#define DEFAULT_INCIDENT_ORDER 20
19 #define DEFAULT_STABILITY_PARM 1.4 19 #define DEFAULT_STABILITY_PARM 1.4
20 20
21 //optics 21 //optics
22 -#define DEFAULT_CONDENSER_MIN 0.0 22 +#define DEFAULT_CONDENSER_MIN 0
23 #define DEFAULT_CONDENSER_MAX 1 23 #define DEFAULT_CONDENSER_MAX 1
24 24
25 -#define DEFAULT_OBJECTIVE_MIN 0.0 25 +#define DEFAULT_OBJECTIVE_MIN 0
26 #define DEFAULT_OBJECTIVE_MAX 1 26 #define DEFAULT_OBJECTIVE_MAX 1
27 27
28 //incident light direction 28 //incident light direction
@@ -36,17 +36,20 @@ @@ -36,17 +36,20 @@
36 //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective 36 //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective
37 37
38 38
39 -#define DEFAULT_SLICE_MIN_X -5  
40 -#define DEFAULT_SLICE_MIN_Y 0  
41 -#define DEFAULT_SLICE_MIN_Z -5 39 +#define DEFAULT_PLANE_MIN_X -5
  40 +#define DEFAULT_PLANE_MIN_Y 0
  41 +#define DEFAULT_PLANE_MIN_Z -5
42 42
43 -#define DEFAULT_SLICE_MAX_X 5  
44 -#define DEFAULT_SLICE_MAX_Y 0  
45 -#define DEFAULT_SLICE_MAX_Z 5 43 +#define DEFAULT_PLANE_MAX_X 5
  44 +#define DEFAULT_PLANE_MAX_Y 0
  45 +#define DEFAULT_PLANE_MAX_Z 5
46 46
47 -#define DEFAULT_SLICE_NORM_X 0  
48 -#define DEFAULT_SLICE_NORM_Y 1  
49 -#define DEFAULT_SLICE_NORM_Z 0 47 +#define DEFAULT_PLANE_NORM_X 0
  48 +#define DEFAULT_PLANE_NORM_Y 1
  49 +#define DEFAULT_PLANE_NORM_Z 0
  50 +
  51 +#define DEFAULT_PLANE_SIZE 40
  52 +#define DEFAULT_PLANE_POSITION 0
50 53
51 54
52 /* 55 /*
@@ -64,21 +67,23 @@ @@ -64,21 +67,23 @@
64 */ 67 */
65 68
66 69
67 -#define DEFAULT_FIELD_ORDER 200 70 +#define DEFAULT_FIELD_ORDER 10
68 71
69 -#define DEFAULT_SAMPLES 200 72 +#define DEFAULT_SAMPLES 400
70 73
71 #define DEFAULT_SLICE_RES 256 74 #define DEFAULT_SLICE_RES 256
72 75
  76 +#define DEFAULT_SPHERE_THETA_R 1000
  77 +
73 #define DEFAULT_PADDING 1 78 #define DEFAULT_PADDING 1
74 #define DEFAULT_SUPERSAMPLE 1 79 #define DEFAULT_SUPERSAMPLE 1
75 80
76 -#define DEFAULT_INTENSITY_FILE "testappend" 81 +#define DEFAULT_INTENSITY_FILE "out_i.bmp"
77 #define DEFAULT_TRANSMITTANCE_FILE "" 82 #define DEFAULT_TRANSMITTANCE_FILE ""
78 -#define DEFAULT_ABSORBANCE_FILE "out_a" 83 +#define DEFAULT_ABSORBANCE_FILE "out_a.bmp"
79 #define DEFAULT_NEAR_FILE "out_n.bmp" 84 #define DEFAULT_NEAR_FILE "out_n.bmp"
80 #define DEFAULT_FAR_FILE "out_f.bmp" 85 #define DEFAULT_FAR_FILE "out_f.bmp"
81 -#define DEFAULT_EXTENDED_SOURCE "einstein_small.jpg" 86 +#define DEFAULT_EXTENDED_SOURCE ""
82 #define DEFAULT_FIELD_TYPE "magnitude" 87 #define DEFAULT_FIELD_TYPE "magnitude"
83 #define DEFAULT_FORMAT fileoutStruct::formatImage 88 #define DEFAULT_FORMAT fileoutStruct::formatImage
84 #define DEFAULT_COLORMAP "brewer" 89 #define DEFAULT_COLORMAP "brewer"
@@ -8,14 +8,16 @@ @@ -8,14 +8,16 @@
8 using namespace std; 8 using namespace std;
9 9
10 fieldslice::fieldslice(unsigned int x_size, unsigned int y_size) 10 fieldslice::fieldslice(unsigned int x_size, unsigned int y_size)
11 -{ 11 +{
  12 + x_hat = y_hat = z_hat = NULL;
  13 +
12 //save the slice resolution 14 //save the slice resolution
13 R[0] = x_size; 15 R[0] = x_size;
14 R[1] = x_size; 16 R[1] = x_size;
15 17
16 scalarField = true; 18 scalarField = true;
17 19
18 - //init_gpu(); 20 + init_gpu();
19 21
20 22
21 } 23 }
@@ -101,5 +103,5 @@ fieldslice::fieldslice() @@ -101,5 +103,5 @@ fieldslice::fieldslice()
101 103
102 fieldslice::~fieldslice() 104 fieldslice::~fieldslice()
103 { 105 {
104 - //kill_gpu(); 106 + kill_gpu();
105 } 107 }
1 #include "fieldslice.h" 1 #include "fieldslice.h"
2 #include "dataTypes.h" 2 #include "dataTypes.h"
3 -#include "rts/cuda/error.h" 3 +#include "rts/cuda/error.h"
  4 +#include "rts/cuda/threads.h"
4 5
5 6
6 __global__ void field_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* I, unsigned int N) 7 __global__ void field_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* I, unsigned int N)
7 { 8 {
8 //compute the index for this thread 9 //compute the index for this thread
9 - int i = blockIdx.x * blockDim.x + threadIdx.x; 10 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  11 + int i = ThreadIndex1D();
  12 +
10 if(i >= N) return; 13 if(i >= N) return;
11 14
12 ptype xm = x[i].abs(); 15 ptype xm = x[i].abs();
@@ -66,7 +69,8 @@ __global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, pty @@ -66,7 +69,8 @@ __global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, pty
66 __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) 69 __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N)
67 { 70 {
68 //compute the index for this thread 71 //compute the index for this thread
69 - int i = blockIdx.x * blockDim.x + threadIdx.x; 72 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  73 + int i = ThreadIndex1D();
70 if(i >= N) return; 74 if(i >= N) return;
71 75
72 V[i] = field_component[i].real(); 76 V[i] = field_component[i].real();
@@ -75,7 +79,8 @@ __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) @@ -75,7 +79,8 @@ __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N)
75 __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N) 79 __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N)
76 { 80 {
77 //compute the index for this thread 81 //compute the index for this thread
78 - int i = blockIdx.x * blockDim.x + threadIdx.x; 82 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  83 + int i = ThreadIndex1D();
79 if(i >= N) return; 84 if(i >= N) return;
80 85
81 V[i] = field_component[i].imag(); 86 V[i] = field_component[i].imag();
@@ -84,7 +89,8 @@ __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned i @@ -84,7 +89,8 @@ __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned i
84 __global__ void field_sqrt(ptype* input, ptype* output, unsigned int N) 89 __global__ void field_sqrt(ptype* input, ptype* output, unsigned int N)
85 { 90 {
86 //compute the index for this thread 91 //compute the index for this thread
87 - int i = blockIdx.x * blockDim.x + threadIdx.x; 92 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  93 + int i = ThreadIndex1D();
88 if(i >= N) return; 94 if(i >= N) return;
89 95
90 output[i] = sqrt(input[i]); 96 output[i] = sqrt(input[i]);
@@ -115,7 +121,8 @@ scalarslice fieldslice::Mag() @@ -115,7 +121,8 @@ scalarslice fieldslice::Mag()
115 121
116 //compute the total number of values in the slice 122 //compute the total number of values in the slice
117 unsigned int N = R[0] * R[1]; 123 unsigned int N = R[0] * R[1];
118 - int gridDim = (N+BLOCK-1)/BLOCK; 124 + //int gridDim = (N+BLOCK-1)/BLOCK;
  125 + dim3 gridDim = GenGrid1D(N, BLOCK);
119 126
120 field_intensity<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, result->S, N); 127 field_intensity<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, result->S, N);
121 field_sqrt<<<gridDim, BLOCK>>>(result->S, result->S, N); 128 field_sqrt<<<gridDim, BLOCK>>>(result->S, result->S, N);
@@ -132,7 +139,8 @@ scalarslice fieldslice::Real() @@ -132,7 +139,8 @@ scalarslice fieldslice::Real()
132 139
133 //compute the total number of values in the slice 140 //compute the total number of values in the slice
134 unsigned int N = R[0] * R[1]; 141 unsigned int N = R[0] * R[1];
135 - int gridDim = (N+BLOCK-1)/BLOCK; 142 + //int gridDim = (N+BLOCK-1)/BLOCK;
  143 + dim3 gridDim = GenGrid1D(N, BLOCK);
136 144
137 field_real<<<gridDim, BLOCK>>>(x_hat, result->S, N); 145 field_real<<<gridDim, BLOCK>>>(x_hat, result->S, N);
138 146
@@ -148,7 +156,8 @@ scalarslice fieldslice::Imag() @@ -148,7 +156,8 @@ scalarslice fieldslice::Imag()
148 156
149 //compute the total number of values in the slice 157 //compute the total number of values in the slice
150 unsigned int N = R[0] * R[1]; 158 unsigned int N = R[0] * R[1];
151 - int gridDim = (N+BLOCK-1)/BLOCK; 159 + //int gridDim = (N+BLOCK-1)/BLOCK;
  160 + dim3 gridDim = GenGrid1D(N, BLOCK);
152 161
153 field_imaginary<<<gridDim, BLOCK>>>(x_hat, result->S, N); 162 field_imaginary<<<gridDim, BLOCK>>>(x_hat, result->S, N);
154 163
@@ -192,7 +201,6 @@ void fieldslice::ScaleField(ptype v) @@ -192,7 +201,6 @@ void fieldslice::ScaleField(ptype v)
192 201
193 //compute the total number of values in the slice 202 //compute the total number of values in the slice
194 unsigned int N = R[0] * R[1]; 203 unsigned int N = R[0] * R[1];
195 - //cout<<"Size of mag field: "<<N<<endl;  
196 int gridDim = (N+BLOCK-1)/BLOCK; 204 int gridDim = (N+BLOCK-1)/BLOCK;
197 205
198 field_scale<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, N, v); 206 field_scale<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, N, v);
@@ -200,19 +208,23 @@ void fieldslice::ScaleField(ptype v) @@ -200,19 +208,23 @@ void fieldslice::ScaleField(ptype v)
200 } 208 }
201 209
202 void fieldslice::init_gpu() 210 void fieldslice::init_gpu()
203 -{ 211 +{
  212 + //if the field has no size, return
  213 + if(R[0] == 0 || R[1] == 0)
  214 + return;
  215 +
  216 + //free any previous memory allocations
  217 + if(x_hat)
  218 + HANDLE_ERROR(cudaFree(x_hat));
  219 + if(y_hat)
  220 + HANDLE_ERROR(cudaFree(y_hat));
  221 + if(z_hat)
  222 + HANDLE_ERROR(cudaFree(z_hat));
  223 +
204 //allocate space on the GPU for the field slice 224 //allocate space on the GPU for the field slice
205 HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex))); 225 HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex)));
206 - //HANDLE_ERROR(cudaMemset(x_hat, 0, R[0] * R[1] * sizeof(bsComplex)));  
207 226
208 - //if the field is scalar, y_hat and z_hat are unused  
209 - if(scalarField)  
210 - {  
211 - y_hat = NULL;  
212 - z_hat = NULL;  
213 -  
214 - }  
215 - else 227 + if(!scalarField)
216 { 228 {
217 HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex))); 229 HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex)));
218 //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex))); 230 //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex)));
@@ -233,6 +245,8 @@ void fieldslice::kill_gpu() @@ -233,6 +245,8 @@ void fieldslice::kill_gpu()
233 if(z_hat != NULL) 245 if(z_hat != NULL)
234 HANDLE_ERROR(cudaFree(z_hat)); 246 HANDLE_ERROR(cudaFree(z_hat));
235 247
  248 + x_hat = y_hat = z_hat = NULL;
  249 +
236 } 250 }
237 251
238 void fieldslice::clear_gpu() 252 void fieldslice::clear_gpu()
@@ -275,7 +289,7 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) @@ -275,7 +289,7 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv)
275 result.scalarField = scalarField; 289 result.scalarField = scalarField;
276 290
277 //allocate space for the new field 291 //allocate space for the new field
278 - result.init_gpu(); 292 + //result.init_gpu();
279 293
280 //create one thread for each pixel of the field slice 294 //create one thread for each pixel of the field slice
281 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); 295 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
@@ -291,3 +305,57 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) @@ -291,3 +305,57 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv)
291 305
292 return result; 306 return result;
293 } 307 }
  308 +
  309 +fieldslice::fieldslice(const fieldslice& rhs)
  310 +{
  311 + R[0] = rhs.R[0];
  312 + R[1] = rhs.R[1];
  313 + scalarField = rhs.scalarField;
  314 +
  315 + x_hat = y_hat = z_hat = NULL;
  316 +
  317 + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1];
  318 + if(rhs.x_hat != NULL)
  319 + {
  320 + HANDLE_ERROR(cudaMalloc( (void**)&x_hat, bytes));
  321 + HANDLE_ERROR(cudaMemcpy( x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice));
  322 + }
  323 + if(rhs.y_hat != NULL)
  324 + {
  325 + HANDLE_ERROR(cudaMalloc( (void**)&y_hat, bytes));
  326 + HANDLE_ERROR(cudaMemcpy( y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice));
  327 + }
  328 + if(rhs.z_hat != NULL)
  329 + {
  330 + HANDLE_ERROR(cudaMalloc( (void**)&z_hat, bytes));
  331 + HANDLE_ERROR(cudaMemcpy( z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice));
  332 + }
  333 +
  334 +}
  335 +
  336 +fieldslice& fieldslice::operator=(const fieldslice& rhs)
  337 +{
  338 + //make sure this isn't a self-allocation
  339 + if(this != &rhs)
  340 + {
  341 + //make a shallow copy
  342 + R[0] = rhs.R[0];
  343 + R[1] = rhs.R[1];
  344 + scalarField = rhs.scalarField;
  345 +
  346 + //initialize to new parameters
  347 + init_gpu();
  348 +
  349 + //make a deep copy
  350 + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1];
  351 + if(x_hat != NULL)
  352 + HANDLE_ERROR(cudaMemcpy(x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice));
  353 + if(y_hat != NULL)
  354 + HANDLE_ERROR(cudaMemcpy(y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice));
  355 + if(z_hat != NULL)
  356 + HANDLE_ERROR(cudaMemcpy(z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice));
  357 + }
  358 +
  359 + return *this;
  360 +
  361 +}
@@ -31,6 +31,9 @@ struct fieldslice @@ -31,6 +31,9 @@ struct fieldslice
31 31
32 ~fieldslice(); 32 ~fieldslice();
33 33
  34 + //copy constructor
  35 + fieldslice(const fieldslice& rhs);
  36 +
34 //void setPos(bsPoint pMin, bsPoint pMax, bsVector N); 37 //void setPos(bsPoint pMin, bsPoint pMax, bsVector N);
35 38
36 scalarslice Mag(); 39 scalarslice Mag();
@@ -47,6 +50,7 @@ struct fieldslice @@ -47,6 +50,7 @@ struct fieldslice
47 50
48 //crop a region from the field 51 //crop a region from the field
49 fieldslice crop(int u, int v, int su, int sv); 52 fieldslice crop(int u, int v, int su, int sv);
  53 + fieldslice& operator=(const fieldslice& rhs);
50 54
51 void init_gpu(); 55 void init_gpu();
52 void kill_gpu(); 56 void kill_gpu();
@@ -186,11 +186,21 @@ void fileoutStruct::Save(microscopeStruct* scope) @@ -186,11 +186,21 @@ void fileoutStruct::Save(microscopeStruct* scope)
186 //save images of the fields in the microscope 186 //save images of the fields in the microscope
187 187
188 //if the user specifies an extended source 188 //if the user specifies an extended source
189 - if(scope->focalPoints.size() > 1) 189 + if(scope->focalPoints.size() > 0)
190 { 190 {
191 //simulate the extended source and output the detector image 191 //simulate the extended source and output the detector image
192 scope->SimulateExtendedSource(); 192 scope->SimulateExtendedSource();
193 193
  194 + //saveNearField(&scope->nf);
  195 + saveFarField(scope);
  196 +
  197 + //save the detector images
  198 + saveDetector(scope);
  199 +
  200 + //simulate scattering for the last point (so that you have a near field image)
  201 + scope->SimulateScattering();
  202 + saveNearField(&scope->nf);
  203 +
194 } 204 }
195 else 205 else
196 { 206 {
@@ -203,12 +213,15 @@ void fileoutStruct::Save(microscopeStruct* scope) @@ -203,12 +213,15 @@ void fileoutStruct::Save(microscopeStruct* scope)
203 //run the far-field simulation 213 //run the far-field simulation
204 scope->SimulateImaging(); 214 scope->SimulateImaging();
205 215
  216 + //saveNearField(&scope->nf);
206 saveFarField(scope); 217 saveFarField(scope);
207 218
  219 + //save the detector images
  220 + saveDetector(scope);
  221 +
208 } 222 }
209 223
210 - //save the detector images  
211 - saveDetector(scope); 224 +
212 225
213 226
214 } 227 }
@@ -5,7 +5,7 @@ @@ -5,7 +5,7 @@
5 //#include "defaults.h" 5 //#include "defaults.h"
6 #include "dataTypes.h" 6 #include "dataTypes.h"
7 7
8 -#include "colormap.h" 8 +#include "rts/graphics/colormap.h"
9 #include "fieldslice.h" 9 #include "fieldslice.h"
10 #include "nearfield.h" 10 #include "nearfield.h"
11 #include "microscope.h" 11 #include "microscope.h"
@@ -34,7 +34,7 @@ struct fileoutStruct{ @@ -34,7 +34,7 @@ struct fileoutStruct{
34 //image_source source; 34 //image_source source;
35 35
36 //color map info 36 //color map info
37 - rts::colormap::colormapType colormap; 37 + rts::colormapType colormap;
38 ptype colorMax; 38 ptype colorMax;
39 39
40 void Save(microscopeStruct* scope); 40 void Save(microscopeStruct* scope);
@@ -24,6 +24,7 @@ microscopeStruct* SCOPE; @@ -24,6 +24,7 @@ microscopeStruct* SCOPE;
24 #include "warnings.h" 24 #include "warnings.h"
25 25
26 fileoutStruct gFileOut; 26 fileoutStruct gFileOut;
  27 +bool verbose = false;
27 using namespace std; 28 using namespace std;
28 29
29 int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, 30 int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv,
@@ -31,32 +32,19 @@ int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, @@ -31,32 +32,19 @@ int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv,
31 32
32 int main(int argc, char *argv[]) 33 int main(int argc, char *argv[])
33 { 34 {
34 - //test Envi loading and saving  
35 - //EnviFile envi("testenvi", "w");  
36 -  
37 - //float* data = (float*)malloc(sizeof(float) * 100 * 100);  
38 - //envi.addBand(data, 100, 100, 100);  
39 -  
40 - //envi.close();  
41 -  
42 - //return 0;  
43 35
44 SCOPE = new microscopeStruct(); 36 SCOPE = new microscopeStruct();
45 37
46 - cout<<SCOPE->nf.Uf.R[0]<<endl;  
47 -  
48 LoadParameters(argc, argv); 38 LoadParameters(argc, argv);
49 39
50 - //TestSimulation(NF, SCOPE, &gFileOut);  
51 -  
52 //initialize GPU memory for fields 40 //initialize GPU memory for fields
53 SCOPE->init(); 41 SCOPE->init();
54 42
55 - OutputOptions();  
56 -  
57 gFileOut.Save(SCOPE); 43 gFileOut.Save(SCOPE);
58 44
59 - //NF->destroy(); 45 + if(verbose)
  46 + OutputOptions();
  47 +
60 SCOPE->destroy(); 48 SCOPE->destroy();
61 49
62 50
@@ -4,7 +4,7 @@ @@ -4,7 +4,7 @@
4 #include "rts/tools/progressbar.h" 4 #include "rts/tools/progressbar.h"
5 #include "rts/cuda/timer.h" 5 #include "rts/cuda/timer.h"
6 #include "dataTypes.h" 6 #include "dataTypes.h"
7 -#include "colormap.h" 7 +#include "rts/graphics/colormap.h"
8 8
9 #include <QImage> 9 #include <QImage>
10 10
@@ -112,8 +112,8 @@ void microscopeStruct::getFarField() @@ -112,8 +112,8 @@ void microscopeStruct::getFarField()
112 //Compute the Far Field image of the focal plane 112 //Compute the Far Field image of the focal plane
113 113
114 //clear the memory from previous detector fields 114 //clear the memory from previous detector fields
115 - Ud.kill_gpu();  
116 - Ufd.kill_gpu(); 115 + //Ud.kill_gpu();
  116 + //Ufd.kill_gpu();
117 117
118 //first crop the filtered near-field image of the source and scattered fields 118 //first crop the filtered near-field image of the source and scattered fields
119 Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]); 119 Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]);
@@ -261,9 +261,14 @@ void microscopeStruct::SimulateExtendedSource() @@ -261,9 +261,14 @@ void microscopeStruct::SimulateExtendedSource()
261 t += gpuStopTimer(); 261 t += gpuStopTimer();
262 262
263 rtsProgressBar((double)(i+1)/(double)npts * 100); 263 rtsProgressBar((double)(i+1)/(double)npts * 100);
  264 + //unsigned char c;
  265 + //cin>>c;
264 } 266 }
265 - cout<<endl;  
266 - cout<<"Time per source: "<<t/npts<<"ms"<<endl; 267 + if(verbose)
  268 + {
  269 + cout<<endl;
  270 + cout<<"Time per source: "<<t/npts<<"ms"<<endl;
  271 + }
267 272
268 } 273 }
269 274
@@ -304,3 +309,15 @@ void microscopeStruct::LoadExtendedSource(std::string filename) @@ -304,3 +309,15 @@ void microscopeStruct::LoadExtendedSource(std::string filename)
304 } 309 }
305 } 310 }
306 } 311 }
  312 +
  313 +std::string microscopeStruct::toStr()
  314 +{
  315 + stringstream ss;
  316 + ss<<nf.toStr();
  317 +
  318 + ss<<"----------Optics--------------"<<endl<<endl;
  319 + ss<<"Objective NA: "<<objective[0]<<" to "<<objective[1]<<endl;
  320 + return ss.str();
  321 +
  322 +
  323 +}
@@ -63,6 +63,8 @@ struct microscopeStruct @@ -63,6 +63,8 @@ struct microscopeStruct
63 scalarslice getTransmittance(); 63 scalarslice getTransmittance();
64 scalarslice getIntensity(); 64 scalarslice getIntensity();
65 65
  66 + string toStr();
  67 +
66 68
67 69
68 }; 70 };
@@ -35,18 +35,12 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) @@ -35,18 +35,12 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout)
35 ptype inPhi = asin(NAin); 35 ptype inPhi = asin(NAin);
36 ptype outPhi = asin(NAout); 36 ptype outPhi = asin(NAout);
37 37
38 - //cout<<"inPhi: "<<inPhi<<endl;  
39 - //cout<<"outPhi: "<<outPhi<<endl;  
40 -  
41 //calculate the z-values associated with these angles 38 //calculate the z-values associated with these angles
42 ptype inZ = cos(inPhi); 39 ptype inZ = cos(inPhi);
43 ptype outZ = cos(outPhi); 40 ptype outZ = cos(outPhi);
44 41
45 ptype rangeZ = inZ - outZ; 42 ptype rangeZ = inZ - outZ;
46 43
47 - //cout<<"inZ: "<<inZ<<endl;  
48 - //cout<<"outZ: "<<outZ<<endl;  
49 -  
50 //draw a distribution of random phi, z values 44 //draw a distribution of random phi, z values
51 ptype z, phi, theta; 45 ptype z, phi, theta;
52 for(int i=0; i<N; i++) 46 for(int i=0; i<N; i++)
@@ -58,7 +52,6 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) @@ -58,7 +52,6 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout)
58 phi = acos(z); 52 phi = acos(z);
59 53
60 //compute and store cartesian coordinates 54 //compute and store cartesian coordinates
61 - //bsVector spherical(1, theta + kSph[1], phi + kSph[2]);  
62 bsVector spherical(1, theta, phi); 55 bsVector spherical(1, theta, phi);
63 bsVector cart = spherical.sph2cart(); 56 bsVector cart = spherical.sph2cart();
64 samples[i] = rotation * cart; 57 samples[i] = rotation * cart;
1 #include "nearfield.h" 1 #include "nearfield.h"
  2 +#include <time.h>
  3 +#include <math.h>
  4 +
  5 +#ifdef _WIN32
  6 +#define isnan(x) _isnan(x)
  7 +#define isinf(x) (!_finite(x))
  8 +#endif
  9 +
  10 +int bessjyv_sph(int v, double z, double &vm, double* cjv,
  11 + double* cyv, double* cjvp, double* cyvp);
2 12
3 nearfieldStruct::nearfieldStruct() 13 nearfieldStruct::nearfieldStruct()
4 { 14 {
5 scalarSim = true; 15 scalarSim = true;
6 planeWave = false; 16 planeWave = false;
  17 + lut_us = true;
  18 + lut_uf = false;
7 19
8 nWaves = 0; 20 nWaves = 0;
9 } 21 }
@@ -46,6 +58,8 @@ std::string nearfieldStruct::toStr() @@ -46,6 +58,8 @@ std::string nearfieldStruct::toStr()
46 ss<<"Condenser NA: "<<condenser[0]<<" to "<<condenser[1]<<std::endl; 58 ss<<"Condenser NA: "<<condenser[0]<<" to "<<condenser[1]<<std::endl;
47 ss<<"Focal Point: "<<focus[0]<<", "<<focus[1]<<", "<<focus[2]<<std::endl; 59 ss<<"Focal Point: "<<focus[0]<<", "<<focus[1]<<", "<<focus[2]<<std::endl;
48 ss<<"Field Slice: "<<std::endl; 60 ss<<"Field Slice: "<<std::endl;
  61 + if(lut_us)
  62 + ss<<"LUT Parameters --- min: "<<d_min<<" max: "<<d_max<<std::endl;
49 ss<<pos<<std::endl; 63 ss<<pos<<std::endl;
50 64
51 ss<<std::endl<<"---------Materials-----------"<<std::endl; 65 ss<<std::endl<<"---------Materials-----------"<<std::endl;
@@ -61,6 +75,10 @@ std::string nearfieldStruct::toStr() @@ -61,6 +75,10 @@ std::string nearfieldStruct::toStr()
61 for(unsigned int s=0; s<sVector.size(); s++) 75 for(unsigned int s=0; s<sVector.size(); s++)
62 ss<<sVector[s].toStr()<<std::endl; 76 ss<<sVector[s].toStr()<<std::endl;
63 77
  78 + ss<<"---------Timings-------------"<<std::endl;
  79 + ss<<"Uf = "<<t_Uf<<"ms"<<std::endl;
  80 + ss<<"Us = "<<t_Us<<"ms"<<std::endl;
  81 +
64 return ss.str(); 82 return ss.str();
65 } 83 }
66 84
@@ -70,7 +88,8 @@ void nearfieldStruct::calcWaves() @@ -70,7 +88,8 @@ void nearfieldStruct::calcWaves()
70 inWaves.resize(nWaves); 88 inWaves.resize(nWaves);
71 89
72 //re-seed the random number generator 90 //re-seed the random number generator
73 - //srand(seed); 91 + //srand(time(NULL));
  92 + srand(NULL);
74 93
75 //calculate the monte-carlo samples 94 //calculate the monte-carlo samples
76 mcSampleNA(&inWaves[0], nWaves, k, condenser[0], condenser[1]); 95 mcSampleNA(&inWaves[0], nWaves, k, condenser[0], condenser[1]);
@@ -84,6 +103,8 @@ void nearfieldStruct::calcSpheres() @@ -84,6 +103,8 @@ void nearfieldStruct::calcSpheres()
84 //calculate all of the constants necessary to evaluate the scattered field 103 //calculate all of the constants necessary to evaluate the scattered field
85 //estimate the order required to represent the scattered field for each sphere 104 //estimate the order required to represent the scattered field for each sphere
86 105
  106 +
  107 +
87 //for each sphere 108 //for each sphere
88 for(int i=0; i<sVector.size(); i++) 109 for(int i=0; i<sVector.size(); i++)
89 { 110 {
@@ -91,12 +112,10 @@ void nearfieldStruct::calcSpheres() @@ -91,12 +112,10 @@ void nearfieldStruct::calcSpheres()
91 112
92 //calculate the required order 113 //calculate the required order
93 sVector[i].calcNl(lambda); 114 sVector[i].calcNl(lambda);
94 - //std::cout<<sVector[i].Nl<<std::endl;  
95 115
96 //set the refractive index for the sphere 116 //set the refractive index for the sphere
97 int imat = sVector[i].iMaterial; 117 int imat = sVector[i].iMaterial;
98 rts::rtsComplex<ptype> n = mVector[imat](lambda); 118 rts::rtsComplex<ptype> n = mVector[imat](lambda);
99 - //std::cout<<"Sphere refractive index: "<<n<<std::endl;  
100 119
101 //calculate the scattering coefficients 120 //calculate the scattering coefficients
102 sVector[i].calcCoeff(lambda, n); 121 sVector[i].calcCoeff(lambda, n);
@@ -104,18 +123,109 @@ void nearfieldStruct::calcSpheres() @@ -104,18 +123,109 @@ void nearfieldStruct::calcSpheres()
104 //save the refractive index 123 //save the refractive index
105 sVector[i].n = n; 124 sVector[i].n = n;
106 125
  126 + //if the LUT is used, calculate Usp(theta, r)
  127 + if(lut_us)
  128 + {
  129 + sVector[i].calcUp(lambda, n, pos, max(U.R[0], U.R[1]));
  130 + }
  131 +
  132 +
107 } 133 }
108 134
109 } 135 }
110 136
  137 +void nearfieldStruct::calcUs()
  138 +{
  139 +
  140 +
  141 + if(lut_us)
  142 + scalarUpLut();
  143 + else
  144 + scalarUs();
  145 +}
  146 +
  147 +void nearfieldStruct::calcUf()
  148 +{
  149 + if(lut_uf)
  150 + scalarUfLut();
  151 + else
  152 + scalarUf();
  153 +}
  154 +
111 void nearfieldStruct::Simulate() 155 void nearfieldStruct::Simulate()
112 { 156 {
  157 + //initialize timings
  158 + t_Uf = 0;
  159 + t_Us = 0;
  160 +
113 //compute a set of plane waves for Monte-Carlo simulation 161 //compute a set of plane waves for Monte-Carlo simulation
114 calcWaves(); 162 calcWaves();
115 163
116 //the near field has to be simulated no matter what the output rtsPoint is 164 //the near field has to be simulated no matter what the output rtsPoint is
117 - scalarUf(); 165 + calcUf();
118 calcSpheres(); 166 calcSpheres();
119 - scalarUs(); 167 + calcUs();
120 sumUf(); 168 sumUf();
  169 +
  170 + //U.Mag().toImage("testU.bmp");
  171 +}
  172 +
  173 +void nearfieldStruct::calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR)
  174 +{
  175 + /*Compute the look-up-table for spherical bessel functions used for the incident field
  176 + j = (Nl + 1) x aR array of values
  177 + aR = resolution of j
  178 + */
  179 +
  180 + //compute the wavenumber
  181 + ptype k = 2 * PI / lambda;
  182 + unsigned int Nl = m;
  183 +
  184 + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored)
  185 + int bytes = sizeof(double) * (Nl + 1);
  186 + double* cjv_kd = (double*)malloc(bytes);
  187 + double* cyv_kd = (double*)malloc(bytes);
  188 + double* cjvp_kd = (double*)malloc(bytes);
  189 + double* cyvp_kd = (double*)malloc(bytes);
  190 +
  191 + //compute the bessel functions using the CPU-based algorithm
  192 + double vm;
  193 +
  194 + //for each sample along r
  195 + ptype dr = (d_max - d_min) / (dR - 1);
  196 + ptype d;
  197 + ptype jv;
  198 + for(int id = 0; id < dR; id++)
  199 + {
  200 + d = id * dr + d_min;
  201 + double kd = k*d;
  202 + bessjyv_sph(Nl, kd, vm, cjv_kd, cyv_kd, cjvp_kd, cyvp_kd);
  203 +
  204 + //copy the double data to the bsComplex array
  205 + for(int l=0; l<=Nl; l++)
  206 + {
  207 + jv = cjv_kd[l];
  208 + if(isnan(jv) || isinf(jv))
  209 + {
  210 + if(kd == 0 && l == 0)
  211 + jv = 1;
  212 + else
  213 + jv = 0;
  214 + }
  215 + j[id * (Nl+1) + l] = jv;
  216 + }
  217 + }
  218 +
  219 + /*ofstream outfile("uf_besselout.txt");
  220 + for(int ir = 0; ir < dR; ir++)
  221 + {
  222 + outfile<<ir*dr + d_min<<endl;
  223 + for(int l = 0; l<=Nl; l++)
  224 + {
  225 + outfile<<j[ir * (Nl+1) + l]<<" --";
  226 + }
  227 + outfile<<endl;
  228 + }
  229 + outfile.close();*/
  230 +
121 } 231 }
@@ -31,6 +31,8 @@ struct nearfieldStruct @@ -31,6 +31,8 @@ struct nearfieldStruct
31 31
32 //slices for the focused field 32 //slices for the focused field
33 fieldslice Uf; 33 fieldslice Uf;
  34 + ptype d_min, d_max;
  35 +
34 // and total field: Uf + sum(Us) 36 // and total field: Uf + sum(Us)
35 fieldslice U; 37 fieldslice U;
36 38
@@ -43,6 +45,14 @@ struct nearfieldStruct @@ -43,6 +45,14 @@ struct nearfieldStruct
43 //flag for a plane wave 45 //flag for a plane wave
44 bool planeWave; 46 bool planeWave;
45 47
  48 + //flag for using a LUT
  49 + bool lut_uf;
  50 + bool lut_us;
  51 +
  52 + //timings
  53 + float t_Uf;
  54 + float t_Us;
  55 +
46 56
47 57
48 //---------Scatterers------------ 58 //---------Scatterers------------
@@ -78,10 +88,17 @@ struct nearfieldStruct @@ -78,10 +88,17 @@ struct nearfieldStruct
78 void setPos(bsPoint pMin, bsPoint pMax, bsVector normal); 88 void setPos(bsPoint pMin, bsPoint pMax, bsVector normal);
79 89
80 //this function re-computes the focused field 90 //this function re-computes the focused field
  91 + void calcUf();
81 void scalarUf(); 92 void scalarUf();
  93 + void scalarUfLut();
  94 +
  95 + void calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR);
82 96
83 //compute the field scattered by all of the materials 97 //compute the field scattered by all of the materials
  98 + void calcUs();
84 void scalarUs(); 99 void scalarUs();
  100 + void scalarUpLut();
  101 +
85 102
86 //add the incident field to the sum of scattered fields 103 //add the incident field to the sum of scattered fields
87 void sumUf(); 104 void sumUf();
@@ -5,7 +5,7 @@ @@ -5,7 +5,7 @@
5 #include "rts/cuda/error.h" 5 #include "rts/cuda/error.h"
6 #include "rts/cuda/timer.h" 6 #include "rts/cuda/timer.h"
7 7
8 - 8 +//Incident field for a single plane wave
9 __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR) 9 __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR)
10 { 10 {
11 /*Compute the scalar focused field using Debye focusing 11 /*Compute the scalar focused field using Debye focusing
@@ -41,7 +41,8 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p @@ -41,7 +41,8 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p
41 Uf[i] = exp(d) * A; 41 Uf[i] = exp(d) * A;
42 42
43 } 43 }
44 - 44 +
  45 +//Incident field for a focused point source
45 __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4) 46 __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4)
46 { 47 {
47 /*Compute the scalar focused field using Debye focusing 48 /*Compute the scalar focused field using Debye focusing
@@ -151,7 +152,6 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt @@ -151,7 +152,6 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt
151 } 152 }
152 153
153 sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); 154 sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);
154 - //sumUf += il * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);  
155 155
156 il *= im; 156 il *= im;
157 } 157 }
@@ -162,21 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt @@ -162,21 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt
162 162
163 void nearfieldStruct::scalarUf() 163 void nearfieldStruct::scalarUf()
164 { 164 {
165 - //Compute the incident field via a scalar simulation  
166 - //This method uses Debye focusing to approximate the field analytically  
167 -  
168 - //time the calculation of the focused field  
169 - //gpuStartTimer();  
170 -  
171 - //set the field slice to a scalar field  
172 - //Uf.scalarField = true;  
173 -  
174 - //initialize the GPU arrays  
175 - //Uf.init_gpu(); 165 +
  166 + gpuStartTimer();
176 167
177 //create one thread for each pixel of the field slice 168 //create one thread for each pixel of the field slice
178 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); 169 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
179 - dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); 170 + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
180 171
181 //if we are computing a plane wave, call the gpuScalarUfp function 172 //if we are computing a plane wave, call the gpuScalarUfp function
182 if(planeWave) 173 if(planeWave)
@@ -191,10 +182,7 @@ void nearfieldStruct::scalarUf() @@ -191,10 +182,7 @@ void nearfieldStruct::scalarUf()
191 ptype cosBeta = cos(asin(condenser[1])); 182 ptype cosBeta = cos(asin(condenser[1]));
192 //compute the scalar Uf field (this will be in the x_hat channel of Uf) 183 //compute the scalar Uf field (this will be in the x_hat channel of Uf)
193 gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); 184 gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m);
194 - }  
195 -  
196 - //float t = gpuStopTimer();  
197 - //std::cout<<"Scalar Uf Time: "<<t<<"ms"<<std::endl;  
198 - //std::cout<<focus<<std::endl;  
199 - 185 + }
  186 +
  187 + t_Uf = gpuStopTimer();
200 } 188 }
nfScalarUfLut.cu 0 → 100644
  1 +#include "nearfield.h"
  2 +
  3 +#include "rts/math/legendre.h"
  4 +#include "rts/cuda/error.h"
  5 +#include "rts/cuda/timer.h"
  6 +
  7 +texture<float, cudaTextureType2D> texJ;
  8 +
  9 +__global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR);
  10 +
  11 +__global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoint f, bsVector k, ptype A, ptype cosAlpha, ptype cosBeta, int nl, ptype dmin, ptype dmax, int dR)
  12 +{
  13 + /*This function computes the focused field for a 2D slice
  14 +
  15 + Uf = destination field slice
  16 + ABCD = plane representing the field slice in world space
  17 + uR, vR = resolution of the Uf field
  18 + f = focal point of the condenser
  19 + k = direction of the incident light
  20 + A = amplitude of the incident field
  21 + cosAlpha= cosine of the solid angle subtended by the condenser obscuration
  22 + cosBeta = cosine of the solid angle subtended by the condenser aperature
  23 + nl = number of orders used to compute the field
  24 + dR = number of Bessel function values in the look-up texture
  25 +
  26 + */
  27 +
  28 + //get the current coordinate in the plane slice
  29 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  30 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  31 +
  32 + //make sure that the thread indices are in-bounds
  33 + if(iu >= uR || iv >= vR) return;
  34 +
  35 + //compute the index (easier access to the scalar field array)
  36 + int i = iv*uR + iu;
  37 +
  38 + //compute the parameters for u and v
  39 + ptype u = (ptype)iu / (uR);
  40 + ptype v = (ptype)iv / (vR);
  41 +
  42 +
  43 +
  44 + //get the rtsPoint in world space and then the r vector
  45 + bsPoint p = ABCD(u, v);
  46 + bsVector r = p - f;
  47 + ptype d = r.len();
  48 +
  49 + if(d == 0)
  50 + {
  51 + Uf[i] = A * 2 * PI * (cosAlpha - cosBeta);
  52 + return;
  53 + }
  54 +
  55 + //get info for the light direction and frequency
  56 + r = r.norm();
  57 +
  58 + //compute the imaginary factor i^l
  59 + bsComplex im = bsComplex(0, 1);
  60 + bsComplex il = bsComplex(1, 0);
  61 +
  62 + //Legendre functions are computed dynamically to save memory
  63 + //initialize the Legendre functions
  64 +
  65 + ptype P[2];
  66 + //get the angle between k and r (light direction and position vector)
  67 + ptype cosTheta;
  68 + cosTheta = k.dot(r);
  69 +
  70 + rts::init_legendre<ptype>(cosTheta, P[0], P[1]);
  71 +
  72 + //initialize legendre functions for the cassegrain angles
  73 + ptype Palpha[3];
  74 + rts::init_legendre<ptype>(cosAlpha, Palpha[0], Palpha[1]);
  75 + Palpha[2] = 1;
  76 +
  77 + ptype Pbeta[3];
  78 + rts::init_legendre<ptype>(cosBeta, Pbeta[0], Pbeta[1]);
  79 + Pbeta[2] = 1;
  80 +
  81 + //for each order l
  82 + bsComplex sumUf(0.0, 0.0);
  83 + ptype jl = 0.0;
  84 + ptype Pl;
  85 + ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1);
  86 + for(int l = 0; l<=nl; l++)
  87 + {
  88 + jl = tex2D(texJ, l + 0.5, di + 0.5);
  89 + if(l==0)
  90 + Pl = P[0];
  91 + else if(l==1)
  92 + {
  93 + Pl = P[1];
  94 +
  95 + //adjust the cassegrain Legendre function
  96 + Palpha[2] = Palpha[0];
  97 + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
  98 + Pbeta[2] = Pbeta[0];
  99 + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
  100 + }
  101 + else
  102 + {
  103 + rts::shift_legendre<ptype>(l, cosTheta, P[0], P[1]);
  104 +
  105 + Pl = P[1];
  106 +
  107 + //adjust the cassegrain outer Legendre function
  108 + Palpha[2] = Palpha[0];
  109 + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
  110 + Pbeta[2] = Pbeta[0];
  111 + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
  112 + }
  113 +
  114 + sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);
  115 + //sumUf += jl;
  116 +
  117 + il *= im;
  118 + }
  119 +
  120 + Uf[i] = sumUf * 2 * PI * A;
  121 + //Uf[i] = u;
  122 + //return;
  123 +}
  124 +
  125 +void nearfieldStruct::scalarUfLut()
  126 +{
  127 + gpuStartTimer();
  128 +
  129 + //calculate the minimum and maximum points in the focused field
  130 + d_min = pos.dist(focus);
  131 + d_max = pos.dist_max(focus);
  132 +
  133 + //allocate space for the Bessel function
  134 + int dR = 2 * max(Uf.R[0], Uf.R[1]);
  135 + ptype* j = NULL;
  136 + j = (ptype*) malloc(sizeof(ptype) * dR * (m+1));
  137 +
  138 + //calculate Bessel function LUT
  139 + calcBesselLut(j, d_min, d_max, dR);
  140 +
  141 + //create a CUDA array structure and specify the format description
  142 + cudaArray* arrayJ;
  143 + cudaChannelFormatDesc channelDesc =
  144 + cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
  145 +
  146 + //allocate memory
  147 + HANDLE_ERROR(cudaMallocArray(&arrayJ, &channelDesc, m+1, dR));
  148 +
  149 + //specify texture properties
  150 + texJ.addressMode[0] = cudaAddressModeMirror;
  151 + texJ.addressMode[1] = cudaAddressModeMirror;
  152 + texJ.filterMode = cudaFilterModeLinear;
  153 + texJ.normalized = false;
  154 +
  155 + //bind the texture to the array
  156 + HANDLE_ERROR(cudaBindTextureToArray(texJ, arrayJ, channelDesc));
  157 +
  158 + //copy the CPU Bessel LUT to the GPU-based array
  159 + HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice));
  160 +
  161 + //----------------Compute the focused field
  162 + //create one thread for each pixel of the field slice
  163 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  164 + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  165 +
  166 + //if we are computing a plane wave, call the gpuScalarUfp function
  167 + if(planeWave)
  168 + {
  169 + gpuScalarUfp<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]);
  170 + }
  171 + //otherwise compute the condenser info and create a focused field
  172 + else
  173 + {
  174 + //pre-compute the cosine of the obscuration and objective angles
  175 + ptype cosAlpha = cos(asin(condenser[0]));
  176 + ptype cosBeta = cos(asin(condenser[1]));
  177 + //compute the scalar Uf field (this will be in the x_hat channel of Uf)
  178 + gpuScalarUfLut<<<dimGrid, dimBlock>>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR);
  179 + }
  180 +
  181 +
  182 + //free everything
  183 + free(j);
  184 +
  185 + HANDLE_ERROR(cudaFreeArray(arrayJ));
  186 +
  187 + t_Uf = gpuStopTimer();
  188 +}
nfScalarUpLut.cu 0 → 100644
  1 +#include "nearfield.h"
  2 +#include "rts/math/spherical_bessel.h"
  3 +#include "rts/math/legendre.h"
  4 +#include <stdlib.h>
  5 +#include "rts/cuda/error.h"
  6 +#include "rts/cuda/timer.h"
  7 +
  8 +texture<float2, cudaTextureType2D> texUsp;
  9 +texture<float2, cudaTextureType2D> texUip;
  10 +
  11 +__global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR)
  12 +{
  13 + /*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field
  14 + produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field.
  15 +
  16 + Us = final scattered field
  17 + k = list of incoming plane waves (Monte-Carlo samples)
  18 + nk = number of incoming MC samples
  19 + kmag= magnitude of the incoming field 2pi/lambda
  20 + dmin= minimum distance of the Usp texture
  21 + dmax= maximum distance of the Usp texture
  22 + f = position of the focus
  23 + ps = position of the sphere
  24 + A = total amplitude of the incident field arriving at the focal spot
  25 + ABCD= rectangle representing the field slice
  26 + uR = resolution of the field slice in the u direction
  27 + vR = resolution of the field slice in the v direction
  28 + dR = resolution of the Usp texture in the d direction
  29 + thetaR= resolution of the Usp texture in the theta direction
  30 + */
  31 +
  32 + //get the current coordinate in the plane slice
  33 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  34 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  35 +
  36 + //make sure that the thread indices are in-bounds
  37 + if(iu >= uR || iv >= vR) return;
  38 +
  39 + //compute the index (easier access to the scalar field array)
  40 + int i = iv*uR + iu;
  41 +
  42 + //compute the parameters for u and v
  43 + ptype u = (ptype)iu / (uR);
  44 + ptype v = (ptype)iv / (vR);
  45 +
  46 + //get the rtsPoint in world space and then the r vector
  47 + bsPoint p = ABCD(u, v);
  48 + bsVector r = p - ps;
  49 + ptype d = r.len();
  50 + float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1);
  51 + float ai = ( (d - dmin)/(a - dmin)) * (aR - 1);
  52 +
  53 + bsComplex sumUs(0, 0);
  54 + //for each plane wave in the wave list
  55 + for(int iw = 0; iw < nk; iw++)
  56 + {
  57 + //normalize the direction vectors and find their inner product
  58 + r = r.norm();
  59 + ptype cos_theta = k[iw].dot(r);
  60 + if(cos_theta < -1)
  61 + cos_theta = -1;
  62 + if(cos_theta > 1)
  63 + cos_theta = 1;
  64 + float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1);
  65 +
  66 + //compute the phase factor for spheres that are not at the origin
  67 + bsVector c = ps - f;
  68 + bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));
  69 +
  70 + //compute the internal field if we are inside a sphere
  71 + if(d < a)
  72 + {
  73 + float2 Uip = tex2D(texUip, ai + 0.5, thetai + 0.5);
  74 + sumUs += (1.0/nk) * A * phase * bsComplex(Uip.x, Uip.y);
  75 + }
  76 + //otherwise compute the scattered field
  77 + else
  78 + {
  79 + float2 Usp = tex2D(texUsp, di + 0.5, thetai + 0.5);
  80 + sumUs += (1.0/nk) * A * phase * bsComplex(Usp.x, Usp.y);
  81 + }
  82 +
  83 + }
  84 +
  85 + Us[i] += sumUs;
  86 +}
  87 +
  88 +void nearfieldStruct::scalarUpLut()
  89 +{
  90 + //get the number of spheres
  91 + int nSpheres = sVector.size();
  92 +
  93 + //if there are no spheres, nothing to do here
  94 + if(nSpheres == 0)
  95 + return;
  96 +
  97 + //time the calculation of the focused field
  98 + gpuStartTimer();
  99 +
  100 + //clear the scattered field
  101 + U.clear_gpu();
  102 +
  103 + //create one thread for each pixel of the field slice
  104 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  105 + dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  106 +
  107 + //copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff)
  108 + bsVector* gpuk;
  109 + int nWaves;
  110 + ptype subA;
  111 + if(planeWave)
  112 + {
  113 + nWaves = 1;
  114 + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) );
  115 + HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice));
  116 + subA = A;
  117 + }
  118 + else
  119 + {
  120 + nWaves = inWaves.size();
  121 + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nWaves ) );
  122 + HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nWaves, cudaMemcpyHostToDevice));
  123 + //compute the amplitude that makes it through the condenser
  124 + subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );
  125 + }
  126 +
  127 + //for each sphere
  128 + for(int s = 0; s<nSpheres; s++)
  129 + {
  130 + //get the current sphere
  131 + //sphere S = sVector[s];
  132 +
  133 + //allocate space for the Usp and Uip textures
  134 + //allocate the cuda array
  135 + cudaArray* arrayUsp;
  136 + cudaArray* arrayUip;
  137 + cudaChannelFormatDesc channelDescUsp =
  138 + cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);
  139 + cudaChannelFormatDesc channelDescUip =
  140 + cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);
  141 + int dR = sVector[s].Usp.R[0];
  142 + int thetaR = sVector[s].Usp.R[1];
  143 + int aR = sVector[s].Uip.R[0];
  144 + HANDLE_ERROR(cudaMallocArray(&arrayUsp, &channelDescUsp, dR, thetaR));
  145 + HANDLE_ERROR(cudaMallocArray(&arrayUip, &channelDescUip, aR, thetaR));
  146 +
  147 + texUsp.addressMode[0] = cudaAddressModeMirror;
  148 + texUsp.addressMode[1] = cudaAddressModeMirror;
  149 + texUsp.filterMode = cudaFilterModeLinear;
  150 + texUsp.normalized = false;
  151 +
  152 + texUip.addressMode[0] = cudaAddressModeMirror;
  153 + texUip.addressMode[1] = cudaAddressModeMirror;
  154 + texUip.filterMode = cudaFilterModeLinear;
  155 + texUip.normalized = false;
  156 + HANDLE_ERROR(cudaBindTextureToArray(texUsp, arrayUsp, channelDescUsp));
  157 + HANDLE_ERROR(cudaBindTextureToArray(texUip, arrayUip, channelDescUip));
  158 +
  159 + //copy the LUT to the Usp texture
  160 + HANDLE_ERROR( cudaMemcpy2DToArray(arrayUsp, 0, 0, sVector[s].Usp.x_hat, dR*sizeof(float2), dR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));
  161 + HANDLE_ERROR( cudaMemcpy2DToArray(arrayUip, 0, 0, sVector[s].Uip.x_hat, aR*sizeof(float2), aR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));
  162 +
  163 + gpuScalarUpLut<<<dimGrid, dimBlock>>>(U.x_hat,
  164 + gpuk,
  165 + nWaves,
  166 + 2 * PI / lambda,
  167 + sVector[s].a,
  168 + sVector[s].d_min,
  169 + sVector[s].d_max,
  170 + focus,
  171 + sVector[s].p,
  172 + subA,
  173 + pos,
  174 + U.R[0],
  175 + U.R[1],
  176 + dR,
  177 + aR,
  178 + thetaR);
  179 +
  180 + cudaFreeArray(arrayUsp);
  181 + cudaFreeArray(arrayUip);
  182 +
  183 + }
  184 +
  185 +
  186 + //store the time to compute the scattered field
  187 + t_Us = gpuStopTimer();
  188 +
  189 + //free monte-carlo samples
  190 + cudaFree(gpuk);
  191 +
  192 +}
@@ -163,7 +163,7 @@ void nearfieldStruct::scalarUs() @@ -163,7 +163,7 @@ void nearfieldStruct::scalarUs()
163 return; 163 return;
164 164
165 //time the calculation of the focused field 165 //time the calculation of the focused field
166 - //gpuStartTimer(); 166 + gpuStartTimer();
167 167
168 //clear the scattered field 168 //clear the scattered field
169 U.clear_gpu(); 169 U.clear_gpu();
@@ -251,9 +251,8 @@ void nearfieldStruct::scalarUs() @@ -251,9 +251,8 @@ void nearfieldStruct::scalarUs()
251 } 251 }
252 252
253 253
  254 + //store the time to compute the scattered field
  255 + t_Us = gpuStopTimer();
254 256
255 - //float t = gpuStopTimer();  
256 - //std::cout<<"Scalar Us Time: "<<t<<"ms"<<std::endl;  
257 - //std::cout<<focus<<std::endl;  
258 257
259 } 258 }
@@ -32,7 +32,7 @@ __global__ void gpuScalarUsp(bsComplex* Ufx, bsComplex* Ufy, bsComplex* Ufz, @@ -32,7 +32,7 @@ __global__ void gpuScalarUsp(bsComplex* Ufx, bsComplex* Ufy, bsComplex* Ufz,
32 { 32 {
33 r = p - ps[is]; 33 r = p - ps[is];
34 d = r.len(); 34 d = r.len();
35 - if(d <= as[is]) 35 + if(d < as[is])
36 return; 36 return;
37 } 37 }
38 38
@@ -110,8 +110,5 @@ void nearfieldStruct::sumUf() @@ -110,8 +110,5 @@ void nearfieldStruct::sumUf()
110 HANDLE_ERROR(cudaFree(gpu_p)); 110 HANDLE_ERROR(cudaFree(gpu_p));
111 HANDLE_ERROR(cudaFree(gpu_a)); 111 HANDLE_ERROR(cudaFree(gpu_a));
112 112
113 - //float t = gpuStopTimer();  
114 - //std::cout<<"Add Us Time: "<<t<<"ms"<<std::endl;  
115 - //std::cout<<focus<<std::endl;  
116 113
117 } 114 }
@@ -5,7 +5,7 @@ @@ -5,7 +5,7 @@
5 5
6 #include "nearfield.h" 6 #include "nearfield.h"
7 #include "microscope.h" 7 #include "microscope.h"
8 -#include "colormap.h" 8 +#include "rts/graphics/colormap.h"
9 #include "fileout.h" 9 #include "fileout.h"
10 //extern nearfieldStruct* NF; 10 //extern nearfieldStruct* NF;
11 extern microscopeStruct* SCOPE; 11 extern microscopeStruct* SCOPE;
@@ -23,7 +23,179 @@ using namespace std; @@ -23,7 +23,179 @@ using namespace std;
23 #include <boost/program_options.hpp> 23 #include <boost/program_options.hpp>
24 namespace po = boost::program_options; 24 namespace po = boost::program_options;
25 25
26 -static void loadSpheres(string sphereList) 26 +extern bool verbose;
  27 +
  28 +
  29 +
  30 +static void lNearfield(po::variables_map vm)
  31 +{
  32 + //test to see if we are simulating a plane wave
  33 + bool planeWave = DEFAULT_PLANEWAVE;
  34 + if(vm.count("plane-wave"))
  35 + planeWave = !planeWave;
  36 + SCOPE->nf.planeWave = planeWave;
  37 +
  38 + //get the incident field amplitude
  39 + SCOPE->nf.A = vm["amplitude"].as<ptype>();
  40 +
  41 + //get the condenser parameters
  42 + SCOPE->nf.condenser[0] = DEFAULT_CONDENSER_MIN;
  43 + SCOPE->nf.condenser[1] = DEFAULT_CONDENSER_MAX;
  44 +
  45 + if(vm.count("condenser"))
  46 + {
  47 + vector<ptype> cparams = vm["condenser"].as< vector<ptype> >();
  48 +
  49 + if(cparams.size() == 1)
  50 + SCOPE->nf.condenser[1] = cparams[0];
  51 + else
  52 + {
  53 + SCOPE->nf.condenser[0] = cparams[0];
  54 + SCOPE->nf.condenser[1] = cparams[1];
  55 + }
  56 + }
  57 +
  58 +
  59 + //get the focal rtsPoint position
  60 + SCOPE->nf.focus[0] = DEFAULT_FOCUS_X;
  61 + SCOPE->nf.focus[1] = DEFAULT_FOCUS_Y;
  62 + SCOPE->nf.focus[2] = DEFAULT_FOCUS_Z;
  63 + if(vm.count("focus"))
  64 + {
  65 + vector<ptype> fpos = vm["focus"].as< vector<ptype> >();
  66 + if(fpos.size() != 3)
  67 + {
  68 + cout<<"BIMSIM Error - the incident focal point is incorrectly specified; it must have three components."<<endl;
  69 + exit(1);
  70 + }
  71 + SCOPE->nf.focus[0] = fpos[0];
  72 + SCOPE->nf.focus[1] = fpos[1];
  73 + SCOPE->nf.focus[2] = fpos[2];
  74 + }
  75 +
  76 + //get the incident light direction (k-vector)
  77 + bsVector spherical(1, 0, 0);
  78 +
  79 + //if a k-vector is specified
  80 + if(vm.count("k"))
  81 + {
  82 + vector<ptype> kvec = vm["k"].as< vector<ptype> >();
  83 + if(kvec.size() != 2)
  84 + {
  85 + cout<<"BIMSIM Error - k-vector is not specified correctly: it must contain two elements"<<endl;
  86 + exit(1);
  87 + }
  88 + spherical[1] = kvec[0];
  89 + spherical[2] = kvec[1];
  90 + }
  91 + SCOPE->nf.k = spherical.sph2cart();
  92 +
  93 +
  94 + //incident field order
  95 + SCOPE->nf.m = vm["field-order"].as<int>();
  96 +
  97 + //number of Monte-Carlo samples
  98 + SCOPE->nf.nWaves = vm["samples"].as<int>();
  99 +
  100 + //random number seed for Monte-Carlo samples
  101 + if(vm.count("seed"))
  102 + srand(vm["seed"].as<unsigned int>());
  103 +
  104 +
  105 +
  106 +}
  107 +
  108 +
  109 +static void loadOutputParams(po::variables_map vm)
  110 +{
  111 + //append simulation results to previous binary files
  112 + gFileOut.append = DEFAULT_APPEND;
  113 + if(vm.count("append"))
  114 + gFileOut.append = true;
  115 +
  116 + //image parameters
  117 + //component of the field to be saved
  118 + std::string fieldStr;
  119 + fieldStr = vm["output-type"].as<string>();
  120 +
  121 + if(fieldStr == "magnitude")
  122 + gFileOut.field = fileoutStruct::fieldMag;
  123 + else if(fieldStr == "intensity")
  124 + gFileOut.field = fileoutStruct::fieldIntensity;
  125 + else if(fieldStr == "polarization")
  126 + gFileOut.field = fileoutStruct::fieldPolar;
  127 + else if(fieldStr == "imaginary")
  128 + gFileOut.field = fileoutStruct::fieldImag;
  129 + else if(fieldStr == "real")
  130 + gFileOut.field = fileoutStruct::fieldReal;
  131 + else if(fieldStr == "angular-spectrum")
  132 + gFileOut.field = fileoutStruct::fieldAngularSpectrum;
  133 +
  134 +
  135 + //image file names
  136 + gFileOut.intFile = vm["intensity"].as<string>();
  137 + gFileOut.absFile = vm["absorbance"].as<string>();
  138 + gFileOut.transFile = vm["transmittance"].as<string>();
  139 + gFileOut.nearFile = vm["near-field"].as<string>();
  140 + gFileOut.farFile = vm["far-field"].as<string>();
  141 +
  142 + //colormap
  143 + std::string cmapStr;
  144 + cmapStr = vm["colormap"].as<string>();
  145 + if(cmapStr == "brewer")
  146 + gFileOut.colormap = rts::cmBrewer;
  147 + else if(cmapStr == "gray")
  148 + gFileOut.colormap = rts::cmGrayscale;
  149 + else
  150 + cout<<"color-map value not recognized (using default): "<<cmapStr<<endl;
  151 +}
  152 +
  153 +void lFlags(po::variables_map vm, po::options_description desc)
  154 +{
  155 + //display help and exit
  156 + if(vm.count("help"))
  157 + {
  158 + cout<<desc<<endl;
  159 + exit(1);
  160 + }
  161 +
  162 + //flag for verbose output
  163 + if(vm.count("verbose"))
  164 + verbose = true;
  165 +
  166 + if(vm.count("recursive"))
  167 + {
  168 + SCOPE->nf.lut_us = false;
  169 + SCOPE->nf.lut_uf = false;
  170 + }
  171 + else if(vm.count("recursive-us"))
  172 + {
  173 + SCOPE->nf.lut_us = false;
  174 + }
  175 + else if(vm.count("lut-uf"))
  176 + {
  177 + SCOPE->nf.lut_uf = true;
  178 + }
  179 +}
  180 +
  181 +void lWavelength(po::variables_map vm)
  182 +{
  183 + //load the wavelength
  184 + if(vm.count("nu"))
  185 + {
  186 + //wavelength is given in wavenumber - transform and flag
  187 + SCOPE->nf.lambda = 10000/vm["nu"].as<ptype>();
  188 + gFileOut.wavenumber = true;
  189 + }
  190 + //otherwise we are using lambda = wavelength
  191 + else
  192 + {
  193 + SCOPE->nf.lambda = vm["lambda"].as<ptype>();
  194 + gFileOut.wavenumber = false;
  195 + }
  196 +}
  197 +
  198 +static void lSpheres(string sphereList)
27 { 199 {
28 /*This function loads a list of sphere given in the string sphereList 200 /*This function loads a list of sphere given in the string sphereList
29 The format is: 201 The format is:
@@ -58,17 +230,60 @@ static void loadSpheres(string sphereList) @@ -58,17 +230,60 @@ static void loadSpheres(string sphereList)
58 //check out the next element (this should set the EOF error flag) 230 //check out the next element (this should set the EOF error flag)
59 ss.peek(); 231 ss.peek();
60 } 232 }
  233 +}
61 234
  235 +void lSpheres(po::variables_map vm)
  236 +{
  237 + //if a sphere is specified at the command line
  238 + if(vm.count("spheres"))
  239 + {
  240 + //convert the sphere to a string
  241 + vector<ptype> sdesc = vm["spheres"].as< vector<ptype> >();
62 242
  243 + //compute the number of spheres specified
  244 + unsigned int nS;
  245 + if(sdesc.size() <= 5)
  246 + nS = 1;
  247 + else
  248 + {
  249 + //if the number of parameters is divisible by 4, compute the number of spheres
  250 + if(sdesc.size() % 5 == 0)
  251 + nS = sdesc.size() / 5;
  252 + else
  253 + {
  254 + cout<<"BIMSIM Error: Invalid number of sphere parameters."<<endl;
  255 + exit(1);
  256 + }
  257 + }
63 258
64 -} 259 + stringstream ss;
  260 +
  261 + //for each sphere
  262 + for(unsigned int s=0; s<nS; s++)
  263 + {
  264 + //compute the number of sphere parameters
  265 + unsigned int nP;
  266 + if(nS == 1) nP = sdesc.size();
  267 + else nP = 5;
  268 +
  269 + //store each parameter as a string
  270 + for(unsigned int i=0; i<nP; i++)
  271 + {
  272 + ss<<sdesc[s*5 + i]<<" ";
  273 + }
  274 + ss<<endl;
  275 + }
  276 +
  277 +
  278 +
  279 + //convert the string to a sphere list
  280 + lSpheres(ss.str());
  281 + }
65 282
66 -static void loadSpheres(po::variables_map vm)  
67 -{  
68 //if a files are specified 283 //if a files are specified
69 if(vm.count("sphere-file")) 284 if(vm.count("sphere-file"))
70 { 285 {
71 - cout<<"Sphere files detected."<<endl; 286 +
72 vector<string> filenames = vm["sphere-file"].as< vector<string> >(); 287 vector<string> filenames = vm["sphere-file"].as< vector<string> >();
73 //load each file 288 //load each file
74 for(int iS=0; iS<filenames.size(); iS++) 289 for(int iS=0; iS<filenames.size(); iS++)
@@ -85,69 +300,51 @@ static void loadSpheres(po::variables_map vm) @@ -85,69 +300,51 @@ static void loadSpheres(po::variables_map vm)
85 std::string instr((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>()); 300 std::string instr((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
86 301
87 //load the list of spheres from a string 302 //load the list of spheres from a string
88 - loadSpheres(instr); 303 + lSpheres(instr);
89 } 304 }
90 } 305 }
91 306
92 - //load the sphere from the command line  
93 - if(vm.count("sx") || vm.count("sy") || vm.count("sz") || vm.count("s"))  
94 - {  
95 - //create a new sphere  
96 - sphere newS;  
97 -  
98 - //set defaults  
99 - if(vm.count("sx"))  
100 - newS.p[0] = vm["sx"].as<ptype>();  
101 - else  
102 - newS.p[0] = DEFAULT_SPHERE_X;  
103 -  
104 -  
105 - if(vm.count("sy"))  
106 - newS.p[1] = vm["sy"].as<ptype>();  
107 - else  
108 - newS.p[1] = DEFAULT_SPHERE_Y;  
109 -  
110 - if(vm.count("sz"))  
111 - newS.p[2] = vm["sz"].as<ptype>();  
112 - else  
113 - newS.p[2] = DEFAULT_SPHERE_Z;  
114 -  
115 - if(vm.count("radius"))  
116 - newS.a = vm["radius"].as<ptype>();  
117 - else  
118 - newS.a = DEFAULT_SPHERE_A;  
119 -  
120 - //add the sphere to the sphere vector  
121 - SCOPE->nf.sVector.push_back(newS); 307 + //make sure the appropriate materials are loaded
  308 + unsigned int nS = SCOPE->nf.sVector.size();
122 309
  310 + //for each sphere
  311 + for(unsigned int s = 0; s<nS; s++)
  312 + {
  313 + //make sure the corresponding material exists
  314 + if(SCOPE->nf.sVector[s].iMaterial + 1 > SCOPE->nf.mVector.size())
  315 + {
  316 + //otherwise output an error
  317 + cout<<"BIMSIM Error - A material is not loaded for sphere "<<s+1<<"."<<endl;
  318 + exit(1);
  319 + }
123 } 320 }
124 } 321 }
125 322
126 -static void loadMaterials(po::variables_map vm) 323 +static void lMaterials(po::variables_map vm)
127 { 324 {
128 //if materials are specified at the command line 325 //if materials are specified at the command line
129 if(vm.count("materials")) 326 if(vm.count("materials"))
130 { 327 {
131 vector<ptype> matVec = vm["materials"].as< vector<ptype> >(); 328 vector<ptype> matVec = vm["materials"].as< vector<ptype> >();
132 - if(matVec.size() %2 != 0) 329 + if(matVec.size() == 1)
  330 + {
  331 + rts::material<ptype> newM(SCOPE->nf.lambda, matVec[0], 0);
  332 + SCOPE->nf.mVector.push_back(newM);
  333 + }
  334 + else if(matVec.size() %2 != 0)
133 { 335 {
134 cout<<"BIMSim Error: materials must be specified in n, k pairs"<<endl; 336 cout<<"BIMSim Error: materials must be specified in n, k pairs"<<endl;
135 exit(1); 337 exit(1);
136 } 338 }
137 -  
138 -  
139 - for(int i=0; i<matVec.size(); i+=2) 339 + else
140 { 340 {
141 - rts::material<ptype> newM(SCOPE->nf.lambda, matVec[i], matVec[i+1]);  
142 - SCOPE->nf.mVector.push_back(newM); 341 + for(int i=0; i<matVec.size(); i+=2)
  342 + {
  343 + rts::material<ptype> newM(SCOPE->nf.lambda, matVec[i], matVec[i+1]);
  344 + SCOPE->nf.mVector.push_back(newM);
  345 + }
143 } 346 }
144 } 347 }
145 - else  
146 - {  
147 - //add the command line material as the default (material 0)  
148 - rts::material<ptype> newM(SCOPE->nf.lambda, vm["n"].as<ptype>(), vm["k"].as<ptype>());  
149 - SCOPE->nf.mVector.push_back(newM);  
150 - }  
151 348
152 //if file names are specified, load the materials 349 //if file names are specified, load the materials
153 if(vm.count("material-file")) 350 if(vm.count("material-file"))
@@ -169,57 +366,109 @@ static void loadMaterials(po::variables_map vm) @@ -169,57 +366,109 @@ static void loadMaterials(po::variables_map vm)
169 366
170 } 367 }
171 368
172 -static void loadNearfieldParams(po::variables_map vm) 369 +static void lOptics(po::variables_map vm)
173 { 370 {
174 - //test to see if we are simulating a plane wave  
175 - bool planeWave = DEFAULT_PLANEWAVE;  
176 - if(vm.count("plane-wave"))  
177 - planeWave = !planeWave;  
178 - SCOPE->nf.planeWave = planeWave;  
179 -  
180 - //get the wavelength  
181 - //SCOPE->nf.lambda = vm["lambda"].as<ptype>();  
182 -  
183 - //get the incident field amplitude  
184 - SCOPE->nf.A = vm["amplitude"].as<ptype>();  
185 -  
186 - //get the condenser parameters  
187 - SCOPE->nf.condenser[0] = vm["condenser-min"].as<ptype>();  
188 - SCOPE->nf.condenser[1] = vm["condenser-max"].as<ptype>();  
189 -  
190 -  
191 - //get the focal rtsPoint position  
192 - SCOPE->nf.focus[0] = vm["fx"].as<ptype>();  
193 - SCOPE->nf.focus[1] = vm["fy"].as<ptype>();  
194 - SCOPE->nf.focus[2] = vm["fz"].as<ptype>();  
195 -  
196 - //get the incident light direction (k-vector)  
197 - bsVector spherical;  
198 - spherical[0] = 1.0;  
199 - spherical[1] = vm["theta"].as<ptype>();  
200 - spherical[2] = vm["phi"].as<ptype>();  
201 - SCOPE->nf.k = spherical.sph2cart();  
202 -  
203 -  
204 - //incident field order  
205 - SCOPE->nf.m = vm["field-order"].as<int>();  
206 -  
207 - //number of Monte-Carlo samples  
208 - SCOPE->nf.nWaves = vm["samples"].as<int>();  
209 -  
210 - 371 + SCOPE->objective[0] = DEFAULT_OBJECTIVE_MIN;
  372 + SCOPE->objective[1] = DEFAULT_OBJECTIVE_MAX;
  373 + if(vm.count("objective"))
  374 + {
  375 + vector<ptype> oparams = vm["objective"].as< vector<ptype> >();
211 376
  377 + if(oparams.size() == 1)
  378 + SCOPE->objective[1] = oparams[0];
  379 + else
  380 + {
  381 + SCOPE->objective[0] = oparams[0];
  382 + SCOPE->objective[1] = oparams[1];
  383 + }
  384 + }
212 } 385 }
213 386
214 -static void loadSliceParams(po::variables_map vm) 387 +static void lImagePlane(po::variables_map vm)
215 { 388 {
216 - //parameters for the sample plane  
217 - 389 + bsPoint pMin(DEFAULT_PLANE_MIN_X, DEFAULT_PLANE_MIN_Y, DEFAULT_PLANE_MIN_Z);
  390 + bsPoint pMax(DEFAULT_PLANE_MAX_X, DEFAULT_PLANE_MAX_Y, DEFAULT_PLANE_MAX_Z);
  391 + bsVector normal(DEFAULT_PLANE_NORM_X, DEFAULT_PLANE_NORM_Y, DEFAULT_PLANE_NORM_Z);
218 392
219 //set the default values for the slice position and orientation 393 //set the default values for the slice position and orientation
220 - bsPoint pMin(vm["plane-min-x"].as<ptype>(), vm["plane-min-y"].as<ptype>(), vm["plane-min-z"].as<ptype>());  
221 - bsPoint pMax(vm["plane-max-x"].as<ptype>(), vm["plane-max-y"].as<ptype>(), vm["plane-max-z"].as<ptype>());  
222 - bsVector normal(vm["plane-norm-x"].as<ptype>(), vm["plane-norm-y"].as<ptype>(), vm["plane-norm-z"].as<ptype>()); 394 + if(vm.count("plane-lower-left") && vm.count("plane-upper-right") && vm.count("plane-normal"))
  395 + {
  396 + vector<ptype> ll = vm["plane-lower-left"].as< vector<ptype> >();
  397 + if(ll.size() != 3)
  398 + {
  399 + cout<<"BIMSIM Error - The lower-left corner of the image plane is incorrectly specified."<<endl;
  400 + exit(1);
  401 + }
  402 +
  403 + vector<ptype> ur = vm["plane-lower-left"].as< vector<ptype> >();
  404 + if(ur.size() != 3)
  405 + {
  406 + cout<<"BIMSIM Error - The upper-right corner of the image plane is incorrectly specified."<<endl;
  407 + exit(1);
  408 + }
  409 +
  410 + vector<ptype> norm = vm["plane-lower-left"].as< vector<ptype> >();
  411 + if(norm.size() != 3)
  412 + {
  413 + cout<<"BIMSIM Error - The normal of the image plane is incorrectly specified."<<endl;
  414 + exit(1);
  415 + }
  416 +
  417 + pMin = bsPoint(ll[0], ll[1], ll[2]);
  418 + pMax = bsPoint(ur[0], ur[1], ur[2]);
  419 + normal = bsVector(norm[0], norm[1], norm[2]);
  420 + }
  421 + else if(vm.count("xy"))
  422 + {
  423 + //default plane size in microns
  424 + ptype s = DEFAULT_PLANE_SIZE;
  425 + ptype pos = DEFAULT_PLANE_POSITION;
  426 +
  427 + vector<ptype> xy = vm["xy"].as< vector<ptype> >();
  428 + if(xy.size() >= 1)
  429 + s = xy[0];
  430 + if(xy.size() >= 2)
  431 + pos = xy[1];
  432 +
  433 + //calculate the plane corners and normal based on the size and position
  434 + pMin = bsPoint(-s/2, -s/2, pos);
  435 + pMax = bsPoint(s/2, s/2, pos);
  436 + normal = bsVector(0, 0, 1);
  437 + }
  438 + else if(vm.count("xz"))
  439 + {
  440 + //default plane size in microns
  441 + ptype size = DEFAULT_PLANE_SIZE;
  442 + ptype pos = DEFAULT_PLANE_POSITION;
  443 +
  444 + vector<ptype> xz = vm["xz"].as< vector<ptype> >();
  445 + if(xz.size() >= 1)
  446 + size = xz[0];
  447 + if(xz.size() >= 2)
  448 + pos = xz[1];
  449 +
  450 + //calculate the plane corners and normal based on the size and position
  451 + pMin = bsPoint(-size/2, pos, -size/2);
  452 + pMax = bsPoint(size/2, pos, size/2);
  453 + normal = bsVector(0, -1, 0);
  454 + }
  455 + else if(vm.count("yz"))
  456 + {
  457 + //default plane size in microns
  458 + ptype size = DEFAULT_PLANE_SIZE;
  459 + ptype pos = DEFAULT_PLANE_POSITION;
  460 +
  461 + vector<ptype> yz = vm["yz"].as< vector<ptype> >();
  462 + if(yz.size() >= 1)
  463 + size = yz[0];
  464 + if(yz.size() >= 2)
  465 + pos = yz[1];
  466 +
  467 + //calculate the plane corners and normal based on the size and position
  468 + pMin = bsPoint(pos, -size/2, -size/2);
  469 + pMax = bsPoint(pos, size/2, size/2);
  470 + normal = bsVector(1, 0, 0);
  471 + }
223 SCOPE->setPos(pMin, pMax, normal); 472 SCOPE->setPos(pMin, pMax, normal);
224 473
225 //resolution 474 //resolution
@@ -233,175 +482,111 @@ static void loadSliceParams(po::variables_map vm) @@ -233,175 +482,111 @@ static void loadSliceParams(po::variables_map vm)
233 482
234 483
235 SCOPE->setNearfield(); 484 SCOPE->setNearfield();
236 -  
237 -  
238 -  
239 -}  
240 -  
241 -static void loadMicroscopeParams(po::variables_map vm)  
242 -{  
243 - //objective  
244 - SCOPE->objective[0] = vm["objective-min"].as<ptype>();  
245 - SCOPE->objective[1] = vm["objective-max"].as<ptype>();  
246 -  
247 -  
248 -  
249 -  
250 -  
251 -}  
252 -  
253 -static void loadOutputParams(po::variables_map vm)  
254 -{  
255 - //append simulation results to previous binary files  
256 - gFileOut.append = DEFAULT_APPEND;  
257 - if(vm.count("append"))  
258 - gFileOut.append = true;  
259 -  
260 - //image parameters  
261 - //component of the field to be saved  
262 - std::string fieldStr;  
263 - fieldStr = vm["output-type"].as<string>();  
264 -  
265 - if(fieldStr == "magnitude")  
266 - gFileOut.field = fileoutStruct::fieldMag;  
267 - else if(fieldStr == "intensity")  
268 - gFileOut.field = fileoutStruct::fieldIntensity;  
269 - else if(fieldStr == "polarization")  
270 - gFileOut.field = fileoutStruct::fieldPolar;  
271 - else if(fieldStr == "imaginary")  
272 - gFileOut.field = fileoutStruct::fieldImag;  
273 - else if(fieldStr == "real")  
274 - gFileOut.field = fileoutStruct::fieldReal;  
275 - else if(fieldStr == "angular-spectrum")  
276 - gFileOut.field = fileoutStruct::fieldAngularSpectrum;  
277 -  
278 -  
279 - //image file names  
280 - gFileOut.intFile = vm["intensity"].as<string>();  
281 - gFileOut.absFile = vm["absorbance"].as<string>();  
282 - gFileOut.transFile = vm["transmittance"].as<string>();  
283 - gFileOut.nearFile = vm["near-field"].as<string>();  
284 - gFileOut.farFile = vm["far-field"].as<string>();  
285 -  
286 - //colormap  
287 - std::string cmapStr;  
288 - cmapStr = vm["colormap"].as<string>();  
289 - if(cmapStr == "brewer")  
290 - gFileOut.colormap = rts::colormap::cmBrewer;  
291 - else if(cmapStr == "gray")  
292 - gFileOut.colormap = rts::colormap::cmGrayscale;  
293 - else  
294 - cout<<"color-map value not recognized (using default): "<<cmapStr<<endl;  
295 } 485 }
296 486
297 static void OutputOptions() 487 static void OutputOptions()
298 { 488 {
299 - cout<<SCOPE->nf.toStr(); 489 + cout<<SCOPE->toStr();
300 490
301 cout<<"# of source points: "<<SCOPE->focalPoints.size()<<endl; 491 cout<<"# of source points: "<<SCOPE->focalPoints.size()<<endl;
302 492
303 } 493 }
304 494
  495 +vector<ptype> test;
305 static void SetOptions(po::options_description &desc) 496 static void SetOptions(po::options_description &desc)
306 { 497 {
307 desc.add_options() 498 desc.add_options()
308 - ("help,h", "prints this help")  
309 - ("plane-wave,P", "simulates an incident plane wave")  
310 - ("intensity,I", po::value<string>()->default_value(DEFAULT_INTENSITY_FILE), "output measured intensity (filename)")  
311 - ("absorbance,A", po::value<string>()->default_value(DEFAULT_ABSORBANCE_FILE), "output measured absorbance (filename)")  
312 - ("transmittance,T", po::value<string>()->default_value(DEFAULT_TRANSMITTANCE_FILE), "output measured transmittance (filename)")  
313 - ("far-field,F", po::value<string>()->default_value(DEFAULT_FAR_FILE), "output far-field at detector (filename)")  
314 - ("near-field,N", po::value<string>()->default_value(DEFAULT_NEAR_FILE), "output field at focal plane (filename)")  
315 - ("extended-source,X", po::value<string>()->default_value(DEFAULT_EXTENDED_SOURCE), "image of source at focus (filename)")  
316 - //("sx,x", po::value<ptype>()->default_value(DEFAULT_SPHERE_X), "sphere coordinates")  
317 - //("sy,y", po::value<ptype>()->default_value(DEFAULT_SPHERE_Y))  
318 - //("sz,z", po::value<ptype>()->default_value(DEFAULT_SPHERE_Z))  
319 - ("sx,x", po::value<ptype>(), "sphere coordinates")  
320 - ("sy,y", po::value<ptype>())  
321 - ("sz,z", po::value<ptype>())  
322 - ("radius,r", po::value<ptype>()->default_value(DEFAULT_SPHERE_A), "sphere radius")  
323 - ("samples,s", po::value<int>()->default_value(DEFAULT_SAMPLES), "Monte-Carlo samples used to compute Us")  
324 - ("sphere-file,S", po::value< vector<string> >()->multitoken(), "sphere file:\n [x y z radius material]")  
325 - ("amplitude,a", po::value<ptype>()->default_value(DEFAULT_AMPLITUDE), "incident field amplitude")  
326 - ("n,n", po::value<ptype>()->default_value(DEFAULT_N, "1.4"), "sphere phase speed")  
327 - ("k,k", po::value<ptype>()->default_value(DEFAULT_K), "sphere absorption coefficient")  
328 - ("material-file,M", po::value< vector<string> >()->multitoken(), "material file:\n [lambda n k]")  
329 - ("materials", po::value< vector<ptype> >()->multitoken(), "materials specified using n, k pairs:\n ex. --materials n1 k1 n2 k2\n (if used --n and --k are ignored)")  
330 - ("lambda,l", po::value<ptype>()->default_value(DEFAULT_LAMBDA), "incident wavelength") 499 + ("help", "prints this help")
  500 + ("verbose", "verbose output\n")
  501 +
  502 + ("intensity", po::value<string>()->default_value(DEFAULT_INTENSITY_FILE), "output measured intensity (filename)")
  503 + ("absorbance", po::value<string>()->default_value(DEFAULT_ABSORBANCE_FILE), "output measured absorbance (filename)")
  504 + ("transmittance", po::value<string>()->default_value(DEFAULT_TRANSMITTANCE_FILE), "output measured transmittance (filename)")
  505 + ("far-field", po::value<string>()->default_value(DEFAULT_FAR_FILE), "output far-field at detector (filename)")
  506 + ("near-field", po::value<string>()->default_value(DEFAULT_NEAR_FILE), "output field at focal plane (filename)")
  507 + ("extended-source", po::value<string>()->default_value(DEFAULT_EXTENDED_SOURCE), "image of source at focus (filename)\n")
  508 +
  509 + ("spheres", po::value< vector<ptype> >()->multitoken(), "sphere position: x y z a m")
  510 + ("sphere-file", po::value< vector<string> >()->multitoken(), "sphere file:\n [x y z radius material]")
  511 + ("materials", po::value< vector<ptype> >()->multitoken(), "refractive indices as n, k pairs:\n ex. -m n0 k0 n1 k1 n2 k2")
  512 + ("material-file", po::value< vector<string> >()->multitoken(), "material file:\n [lambda n k]\n")
  513 +
  514 + ("lambda", po::value<ptype>()->default_value(DEFAULT_LAMBDA), "incident wavelength")
331 ("nu", po::value<ptype>(), "incident frequency (in cm^-1)\n(if specified, lambda is ignored)") 515 ("nu", po::value<ptype>(), "incident frequency (in cm^-1)\n(if specified, lambda is ignored)")
332 - ("theta,t", po::value<ptype>()->default_value(DEFAULT_K_THETA), "light direction (polar coords)")  
333 - ("phi,p", po::value<ptype>()->default_value(DEFAULT_K_PHI))  
334 - ("fx", po::value<ptype>()->default_value(DEFAULT_FOCUS_X), "incident focal point")  
335 - ("fy", po::value<ptype>()->default_value(DEFAULT_FOCUS_Y))  
336 - ("fz", po::value<ptype>()->default_value(DEFAULT_FOCUS_Z))  
337 - ("condenser-max,C", po::value<ptype>()->default_value(DEFAULT_CONDENSER_MAX), "condenser numerical aperature")  
338 - ("condenser-min,c", po::value<ptype>()->default_value(DEFAULT_CONDENSER_MIN), "condenser obscuration NA")  
339 - ("objective-max,O", po::value<ptype>()->default_value(DEFAULT_OBJECTIVE_MAX), "objective numerical aperature")  
340 - ("objective-min,o", po::value<ptype>()->default_value(DEFAULT_OBJECTIVE_MIN), "objective obscuration NA")  
341 - ("field-order", po::value<int>()->default_value(DEFAULT_FIELD_ORDER), "order of the incident field")  
342 - ("output-type,f", po::value<string>()->default_value(DEFAULT_FIELD_TYPE), "output field value:\n magnitude, polarization, real, imaginary, angular-spectrum")  
343 - ("resolution,R", po::value<unsigned int>()->default_value(DEFAULT_SLICE_RES), "resolution of the detector")  
344 - ("padding,d", po::value<unsigned int>()->default_value(DEFAULT_PADDING), "FFT padding for the objective bandpass") 516 + ("k", po::value< vector<ptype> >()->multitoken(), "k-vector direction: -k theta phi\n theta = [0 2*pi], phi = [0 pi]")
  517 + ("amplitude", po::value<ptype>()->default_value(DEFAULT_AMPLITUDE), "incident field amplitude")
  518 + ("condenser", po::value< vector<ptype> >()->multitoken(), "condenser numerical aperature\nA pair of values can be used to specify an inner obscuration: -c NAin NAout")
  519 + ("objective", po::value< vector<ptype> >()->multitoken(), "objective numerical aperature\nA pair of values can be used to specify an inner obscuration: -c NAin NAout")
  520 + ("focus", po::value< vector<ptype> >()->multitoken(), "focal position for the incident point source\n (default = --focus 0 0 0)")
  521 + ("plane-wave", "simulates an incident plane wave\n")
  522 +
  523 + ("resolution", po::value<unsigned int>()->default_value(DEFAULT_SLICE_RES), "resolution of the detector")
  524 + ("plane-lower-left", po::value< vector<ptype> >()->multitoken(), "lower-left position of the image plane")
  525 + ("plane-upper-right", po::value< vector<ptype> >()->multitoken(), "upper-right position of the image plane")
  526 + ("plane-normal", po::value< vector<ptype> >()->multitoken(), "normal for the image plane")
  527 + ("xy", po::value< vector<ptype> >()->multitoken(), "specify an x-y image plane\n (standard microscope)")
  528 + ("xz", po::value< vector<ptype> >()->multitoken(), "specify a x-z image plane\n (cross-section of the focal volume)")
  529 + ("yz", po::value< vector<ptype> >()->multitoken(), "specify a y-z image plane\n (cross-section of the focal volume)\n")
  530 +
  531 + ("samples", po::value<int>()->default_value(DEFAULT_SAMPLES), "Monte-Carlo samples used to compute Us")
  532 + ("padding", po::value<unsigned int>()->default_value(DEFAULT_PADDING), "FFT padding for the objective bandpass")
345 ("supersample", po::value<unsigned int>()->default_value(DEFAULT_SUPERSAMPLE), "super-sampling rate for the detector field") 533 ("supersample", po::value<unsigned int>()->default_value(DEFAULT_SUPERSAMPLE), "super-sampling rate for the detector field")
  534 + ("field-order", po::value<int>()->default_value(DEFAULT_FIELD_ORDER), "order of the incident field")
  535 + ("seed", po::value<unsigned int>(), "seed for the Monte-Carlo random number generator")
  536 + ("recursive", "evaluate all Bessel functions recursively\n")
  537 + ("recursive-us", "evaluate scattered-field Bessel functions recursively\n")
  538 + ("lut-uf", "evaluate the focused-field using a look-up table\n")
  539 +
  540 + ("output-type", po::value<string>()->default_value(DEFAULT_FIELD_TYPE), "output field value:\n magnitude, polarization, real, imaginary, angular-spectrum")
346 ("colormap", po::value<string>()->default_value(DEFAULT_COLORMAP), "colormap: gray, brewer") 541 ("colormap", po::value<string>()->default_value(DEFAULT_COLORMAP), "colormap: gray, brewer")
347 ("append", "append result to an existing file\n (binary files only)") 542 ("append", "append result to an existing file\n (binary files only)")
348 - ("plane-min-x,u", po::value<ptype>()->default_value(DEFAULT_SLICE_MIN_X), "lower-left corner of the field slice")  
349 - ("plane-min-y,v", po::value<ptype>()->default_value(DEFAULT_SLICE_MIN_Y))  
350 - ("plane-min-z,w", po::value<ptype>()->default_value(DEFAULT_SLICE_MIN_Z))  
351 - ("plane-max-x,U", po::value<ptype>()->default_value(DEFAULT_SLICE_MAX_X), "upper-right corner of the field slice")  
352 - ("plane-max-y,V", po::value<ptype>()->default_value(DEFAULT_SLICE_MAX_Y))  
353 - ("plane-max-z,W", po::value<ptype>()->default_value(DEFAULT_SLICE_MAX_Z))  
354 - ("plane-norm-x", po::value<ptype>()->default_value(DEFAULT_SLICE_NORM_X), "field slice normal")  
355 - ("plane-norm-y", po::value<ptype>()->default_value(DEFAULT_SLICE_NORM_Y))  
356 - ("plane-norm-z", po::value<ptype>()->default_value(DEFAULT_SLICE_NORM_Z)); 543 + ;
357 } 544 }
358 545
359 static void LoadParameters(int argc, char *argv[]) 546 static void LoadParameters(int argc, char *argv[])
360 { 547 {
361 //create an option description 548 //create an option description
362 - po::options_description desc("Allowed options"); 549 + po::options_description desc("BimSim arguments");
363 550
364 //fill it with options 551 //fill it with options
365 SetOptions(desc); 552 SetOptions(desc);
366 553
367 po::variables_map vm; 554 po::variables_map vm;
368 - po::store(po::parse_command_line(argc, argv, desc), vm); 555 + po::store(po::parse_command_line(argc, argv, desc, po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm);
369 po::notify(vm); 556 po::notify(vm);
370 557
371 - //display help and exit  
372 - if(vm.count("help"))  
373 - {  
374 - cout<<desc<<endl;  
375 - exit(1);  
376 - }  
377 558
378 - //load the wavelength  
379 - if(vm.count("nu"))  
380 - {  
381 - //wavelength is given in wavenumber - transform and flag  
382 - SCOPE->nf.lambda = 10000/vm["nu"].as<ptype>();  
383 - gFileOut.wavenumber = true;  
384 - }  
385 - //otherwise we are using lambda = wavelength  
386 - else  
387 - {  
388 - SCOPE->nf.lambda = vm["lambda"].as<ptype>();  
389 - gFileOut.wavenumber = false;  
390 - } 559 + //load flags (help, verbose output)
  560 + lFlags(vm, desc);
  561 +
  562 + //load the wavelength
  563 + lWavelength(vm);
  564 +
  565 + //load materials
  566 + //loadMaterials(vm);
  567 + lMaterials(vm);
  568 +
  569 + //load the sphere data
  570 + lSpheres(vm);
  571 +
  572 + //load the optics
  573 + lOptics(vm);
  574 +
  575 + //load the position and orientation of the image plane
  576 + lImagePlane(vm);
391 577
392 //load spheres 578 //load spheres
393 - loadSpheres(vm); 579 + //loadSpheres(vm);
  580 +
394 581
395 - //load materials  
396 - loadMaterials(vm);  
397 582
398 - loadNearfieldParams(vm); 583 + lNearfield(vm);
399 584
400 loadOutputParams(vm); 585 loadOutputParams(vm);
401 586
402 - loadMicroscopeParams(vm); 587 + //loadMicroscopeParams(vm);
403 588
404 - loadSliceParams(vm); 589 + //loadSliceParams(vm);
405 590
406 //if an extended source will be used 591 //if an extended source will be used
407 if(vm["extended-source"].as<string>() != "") 592 if(vm["extended-source"].as<string>() != "")
@@ -22,16 +22,17 @@ scalarslice::scalarslice() @@ -22,16 +22,17 @@ scalarslice::scalarslice()
22 22
23 scalarslice::~scalarslice() 23 scalarslice::~scalarslice()
24 { 24 {
25 - HANDLE_ERROR(cudaFree(S)); 25 + if(S != NULL)
  26 + HANDLE_ERROR(cudaFree(S));
26 S = NULL; 27 S = NULL;
27 } 28 }
28 29
29 -void scalarslice::toImage(std::string filename, ptype vmin, ptype vmax, rts::colormap::colormapType cmap) 30 +void scalarslice::toImage(std::string filename, ptype vmin, ptype vmax, rts::colormapType cmap)
30 { 31 {
31 - rts::colormap::gpu2image<ptype>(S, filename, R[0], R[1], vmin, vmax, cmap); 32 + rts::gpu2image<ptype>(S, filename, R[0], R[1], vmin, vmax, cmap);
32 } 33 }
33 34
34 -void scalarslice::toImage(std::string filename, bool positive, rts::colormap::colormapType cmap) 35 +void scalarslice::toImage(std::string filename, bool positive, rts::colormapType cmap)
35 { 36 {
36 cublasStatus_t stat; 37 cublasStatus_t stat;
37 cublasHandle_t handle; 38 cublasHandle_t handle;
@@ -62,7 +63,7 @@ void scalarslice::toImage(std::string filename, bool positive, rts::colormap::co @@ -62,7 +63,7 @@ void scalarslice::toImage(std::string filename, bool positive, rts::colormap::co
62 exit(1); 63 exit(1);
63 } 64 }
64 65
65 - //std::cout<<"Maximum index: "<<result<<std::endl; 66 +
66 67
67 //retrieve the maximum value 68 //retrieve the maximum value
68 ptype maxVal; 69 ptype maxVal;
@@ -75,7 +76,7 @@ void scalarslice::toImage(std::string filename, bool positive, rts::colormap::co @@ -75,7 +76,7 @@ void scalarslice::toImage(std::string filename, bool positive, rts::colormap::co
75 if(positive) 76 if(positive)
76 toImage(filename, 0, maxVal, cmap); 77 toImage(filename, 0, maxVal, cmap);
77 else 78 else
78 - toImage(filename, -maxVal, maxVal, cmap); 79 + toImage(filename, -abs(maxVal), abs(maxVal), cmap);
79 } 80 }
80 81
81 void scalarslice::toEnvi(std::string filename, ptype wavelength, bool append) 82 void scalarslice::toEnvi(std::string filename, ptype wavelength, bool append)
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 #define RTS_SCALAR_SLICE 2 #define RTS_SCALAR_SLICE
3 3
4 #include "dataTypes.h" 4 #include "dataTypes.h"
5 -#include "colormap.h" 5 +#include "rts/graphics/colormap.h"
6 6
7 struct scalarslice 7 struct scalarslice
8 { 8 {
@@ -17,8 +17,8 @@ struct scalarslice @@ -17,8 +17,8 @@ struct scalarslice
17 ~scalarslice(); 17 ~scalarslice();
18 void clear(); 18 void clear();
19 19
20 - void toImage(std::string filename, ptype vmin, ptype vmax, rts::colormap::colormapType cmap = rts::colormap::cmBrewer);  
21 - void toImage(std::string filename, bool positive = true, rts::colormap::colormapType cmap = rts::colormap::cmBrewer); 20 + void toImage(std::string filename, ptype vmin, ptype vmax, rts::colormapType cmap = rts::cmBrewer);
  21 + void toImage(std::string filename, bool positive = true, rts::colormapType cmap = rts::cmBrewer);
22 void toEnvi(std::string filename, ptype wavelength = 0, bool append = false); 22 void toEnvi(std::string filename, ptype wavelength = 0, bool append = false);
23 23
24 }; 24 };
1 #include "sphere.h" 1 #include "sphere.h"
  2 +#include "defaults.h"
2 3
3 #include "rts/math/complex.h" 4 #include "rts/math/complex.h"
4 #include <complex> 5 #include <complex>
5 #include <stdlib.h> 6 #include <stdlib.h>
  7 +#include <fstream>
6 8
7 using namespace rts; 9 using namespace rts;
8 using namespace std; 10 using namespace std;
@@ -13,6 +15,9 @@ int cbessjyva(double v,complex&lt;double&gt; z,double &amp;vm,complex&lt;double&gt;*cjv, @@ -13,6 +15,9 @@ int cbessjyva(double v,complex&lt;double&gt; z,double &amp;vm,complex&lt;double&gt;*cjv,
13 int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv, 15 int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv,
14 complex<double>*cyv,complex<double>*cjvp,complex<double>*cyvp); 16 complex<double>*cyv,complex<double>*cjvp,complex<double>*cyvp);
15 17
  18 +int bessjyv_sph(int v, double z, double &vm, double* cjv,
  19 + double* cyv, double* cjvp, double* cyvp);
  20 +
16 void sphere::calcCoeff(ptype lambda, rtsComplex<ptype> ri) 21 void sphere::calcCoeff(ptype lambda, rtsComplex<ptype> ri)
17 { 22 {
18 /* These calculations are done at high-precision on the CPU 23 /* These calculations are done at high-precision on the CPU
@@ -59,12 +64,6 @@ void sphere::calcCoeff(ptype lambda, rtsComplex&lt;ptype&gt; ri) @@ -59,12 +64,6 @@ void sphere::calcCoeff(ptype lambda, rtsComplex&lt;ptype&gt; ri)
59 cbessjyva_sph(Nl, ka, vm, cjv_ka, cyv_ka, cjvp_ka, cyvp_ka); 64 cbessjyva_sph(Nl, ka, vm, cjv_ka, cyv_ka, cjvp_ka, cyvp_ka);
60 cbessjyva_sph(Nl, kna, vm, cjv_kna, cyv_kna, cjvp_kna, cyvp_kna); 65 cbessjyva_sph(Nl, kna, vm, cjv_kna, cyv_kna, cjvp_kna, cyvp_kna);
61 66
62 -  
63 - //cout<<"Begin Sphere---------"<<endl;  
64 - //cout<<"Nl = "<<Nl<<endl;  
65 - //cout<<"ka = "<<ka<<endl;  
66 - //cout<<"kna = "<<kna<<endl;  
67 -  
68 //compute A for each order 67 //compute A for each order
69 complex<double> i(0, 1); 68 complex<double> i(0, 1);
70 complex<double> a, b, c, d; 69 complex<double> a, b, c, d;
@@ -83,7 +82,7 @@ void sphere::calcCoeff(ptype lambda, rtsComplex&lt;ptype&gt; ri) @@ -83,7 +82,7 @@ void sphere::calcCoeff(ptype lambda, rtsComplex&lt;ptype&gt; ri)
83 //calculate A and add it to the list 82 //calculate A and add it to the list
84 An = (2.0 * l + 1.0) * pow(i, l) * (a / b); 83 An = (2.0 * l + 1.0) * pow(i, l) * (a / b);
85 A.push_back(bsComplex(An.real(), An.imag())); 84 A.push_back(bsComplex(An.real(), An.imag()));
86 - //cout<<"A: "<<An<<endl; 85 +
87 86
88 //Compute B (external scattering coefficient) 87 //Compute B (external scattering coefficient)
89 c = cjv_ka[l] * cjvp_kna[l] * nc - cjv_kna[l] * cjvp_ka[l]; 88 c = cjv_ka[l] * cjvp_kna[l] * nc - cjv_kna[l] * cjvp_ka[l];
@@ -92,7 +91,206 @@ void sphere::calcCoeff(ptype lambda, rtsComplex&lt;ptype&gt; ri) @@ -92,7 +91,206 @@ void sphere::calcCoeff(ptype lambda, rtsComplex&lt;ptype&gt; ri)
92 //calculate B and add it to the list 91 //calculate B and add it to the list
93 Bn = (2.0 * l + 1.0) * pow(i, l) * (c / d); 92 Bn = (2.0 * l + 1.0) * pow(i, l) * (c / d);
94 B.push_back(bsComplex(Bn.real(), Bn.imag())); 93 B.push_back(bsComplex(Bn.real(), Bn.imag()));
95 - //cout<<"B: "<<Bn<<endl;  
96 94
  95 +
  96 + }
  97 +}
  98 +
  99 +void sphere::calcBesselLut(bsComplex* j, ptype k, bsComplex n, int aR)
  100 +{
  101 + /*Compute the look-up-table for spherical bessel functions used inside of the sphere
  102 + j = (Nl + 1) x aR array of values
  103 + aR = resolution of j
  104 + */
  105 +
  106 + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored)
  107 + int bytes = sizeof(complex<double>) * (Nl + 1);
  108 + complex<double>* cjv_knr = (complex<double>*)malloc(bytes);
  109 + complex<double>* cyv_knr = (complex<double>*)malloc(bytes);
  110 + complex<double>* cjvp_knr = (complex<double>*)malloc(bytes);
  111 + complex<double>* cyvp_knr = (complex<double>*)malloc(bytes);
  112 +
  113 + //compute the bessel functions using the CPU-based algorithm
  114 + double vm;
  115 +
  116 + //for each sample along r
  117 + ptype dr = a / (aR - 1);
  118 + ptype r;
  119 + for(int ir = 0; ir < aR; ir++)
  120 + {
  121 + r = ir * dr;
  122 + complex<double> knr( (k*n*r).real(), (k*n*r).imag() );
  123 + cbessjyva_sph(Nl, knr, vm, cjv_knr, cyv_knr, cjvp_knr, cyvp_knr);
  124 +
  125 + //copy the double data to the bsComplex array
  126 + for(int l=0; l<=Nl; l++)
  127 + {
  128 + //deal with the NaN case at the origin
  129 + if(ir == 0)
  130 + {
  131 + if(l == 0)
  132 + j[ir * (Nl+1)] = 1;
  133 + else
  134 + j[ir * (Nl+1) + l] = 0;
  135 + }
  136 + else
  137 + j[ir * (Nl+1) + l] = bsComplex(cjv_knr[l].real(), cjv_knr[l].imag());
  138 + }
  139 + }
  140 +
  141 + /*ofstream outfile("besselout.txt");
  142 + for(int ir = 0; ir < aR; ir++)
  143 + {
  144 + for(int l = 0; l<Nl+1; l++)
  145 + {
  146 + outfile<<j[ir * (Nl+1) + l].real()<<" ";
  147 + }
  148 + outfile<<endl;
  149 + }
  150 + outfile.close();*/
  151 +
  152 +}
  153 +
  154 +void sphere::calcHankelLut(bsComplex* h, ptype k, int rR)
  155 +{
  156 + /*Compute the look-up-table for spherical bessel functions used inside of the sphere
  157 + h_out = (Nl + 1) x aR array of values
  158 + rmin = minimum value of r
  159 + d_max = maximum value of r
  160 + rR = resolution of h_out
  161 + */
  162 +
  163 + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored)
  164 + int bytes = sizeof(double) * (Nl + 1);
  165 + double* cjv_kr = (double*)malloc(bytes);
  166 + double* cyv_kr = (double*)malloc(bytes);
  167 + double* cjvp_kr = (double*)malloc(bytes);
  168 + double* cyvp_kr = (double*)malloc(bytes);
  169 +
  170 + //compute the bessel functions using the CPU-based algorithm
  171 + double vm;
  172 +
  173 +
  174 +
  175 + //for each sample along r
  176 + ptype dr = (d_max - max(a, d_min)) / (rR - 1);
  177 + ptype r;
  178 + for(int ir = 0; ir < rR; ir++)
  179 + {
  180 + r = ir * dr + max(a, d_min);
  181 + double kr = k*r;
  182 + bessjyv_sph(Nl, kr, vm, cjv_kr, cyv_kr, cjvp_kr, cyvp_kr);
  183 +
  184 + //copy the double data to the bsComplex array
  185 + for(int l=0; l<=Nl; l++)
  186 + {
  187 + //h[ir * (Nl+1) + l] = bsComplex(cjv_kr[l].real(), cyv_kr[l].real());
  188 + h[ir * (Nl+1) + l] = bsComplex(cjv_kr[l], cyv_kr[l]);
  189 + }
97 } 190 }
  191 +
  192 + /*ofstream outfile("hankelout.txt");
  193 + for(int ir = 0; ir < rR; ir++)
  194 + {
  195 + outfile<<ir*dr + max(a, d_min)<<" ";
  196 + for(int l = 0; l<=0; l++)
  197 + {
  198 + outfile<<h[ir * (Nl+1) + l].real()<<" "<<h[ir * (Nl+1) + l].imag()<<" ";
  199 + }
  200 + outfile<<endl;
  201 + }
  202 + outfile.close();*/
  203 +}
  204 +
  205 +void sphere::calcLut(bsComplex* j, bsComplex* h, ptype lambda, bsComplex n, int aR, int rR)
  206 +{
  207 + /*Compute the look-up-tables for spherical bessel functions used both inside and outside of the sphere.
  208 + j = (Nl + 1) x aR array of values
  209 + j = (Nl + 1) x rR array of values
  210 + d_max = maximum distance for the LUT
  211 + aR = resolution of j_in
  212 + rR = resolution of j_out
  213 + */
  214 +
  215 + //compute the magnitude of the k vector
  216 + double k = 2 * PI / lambda;
  217 +
  218 + calcBesselLut(j, k, n, aR);
  219 + calcHankelLut(h, k, rR);
  220 +}
  221 +
  222 +void sphere::calcUp(ptype lambda, bsComplex n, rts::rtsQuad<ptype, 3> nfPlane, unsigned int R)
  223 +{
  224 + //calculate the parameters of the lookup table
  225 +
  226 + //first find the distance to the closest and furthest points on the nearfield plane
  227 + d_min = nfPlane.dist(p);
  228 + d_max = nfPlane.dist_max(p);
  229 +
  230 + //compute the radius of the cross-section of the sphere with the plane
  231 + ptype a_inter = 0;
  232 + if(d_min < a)
  233 + a_inter = sqrt(a - d_min);
  234 +
  235 +
  236 + //calculate the resolution of the Usp and Uip lookup tables
  237 + int aR = 1 + 2 * R * a_inter / (nfPlane(0, 0) - nfPlane(1, 1)).len();
  238 + int dR = 2 * R;
  239 + int thetaR = DEFAULT_SPHERE_THETA_R;
  240 +
  241 + //allocate space for the bessel function LUTs
  242 + bsComplex* j = (bsComplex*)malloc(sizeof(bsComplex) * (Nl + 1) * aR);
  243 + bsComplex* h = (bsComplex*)malloc(sizeof(bsComplex) * (Nl + 1) * dR);
  244 +
  245 + calcLut(j, h, lambda, n, aR, dR);
  246 +
  247 + //allocate space for the Usp lookup texture
  248 + Usp.R[0] = dR;
  249 + Usp.R[1] = thetaR;
  250 + Usp.init_gpu();
  251 +
  252 + //allocate space for the Uip lookup texture
  253 + Uip.R[0] = aR;
  254 + Uip.R[1] = thetaR;
  255 + Uip.init_gpu();
  256 +
  257 +
  258 +
  259 + scalarUsp(h, dR, thetaR);
  260 + scalarUip(j, aR, thetaR);
  261 +
  262 + scalarslice UspMag = Usp.Mag();
  263 + UspMag.toImage("Usp.bmp", true);
  264 +
  265 + scalarslice UipMag = Uip.Mag();
  266 + UipMag.toImage("Uip.bmp", true);
  267 +
  268 + //free memory
  269 + free(j);
  270 + free(h);
  271 +
  272 +}
  273 +
  274 +sphere& sphere::operator=(const sphere &rhs)
  275 +{
  276 + p = rhs.p;
  277 + a = rhs.a;
  278 + iMaterial = rhs.iMaterial;
  279 + Nl = rhs.Nl;
  280 + n = rhs.n;
  281 + B = rhs.B;
  282 + A = rhs.A;
  283 +
  284 + return *this;
  285 +}
  286 +
  287 +sphere::sphere(const sphere &rhs)
  288 +{
  289 + p = rhs.p;
  290 + a = rhs.a;
  291 + iMaterial = rhs.iMaterial;
  292 + Nl = rhs.Nl;
  293 + n = rhs.n;
  294 + B = rhs.B;
  295 + A = rhs.A;
98 } 296 }
sphere.cu 0 โ†’ 100644
  1 +#include "sphere.h"
  2 +#include "rts/math/legendre.h"
  3 +
__global__ void gpuScalarUsp(bsComplex* Usp, bsComplex* h, bsComplex* B, int Nl, int rR, int thetaR)
{
	/*Evaluate one sample of the Usp table: the sum over l = 0..Nl of B[l] * h(ir, l) * P_l,
	  where P_l comes from the Legendre helpers evaluated at cos(theta).
		Usp		= output, rR x thetaR samples stored at [itheta * rR + ir]
		h		= look-up table stored at [ir * (Nl + 1) + l]
		B		= (Nl + 1) coefficients
	  Launched on a 2D grid: x covers r, y covers theta.
	*/

	//sample coordinates handled by this thread
	const int ir = blockIdx.x * blockDim.x + threadIdx.x;
	const int itheta = blockIdx.y * blockDim.y + threadIdx.y;

	//threads past the edge of the table have nothing to do
	if(ir >= rR || itheta >= thetaR) return;

	//angle of this sample: thetaR samples from 0 to PI
	ptype thetaStep = (PI) / (thetaR - 1);
	ptype theta = thetaStep * itheta;
	ptype cos_theta = cos(theta);

	//running pair of Legendre values (presumably orders 0 and 1 after init -- confirm in legendre.h)
	ptype Plo, Phi;
	rts::init_legendre<ptype>(cos_theta, Plo, Phi);

	//first entry of this r sample in the look-up table
	const int row = ir * (Nl + 1);

	//accumulate the series over all orders
	bsComplex sum((ptype)0, (ptype)0);
	for(int l = 0; l <= Nl; l++)
	{
		//from order 2 on, advance the pair so that Phi holds the value for order l
		if(l > 1)
			rts::shift_legendre<ptype>(l, cos_theta, Plo, Phi);

		//order 0 uses the first value, every other order uses the second
		ptype Pl = (l == 0) ? Plo : Phi;
		sum += B[l] * h[row + l] * Pl;
	}

	Usp[itheta * rR + ir] = sum;
}
  55 +
__global__ void gpuScalarUip(bsComplex* Uip, bsComplex* j, bsComplex* A, int Nl, int aR, int thetaR)
{
	/*Evaluate one sample of the Uip table: the sum over l = 0..Nl of A[l] * j(ia, l) * P_l,
	  where P_l comes from the Legendre helpers evaluated at cos(theta).
		Uip		= output, aR x thetaR samples stored at [itheta * aR + ia]
		j		= look-up table stored at [ia * (Nl + 1) + l]
		A		= (Nl + 1) coefficients
	  Launched on a 2D grid: x covers r, y covers theta.
	*/

	//sample coordinates handled by this thread
	const int ia = blockIdx.x * blockDim.x + threadIdx.x;
	const int itheta = blockIdx.y * blockDim.y + threadIdx.y;

	//threads past the edge of the table have nothing to do
	if(ia >= aR || itheta >= thetaR) return;

	//angle of this sample: thetaR samples from 0 to PI
	ptype thetaStep = (PI) / (thetaR - 1);
	ptype theta = thetaStep * itheta;
	ptype cos_theta = cos(theta);

	//running pair of Legendre values (presumably orders 0 and 1 after init -- confirm in legendre.h)
	ptype Plo, Phi;
	rts::init_legendre<ptype>(cos_theta, Plo, Phi);

	//first entry of this r sample in the look-up table
	const int row = ia * (Nl + 1);

	//accumulate the series over all orders
	bsComplex sum((ptype)0, (ptype)0);
	for(int l = 0; l <= Nl; l++)
	{
		//from order 2 on, advance the pair so that Phi holds the value for order l
		if(l > 1)
			rts::shift_legendre<ptype>(l, cos_theta, Plo, Phi);

		//order 0 uses the first value, every other order uses the second
		ptype Pl = (l == 0) ? Plo : Phi;
		sum += A[l] * j[row + l] * Pl;
	}

	Uip[itheta * aR + ia] = sum;
}
  100 +
void sphere::scalarUsp(bsComplex* h, int rR, int thetaR)
{
	/*Evaluate the Usp table on the GPU and store it in Usp.x_hat.
		h		= host look-up table with (Nl + 1) * rR entries
		rR		= number of samples along r
		thetaR	= number of samples along theta
	  The first (Nl + 1) entries of B are used as the series coefficients.
	*/

	//copy the hankel function to the GPU
	const size_t lutBytes = sizeof(bsComplex) * (Nl + 1) * rR;
	bsComplex* gpu_h;
	HANDLE_ERROR( cudaMalloc( (void**)&gpu_h, lutBytes ) );
	HANDLE_ERROR( cudaMemcpy( gpu_h, h, lutBytes, cudaMemcpyHostToDevice ) );

	//allocate memory for the scattering coefficients and copy them to the GPU
	const size_t coeffBytes = (Nl+1) * sizeof(bsComplex);
	bsComplex* gpuB;
	HANDLE_ERROR( cudaMalloc( (void**)&gpuB, coeffBytes ) );
	HANDLE_ERROR( cudaMemcpy( gpuB, &B[0], coeffBytes, cudaMemcpyHostToDevice ) );

	//create one thread for each pixel of the field slice
	dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
	dim3 dimGrid((Usp.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Usp.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

	gpuScalarUsp<<<dimGrid, dimBlock>>>(Usp.x_hat, gpu_h, gpuB, Nl, rR, thetaR);

	//a kernel launch returns no status, so pick up launch failures explicitly
	HANDLE_ERROR( cudaGetLastError() );

	//free memory; the result is checked because cudaFree can also report errors
	//	from the preceding asynchronous kernel execution
	HANDLE_ERROR( cudaFree(gpu_h) );
	HANDLE_ERROR( cudaFree(gpuB) );
}
  125 +
void sphere::scalarUip(bsComplex* j, int rR, int thetaR)
{
	/*Evaluate the Uip table on the GPU and store it in Uip.x_hat.
		j		= host look-up table with (Nl + 1) * rR entries
		rR		= number of samples along r
		thetaR	= number of samples along theta
	  The first (Nl + 1) entries of A are used as the series coefficients.
	*/

	//copy the bessel LUT to the GPU
	const size_t lutBytes = sizeof(bsComplex) * (Nl + 1) * rR;
	bsComplex* gpu_j;
	HANDLE_ERROR( cudaMalloc( (void**)&gpu_j, lutBytes ) );
	HANDLE_ERROR( cudaMemcpy( gpu_j, j, lutBytes, cudaMemcpyHostToDevice ) );

	//allocate memory for the scattering coefficients and copy them to the GPU
	const size_t coeffBytes = (Nl+1) * sizeof(bsComplex);
	bsComplex* gpuA;
	HANDLE_ERROR( cudaMalloc( (void**)&gpuA, coeffBytes ) );
	HANDLE_ERROR( cudaMemcpy( gpuA, &A[0], coeffBytes, cudaMemcpyHostToDevice ) );

	//create one thread for each pixel of the field slice
	dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
	dim3 dimGrid((Uip.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uip.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

	gpuScalarUip<<<dimGrid, dimBlock>>>(Uip.x_hat, gpu_j, gpuA, Nl, rR, thetaR);

	//a kernel launch returns no status, so pick up launch failures explicitly
	HANDLE_ERROR( cudaGetLastError() );

	//free memory; the result is checked because cudaFree can also report errors
	//	from the preceding asynchronous kernel execution
	HANDLE_ERROR( cudaFree(gpu_j) );
	HANDLE_ERROR( cudaFree(gpuA) );
}
@@ -22,12 +22,12 @@ struct sphere @@ -22,12 +22,12 @@ struct sphere
22 //sphere material index 22 //sphere material index
23 int iMaterial; 23 int iMaterial;
24 24
25 - //rtsPointer to the scattered field produced by a plane wave 25 + //GPU pointer to the scattered field produced by a plane wave
26 // this is a function of cos(theta) and |r| (distance from sphere center) 26 // this is a function of cos(theta) and |r| (distance from sphere center)
27 - //fieldslice surface;  
28 -  
29 - //resolution of the scattered field  
30 - int thetaR, rR; 27 + fieldslice Usp;
  28 + fieldslice Uip;
  29 + ptype d_min;
  30 + ptype d_max;
31 31
32 //sphere order 32 //sphere order
33 int Nl; 33 int Nl;
@@ -50,6 +50,12 @@ struct sphere @@ -50,6 +50,12 @@ struct sphere
50 //surface = fieldslice(ang, ang/2); 50 //surface = fieldslice(ang, ang/2);
51 } 51 }
52 52
  53 + //assignment operator
  54 + sphere & operator=(const sphere &rhs);
  55 +
  56 + //copy constructor
  57 + sphere(const sphere &rhs);
  58 +
53 std::string toStr() 59 std::string toStr()
54 { 60 {
55 std::stringstream ss; 61 std::stringstream ss;
@@ -66,8 +72,19 @@ struct sphere @@ -66,8 +72,19 @@ struct sphere
66 Nl = ceil( (2 * PI * a) / lambda + 4 * pow( (2 * PI * a) / lambda, 1.0/3.0) + 2); 72 Nl = ceil( (2 * PI * a) / lambda + 4 * pow( (2 * PI * a) / lambda, 1.0/3.0) + 2);
67 } 73 }
68 74
69 - void calcCoeff(ptype lambda, rts::rtsComplex<ptype> n); 75 + //compute the scattering coefficients
  76 + void calcCoeff(ptype lambda, bsComplex n);
  77 +
  78 + //compute the bessel function look-up tables
  79 + void calcLut(bsComplex* j, bsComplex* h, ptype lambda, bsComplex n, int aR, int rR);
  80 + void calcBesselLut(bsComplex* j, ptype k, bsComplex n, int aR);
  81 + void calcHankelLut(bsComplex* h, ptype k, int rR);
  82 +
  83 + //calculate the scattering domain Us(theta, r)
  84 + void calcUp(ptype lambda, bsComplex n, rts::rtsQuad<ptype, 3> nfPlane, unsigned int R);
70 85
  86 + void scalarUsp(bsComplex* h, int rR, int thetaR);
  87 + void scalarUip(bsComplex* j, int aR, int thetaR);
71 88
72 89
73 90