Commit 51b6469a3ee77583099edb0a57e1bb7859c28fd1
1 parent
b6179de6
added look-up tables
Showing
27 changed files
with
1518 additions
and
588 deletions
Show diff stats
bessjy.cpp
@@ -13,7 +13,9 @@ | @@ -13,7 +13,9 @@ | ||
13 | // | 13 | // |
14 | #define _USE_MATH_DEFINES | 14 | #define _USE_MATH_DEFINES |
15 | #include <math.h> | 15 | #include <math.h> |
16 | -#include "bessel.h" | 16 | +#include "bessel.h" |
17 | + | ||
18 | +#define PI 3.14159 | ||
17 | 19 | ||
18 | double gamma(double x); | 20 | double gamma(double x); |
19 | // | 21 | // |
@@ -426,7 +428,7 @@ int bessjynb(int n,double x,int &nm,double *jn,double *yn, | @@ -426,7 +428,7 @@ int bessjynb(int n,double x,int &nm,double *jn,double *yn, | ||
426 | 0.2775764465332031, | 428 | 0.2775764465332031, |
427 | -1.993531733751297, | 429 | -1.993531733751297, |
428 | 2.724882731126854e1}; | 430 | 2.724882731126854e1}; |
429 | - | 431 | + |
430 | int i,k,m; | 432 | int i,k,m; |
431 | nm = n; | 433 | nm = n; |
432 | if ((x < 0.0) || (n < 0)) return 1; | 434 | if ((x < 0.0) || (n < 0)) return 1; |
@@ -702,5 +704,26 @@ int bessjyv(double v,double x,double &vm,double *jv,double *yv, | @@ -702,5 +704,26 @@ int bessjyv(double v,double x,double &vm,double *jv,double *yv, | ||
702 | } | 704 | } |
703 | vm = n + v0; | 705 | vm = n + v0; |
704 | return 0; | 706 | return 0; |
707 | +} | ||
708 | + | ||
709 | +int bessjyv_sph(int v, double z, double &vm, double* cjv, | ||
710 | + double* cyv, double* cjvp, double* cyvp) | ||
711 | +{ | ||
712 | + //first, compute the bessel functions of fractional order | ||
713 | + bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); | ||
714 | + | ||
715 | + //iterate through each and scale | ||
716 | + for(int n = 0; n<=v; n++) | ||
717 | + { | ||
718 | + | ||
719 | + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); | ||
720 | + cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); | ||
721 | + | ||
722 | + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(PI / (z * 2.0)); | ||
723 | + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(PI / (z * 2.0)); | ||
724 | + } | ||
725 | + | ||
726 | + return 0; | ||
727 | + | ||
705 | } | 728 | } |
706 | - | 729 | + |
cbessjy.cpp
@@ -724,6 +724,7 @@ int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv, | @@ -724,6 +724,7 @@ int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv, | ||
724 | //iterate through each and scale | 724 | //iterate through each and scale |
725 | for(int n = 0; n<=v; n++) | 725 | for(int n = 0; n<=v; n++) |
726 | { | 726 | { |
727 | + | ||
727 | cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); | 728 | cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); |
728 | cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); | 729 | cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); |
729 | 730 |
colormap.h deleted
1 | -#ifndef RTS_COLORMAP_H | ||
2 | -#define RTS_COLORMAP_H | ||
3 | - | ||
4 | -#include <string> | ||
5 | -#include <qimage.h> | ||
6 | -#include <qcolor.h> | ||
7 | -#include "rts/cuda/error.h" | ||
8 | - | ||
9 | - | ||
10 | -#define BREWER_CTRL_PTS 11 | ||
11 | - | ||
12 | -#ifdef __CUDACC__ | ||
13 | -texture<float4, cudaTextureType1D> cudaTexBrewer; | ||
14 | -static cudaArray* gpuBrewer; | ||
15 | -#endif | ||
16 | - | ||
17 | - | ||
18 | - | ||
19 | -namespace rts{ | ||
20 | - namespace colormap{ | ||
21 | - | ||
22 | -enum colormapType {cmBrewer, cmGrayscale}; | ||
23 | - | ||
24 | -static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size) | ||
25 | -{ | ||
26 | - //create an image object | ||
27 | - QImage image(x_size, y_size, QImage::Format_RGB32); | ||
28 | - | ||
29 | - int i; | ||
30 | - unsigned char r, g, b; | ||
31 | - unsigned int x, y; | ||
32 | - for(y=0; y<y_size; y++) | ||
33 | - for(x=0; x<x_size; x++) | ||
34 | - { | ||
35 | - //calculate the 1D index | ||
36 | - i = y * x_size + x; | ||
37 | - | ||
38 | - r = buffer[i * 3 + 0]; | ||
39 | - g = buffer[i * 3 + 1]; | ||
40 | - b = buffer[i * 3 + 2]; | ||
41 | - | ||
42 | - //set the image pixel | ||
43 | - QColor color(r, g, b); | ||
44 | - image.setPixel(x, y, color.rgb()); | ||
45 | - } | ||
46 | - | ||
47 | - image.save(filename.c_str()); | ||
48 | -} | ||
49 | - | ||
50 | -#ifdef __CUDACC__ | ||
51 | -static void initBrewer() | ||
52 | -{ | ||
53 | - //initialize the Brewer colormap | ||
54 | - | ||
55 | - //allocate CPU space | ||
56 | - float4 cpuColorMap[BREWER_CTRL_PTS]; | ||
57 | - | ||
58 | - //define control rtsPoints | ||
59 | - cpuColorMap[0] = make_float4(0.192157f, 0.211765f, 0.584314f, 1.0f); | ||
60 | - cpuColorMap[1] = make_float4(0.270588f, 0.458824f, 0.705882f, 1.0f); | ||
61 | - cpuColorMap[2] = make_float4(0.454902f, 0.678431f, 0.819608f, 1.0f); | ||
62 | - cpuColorMap[3] = make_float4(0.670588f, 0.85098f, 0.913725f, 1.0f); | ||
63 | - cpuColorMap[4] = make_float4(0.878431f, 0.952941f, 0.972549f, 1.0f); | ||
64 | - cpuColorMap[5] = make_float4(1.0f, 1.0f, 0.74902f, 1.0f); | ||
65 | - cpuColorMap[6] = make_float4(0.996078f, 0.878431f, 0.564706f, 1.0f); | ||
66 | - cpuColorMap[7] = make_float4(0.992157f, 0.682353f, 0.380392f, 1.0f); | ||
67 | - cpuColorMap[8] = make_float4(0.956863f, 0.427451f, 0.262745f, 1.0f); | ||
68 | - cpuColorMap[9] = make_float4(0.843137f, 0.188235f, 0.152941f, 1.0f); | ||
69 | - cpuColorMap[10] = make_float4(0.647059f, 0.0f, 0.14902f, 1.0f); | ||
70 | - | ||
71 | - | ||
72 | - int width = BREWER_CTRL_PTS; | ||
73 | - int height = 0; | ||
74 | - | ||
75 | - | ||
76 | - // allocate array and copy colormap data | ||
77 | - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat); | ||
78 | - | ||
79 | - HANDLE_ERROR(cudaMallocArray(&gpuBrewer, &channelDesc, width, height)); | ||
80 | - | ||
81 | - HANDLE_ERROR(cudaMemcpyToArray(gpuBrewer, 0, 0, cpuColorMap, sizeof(float4)*width, cudaMemcpyHostToDevice)); | ||
82 | - | ||
83 | - // set texture parameters | ||
84 | - cudaTexBrewer.addressMode[0] = cudaAddressModeClamp; | ||
85 | - //texBrewer.addressMode[1] = cudaAddressModeClamp; | ||
86 | - cudaTexBrewer.filterMode = cudaFilterModeLinear; | ||
87 | - cudaTexBrewer.normalized = true; // access with normalized texture coordinates | ||
88 | - | ||
89 | - // Bind the array to the texture | ||
90 | - HANDLE_ERROR(cudaBindTextureToArray( cudaTexBrewer, gpuBrewer, channelDesc)); | ||
91 | - | ||
92 | -} | ||
93 | - | ||
94 | -static void destroyBrewer() | ||
95 | -{ | ||
96 | - HANDLE_ERROR(cudaFreeArray(gpuBrewer)); | ||
97 | - | ||
98 | -} | ||
99 | - | ||
100 | -template<class T> | ||
101 | -__global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) | ||
102 | -{ | ||
103 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | ||
104 | - if(i >= N) return; | ||
105 | - | ||
106 | - //compute the normalized value on [minVal maxVal] | ||
107 | - float a = (gpuSource[i] - minVal) / (maxVal - minVal); | ||
108 | - | ||
109 | - //lookup the color | ||
110 | - float shift = 1.0/BREWER_CTRL_PTS; | ||
111 | - float4 color = tex1D(cudaTexBrewer, a+shift); | ||
112 | - | ||
113 | - gpuDest[i * 3 + 0] = 255 * color.x; | ||
114 | - gpuDest[i * 3 + 1] = 255 * color.y; | ||
115 | - gpuDest[i * 3 + 2] = 255 * color.z; | ||
116 | -} | ||
117 | - | ||
118 | -template<class T> | ||
119 | -__global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) | ||
120 | -{ | ||
121 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | ||
122 | - if(i >= N) return; | ||
123 | - | ||
124 | - //compute the normalized value on [minVal maxVal] | ||
125 | - float a = (gpuSource[i] - minVal) / (maxVal - minVal); | ||
126 | - | ||
127 | - gpuDest[i * 3 + 0] = 255 * a; | ||
128 | - gpuDest[i * 3 + 1] = 255 * a; | ||
129 | - gpuDest[i * 3 + 2] = 255 * a; | ||
130 | -} | ||
131 | - | ||
132 | -template<class T> | ||
133 | -static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T minVal = 0, T maxVal = 1, colormapType cm = cmGrayscale, int blockDim = 128) | ||
134 | -{ | ||
135 | - //This function converts a scalar field on the GPU to a color image on the GPU | ||
136 | - int gridDim = (nVals + blockDim - 1)/blockDim; | ||
137 | - if(cm == cmGrayscale) | ||
138 | - applyGrayscale<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal); | ||
139 | - else if(cm == cmBrewer) | ||
140 | - { | ||
141 | - initBrewer(); | ||
142 | - applyBrewer<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal); | ||
143 | - destroyBrewer(); | ||
144 | - } | ||
145 | - | ||
146 | -} | ||
147 | - | ||
148 | -template<class T> | ||
149 | -static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T minVal, T maxVal, colormapType cm = cmGrayscale) | ||
150 | -{ | ||
151 | - //this function converts a scalar field on the GPU to a color image on the CPU | ||
152 | - | ||
153 | - //first create the color image on the GPU | ||
154 | - | ||
155 | - //allocate GPU memory for the color image | ||
156 | - unsigned char* gpuDest; | ||
157 | - HANDLE_ERROR(cudaMalloc( (void**)&gpuDest, sizeof(unsigned char) * nVals * 3 )); | ||
158 | - | ||
159 | - //HANDLE_ERROR(cudaMemset(gpuSource, 0, sizeof(T) * nVals)); | ||
160 | - | ||
161 | - //create the image on the gpu | ||
162 | - gpu2gpu(gpuSource, gpuDest, nVals, minVal, maxVal, cm); | ||
163 | - | ||
164 | - //HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3)); | ||
165 | - | ||
166 | - //copy the image from the GPU to the CPU | ||
167 | - HANDLE_ERROR(cudaMemcpy(cpuDest, gpuDest, sizeof(unsigned char) * nVals * 3, cudaMemcpyDeviceToHost)); | ||
168 | - | ||
169 | - HANDLE_ERROR(cudaFree( gpuDest )); | ||
170 | - | ||
171 | -} | ||
172 | - | ||
173 | -template<typename T> | ||
174 | -static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) | ||
175 | -{ | ||
176 | - //allocate a color buffer | ||
177 | - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); | ||
178 | - | ||
179 | - //do the mapping | ||
180 | - gpu2cpu<T>(gpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); | ||
181 | - | ||
182 | - //copy the buffer to an image | ||
183 | - buffer2image(cpuBuffer, fileDest, x_size, y_size); | ||
184 | - | ||
185 | - free(cpuBuffer); | ||
186 | -} | ||
187 | - | ||
188 | -#endif | ||
189 | - | ||
190 | -template<class T> | ||
191 | -static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T valMin, T valMax, colormapType cm = cmGrayscale) | ||
192 | -{ | ||
193 | - int i; | ||
194 | - float a; | ||
195 | - float range = valMax - valMin; | ||
196 | - for(i = 0; i<nVals; i++) | ||
197 | - { | ||
198 | - //normalize to the range [valMin valMax] | ||
199 | - a = (cpuSource[i] - valMin) / range; | ||
200 | - | ||
201 | - cpuDest[i * 3 + 0] = 255 * a; | ||
202 | - cpuDest[i * 3 + 1] = 255 * a; | ||
203 | - cpuDest[i * 3 + 2] = 255 * a; | ||
204 | - } | ||
205 | - | ||
206 | -} | ||
207 | - | ||
208 | - | ||
209 | - | ||
210 | -template<typename T> | ||
211 | -static void cpu2image(T* cpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) | ||
212 | -{ | ||
213 | - //allocate a color buffer | ||
214 | - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); | ||
215 | - | ||
216 | - //do the mapping | ||
217 | - cpu2cpu<T>(cpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); | ||
218 | - | ||
219 | - //copy the buffer to an image | ||
220 | - buffer2image(cpuBuffer, fileDest, x_size, y_size); | ||
221 | - | ||
222 | - free(cpuBuffer); | ||
223 | - | ||
224 | -} | ||
225 | - | ||
226 | -}} //end namespace colormap and rts | ||
227 | - | ||
228 | -#endif | ||
229 | - |
dataTypes.h
@@ -24,6 +24,8 @@ typedef double ptype; | @@ -24,6 +24,8 @@ typedef double ptype; | ||
24 | 24 | ||
25 | typedef ptype fieldPoint; | 25 | typedef ptype fieldPoint; |
26 | 26 | ||
27 | +extern bool verbose; | ||
28 | + | ||
27 | //hybrid GPU/CPU complex data type | 29 | //hybrid GPU/CPU complex data type |
28 | #include "rts/math/complex.h" | 30 | #include "rts/math/complex.h" |
29 | #include "rts/math/vector.h" | 31 | #include "rts/math/vector.h" |
defaults.h
@@ -15,14 +15,14 @@ | @@ -15,14 +15,14 @@ | ||
15 | #define DEFAULT_FOCUS_X 0 | 15 | #define DEFAULT_FOCUS_X 0 |
16 | #define DEFAULT_FOCUS_Y 0 | 16 | #define DEFAULT_FOCUS_Y 0 |
17 | #define DEFAULT_FOCUS_Z 0 | 17 | #define DEFAULT_FOCUS_Z 0 |
18 | -#define DEFAULT_INCIDENT_ORDER 100 | 18 | +//#define DEFAULT_INCIDENT_ORDER 20 |
19 | #define DEFAULT_STABILITY_PARM 1.4 | 19 | #define DEFAULT_STABILITY_PARM 1.4 |
20 | 20 | ||
21 | //optics | 21 | //optics |
22 | -#define DEFAULT_CONDENSER_MIN 0.0 | 22 | +#define DEFAULT_CONDENSER_MIN 0 |
23 | #define DEFAULT_CONDENSER_MAX 1 | 23 | #define DEFAULT_CONDENSER_MAX 1 |
24 | 24 | ||
25 | -#define DEFAULT_OBJECTIVE_MIN 0.0 | 25 | +#define DEFAULT_OBJECTIVE_MIN 0 |
26 | #define DEFAULT_OBJECTIVE_MAX 1 | 26 | #define DEFAULT_OBJECTIVE_MAX 1 |
27 | 27 | ||
28 | //incident light direction | 28 | //incident light direction |
@@ -36,17 +36,20 @@ | @@ -36,17 +36,20 @@ | ||
36 | //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective | 36 | //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective |
37 | 37 | ||
38 | 38 | ||
39 | -#define DEFAULT_SLICE_MIN_X -5 | ||
40 | -#define DEFAULT_SLICE_MIN_Y 0 | ||
41 | -#define DEFAULT_SLICE_MIN_Z -5 | 39 | +#define DEFAULT_PLANE_MIN_X -5 |
40 | +#define DEFAULT_PLANE_MIN_Y 0 | ||
41 | +#define DEFAULT_PLANE_MIN_Z -5 | ||
42 | 42 | ||
43 | -#define DEFAULT_SLICE_MAX_X 5 | ||
44 | -#define DEFAULT_SLICE_MAX_Y 0 | ||
45 | -#define DEFAULT_SLICE_MAX_Z 5 | 43 | +#define DEFAULT_PLANE_MAX_X 5 |
44 | +#define DEFAULT_PLANE_MAX_Y 0 | ||
45 | +#define DEFAULT_PLANE_MAX_Z 5 | ||
46 | 46 | ||
47 | -#define DEFAULT_SLICE_NORM_X 0 | ||
48 | -#define DEFAULT_SLICE_NORM_Y 1 | ||
49 | -#define DEFAULT_SLICE_NORM_Z 0 | 47 | +#define DEFAULT_PLANE_NORM_X 0 |
48 | +#define DEFAULT_PLANE_NORM_Y 1 | ||
49 | +#define DEFAULT_PLANE_NORM_Z 0 | ||
50 | + | ||
51 | +#define DEFAULT_PLANE_SIZE 40 | ||
52 | +#define DEFAULT_PLANE_POSITION 0 | ||
50 | 53 | ||
51 | 54 | ||
52 | /* | 55 | /* |
@@ -64,21 +67,23 @@ | @@ -64,21 +67,23 @@ | ||
64 | */ | 67 | */ |
65 | 68 | ||
66 | 69 | ||
67 | -#define DEFAULT_FIELD_ORDER 200 | 70 | +#define DEFAULT_FIELD_ORDER 10 |
68 | 71 | ||
69 | -#define DEFAULT_SAMPLES 200 | 72 | +#define DEFAULT_SAMPLES 400 |
70 | 73 | ||
71 | #define DEFAULT_SLICE_RES 256 | 74 | #define DEFAULT_SLICE_RES 256 |
72 | 75 | ||
76 | +#define DEFAULT_SPHERE_THETA_R 1000 | ||
77 | + | ||
73 | #define DEFAULT_PADDING 1 | 78 | #define DEFAULT_PADDING 1 |
74 | #define DEFAULT_SUPERSAMPLE 1 | 79 | #define DEFAULT_SUPERSAMPLE 1 |
75 | 80 | ||
76 | -#define DEFAULT_INTENSITY_FILE "testappend" | 81 | +#define DEFAULT_INTENSITY_FILE "out_i.bmp" |
77 | #define DEFAULT_TRANSMITTANCE_FILE "" | 82 | #define DEFAULT_TRANSMITTANCE_FILE "" |
78 | -#define DEFAULT_ABSORBANCE_FILE "out_a" | 83 | +#define DEFAULT_ABSORBANCE_FILE "out_a.bmp" |
79 | #define DEFAULT_NEAR_FILE "out_n.bmp" | 84 | #define DEFAULT_NEAR_FILE "out_n.bmp" |
80 | #define DEFAULT_FAR_FILE "out_f.bmp" | 85 | #define DEFAULT_FAR_FILE "out_f.bmp" |
81 | -#define DEFAULT_EXTENDED_SOURCE "einstein_small.jpg" | 86 | +#define DEFAULT_EXTENDED_SOURCE "" |
82 | #define DEFAULT_FIELD_TYPE "magnitude" | 87 | #define DEFAULT_FIELD_TYPE "magnitude" |
83 | #define DEFAULT_FORMAT fileoutStruct::formatImage | 88 | #define DEFAULT_FORMAT fileoutStruct::formatImage |
84 | #define DEFAULT_COLORMAP "brewer" | 89 | #define DEFAULT_COLORMAP "brewer" |
fieldslice.cpp
@@ -8,14 +8,16 @@ | @@ -8,14 +8,16 @@ | ||
8 | using namespace std; | 8 | using namespace std; |
9 | 9 | ||
10 | fieldslice::fieldslice(unsigned int x_size, unsigned int y_size) | 10 | fieldslice::fieldslice(unsigned int x_size, unsigned int y_size) |
11 | -{ | 11 | +{ |
12 | + x_hat = y_hat = z_hat = NULL; | ||
13 | + | ||
12 | //save the slice resolution | 14 | //save the slice resolution |
13 | R[0] = x_size; | 15 | R[0] = x_size; |
14 | R[1] = x_size; | 16 | R[1] = x_size; |
15 | 17 | ||
16 | scalarField = true; | 18 | scalarField = true; |
17 | 19 | ||
18 | - //init_gpu(); | 20 | + init_gpu(); |
19 | 21 | ||
20 | 22 | ||
21 | } | 23 | } |
@@ -101,5 +103,5 @@ fieldslice::fieldslice() | @@ -101,5 +103,5 @@ fieldslice::fieldslice() | ||
101 | 103 | ||
102 | fieldslice::~fieldslice() | 104 | fieldslice::~fieldslice() |
103 | { | 105 | { |
104 | - //kill_gpu(); | 106 | + kill_gpu(); |
105 | } | 107 | } |
fieldslice.cu
1 | #include "fieldslice.h" | 1 | #include "fieldslice.h" |
2 | #include "dataTypes.h" | 2 | #include "dataTypes.h" |
3 | -#include "rts/cuda/error.h" | 3 | +#include "rts/cuda/error.h" |
4 | +#include "rts/cuda/threads.h" | ||
4 | 5 | ||
5 | 6 | ||
6 | __global__ void field_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* I, unsigned int N) | 7 | __global__ void field_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* I, unsigned int N) |
7 | { | 8 | { |
8 | //compute the index for this thread | 9 | //compute the index for this thread |
9 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | 10 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; |
11 | + int i = ThreadIndex1D(); | ||
12 | + | ||
10 | if(i >= N) return; | 13 | if(i >= N) return; |
11 | 14 | ||
12 | ptype xm = x[i].abs(); | 15 | ptype xm = x[i].abs(); |
@@ -66,7 +69,8 @@ __global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, pty | @@ -66,7 +69,8 @@ __global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, pty | ||
66 | __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) | 69 | __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) |
67 | { | 70 | { |
68 | //compute the index for this thread | 71 | //compute the index for this thread |
69 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | 72 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; |
73 | + int i = ThreadIndex1D(); | ||
70 | if(i >= N) return; | 74 | if(i >= N) return; |
71 | 75 | ||
72 | V[i] = field_component[i].real(); | 76 | V[i] = field_component[i].real(); |
@@ -75,7 +79,8 @@ __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) | @@ -75,7 +79,8 @@ __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) | ||
75 | __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N) | 79 | __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N) |
76 | { | 80 | { |
77 | //compute the index for this thread | 81 | //compute the index for this thread |
78 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | 82 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; |
83 | + int i = ThreadIndex1D(); | ||
79 | if(i >= N) return; | 84 | if(i >= N) return; |
80 | 85 | ||
81 | V[i] = field_component[i].imag(); | 86 | V[i] = field_component[i].imag(); |
@@ -84,7 +89,8 @@ __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned i | @@ -84,7 +89,8 @@ __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned i | ||
84 | __global__ void field_sqrt(ptype* input, ptype* output, unsigned int N) | 89 | __global__ void field_sqrt(ptype* input, ptype* output, unsigned int N) |
85 | { | 90 | { |
86 | //compute the index for this thread | 91 | //compute the index for this thread |
87 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | 92 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; |
93 | + int i = ThreadIndex1D(); | ||
88 | if(i >= N) return; | 94 | if(i >= N) return; |
89 | 95 | ||
90 | output[i] = sqrt(input[i]); | 96 | output[i] = sqrt(input[i]); |
@@ -115,7 +121,8 @@ scalarslice fieldslice::Mag() | @@ -115,7 +121,8 @@ scalarslice fieldslice::Mag() | ||
115 | 121 | ||
116 | //compute the total number of values in the slice | 122 | //compute the total number of values in the slice |
117 | unsigned int N = R[0] * R[1]; | 123 | unsigned int N = R[0] * R[1]; |
118 | - int gridDim = (N+BLOCK-1)/BLOCK; | 124 | + //int gridDim = (N+BLOCK-1)/BLOCK; |
125 | + dim3 gridDim = GenGrid1D(N, BLOCK); | ||
119 | 126 | ||
120 | field_intensity<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, result->S, N); | 127 | field_intensity<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, result->S, N); |
121 | field_sqrt<<<gridDim, BLOCK>>>(result->S, result->S, N); | 128 | field_sqrt<<<gridDim, BLOCK>>>(result->S, result->S, N); |
@@ -132,7 +139,8 @@ scalarslice fieldslice::Real() | @@ -132,7 +139,8 @@ scalarslice fieldslice::Real() | ||
132 | 139 | ||
133 | //compute the total number of values in the slice | 140 | //compute the total number of values in the slice |
134 | unsigned int N = R[0] * R[1]; | 141 | unsigned int N = R[0] * R[1]; |
135 | - int gridDim = (N+BLOCK-1)/BLOCK; | 142 | + //int gridDim = (N+BLOCK-1)/BLOCK; |
143 | + dim3 gridDim = GenGrid1D(N, BLOCK); | ||
136 | 144 | ||
137 | field_real<<<gridDim, BLOCK>>>(x_hat, result->S, N); | 145 | field_real<<<gridDim, BLOCK>>>(x_hat, result->S, N); |
138 | 146 | ||
@@ -148,7 +156,8 @@ scalarslice fieldslice::Imag() | @@ -148,7 +156,8 @@ scalarslice fieldslice::Imag() | ||
148 | 156 | ||
149 | //compute the total number of values in the slice | 157 | //compute the total number of values in the slice |
150 | unsigned int N = R[0] * R[1]; | 158 | unsigned int N = R[0] * R[1]; |
151 | - int gridDim = (N+BLOCK-1)/BLOCK; | 159 | + //int gridDim = (N+BLOCK-1)/BLOCK; |
160 | + dim3 gridDim = GenGrid1D(N, BLOCK); | ||
152 | 161 | ||
153 | field_imaginary<<<gridDim, BLOCK>>>(x_hat, result->S, N); | 162 | field_imaginary<<<gridDim, BLOCK>>>(x_hat, result->S, N); |
154 | 163 | ||
@@ -192,7 +201,6 @@ void fieldslice::ScaleField(ptype v) | @@ -192,7 +201,6 @@ void fieldslice::ScaleField(ptype v) | ||
192 | 201 | ||
193 | //compute the total number of values in the slice | 202 | //compute the total number of values in the slice |
194 | unsigned int N = R[0] * R[1]; | 203 | unsigned int N = R[0] * R[1]; |
195 | - //cout<<"Size of mag field: "<<N<<endl; | ||
196 | int gridDim = (N+BLOCK-1)/BLOCK; | 204 | int gridDim = (N+BLOCK-1)/BLOCK; |
197 | 205 | ||
198 | field_scale<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, N, v); | 206 | field_scale<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, N, v); |
@@ -200,19 +208,23 @@ void fieldslice::ScaleField(ptype v) | @@ -200,19 +208,23 @@ void fieldslice::ScaleField(ptype v) | ||
200 | } | 208 | } |
201 | 209 | ||
202 | void fieldslice::init_gpu() | 210 | void fieldslice::init_gpu() |
203 | -{ | 211 | +{ |
212 | + //if the field has no size, return | ||
213 | + if(R[0] == 0 || R[1] == 0) | ||
214 | + return; | ||
215 | + | ||
216 | + //free any previous memory allocations | ||
217 | + if(x_hat) | ||
218 | + HANDLE_ERROR(cudaFree(x_hat)); | ||
219 | + if(y_hat) | ||
220 | + HANDLE_ERROR(cudaFree(y_hat)); | ||
221 | + if(z_hat) | ||
222 | + HANDLE_ERROR(cudaFree(z_hat)); | ||
223 | + | ||
204 | //allocate space on the GPU for the field slice | 224 | //allocate space on the GPU for the field slice |
205 | HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex))); | 225 | HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex))); |
206 | - //HANDLE_ERROR(cudaMemset(x_hat, 0, R[0] * R[1] * sizeof(bsComplex))); | ||
207 | 226 | ||
208 | - //if the field is scalar, y_hat and z_hat are unused | ||
209 | - if(scalarField) | ||
210 | - { | ||
211 | - y_hat = NULL; | ||
212 | - z_hat = NULL; | ||
213 | - | ||
214 | - } | ||
215 | - else | 227 | + if(!scalarField) |
216 | { | 228 | { |
217 | HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex))); | 229 | HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex))); |
218 | //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex))); | 230 | //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex))); |
@@ -233,6 +245,8 @@ void fieldslice::kill_gpu() | @@ -233,6 +245,8 @@ void fieldslice::kill_gpu() | ||
233 | if(z_hat != NULL) | 245 | if(z_hat != NULL) |
234 | HANDLE_ERROR(cudaFree(z_hat)); | 246 | HANDLE_ERROR(cudaFree(z_hat)); |
235 | 247 | ||
248 | + x_hat = y_hat = z_hat = NULL; | ||
249 | + | ||
236 | } | 250 | } |
237 | 251 | ||
238 | void fieldslice::clear_gpu() | 252 | void fieldslice::clear_gpu() |
@@ -275,7 +289,7 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) | @@ -275,7 +289,7 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) | ||
275 | result.scalarField = scalarField; | 289 | result.scalarField = scalarField; |
276 | 290 | ||
277 | //allocate space for the new field | 291 | //allocate space for the new field |
278 | - result.init_gpu(); | 292 | + //result.init_gpu(); |
279 | 293 | ||
280 | //create one thread for each pixel of the field slice | 294 | //create one thread for each pixel of the field slice |
281 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | 295 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); |
@@ -291,3 +305,57 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) | @@ -291,3 +305,57 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) | ||
291 | 305 | ||
292 | return result; | 306 | return result; |
293 | } | 307 | } |
308 | + | ||
309 | +fieldslice::fieldslice(const fieldslice& rhs) | ||
310 | +{ | ||
311 | + R[0] = rhs.R[0]; | ||
312 | + R[1] = rhs.R[1]; | ||
313 | + scalarField = rhs.scalarField; | ||
314 | + | ||
315 | + x_hat = y_hat = z_hat = NULL; | ||
316 | + | ||
317 | + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1]; | ||
318 | + if(rhs.x_hat != NULL) | ||
319 | + { | ||
320 | + HANDLE_ERROR(cudaMalloc( (void**)&x_hat, bytes)); | ||
321 | + HANDLE_ERROR(cudaMemcpy( x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice)); | ||
322 | + } | ||
323 | + if(rhs.y_hat != NULL) | ||
324 | + { | ||
325 | + HANDLE_ERROR(cudaMalloc( (void**)&y_hat, bytes)); | ||
326 | + HANDLE_ERROR(cudaMemcpy( y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice)); | ||
327 | + } | ||
328 | + if(rhs.z_hat != NULL) | ||
329 | + { | ||
330 | + HANDLE_ERROR(cudaMalloc( (void**)&z_hat, bytes)); | ||
331 | + HANDLE_ERROR(cudaMemcpy( z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice)); | ||
332 | + } | ||
333 | + | ||
334 | +} | ||
335 | + | ||
336 | +fieldslice& fieldslice::operator=(const fieldslice& rhs) | ||
337 | +{ | ||
338 | + //make sure this isn't a self-assignment | ||
339 | + if(this != &rhs) | ||
340 | + { | ||
341 | + //make a shallow copy | ||
342 | + R[0] = rhs.R[0]; | ||
343 | + R[1] = rhs.R[1]; | ||
344 | + scalarField = rhs.scalarField; | ||
345 | + | ||
346 | + //initialize to new parameters | ||
347 | + init_gpu(); | ||
348 | + | ||
349 | + //make a deep copy | ||
350 | + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1]; | ||
351 | + if(x_hat != NULL) | ||
352 | + HANDLE_ERROR(cudaMemcpy(x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice)); | ||
353 | + if(y_hat != NULL) | ||
354 | + HANDLE_ERROR(cudaMemcpy(y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice)); | ||
355 | + if(z_hat != NULL) | ||
356 | + HANDLE_ERROR(cudaMemcpy(z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice)); | ||
357 | + } | ||
358 | + | ||
359 | + return *this; | ||
360 | + | ||
361 | +} |
fieldslice.h
@@ -31,6 +31,9 @@ struct fieldslice | @@ -31,6 +31,9 @@ struct fieldslice | ||
31 | 31 | ||
32 | ~fieldslice(); | 32 | ~fieldslice(); |
33 | 33 | ||
34 | + //copy constructor | ||
35 | + fieldslice(const fieldslice& rhs); | ||
36 | + | ||
34 | //void setPos(bsPoint pMin, bsPoint pMax, bsVector N); | 37 | //void setPos(bsPoint pMin, bsPoint pMax, bsVector N); |
35 | 38 | ||
36 | scalarslice Mag(); | 39 | scalarslice Mag(); |
@@ -47,6 +50,7 @@ struct fieldslice | @@ -47,6 +50,7 @@ struct fieldslice | ||
47 | 50 | ||
48 | //crop a region from the field | 51 | //crop a region from the field |
49 | fieldslice crop(int u, int v, int su, int sv); | 52 | fieldslice crop(int u, int v, int su, int sv); |
53 | + fieldslice& operator=(const fieldslice& rhs); | ||
50 | 54 | ||
51 | void init_gpu(); | 55 | void init_gpu(); |
52 | void kill_gpu(); | 56 | void kill_gpu(); |
fileout.cu
@@ -186,11 +186,21 @@ void fileoutStruct::Save(microscopeStruct* scope) | @@ -186,11 +186,21 @@ void fileoutStruct::Save(microscopeStruct* scope) | ||
186 | //save images of the fields in the microscope | 186 | //save images of the fields in the microscope |
187 | 187 | ||
188 | //if the user specifies an extended source | 188 | //if the user specifies an extended source |
189 | - if(scope->focalPoints.size() > 1) | 189 | + if(scope->focalPoints.size() > 0) |
190 | { | 190 | { |
191 | //simulate the extended source and output the detector image | 191 | //simulate the extended source and output the detector image |
192 | scope->SimulateExtendedSource(); | 192 | scope->SimulateExtendedSource(); |
193 | 193 | ||
194 | + //saveNearField(&scope->nf); | ||
195 | + saveFarField(scope); | ||
196 | + | ||
197 | + //save the detector images | ||
198 | + saveDetector(scope); | ||
199 | + | ||
200 | + //simulate scattering for the last point (so that you have a near field image) | ||
201 | + scope->SimulateScattering(); | ||
202 | + saveNearField(&scope->nf); | ||
203 | + | ||
194 | } | 204 | } |
195 | else | 205 | else |
196 | { | 206 | { |
@@ -203,12 +213,15 @@ void fileoutStruct::Save(microscopeStruct* scope) | @@ -203,12 +213,15 @@ void fileoutStruct::Save(microscopeStruct* scope) | ||
203 | //run the far-field simulation | 213 | //run the far-field simulation |
204 | scope->SimulateImaging(); | 214 | scope->SimulateImaging(); |
205 | 215 | ||
216 | + //saveNearField(&scope->nf); | ||
206 | saveFarField(scope); | 217 | saveFarField(scope); |
207 | 218 | ||
219 | + //save the detector images | ||
220 | + saveDetector(scope); | ||
221 | + | ||
208 | } | 222 | } |
209 | 223 | ||
210 | - //save the detector images | ||
211 | - saveDetector(scope); | 224 | + |
212 | 225 | ||
213 | 226 | ||
214 | } | 227 | } |
fileout.h
@@ -5,7 +5,7 @@ | @@ -5,7 +5,7 @@ | ||
5 | //#include "defaults.h" | 5 | //#include "defaults.h" |
6 | #include "dataTypes.h" | 6 | #include "dataTypes.h" |
7 | 7 | ||
8 | -#include "colormap.h" | 8 | +#include "rts/graphics/colormap.h" |
9 | #include "fieldslice.h" | 9 | #include "fieldslice.h" |
10 | #include "nearfield.h" | 10 | #include "nearfield.h" |
11 | #include "microscope.h" | 11 | #include "microscope.h" |
@@ -34,7 +34,7 @@ struct fileoutStruct{ | @@ -34,7 +34,7 @@ struct fileoutStruct{ | ||
34 | //image_source source; | 34 | //image_source source; |
35 | 35 | ||
36 | //color map info | 36 | //color map info |
37 | - rts::colormap::colormapType colormap; | 37 | + rts::colormapType colormap; |
38 | ptype colorMax; | 38 | ptype colorMax; |
39 | 39 | ||
40 | void Save(microscopeStruct* scope); | 40 | void Save(microscopeStruct* scope); |
main.cpp
@@ -24,6 +24,7 @@ microscopeStruct* SCOPE; | @@ -24,6 +24,7 @@ microscopeStruct* SCOPE; | ||
24 | #include "warnings.h" | 24 | #include "warnings.h" |
25 | 25 | ||
26 | fileoutStruct gFileOut; | 26 | fileoutStruct gFileOut; |
27 | +bool verbose = false; | ||
27 | using namespace std; | 28 | using namespace std; |
28 | 29 | ||
29 | int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, | 30 | int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, |
@@ -31,32 +32,19 @@ int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, | @@ -31,32 +32,19 @@ int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, | ||
31 | 32 | ||
32 | int main(int argc, char *argv[]) | 33 | int main(int argc, char *argv[]) |
33 | { | 34 | { |
34 | - //test Envi loading and saving | ||
35 | - //EnviFile envi("testenvi", "w"); | ||
36 | - | ||
37 | - //float* data = (float*)malloc(sizeof(float) * 100 * 100); | ||
38 | - //envi.addBand(data, 100, 100, 100); | ||
39 | - | ||
40 | - //envi.close(); | ||
41 | - | ||
42 | - //return 0; | ||
43 | 35 | ||
44 | SCOPE = new microscopeStruct(); | 36 | SCOPE = new microscopeStruct(); |
45 | 37 | ||
46 | - cout<<SCOPE->nf.Uf.R[0]<<endl; | ||
47 | - | ||
48 | LoadParameters(argc, argv); | 38 | LoadParameters(argc, argv); |
49 | 39 | ||
50 | - //TestSimulation(NF, SCOPE, &gFileOut); | ||
51 | - | ||
52 | //initialize GPU memory for fields | 40 | //initialize GPU memory for fields |
53 | SCOPE->init(); | 41 | SCOPE->init(); |
54 | 42 | ||
55 | - OutputOptions(); | ||
56 | - | ||
57 | gFileOut.Save(SCOPE); | 43 | gFileOut.Save(SCOPE); |
58 | 44 | ||
59 | - //NF->destroy(); | 45 | + if(verbose) |
46 | + OutputOptions(); | ||
47 | + | ||
60 | SCOPE->destroy(); | 48 | SCOPE->destroy(); |
61 | 49 | ||
62 | 50 |
microscope.cu
@@ -4,7 +4,7 @@ | @@ -4,7 +4,7 @@ | ||
4 | #include "rts/tools/progressbar.h" | 4 | #include "rts/tools/progressbar.h" |
5 | #include "rts/cuda/timer.h" | 5 | #include "rts/cuda/timer.h" |
6 | #include "dataTypes.h" | 6 | #include "dataTypes.h" |
7 | -#include "colormap.h" | 7 | +#include "rts/graphics/colormap.h" |
8 | 8 | ||
9 | #include <QImage> | 9 | #include <QImage> |
10 | 10 | ||
@@ -112,8 +112,8 @@ void microscopeStruct::getFarField() | @@ -112,8 +112,8 @@ void microscopeStruct::getFarField() | ||
112 | //Compute the Far Field image of the focal plane | 112 | //Compute the Far Field image of the focal plane |
113 | 113 | ||
114 | //clear the memory from previous detector fields | 114 | //clear the memory from previous detector fields |
115 | - Ud.kill_gpu(); | ||
116 | - Ufd.kill_gpu(); | 115 | + //Ud.kill_gpu(); |
116 | + //Ufd.kill_gpu(); | ||
117 | 117 | ||
118 | //first crop the filtered near-field image of the source and scattered fields | 118 | //first crop the filtered near-field image of the source and scattered fields |
119 | Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]); | 119 | Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]); |
@@ -261,9 +261,14 @@ void microscopeStruct::SimulateExtendedSource() | @@ -261,9 +261,14 @@ void microscopeStruct::SimulateExtendedSource() | ||
261 | t += gpuStopTimer(); | 261 | t += gpuStopTimer(); |
262 | 262 | ||
263 | rtsProgressBar((double)(i+1)/(double)npts * 100); | 263 | rtsProgressBar((double)(i+1)/(double)npts * 100); |
264 | + //unsigned char c; | ||
265 | + //cin>>c; | ||
264 | } | 266 | } |
265 | - cout<<endl; | ||
266 | - cout<<"Time per source: "<<t/npts<<"ms"<<endl; | 267 | + if(verbose) |
268 | + { | ||
269 | + cout<<endl; | ||
270 | + cout<<"Time per source: "<<t/npts<<"ms"<<endl; | ||
271 | + } | ||
267 | 272 | ||
268 | } | 273 | } |
269 | 274 | ||
@@ -304,3 +309,15 @@ void microscopeStruct::LoadExtendedSource(std::string filename) | @@ -304,3 +309,15 @@ void microscopeStruct::LoadExtendedSource(std::string filename) | ||
304 | } | 309 | } |
305 | } | 310 | } |
306 | } | 311 | } |
312 | + | ||
313 | +std::string microscopeStruct::toStr() | ||
314 | +{ | ||
315 | + stringstream ss; | ||
316 | + ss<<nf.toStr(); | ||
317 | + | ||
318 | + ss<<"----------Optics--------------"<<endl<<endl; | ||
319 | + ss<<"Objective NA: "<<objective[0]<<" to "<<objective[1]<<endl; | ||
320 | + return ss.str(); | ||
321 | + | ||
322 | + | ||
323 | +} |
microscope.h
montecarlo.cpp
@@ -35,18 +35,12 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) | @@ -35,18 +35,12 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) | ||
35 | ptype inPhi = asin(NAin); | 35 | ptype inPhi = asin(NAin); |
36 | ptype outPhi = asin(NAout); | 36 | ptype outPhi = asin(NAout); |
37 | 37 | ||
38 | - //cout<<"inPhi: "<<inPhi<<endl; | ||
39 | - //cout<<"outPhi: "<<outPhi<<endl; | ||
40 | - | ||
41 | //calculate the z-values associated with these angles | 38 | //calculate the z-values associated with these angles |
42 | ptype inZ = cos(inPhi); | 39 | ptype inZ = cos(inPhi); |
43 | ptype outZ = cos(outPhi); | 40 | ptype outZ = cos(outPhi); |
44 | 41 | ||
45 | ptype rangeZ = inZ - outZ; | 42 | ptype rangeZ = inZ - outZ; |
46 | 43 | ||
47 | - //cout<<"inZ: "<<inZ<<endl; | ||
48 | - //cout<<"outZ: "<<outZ<<endl; | ||
49 | - | ||
50 | //draw a distribution of random phi, z values | 44 | //draw a distribution of random phi, z values |
51 | ptype z, phi, theta; | 45 | ptype z, phi, theta; |
52 | for(int i=0; i<N; i++) | 46 | for(int i=0; i<N; i++) |
@@ -58,7 +52,6 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) | @@ -58,7 +52,6 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) | ||
58 | phi = acos(z); | 52 | phi = acos(z); |
59 | 53 | ||
60 | //compute and store cartesian coordinates | 54 | //compute and store cartesian coordinates |
61 | - //bsVector spherical(1, theta + kSph[1], phi + kSph[2]); | ||
62 | bsVector spherical(1, theta, phi); | 55 | bsVector spherical(1, theta, phi); |
63 | bsVector cart = spherical.sph2cart(); | 56 | bsVector cart = spherical.sph2cart(); |
64 | samples[i] = rotation * cart; | 57 | samples[i] = rotation * cart; |
nearfield.cpp
1 | #include "nearfield.h" | 1 | #include "nearfield.h" |
2 | +#include <time.h> | ||
3 | +#include <math.h> | ||
4 | + | ||
5 | +#ifdef _WIN32 | ||
6 | +#define isnan(x) _isnan(x) | ||
7 | +#define isinf(x) (!_finite(x)) | ||
8 | +#endif | ||
9 | + | ||
10 | +int bessjyv_sph(int v, double z, double &vm, double* cjv, | ||
11 | + double* cyv, double* cjvp, double* cyvp); | ||
2 | 12 | ||
3 | nearfieldStruct::nearfieldStruct() | 13 | nearfieldStruct::nearfieldStruct() |
4 | { | 14 | { |
5 | scalarSim = true; | 15 | scalarSim = true; |
6 | planeWave = false; | 16 | planeWave = false; |
17 | + lut_us = true; | ||
18 | + lut_uf = false; | ||
7 | 19 | ||
8 | nWaves = 0; | 20 | nWaves = 0; |
9 | } | 21 | } |
@@ -46,6 +58,8 @@ std::string nearfieldStruct::toStr() | @@ -46,6 +58,8 @@ std::string nearfieldStruct::toStr() | ||
46 | ss<<"Condenser NA: "<<condenser[0]<<" to "<<condenser[1]<<std::endl; | 58 | ss<<"Condenser NA: "<<condenser[0]<<" to "<<condenser[1]<<std::endl; |
47 | ss<<"Focal Point: "<<focus[0]<<", "<<focus[1]<<", "<<focus[2]<<std::endl; | 59 | ss<<"Focal Point: "<<focus[0]<<", "<<focus[1]<<", "<<focus[2]<<std::endl; |
48 | ss<<"Field Slice: "<<std::endl; | 60 | ss<<"Field Slice: "<<std::endl; |
61 | + if(lut_us) | ||
62 | + ss<<"LUT Parameters --- min: "<<d_min<<" max: "<<d_max<<std::endl; | ||
49 | ss<<pos<<std::endl; | 63 | ss<<pos<<std::endl; |
50 | 64 | ||
51 | ss<<std::endl<<"---------Materials-----------"<<std::endl; | 65 | ss<<std::endl<<"---------Materials-----------"<<std::endl; |
@@ -61,6 +75,10 @@ std::string nearfieldStruct::toStr() | @@ -61,6 +75,10 @@ std::string nearfieldStruct::toStr() | ||
61 | for(unsigned int s=0; s<sVector.size(); s++) | 75 | for(unsigned int s=0; s<sVector.size(); s++) |
62 | ss<<sVector[s].toStr()<<std::endl; | 76 | ss<<sVector[s].toStr()<<std::endl; |
63 | 77 | ||
78 | + ss<<"---------Timings-------------"<<std::endl; | ||
79 | + ss<<"Uf = "<<t_Uf<<"ms"<<std::endl; | ||
80 | + ss<<"Us = "<<t_Us<<"ms"<<std::endl; | ||
81 | + | ||
64 | return ss.str(); | 82 | return ss.str(); |
65 | } | 83 | } |
66 | 84 | ||
@@ -70,7 +88,8 @@ void nearfieldStruct::calcWaves() | @@ -70,7 +88,8 @@ void nearfieldStruct::calcWaves() | ||
70 | inWaves.resize(nWaves); | 88 | inWaves.resize(nWaves); |
71 | 89 | ||
72 | //re-seed the random number generator | 90 | //re-seed the random number generator |
73 | - //srand(seed); | 91 | + //srand(time(NULL)); |
92 | + srand(NULL); | ||
74 | 93 | ||
75 | //calculate the monte-carlo samples | 94 | //calculate the monte-carlo samples |
76 | mcSampleNA(&inWaves[0], nWaves, k, condenser[0], condenser[1]); | 95 | mcSampleNA(&inWaves[0], nWaves, k, condenser[0], condenser[1]); |
@@ -84,6 +103,8 @@ void nearfieldStruct::calcSpheres() | @@ -84,6 +103,8 @@ void nearfieldStruct::calcSpheres() | ||
84 | //calculate all of the constants necessary to evaluate the scattered field | 103 | //calculate all of the constants necessary to evaluate the scattered field |
85 | //estimate the order required to represent the scattered field for each sphere | 104 | //estimate the order required to represent the scattered field for each sphere |
86 | 105 | ||
106 | + | ||
107 | + | ||
87 | //for each sphere | 108 | //for each sphere |
88 | for(int i=0; i<sVector.size(); i++) | 109 | for(int i=0; i<sVector.size(); i++) |
89 | { | 110 | { |
@@ -91,12 +112,10 @@ void nearfieldStruct::calcSpheres() | @@ -91,12 +112,10 @@ void nearfieldStruct::calcSpheres() | ||
91 | 112 | ||
92 | //calculate the required order | 113 | //calculate the required order |
93 | sVector[i].calcNl(lambda); | 114 | sVector[i].calcNl(lambda); |
94 | - //std::cout<<sVector[i].Nl<<std::endl; | ||
95 | 115 | ||
96 | //set the refractive index for the sphere | 116 | //set the refractive index for the sphere |
97 | int imat = sVector[i].iMaterial; | 117 | int imat = sVector[i].iMaterial; |
98 | rts::rtsComplex<ptype> n = mVector[imat](lambda); | 118 | rts::rtsComplex<ptype> n = mVector[imat](lambda); |
99 | - //std::cout<<"Sphere refractive index: "<<n<<std::endl; | ||
100 | 119 | ||
101 | //calculate the scattering coefficients | 120 | //calculate the scattering coefficients |
102 | sVector[i].calcCoeff(lambda, n); | 121 | sVector[i].calcCoeff(lambda, n); |
@@ -104,18 +123,109 @@ void nearfieldStruct::calcSpheres() | @@ -104,18 +123,109 @@ void nearfieldStruct::calcSpheres() | ||
104 | //save the refractive index | 123 | //save the refractive index |
105 | sVector[i].n = n; | 124 | sVector[i].n = n; |
106 | 125 | ||
126 | + //if the LUT is used, calculate Usp(theta, r) | ||
127 | + if(lut_us) | ||
128 | + { | ||
129 | + sVector[i].calcUp(lambda, n, pos, max(U.R[0], U.R[1])); | ||
130 | + } | ||
131 | + | ||
132 | + | ||
107 | } | 133 | } |
108 | 134 | ||
109 | } | 135 | } |
110 | 136 | ||
137 | +void nearfieldStruct::calcUs() | ||
138 | +{ | ||
139 | + | ||
140 | + | ||
141 | + if(lut_us) | ||
142 | + scalarUpLut(); | ||
143 | + else | ||
144 | + scalarUs(); | ||
145 | +} | ||
146 | + | ||
147 | +void nearfieldStruct::calcUf() | ||
148 | +{ | ||
149 | + if(lut_uf) | ||
150 | + scalarUfLut(); | ||
151 | + else | ||
152 | + scalarUf(); | ||
153 | +} | ||
154 | + | ||
111 | void nearfieldStruct::Simulate() | 155 | void nearfieldStruct::Simulate() |
112 | { | 156 | { |
157 | + //initialize timings | ||
158 | + t_Uf = 0; | ||
159 | + t_Us = 0; | ||
160 | + | ||
113 | //compute a set of plane waves for Monte-Carlo simulation | 161 | //compute a set of plane waves for Monte-Carlo simulation |
114 | calcWaves(); | 162 | calcWaves(); |
115 | 163 | ||
116 | //the near field has to be simulated no matter what the output rtsPoint is | 164 | //the near field has to be simulated no matter what the output rtsPoint is |
117 | - scalarUf(); | 165 | + calcUf(); |
118 | calcSpheres(); | 166 | calcSpheres(); |
119 | - scalarUs(); | 167 | + calcUs(); |
120 | sumUf(); | 168 | sumUf(); |
169 | + | ||
170 | + //U.Mag().toImage("testU.bmp"); | ||
171 | +} | ||
172 | + | ||
173 | +void nearfieldStruct::calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR) | ||
174 | +{ | ||
175 | + /*Compute the look-up-table for spherical bessel functions used for the incident field | ||
176 | + j = (Nl + 1) x aR array of values | ||
177 | + aR = resolution of j | ||
178 | + */ | ||
179 | + | ||
180 | + //compute the wavenumber | ||
181 | + ptype k = 2 * PI / lambda; | ||
182 | + unsigned int Nl = m; | ||
183 | + | ||
184 | + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored) | ||
185 | + int bytes = sizeof(double) * (Nl + 1); | ||
186 | + double* cjv_kd = (double*)malloc(bytes); | ||
187 | + double* cyv_kd = (double*)malloc(bytes); | ||
188 | + double* cjvp_kd = (double*)malloc(bytes); | ||
189 | + double* cyvp_kd = (double*)malloc(bytes); | ||
190 | + | ||
191 | + //compute the bessel functions using the CPU-based algorithm | ||
192 | + double vm; | ||
193 | + | ||
194 | + //for each sample along r | ||
195 | + ptype dr = (d_max - d_min) / (dR - 1); | ||
196 | + ptype d; | ||
197 | + ptype jv; | ||
198 | + for(int id = 0; id < dR; id++) | ||
199 | + { | ||
200 | + d = id * dr + d_min; | ||
201 | + double kd = k*d; | ||
202 | + bessjyv_sph(Nl, kd, vm, cjv_kd, cyv_kd, cjvp_kd, cyvp_kd); | ||
203 | + | ||
204 | + //copy the double data to the bsComplex array | ||
205 | + for(int l=0; l<=Nl; l++) | ||
206 | + { | ||
207 | + jv = cjv_kd[l]; | ||
208 | + if(isnan(jv) || isinf(jv)) | ||
209 | + { | ||
210 | + if(kd == 0 && l == 0) | ||
211 | + jv = 1; | ||
212 | + else | ||
213 | + jv = 0; | ||
214 | + } | ||
215 | + j[id * (Nl+1) + l] = jv; | ||
216 | + } | ||
217 | + } | ||
218 | + | ||
219 | + /*ofstream outfile("uf_besselout.txt"); | ||
220 | + for(int ir = 0; ir < dR; ir++) | ||
221 | + { | ||
222 | + outfile<<ir*dr + d_min<<endl; | ||
223 | + for(int l = 0; l<=Nl; l++) | ||
224 | + { | ||
225 | + outfile<<j[ir * (Nl+1) + l]<<" --"; | ||
226 | + } | ||
227 | + outfile<<endl; | ||
228 | + } | ||
229 | + outfile.close();*/ | ||
230 | + | ||
121 | } | 231 | } |
nearfield.h
@@ -31,6 +31,8 @@ struct nearfieldStruct | @@ -31,6 +31,8 @@ struct nearfieldStruct | ||
31 | 31 | ||
32 | //slices for the focused field | 32 | //slices for the focused field |
33 | fieldslice Uf; | 33 | fieldslice Uf; |
34 | + ptype d_min, d_max; | ||
35 | + | ||
34 | // and total field: Uf + sum(Us) | 36 | // and total field: Uf + sum(Us) |
35 | fieldslice U; | 37 | fieldslice U; |
36 | 38 | ||
@@ -43,6 +45,14 @@ struct nearfieldStruct | @@ -43,6 +45,14 @@ struct nearfieldStruct | ||
43 | //flag for a plane wave | 45 | //flag for a plane wave |
44 | bool planeWave; | 46 | bool planeWave; |
45 | 47 | ||
48 | + //flag for using a LUT | ||
49 | + bool lut_uf; | ||
50 | + bool lut_us; | ||
51 | + | ||
52 | + //timings | ||
53 | + float t_Uf; | ||
54 | + float t_Us; | ||
55 | + | ||
46 | 56 | ||
47 | 57 | ||
48 | //---------Scatterers------------ | 58 | //---------Scatterers------------ |
@@ -78,10 +88,17 @@ struct nearfieldStruct | @@ -78,10 +88,17 @@ struct nearfieldStruct | ||
78 | void setPos(bsPoint pMin, bsPoint pMax, bsVector normal); | 88 | void setPos(bsPoint pMin, bsPoint pMax, bsVector normal); |
79 | 89 | ||
80 | //this function re-computes the focused field | 90 | //this function re-computes the focused field |
91 | + void calcUf(); | ||
81 | void scalarUf(); | 92 | void scalarUf(); |
93 | + void scalarUfLut(); | ||
94 | + | ||
95 | + void calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR); | ||
82 | 96 | ||
83 | //compute the field scattered by all of the materials | 97 | //compute the field scattered by all of the materials |
98 | + void calcUs(); | ||
84 | void scalarUs(); | 99 | void scalarUs(); |
100 | + void scalarUpLut(); | ||
101 | + | ||
85 | 102 | ||
86 | //add the incident field to the sum of scattered fields | 103 | //add the incident field to the sum of scattered fields |
87 | void sumUf(); | 104 | void sumUf(); |
nfScalarUf.cu
@@ -5,7 +5,7 @@ | @@ -5,7 +5,7 @@ | ||
5 | #include "rts/cuda/error.h" | 5 | #include "rts/cuda/error.h" |
6 | #include "rts/cuda/timer.h" | 6 | #include "rts/cuda/timer.h" |
7 | 7 | ||
8 | - | 8 | +//Incident field for a single plane wave |
9 | __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR) | 9 | __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR) |
10 | { | 10 | { |
11 | /*Compute the scalar focused field using Debye focusing | 11 | /*Compute the scalar focused field using Debye focusing |
@@ -41,7 +41,8 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p | @@ -41,7 +41,8 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p | ||
41 | Uf[i] = exp(d) * A; | 41 | Uf[i] = exp(d) * A; |
42 | 42 | ||
43 | } | 43 | } |
44 | - | 44 | + |
45 | +//Incident field for a focused point source | ||
45 | __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4) | 46 | __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4) |
46 | { | 47 | { |
47 | /*Compute the scalar focused field using Debye focusing | 48 | /*Compute the scalar focused field using Debye focusing |
@@ -151,7 +152,6 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt | @@ -151,7 +152,6 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt | ||
151 | } | 152 | } |
152 | 153 | ||
153 | sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); | 154 | sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); |
154 | - //sumUf += il * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); | ||
155 | 155 | ||
156 | il *= im; | 156 | il *= im; |
157 | } | 157 | } |
@@ -162,21 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt | @@ -162,21 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt | ||
162 | 162 | ||
163 | void nearfieldStruct::scalarUf() | 163 | void nearfieldStruct::scalarUf() |
164 | { | 164 | { |
165 | - //Compute the incident field via a scalar simulation | ||
166 | - //This method uses Debye focusing to approximate the field analytically | ||
167 | - | ||
168 | - //time the calculation of the focused field | ||
169 | - //gpuStartTimer(); | ||
170 | - | ||
171 | - //set the field slice to a scalar field | ||
172 | - //Uf.scalarField = true; | ||
173 | - | ||
174 | - //initialize the GPU arrays | ||
175 | - //Uf.init_gpu(); | 165 | + |
166 | + gpuStartTimer(); | ||
176 | 167 | ||
177 | //create one thread for each pixel of the field slice | 168 | //create one thread for each pixel of the field slice |
178 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | 169 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); |
179 | - dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | 170 | + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); |
180 | 171 | ||
181 | //if we are computing a plane wave, call the gpuScalarUfp function | 172 | //if we are computing a plane wave, call the gpuScalarUfp function |
182 | if(planeWave) | 173 | if(planeWave) |
@@ -191,10 +182,7 @@ void nearfieldStruct::scalarUf() | @@ -191,10 +182,7 @@ void nearfieldStruct::scalarUf() | ||
191 | ptype cosBeta = cos(asin(condenser[1])); | 182 | ptype cosBeta = cos(asin(condenser[1])); |
192 | //compute the scalar Uf field (this will be in the x_hat channel of Uf) | 183 | //compute the scalar Uf field (this will be in the x_hat channel of Uf) |
193 | gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); | 184 | gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); |
194 | - } | ||
195 | - | ||
196 | - //float t = gpuStopTimer(); | ||
197 | - //std::cout<<"Scalar Uf Time: "<<t<<"ms"<<std::endl; | ||
198 | - //std::cout<<focus<<std::endl; | ||
199 | - | 185 | + } |
186 | + | ||
187 | + t_Uf = gpuStopTimer(); | ||
200 | } | 188 | } |
1 | +#include "nearfield.h" | ||
2 | + | ||
3 | +#include "rts/math/legendre.h" | ||
4 | +#include "rts/cuda/error.h" | ||
5 | +#include "rts/cuda/timer.h" | ||
6 | + | ||
7 | +texture<float, cudaTextureType2D> texJ; | ||
8 | + | ||
9 | +__global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR); | ||
10 | + | ||
__global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoint f, bsVector k, ptype A, ptype cosAlpha, ptype cosBeta, int nl, ptype dmin, ptype dmax, int dR)
{
	/*Compute the focused (incident) field for a 2D slice, reading the spherical
	  Bessel function values from the texJ look-up texture.

	  One thread handles one pixel of the slice (2D launch; out-of-range threads exit).

		Uf		= destination field slice
		ABCD	= plane representing the field slice in world space
		uR, vR	= resolution of the Uf field
		f		= focal point of the condenser
		k		= direction of the incident light
		A		= amplitude of the incident field
		cosAlpha= cosine of the solid angle subtended by the condenser obscuration
		cosBeta	= cosine of the solid angle subtended by the condenser aperture
		nl		= number of orders used to compute the field
		dmin	= distance mapped to the first row of the look-up texture
		dmax	= distance mapped to the last row of the look-up texture
		dR		= number of Bessel function values in the look-up texture
	*/

	//pixel handled by this thread
	int iu = blockIdx.x * blockDim.x + threadIdx.x;
	int iv = blockIdx.y * blockDim.y + threadIdx.y;

	//threads that fall outside of the slice have nothing to do
	if(iu >= uR || iv >= vR) return;

	//flat index into the scalar field array
	int i = iv*uR + iu;

	//parametric (u, v) position of the pixel
	ptype u = (ptype)iu / (uR);
	ptype v = (ptype)iv / (vR);

	//world-space position of the pixel and its offset from the focal point
	bsPoint p = ABCD(u, v);
	bsVector r = p - f;
	ptype d = r.len();

	//exactly at the focal point the field is written directly, without the series
	if(d == 0)
	{
		Uf[i] = A * 2 * PI * (cosAlpha - cosBeta);
		return;
	}

	//from here on only the direction of r is needed
	r = r.norm();

	//Legendre functions are generated on the fly by recurrence to save memory

	//functions for the condenser (cassegrain) angles
	ptype Palpha[3];
	rts::init_legendre<ptype>(cosAlpha, Palpha[0], Palpha[1]);
	Palpha[2] = 1;

	ptype Pbeta[3];
	rts::init_legendre<ptype>(cosBeta, Pbeta[0], Pbeta[1]);
	Pbeta[2] = 1;

	//functions for the angle between k and r (light direction and position vector)
	ptype cosTheta = k.dot(r);
	ptype P[2];
	rts::init_legendre<ptype>(cosTheta, P[0], P[1]);

	//row of the look-up texture that corresponds to the distance d
	ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1);

	//running value of the imaginary factor i^l
	bsComplex im = bsComplex(0, 1);
	bsComplex il = bsComplex(1, 0);

	//accumulate the series over all orders l
	bsComplex sumUf(0.0, 0.0);
	ptype Pl;
	for(int l = 0; l<=nl; l++)
	{
		//fetch j_l from the texture (the 0.5 offsets address texel centers)
		ptype jl = tex2D(texJ, l + 0.5, di + 0.5);

		if(l == 0)
		{
			Pl = P[0];
		}
		else
		{
			//orders above 1 advance the P recurrence before it is read
			if(l > 1)
				rts::shift_legendre<ptype>(l, cosTheta, P[0], P[1]);

			Pl = P[1];

			//advance the cassegrain Legendre functions, remembering the previous value
			Palpha[2] = Palpha[0];
			rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
			Pbeta[2] = Pbeta[0];
			rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
		}

		sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);

		il *= im;
	}

	Uf[i] = sumUf * 2 * PI * A;
}
124 | + | ||
void nearfieldStruct::scalarUfLut()
{
	/*Compute the incident field Uf on the GPU, reading the spherical Bessel functions
	  from a look-up table bound to the texJ texture.

	  Updates d_min and d_max and stores the elapsed time (as reported by gpuStopTimer) in t_Uf.
	  For a plane wave the look-up table is not needed, so it is neither built nor uploaded.
	*/

	gpuStartTimer();

	//calculate the minimum and maximum points in the focused field
	//(presumably the nearest and farthest slice distances from the focus -- see bsRect::dist / dist_max)
	d_min = pos.dist(focus);
	d_max = pos.dist_max(focus);

	//create one thread for each pixel of the field slice
	dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
	dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

	//if we are computing a plane wave, call the gpuScalarUfp function
	//(it does not sample texJ, so the Bessel LUT is skipped entirely)
	if(planeWave)
	{
		gpuScalarUfp<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]);

		//report launch-configuration errors immediately
		HANDLE_ERROR(cudaGetLastError());

		t_Uf = gpuStopTimer();
		return;
	}

	//allocate space for the Bessel function LUT: dR distance samples x (m+1) orders
	int dR = 2 * max(Uf.R[0], Uf.R[1]);
	ptype* j = NULL;
	j = (ptype*) malloc(sizeof(ptype) * dR * (m+1));

	//calculate Bessel function LUT
	calcBesselLut(j, d_min, d_max, dR);

	//create a CUDA array structure and specify the format description (one 32-bit float per texel)
	cudaArray* arrayJ;
	cudaChannelFormatDesc channelDesc =
		cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);

	//allocate memory: width = orders, height = distance samples
	HANDLE_ERROR(cudaMallocArray(&arrayJ, &channelDesc, m+1, dR));

	//specify texture properties
	texJ.addressMode[0] = cudaAddressModeMirror;
	texJ.addressMode[1] = cudaAddressModeMirror;
	texJ.filterMode = cudaFilterModeLinear;
	texJ.normalized = false;

	//bind the texture to the array
	HANDLE_ERROR(cudaBindTextureToArray(texJ, arrayJ, channelDesc));

	//copy the CPU Bessel LUT to the GPU-based array
	//NOTE(review): the row pitch is given in sizeof(float) while j holds ptype values --
	//this is only correct if ptype is float; confirm against dataTypes.h
	HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice));

	//----------------Compute the focused field
	//pre-compute the cosine of the obscuration and objective angles
	ptype cosAlpha = cos(asin(condenser[0]));
	ptype cosBeta = cos(asin(condenser[1]));

	//compute the scalar Uf field (this will be in the x_hat channel of Uf)
	gpuScalarUfLut<<<dimGrid, dimBlock>>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR);

	//report launch-configuration errors immediately
	HANDLE_ERROR(cudaGetLastError());

	//free everything
	free(j);

	HANDLE_ERROR(cudaFreeArray(arrayJ));

	t_Uf = gpuStopTimer();
}
1 | +#include "nearfield.h" | ||
2 | +#include "rts/math/spherical_bessel.h" | ||
3 | +#include "rts/math/legendre.h" | ||
4 | +#include <stdlib.h> | ||
5 | +#include "rts/cuda/error.h" | ||
6 | +#include "rts/cuda/timer.h" | ||
7 | + | ||
8 | +texture<float2, cudaTextureType2D> texUsp; | ||
9 | +texture<float2, cudaTextureType2D> texUip; | ||
10 | + | ||
__global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR)
{
	/*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field
		produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field.

	  One thread handles one pixel of the slice (2D launch; out-of-range threads exit).
	  The result is ADDED to Us, so the caller is responsible for clearing it first.

		Us	= final scattered field
		k	= list of incoming plane waves (Monte-Carlo samples)
		nk	= number of incoming MC samples
		kmag= magnitude of the incoming field 2pi/lambda
		a	= radius of the sphere (points with d < a use the internal-field texture)
		dmin= minimum distance of the Usp texture
		dmax= maximum distance of the Usp texture
		f	= position of the focus
		ps	= position of the sphere
		A	= total amplitude of the incident field arriving at the focal spot
		ABCD= rectangle representing the field slice
		uR	= resolution of the field slice in the u direction
		vR	= resolution of the field slice in the v direction
		dR	= resolution of the Usp texture in the d direction
		aR	= resolution of the Uip texture in the d direction
		thetaR= resolution of the Usp and Uip textures in the theta direction
	*/

	//get the current coordinate in the plane slice
	int iu = blockIdx.x * blockDim.x + threadIdx.x;
	int iv = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(iu >= uR || iv >= vR) return;

	//compute the index (easier access to the scalar field array)
	int i = iv*uR + iu;

	//compute the parameters for u and v
	ptype u = (ptype)iu / (uR);
	ptype v = (ptype)iv / (vR);

	//get the point in world space and then the r vector (relative to the sphere)
	bsPoint p = ABCD(u, v);
	bsVector r = p - ps;
	ptype d = r.len();

	//texture coordinates along the distance axis for the external (di) and internal (ai) fields
	float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1);
	float ai = ( (d - dmin)/(a - dmin)) * (aR - 1);

	//the following values do not depend on the plane wave, so compute them once
	//instead of once per Monte-Carlo sample
	r = r.norm();			//direction from the sphere to the pixel
	bsVector c = ps - f;	//offset of the sphere from the focus (used for the phase factor)

	bsComplex sumUs(0, 0);
	//for each plane wave in the wave list
	for(int iw = 0; iw < nk; iw++)
	{
		//find the inner product of the direction vectors, clamped to the domain of acos
		ptype cos_theta = k[iw].dot(r);
		if(cos_theta < -1)
			cos_theta = -1;
		if(cos_theta > 1)
			cos_theta = 1;
		float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1);

		//compute the phase factor for spheres that are not at the origin
		bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));

		//compute the internal field if we are inside a sphere
		if(d < a)
		{
			float2 Uip = tex2D(texUip, ai + 0.5, thetai + 0.5);
			sumUs += (1.0/nk) * A * phase * bsComplex(Uip.x, Uip.y);
		}
		//otherwise compute the scattered field
		else
		{
			float2 Usp = tex2D(texUsp, di + 0.5, thetai + 0.5);
			sumUs += (1.0/nk) * A * phase * bsComplex(Usp.x, Usp.y);
		}

	}

	Us[i] += sumUs;
}
87 | + | ||
void nearfieldStruct::scalarUpLut()
{
	/*Compute the field scattered by every sphere using the per-sphere look-up tables
	  (sVector[s].Usp for the external field and sVector[s].Uip for the internal field).

	  The field U is cleared and the contribution of each sphere is accumulated into U.x_hat.
	  The elapsed time (as reported by gpuStopTimer) is stored in t_Us.
	  Nothing is done (and t_Us is left untouched) when there are no spheres.
	*/

	//get the number of spheres
	int nSpheres = sVector.size();

	//if there are no spheres, nothing to do here
	if(nSpheres == 0)
		return;

	//time the calculation of the scattered field
	gpuStartTimer();

	//clear the scattered field
	U.clear_gpu();

	//create one thread for each pixel of the field slice
	dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
	dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

	//copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff)
	//(nk is a local count so that the member nWaves is not hidden)
	bsVector* gpuk;
	int nk;
	ptype subA;
	if(planeWave)
	{
		//a plane wave is a single sample travelling along k with the full amplitude
		nk = 1;
		HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) );
		HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice));
		subA = A;
	}
	else
	{
		nk = inWaves.size();
		HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nk ) );
		HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nk, cudaMemcpyHostToDevice));
		//compute the amplitude that makes it through the condenser
		subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );
	}

	//both textures store two 32-bit floats per texel (read back as float2 in the kernel)
	cudaChannelFormatDesc channelDescUsp =
		cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);
	cudaChannelFormatDesc channelDescUip =
		cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);

	//texture sampling properties are the same for every sphere
	texUsp.addressMode[0] = cudaAddressModeMirror;
	texUsp.addressMode[1] = cudaAddressModeMirror;
	texUsp.filterMode = cudaFilterModeLinear;
	texUsp.normalized = false;

	texUip.addressMode[0] = cudaAddressModeMirror;
	texUip.addressMode[1] = cudaAddressModeMirror;
	texUip.filterMode = cudaFilterModeLinear;
	texUip.normalized = false;

	//for each sphere
	for(int s = 0; s<nSpheres; s++)
	{
		//resolutions of the LUTs stored with this sphere
		int dR = sVector[s].Usp.R[0];
		int thetaR = sVector[s].Usp.R[1];
		int aR = sVector[s].Uip.R[0];

		//allocate space for the Usp and Uip textures
		cudaArray* arrayUsp;
		cudaArray* arrayUip;
		HANDLE_ERROR(cudaMallocArray(&arrayUsp, &channelDescUsp, dR, thetaR));
		HANDLE_ERROR(cudaMallocArray(&arrayUip, &channelDescUip, aR, thetaR));

		HANDLE_ERROR(cudaBindTextureToArray(texUsp, arrayUsp, channelDescUsp));
		HANDLE_ERROR(cudaBindTextureToArray(texUip, arrayUip, channelDescUip));

		//copy the LUTs to the Usp and Uip textures (device-to-device copies)
		HANDLE_ERROR( cudaMemcpy2DToArray(arrayUsp, 0, 0, sVector[s].Usp.x_hat, dR*sizeof(float2), dR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));
		HANDLE_ERROR( cudaMemcpy2DToArray(arrayUip, 0, 0, sVector[s].Uip.x_hat, aR*sizeof(float2), aR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));

		gpuScalarUpLut<<<dimGrid, dimBlock>>>(U.x_hat,
												gpuk,
												nk,
												2 * PI / lambda,
												sVector[s].a,
												sVector[s].d_min,
												sVector[s].d_max,
												focus,
												sVector[s].p,
												subA,
												pos,
												U.R[0],
												U.R[1],
												dR,
												aR,
												thetaR);

		//report launch-configuration errors immediately
		HANDLE_ERROR(cudaGetLastError());

		HANDLE_ERROR(cudaFreeArray(arrayUsp));
		HANDLE_ERROR(cudaFreeArray(arrayUip));

	}


	//store the time to compute the scattered field
	t_Us = gpuStopTimer();

	//free monte-carlo samples
	HANDLE_ERROR(cudaFree(gpuk));

}
nfScalarUs.cu
@@ -163,7 +163,7 @@ void nearfieldStruct::scalarUs() | @@ -163,7 +163,7 @@ void nearfieldStruct::scalarUs() | ||
163 | return; | 163 | return; |
164 | 164 | ||
165 | //time the calculation of the focused field | 165 | //time the calculation of the focused field |
166 | - //gpuStartTimer(); | 166 | + gpuStartTimer(); |
167 | 167 | ||
168 | //clear the scattered field | 168 | //clear the scattered field |
169 | U.clear_gpu(); | 169 | U.clear_gpu(); |
@@ -251,9 +251,8 @@ void nearfieldStruct::scalarUs() | @@ -251,9 +251,8 @@ void nearfieldStruct::scalarUs() | ||
251 | } | 251 | } |
252 | 252 | ||
253 | 253 | ||
254 | + //store the time to compute the scattered field | ||
255 | + t_Us = gpuStopTimer(); | ||
254 | 256 | ||
255 | - //float t = gpuStopTimer(); | ||
256 | - //std::cout<<"Scalar Us Time: "<<t<<"ms"<<std::endl; | ||
257 | - //std::cout<<focus<<std::endl; | ||
258 | 257 | ||
259 | } | 258 | } |
nfSumUf.cu
@@ -32,7 +32,7 @@ __global__ void gpuScalarUsp(bsComplex* Ufx, bsComplex* Ufy, bsComplex* Ufz, | @@ -32,7 +32,7 @@ __global__ void gpuScalarUsp(bsComplex* Ufx, bsComplex* Ufy, bsComplex* Ufz, | ||
32 | { | 32 | { |
33 | r = p - ps[is]; | 33 | r = p - ps[is]; |
34 | d = r.len(); | 34 | d = r.len(); |
35 | - if(d <= as[is]) | 35 | + if(d < as[is]) |
36 | return; | 36 | return; |
37 | } | 37 | } |
38 | 38 | ||
@@ -110,8 +110,5 @@ void nearfieldStruct::sumUf() | @@ -110,8 +110,5 @@ void nearfieldStruct::sumUf() | ||
110 | HANDLE_ERROR(cudaFree(gpu_p)); | 110 | HANDLE_ERROR(cudaFree(gpu_p)); |
111 | HANDLE_ERROR(cudaFree(gpu_a)); | 111 | HANDLE_ERROR(cudaFree(gpu_a)); |
112 | 112 | ||
113 | - //float t = gpuStopTimer(); | ||
114 | - //std::cout<<"Add Us Time: "<<t<<"ms"<<std::endl; | ||
115 | - //std::cout<<focus<<std::endl; | ||
116 | 113 | ||
117 | } | 114 | } |
options.h
@@ -5,7 +5,7 @@ | @@ -5,7 +5,7 @@ | ||
5 | 5 | ||
6 | #include "nearfield.h" | 6 | #include "nearfield.h" |
7 | #include "microscope.h" | 7 | #include "microscope.h" |
8 | -#include "colormap.h" | 8 | +#include "rts/graphics/colormap.h" |
9 | #include "fileout.h" | 9 | #include "fileout.h" |
10 | //extern nearfieldStruct* NF; | 10 | //extern nearfieldStruct* NF; |
11 | extern microscopeStruct* SCOPE; | 11 | extern microscopeStruct* SCOPE; |
@@ -23,7 +23,179 @@ using namespace std; | @@ -23,7 +23,179 @@ using namespace std; | ||
23 | #include <boost/program_options.hpp> | 23 | #include <boost/program_options.hpp> |
24 | namespace po = boost::program_options; | 24 | namespace po = boost::program_options; |
25 | 25 | ||
26 | -static void loadSpheres(string sphereList) | 26 | +extern bool verbose; |
27 | + | ||
28 | + | ||
29 | + | ||
30 | +static void lNearfield(po::variables_map vm) | ||
31 | +{ | ||
32 | + //test to see if we are simulating a plane wave | ||
33 | + bool planeWave = DEFAULT_PLANEWAVE; | ||
34 | + if(vm.count("plane-wave")) | ||
35 | + planeWave = !planeWave; | ||
36 | + SCOPE->nf.planeWave = planeWave; | ||
37 | + | ||
38 | + //get the incident field amplitude | ||
39 | + SCOPE->nf.A = vm["amplitude"].as<ptype>(); | ||
40 | + | ||
41 | + //get the condenser parameters | ||
42 | + SCOPE->nf.condenser[0] = DEFAULT_CONDENSER_MIN; | ||
43 | + SCOPE->nf.condenser[1] = DEFAULT_CONDENSER_MAX; | ||
44 | + | ||
45 | + if(vm.count("condenser")) | ||
46 | + { | ||
47 | + vector<ptype> cparams = vm["condenser"].as< vector<ptype> >(); | ||
48 | + | ||
49 | + if(cparams.size() == 1) | ||
50 | + SCOPE->nf.condenser[1] = cparams[0]; | ||
51 | + else | ||
52 | + { | ||
53 | + SCOPE->nf.condenser[0] = cparams[0]; | ||
54 | + SCOPE->nf.condenser[1] = cparams[1]; | ||
55 | + } | ||
56 | + } | ||
57 | + | ||
58 | + | ||
59 | + //get the focal rtsPoint position | ||
60 | + SCOPE->nf.focus[0] = DEFAULT_FOCUS_X; | ||
61 | + SCOPE->nf.focus[1] = DEFAULT_FOCUS_Y; | ||
62 | + SCOPE->nf.focus[2] = DEFAULT_FOCUS_Z; | ||
63 | + if(vm.count("focus")) | ||
64 | + { | ||
65 | + vector<ptype> fpos = vm["focus"].as< vector<ptype> >(); | ||
66 | + if(fpos.size() != 3) | ||
67 | + { | ||
68 | + cout<<"BIMSIM Error - the incident focal point is incorrectly specified; it must have three components."<<endl; | ||
69 | + exit(1); | ||
70 | + } | ||
71 | + SCOPE->nf.focus[0] = fpos[0]; | ||
72 | + SCOPE->nf.focus[1] = fpos[1]; | ||
73 | + SCOPE->nf.focus[2] = fpos[2]; | ||
74 | + } | ||
75 | + | ||
76 | + //get the incident light direction (k-vector) | ||
77 | + bsVector spherical(1, 0, 0); | ||
78 | + | ||
79 | + //if a k-vector is specified | ||
80 | + if(vm.count("k")) | ||
81 | + { | ||
82 | + vector<ptype> kvec = vm["k"].as< vector<ptype> >(); | ||
83 | + if(kvec.size() != 2) | ||
84 | + { | ||
85 | + cout<<"BIMSIM Error - k-vector is not specified correctly: it must contain two elements"<<endl; | ||
86 | + exit(1); | ||
87 | + } | ||
88 | + spherical[1] = kvec[0]; | ||
89 | + spherical[2] = kvec[1]; | ||
90 | + } | ||
91 | + SCOPE->nf.k = spherical.sph2cart(); | ||
92 | + | ||
93 | + | ||
94 | + //incident field order | ||
95 | + SCOPE->nf.m = vm["field-order"].as<int>(); | ||
96 | + | ||
97 | + //number of Monte-Carlo samples | ||
98 | + SCOPE->nf.nWaves = vm["samples"].as<int>(); | ||
99 | + | ||
100 | + //random number seed for Monte-Carlo samples | ||
101 | + if(vm.count("seed")) | ||
102 | + srand(vm["seed"].as<unsigned int>()); | ||
103 | + | ||
104 | + | ||
105 | + | ||
106 | +} | ||
107 | + | ||
108 | + | ||
109 | +static void loadOutputParams(po::variables_map vm) | ||
110 | +{ | ||
111 | + //append simulation results to previous binary files | ||
112 | + gFileOut.append = DEFAULT_APPEND; | ||
113 | + if(vm.count("append")) | ||
114 | + gFileOut.append = true; | ||
115 | + | ||
116 | + //image parameters | ||
117 | + //component of the field to be saved | ||
118 | + std::string fieldStr; | ||
119 | + fieldStr = vm["output-type"].as<string>(); | ||
120 | + | ||
121 | + if(fieldStr == "magnitude") | ||
122 | + gFileOut.field = fileoutStruct::fieldMag; | ||
123 | + else if(fieldStr == "intensity") | ||
124 | + gFileOut.field = fileoutStruct::fieldIntensity; | ||
125 | + else if(fieldStr == "polarization") | ||
126 | + gFileOut.field = fileoutStruct::fieldPolar; | ||
127 | + else if(fieldStr == "imaginary") | ||
128 | + gFileOut.field = fileoutStruct::fieldImag; | ||
129 | + else if(fieldStr == "real") | ||
130 | + gFileOut.field = fileoutStruct::fieldReal; | ||
131 | + else if(fieldStr == "angular-spectrum") | ||
132 | + gFileOut.field = fileoutStruct::fieldAngularSpectrum; | ||
133 | + | ||
134 | + | ||
135 | + //image file names | ||
136 | + gFileOut.intFile = vm["intensity"].as<string>(); | ||
137 | + gFileOut.absFile = vm["absorbance"].as<string>(); | ||
138 | + gFileOut.transFile = vm["transmittance"].as<string>(); | ||
139 | + gFileOut.nearFile = vm["near-field"].as<string>(); | ||
140 | + gFileOut.farFile = vm["far-field"].as<string>(); | ||
141 | + | ||
142 | + //colormap | ||
143 | + std::string cmapStr; | ||
144 | + cmapStr = vm["colormap"].as<string>(); | ||
145 | + if(cmapStr == "brewer") | ||
146 | + gFileOut.colormap = rts::cmBrewer; | ||
147 | + else if(cmapStr == "gray") | ||
148 | + gFileOut.colormap = rts::cmGrayscale; | ||
149 | + else | ||
150 | + cout<<"color-map value not recognized (using default): "<<cmapStr<<endl; | ||
151 | +} | ||
152 | + | ||
153 | +void lFlags(po::variables_map vm, po::options_description desc) | ||
154 | +{ | ||
155 | + //display help and exit | ||
156 | + if(vm.count("help")) | ||
157 | + { | ||
158 | + cout<<desc<<endl; | ||
159 | + exit(1); | ||
160 | + } | ||
161 | + | ||
162 | + //flag for verbose output | ||
163 | + if(vm.count("verbose")) | ||
164 | + verbose = true; | ||
165 | + | ||
166 | + if(vm.count("recursive")) | ||
167 | + { | ||
168 | + SCOPE->nf.lut_us = false; | ||
169 | + SCOPE->nf.lut_uf = false; | ||
170 | + } | ||
171 | + else if(vm.count("recursive-us")) | ||
172 | + { | ||
173 | + SCOPE->nf.lut_us = false; | ||
174 | + } | ||
175 | + else if(vm.count("lut-uf")) | ||
176 | + { | ||
177 | + SCOPE->nf.lut_uf = true; | ||
178 | + } | ||
179 | +} | ||
180 | + | ||
181 | +void lWavelength(po::variables_map vm) | ||
182 | +{ | ||
183 | + //load the wavelength | ||
184 | + if(vm.count("nu")) | ||
185 | + { | ||
186 | + //wavelength is given in wavenumber - transform and flag | ||
187 | + SCOPE->nf.lambda = 10000/vm["nu"].as<ptype>(); | ||
188 | + gFileOut.wavenumber = true; | ||
189 | + } | ||
190 | + //otherwise we are using lambda = wavelength | ||
191 | + else | ||
192 | + { | ||
193 | + SCOPE->nf.lambda = vm["lambda"].as<ptype>(); | ||
194 | + gFileOut.wavenumber = false; | ||
195 | + } | ||
196 | +} | ||
197 | + | ||
198 | +static void lSpheres(string sphereList) | ||
27 | { | 199 | { |
28 | /*This function loads a list of spheres given in the string sphereList | 200 | /*This function loads a list of spheres given in the string sphereList |
29 | The format is: | 201 | The format is: |
@@ -58,17 +230,60 @@ static void loadSpheres(string sphereList) | @@ -58,17 +230,60 @@ static void loadSpheres(string sphereList) | ||
58 | //check out the next element (this should set the EOF error flag) | 230 | //check out the next element (this should set the EOF error flag) |
59 | ss.peek(); | 231 | ss.peek(); |
60 | } | 232 | } |
233 | +} | ||
61 | 234 | ||
235 | +void lSpheres(po::variables_map vm) | ||
236 | +{ | ||
237 | + //if a sphere is specified at the command line | ||
238 | + if(vm.count("spheres")) | ||
239 | + { | ||
240 | + //convert the sphere to a string | ||
241 | + vector<ptype> sdesc = vm["spheres"].as< vector<ptype> >(); | ||
62 | 242 | ||
243 | + //compute the number of spheres specified | ||
244 | + unsigned int nS; | ||
245 | + if(sdesc.size() <= 5) | ||
246 | + nS = 1; | ||
247 | + else | ||
248 | + { | ||
249 | + //if the number of parameters is divisible by 5, compute the number of spheres | |
250 | + if(sdesc.size() % 5 == 0) | ||
251 | + nS = sdesc.size() / 5; | ||
252 | + else | ||
253 | + { | ||
254 | + cout<<"BIMSIM Error: Invalid number of sphere parameters."<<endl; | ||
255 | + exit(1); | ||
256 | + } | ||
257 | + } | ||
63 | 258 | ||
64 | -} | 259 | + stringstream ss; |
260 | + | ||
261 | + //for each sphere | ||
262 | + for(unsigned int s=0; s<nS; s++) | ||
263 | + { | ||
264 | + //compute the number of sphere parameters | ||
265 | + unsigned int nP; | ||
266 | + if(nS == 1) nP = sdesc.size(); | ||
267 | + else nP = 5; | ||
268 | + | ||
269 | + //store each parameter as a string | ||
270 | + for(unsigned int i=0; i<nP; i++) | ||
271 | + { | ||
272 | + ss<<sdesc[s*5 + i]<<" "; | ||
273 | + } | ||
274 | + ss<<endl; | ||
275 | + } | ||
276 | + | ||
277 | + | ||
278 | + | ||
279 | + //convert the string to a sphere list | ||
280 | + lSpheres(ss.str()); | ||
281 | + } | ||
65 | 282 | ||
66 | -static void loadSpheres(po::variables_map vm) | ||
67 | -{ | ||
68 | //if files are specified | 283 | //if files are specified |
69 | if(vm.count("sphere-file")) | 284 | if(vm.count("sphere-file")) |
70 | { | 285 | { |
71 | - cout<<"Sphere files detected."<<endl; | 286 | + |
72 | vector<string> filenames = vm["sphere-file"].as< vector<string> >(); | 287 | vector<string> filenames = vm["sphere-file"].as< vector<string> >(); |
73 | //load each file | 288 | //load each file |
74 | for(int iS=0; iS<filenames.size(); iS++) | 289 | for(int iS=0; iS<filenames.size(); iS++) |
@@ -85,69 +300,51 @@ static void loadSpheres(po::variables_map vm) | @@ -85,69 +300,51 @@ static void loadSpheres(po::variables_map vm) | ||
85 | std::string instr((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>()); | 300 | std::string instr((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>()); |
86 | 301 | ||
87 | //load the list of spheres from a string | 302 | //load the list of spheres from a string |
88 | - loadSpheres(instr); | 303 | + lSpheres(instr); |
89 | } | 304 | } |
90 | } | 305 | } |
91 | 306 | ||
92 | - //load the sphere from the command line | ||
93 | - if(vm.count("sx") || vm.count("sy") || vm.count("sz") || vm.count("s")) | ||
94 | - { | ||
95 | - //create a new sphere | ||
96 | - sphere newS; | ||
97 | - | ||
98 | - //set defaults | ||
99 | - if(vm.count("sx")) | ||
100 | - newS.p[0] = vm["sx"].as<ptype>(); | ||
101 | - else | ||
102 | - newS.p[0] = DEFAULT_SPHERE_X; | ||
103 | - | ||
104 | - | ||
105 | - if(vm.count("sy")) | ||
106 | - newS.p[1] = vm["sy"].as<ptype>(); | ||
107 | - else | ||
108 | - newS.p[1] = DEFAULT_SPHERE_Y; | ||
109 | - | ||
110 | - if(vm.count("sz")) | ||
111 | - newS.p[2] = vm["sz"].as<ptype>(); | ||
112 | - else | ||
113 | - newS.p[2] = DEFAULT_SPHERE_Z; | ||
114 | - | ||
115 | - if(vm.count("radius")) | ||
116 | - newS.a = vm["radius"].as<ptype>(); | ||
117 | - else | ||
118 | - newS.a = DEFAULT_SPHERE_A; | ||
119 | - | ||
120 | - //add the sphere to the sphere vector | ||
121 | - SCOPE->nf.sVector.push_back(newS); | 307 | + //make sure the appropriate materials are loaded |
308 | + unsigned int nS = SCOPE->nf.sVector.size(); | ||
122 | 309 | ||
310 | + //for each sphere | ||
311 | + for(unsigned int s = 0; s<nS; s++) | ||
312 | + { | ||
313 | + //make sure the corresponding material exists | ||
314 | + if(SCOPE->nf.sVector[s].iMaterial + 1 > SCOPE->nf.mVector.size()) | ||
315 | + { | ||
316 | + //otherwise output an error | ||
317 | + cout<<"BIMSIM Error - A material is not loaded for sphere "<<s+1<<"."<<endl; | ||
318 | + exit(1); | ||
319 | + } | ||
123 | } | 320 | } |
124 | } | 321 | } |
125 | 322 | ||
126 | -static void loadMaterials(po::variables_map vm) | 323 | +static void lMaterials(po::variables_map vm) |
127 | { | 324 | { |
128 | //if materials are specified at the command line | 325 | //if materials are specified at the command line |
129 | if(vm.count("materials")) | 326 | if(vm.count("materials")) |
130 | { | 327 | { |
131 | vector<ptype> matVec = vm["materials"].as< vector<ptype> >(); | 328 | vector<ptype> matVec = vm["materials"].as< vector<ptype> >(); |
132 | - if(matVec.size() %2 != 0) | 329 | + if(matVec.size() == 1) |
330 | + { | ||
331 | + rts::material<ptype> newM(SCOPE->nf.lambda, matVec[0], 0); | ||
332 | + SCOPE->nf.mVector.push_back(newM); | ||
333 | + } | ||
334 | + else if(matVec.size() %2 != 0) | ||
133 | { | 335 | { |
134 | cout<<"BIMSim Error: materials must be specified in n, k pairs"<<endl; | 336 | cout<<"BIMSim Error: materials must be specified in n, k pairs"<<endl; |
135 | exit(1); | 337 | exit(1); |
136 | } | 338 | } |
137 | - | ||
138 | - | ||
139 | - for(int i=0; i<matVec.size(); i+=2) | 339 | + else |
140 | { | 340 | { |
141 | - rts::material<ptype> newM(SCOPE->nf.lambda, matVec[i], matVec[i+1]); | ||
142 | - SCOPE->nf.mVector.push_back(newM); | 341 | + for(int i=0; i<matVec.size(); i+=2) |
342 | + { | ||
343 | + rts::material<ptype> newM(SCOPE->nf.lambda, matVec[i], matVec[i+1]); | ||
344 | + SCOPE->nf.mVector.push_back(newM); | ||
345 | + } | ||
143 | } | 346 | } |
144 | } | 347 | } |
145 | - else | ||
146 | - { | ||
147 | - //add the command line material as the default (material 0) | ||
148 | - rts::material<ptype> newM(SCOPE->nf.lambda, vm["n"].as<ptype>(), vm["k"].as<ptype>()); | ||
149 | - SCOPE->nf.mVector.push_back(newM); | ||
150 | - } | ||
151 | 348 | ||
152 | //if file names are specified, load the materials | 349 | //if file names are specified, load the materials |
153 | if(vm.count("material-file")) | 350 | if(vm.count("material-file")) |
@@ -169,57 +366,109 @@ static void loadMaterials(po::variables_map vm) | @@ -169,57 +366,109 @@ static void loadMaterials(po::variables_map vm) | ||
169 | 366 | ||
170 | } | 367 | } |
171 | 368 | ||
172 | -static void loadNearfieldParams(po::variables_map vm) | 369 | +static void lOptics(po::variables_map vm) |
173 | { | 370 | { |
174 | - //test to see if we are simulating a plane wave | ||
175 | - bool planeWave = DEFAULT_PLANEWAVE; | ||
176 | - if(vm.count("plane-wave")) | ||
177 | - planeWave = !planeWave; | ||
178 | - SCOPE->nf.planeWave = planeWave; | ||
179 | - | ||
180 | - //get the wavelength | ||
181 | - //SCOPE->nf.lambda = vm["lambda"].as<ptype>(); | ||
182 | - | ||
183 | - //get the incident field amplitude | ||
184 | - SCOPE->nf.A = vm["amplitude"].as<ptype>(); | ||
185 | - | ||
186 | - //get the condenser parameters | ||
187 | - SCOPE->nf.condenser[0] = vm["condenser-min"].as<ptype>(); | ||
188 | - SCOPE->nf.condenser[1] = vm["condenser-max"].as<ptype>(); | ||
189 | - | ||
190 | - | ||
191 | - //get the focal rtsPoint position | ||
192 | - SCOPE->nf.focus[0] = vm["fx"].as<ptype>(); | ||
193 | - SCOPE->nf.focus[1] = vm["fy"].as<ptype>(); | ||
194 | - SCOPE->nf.focus[2] = vm["fz"].as<ptype>(); | ||
195 | - | ||
196 | - //get the incident light direction (k-vector) | ||
197 | - bsVector spherical; | ||
198 | - spherical[0] = 1.0; | ||
199 | - spherical[1] = vm["theta"].as<ptype>(); | ||
200 | - spherical[2] = vm["phi"].as<ptype>(); | ||
201 | - SCOPE->nf.k = spherical.sph2cart(); | ||
202 | - | ||
203 | - | ||
204 | - //incident field order | ||
205 | - SCOPE->nf.m = vm["field-order"].as<int>(); | ||
206 | - | ||
207 | - //number of Monte-Carlo samples | ||
208 | - SCOPE->nf.nWaves = vm["samples"].as<int>(); | ||
209 | - | ||
210 | - | 371 | + SCOPE->objective[0] = DEFAULT_OBJECTIVE_MIN; |
372 | + SCOPE->objective[1] = DEFAULT_OBJECTIVE_MAX; | ||
373 | + if(vm.count("objective")) | ||
374 | + { | ||
375 | + vector<ptype> oparams = vm["objective"].as< vector<ptype> >(); | ||
211 | 376 | ||
377 | + if(oparams.size() == 1) | ||
378 | + SCOPE->objective[1] = oparams[0]; | ||
379 | + else | ||
380 | + { | ||
381 | + SCOPE->objective[0] = oparams[0]; | ||
382 | + SCOPE->objective[1] = oparams[1]; | ||
383 | + } | ||
384 | + } | ||
212 | } | 385 | } |
213 | 386 | ||
214 | -static void loadSliceParams(po::variables_map vm) | 387 | +static void lImagePlane(po::variables_map vm) |
215 | { | 388 | { |
216 | - //parameters for the sample plane | ||
217 | - | 389 | + bsPoint pMin(DEFAULT_PLANE_MIN_X, DEFAULT_PLANE_MIN_Y, DEFAULT_PLANE_MIN_Z); |
390 | + bsPoint pMax(DEFAULT_PLANE_MAX_X, DEFAULT_PLANE_MAX_Y, DEFAULT_PLANE_MAX_Z); | ||
391 | + bsVector normal(DEFAULT_PLANE_NORM_X, DEFAULT_PLANE_NORM_Y, DEFAULT_PLANE_NORM_Z); | ||
218 | 392 | ||
219 | //set the default values for the slice position and orientation | 393 | //set the default values for the slice position and orientation |
220 | - bsPoint pMin(vm["plane-min-x"].as<ptype>(), vm["plane-min-y"].as<ptype>(), vm["plane-min-z"].as<ptype>()); | ||
221 | - bsPoint pMax(vm["plane-max-x"].as<ptype>(), vm["plane-max-y"].as<ptype>(), vm["plane-max-z"].as<ptype>()); | ||
222 | - bsVector normal(vm["plane-norm-x"].as<ptype>(), vm["plane-norm-y"].as<ptype>(), vm["plane-norm-z"].as<ptype>()); | 394 | + if(vm.count("plane-lower-left") && vm.count("plane-upper-right") && vm.count("plane-normal")) |
395 | + { | ||
396 | + vector<ptype> ll = vm["plane-lower-left"].as< vector<ptype> >(); | ||
397 | + if(ll.size() != 3) | ||
398 | + { | ||
399 | + cout<<"BIMSIM Error - The lower-left corner of the image plane is incorrectly specified."<<endl; | ||
400 | + exit(1); | ||
401 | + } | ||
402 | + | ||
403 | + vector<ptype> ur = vm["plane-lower-left"].as< vector<ptype> >(); | ||
404 | + if(ur.size() != 3) | ||
405 | + { | ||
406 | + cout<<"BIMSIM Error - The upper-right corner of the image plane is incorrectly specified."<<endl; | ||
407 | + exit(1); | ||
408 | + } | ||
409 | + | ||
410 | + vector<ptype> norm = vm["plane-lower-left"].as< vector<ptype> >(); | ||
411 | + if(norm.size() != 3) | ||
412 | + { | ||
413 | + cout<<"BIMSIM Error - The normal of the image plane is incorrectly specified."<<endl; | ||
414 | + exit(1); | ||
415 | + } | ||
416 | + | ||
417 | + pMin = bsPoint(ll[0], ll[1], ll[2]); | ||
418 | + pMax = bsPoint(ur[0], ur[1], ur[2]); | ||
419 | + normal = bsVector(norm[0], norm[1], norm[2]); | ||
420 | + } | ||
421 | + else if(vm.count("xy")) | ||
422 | + { | ||
423 | + //default plane size in microns | ||
424 | + ptype s = DEFAULT_PLANE_SIZE; | ||
425 | + ptype pos = DEFAULT_PLANE_POSITION; | ||
426 | + | ||
427 | + vector<ptype> xy = vm["xy"].as< vector<ptype> >(); | ||
428 | + if(xy.size() >= 1) | ||
429 | + s = xy[0]; | ||
430 | + if(xy.size() >= 2) | ||
431 | + pos = xy[1]; | ||
432 | + | ||
433 | + //calculate the plane corners and normal based on the size and position | ||
434 | + pMin = bsPoint(-s/2, -s/2, pos); | ||
435 | + pMax = bsPoint(s/2, s/2, pos); | ||
436 | + normal = bsVector(0, 0, 1); | ||
437 | + } | ||
438 | + else if(vm.count("xz")) | ||
439 | + { | ||
440 | + //default plane size in microns | ||
441 | + ptype size = DEFAULT_PLANE_SIZE; | ||
442 | + ptype pos = DEFAULT_PLANE_POSITION; | ||
443 | + | ||
444 | + vector<ptype> xz = vm["xz"].as< vector<ptype> >(); | ||
445 | + if(xz.size() >= 1) | ||
446 | + size = xz[0]; | ||
447 | + if(xz.size() >= 2) | ||
448 | + pos = xz[1]; | ||
449 | + | ||
450 | + //calculate the plane corners and normal based on the size and position | ||
451 | + pMin = bsPoint(-size/2, pos, -size/2); | ||
452 | + pMax = bsPoint(size/2, pos, size/2); | ||
453 | + normal = bsVector(0, -1, 0); | ||
454 | + } | ||
455 | + else if(vm.count("yz")) | ||
456 | + { | ||
457 | + //default plane size in microns | ||
458 | + ptype size = DEFAULT_PLANE_SIZE; | ||
459 | + ptype pos = DEFAULT_PLANE_POSITION; | ||
460 | + | ||
461 | + vector<ptype> yz = vm["yz"].as< vector<ptype> >(); | ||
462 | + if(yz.size() >= 1) | ||
463 | + size = yz[0]; | ||
464 | + if(yz.size() >= 2) | ||
465 | + pos = yz[1]; | ||
466 | + | ||
467 | + //calculate the plane corners and normal based on the size and position | ||
468 | + pMin = bsPoint(pos, -size/2, -size/2); | ||
469 | + pMax = bsPoint(pos, size/2, size/2); | ||
470 | + normal = bsVector(1, 0, 0); | ||
471 | + } | ||
223 | SCOPE->setPos(pMin, pMax, normal); | 472 | SCOPE->setPos(pMin, pMax, normal); |
224 | 473 | ||
225 | //resolution | 474 | //resolution |
@@ -233,175 +482,111 @@ static void loadSliceParams(po::variables_map vm) | @@ -233,175 +482,111 @@ static void loadSliceParams(po::variables_map vm) | ||
233 | 482 | ||
234 | 483 | ||
235 | SCOPE->setNearfield(); | 484 | SCOPE->setNearfield(); |
236 | - | ||
237 | - | ||
238 | - | ||
239 | -} | ||
240 | - | ||
241 | -static void loadMicroscopeParams(po::variables_map vm) | ||
242 | -{ | ||
243 | - //objective | ||
244 | - SCOPE->objective[0] = vm["objective-min"].as<ptype>(); | ||
245 | - SCOPE->objective[1] = vm["objective-max"].as<ptype>(); | ||
246 | - | ||
247 | - | ||
248 | - | ||
249 | - | ||
250 | - | ||
251 | -} | ||
252 | - | ||
253 | -static void loadOutputParams(po::variables_map vm) | ||
254 | -{ | ||
255 | - //append simulation results to previous binary files | ||
256 | - gFileOut.append = DEFAULT_APPEND; | ||
257 | - if(vm.count("append")) | ||
258 | - gFileOut.append = true; | ||
259 | - | ||
260 | - //image parameters | ||
261 | - //component of the field to be saved | ||
262 | - std::string fieldStr; | ||
263 | - fieldStr = vm["output-type"].as<string>(); | ||
264 | - | ||
265 | - if(fieldStr == "magnitude") | ||
266 | - gFileOut.field = fileoutStruct::fieldMag; | ||
267 | - else if(fieldStr == "intensity") | ||
268 | - gFileOut.field = fileoutStruct::fieldIntensity; | ||
269 | - else if(fieldStr == "polarization") | ||
270 | - gFileOut.field = fileoutStruct::fieldPolar; | ||
271 | - else if(fieldStr == "imaginary") | ||
272 | - gFileOut.field = fileoutStruct::fieldImag; | ||
273 | - else if(fieldStr == "real") | ||
274 | - gFileOut.field = fileoutStruct::fieldReal; | ||
275 | - else if(fieldStr == "angular-spectrum") | ||
276 | - gFileOut.field = fileoutStruct::fieldAngularSpectrum; | ||
277 | - | ||
278 | - | ||
279 | - //image file names | ||
280 | - gFileOut.intFile = vm["intensity"].as<string>(); | ||
281 | - gFileOut.absFile = vm["absorbance"].as<string>(); | ||
282 | - gFileOut.transFile = vm["transmittance"].as<string>(); | ||
283 | - gFileOut.nearFile = vm["near-field"].as<string>(); | ||
284 | - gFileOut.farFile = vm["far-field"].as<string>(); | ||
285 | - | ||
286 | - //colormap | ||
287 | - std::string cmapStr; | ||
288 | - cmapStr = vm["colormap"].as<string>(); | ||
289 | - if(cmapStr == "brewer") | ||
290 | - gFileOut.colormap = rts::colormap::cmBrewer; | ||
291 | - else if(cmapStr == "gray") | ||
292 | - gFileOut.colormap = rts::colormap::cmGrayscale; | ||
293 | - else | ||
294 | - cout<<"color-map value not recognized (using default): "<<cmapStr<<endl; | ||
295 | } | 485 | } |
296 | 486 | ||
297 | static void OutputOptions() | 487 | static void OutputOptions() |
298 | { | 488 | { |
299 | - cout<<SCOPE->nf.toStr(); | 489 | + cout<<SCOPE->toStr(); |
300 | 490 | ||
301 | cout<<"# of source points: "<<SCOPE->focalPoints.size()<<endl; | 491 | cout<<"# of source points: "<<SCOPE->focalPoints.size()<<endl; |
302 | 492 | ||
303 | } | 493 | } |
304 | 494 | ||
495 | +vector<ptype> test; | ||
305 | static void SetOptions(po::options_description &desc) | 496 | static void SetOptions(po::options_description &desc) |
306 | { | 497 | { |
307 | desc.add_options() | 498 | desc.add_options() |
308 | - ("help,h", "prints this help") | ||
309 | - ("plane-wave,P", "simulates an incident plane wave") | ||
310 | - ("intensity,I", po::value<string>()->default_value(DEFAULT_INTENSITY_FILE), "output measured intensity (filename)") | ||
311 | - ("absorbance,A", po::value<string>()->default_value(DEFAULT_ABSORBANCE_FILE), "output measured absorbance (filename)") | ||
312 | - ("transmittance,T", po::value<string>()->default_value(DEFAULT_TRANSMITTANCE_FILE), "output measured transmittance (filename)") | ||
313 | - ("far-field,F", po::value<string>()->default_value(DEFAULT_FAR_FILE), "output far-field at detector (filename)") | ||
314 | - ("near-field,N", po::value<string>()->default_value(DEFAULT_NEAR_FILE), "output field at focal plane (filename)") | ||
315 | - ("extended-source,X", po::value<string>()->default_value(DEFAULT_EXTENDED_SOURCE), "image of source at focus (filename)") | ||
316 | - //("sx,x", po::value<ptype>()->default_value(DEFAULT_SPHERE_X), "sphere coordinates") | ||
317 | - //("sy,y", po::value<ptype>()->default_value(DEFAULT_SPHERE_Y)) | ||
318 | - //("sz,z", po::value<ptype>()->default_value(DEFAULT_SPHERE_Z)) | ||
319 | - ("sx,x", po::value<ptype>(), "sphere coordinates") | ||
320 | - ("sy,y", po::value<ptype>()) | ||
321 | - ("sz,z", po::value<ptype>()) | ||
322 | - ("radius,r", po::value<ptype>()->default_value(DEFAULT_SPHERE_A), "sphere radius") | ||
323 | - ("samples,s", po::value<int>()->default_value(DEFAULT_SAMPLES), "Monte-Carlo samples used to compute Us") | ||
324 | - ("sphere-file,S", po::value< vector<string> >()->multitoken(), "sphere file:\n [x y z radius material]") | ||
325 | - ("amplitude,a", po::value<ptype>()->default_value(DEFAULT_AMPLITUDE), "incident field amplitude") | ||
326 | - ("n,n", po::value<ptype>()->default_value(DEFAULT_N, "1.4"), "sphere phase speed") | ||
327 | - ("k,k", po::value<ptype>()->default_value(DEFAULT_K), "sphere absorption coefficient") | ||
328 | - ("material-file,M", po::value< vector<string> >()->multitoken(), "material file:\n [lambda n k]") | ||
329 | - ("materials", po::value< vector<ptype> >()->multitoken(), "materials specified using n, k pairs:\n ex. --materials n1 k1 n2 k2\n (if used --n and --k are ignored)") | ||
330 | - ("lambda,l", po::value<ptype>()->default_value(DEFAULT_LAMBDA), "incident wavelength") | 499 | + ("help", "prints this help") |
500 | + ("verbose", "verbose output\n") | ||
501 | + | ||
502 | + ("intensity", po::value<string>()->default_value(DEFAULT_INTENSITY_FILE), "output measured intensity (filename)") | ||
503 | + ("absorbance", po::value<string>()->default_value(DEFAULT_ABSORBANCE_FILE), "output measured absorbance (filename)") | ||
504 | + ("transmittance", po::value<string>()->default_value(DEFAULT_TRANSMITTANCE_FILE), "output measured transmittance (filename)") | ||
505 | + ("far-field", po::value<string>()->default_value(DEFAULT_FAR_FILE), "output far-field at detector (filename)") | ||
506 | + ("near-field", po::value<string>()->default_value(DEFAULT_NEAR_FILE), "output field at focal plane (filename)") | ||
507 | + ("extended-source", po::value<string>()->default_value(DEFAULT_EXTENDED_SOURCE), "image of source at focus (filename)\n") | ||
508 | + | ||
509 | + ("spheres", po::value< vector<ptype> >()->multitoken(), "sphere position: x y z a m") | ||
510 | + ("sphere-file", po::value< vector<string> >()->multitoken(), "sphere file:\n [x y z radius material]") | ||
511 | + ("materials", po::value< vector<ptype> >()->multitoken(), "refractive indices as n, k pairs:\n ex. -m n0 k0 n1 k1 n2 k2") | ||
512 | + ("material-file", po::value< vector<string> >()->multitoken(), "material file:\n [lambda n k]\n") | ||
513 | + | ||
514 | + ("lambda", po::value<ptype>()->default_value(DEFAULT_LAMBDA), "incident wavelength") | ||
331 | ("nu", po::value<ptype>(), "incident frequency (in cm^-1)\n(if specified, lambda is ignored)") | 515 | ("nu", po::value<ptype>(), "incident frequency (in cm^-1)\n(if specified, lambda is ignored)") |
332 | - ("theta,t", po::value<ptype>()->default_value(DEFAULT_K_THETA), "light direction (polar coords)") | ||
333 | - ("phi,p", po::value<ptype>()->default_value(DEFAULT_K_PHI)) | ||
334 | - ("fx", po::value<ptype>()->default_value(DEFAULT_FOCUS_X), "incident focal point") | ||
335 | - ("fy", po::value<ptype>()->default_value(DEFAULT_FOCUS_Y)) | ||
336 | - ("fz", po::value<ptype>()->default_value(DEFAULT_FOCUS_Z)) | ||
337 | - ("condenser-max,C", po::value<ptype>()->default_value(DEFAULT_CONDENSER_MAX), "condenser numerical aperature") | ||
338 | - ("condenser-min,c", po::value<ptype>()->default_value(DEFAULT_CONDENSER_MIN), "condenser obscuration NA") | ||
339 | - ("objective-max,O", po::value<ptype>()->default_value(DEFAULT_OBJECTIVE_MAX), "objective numerical aperature") | ||
340 | - ("objective-min,o", po::value<ptype>()->default_value(DEFAULT_OBJECTIVE_MIN), "objective obscuration NA") | ||
341 | - ("field-order", po::value<int>()->default_value(DEFAULT_FIELD_ORDER), "order of the incident field") | ||
342 | - ("output-type,f", po::value<string>()->default_value(DEFAULT_FIELD_TYPE), "output field value:\n magnitude, polarization, real, imaginary, angular-spectrum") | ||
343 | - ("resolution,R", po::value<unsigned int>()->default_value(DEFAULT_SLICE_RES), "resolution of the detector") | ||
344 | - ("padding,d", po::value<unsigned int>()->default_value(DEFAULT_PADDING), "FFT padding for the objective bandpass") | 516 | + ("k", po::value< vector<ptype> >()->multitoken(), "k-vector direction: -k theta phi\n theta = [0 2*pi], phi = [0 pi]") |
517 | + ("amplitude", po::value<ptype>()->default_value(DEFAULT_AMPLITUDE), "incident field amplitude") | ||
518 | + ("condenser", po::value< vector<ptype> >()->multitoken(), "condenser numerical aperature\nA pair of values can be used to specify an inner obscuration: -c NAin NAout") | ||
519 | + ("objective", po::value< vector<ptype> >()->multitoken(), "objective numerical aperature\nA pair of values can be used to specify an inner obscuration: -c NAin NAout") | ||
520 | + ("focus", po::value< vector<ptype> >()->multitoken(), "focal position for the incident point source\n (default = --focus 0 0 0)") | ||
521 | + ("plane-wave", "simulates an incident plane wave\n") | ||
522 | + | ||
523 | + ("resolution", po::value<unsigned int>()->default_value(DEFAULT_SLICE_RES), "resolution of the detector") | ||
524 | + ("plane-lower-left", po::value< vector<ptype> >()->multitoken(), "lower-left position of the image plane") | ||
525 | + ("plane-upper-right", po::value< vector<ptype> >()->multitoken(), "upper-right position of the image plane") | ||
526 | + ("plane-normal", po::value< vector<ptype> >()->multitoken(), "normal for the image plane") | ||
527 | + ("xy", po::value< vector<ptype> >()->multitoken(), "specify an x-y image plane\n (standard microscope)") | ||
528 | + ("xz", po::value< vector<ptype> >()->multitoken(), "specify a x-z image plane\n (cross-section of the focal volume)") | ||
529 | + ("yz", po::value< vector<ptype> >()->multitoken(), "specify a y-z image plane\n (cross-section of the focal volume)\n") | ||
530 | + | ||
531 | + ("samples", po::value<int>()->default_value(DEFAULT_SAMPLES), "Monte-Carlo samples used to compute Us") | ||
532 | + ("padding", po::value<unsigned int>()->default_value(DEFAULT_PADDING), "FFT padding for the objective bandpass") | ||
345 | ("supersample", po::value<unsigned int>()->default_value(DEFAULT_SUPERSAMPLE), "super-sampling rate for the detector field") | 533 | ("supersample", po::value<unsigned int>()->default_value(DEFAULT_SUPERSAMPLE), "super-sampling rate for the detector field") |
534 | + ("field-order", po::value<int>()->default_value(DEFAULT_FIELD_ORDER), "order of the incident field") | ||
535 | + ("seed", po::value<unsigned int>(), "seed for the Monte-Carlo random number generator") | ||
536 | + ("recursive", "evaluate all Bessel functions recursively\n") | ||
537 | + ("recursive-us", "evaluate scattered-field Bessel functions recursively\n") | ||
538 | + ("lut-uf", "evaluate the focused-field using a look-up table\n") | ||
539 | + | ||
540 | + ("output-type", po::value<string>()->default_value(DEFAULT_FIELD_TYPE), "output field value:\n magnitude, polarization, real, imaginary, angular-spectrum") | ||
346 | ("colormap", po::value<string>()->default_value(DEFAULT_COLORMAP), "colormap: gray, brewer") | 541 | ("colormap", po::value<string>()->default_value(DEFAULT_COLORMAP), "colormap: gray, brewer") |
347 | ("append", "append result to an existing file\n (binary files only)") | 542 | ("append", "append result to an existing file\n (binary files only)") |
348 | - ("plane-min-x,u", po::value<ptype>()->default_value(DEFAULT_SLICE_MIN_X), "lower-left corner of the field slice") | ||
349 | - ("plane-min-y,v", po::value<ptype>()->default_value(DEFAULT_SLICE_MIN_Y)) | ||
350 | - ("plane-min-z,w", po::value<ptype>()->default_value(DEFAULT_SLICE_MIN_Z)) | ||
351 | - ("plane-max-x,U", po::value<ptype>()->default_value(DEFAULT_SLICE_MAX_X), "upper-right corner of the field slice") | ||
352 | - ("plane-max-y,V", po::value<ptype>()->default_value(DEFAULT_SLICE_MAX_Y)) | ||
353 | - ("plane-max-z,W", po::value<ptype>()->default_value(DEFAULT_SLICE_MAX_Z)) | ||
354 | - ("plane-norm-x", po::value<ptype>()->default_value(DEFAULT_SLICE_NORM_X), "field slice normal") | ||
355 | - ("plane-norm-y", po::value<ptype>()->default_value(DEFAULT_SLICE_NORM_Y)) | ||
356 | - ("plane-norm-z", po::value<ptype>()->default_value(DEFAULT_SLICE_NORM_Z)); | 543 | + ; |
357 | } | 544 | } |
358 | 545 | ||
359 | static void LoadParameters(int argc, char *argv[]) | 546 | static void LoadParameters(int argc, char *argv[]) |
360 | { | 547 | { |
361 | //create an option description | 548 | //create an option description |
362 | - po::options_description desc("Allowed options"); | 549 | + po::options_description desc("BimSim arguments"); |
363 | 550 | ||
364 | //fill it with options | 551 | //fill it with options |
365 | SetOptions(desc); | 552 | SetOptions(desc); |
366 | 553 | ||
367 | po::variables_map vm; | 554 | po::variables_map vm; |
368 | - po::store(po::parse_command_line(argc, argv, desc), vm); | 555 | + po::store(po::parse_command_line(argc, argv, desc, po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm); |
369 | po::notify(vm); | 556 | po::notify(vm); |
370 | 557 | ||
371 | - //display help and exit | ||
372 | - if(vm.count("help")) | ||
373 | - { | ||
374 | - cout<<desc<<endl; | ||
375 | - exit(1); | ||
376 | - } | ||
377 | 558 | ||
378 | - //load the wavelength | ||
379 | - if(vm.count("nu")) | ||
380 | - { | ||
381 | - //wavelength is given in wavenumber - transform and flag | ||
382 | - SCOPE->nf.lambda = 10000/vm["nu"].as<ptype>(); | ||
383 | - gFileOut.wavenumber = true; | ||
384 | - } | ||
385 | - //otherwise we are using lambda = wavelength | ||
386 | - else | ||
387 | - { | ||
388 | - SCOPE->nf.lambda = vm["lambda"].as<ptype>(); | ||
389 | - gFileOut.wavenumber = false; | ||
390 | - } | 559 | + //load flags (help, verbose output) |
560 | + lFlags(vm, desc); | ||
561 | + | ||
562 | + //load the wavelength | ||
563 | + lWavelength(vm); | ||
564 | + | ||
565 | + //load materials | ||
566 | + //loadMaterials(vm); | ||
567 | + lMaterials(vm); | ||
568 | + | ||
569 | + //load the sphere data | ||
570 | + lSpheres(vm); | ||
571 | + | ||
572 | + //load the optics | ||
573 | + lOptics(vm); | ||
574 | + | ||
575 | + //load the position and orientation of the image plane | ||
576 | + lImagePlane(vm); | ||
391 | 577 | ||
392 | //load spheres | 578 | //load spheres |
393 | - loadSpheres(vm); | 579 | + //loadSpheres(vm); |
580 | + | ||
394 | 581 | ||
395 | - //load materials | ||
396 | - loadMaterials(vm); | ||
397 | 582 | ||
398 | - loadNearfieldParams(vm); | 583 | + lNearfield(vm); |
399 | 584 | ||
400 | loadOutputParams(vm); | 585 | loadOutputParams(vm); |
401 | 586 | ||
402 | - loadMicroscopeParams(vm); | 587 | + //loadMicroscopeParams(vm); |
403 | 588 | ||
404 | - loadSliceParams(vm); | 589 | + //loadSliceParams(vm); |
405 | 590 | ||
406 | //if an extended source will be used | 591 | //if an extended source will be used |
407 | if(vm["extended-source"].as<string>() != "") | 592 | if(vm["extended-source"].as<string>() != "") |
scalarslice.cu
@@ -22,16 +22,17 @@ scalarslice::scalarslice() | @@ -22,16 +22,17 @@ scalarslice::scalarslice() | ||
22 | 22 | ||
23 | scalarslice::~scalarslice() | 23 | scalarslice::~scalarslice() |
24 | { | 24 | { |
25 | - HANDLE_ERROR(cudaFree(S)); | 25 | + if(S != NULL) |
26 | + HANDLE_ERROR(cudaFree(S)); | ||
26 | S = NULL; | 27 | S = NULL; |
27 | } | 28 | } |
28 | 29 | ||
29 | -void scalarslice::toImage(std::string filename, ptype vmin, ptype vmax, rts::colormap::colormapType cmap) | 30 | +void scalarslice::toImage(std::string filename, ptype vmin, ptype vmax, rts::colormapType cmap) |
30 | { | 31 | { |
31 | - rts::colormap::gpu2image<ptype>(S, filename, R[0], R[1], vmin, vmax, cmap); | 32 | + rts::gpu2image<ptype>(S, filename, R[0], R[1], vmin, vmax, cmap); |
32 | } | 33 | } |
33 | 34 | ||
34 | -void scalarslice::toImage(std::string filename, bool positive, rts::colormap::colormapType cmap) | 35 | +void scalarslice::toImage(std::string filename, bool positive, rts::colormapType cmap) |
35 | { | 36 | { |
36 | cublasStatus_t stat; | 37 | cublasStatus_t stat; |
37 | cublasHandle_t handle; | 38 | cublasHandle_t handle; |
@@ -62,7 +63,7 @@ void scalarslice::toImage(std::string filename, bool positive, rts::colormap::co | @@ -62,7 +63,7 @@ void scalarslice::toImage(std::string filename, bool positive, rts::colormap::co | ||
62 | exit(1); | 63 | exit(1); |
63 | } | 64 | } |
64 | 65 | ||
65 | - //std::cout<<"Maximum index: "<<result<<std::endl; | 66 | + |
66 | 67 | ||
67 | //retrieve the maximum value | 68 | //retrieve the maximum value |
68 | ptype maxVal; | 69 | ptype maxVal; |
@@ -75,7 +76,7 @@ void scalarslice::toImage(std::string filename, bool positive, rts::colormap::co | @@ -75,7 +76,7 @@ void scalarslice::toImage(std::string filename, bool positive, rts::colormap::co | ||
75 | if(positive) | 76 | if(positive) |
76 | toImage(filename, 0, maxVal, cmap); | 77 | toImage(filename, 0, maxVal, cmap); |
77 | else | 78 | else |
78 | - toImage(filename, -maxVal, maxVal, cmap); | 79 | + toImage(filename, -abs(maxVal), abs(maxVal), cmap); |
79 | } | 80 | } |
80 | 81 | ||
81 | void scalarslice::toEnvi(std::string filename, ptype wavelength, bool append) | 82 | void scalarslice::toEnvi(std::string filename, ptype wavelength, bool append) |
scalarslice.h
@@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
2 | #define RTS_SCALAR_SLICE | 2 | #define RTS_SCALAR_SLICE |
3 | 3 | ||
4 | #include "dataTypes.h" | 4 | #include "dataTypes.h" |
5 | -#include "colormap.h" | 5 | +#include "rts/graphics/colormap.h" |
6 | 6 | ||
7 | struct scalarslice | 7 | struct scalarslice |
8 | { | 8 | { |
@@ -17,8 +17,8 @@ struct scalarslice | @@ -17,8 +17,8 @@ struct scalarslice | ||
17 | ~scalarslice(); | 17 | ~scalarslice(); |
18 | void clear(); | 18 | void clear(); |
19 | 19 | ||
20 | - void toImage(std::string filename, ptype vmin, ptype vmax, rts::colormap::colormapType cmap = rts::colormap::cmBrewer); | ||
21 | - void toImage(std::string filename, bool positive = true, rts::colormap::colormapType cmap = rts::colormap::cmBrewer); | 20 | + void toImage(std::string filename, ptype vmin, ptype vmax, rts::colormapType cmap = rts::cmBrewer); |
21 | + void toImage(std::string filename, bool positive = true, rts::colormapType cmap = rts::cmBrewer); | ||
22 | void toEnvi(std::string filename, ptype wavelength = 0, bool append = false); | 22 | void toEnvi(std::string filename, ptype wavelength = 0, bool append = false); |
23 | 23 | ||
24 | }; | 24 | }; |
sphere.cpp
1 | #include "sphere.h" | 1 | #include "sphere.h" |
2 | +#include "defaults.h" | ||
2 | 3 | ||
3 | #include "rts/math/complex.h" | 4 | #include "rts/math/complex.h" |
4 | #include <complex> | 5 | #include <complex> |
5 | #include <stdlib.h> | 6 | #include <stdlib.h> |
7 | +#include <fstream> | ||
6 | 8 | ||
7 | using namespace rts; | 9 | using namespace rts; |
8 | using namespace std; | 10 | using namespace std; |
@@ -13,6 +15,9 @@ int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, | @@ -13,6 +15,9 @@ int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, | ||
13 | int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv, | 15 | int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv, |
14 | complex<double>*cyv,complex<double>*cjvp,complex<double>*cyvp); | 16 | complex<double>*cyv,complex<double>*cjvp,complex<double>*cyvp); |
15 | 17 | ||
18 | +int bessjyv_sph(int v, double z, double &vm, double* cjv, | ||
19 | + double* cyv, double* cjvp, double* cyvp); | ||
20 | + | ||
16 | void sphere::calcCoeff(ptype lambda, rtsComplex<ptype> ri) | 21 | void sphere::calcCoeff(ptype lambda, rtsComplex<ptype> ri) |
17 | { | 22 | { |
18 | /* These calculations are done at high-precision on the CPU | 23 | /* These calculations are done at high-precision on the CPU |
@@ -59,12 +64,6 @@ void sphere::calcCoeff(ptype lambda, rtsComplex<ptype> ri) | @@ -59,12 +64,6 @@ void sphere::calcCoeff(ptype lambda, rtsComplex<ptype> ri) | ||
59 | cbessjyva_sph(Nl, ka, vm, cjv_ka, cyv_ka, cjvp_ka, cyvp_ka); | 64 | cbessjyva_sph(Nl, ka, vm, cjv_ka, cyv_ka, cjvp_ka, cyvp_ka); |
60 | cbessjyva_sph(Nl, kna, vm, cjv_kna, cyv_kna, cjvp_kna, cyvp_kna); | 65 | cbessjyva_sph(Nl, kna, vm, cjv_kna, cyv_kna, cjvp_kna, cyvp_kna); |
61 | 66 | ||
62 | - | ||
63 | - //cout<<"Begin Sphere---------"<<endl; | ||
64 | - //cout<<"Nl = "<<Nl<<endl; | ||
65 | - //cout<<"ka = "<<ka<<endl; | ||
66 | - //cout<<"kna = "<<kna<<endl; | ||
67 | - | ||
68 | //compute A for each order | 67 | //compute A for each order |
69 | complex<double> i(0, 1); | 68 | complex<double> i(0, 1); |
70 | complex<double> a, b, c, d; | 69 | complex<double> a, b, c, d; |
@@ -83,7 +82,7 @@ void sphere::calcCoeff(ptype lambda, rtsComplex<ptype> ri) | @@ -83,7 +82,7 @@ void sphere::calcCoeff(ptype lambda, rtsComplex<ptype> ri) | ||
83 | //calculate A and add it to the list | 82 | //calculate A and add it to the list |
84 | An = (2.0 * l + 1.0) * pow(i, l) * (a / b); | 83 | An = (2.0 * l + 1.0) * pow(i, l) * (a / b); |
85 | A.push_back(bsComplex(An.real(), An.imag())); | 84 | A.push_back(bsComplex(An.real(), An.imag())); |
86 | - //cout<<"A: "<<An<<endl; | 85 | + |
87 | 86 | ||
88 | //Compute B (external scattering coefficient) | 87 | //Compute B (external scattering coefficient) |
89 | c = cjv_ka[l] * cjvp_kna[l] * nc - cjv_kna[l] * cjvp_ka[l]; | 88 | c = cjv_ka[l] * cjvp_kna[l] * nc - cjv_kna[l] * cjvp_ka[l]; |
@@ -92,7 +91,206 @@ void sphere::calcCoeff(ptype lambda, rtsComplex<ptype> ri) | @@ -92,7 +91,206 @@ void sphere::calcCoeff(ptype lambda, rtsComplex<ptype> ri) | ||
92 | //calculate B and add it to the list | 91 | //calculate B and add it to the list |
93 | Bn = (2.0 * l + 1.0) * pow(i, l) * (c / d); | 92 | Bn = (2.0 * l + 1.0) * pow(i, l) * (c / d); |
94 | B.push_back(bsComplex(Bn.real(), Bn.imag())); | 93 | B.push_back(bsComplex(Bn.real(), Bn.imag())); |
95 | - //cout<<"B: "<<Bn<<endl; | ||
96 | 94 | ||
95 | + | ||
96 | + } | ||
97 | +} | ||
98 | + | ||
99 | +void sphere::calcBesselLut(bsComplex* j, ptype k, bsComplex n, int aR) | ||
100 | +{ | ||
101 | + /*Compute the look-up-table for spherical bessel functions used inside of the sphere | ||
102 | + j = (Nl + 1) x aR array of values | ||
103 | + aR = resolution of j | ||
104 | + */ | ||
105 | + | ||
106 | + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored) | ||
107 | + int bytes = sizeof(complex<double>) * (Nl + 1); | ||
108 | + complex<double>* cjv_knr = (complex<double>*)malloc(bytes); | ||
109 | + complex<double>* cyv_knr = (complex<double>*)malloc(bytes); | ||
110 | + complex<double>* cjvp_knr = (complex<double>*)malloc(bytes); | ||
111 | + complex<double>* cyvp_knr = (complex<double>*)malloc(bytes); | ||
112 | + | ||
113 | + //compute the bessel functions using the CPU-based algorithm | ||
114 | + double vm; | ||
115 | + | ||
116 | + //for each sample along r | ||
117 | + ptype dr = a / (aR - 1); | ||
118 | + ptype r; | ||
119 | + for(int ir = 0; ir < aR; ir++) | ||
120 | + { | ||
121 | + r = ir * dr; | ||
122 | + complex<double> knr( (k*n*r).real(), (k*n*r).imag() ); | ||
123 | + cbessjyva_sph(Nl, knr, vm, cjv_knr, cyv_knr, cjvp_knr, cyvp_knr); | ||
124 | + | ||
125 | + //copy the double data to the bsComplex array | ||
126 | + for(int l=0; l<=Nl; l++) | ||
127 | + { | ||
128 | + //deal with the NaN case at the origin | ||
129 | + if(ir == 0) | ||
130 | + { | ||
131 | + if(l == 0) | ||
132 | + j[ir * (Nl+1)] = 1; | ||
133 | + else | ||
134 | + j[ir * (Nl+1) + l] = 0; | ||
135 | + } | ||
136 | + else | ||
137 | + j[ir * (Nl+1) + l] = bsComplex(cjv_knr[l].real(), cjv_knr[l].imag()); | ||
138 | + } | ||
139 | + } | ||
140 | + | ||
141 | + /*ofstream outfile("besselout.txt"); | ||
142 | + for(int ir = 0; ir < aR; ir++) | ||
143 | + { | ||
144 | + for(int l = 0; l<Nl+1; l++) | ||
145 | + { | ||
146 | + outfile<<j[ir * (Nl+1) + l].real()<<" "; | ||
147 | + } | ||
148 | + outfile<<endl; | ||
149 | + } | ||
150 | + outfile.close();*/ | ||
151 | + | ||
152 | +} | ||
153 | + | ||
154 | +void sphere::calcHankelLut(bsComplex* h, ptype k, int rR) | ||
155 | +{ | ||
156 | + /*Compute the look-up table of spherical Hankel functions (j_l + i*y_l) used outside of the sphere | |
157 | + h = (Nl + 1) x rR array of values | |
158 | + the first sample lies at r = max(a, d_min) | |
159 | + d_max = maximum value of r | |
160 | + rR = resolution of h | |
161 | + */ | ||
162 | + | ||
163 | + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored) | ||
164 | + int bytes = sizeof(double) * (Nl + 1); | ||
165 | + double* cjv_kr = (double*)malloc(bytes); | ||
166 | + double* cyv_kr = (double*)malloc(bytes); | ||
167 | + double* cjvp_kr = (double*)malloc(bytes); | ||
168 | + double* cyvp_kr = (double*)malloc(bytes); | ||
169 | + | ||
170 | + //compute the bessel functions using the CPU-based algorithm | ||
171 | + double vm; | ||
172 | + | ||
173 | + | ||
174 | + | ||
175 | + //for each sample along r | ||
176 | + ptype dr = (d_max - max(a, d_min)) / (rR - 1); | ||
177 | + ptype r; | ||
178 | + for(int ir = 0; ir < rR; ir++) | ||
179 | + { | ||
180 | + r = ir * dr + max(a, d_min); | ||
181 | + double kr = k*r; | ||
182 | + bessjyv_sph(Nl, kr, vm, cjv_kr, cyv_kr, cjvp_kr, cyvp_kr); | ||
183 | + | ||
184 | + //copy the double data to the bsComplex array | ||
185 | + for(int l=0; l<=Nl; l++) | ||
186 | + { | ||
187 | + //h[ir * (Nl+1) + l] = bsComplex(cjv_kr[l].real(), cyv_kr[l].real()); | ||
188 | + h[ir * (Nl+1) + l] = bsComplex(cjv_kr[l], cyv_kr[l]); | ||
189 | + } | ||
97 | } | 190 | } |
191 | + | ||
192 | + /*ofstream outfile("hankelout.txt"); | ||
193 | + for(int ir = 0; ir < rR; ir++) | ||
194 | + { | ||
195 | + outfile<<ir*dr + max(a, d_min)<<" "; | ||
196 | + for(int l = 0; l<=0; l++) | ||
197 | + { | ||
198 | + outfile<<h[ir * (Nl+1) + l].real()<<" "<<h[ir * (Nl+1) + l].imag()<<" "; | ||
199 | + } | ||
200 | + outfile<<endl; | ||
201 | + } | ||
202 | + outfile.close();*/ | ||
203 | +} | ||
204 | + | ||
205 | +void sphere::calcLut(bsComplex* j, bsComplex* h, ptype lambda, bsComplex n, int aR, int rR) | ||
206 | +{ | ||
207 | + /*Compute the look-up-tables for spherical bessel functions used both inside and outside of the sphere. | ||
208 | + j = (Nl + 1) x aR array of values (inside the sphere) | |
209 | + h = (Nl + 1) x rR array of values (outside the sphere) | |
210 | + d_max = maximum distance for the LUT | |
211 | + aR = resolution of j | |
212 | + rR = resolution of h | |
213 | + */ | ||
214 | + | ||
215 | + //compute the magnitude of the k vector | ||
216 | + double k = 2 * PI / lambda; | ||
217 | + | ||
218 | + calcBesselLut(j, k, n, aR); | ||
219 | + calcHankelLut(h, k, rR); | ||
220 | +} | ||
221 | + | ||
222 | +void sphere::calcUp(ptype lambda, bsComplex n, rts::rtsQuad<ptype, 3> nfPlane, unsigned int R) | ||
223 | +{ | ||
224 | + //calculate the parameters of the lookup table | ||
225 | + | ||
226 | + //first find the distance to the closest and furthest points on the nearfield plane | ||
227 | + d_min = nfPlane.dist(p); | ||
228 | + d_max = nfPlane.dist_max(p); | ||
229 | + | ||
230 | + //compute the radius of the cross-section of the sphere with the plane | ||
231 | + ptype a_inter = 0; | ||
232 | + if(d_min < a) | ||
233 | + a_inter = sqrt(a - d_min); | ||
234 | + | ||
235 | + | ||
236 | + //calculate the resolution of the Usp and Uip lookup tables | ||
237 | + int aR = 1 + 2 * R * a_inter / (nfPlane(0, 0) - nfPlane(1, 1)).len(); | ||
238 | + int dR = 2 * R; | ||
239 | + int thetaR = DEFAULT_SPHERE_THETA_R; | ||
240 | + | ||
241 | + //allocate space for the bessel function LUTs | ||
242 | + bsComplex* j = (bsComplex*)malloc(sizeof(bsComplex) * (Nl + 1) * aR); | ||
243 | + bsComplex* h = (bsComplex*)malloc(sizeof(bsComplex) * (Nl + 1) * dR); | ||
244 | + | ||
245 | + calcLut(j, h, lambda, n, aR, dR); | ||
246 | + | ||
247 | + //allocate space for the Usp lookup texture | ||
248 | + Usp.R[0] = dR; | ||
249 | + Usp.R[1] = thetaR; | ||
250 | + Usp.init_gpu(); | ||
251 | + | ||
252 | + //allocate space for the Uip lookup texture | ||
253 | + Uip.R[0] = aR; | ||
254 | + Uip.R[1] = thetaR; | ||
255 | + Uip.init_gpu(); | ||
256 | + | ||
257 | + | ||
258 | + | ||
259 | + scalarUsp(h, dR, thetaR); | ||
260 | + scalarUip(j, aR, thetaR); | ||
261 | + | ||
262 | + scalarslice UspMag = Usp.Mag(); | ||
263 | + UspMag.toImage("Usp.bmp", true); | ||
264 | + | ||
265 | + scalarslice UipMag = Uip.Mag(); | ||
266 | + UipMag.toImage("Uip.bmp", true); | ||
267 | + | ||
268 | + //free memory | ||
269 | + free(j); | ||
270 | + free(h); | ||
271 | + | ||
272 | +} | ||
273 | + | ||
274 | +sphere& sphere::operator=(const sphere &rhs) | ||
275 | +{ | ||
276 | + p = rhs.p; | ||
277 | + a = rhs.a; | ||
278 | + iMaterial = rhs.iMaterial; | ||
279 | + Nl = rhs.Nl; | ||
280 | + n = rhs.n; | ||
281 | + B = rhs.B; | ||
282 | + A = rhs.A; | ||
283 | + | ||
284 | + return *this; | ||
285 | +} | ||
286 | + | ||
287 | +sphere::sphere(const sphere &rhs) | ||
288 | +{ | ||
289 | + p = rhs.p; | ||
290 | + a = rhs.a; | ||
291 | + iMaterial = rhs.iMaterial; | ||
292 | + Nl = rhs.Nl; | ||
293 | + n = rhs.n; | ||
294 | + B = rhs.B; | ||
295 | + A = rhs.A; | ||
98 | } | 296 | } |
1 | +#include "sphere.h" | ||
2 | +#include "rts/math/legendre.h" | ||
3 | + | ||
4 | +__global__ void gpuScalarUsp(bsComplex* Usp, bsComplex* h, bsComplex* B, int Nl, int rR, int thetaR) | ||
5 | +{ | ||
6 | + //get the current coordinate in the plane slice | ||
7 | + int ir = blockIdx.x * blockDim.x + threadIdx.x; | ||
8 | + int itheta = blockIdx.y * blockDim.y + threadIdx.y; | ||
9 | + | ||
10 | + //make sure that the thread indices are in-bounds | ||
11 | + if(itheta >= thetaR || ir >= rR) return; | ||
12 | + | ||
13 | + int i = itheta * rR + ir; | ||
14 | + | ||
15 | + //ptype dr = (rmax - a) / (rR - 1); | ||
16 | + ptype dtheta = (PI) / (thetaR - 1); | ||
17 | + | ||
18 | + //compute the current angle and distance | |
19 | + //ptype r = dr * ir + a; | ||
20 | + ptype theta = dtheta * itheta; | ||
21 | + ptype cos_theta = cos(theta); | ||
22 | + | ||
23 | + //initialize the Legendre polynomial | ||
24 | + ptype P[2]; | ||
25 | + rts::init_legendre<ptype>(cos_theta, P[0], P[1]); | ||
26 | + | ||
27 | + //initialize the result | ||
28 | + bsComplex Us((ptype)0, (ptype)0); | ||
29 | + | ||
30 | + //for each order l | ||
31 | + for(int l=0; l <= Nl; l++) | ||
32 | + { | ||
33 | + if(l == 0) | ||
34 | + { | ||
35 | + Us += B[l] * h[ir * (Nl+1) + l] * P[0]; | ||
36 | + //Us += P[0]; | ||
37 | + } | ||
38 | + else | ||
39 | + { | ||
40 | + if(l > 1) | ||
41 | + { | ||
42 | + rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]); | ||
43 | + } | ||
44 | + Us += B[l] * h[ir * (Nl+1) + l] * P[1]; | ||
45 | + //Us += P[1]; | ||
46 | + } | ||
47 | + | ||
48 | + | ||
49 | + } | ||
50 | + Usp[i] = Us; | ||
51 | + //Usp[i] = h[ir * (Nl+1)]; | ||
52 | + //Usp[i] = ir; | ||
53 | + | ||
54 | +} | ||
55 | + | ||
56 | +__global__ void gpuScalarUip(bsComplex* Uip, bsComplex* j, bsComplex* A, int Nl, int aR, int thetaR) | ||
57 | +{ | ||
58 | + //get the current coordinate in the plane slice | ||
59 | + int ia = blockIdx.x * blockDim.x + threadIdx.x; | ||
60 | + int itheta = blockIdx.y * blockDim.y + threadIdx.y; | ||
61 | + | ||
62 | + //make sure that the thread indices are in-bounds | ||
63 | + if(itheta >= thetaR || ia >= aR) return; | ||
64 | + | ||
65 | + int i = itheta * aR + ia; | ||
66 | + | ||
67 | + ptype dtheta = (PI) / (thetaR - 1); | ||
68 | + | ||
69 | + //compute the current angle (the radial sample is selected by the index ia) | |
70 | + ptype theta = dtheta * itheta; | ||
71 | + ptype cos_theta = cos(theta); | ||
72 | + | ||
73 | + //initialize the Legendre polynomial | ||
74 | + ptype P[2]; | ||
75 | + rts::init_legendre<ptype>(cos_theta, P[0], P[1]); | ||
76 | + | ||
77 | + //initialize the result | ||
78 | + bsComplex Ui((ptype)0, (ptype)0); | ||
79 | + | ||
80 | + //for each order l | ||
81 | + for(int l=0; l <= Nl; l++) | ||
82 | + { | ||
83 | + if(l == 0) | ||
84 | + { | ||
85 | + Ui += A[l] * j[ia * (Nl+1) + l] * P[0]; | ||
86 | + } | ||
87 | + else | ||
88 | + { | ||
89 | + if(l > 1) | ||
90 | + { | ||
91 | + rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]); | ||
92 | + } | ||
93 | + Ui += A[l] * j[ia * (Nl+1) + l] * P[1]; | ||
94 | + } | ||
95 | + | ||
96 | + | ||
97 | + } | ||
98 | + Uip[i] = Ui; | ||
99 | +} | ||
100 | + | ||
101 | +void sphere::scalarUsp(bsComplex* h, int rR, int thetaR) | ||
102 | +{ | ||
103 | + //copy the hankel function to the GPU | ||
104 | + bsComplex* gpu_h; | ||
105 | + HANDLE_ERROR( cudaMalloc( (void**)&gpu_h, sizeof(bsComplex) * (Nl + 1) * rR ) ); | ||
106 | + HANDLE_ERROR( cudaMemcpy( gpu_h, h, sizeof(bsComplex) * (Nl + 1) * rR, cudaMemcpyHostToDevice ) ); | ||
107 | + | ||
108 | + //allocate memory for the scattering coefficients | ||
109 | + bsComplex* gpuB; | ||
110 | + HANDLE_ERROR(cudaMalloc((void**) &gpuB, (Nl+1) * sizeof(bsComplex))); | ||
111 | + //copy the scattering coefficients to the GPU | ||
112 | + HANDLE_ERROR(cudaMemcpy(gpuB, &B[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice)); | ||
113 | + | ||
114 | + //create one thread for each pixel of the field slice | ||
115 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
116 | + dim3 dimGrid((Usp.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Usp.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
117 | + | ||
118 | + gpuScalarUsp<<<dimGrid, dimBlock>>>(Usp.x_hat, gpu_h, gpuB, Nl, rR, thetaR); | ||
119 | + | ||
120 | + //free memory | ||
121 | + cudaFree(gpu_h); | ||
122 | + cudaFree(gpuB); | ||
123 | + | ||
124 | +} | ||
125 | + | ||
126 | +void sphere::scalarUip(bsComplex* j, int rR, int thetaR) | ||
127 | +{ | ||
128 | + //copy the bessel function LUT to the GPU | |
129 | + bsComplex* gpu_j; | ||
130 | + HANDLE_ERROR( cudaMalloc( (void**)&gpu_j, sizeof(bsComplex) * (Nl + 1) * rR ) ); | ||
131 | + HANDLE_ERROR( cudaMemcpy( gpu_j, j, sizeof(bsComplex) * (Nl + 1) * rR, cudaMemcpyHostToDevice ) ); | ||
132 | + | ||
133 | + //allocate memory for the scattering coefficients | ||
134 | + bsComplex* gpuA; | ||
135 | + HANDLE_ERROR(cudaMalloc((void**) &gpuA, (Nl+1) * sizeof(bsComplex))); | ||
136 | + //copy the scattering coefficients to the GPU | ||
137 | + HANDLE_ERROR(cudaMemcpy(gpuA, &A[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice)); | ||
138 | + | ||
139 | + //create one thread for each pixel of the field slice | ||
140 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
141 | + dim3 dimGrid((Uip.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uip.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
142 | + | ||
143 | + gpuScalarUip<<<dimGrid, dimBlock>>>(Uip.x_hat, gpu_j, gpuA, Nl, rR, thetaR); | ||
144 | + | ||
145 | + //free memory | ||
146 | + cudaFree(gpu_j); | ||
147 | + cudaFree(gpuA); | ||
148 | + | ||
149 | +} |
sphere.h
@@ -22,12 +22,12 @@ struct sphere | @@ -22,12 +22,12 @@ struct sphere | ||
22 | //sphere material index | 22 | //sphere material index |
23 | int iMaterial; | 23 | int iMaterial; |
24 | 24 | ||
25 | - //rtsPointer to the scattered field produced by a plane wave | 25 | + //GPU pointer to the scattered field produced by a plane wave |
26 | // this is a function of cos(theta) and |r| (distance from sphere center) | 26 | // this is a function of cos(theta) and |r| (distance from sphere center) |
27 | - //fieldslice surface; | ||
28 | - | ||
29 | - //resolution of the scattered field | ||
30 | - int thetaR, rR; | 27 | + fieldslice Usp; |
28 | + fieldslice Uip; | ||
29 | + ptype d_min; | ||
30 | + ptype d_max; | ||
31 | 31 | ||
32 | //sphere order | 32 | //sphere order |
33 | int Nl; | 33 | int Nl; |
@@ -50,6 +50,12 @@ struct sphere | @@ -50,6 +50,12 @@ struct sphere | ||
50 | //surface = fieldslice(ang, ang/2); | 50 | //surface = fieldslice(ang, ang/2); |
51 | } | 51 | } |
52 | 52 | ||
53 | + //assignment operator | ||
54 | + sphere & operator=(const sphere &rhs); | ||
55 | + | ||
56 | + //copy constructor | ||
57 | + sphere(const sphere &rhs); | ||
58 | + | ||
53 | std::string toStr() | 59 | std::string toStr() |
54 | { | 60 | { |
55 | std::stringstream ss; | 61 | std::stringstream ss; |
@@ -66,8 +72,19 @@ struct sphere | @@ -66,8 +72,19 @@ struct sphere | ||
66 | Nl = ceil( (2 * PI * a) / lambda + 4 * pow( (2 * PI * a) / lambda, 1.0/3.0) + 2); | 72 | Nl = ceil( (2 * PI * a) / lambda + 4 * pow( (2 * PI * a) / lambda, 1.0/3.0) + 2); |
67 | } | 73 | } |
68 | 74 | ||
69 | - void calcCoeff(ptype lambda, rts::rtsComplex<ptype> n); | 75 | + //compute the scattering coefficients |
76 | + void calcCoeff(ptype lambda, bsComplex n); | ||
77 | + | ||
78 | + //compute the bessel function look-up tables | ||
79 | + void calcLut(bsComplex* j, bsComplex* h, ptype lambda, bsComplex n, int aR, int rR); | ||
80 | + void calcBesselLut(bsComplex* j, ptype k, bsComplex n, int aR); | ||
81 | + void calcHankelLut(bsComplex* h, ptype k, int rR); | ||
82 | + | ||
83 | + //calculate the scattering domain Us(theta, r) | ||
84 | + void calcUp(ptype lambda, bsComplex n, rts::rtsQuad<ptype, 3> nfPlane, unsigned int R); | ||
70 | 85 | ||
86 | + void scalarUsp(bsComplex* h, int rR, int thetaR); | ||
87 | + void scalarUip(bsComplex* j, int aR, int thetaR); | ||
71 | 88 | ||
72 | 89 | ||
73 | 90 |