Commit 51b6469a3ee77583099edb0a57e1bb7859c28fd1
1 parent
b6179de6
added look-up tables
Showing
27 changed files
with
1518 additions
and
588 deletions
Show diff stats
bessjy.cpp
... | ... | @@ -13,7 +13,9 @@ |
13 | 13 | // |
14 | 14 | #define _USE_MATH_DEFINES |
15 | 15 | #include <math.h> |
16 | -#include "bessel.h" | |
16 | +#include "bessel.h" | |
17 | + | |
18 | +#define PI 3.14159 | |
17 | 19 | |
18 | 20 | double gamma(double x); |
19 | 21 | // |
... | ... | @@ -426,7 +428,7 @@ int bessjynb(int n,double x,int &nm,double *jn,double *yn, |
426 | 428 | 0.2775764465332031, |
427 | 429 | -1.993531733751297, |
428 | 430 | 2.724882731126854e1}; |
429 | - | |
431 | + | |
430 | 432 | int i,k,m; |
431 | 433 | nm = n; |
432 | 434 | if ((x < 0.0) || (n < 0)) return 1; |
... | ... | @@ -702,5 +704,26 @@ int bessjyv(double v,double x,double &vm,double *jv,double *yv, |
702 | 704 | } |
703 | 705 | vm = n + v0; |
704 | 706 | return 0; |
707 | +} | |
708 | + | |
709 | +int bessjyv_sph(int v, double z, double &vm, double* cjv, | |
710 | + double* cyv, double* cjvp, double* cyvp) | |
711 | +{ | |
712 | + //first, compute the bessel functions of fractional order | |
713 | + bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); | |
714 | + | |
715 | + //iterate through each and scale | |
716 | + for(int n = 0; n<=v; n++) | |
717 | + { | |
718 | + | |
719 | + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); | |
720 | + cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); | |
721 | + | |
722 | + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(PI / (z * 2.0)); | |
723 | + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(PI / (z * 2.0)); | |
724 | + } | |
725 | + | |
726 | + return 0; | |
727 | + | |
705 | 728 | } |
706 | - | |
729 | + | ... | ... |
cbessjy.cpp
... | ... | @@ -724,6 +724,7 @@ int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv, |
724 | 724 | //iterate through each and scale |
725 | 725 | for(int n = 0; n<=v; n++) |
726 | 726 | { |
727 | + | |
727 | 728 | cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); |
728 | 729 | cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); |
729 | 730 | ... | ... |
colormap.h deleted
1 | -#ifndef RTS_COLORMAP_H | |
2 | -#define RTS_COLORMAP_H | |
3 | - | |
4 | -#include <string> | |
5 | -#include <qimage.h> | |
6 | -#include <qcolor.h> | |
7 | -#include "rts/cuda/error.h" | |
8 | - | |
9 | - | |
10 | -#define BREWER_CTRL_PTS 11 | |
11 | - | |
12 | -#ifdef __CUDACC__ | |
13 | -texture<float4, cudaTextureType1D> cudaTexBrewer; | |
14 | -static cudaArray* gpuBrewer; | |
15 | -#endif | |
16 | - | |
17 | - | |
18 | - | |
19 | -namespace rts{ | |
20 | - namespace colormap{ | |
21 | - | |
22 | -enum colormapType {cmBrewer, cmGrayscale}; | |
23 | - | |
24 | -static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size) | |
25 | -{ | |
26 | - //create an image object | |
27 | - QImage image(x_size, y_size, QImage::Format_RGB32); | |
28 | - | |
29 | - int i; | |
30 | - unsigned char r, g, b; | |
31 | - unsigned int x, y; | |
32 | - for(y=0; y<y_size; y++) | |
33 | - for(x=0; x<x_size; x++) | |
34 | - { | |
35 | - //calculate the 1D index | |
36 | - i = y * x_size + x; | |
37 | - | |
38 | - r = buffer[i * 3 + 0]; | |
39 | - g = buffer[i * 3 + 1]; | |
40 | - b = buffer[i * 3 + 2]; | |
41 | - | |
42 | - //set the image pixel | |
43 | - QColor color(r, g, b); | |
44 | - image.setPixel(x, y, color.rgb()); | |
45 | - } | |
46 | - | |
47 | - image.save(filename.c_str()); | |
48 | -} | |
49 | - | |
50 | -#ifdef __CUDACC__ | |
51 | -static void initBrewer() | |
52 | -{ | |
53 | - //initialize the Brewer colormap | |
54 | - | |
55 | - //allocate CPU space | |
56 | - float4 cpuColorMap[BREWER_CTRL_PTS]; | |
57 | - | |
58 | - //define control rtsPoints | |
59 | - cpuColorMap[0] = make_float4(0.192157f, 0.211765f, 0.584314f, 1.0f); | |
60 | - cpuColorMap[1] = make_float4(0.270588f, 0.458824f, 0.705882f, 1.0f); | |
61 | - cpuColorMap[2] = make_float4(0.454902f, 0.678431f, 0.819608f, 1.0f); | |
62 | - cpuColorMap[3] = make_float4(0.670588f, 0.85098f, 0.913725f, 1.0f); | |
63 | - cpuColorMap[4] = make_float4(0.878431f, 0.952941f, 0.972549f, 1.0f); | |
64 | - cpuColorMap[5] = make_float4(1.0f, 1.0f, 0.74902f, 1.0f); | |
65 | - cpuColorMap[6] = make_float4(0.996078f, 0.878431f, 0.564706f, 1.0f); | |
66 | - cpuColorMap[7] = make_float4(0.992157f, 0.682353f, 0.380392f, 1.0f); | |
67 | - cpuColorMap[8] = make_float4(0.956863f, 0.427451f, 0.262745f, 1.0f); | |
68 | - cpuColorMap[9] = make_float4(0.843137f, 0.188235f, 0.152941f, 1.0f); | |
69 | - cpuColorMap[10] = make_float4(0.647059f, 0.0f, 0.14902f, 1.0f); | |
70 | - | |
71 | - | |
72 | - int width = BREWER_CTRL_PTS; | |
73 | - int height = 0; | |
74 | - | |
75 | - | |
76 | - // allocate array and copy colormap data | |
77 | - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat); | |
78 | - | |
79 | - HANDLE_ERROR(cudaMallocArray(&gpuBrewer, &channelDesc, width, height)); | |
80 | - | |
81 | - HANDLE_ERROR(cudaMemcpyToArray(gpuBrewer, 0, 0, cpuColorMap, sizeof(float4)*width, cudaMemcpyHostToDevice)); | |
82 | - | |
83 | - // set texture parameters | |
84 | - cudaTexBrewer.addressMode[0] = cudaAddressModeClamp; | |
85 | - //texBrewer.addressMode[1] = cudaAddressModeClamp; | |
86 | - cudaTexBrewer.filterMode = cudaFilterModeLinear; | |
87 | - cudaTexBrewer.normalized = true; // access with normalized texture coordinates | |
88 | - | |
89 | - // Bind the array to the texture | |
90 | - HANDLE_ERROR(cudaBindTextureToArray( cudaTexBrewer, gpuBrewer, channelDesc)); | |
91 | - | |
92 | -} | |
93 | - | |
94 | -static void destroyBrewer() | |
95 | -{ | |
96 | - HANDLE_ERROR(cudaFreeArray(gpuBrewer)); | |
97 | - | |
98 | -} | |
99 | - | |
100 | -template<class T> | |
101 | -__global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) | |
102 | -{ | |
103 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | |
104 | - if(i >= N) return; | |
105 | - | |
106 | - //compute the normalized value on [minVal maxVal] | |
107 | - float a = (gpuSource[i] - minVal) / (maxVal - minVal); | |
108 | - | |
109 | - //lookup the color | |
110 | - float shift = 1.0/BREWER_CTRL_PTS; | |
111 | - float4 color = tex1D(cudaTexBrewer, a+shift); | |
112 | - | |
113 | - gpuDest[i * 3 + 0] = 255 * color.x; | |
114 | - gpuDest[i * 3 + 1] = 255 * color.y; | |
115 | - gpuDest[i * 3 + 2] = 255 * color.z; | |
116 | -} | |
117 | - | |
118 | -template<class T> | |
119 | -__global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) | |
120 | -{ | |
121 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | |
122 | - if(i >= N) return; | |
123 | - | |
124 | - //compute the normalized value on [minVal maxVal] | |
125 | - float a = (gpuSource[i] - minVal) / (maxVal - minVal); | |
126 | - | |
127 | - gpuDest[i * 3 + 0] = 255 * a; | |
128 | - gpuDest[i * 3 + 1] = 255 * a; | |
129 | - gpuDest[i * 3 + 2] = 255 * a; | |
130 | -} | |
131 | - | |
132 | -template<class T> | |
133 | -static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T minVal = 0, T maxVal = 1, colormapType cm = cmGrayscale, int blockDim = 128) | |
134 | -{ | |
135 | - //This function converts a scalar field on the GPU to a color image on the GPU | |
136 | - int gridDim = (nVals + blockDim - 1)/blockDim; | |
137 | - if(cm == cmGrayscale) | |
138 | - applyGrayscale<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal); | |
139 | - else if(cm == cmBrewer) | |
140 | - { | |
141 | - initBrewer(); | |
142 | - applyBrewer<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal); | |
143 | - destroyBrewer(); | |
144 | - } | |
145 | - | |
146 | -} | |
147 | - | |
148 | -template<class T> | |
149 | -static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T minVal, T maxVal, colormapType cm = cmGrayscale) | |
150 | -{ | |
151 | - //this function converts a scalar field on the GPU to a color image on the CPU | |
152 | - | |
153 | - //first create the color image on the GPU | |
154 | - | |
155 | - //allocate GPU memory for the color image | |
156 | - unsigned char* gpuDest; | |
157 | - HANDLE_ERROR(cudaMalloc( (void**)&gpuDest, sizeof(unsigned char) * nVals * 3 )); | |
158 | - | |
159 | - //HANDLE_ERROR(cudaMemset(gpuSource, 0, sizeof(T) * nVals)); | |
160 | - | |
161 | - //create the image on the gpu | |
162 | - gpu2gpu(gpuSource, gpuDest, nVals, minVal, maxVal, cm); | |
163 | - | |
164 | - //HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3)); | |
165 | - | |
166 | - //copy the image from the GPU to the CPU | |
167 | - HANDLE_ERROR(cudaMemcpy(cpuDest, gpuDest, sizeof(unsigned char) * nVals * 3, cudaMemcpyDeviceToHost)); | |
168 | - | |
169 | - HANDLE_ERROR(cudaFree( gpuDest )); | |
170 | - | |
171 | -} | |
172 | - | |
173 | -template<typename T> | |
174 | -static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) | |
175 | -{ | |
176 | - //allocate a color buffer | |
177 | - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); | |
178 | - | |
179 | - //do the mapping | |
180 | - gpu2cpu<T>(gpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); | |
181 | - | |
182 | - //copy the buffer to an image | |
183 | - buffer2image(cpuBuffer, fileDest, x_size, y_size); | |
184 | - | |
185 | - free(cpuBuffer); | |
186 | -} | |
187 | - | |
188 | -#endif | |
189 | - | |
190 | -template<class T> | |
191 | -static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T valMin, T valMax, colormapType cm = cmGrayscale) | |
192 | -{ | |
193 | - int i; | |
194 | - float a; | |
195 | - float range = valMax - valMin; | |
196 | - for(i = 0; i<nVals; i++) | |
197 | - { | |
198 | - //normalize to the range [valMin valMax] | |
199 | - a = (cpuSource[i] - valMin) / range; | |
200 | - | |
201 | - cpuDest[i * 3 + 0] = 255 * a; | |
202 | - cpuDest[i * 3 + 1] = 255 * a; | |
203 | - cpuDest[i * 3 + 2] = 255 * a; | |
204 | - } | |
205 | - | |
206 | -} | |
207 | - | |
208 | - | |
209 | - | |
210 | -template<typename T> | |
211 | -static void cpu2image(T* cpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) | |
212 | -{ | |
213 | - //allocate a color buffer | |
214 | - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); | |
215 | - | |
216 | - //do the mapping | |
217 | - cpu2cpu<T>(cpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); | |
218 | - | |
219 | - //copy the buffer to an image | |
220 | - buffer2image(cpuBuffer, fileDest, x_size, y_size); | |
221 | - | |
222 | - free(cpuBuffer); | |
223 | - | |
224 | -} | |
225 | - | |
226 | -}} //end namespace colormap and rts | |
227 | - | |
228 | -#endif | |
229 | - |
dataTypes.h
defaults.h
... | ... | @@ -15,14 +15,14 @@ |
15 | 15 | #define DEFAULT_FOCUS_X 0 |
16 | 16 | #define DEFAULT_FOCUS_Y 0 |
17 | 17 | #define DEFAULT_FOCUS_Z 0 |
18 | -#define DEFAULT_INCIDENT_ORDER 100 | |
18 | +//#define DEFAULT_INCIDENT_ORDER 20 | |
19 | 19 | #define DEFAULT_STABILITY_PARM 1.4 |
20 | 20 | |
21 | 21 | //optics |
22 | -#define DEFAULT_CONDENSER_MIN 0.0 | |
22 | +#define DEFAULT_CONDENSER_MIN 0 | |
23 | 23 | #define DEFAULT_CONDENSER_MAX 1 |
24 | 24 | |
25 | -#define DEFAULT_OBJECTIVE_MIN 0.0 | |
25 | +#define DEFAULT_OBJECTIVE_MIN 0 | |
26 | 26 | #define DEFAULT_OBJECTIVE_MAX 1 |
27 | 27 | |
28 | 28 | //incident light direction |
... | ... | @@ -36,17 +36,20 @@ |
36 | 36 | //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective |
37 | 37 | |
38 | 38 | |
39 | -#define DEFAULT_SLICE_MIN_X -5 | |
40 | -#define DEFAULT_SLICE_MIN_Y 0 | |
41 | -#define DEFAULT_SLICE_MIN_Z -5 | |
39 | +#define DEFAULT_PLANE_MIN_X -5 | |
40 | +#define DEFAULT_PLANE_MIN_Y 0 | |
41 | +#define DEFAULT_PLANE_MIN_Z -5 | |
42 | 42 | |
43 | -#define DEFAULT_SLICE_MAX_X 5 | |
44 | -#define DEFAULT_SLICE_MAX_Y 0 | |
45 | -#define DEFAULT_SLICE_MAX_Z 5 | |
43 | +#define DEFAULT_PLANE_MAX_X 5 | |
44 | +#define DEFAULT_PLANE_MAX_Y 0 | |
45 | +#define DEFAULT_PLANE_MAX_Z 5 | |
46 | 46 | |
47 | -#define DEFAULT_SLICE_NORM_X 0 | |
48 | -#define DEFAULT_SLICE_NORM_Y 1 | |
49 | -#define DEFAULT_SLICE_NORM_Z 0 | |
47 | +#define DEFAULT_PLANE_NORM_X 0 | |
48 | +#define DEFAULT_PLANE_NORM_Y 1 | |
49 | +#define DEFAULT_PLANE_NORM_Z 0 | |
50 | + | |
51 | +#define DEFAULT_PLANE_SIZE 40 | |
52 | +#define DEFAULT_PLANE_POSITION 0 | |
50 | 53 | |
51 | 54 | |
52 | 55 | /* |
... | ... | @@ -64,21 +67,23 @@ |
64 | 67 | */ |
65 | 68 | |
66 | 69 | |
67 | -#define DEFAULT_FIELD_ORDER 200 | |
70 | +#define DEFAULT_FIELD_ORDER 10 | |
68 | 71 | |
69 | -#define DEFAULT_SAMPLES 200 | |
72 | +#define DEFAULT_SAMPLES 400 | |
70 | 73 | |
71 | 74 | #define DEFAULT_SLICE_RES 256 |
72 | 75 | |
76 | +#define DEFAULT_SPHERE_THETA_R 1000 | |
77 | + | |
73 | 78 | #define DEFAULT_PADDING 1 |
74 | 79 | #define DEFAULT_SUPERSAMPLE 1 |
75 | 80 | |
76 | -#define DEFAULT_INTENSITY_FILE "testappend" | |
81 | +#define DEFAULT_INTENSITY_FILE "out_i.bmp" | |
77 | 82 | #define DEFAULT_TRANSMITTANCE_FILE "" |
78 | -#define DEFAULT_ABSORBANCE_FILE "out_a" | |
83 | +#define DEFAULT_ABSORBANCE_FILE "out_a.bmp" | |
79 | 84 | #define DEFAULT_NEAR_FILE "out_n.bmp" |
80 | 85 | #define DEFAULT_FAR_FILE "out_f.bmp" |
81 | -#define DEFAULT_EXTENDED_SOURCE "einstein_small.jpg" | |
86 | +#define DEFAULT_EXTENDED_SOURCE "" | |
82 | 87 | #define DEFAULT_FIELD_TYPE "magnitude" |
83 | 88 | #define DEFAULT_FORMAT fileoutStruct::formatImage |
84 | 89 | #define DEFAULT_COLORMAP "brewer" | ... | ... |
fieldslice.cpp
... | ... | @@ -8,14 +8,16 @@ |
8 | 8 | using namespace std; |
9 | 9 | |
10 | 10 | fieldslice::fieldslice(unsigned int x_size, unsigned int y_size) |
11 | -{ | |
11 | +{ | |
12 | + x_hat = y_hat = z_hat = NULL; | |
13 | + | |
12 | 14 | //save the slice resolution |
13 | 15 | R[0] = x_size; |
14 | 16 | R[1] = x_size; |
15 | 17 | |
16 | 18 | scalarField = true; |
17 | 19 | |
18 | - //init_gpu(); | |
20 | + init_gpu(); | |
19 | 21 | |
20 | 22 | |
21 | 23 | } |
... | ... | @@ -101,5 +103,5 @@ fieldslice::fieldslice() |
101 | 103 | |
102 | 104 | fieldslice::~fieldslice() |
103 | 105 | { |
104 | - //kill_gpu(); | |
106 | + kill_gpu(); | |
105 | 107 | } | ... | ... |
fieldslice.cu
1 | 1 | #include "fieldslice.h" |
2 | 2 | #include "dataTypes.h" |
3 | -#include "rts/cuda/error.h" | |
3 | +#include "rts/cuda/error.h" | |
4 | +#include "rts/cuda/threads.h" | |
4 | 5 | |
5 | 6 | |
6 | 7 | __global__ void field_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* I, unsigned int N) |
7 | 8 | { |
8 | 9 | //compute the index for this thread |
9 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | |
10 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; | |
11 | + int i = ThreadIndex1D(); | |
12 | + | |
10 | 13 | if(i >= N) return; |
11 | 14 | |
12 | 15 | ptype xm = x[i].abs(); |
... | ... | @@ -66,7 +69,8 @@ __global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, pty |
66 | 69 | __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) |
67 | 70 | { |
68 | 71 | //compute the index for this thread |
69 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | |
72 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; | |
73 | + int i = ThreadIndex1D(); | |
70 | 74 | if(i >= N) return; |
71 | 75 | |
72 | 76 | V[i] = field_component[i].real(); |
... | ... | @@ -75,7 +79,8 @@ __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) |
75 | 79 | __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N) |
76 | 80 | { |
77 | 81 | //compute the index for this thread |
78 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | |
82 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; | |
83 | + int i = ThreadIndex1D(); | |
79 | 84 | if(i >= N) return; |
80 | 85 | |
81 | 86 | V[i] = field_component[i].imag(); |
... | ... | @@ -84,7 +89,8 @@ __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned i |
84 | 89 | __global__ void field_sqrt(ptype* input, ptype* output, unsigned int N) |
85 | 90 | { |
86 | 91 | //compute the index for this thread |
87 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | |
92 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; | |
93 | + int i = ThreadIndex1D(); | |
88 | 94 | if(i >= N) return; |
89 | 95 | |
90 | 96 | output[i] = sqrt(input[i]); |
... | ... | @@ -115,7 +121,8 @@ scalarslice fieldslice::Mag() |
115 | 121 | |
116 | 122 | //compute the total number of values in the slice |
117 | 123 | unsigned int N = R[0] * R[1]; |
118 | - int gridDim = (N+BLOCK-1)/BLOCK; | |
124 | + //int gridDim = (N+BLOCK-1)/BLOCK; | |
125 | + dim3 gridDim = GenGrid1D(N, BLOCK); | |
119 | 126 | |
120 | 127 | field_intensity<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, result->S, N); |
121 | 128 | field_sqrt<<<gridDim, BLOCK>>>(result->S, result->S, N); |
... | ... | @@ -132,7 +139,8 @@ scalarslice fieldslice::Real() |
132 | 139 | |
133 | 140 | //compute the total number of values in the slice |
134 | 141 | unsigned int N = R[0] * R[1]; |
135 | - int gridDim = (N+BLOCK-1)/BLOCK; | |
142 | + //int gridDim = (N+BLOCK-1)/BLOCK; | |
143 | + dim3 gridDim = GenGrid1D(N, BLOCK); | |
136 | 144 | |
137 | 145 | field_real<<<gridDim, BLOCK>>>(x_hat, result->S, N); |
138 | 146 | |
... | ... | @@ -148,7 +156,8 @@ scalarslice fieldslice::Imag() |
148 | 156 | |
149 | 157 | //compute the total number of values in the slice |
150 | 158 | unsigned int N = R[0] * R[1]; |
151 | - int gridDim = (N+BLOCK-1)/BLOCK; | |
159 | + //int gridDim = (N+BLOCK-1)/BLOCK; | |
160 | + dim3 gridDim = GenGrid1D(N, BLOCK); | |
152 | 161 | |
153 | 162 | field_imaginary<<<gridDim, BLOCK>>>(x_hat, result->S, N); |
154 | 163 | |
... | ... | @@ -192,7 +201,6 @@ void fieldslice::ScaleField(ptype v) |
192 | 201 | |
193 | 202 | //compute the total number of values in the slice |
194 | 203 | unsigned int N = R[0] * R[1]; |
195 | - //cout<<"Size of mag field: "<<N<<endl; | |
196 | 204 | int gridDim = (N+BLOCK-1)/BLOCK; |
197 | 205 | |
198 | 206 | field_scale<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, N, v); |
... | ... | @@ -200,19 +208,23 @@ void fieldslice::ScaleField(ptype v) |
200 | 208 | } |
201 | 209 | |
202 | 210 | void fieldslice::init_gpu() |
203 | -{ | |
211 | +{ | |
212 | + //if the field has no size, return | |
213 | + if(R[0] == 0 || R[1] == 0) | |
214 | + return; | |
215 | + | |
216 | + //free any previous memory allocations | |
217 | + if(x_hat) | |
218 | + HANDLE_ERROR(cudaFree(x_hat)); | |
219 | + if(y_hat) | |
220 | + HANDLE_ERROR(cudaFree(y_hat)); | |
221 | + if(z_hat) | |
222 | + HANDLE_ERROR(cudaFree(z_hat)); | |
223 | + | |
204 | 224 | //allocate space on the GPU for the field slice |
205 | 225 | HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex))); |
206 | - //HANDLE_ERROR(cudaMemset(x_hat, 0, R[0] * R[1] * sizeof(bsComplex))); | |
207 | 226 | |
208 | - //if the field is scalar, y_hat and z_hat are unused | |
209 | - if(scalarField) | |
210 | - { | |
211 | - y_hat = NULL; | |
212 | - z_hat = NULL; | |
213 | - | |
214 | - } | |
215 | - else | |
227 | + if(!scalarField) | |
216 | 228 | { |
217 | 229 | HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex))); |
218 | 230 | //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex))); |
... | ... | @@ -233,6 +245,8 @@ void fieldslice::kill_gpu() |
233 | 245 | if(z_hat != NULL) |
234 | 246 | HANDLE_ERROR(cudaFree(z_hat)); |
235 | 247 | |
248 | + x_hat = y_hat = z_hat = NULL; | |
249 | + | |
236 | 250 | } |
237 | 251 | |
238 | 252 | void fieldslice::clear_gpu() |
... | ... | @@ -275,7 +289,7 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) |
275 | 289 | result.scalarField = scalarField; |
276 | 290 | |
277 | 291 | //allocate space for the new field |
278 | - result.init_gpu(); | |
292 | + //result.init_gpu(); | |
279 | 293 | |
280 | 294 | //create one thread for each pixel of the field slice |
281 | 295 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); |
... | ... | @@ -291,3 +305,57 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) |
291 | 305 | |
292 | 306 | return result; |
293 | 307 | } |
308 | + | |
309 | +fieldslice::fieldslice(const fieldslice& rhs) | |
310 | +{ | |
311 | + R[0] = rhs.R[0]; | |
312 | + R[1] = rhs.R[1]; | |
313 | + scalarField = rhs.scalarField; | |
314 | + | |
315 | + x_hat = y_hat = z_hat = NULL; | |
316 | + | |
317 | + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1]; | |
318 | + if(rhs.x_hat != NULL) | |
319 | + { | |
320 | + HANDLE_ERROR(cudaMalloc( (void**)&x_hat, bytes)); | |
321 | + HANDLE_ERROR(cudaMemcpy( x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice)); | |
322 | + } | |
323 | + if(rhs.y_hat != NULL) | |
324 | + { | |
325 | + HANDLE_ERROR(cudaMalloc( (void**)&y_hat, bytes)); | |
326 | + HANDLE_ERROR(cudaMemcpy( y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice)); | |
327 | + } | |
328 | + if(rhs.z_hat != NULL) | |
329 | + { | |
330 | + HANDLE_ERROR(cudaMalloc( (void**)&z_hat, bytes)); | |
331 | + HANDLE_ERROR(cudaMemcpy( z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice)); | |
332 | + } | |
333 | + | |
334 | +} | |
335 | + | |
336 | +fieldslice& fieldslice::operator=(const fieldslice& rhs) | |
337 | +{ | |
338 | + //make sure this isn't a self-assignment | |
339 | + if(this != &rhs) | |
340 | + { | |
341 | + //make a shallow copy | |
342 | + R[0] = rhs.R[0]; | |
343 | + R[1] = rhs.R[1]; | |
344 | + scalarField = rhs.scalarField; | |
345 | + | |
346 | + //initialize to new parameters | |
347 | + init_gpu(); | |
348 | + | |
349 | + //make a deep copy | |
350 | + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1]; | |
351 | + if(x_hat != NULL) | |
352 | + HANDLE_ERROR(cudaMemcpy(x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice)); | |
353 | + if(y_hat != NULL) | |
354 | + HANDLE_ERROR(cudaMemcpy(y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice)); | |
355 | + if(z_hat != NULL) | |
356 | + HANDLE_ERROR(cudaMemcpy(z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice)); | |
357 | + } | |
358 | + | |
359 | + return *this; | |
360 | + | |
361 | +} | ... | ... |
fieldslice.h
... | ... | @@ -31,6 +31,9 @@ struct fieldslice |
31 | 31 | |
32 | 32 | ~fieldslice(); |
33 | 33 | |
34 | + //copy constructor | |
35 | + fieldslice(const fieldslice& rhs); | |
36 | + | |
34 | 37 | //void setPos(bsPoint pMin, bsPoint pMax, bsVector N); |
35 | 38 | |
36 | 39 | scalarslice Mag(); |
... | ... | @@ -47,6 +50,7 @@ struct fieldslice |
47 | 50 | |
48 | 51 | //crop a region from the field |
49 | 52 | fieldslice crop(int u, int v, int su, int sv); |
53 | + fieldslice& operator=(const fieldslice& rhs); | |
50 | 54 | |
51 | 55 | void init_gpu(); |
52 | 56 | void kill_gpu(); | ... | ... |
fileout.cu
... | ... | @@ -186,11 +186,21 @@ void fileoutStruct::Save(microscopeStruct* scope) |
186 | 186 | //save images of the fields in the microscope |
187 | 187 | |
188 | 188 | //if the user specifies an extended source |
189 | - if(scope->focalPoints.size() > 1) | |
189 | + if(scope->focalPoints.size() > 0) | |
190 | 190 | { |
191 | 191 | //simulate the extended source and output the detector image |
192 | 192 | scope->SimulateExtendedSource(); |
193 | 193 | |
194 | + //saveNearField(&scope->nf); | |
195 | + saveFarField(scope); | |
196 | + | |
197 | + //save the detector images | |
198 | + saveDetector(scope); | |
199 | + | |
200 | + //simulate scattering for the last point (so that you have a near field image) | |
201 | + scope->SimulateScattering(); | |
202 | + saveNearField(&scope->nf); | |
203 | + | |
194 | 204 | } |
195 | 205 | else |
196 | 206 | { |
... | ... | @@ -203,12 +213,15 @@ void fileoutStruct::Save(microscopeStruct* scope) |
203 | 213 | //run the far-field simulation |
204 | 214 | scope->SimulateImaging(); |
205 | 215 | |
216 | + //saveNearField(&scope->nf); | |
206 | 217 | saveFarField(scope); |
207 | 218 | |
219 | + //save the detector images | |
220 | + saveDetector(scope); | |
221 | + | |
208 | 222 | } |
209 | 223 | |
210 | - //save the detector images | |
211 | - saveDetector(scope); | |
224 | + | |
212 | 225 | |
213 | 226 | |
214 | 227 | } | ... | ... |
fileout.h
... | ... | @@ -5,7 +5,7 @@ |
5 | 5 | //#include "defaults.h" |
6 | 6 | #include "dataTypes.h" |
7 | 7 | |
8 | -#include "colormap.h" | |
8 | +#include "rts/graphics/colormap.h" | |
9 | 9 | #include "fieldslice.h" |
10 | 10 | #include "nearfield.h" |
11 | 11 | #include "microscope.h" |
... | ... | @@ -34,7 +34,7 @@ struct fileoutStruct{ |
34 | 34 | //image_source source; |
35 | 35 | |
36 | 36 | //color map info |
37 | - rts::colormap::colormapType colormap; | |
37 | + rts::colormapType colormap; | |
38 | 38 | ptype colorMax; |
39 | 39 | |
40 | 40 | void Save(microscopeStruct* scope); | ... | ... |
main.cpp
... | ... | @@ -24,6 +24,7 @@ microscopeStruct* SCOPE; |
24 | 24 | #include "warnings.h" |
25 | 25 | |
26 | 26 | fileoutStruct gFileOut; |
27 | +bool verbose = false; | |
27 | 28 | using namespace std; |
28 | 29 | |
29 | 30 | int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, |
... | ... | @@ -31,32 +32,19 @@ int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, |
31 | 32 | |
32 | 33 | int main(int argc, char *argv[]) |
33 | 34 | { |
34 | - //test Envi loading and saving | |
35 | - //EnviFile envi("testenvi", "w"); | |
36 | - | |
37 | - //float* data = (float*)malloc(sizeof(float) * 100 * 100); | |
38 | - //envi.addBand(data, 100, 100, 100); | |
39 | - | |
40 | - //envi.close(); | |
41 | - | |
42 | - //return 0; | |
43 | 35 | |
44 | 36 | SCOPE = new microscopeStruct(); |
45 | 37 | |
46 | - cout<<SCOPE->nf.Uf.R[0]<<endl; | |
47 | - | |
48 | 38 | LoadParameters(argc, argv); |
49 | 39 | |
50 | - //TestSimulation(NF, SCOPE, &gFileOut); | |
51 | - | |
52 | 40 | //initialize GPU memory for fields |
53 | 41 | SCOPE->init(); |
54 | 42 | |
55 | - OutputOptions(); | |
56 | - | |
57 | 43 | gFileOut.Save(SCOPE); |
58 | 44 | |
59 | - //NF->destroy(); | |
45 | + if(verbose) | |
46 | + OutputOptions(); | |
47 | + | |
60 | 48 | SCOPE->destroy(); |
61 | 49 | |
62 | 50 | ... | ... |
microscope.cu
... | ... | @@ -4,7 +4,7 @@ |
4 | 4 | #include "rts/tools/progressbar.h" |
5 | 5 | #include "rts/cuda/timer.h" |
6 | 6 | #include "dataTypes.h" |
7 | -#include "colormap.h" | |
7 | +#include "rts/graphics/colormap.h" | |
8 | 8 | |
9 | 9 | #include <QImage> |
10 | 10 | |
... | ... | @@ -112,8 +112,8 @@ void microscopeStruct::getFarField() |
112 | 112 | //Compute the Far Field image of the focal plane |
113 | 113 | |
114 | 114 | //clear the memory from previous detector fields |
115 | - Ud.kill_gpu(); | |
116 | - Ufd.kill_gpu(); | |
115 | + //Ud.kill_gpu(); | |
116 | + //Ufd.kill_gpu(); | |
117 | 117 | |
118 | 118 | //first crop the filtered near-field image of the source and scattered fields |
119 | 119 | Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]); |
... | ... | @@ -261,9 +261,14 @@ void microscopeStruct::SimulateExtendedSource() |
261 | 261 | t += gpuStopTimer(); |
262 | 262 | |
263 | 263 | rtsProgressBar((double)(i+1)/(double)npts * 100); |
264 | + //unsigned char c; | |
265 | + //cin>>c; | |
264 | 266 | } |
265 | - cout<<endl; | |
266 | - cout<<"Time per source: "<<t/npts<<"ms"<<endl; | |
267 | + if(verbose) | |
268 | + { | |
269 | + cout<<endl; | |
270 | + cout<<"Time per source: "<<t/npts<<"ms"<<endl; | |
271 | + } | |
267 | 272 | |
268 | 273 | } |
269 | 274 | |
... | ... | @@ -304,3 +309,15 @@ void microscopeStruct::LoadExtendedSource(std::string filename) |
304 | 309 | } |
305 | 310 | } |
306 | 311 | } |
312 | + | |
313 | +std::string microscopeStruct::toStr() | |
314 | +{ | |
315 | + stringstream ss; | |
316 | + ss<<nf.toStr(); | |
317 | + | |
318 | + ss<<"----------Optics--------------"<<endl<<endl; | |
319 | + ss<<"Objective NA: "<<objective[0]<<" to "<<objective[1]<<endl; | |
320 | + return ss.str(); | |
321 | + | |
322 | + | |
323 | +} | ... | ... |
microscope.h
montecarlo.cpp
... | ... | @@ -35,18 +35,12 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) |
35 | 35 | ptype inPhi = asin(NAin); |
36 | 36 | ptype outPhi = asin(NAout); |
37 | 37 | |
38 | - //cout<<"inPhi: "<<inPhi<<endl; | |
39 | - //cout<<"outPhi: "<<outPhi<<endl; | |
40 | - | |
41 | 38 | //calculate the z-values associated with these angles |
42 | 39 | ptype inZ = cos(inPhi); |
43 | 40 | ptype outZ = cos(outPhi); |
44 | 41 | |
45 | 42 | ptype rangeZ = inZ - outZ; |
46 | 43 | |
47 | - //cout<<"inZ: "<<inZ<<endl; | |
48 | - //cout<<"outZ: "<<outZ<<endl; | |
49 | - | |
50 | 44 | //draw a distribution of random phi, z values |
51 | 45 | ptype z, phi, theta; |
52 | 46 | for(int i=0; i<N; i++) |
... | ... | @@ -58,7 +52,6 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) |
58 | 52 | phi = acos(z); |
59 | 53 | |
60 | 54 | //compute and store cartesian coordinates |
61 | - //bsVector spherical(1, theta + kSph[1], phi + kSph[2]); | |
62 | 55 | bsVector spherical(1, theta, phi); |
63 | 56 | bsVector cart = spherical.sph2cart(); |
64 | 57 | samples[i] = rotation * cart; | ... | ... |
nearfield.cpp
1 | 1 | #include "nearfield.h" |
2 | +#include <time.h> | |
3 | +#include <math.h> | |
4 | + | |
5 | +#ifdef _WIN32 | |
6 | +#define isnan(x) _isnan(x) | |
7 | +#define isinf(x) (!_finite(x)) | |
8 | +#endif | |
9 | + | |
10 | +int bessjyv_sph(int v, double z, double &vm, double* cjv, | |
11 | + double* cyv, double* cjvp, double* cyvp); | |
2 | 12 | |
3 | 13 | nearfieldStruct::nearfieldStruct() |
4 | 14 | { |
5 | 15 | scalarSim = true; |
6 | 16 | planeWave = false; |
17 | + lut_us = true; | |
18 | + lut_uf = false; | |
7 | 19 | |
8 | 20 | nWaves = 0; |
9 | 21 | } |
... | ... | @@ -46,6 +58,8 @@ std::string nearfieldStruct::toStr() |
46 | 58 | ss<<"Condenser NA: "<<condenser[0]<<" to "<<condenser[1]<<std::endl; |
47 | 59 | ss<<"Focal Point: "<<focus[0]<<", "<<focus[1]<<", "<<focus[2]<<std::endl; |
48 | 60 | ss<<"Field Slice: "<<std::endl; |
61 | + if(lut_us) | |
62 | + ss<<"LUT Parameters --- min: "<<d_min<<" max: "<<d_max<<std::endl; | |
49 | 63 | ss<<pos<<std::endl; |
50 | 64 | |
51 | 65 | ss<<std::endl<<"---------Materials-----------"<<std::endl; |
... | ... | @@ -61,6 +75,10 @@ std::string nearfieldStruct::toStr() |
61 | 75 | for(unsigned int s=0; s<sVector.size(); s++) |
62 | 76 | ss<<sVector[s].toStr()<<std::endl; |
63 | 77 | |
78 | + ss<<"---------Timings-------------"<<std::endl; | |
79 | + ss<<"Uf = "<<t_Uf<<"ms"<<std::endl; | |
80 | + ss<<"Us = "<<t_Us<<"ms"<<std::endl; | |
81 | + | |
64 | 82 | return ss.str(); |
65 | 83 | } |
66 | 84 | |
... | ... | @@ -70,7 +88,8 @@ void nearfieldStruct::calcWaves() |
70 | 88 | inWaves.resize(nWaves); |
71 | 89 | |
72 | 90 | //re-seed the random number generator |
73 | - //srand(seed); | |
91 | + //srand(time(NULL)); | |
92 | + srand(NULL); | |
74 | 93 | |
75 | 94 | //calculate the monte-carlo samples |
76 | 95 | mcSampleNA(&inWaves[0], nWaves, k, condenser[0], condenser[1]); |
... | ... | @@ -84,6 +103,8 @@ void nearfieldStruct::calcSpheres() |
84 | 103 | //calculate all of the constants necessary to evaluate the scattered field |
85 | 104 | //estimate the order required to represent the scattered field for each sphere |
86 | 105 | |
106 | + | |
107 | + | |
87 | 108 | //for each sphere |
88 | 109 | for(int i=0; i<sVector.size(); i++) |
89 | 110 | { |
... | ... | @@ -91,12 +112,10 @@ void nearfieldStruct::calcSpheres() |
91 | 112 | |
92 | 113 | //calculate the required order |
93 | 114 | sVector[i].calcNl(lambda); |
94 | - //std::cout<<sVector[i].Nl<<std::endl; | |
95 | 115 | |
96 | 116 | //set the refractive index for the sphere |
97 | 117 | int imat = sVector[i].iMaterial; |
98 | 118 | rts::rtsComplex<ptype> n = mVector[imat](lambda); |
99 | - //std::cout<<"Sphere refractive index: "<<n<<std::endl; | |
100 | 119 | |
101 | 120 | //calculate the scattering coefficients |
102 | 121 | sVector[i].calcCoeff(lambda, n); |
... | ... | @@ -104,18 +123,109 @@ void nearfieldStruct::calcSpheres() |
104 | 123 | //save the refractive index |
105 | 124 | sVector[i].n = n; |
106 | 125 | |
126 | + //if the LUT is used, calculate Usp(theta, r) | |
127 | + if(lut_us) | |
128 | + { | |
129 | + sVector[i].calcUp(lambda, n, pos, max(U.R[0], U.R[1])); | |
130 | + } | |
131 | + | |
132 | + | |
107 | 133 | } |
108 | 134 | |
109 | 135 | } |
110 | 136 | |
137 | +void nearfieldStruct::calcUs() | |
138 | +{ | |
139 | + | |
140 | + | |
141 | + if(lut_us) | |
142 | + scalarUpLut(); | |
143 | + else | |
144 | + scalarUs(); | |
145 | +} | |
146 | + | |
147 | +void nearfieldStruct::calcUf() | |
148 | +{ | |
149 | + if(lut_uf) | |
150 | + scalarUfLut(); | |
151 | + else | |
152 | + scalarUf(); | |
153 | +} | |
154 | + | |
111 | 155 | void nearfieldStruct::Simulate() |
112 | 156 | { |
157 | + //initialize timings | |
158 | + t_Uf = 0; | |
159 | + t_Us = 0; | |
160 | + | |
113 | 161 | //compute a set of plane waves for Monte-Carlo simulation |
114 | 162 | calcWaves(); |
115 | 163 | |
116 | 164 | //the near field has to be simulated no matter what the output rtsPoint is |
117 | - scalarUf(); | |
165 | + calcUf(); | |
118 | 166 | calcSpheres(); |
119 | - scalarUs(); | |
167 | + calcUs(); | |
120 | 168 | sumUf(); |
169 | + | |
170 | + //U.Mag().toImage("testU.bmp"); | |
171 | +} | |
172 | + | |
173 | +void nearfieldStruct::calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR) | |
174 | +{ | |
175 | + /*Compute the look-up-table for spherical bessel functions used for the incident field | |
176 | + j = (Nl + 1) x aR array of values | |
177 | + aR = resolution of j | |
178 | + */ | |
179 | + | |
180 | + //compute the wavenumber | |
181 | + ptype k = 2 * PI / lambda; | |
182 | + unsigned int Nl = m; | |
183 | + | |
184 | + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored) | |
185 | + int bytes = sizeof(double) * (Nl + 1); | |
186 | + double* cjv_kd = (double*)malloc(bytes); | |
187 | + double* cyv_kd = (double*)malloc(bytes); | |
188 | + double* cjvp_kd = (double*)malloc(bytes); | |
189 | + double* cyvp_kd = (double*)malloc(bytes); | |
190 | + | |
191 | + //compute the bessel functions using the CPU-based algorithm | |
192 | + double vm; | |
193 | + | |
194 | + //for each sample along r | |
195 | + ptype dr = (d_max - d_min) / (dR - 1); | |
196 | + ptype d; | |
197 | + ptype jv; | |
198 | + for(int id = 0; id < dR; id++) | |
199 | + { | |
200 | + d = id * dr + d_min; | |
201 | + double kd = k*d; | |
202 | + bessjyv_sph(Nl, kd, vm, cjv_kd, cyv_kd, cjvp_kd, cyvp_kd); | |
203 | + | |
204 | + //copy the double data to the bsComplex array | |
205 | + for(int l=0; l<=Nl; l++) | |
206 | + { | |
207 | + jv = cjv_kd[l]; | |
208 | + if(isnan(jv) || isinf(jv)) | |
209 | + { | |
210 | + if(kd == 0 && l == 0) | |
211 | + jv = 1; | |
212 | + else | |
213 | + jv = 0; | |
214 | + } | |
215 | + j[id * (Nl+1) + l] = jv; | |
216 | + } | |
217 | + } | |
218 | + | |
219 | + /*ofstream outfile("uf_besselout.txt"); | |
220 | + for(int ir = 0; ir < dR; ir++) | |
221 | + { | |
222 | + outfile<<ir*dr + d_min<<endl; | |
223 | + for(int l = 0; l<=Nl; l++) | |
224 | + { | |
225 | + outfile<<j[ir * (Nl+1) + l]<<" --"; | |
226 | + } | |
227 | + outfile<<endl; | |
228 | + } | |
229 | + outfile.close();*/ | |
230 | + | |
121 | 231 | } | ... | ... |
nearfield.h
... | ... | @@ -31,6 +31,8 @@ struct nearfieldStruct |
31 | 31 | |
32 | 32 | //slices for the focused field |
33 | 33 | fieldslice Uf; |
34 | + ptype d_min, d_max; | |
35 | + | |
34 | 36 | // and total field: Uf + sum(Us) |
35 | 37 | fieldslice U; |
36 | 38 | |
... | ... | @@ -43,6 +45,14 @@ struct nearfieldStruct |
43 | 45 | //flag for a plane wave |
44 | 46 | bool planeWave; |
45 | 47 | |
48 | + //flag for using a LUT | |
49 | + bool lut_uf; | |
50 | + bool lut_us; | |
51 | + | |
52 | + //timings | |
53 | + float t_Uf; | |
54 | + float t_Us; | |
55 | + | |
46 | 56 | |
47 | 57 | |
48 | 58 | //---------Scatterers------------ |
... | ... | @@ -78,10 +88,17 @@ struct nearfieldStruct |
78 | 88 | void setPos(bsPoint pMin, bsPoint pMax, bsVector normal); |
79 | 89 | |
80 | 90 | //this function re-computes the focused field |
91 | + void calcUf(); | |
81 | 92 | void scalarUf(); |
93 | + void scalarUfLut(); | |
94 | + | |
95 | + void calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR); | |
82 | 96 | |
83 | 97 | //compute the field scattered by all of the materials |
98 | + void calcUs(); | |
84 | 99 | void scalarUs(); |
100 | + void scalarUpLut(); | |
101 | + | |
85 | 102 | |
86 | 103 | //add the incident field to the sum of scattered fields |
87 | 104 | void sumUf(); | ... | ... |
nfScalarUf.cu
... | ... | @@ -5,7 +5,7 @@ |
5 | 5 | #include "rts/cuda/error.h" |
6 | 6 | #include "rts/cuda/timer.h" |
7 | 7 | |
8 | - | |
8 | +//Incident field for a single plane wave | |
9 | 9 | __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR) |
10 | 10 | { |
11 | 11 | /*Compute the scalar focused field using Debye focusing |
... | ... | @@ -41,7 +41,8 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p |
41 | 41 | Uf[i] = exp(d) * A; |
42 | 42 | |
43 | 43 | } |
44 | - | |
44 | + | |
45 | +//Incident field for a focused point source | |
45 | 46 | __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4) |
46 | 47 | { |
47 | 48 | /*Compute the scalar focused field using Debye focusing |
... | ... | @@ -151,7 +152,6 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt |
151 | 152 | } |
152 | 153 | |
153 | 154 | sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); |
154 | - //sumUf += il * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); | |
155 | 155 | |
156 | 156 | il *= im; |
157 | 157 | } |
... | ... | @@ -162,21 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt |
162 | 162 | |
163 | 163 | void nearfieldStruct::scalarUf() |
164 | 164 | { |
165 | - //Compute the incident field via a scalar simulation | |
166 | - //This method uses Debye focusing to approximate the field analytically | |
167 | - | |
168 | - //time the calculation of the focused field | |
169 | - //gpuStartTimer(); | |
170 | - | |
171 | - //set the field slice to a scalar field | |
172 | - //Uf.scalarField = true; | |
173 | - | |
174 | - //initialize the GPU arrays | |
175 | - //Uf.init_gpu(); | |
165 | + | |
166 | + gpuStartTimer(); | |
176 | 167 | |
177 | 168 | //create one thread for each pixel of the field slice |
178 | 169 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); |
179 | - dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
170 | + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
180 | 171 | |
181 | 172 | //if we are computing a plane wave, call the gpuScalarUfp function |
182 | 173 | if(planeWave) |
... | ... | @@ -191,10 +182,7 @@ void nearfieldStruct::scalarUf() |
191 | 182 | ptype cosBeta = cos(asin(condenser[1])); |
192 | 183 | //compute the scalar Uf field (this will be in the x_hat channel of Uf) |
193 | 184 | gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); |
194 | - } | |
195 | - | |
196 | - //float t = gpuStopTimer(); | |
197 | - //std::cout<<"Scalar Uf Time: "<<t<<"ms"<<std::endl; | |
198 | - //std::cout<<focus<<std::endl; | |
199 | - | |
185 | + } | |
186 | + | |
187 | + t_Uf = gpuStopTimer(); | |
200 | 188 | } | ... | ... |
1 | +#include "nearfield.h" | |
2 | + | |
3 | +#include "rts/math/legendre.h" | |
4 | +#include "rts/cuda/error.h" | |
5 | +#include "rts/cuda/timer.h" | |
6 | + | |
7 | +texture<float, cudaTextureType2D> texJ; | |
8 | + | |
9 | +__global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR); | |
10 | + | |
/*Focused incident field for a 2D slice, with the per-order Bessel term read
  from the texJ look-up texture.

	Uf       = destination field slice, one value per pixel at index iv*uR + iu
	ABCD     = plane representing the field slice in world space
	uR, vR   = resolution of the Uf field
	f        = focal point of the condenser
	k        = direction of the incident light
	A        = amplitude of the incident field
	cosAlpha = cosine of the angle subtended by the condenser obscuration
	cosBeta  = cosine of the angle subtended by the condenser aperture
	nl       = highest order summed (orders 0..nl)
	dmin     = distance that maps to the first LUT sample along the distance axis
	dmax     = distance that maps to the last LUT sample along the distance axis
	dR       = number of Bessel function samples along the distance axis of texJ

  Launch layout: 2D grid of 2D blocks, one thread per pixel; threads that fall
  outside uR x vR return without writing.
*/
__global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoint f, bsVector k, ptype A, ptype cosAlpha, ptype cosBeta, int nl, ptype dmin, ptype dmax, int dR)
{
	//pixel handled by this thread
	const int iu = blockIdx.x * blockDim.x + threadIdx.x;
	const int iv = blockIdx.y * blockDim.y + threadIdx.y;

	//threads past the edge of the slice have nothing to do
	if(iu >= uR || iv >= vR) return;

	//flat index into the scalar field array
	const int i = iv*uR + iu;

	//world-space position of the pixel and its offset from the focus
	bsPoint p = ABCD((ptype)iu / (uR), (ptype)iv / (vR));
	bsVector r = p - f;
	ptype d = r.len();

	//at the focus itself r has no direction, so the value is assigned directly
	if(d == 0)
	{
		Uf[i] = A * 2 * PI * (cosAlpha - cosBeta);
		return;
	}

	//cosine of the angle between the light direction and the position vector
	r = r.norm();
	ptype cosTheta = k.dot(r);

	//Legendre functions are advanced on the fly to save memory
	ptype P[2];
	rts::init_legendre<ptype>(cosTheta, P[0], P[1]);

	//Legendre functions for the two condenser angles; slot [2] holds the
	//	value from two orders back and starts at 1
	ptype Palpha[3];
	rts::init_legendre<ptype>(cosAlpha, Palpha[0], Palpha[1]);
	Palpha[2] = 1;

	ptype Pbeta[3];
	rts::init_legendre<ptype>(cosBeta, Pbeta[0], Pbeta[1]);
	Pbeta[2] = 1;

	//running power of the imaginary unit, i^l
	bsComplex im = bsComplex(0, 1);
	bsComplex il = bsComplex(1, 0);

	//fractional LUT coordinate corresponding to the distance d
	ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1);

	//accumulate the series over the orders
	bsComplex sumUf(0.0, 0.0);
	ptype jl = 0.0;
	ptype Pl;
	for(int l = 0; l<=nl; l++)
	{
		//Bessel term for this order and distance (the +0.5 addresses texel centers)
		jl = tex2D(texJ, l + 0.5, di + 0.5);

		if(l == 0)
		{
			Pl = P[0];
		}
		else
		{
			//orders above 1 need the recurrence advanced first
			if(l > 1)
				rts::shift_legendre<ptype>(l, cosTheta, P[0], P[1]);
			Pl = P[1];

			//advance the condenser-angle Legendre functions, keeping the older value
			Palpha[2] = Palpha[0];
			rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
			Pbeta[2] = Pbeta[0];
			rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
		}

		sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);

		il *= im;
	}

	Uf[i] = sumUf * 2 * PI * A;
}
124 | + | |
/*Compute the incident field Uf on the GPU using a Bessel look-up texture.

  The members d_min and d_max are updated from the slice position and the
  focus, the Bessel table is built on the CPU (calcBesselLut), uploaded to a
  CUDA array bound to texJ, and sampled by gpuScalarUfLut.
  A plane wave does not use the table, so that path launches gpuScalarUfp
  directly without building or uploading it.
  The elapsed time is stored in t_Uf.
*/
void nearfieldStruct::scalarUfLut()
{
	gpuStartTimer();

	//calculate the minimum and maximum points in the focused field
	d_min = pos.dist(focus);
	d_max = pos.dist_max(focus);

	//create one thread for each pixel of the field slice
	dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
	dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

	//a plane wave never samples the Bessel LUT, so skip building and uploading it
	if(planeWave)
	{
		gpuScalarUfp<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]);
		HANDLE_ERROR(cudaGetLastError());

		t_Uf = gpuStopTimer();
		return;
	}

	//allocate space for the Bessel function table: dR rows of (m+1) orders
	int dR = 2 * max(Uf.R[0], Uf.R[1]);
	ptype* j = NULL;
	j = (ptype*) malloc(sizeof(ptype) * dR * (m+1));

	//calculate Bessel function LUT
	calcBesselLut(j, d_min, d_max, dR);

	//create a CUDA array structure and specify the format description
	cudaArray* arrayJ;
	cudaChannelFormatDesc channelDesc =
		cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);

	//allocate memory
	HANDLE_ERROR(cudaMallocArray(&arrayJ, &channelDesc, m+1, dR));

	//specify texture properties
	texJ.addressMode[0] = cudaAddressModeMirror;
	texJ.addressMode[1] = cudaAddressModeMirror;
	texJ.filterMode = cudaFilterModeLinear;
	texJ.normalized = false;

	//bind the texture to the array
	HANDLE_ERROR(cudaBindTextureToArray(texJ, arrayJ, channelDesc));

	//copy the CPU Bessel LUT to the GPU-based array
	//NOTE(review): the pitch and width use sizeof(float) while j holds ptype -- assumes ptype is float; confirm
	HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice));

	//pre-compute the cosine of the obscuration and objective angles
	ptype cosAlpha = cos(asin(condenser[0]));
	ptype cosBeta = cos(asin(condenser[1]));

	//compute the scalar Uf field (this will be in the x_hat channel of Uf)
	gpuScalarUfLut<<<dimGrid, dimBlock>>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR);
	HANDLE_ERROR(cudaGetLastError());

	//wait for the kernel before tearing down the texture it reads from
	HANDLE_ERROR(cudaDeviceSynchronize());

	//free everything; unbind first so texJ does not keep referring to a released array
	HANDLE_ERROR(cudaUnbindTexture(texJ));
	HANDLE_ERROR(cudaFreeArray(arrayJ));
	free(j);

	t_Uf = gpuStopTimer();
}
1 | +#include "nearfield.h" | |
2 | +#include "rts/math/spherical_bessel.h" | |
3 | +#include "rts/math/legendre.h" | |
4 | +#include <stdlib.h> | |
5 | +#include "rts/cuda/error.h" | |
6 | +#include "rts/cuda/timer.h" | |
7 | + | |
8 | +texture<float2, cudaTextureType2D> texUsp; | |
9 | +texture<float2, cudaTextureType2D> texUip; | |
10 | + | |
/*This function uses Monte-Carlo integration to sample texture-based LUTs describing the field
  produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field.
  The result is accumulated into Us (Us[i] += ...), so Us must hold valid values on entry.

	Us    = scattered field (accumulated in place)
	k     = list of incoming plane waves (Monte-Carlo samples)
	nk    = number of incoming MC samples
	kmag  = magnitude of the incoming field 2pi/lambda
	a     = sphere radius: pixels with distance d < a sample texUip, all others sample texUsp
	dmin  = minimum distance of the Usp texture
	dmax  = maximum distance of the Usp texture
	f     = position of the focus
	ps    = position of the sphere
	A     = total amplitude of the incident field arriving at the focal spot
	ABCD  = rectangle representing the field slice
	uR    = resolution of the field slice in the u direction
	vR    = resolution of the field slice in the v direction
	dR    = resolution of the Usp texture in the d direction
	aR    = resolution of the Uip texture in its first (distance) direction
	thetaR= resolution of both textures in the theta direction

  Launch layout: 2D grid of 2D blocks, one thread per pixel; threads outside
  uR x vR return without writing.
*/
__global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR)
{
	//get the current coordinate in the plane slice
	int iu = blockIdx.x * blockDim.x + threadIdx.x;
	int iv = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(iu >= uR || iv >= vR) return;

	//compute the index (easier access to the scalar field array)
	int i = iv*uR + iu;

	//compute the parameters for u and v
	ptype u = (ptype)iu / (uR);
	ptype v = (ptype)iv / (vR);

	//get the point in world space and then the r vector (relative to the sphere)
	bsPoint p = ABCD(u, v);
	bsVector r = p - ps;
	ptype d = r.len();

	//fractional texture coordinates along the distance axis of each LUT
	float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1);
	float ai = ( (d - dmin)/(a - dmin)) * (aR - 1);

	//everything below is independent of the plane wave, so it is computed once
	//	instead of once per Monte-Carlo sample
	r = r.norm();						//unit direction from the sphere to the pixel
	bsVector c = ps - f;				//sphere offset from the focus (for the phase factor)
	const bool inside = (d < a);		//true when the pixel lies inside the sphere

	bsComplex sumUs(0, 0);
	//for each plane wave in the wave list
	for(int iw = 0; iw < nk; iw++)
	{
		//find the inner product of the direction vectors, clamped to the domain of acos
		ptype cos_theta = k[iw].dot(r);
		if(cos_theta < -1)
			cos_theta = -1;
		if(cos_theta > 1)
			cos_theta = 1;
		float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1);

		//compute the phase factor for spheres that are not at the origin
		bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));

		//sample the internal field inside the sphere, the scattered field otherwise
		//	(the +0.5 addresses texel centers)
		float2 Up;
		if(inside)
			Up = tex2D(texUip, ai + 0.5, thetai + 0.5);
		else
			Up = tex2D(texUsp, di + 0.5, thetai + 0.5);

		//each sample carries an equal share of the total amplitude
		sumUs += (1.0/nk) * A * phase * bsComplex(Up.x, Up.y);
	}

	Us[i] += sumUs;
}
87 | + | |
/*Compute the scattered field for all spheres using the per-sphere look-up tables.

  For each sphere, its Usp and Uip tables are copied into CUDA arrays bound to
  texUsp and texUip, and gpuScalarUpLut accumulates that sphere's contribution
  into U.x_hat. The elapsed time is stored in t_Us.
  Returns immediately (leaving U and t_Us untouched) when there are no spheres.
*/
void nearfieldStruct::scalarUpLut()
{
	//get the number of spheres
	int nSpheres = sVector.size();

	//if there are no spheres, nothing to do here
	if(nSpheres == 0)
		return;

	//time the calculation of the scattered field
	gpuStartTimer();

	//clear the scattered field
	U.clear_gpu();

	//create one thread for each pixel of the field slice
	dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
	dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

	//copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff)
	bsVector* gpuk;
	int nWaves;
	ptype subA;
	if(planeWave)
	{
		//a plane wave is a single sample travelling along k
		nWaves = 1;
		HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) );
		HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice));
		subA = A;
	}
	else
	{
		nWaves = inWaves.size();
		HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nWaves ) );
		HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nWaves, cudaMemcpyHostToDevice));
		//compute the amplitude that makes it through the condenser
		subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );
	}

	//both textures use the same two-channel float format and sampling settings,
	//	so they are configured once rather than once per sphere
	cudaChannelFormatDesc channelDesc =
		cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);

	texUsp.addressMode[0] = cudaAddressModeMirror;
	texUsp.addressMode[1] = cudaAddressModeMirror;
	texUsp.filterMode = cudaFilterModeLinear;
	texUsp.normalized = false;

	texUip.addressMode[0] = cudaAddressModeMirror;
	texUip.addressMode[1] = cudaAddressModeMirror;
	texUip.filterMode = cudaFilterModeLinear;
	texUip.normalized = false;

	//for each sphere
	for(int s = 0; s<nSpheres; s++)
	{
		//table resolutions for this sphere
		int dR = sVector[s].Usp.R[0];
		int thetaR = sVector[s].Usp.R[1];
		int aR = sVector[s].Uip.R[0];

		//allocate the cuda arrays backing the Usp and Uip textures
		cudaArray* arrayUsp;
		cudaArray* arrayUip;
		HANDLE_ERROR(cudaMallocArray(&arrayUsp, &channelDesc, dR, thetaR));
		HANDLE_ERROR(cudaMallocArray(&arrayUip, &channelDesc, aR, thetaR));

		HANDLE_ERROR(cudaBindTextureToArray(texUsp, arrayUsp, channelDesc));
		HANDLE_ERROR(cudaBindTextureToArray(texUip, arrayUip, channelDesc));

		//copy the LUTs to the textures
		HANDLE_ERROR( cudaMemcpy2DToArray(arrayUsp, 0, 0, sVector[s].Usp.x_hat, dR*sizeof(float2), dR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));
		HANDLE_ERROR( cudaMemcpy2DToArray(arrayUip, 0, 0, sVector[s].Uip.x_hat, aR*sizeof(float2), aR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));

		gpuScalarUpLut<<<dimGrid, dimBlock>>>(U.x_hat,
											gpuk,
											nWaves,
											2 * PI / lambda,
											sVector[s].a,
											sVector[s].d_min,
											sVector[s].d_max,
											focus,
											sVector[s].p,
											subA,
											pos,
											U.R[0],
											U.R[1],
											dR,
											aR,
											thetaR);
		HANDLE_ERROR(cudaGetLastError());

		//the textures are shared globals: wait for the kernel before they are
		//	unbound and their arrays released
		HANDLE_ERROR(cudaDeviceSynchronize());

		//unbind so the texture references do not keep pointing at freed arrays
		HANDLE_ERROR(cudaUnbindTexture(texUsp));
		HANDLE_ERROR(cudaUnbindTexture(texUip));

		HANDLE_ERROR(cudaFreeArray(arrayUsp));
		HANDLE_ERROR(cudaFreeArray(arrayUip));
	}

	//store the time to compute the scattered field
	t_Us = gpuStopTimer();

	//free monte-carlo samples
	HANDLE_ERROR(cudaFree(gpuk));
}
nfScalarUs.cu
... | ... | @@ -163,7 +163,7 @@ void nearfieldStruct::scalarUs() |
163 | 163 | return; |
164 | 164 | |
165 | 165 | //time the calculation of the focused field |
166 | - //gpuStartTimer(); | |
166 | + gpuStartTimer(); | |
167 | 167 | |
168 | 168 | //clear the scattered field |
169 | 169 | U.clear_gpu(); |
... | ... | @@ -251,9 +251,8 @@ void nearfieldStruct::scalarUs() |
251 | 251 | } |
252 | 252 | |
253 | 253 | |
254 | + //store the time to compute the scattered field | |
255 | + t_Us = gpuStopTimer(); | |
254 | 256 | |
255 | - //float t = gpuStopTimer(); | |
256 | - //std::cout<<"Scalar Us Time: "<<t<<"ms"<<std::endl; | |
257 | - //std::cout<<focus<<std::endl; | |
258 | 257 | |
259 | 258 | } | ... | ... |
nfSumUf.cu
... | ... | @@ -32,7 +32,7 @@ __global__ void gpuScalarUsp(bsComplex* Ufx, bsComplex* Ufy, bsComplex* Ufz, |
32 | 32 | { |
33 | 33 | r = p - ps[is]; |
34 | 34 | d = r.len(); |
35 | - if(d <= as[is]) | |
35 | + if(d < as[is]) | |
36 | 36 | return; |
37 | 37 | } |
38 | 38 | |
... | ... | @@ -110,8 +110,5 @@ void nearfieldStruct::sumUf() |
110 | 110 | HANDLE_ERROR(cudaFree(gpu_p)); |
111 | 111 | HANDLE_ERROR(cudaFree(gpu_a)); |
112 | 112 | |
113 | - //float t = gpuStopTimer(); | |
114 | - //std::cout<<"Add Us Time: "<<t<<"ms"<<std::endl; | |
115 | - //std::cout<<focus<<std::endl; | |
116 | 113 | |
117 | 114 | } | ... | ... |
options.h
... | ... | @@ -5,7 +5,7 @@ |
5 | 5 | |
6 | 6 | #include "nearfield.h" |
7 | 7 | #include "microscope.h" |
8 | -#include "colormap.h" | |
8 | +#include "rts/graphics/colormap.h" | |
9 | 9 | #include "fileout.h" |
10 | 10 | //extern nearfieldStruct* NF; |
11 | 11 | extern microscopeStruct* SCOPE; |
... | ... | @@ -23,7 +23,179 @@ using namespace std; |
23 | 23 | #include <boost/program_options.hpp> |
24 | 24 | namespace po = boost::program_options; |
25 | 25 | |
26 | -static void loadSpheres(string sphereList) | |
26 | +extern bool verbose; | |
27 | + | |
28 | + | |
29 | + | |
30 | +static void lNearfield(po::variables_map vm) | |
31 | +{ | |
32 | + //test to see if we are simulating a plane wave | |
33 | + bool planeWave = DEFAULT_PLANEWAVE; | |
34 | + if(vm.count("plane-wave")) | |
35 | + planeWave = !planeWave; | |
36 | + SCOPE->nf.planeWave = planeWave; | |
37 | + | |
38 | + //get the incident field amplitude | |
39 | + SCOPE->nf.A = vm["amplitude"].as<ptype>(); | |
40 | + | |
41 | + //get the condenser parameters | |
42 | + SCOPE->nf.condenser[0] = DEFAULT_CONDENSER_MIN; | |
43 | + SCOPE->nf.condenser[1] = DEFAULT_CONDENSER_MAX; | |
44 | + | |
45 | + if(vm.count("condenser")) | |
46 | + { | |
47 | + vector<ptype> cparams = vm["condenser"].as< vector<ptype> >(); | |
48 | + | |
49 | + if(cparams.size() == 1) | |
50 | + SCOPE->nf.condenser[1] = cparams[0]; | |
51 | + else | |
52 | + { | |
53 | + SCOPE->nf.condenser[0] = cparams[0]; | |
54 | + SCOPE->nf.condenser[1] = cparams[1]; | |
55 | + } | |
56 | + } | |
57 | + | |
58 | + | |
59 | + //get the focal rtsPoint position | |
60 | + SCOPE->nf.focus[0] = DEFAULT_FOCUS_X; | |
61 | + SCOPE->nf.focus[1] = DEFAULT_FOCUS_Y; | |
62 | + SCOPE->nf.focus[2] = DEFAULT_FOCUS_Z; | |
63 | + if(vm.count("focus")) | |
64 | + { | |
65 | + vector<ptype> fpos = vm["focus"].as< vector<ptype> >(); | |
66 | + if(fpos.size() != 3) | |
67 | + { | |
68 | + cout<<"BIMSIM Error - the incident focal point is incorrectly specified; it must have three components."<<endl; | |
69 | + exit(1); | |
70 | + } | |
71 | + SCOPE->nf.focus[0] = fpos[0]; | |
72 | + SCOPE->nf.focus[1] = fpos[1]; | |
73 | + SCOPE->nf.focus[2] = fpos[2]; | |
74 | + } | |
75 | + | |
76 | + //get the incident light direction (k-vector) | |
77 | + bsVector spherical(1, 0, 0); | |
78 | + | |
79 | + //if a k-vector is specified | |
80 | + if(vm.count("k")) | |
81 | + { | |
82 | + vector<ptype> kvec = vm["k"].as< vector<ptype> >(); | |
83 | + if(kvec.size() != 2) | |
84 | + { | |
85 | + cout<<"BIMSIM Error - k-vector is not specified correctly: it must contain two elements"<<endl; | |
86 | + exit(1); | |
87 | + } | |
88 | + spherical[1] = kvec[0]; | |
89 | + spherical[2] = kvec[1]; | |
90 | + } | |
91 | + SCOPE->nf.k = spherical.sph2cart(); | |
92 | + | |
93 | + | |
94 | + //incident field order | |
95 | + SCOPE->nf.m = vm["field-order"].as<int>(); | |
96 | + | |
97 | + //number of Monte-Carlo samples | |
98 | + SCOPE->nf.nWaves = vm["samples"].as<int>(); | |
99 | + | |
100 | + //random number seed for Monte-Carlo samples | |
101 | + if(vm.count("seed")) | |
102 | + srand(vm["seed"].as<unsigned int>()); | |
103 | + | |
104 | + | |
105 | + | |
106 | +} | |
107 | + | |
108 | + | |
109 | +static void loadOutputParams(po::variables_map vm) | |
110 | +{ | |
111 | + //append simulation results to previous binary files | |
112 | + gFileOut.append = DEFAULT_APPEND; | |
113 | + if(vm.count("append")) | |
114 | + gFileOut.append = true; | |
115 | + | |
116 | + //image parameters | |
117 | + //component of the field to be saved | |
118 | + std::string fieldStr; | |
119 | + fieldStr = vm["output-type"].as<string>(); | |
120 | + | |
121 | + if(fieldStr == "magnitude") | |
122 | + gFileOut.field = fileoutStruct::fieldMag; | |
123 | + else if(fieldStr == "intensity") | |
124 | + gFileOut.field = fileoutStruct::fieldIntensity; | |
125 | + else if(fieldStr == "polarization") | |
126 | + gFileOut.field = fileoutStruct::fieldPolar; | |
127 | + else if(fieldStr == "imaginary") | |
128 | + gFileOut.field = fileoutStruct::fieldImag; | |
129 | + else if(fieldStr == "real") | |
130 | + gFileOut.field = fileoutStruct::fieldReal; | |
131 | + else if(fieldStr == "angular-spectrum") | |
132 | + gFileOut.field = fileoutStruct::fieldAngularSpectrum; | |
133 | + | |
134 | + | |
135 | + //image file names | |
136 | + gFileOut.intFile = vm["intensity"].as<string>(); | |
137 | + gFileOut.absFile = vm["absorbance"].as<string>(); | |
138 | + gFileOut.transFile = vm["transmittance"].as<string>(); | |
139 | + gFileOut.nearFile = vm["near-field"].as<string>(); | |
140 | + gFileOut.farFile = vm["far-field"].as<string>(); | |
141 | + | |
142 | + //colormap | |
143 | + std::string cmapStr; | |
144 | + cmapStr = vm["colormap"].as<string>(); | |
145 | + if(cmapStr == "brewer") | |
146 | + gFileOut.colormap = rts::cmBrewer; | |
147 | + else if(cmapStr == "gray") | |
148 | + gFileOut.colormap = rts::cmGrayscale; | |
149 | + else | |
150 | + cout<<"color-map value not recognized (using default): "<<cmapStr<<endl; | |
151 | +} | |
152 | + | |
153 | +void lFlags(po::variables_map vm, po::options_description desc) | |
154 | +{ | |
155 | + //display help and exit | |
156 | + if(vm.count("help")) | |
157 | + { | |
158 | + cout<<desc<<endl; | |
159 | + exit(1); | |
160 | + } | |
161 | + | |
162 | + //flag for verbose output | |
163 | + if(vm.count("verbose")) | |
164 | + verbose = true; | |
165 | + | |
166 | + if(vm.count("recursive")) | |
167 | + { | |
168 | + SCOPE->nf.lut_us = false; | |
169 | + SCOPE->nf.lut_uf = false; | |
170 | + } | |
171 | + else if(vm.count("recursive-us")) | |
172 | + { | |
173 | + SCOPE->nf.lut_us = false; | |
174 | + } | |
175 | + else if(vm.count("lut-uf")) | |
176 | + { | |
177 | + SCOPE->nf.lut_uf = true; | |
178 | + } | |
179 | +} | |
180 | + | |
181 | +void lWavelength(po::variables_map vm) | |
182 | +{ | |
183 | + //load the wavelength | |
184 | + if(vm.count("nu")) | |
185 | + { | |
186 | + //wavelength is given in wavenumber - transform and flag | |
187 | + SCOPE->nf.lambda = 10000/vm["nu"].as<ptype>(); | |
188 | + gFileOut.wavenumber = true; | |
189 | + } | |
190 | + //otherwise we are using lambda = wavelength | |
191 | + else | |
192 | + { | |
193 | + SCOPE->nf.lambda = vm["lambda"].as<ptype>(); | |
194 | + gFileOut.wavenumber = false; | |
195 | + } | |
196 | +} | |
197 | + | |
198 | +static void lSpheres(string sphereList) | |
27 | 199 | { |
28 | 200 | /*This function loads a list of sphere given in the string sphereList |
29 | 201 | The format is: |
... | ... | @@ -58,17 +230,60 @@ static void loadSpheres(string sphereList) |
58 | 230 | //check out the next element (this should set the EOF error flag) |
59 | 231 | ss.peek(); |
60 | 232 | } |
233 | +} | |
61 | 234 | |
235 | +void lSpheres(po::variables_map vm) | |
236 | +{ | |
237 | + //if a sphere is specified at the command line | |
238 | + if(vm.count("spheres")) | |
239 | + { | |
240 | + //convert the sphere to a string | |
241 | + vector<ptype> sdesc = vm["spheres"].as< vector<ptype> >(); | |
62 | 242 | |
243 | + //compute the number of spheres specified | |
244 | + unsigned int nS; | |
245 | + if(sdesc.size() <= 5) | |
246 | + nS = 1; | |
247 | + else | |
248 | + { | |
249 | + //if the number of parameters is divisible by 5, compute the number of spheres | |
250 | + if(sdesc.size() % 5 == 0) | |
251 | + nS = sdesc.size() / 5; | |
252 | + else | |
253 | + { | |
254 | + cout<<"BIMSIM Error: Invalid number of sphere parameters."<<endl; | |
255 | + exit(1); | |
256 | + } | |
257 | + } | |
63 | 258 | |
64 | -} | |
259 | + stringstream ss; | |
260 | + | |
261 | + //for each sphere | |
262 | + for(unsigned int s=0; s<nS; s++) | |
263 | + { | |
264 | + //compute the number of sphere parameters | |
265 | + unsigned int nP; | |
266 | + if(nS == 1) nP = sdesc.size(); | |
267 | + else nP = 5; | |
268 | + | |
269 | + //store each parameter as a string | |
270 | + for(unsigned int i=0; i<nP; i++) | |
271 | + { | |
272 | + ss<<sdesc[s*5 + i]<<" "; | |
273 | + } | |
274 | + ss<<endl; | |
275 | + } | |
276 | + | |
277 | + | |
278 | + | |
279 | + //convert the string to a sphere list | |
280 | + lSpheres(ss.str()); | |
281 | + } | |
65 | 282 | |
66 | -static void loadSpheres(po::variables_map vm) | |
67 | -{ | |
68 | 283 | //if sphere files are specified | |
69 | 284 | if(vm.count("sphere-file")) |
70 | 285 | { |
71 | - cout<<"Sphere files detected."<<endl; | |
286 | + | |
72 | 287 | vector<string> filenames = vm["sphere-file"].as< vector<string> >(); |
73 | 288 | //load each file |
74 | 289 | for(int iS=0; iS<filenames.size(); iS++) |
... | ... | @@ -85,69 +300,51 @@ static void loadSpheres(po::variables_map vm) |
85 | 300 | std::string instr((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>()); |
86 | 301 | |
87 | 302 | //load the list of spheres from a string |
88 | - loadSpheres(instr); | |
303 | + lSpheres(instr); | |
89 | 304 | } |
90 | 305 | } |
91 | 306 | |
92 | - //load the sphere from the command line | |
93 | - if(vm.count("sx") || vm.count("sy") || vm.count("sz") || vm.count("s")) | |
94 | - { | |
95 | - //create a new sphere | |
96 | - sphere newS; | |
97 | - | |
98 | - //set defaults | |
99 | - if(vm.count("sx")) | |
100 | - newS.p[0] = vm["sx"].as<ptype>(); | |
101 | - else | |
102 | - newS.p[0] = DEFAULT_SPHERE_X; | |
103 | - | |
104 | - | |
105 | - if(vm.count("sy")) | |
106 | - newS.p[1] = vm["sy"].as<ptype>(); | |
107 | - else | |
108 | - newS.p[1] = DEFAULT_SPHERE_Y; | |
109 | - | |
110 | - if(vm.count("sz")) | |
111 | - newS.p[2] = vm["sz"].as<ptype>(); | |
112 | - else | |
113 | - newS.p[2] = DEFAULT_SPHERE_Z; | |
114 | - | |
115 | - if(vm.count("radius")) | |
116 | - newS.a = vm["radius"].as<ptype>(); | |
117 | - else | |
118 | - newS.a = DEFAULT_SPHERE_A; | |
119 | - | |
120 | - //add the sphere to the sphere vector | |
121 | - SCOPE->nf.sVector.push_back(newS); | |
307 | + //make sure the appropriate materials are loaded | |
308 | + unsigned int nS = SCOPE->nf.sVector.size(); | |
122 | 309 | |
310 | + //for each sphere | |
311 | + for(unsigned int s = 0; s<nS; s++) | |
312 | + { | |
313 | + //make sure the corresponding material exists | |
314 | + if(SCOPE->nf.sVector[s].iMaterial + 1 > SCOPE->nf.mVector.size()) | |
315 | + { | |
316 | + //otherwise output an error | |
317 | + cout<<"BIMSIM Error - A material is not loaded for sphere "<<s+1<<"."<<endl; | |
318 | + exit(1); | |
319 | + } | |
123 | 320 | } |
124 | 321 | } |
125 | 322 | |
126 | -static void loadMaterials(po::variables_map vm) | |
323 | +static void lMaterials(po::variables_map vm) | |
127 | 324 | { |
128 | 325 | //if materials are specified at the command line |
129 | 326 | if(vm.count("materials")) |
130 | 327 | { |
131 | 328 | vector<ptype> matVec = vm["materials"].as< vector<ptype> >(); |
132 | - if(matVec.size() %2 != 0) | |
329 | + if(matVec.size() == 1) | |
330 | + { | |
331 | + rts::material<ptype> newM(SCOPE->nf.lambda, matVec[0], 0); | |
332 | + SCOPE->nf.mVector.push_back(newM); | |
333 | + } | |
334 | + else if(matVec.size() %2 != 0) | |
133 | 335 | { |
134 | 336 | cout<<"BIMSim Error: materials must be specified in n, k pairs"<<endl; |
135 | 337 | exit(1); |
136 | 338 | } |
137 | - | |
138 | - | |
139 | - for(int i=0; i<matVec.size(); i+=2) | |
339 | + else | |
140 | 340 | { |
141 | - rts::material<ptype> newM(SCOPE->nf.lambda, matVec[i], matVec[i+1]); | |
142 | - SCOPE->nf.mVector.push_back(newM); | |
341 | + for(int i=0; i<matVec.size(); i+=2) | |
342 | + { | |
343 | + rts::material<ptype> newM(SCOPE->nf.lambda, matVec[i], matVec[i+1]); | |
344 | + SCOPE->nf.mVector.push_back(newM); | |
345 | + } | |
143 | 346 | } |
144 | 347 | } |
145 | - else | |
146 | - { | |
147 | - //add the command line material as the default (material 0) | |
148 | - rts::material<ptype> newM(SCOPE->nf.lambda, vm["n"].as<ptype>(), vm["k"].as<ptype>()); | |
149 | - SCOPE->nf.mVector.push_back(newM); | |
150 | - } | |
151 | 348 | |
152 | 349 | //if file names are specified, load the materials |
153 | 350 | if(vm.count("material-file")) |
... | ... | @@ -169,57 +366,109 @@ static void loadMaterials(po::variables_map vm) |
169 | 366 | |
170 | 367 | } |
171 | 368 | |
172 | -static void loadNearfieldParams(po::variables_map vm) | |
369 | +static void lOptics(po::variables_map vm) | |
173 | 370 | { |
174 | - //test to see if we are simulating a plane wave | |
175 | - bool planeWave = DEFAULT_PLANEWAVE; | |
176 | - if(vm.count("plane-wave")) | |
177 | - planeWave = !planeWave; | |
178 | - SCOPE->nf.planeWave = planeWave; | |
179 | - | |
180 | - //get the wavelength | |
181 | - //SCOPE->nf.lambda = vm["lambda"].as<ptype>(); | |
182 | - | |
183 | - //get the incident field amplitude | |
184 | - SCOPE->nf.A = vm["amplitude"].as<ptype>(); | |
185 | - | |
186 | - //get the condenser parameters | |
187 | - SCOPE->nf.condenser[0] = vm["condenser-min"].as<ptype>(); | |
188 | - SCOPE->nf.condenser[1] = vm["condenser-max"].as<ptype>(); | |
189 | - | |
190 | - | |
191 | - //get the focal rtsPoint position | |
192 | - SCOPE->nf.focus[0] = vm["fx"].as<ptype>(); | |
193 | - SCOPE->nf.focus[1] = vm["fy"].as<ptype>(); | |
194 | - SCOPE->nf.focus[2] = vm["fz"].as<ptype>(); | |
195 | - | |
196 | - //get the incident light direction (k-vector) | |
197 | - bsVector spherical; | |
198 | - spherical[0] = 1.0; | |
199 | - spherical[1] = vm["theta"].as<ptype>(); | |
200 | - spherical[2] = vm["phi"].as<ptype>(); | |
201 | - SCOPE->nf.k = spherical.sph2cart(); | |
202 | - | |
203 | - | |
204 | - //incident field order | |
205 | - SCOPE->nf.m = vm["field-order"].as<int>(); | |
206 | - | |
207 | - //number of Monte-Carlo samples | |
208 | - SCOPE->nf.nWaves = vm["samples"].as<int>(); | |
209 | - | |
210 | - | |
371 | + SCOPE->objective[0] = DEFAULT_OBJECTIVE_MIN; | |
372 | + SCOPE->objective[1] = DEFAULT_OBJECTIVE_MAX; | |
373 | + if(vm.count("objective")) | |
374 | + { | |
375 | + vector<ptype> oparams = vm["objective"].as< vector<ptype> >(); | |
211 | 376 | |
377 | + if(oparams.size() == 1) | |
378 | + SCOPE->objective[1] = oparams[0]; | |
379 | + else | |
380 | + { | |
381 | + SCOPE->objective[0] = oparams[0]; | |
382 | + SCOPE->objective[1] = oparams[1]; | |
383 | + } | |
384 | + } | |
212 | 385 | } |
213 | 386 | |
214 | -static void loadSliceParams(po::variables_map vm) | |
387 | +static void lImagePlane(po::variables_map vm) | |
215 | 388 | { |
216 | - //parameters for the sample plane | |
217 | - | |
389 | + bsPoint pMin(DEFAULT_PLANE_MIN_X, DEFAULT_PLANE_MIN_Y, DEFAULT_PLANE_MIN_Z); | |
390 | + bsPoint pMax(DEFAULT_PLANE_MAX_X, DEFAULT_PLANE_MAX_Y, DEFAULT_PLANE_MAX_Z); | |
391 | + bsVector normal(DEFAULT_PLANE_NORM_X, DEFAULT_PLANE_NORM_Y, DEFAULT_PLANE_NORM_Z); | |
218 | 392 | |
219 | 393 | //set the default values for the slice position and orientation |
220 | - bsPoint pMin(vm["plane-min-x"].as<ptype>(), vm["plane-min-y"].as<ptype>(), vm["plane-min-z"].as<ptype>()); | |
221 | - bsPoint pMax(vm["plane-max-x"].as<ptype>(), vm["plane-max-y"].as<ptype>(), vm["plane-max-z"].as<ptype>()); | |
222 | - bsVector normal(vm["plane-norm-x"].as<ptype>(), vm["plane-norm-y"].as<ptype>(), vm["plane-norm-z"].as<ptype>()); | |
394 | + if(vm.count("plane-lower-left") && vm.count("plane-upper-right") && vm.count("plane-normal")) | |
395 | + { | |
396 | + vector<ptype> ll = vm["plane-lower-left"].as< vector<ptype> >(); | |
397 | + if(ll.size() != 3) | |
398 | + { | |
399 | + cout<<"BIMSIM Error - The lower-left corner of the image plane is incorrectly specified."<<endl; | |
400 | + exit(1); | |
401 | + } | |
402 | + | |
403 | + vector<ptype> ur = vm["plane-lower-left"].as< vector<ptype> >(); | |
404 | + if(ur.size() != 3) | |
405 | + { | |
406 | + cout<<"BIMSIM Error - The upper-right corner of the image plane is incorrectly specified."<<endl; | |
407 | + exit(1); | |
408 | + } | |
409 | + | |
410 | + vector<ptype> norm = vm["plane-lower-left"].as< vector<ptype> >(); | |
411 | + if(norm.size() != 3) | |
412 | + { | |
413 | + cout<<"BIMSIM Error - The normal of the image plane is incorrectly specified."<<endl; | |
414 | + exit(1); | |
415 | + } | |
416 | + | |
417 | + pMin = bsPoint(ll[0], ll[1], ll[2]); | |
418 | + pMax = bsPoint(ur[0], ur[1], ur[2]); | |
419 | + normal = bsVector(norm[0], norm[1], norm[2]); | |
420 | + } | |
421 | + else if(vm.count("xy")) | |
422 | + { | |
423 | + //default plane size in microns | |
424 | + ptype s = DEFAULT_PLANE_SIZE; | |
425 | + ptype pos = DEFAULT_PLANE_POSITION; | |
426 | + | |
427 | + vector<ptype> xy = vm["xy"].as< vector<ptype> >(); | |
428 | + if(xy.size() >= 1) | |
429 | + s = xy[0]; | |
430 | + if(xy.size() >= 2) | |
431 | + pos = xy[1]; | |
432 | + | |
433 | + //calculate the plane corners and normal based on the size and position | |
434 | + pMin = bsPoint(-s/2, -s/2, pos); | |
435 | + pMax = bsPoint(s/2, s/2, pos); | |
436 | + normal = bsVector(0, 0, 1); | |
437 | + } | |
438 | + else if(vm.count("xz")) | |
439 | + { | |
440 | + //default plane size in microns | |
441 | + ptype size = DEFAULT_PLANE_SIZE; | |
442 | + ptype pos = DEFAULT_PLANE_POSITION; | |
443 | + | |
444 | + vector<ptype> xz = vm["xz"].as< vector<ptype> >(); | |
445 | + if(xz.size() >= 1) | |
446 | + size = xz[0]; | |
447 | + if(xz.size() >= 2) | |
448 | + pos = xz[1]; | |
449 | + | |
450 | + //calculate the plane corners and normal based on the size and position | |
451 | + pMin = bsPoint(-size/2, pos, -size/2); | |
452 | + pMax = bsPoint(size/2, pos, size/2); | |
453 | + normal = bsVector(0, -1, 0); | |
454 | + } | |
455 | + else if(vm.count("yz")) | |
456 | + { | |
457 | + //default plane size in microns | |
458 | + ptype size = DEFAULT_PLANE_SIZE; | |
459 | + ptype pos = DEFAULT_PLANE_POSITION; | |
460 | + | |
461 | + vector<ptype> yz = vm["yz"].as< vector<ptype> >(); | |
462 | + if(yz.size() >= 1) | |
463 | + size = yz[0]; | |
464 | + if(yz.size() >= 2) | |
465 | + pos = yz[1]; | |
466 | + | |
467 | + //calculate the plane corners and normal based on the size and position | |
468 | + pMin = bsPoint(pos, -size/2, -size/2); | |
469 | + pMax = bsPoint(pos, size/2, size/2); | |
470 | + normal = bsVector(1, 0, 0); | |
471 | + } | |
223 | 472 | SCOPE->setPos(pMin, pMax, normal); |
224 | 473 | |
225 | 474 | //resolution |
... | ... | @@ -233,175 +482,111 @@ static void loadSliceParams(po::variables_map vm) |
233 | 482 | |
234 | 483 | |
235 | 484 | SCOPE->setNearfield(); |
236 | - | |
237 | - | |
238 | - | |
239 | -} | |
240 | - | |
241 | -static void loadMicroscopeParams(po::variables_map vm) | |
242 | -{ | |
243 | - //objective | |
244 | - SCOPE->objective[0] = vm["objective-min"].as<ptype>(); | |
245 | - SCOPE->objective[1] = vm["objective-max"].as<ptype>(); | |
246 | - | |
247 | - | |
248 | - | |
249 | - | |
250 | - | |
251 | -} | |
252 | - | |
253 | -static void loadOutputParams(po::variables_map vm) | |
254 | -{ | |
255 | - //append simulation results to previous binary files | |
256 | - gFileOut.append = DEFAULT_APPEND; | |
257 | - if(vm.count("append")) | |
258 | - gFileOut.append = true; | |
259 | - | |
260 | - //image parameters | |
261 | - //component of the field to be saved | |
262 | - std::string fieldStr; | |
263 | - fieldStr = vm["output-type"].as<string>(); | |
264 | - | |
265 | - if(fieldStr == "magnitude") | |
266 | - gFileOut.field = fileoutStruct::fieldMag; | |
267 | - else if(fieldStr == "intensity") | |
268 | - gFileOut.field = fileoutStruct::fieldIntensity; | |
269 | - else if(fieldStr == "polarization") | |
270 | - gFileOut.field = fileoutStruct::fieldPolar; | |
271 | - else if(fieldStr == "imaginary") | |
272 | - gFileOut.field = fileoutStruct::fieldImag; | |
273 | - else if(fieldStr == "real") | |
274 | - gFileOut.field = fileoutStruct::fieldReal; | |
275 | - else if(fieldStr == "angular-spectrum") | |
276 | - gFileOut.field = fileoutStruct::fieldAngularSpectrum; | |
277 | - | |
278 | - | |
279 | - //image file names | |
280 | - gFileOut.intFile = vm["intensity"].as<string>(); | |
281 | - gFileOut.absFile = vm["absorbance"].as<string>(); | |
282 | - gFileOut.transFile = vm["transmittance"].as<string>(); | |
283 | - gFileOut.nearFile = vm["near-field"].as<string>(); | |
284 | - gFileOut.farFile = vm["far-field"].as<string>(); | |
285 | - | |
286 | - //colormap | |
287 | - std::string cmapStr; | |
288 | - cmapStr = vm["colormap"].as<string>(); | |
289 | - if(cmapStr == "brewer") | |
290 | - gFileOut.colormap = rts::colormap::cmBrewer; | |
291 | - else if(cmapStr == "gray") | |
292 | - gFileOut.colormap = rts::colormap::cmGrayscale; | |
293 | - else | |
294 | - cout<<"color-map value not recognized (using default): "<<cmapStr<<endl; | |
295 | 485 | } |
296 | 486 | |
297 | 487 | static void OutputOptions() |
298 | 488 | { |
299 | - cout<<SCOPE->nf.toStr(); | |
489 | + cout<<SCOPE->toStr(); | |
300 | 490 | |
301 | 491 | cout<<"# of source points: "<<SCOPE->focalPoints.size()<<endl; |
302 | 492 | |
303 | 493 | } |
304 | 494 | |
495 | +vector<ptype> test; | |
305 | 496 | static void SetOptions(po::options_description &desc) |
306 | 497 | { |
307 | 498 | desc.add_options() |
308 | - ("help,h", "prints this help") | |
309 | - ("plane-wave,P", "simulates an incident plane wave") | |
310 | - ("intensity,I", po::value<string>()->default_value(DEFAULT_INTENSITY_FILE), "output measured intensity (filename)") | |
311 | - ("absorbance,A", po::value<string>()->default_value(DEFAULT_ABSORBANCE_FILE), "output measured absorbance (filename)") | |
312 | - ("transmittance,T", po::value<string>()->default_value(DEFAULT_TRANSMITTANCE_FILE), "output measured transmittance (filename)") | |
313 | - ("far-field,F", po::value<string>()->default_value(DEFAULT_FAR_FILE), "output far-field at detector (filename)") | |
314 | - ("near-field,N", po::value<string>()->default_value(DEFAULT_NEAR_FILE), "output field at focal plane (filename)") | |
315 | - ("extended-source,X", po::value<string>()->default_value(DEFAULT_EXTENDED_SOURCE), "image of source at focus (filename)") | |
316 | - //("sx,x", po::value<ptype>()->default_value(DEFAULT_SPHERE_X), "sphere coordinates") | |
317 | - //("sy,y", po::value<ptype>()->default_value(DEFAULT_SPHERE_Y)) | |
318 | - //("sz,z", po::value<ptype>()->default_value(DEFAULT_SPHERE_Z)) | |
319 | - ("sx,x", po::value<ptype>(), "sphere coordinates") | |
320 | - ("sy,y", po::value<ptype>()) | |
321 | - ("sz,z", po::value<ptype>()) | |
322 | - ("radius,r", po::value<ptype>()->default_value(DEFAULT_SPHERE_A), "sphere radius") | |
323 | - ("samples,s", po::value<int>()->default_value(DEFAULT_SAMPLES), "Monte-Carlo samples used to compute Us") | |
324 | - ("sphere-file,S", po::value< vector<string> >()->multitoken(), "sphere file:\n [x y z radius material]") | |
325 | - ("amplitude,a", po::value<ptype>()->default_value(DEFAULT_AMPLITUDE), "incident field amplitude") | |
326 | - ("n,n", po::value<ptype>()->default_value(DEFAULT_N, "1.4"), "sphere phase speed") | |
327 | - ("k,k", po::value<ptype>()->default_value(DEFAULT_K), "sphere absorption coefficient") | |
328 | - ("material-file,M", po::value< vector<string> >()->multitoken(), "material file:\n [lambda n k]") | |
329 | - ("materials", po::value< vector<ptype> >()->multitoken(), "materials specified using n, k pairs:\n ex. --materials n1 k1 n2 k2\n (if used --n and --k are ignored)") | |
330 | - ("lambda,l", po::value<ptype>()->default_value(DEFAULT_LAMBDA), "incident wavelength") | |
499 | + ("help", "prints this help") | |
500 | + ("verbose", "verbose output\n") | |
501 | + | |
502 | + ("intensity", po::value<string>()->default_value(DEFAULT_INTENSITY_FILE), "output measured intensity (filename)") | |
503 | + ("absorbance", po::value<string>()->default_value(DEFAULT_ABSORBANCE_FILE), "output measured absorbance (filename)") | |
504 | + ("transmittance", po::value<string>()->default_value(DEFAULT_TRANSMITTANCE_FILE), "output measured transmittance (filename)") | |
505 | + ("far-field", po::value<string>()->default_value(DEFAULT_FAR_FILE), "output far-field at detector (filename)") | |
506 | + ("near-field", po::value<string>()->default_value(DEFAULT_NEAR_FILE), "output field at focal plane (filename)") | |
507 | + ("extended-source", po::value<string>()->default_value(DEFAULT_EXTENDED_SOURCE), "image of source at focus (filename)\n") | |
508 | + | |
509 | + ("spheres", po::value< vector<ptype> >()->multitoken(), "sphere position: x y z a m") | |
510 | + ("sphere-file", po::value< vector<string> >()->multitoken(), "sphere file:\n [x y z radius material]") | |
511 | + ("materials", po::value< vector<ptype> >()->multitoken(), "refractive indices as n, k pairs:\n ex. -m n0 k0 n1 k1 n2 k2") | |
512 | + ("material-file", po::value< vector<string> >()->multitoken(), "material file:\n [lambda n k]\n") | |
513 | + | |
514 | + ("lambda", po::value<ptype>()->default_value(DEFAULT_LAMBDA), "incident wavelength") | |
331 | 515 | ("nu", po::value<ptype>(), "incident frequency (in cm^-1)\n(if specified, lambda is ignored)") |
332 | - ("theta,t", po::value<ptype>()->default_value(DEFAULT_K_THETA), "light direction (polar coords)") | |
333 | - ("phi,p", po::value<ptype>()->default_value(DEFAULT_K_PHI)) | |
334 | - ("fx", po::value<ptype>()->default_value(DEFAULT_FOCUS_X), "incident focal point") | |
335 | - ("fy", po::value<ptype>()->default_value(DEFAULT_FOCUS_Y)) | |
336 | - ("fz", po::value<ptype>()->default_value(DEFAULT_FOCUS_Z)) | |
337 | - ("condenser-max,C", po::value<ptype>()->default_value(DEFAULT_CONDENSER_MAX), "condenser numerical aperature") | |
338 | - ("condenser-min,c", po::value<ptype>()->default_value(DEFAULT_CONDENSER_MIN), "condenser obscuration NA") | |
339 | - ("objective-max,O", po::value<ptype>()->default_value(DEFAULT_OBJECTIVE_MAX), "objective numerical aperature") | |
340 | - ("objective-min,o", po::value<ptype>()->default_value(DEFAULT_OBJECTIVE_MIN), "objective obscuration NA") | |
341 | - ("field-order", po::value<int>()->default_value(DEFAULT_FIELD_ORDER), "order of the incident field") | |
342 | - ("output-type,f", po::value<string>()->default_value(DEFAULT_FIELD_TYPE), "output field value:\n magnitude, polarization, real, imaginary, angular-spectrum") | |
343 | - ("resolution,R", po::value<unsigned int>()->default_value(DEFAULT_SLICE_RES), "resolution of the detector") | |
344 | - ("padding,d", po::value<unsigned int>()->default_value(DEFAULT_PADDING), "FFT padding for the objective bandpass") | |
516 | + ("k", po::value< vector<ptype> >()->multitoken(), "k-vector direction: -k theta phi\n theta = [0 2*pi], phi = [0 pi]") | |
517 | + ("amplitude", po::value<ptype>()->default_value(DEFAULT_AMPLITUDE), "incident field amplitude") | |
518 | + ("condenser", po::value< vector<ptype> >()->multitoken(), "condenser numerical aperature\nA pair of values can be used to specify an inner obscuration: -c NAin NAout") | |
519 | + ("objective", po::value< vector<ptype> >()->multitoken(), "objective numerical aperature\nA pair of values can be used to specify an inner obscuration: -c NAin NAout") | |
520 | + ("focus", po::value< vector<ptype> >()->multitoken(), "focal position for the incident point source\n (default = --focus 0 0 0)") | |
521 | + ("plane-wave", "simulates an incident plane wave\n") | |
522 | + | |
523 | + ("resolution", po::value<unsigned int>()->default_value(DEFAULT_SLICE_RES), "resolution of the detector") | |
524 | + ("plane-lower-left", po::value< vector<ptype> >()->multitoken(), "lower-left position of the image plane") | |
525 | + ("plane-upper-right", po::value< vector<ptype> >()->multitoken(), "upper-right position of the image plane") | |
526 | + ("plane-normal", po::value< vector<ptype> >()->multitoken(), "normal for the image plane") | |
527 | + ("xy", po::value< vector<ptype> >()->multitoken(), "specify an x-y image plane\n (standard microscope)") | |
528 | + ("xz", po::value< vector<ptype> >()->multitoken(), "specify a x-z image plane\n (cross-section of the focal volume)") | |
529 | + ("yz", po::value< vector<ptype> >()->multitoken(), "specify a y-z image plane\n (cross-section of the focal volume)\n") | |
530 | + | |
531 | + ("samples", po::value<int>()->default_value(DEFAULT_SAMPLES), "Monte-Carlo samples used to compute Us") | |
532 | + ("padding", po::value<unsigned int>()->default_value(DEFAULT_PADDING), "FFT padding for the objective bandpass") | |
345 | 533 | ("supersample", po::value<unsigned int>()->default_value(DEFAULT_SUPERSAMPLE), "super-sampling rate for the detector field") |
534 | + ("field-order", po::value<int>()->default_value(DEFAULT_FIELD_ORDER), "order of the incident field") | |
535 | + ("seed", po::value<unsigned int>(), "seed for the Monte-Carlo random number generator") | |
536 | + ("recursive", "evaluate all Bessel functions recursively\n") | |
537 | + ("recursive-us", "evaluate scattered-field Bessel functions recursively\n") | |
538 | + ("lut-uf", "evaluate the focused-field using a look-up table\n") | |
539 | + | |
540 | + ("output-type", po::value<string>()->default_value(DEFAULT_FIELD_TYPE), "output field value:\n magnitude, polarization, real, imaginary, angular-spectrum") | |
346 | 541 | ("colormap", po::value<string>()->default_value(DEFAULT_COLORMAP), "colormap: gray, brewer") |
347 | 542 | ("append", "append result to an existing file\n (binary files only)") |
348 | - ("plane-min-x,u", po::value<ptype>()->default_value(DEFAULT_SLICE_MIN_X), "lower-left corner of the field slice") | |
349 | - ("plane-min-y,v", po::value<ptype>()->default_value(DEFAULT_SLICE_MIN_Y)) | |
350 | - ("plane-min-z,w", po::value<ptype>()->default_value(DEFAULT_SLICE_MIN_Z)) | |
351 | - ("plane-max-x,U", po::value<ptype>()->default_value(DEFAULT_SLICE_MAX_X), "upper-right corner of the field slice") | |
352 | - ("plane-max-y,V", po::value<ptype>()->default_value(DEFAULT_SLICE_MAX_Y)) | |
353 | - ("plane-max-z,W", po::value<ptype>()->default_value(DEFAULT_SLICE_MAX_Z)) | |
354 | - ("plane-norm-x", po::value<ptype>()->default_value(DEFAULT_SLICE_NORM_X), "field slice normal") | |
355 | - ("plane-norm-y", po::value<ptype>()->default_value(DEFAULT_SLICE_NORM_Y)) | |
356 | - ("plane-norm-z", po::value<ptype>()->default_value(DEFAULT_SLICE_NORM_Z)); | |
543 | + ; | |
357 | 544 | } |
358 | 545 | |
359 | 546 | static void LoadParameters(int argc, char *argv[]) |
360 | 547 | { |
361 | 548 | //create an option description |
362 | - po::options_description desc("Allowed options"); | |
549 | + po::options_description desc("BimSim arguments"); | |
363 | 550 | |
364 | 551 | //fill it with options |
365 | 552 | SetOptions(desc); |
366 | 553 | |
367 | 554 | po::variables_map vm; |
368 | - po::store(po::parse_command_line(argc, argv, desc), vm); | |
555 | + po::store(po::parse_command_line(argc, argv, desc, po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm); | |
369 | 556 | po::notify(vm); |
370 | 557 | |
371 | - //display help and exit | |
372 | - if(vm.count("help")) | |
373 | - { | |
374 | - cout<<desc<<endl; | |
375 | - exit(1); | |
376 | - } | |
377 | 558 | |
378 | - //load the wavelength | |
379 | - if(vm.count("nu")) | |
380 | - { | |
381 | - //wavelength is given in wavenumber - transform and flag | |
382 | - SCOPE->nf.lambda = 10000/vm["nu"].as<ptype>(); | |
383 | - gFileOut.wavenumber = true; | |
384 | - } | |
385 | - //otherwise we are using lambda = wavelength | |
386 | - else | |
387 | - { | |
388 | - SCOPE->nf.lambda = vm["lambda"].as<ptype>(); | |
389 | - gFileOut.wavenumber = false; | |
390 | - } | |
559 | + //load flags (help, verbose output) | |
560 | + lFlags(vm, desc); | |
561 | + | |
562 | + //load the wavelength | |
563 | + lWavelength(vm); | |
564 | + | |
565 | + //load materials | |
566 | + //loadMaterials(vm); | |
567 | + lMaterials(vm); | |
568 | + | |
569 | + //load the sphere data | |
570 | + lSpheres(vm); | |
571 | + | |
572 | + //load the optics | |
573 | + lOptics(vm); | |
574 | + | |
575 | + //load the position and orientation of the image plane | |
576 | + lImagePlane(vm); | |
391 | 577 | |
392 | 578 | //load spheres |
393 | - loadSpheres(vm); | |
579 | + //loadSpheres(vm); | |
580 | + | |
394 | 581 | |
395 | - //load materials | |
396 | - loadMaterials(vm); | |
397 | 582 | |
398 | - loadNearfieldParams(vm); | |
583 | + lNearfield(vm); | |
399 | 584 | |
400 | 585 | loadOutputParams(vm); |
401 | 586 | |
402 | - loadMicroscopeParams(vm); | |
587 | + //loadMicroscopeParams(vm); | |
403 | 588 | |
404 | - loadSliceParams(vm); | |
589 | + //loadSliceParams(vm); | |
405 | 590 | |
406 | 591 | //if an extended source will be used |
407 | 592 | if(vm["extended-source"].as<string>() != "") | ... | ... |
scalarslice.cu
... | ... | @@ -22,16 +22,17 @@ scalarslice::scalarslice() |
22 | 22 | |
23 | 23 | scalarslice::~scalarslice() |
24 | 24 | { |
25 | - HANDLE_ERROR(cudaFree(S)); | |
25 | + if(S != NULL) | |
26 | + HANDLE_ERROR(cudaFree(S)); | |
26 | 27 | S = NULL; |
27 | 28 | } |
28 | 29 | |
29 | -void scalarslice::toImage(std::string filename, ptype vmin, ptype vmax, rts::colormap::colormapType cmap) | |
30 | +void scalarslice::toImage(std::string filename, ptype vmin, ptype vmax, rts::colormapType cmap) | |
30 | 31 | { |
31 | - rts::colormap::gpu2image<ptype>(S, filename, R[0], R[1], vmin, vmax, cmap); | |
32 | + rts::gpu2image<ptype>(S, filename, R[0], R[1], vmin, vmax, cmap); | |
32 | 33 | } |
33 | 34 | |
34 | -void scalarslice::toImage(std::string filename, bool positive, rts::colormap::colormapType cmap) | |
35 | +void scalarslice::toImage(std::string filename, bool positive, rts::colormapType cmap) | |
35 | 36 | { |
36 | 37 | cublasStatus_t stat; |
37 | 38 | cublasHandle_t handle; |
... | ... | @@ -62,7 +63,7 @@ void scalarslice::toImage(std::string filename, bool positive, rts::colormap::co |
62 | 63 | exit(1); |
63 | 64 | } |
64 | 65 | |
65 | - //std::cout<<"Maximum index: "<<result<<std::endl; | |
66 | + | |
66 | 67 | |
67 | 68 | //retrieve the maximum value |
68 | 69 | ptype maxVal; |
... | ... | @@ -75,7 +76,7 @@ void scalarslice::toImage(std::string filename, bool positive, rts::colormap::co |
75 | 76 | if(positive) |
76 | 77 | toImage(filename, 0, maxVal, cmap); |
77 | 78 | else |
78 | - toImage(filename, -maxVal, maxVal, cmap); | |
79 | + toImage(filename, -abs(maxVal), abs(maxVal), cmap); | |
79 | 80 | } |
80 | 81 | |
81 | 82 | void scalarslice::toEnvi(std::string filename, ptype wavelength, bool append) | ... | ... |
scalarslice.h
... | ... | @@ -2,7 +2,7 @@ |
2 | 2 | #define RTS_SCALAR_SLICE |
3 | 3 | |
4 | 4 | #include "dataTypes.h" |
5 | -#include "colormap.h" | |
5 | +#include "rts/graphics/colormap.h" | |
6 | 6 | |
7 | 7 | struct scalarslice |
8 | 8 | { |
... | ... | @@ -17,8 +17,8 @@ struct scalarslice |
17 | 17 | ~scalarslice(); |
18 | 18 | void clear(); |
19 | 19 | |
20 | - void toImage(std::string filename, ptype vmin, ptype vmax, rts::colormap::colormapType cmap = rts::colormap::cmBrewer); | |
21 | - void toImage(std::string filename, bool positive = true, rts::colormap::colormapType cmap = rts::colormap::cmBrewer); | |
20 | + void toImage(std::string filename, ptype vmin, ptype vmax, rts::colormapType cmap = rts::cmBrewer); | |
21 | + void toImage(std::string filename, bool positive = true, rts::colormapType cmap = rts::cmBrewer); | |
22 | 22 | void toEnvi(std::string filename, ptype wavelength = 0, bool append = false); |
23 | 23 | |
24 | 24 | }; | ... | ... |
sphere.cpp
1 | 1 | #include "sphere.h" |
2 | +#include "defaults.h" | |
2 | 3 | |
3 | 4 | #include "rts/math/complex.h" |
4 | 5 | #include <complex> |
5 | 6 | #include <stdlib.h> |
7 | +#include <fstream> | |
6 | 8 | |
7 | 9 | using namespace rts; |
8 | 10 | using namespace std; |
... | ... | @@ -13,6 +15,9 @@ int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, |
13 | 15 | int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv, |
14 | 16 | complex<double>*cyv,complex<double>*cjvp,complex<double>*cyvp); |
15 | 17 | |
18 | +int bessjyv_sph(int v, double z, double &vm, double* cjv, | |
19 | + double* cyv, double* cjvp, double* cyvp); | |
20 | + | |
16 | 21 | void sphere::calcCoeff(ptype lambda, rtsComplex<ptype> ri) |
17 | 22 | { |
18 | 23 | /* These calculations are done at high-precision on the CPU |
... | ... | @@ -59,12 +64,6 @@ void sphere::calcCoeff(ptype lambda, rtsComplex<ptype> ri) |
59 | 64 | cbessjyva_sph(Nl, ka, vm, cjv_ka, cyv_ka, cjvp_ka, cyvp_ka); |
60 | 65 | cbessjyva_sph(Nl, kna, vm, cjv_kna, cyv_kna, cjvp_kna, cyvp_kna); |
61 | 66 | |
62 | - | |
63 | - //cout<<"Begin Sphere---------"<<endl; | |
64 | - //cout<<"Nl = "<<Nl<<endl; | |
65 | - //cout<<"ka = "<<ka<<endl; | |
66 | - //cout<<"kna = "<<kna<<endl; | |
67 | - | |
68 | 67 | //compute A for each order |
69 | 68 | complex<double> i(0, 1); |
70 | 69 | complex<double> a, b, c, d; |
... | ... | @@ -83,7 +82,7 @@ void sphere::calcCoeff(ptype lambda, rtsComplex<ptype> ri) |
83 | 82 | //calculate A and add it to the list |
84 | 83 | An = (2.0 * l + 1.0) * pow(i, l) * (a / b); |
85 | 84 | A.push_back(bsComplex(An.real(), An.imag())); |
86 | - //cout<<"A: "<<An<<endl; | |
85 | + | |
87 | 86 | |
88 | 87 | //Compute B (external scattering coefficient) |
89 | 88 | c = cjv_ka[l] * cjvp_kna[l] * nc - cjv_kna[l] * cjvp_ka[l]; |
... | ... | @@ -92,7 +91,206 @@ void sphere::calcCoeff(ptype lambda, rtsComplex<ptype> ri) |
92 | 91 | //calculate B and add it to the list |
93 | 92 | Bn = (2.0 * l + 1.0) * pow(i, l) * (c / d); |
94 | 93 | B.push_back(bsComplex(Bn.real(), Bn.imag())); |
95 | - //cout<<"B: "<<Bn<<endl; | |
96 | 94 | |
95 | + | |
96 | + } | |
97 | +} | |
98 | + | |
99 | +void sphere::calcBesselLut(bsComplex* j, ptype k, bsComplex n, int aR) | |
100 | +{ | |
101 | + /*Compute the look-up-table for spherical bessel functions used inside of the sphere | |
102 | + j = (Nl + 1) x aR array of values | |
103 | + aR = resolution of j | |
104 | + */ | |
105 | + | |
106 | + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored) | |
107 | + int bytes = sizeof(complex<double>) * (Nl + 1); | |
108 | + complex<double>* cjv_knr = (complex<double>*)malloc(bytes); | |
109 | + complex<double>* cyv_knr = (complex<double>*)malloc(bytes); | |
110 | + complex<double>* cjvp_knr = (complex<double>*)malloc(bytes); | |
111 | + complex<double>* cyvp_knr = (complex<double>*)malloc(bytes); | |
112 | + | |
113 | + //compute the bessel functions using the CPU-based algorithm | |
114 | + double vm; | |
115 | + | |
116 | + //for each sample along r | |
117 | + ptype dr = a / (aR - 1); | |
118 | + ptype r; | |
119 | + for(int ir = 0; ir < aR; ir++) | |
120 | + { | |
121 | + r = ir * dr; | |
122 | + complex<double> knr( (k*n*r).real(), (k*n*r).imag() ); | |
123 | + cbessjyva_sph(Nl, knr, vm, cjv_knr, cyv_knr, cjvp_knr, cyvp_knr); | |
124 | + | |
125 | + //copy the double data to the bsComplex array | |
126 | + for(int l=0; l<=Nl; l++) | |
127 | + { | |
128 | + //deal with the NaN case at the origin | |
129 | + if(ir == 0) | |
130 | + { | |
131 | + if(l == 0) | |
132 | + j[ir * (Nl+1)] = 1; | |
133 | + else | |
134 | + j[ir * (Nl+1) + l] = 0; | |
135 | + } | |
136 | + else | |
137 | + j[ir * (Nl+1) + l] = bsComplex(cjv_knr[l].real(), cjv_knr[l].imag()); | |
138 | + } | |
139 | + } | |
140 | + | |
141 | + /*ofstream outfile("besselout.txt"); | |
142 | + for(int ir = 0; ir < aR; ir++) | |
143 | + { | |
144 | + for(int l = 0; l<Nl+1; l++) | |
145 | + { | |
146 | + outfile<<j[ir * (Nl+1) + l].real()<<" "; | |
147 | + } | |
148 | + outfile<<endl; | |
149 | + } | |
150 | + outfile.close();*/ | |
151 | + | |
152 | +} | |
153 | + | |
154 | +void sphere::calcHankelLut(bsComplex* h, ptype k, int rR) | |
155 | +{ | |
156 | + /*Compute the look-up-table for spherical bessel functions used inside of the sphere | |
157 | + h_out = (Nl + 1) x aR array of values | |
158 | + rmin = minimum value of r | |
159 | + d_max = maximum value of r | |
160 | + rR = resolution of h_out | |
161 | + */ | |
162 | + | |
163 | + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored) | |
164 | + int bytes = sizeof(double) * (Nl + 1); | |
165 | + double* cjv_kr = (double*)malloc(bytes); | |
166 | + double* cyv_kr = (double*)malloc(bytes); | |
167 | + double* cjvp_kr = (double*)malloc(bytes); | |
168 | + double* cyvp_kr = (double*)malloc(bytes); | |
169 | + | |
170 | + //compute the bessel functions using the CPU-based algorithm | |
171 | + double vm; | |
172 | + | |
173 | + | |
174 | + | |
175 | + //for each sample along r | |
176 | + ptype dr = (d_max - max(a, d_min)) / (rR - 1); | |
177 | + ptype r; | |
178 | + for(int ir = 0; ir < rR; ir++) | |
179 | + { | |
180 | + r = ir * dr + max(a, d_min); | |
181 | + double kr = k*r; | |
182 | + bessjyv_sph(Nl, kr, vm, cjv_kr, cyv_kr, cjvp_kr, cyvp_kr); | |
183 | + | |
184 | + //copy the double data to the bsComplex array | |
185 | + for(int l=0; l<=Nl; l++) | |
186 | + { | |
187 | + //h[ir * (Nl+1) + l] = bsComplex(cjv_kr[l].real(), cyv_kr[l].real()); | |
188 | + h[ir * (Nl+1) + l] = bsComplex(cjv_kr[l], cyv_kr[l]); | |
189 | + } | |
97 | 190 | } |
191 | + | |
192 | + /*ofstream outfile("hankelout.txt"); | |
193 | + for(int ir = 0; ir < rR; ir++) | |
194 | + { | |
195 | + outfile<<ir*dr + max(a, d_min)<<" "; | |
196 | + for(int l = 0; l<=0; l++) | |
197 | + { | |
198 | + outfile<<h[ir * (Nl+1) + l].real()<<" "<<h[ir * (Nl+1) + l].imag()<<" "; | |
199 | + } | |
200 | + outfile<<endl; | |
201 | + } | |
202 | + outfile.close();*/ | |
203 | +} | |
204 | + | |
205 | +void sphere::calcLut(bsComplex* j, bsComplex* h, ptype lambda, bsComplex n, int aR, int rR) | |
206 | +{ | |
207 | + /*Compute the look-up-tables for spherical bessel functions used both inside and outside of the sphere. | |
208 | + j = (Nl + 1) x aR array of values | |
209 | + j = (Nl + 1) x rR array of values | |
210 | + d_max = maximum distance for the LUT | |
211 | + aR = resolution of j_in | |
212 | + rR = resolution of j_out | |
213 | + */ | |
214 | + | |
215 | + //compute the magnitude of the k vector | |
216 | + double k = 2 * PI / lambda; | |
217 | + | |
218 | + calcBesselLut(j, k, n, aR); | |
219 | + calcHankelLut(h, k, rR); | |
220 | +} | |
221 | + | |
222 | +void sphere::calcUp(ptype lambda, bsComplex n, rts::rtsQuad<ptype, 3> nfPlane, unsigned int R) | |
223 | +{ | |
224 | + //calculate the parameters of the lookup table | |
225 | + | |
226 | + //first find the distance to the closest and furthest points on the nearfield plane | |
227 | + d_min = nfPlane.dist(p); | |
228 | + d_max = nfPlane.dist_max(p); | |
229 | + | |
230 | + //compute the radius of the cross-section of the sphere with the plane | |
231 | + ptype a_inter = 0; | |
232 | + if(d_min < a) | |
233 | + a_inter = sqrt(a - d_min); | |
234 | + | |
235 | + | |
236 | + //calculate the resolution of the Usp and Uip lookup tables | |
237 | + int aR = 1 + 2 * R * a_inter / (nfPlane(0, 0) - nfPlane(1, 1)).len(); | |
238 | + int dR = 2 * R; | |
239 | + int thetaR = DEFAULT_SPHERE_THETA_R; | |
240 | + | |
241 | + //allocate space for the bessel function LUTs | |
242 | + bsComplex* j = (bsComplex*)malloc(sizeof(bsComplex) * (Nl + 1) * aR); | |
243 | + bsComplex* h = (bsComplex*)malloc(sizeof(bsComplex) * (Nl + 1) * dR); | |
244 | + | |
245 | + calcLut(j, h, lambda, n, aR, dR); | |
246 | + | |
247 | + //allocate space for the Usp lookup texture | |
248 | + Usp.R[0] = dR; | |
249 | + Usp.R[1] = thetaR; | |
250 | + Usp.init_gpu(); | |
251 | + | |
252 | + //allocate space for the Uip lookup texture | |
253 | + Uip.R[0] = aR; | |
254 | + Uip.R[1] = thetaR; | |
255 | + Uip.init_gpu(); | |
256 | + | |
257 | + | |
258 | + | |
259 | + scalarUsp(h, dR, thetaR); | |
260 | + scalarUip(j, aR, thetaR); | |
261 | + | |
262 | + scalarslice UspMag = Usp.Mag(); | |
263 | + UspMag.toImage("Usp.bmp", true); | |
264 | + | |
265 | + scalarslice UipMag = Uip.Mag(); | |
266 | + UipMag.toImage("Uip.bmp", true); | |
267 | + | |
268 | + //free memory | |
269 | + free(j); | |
270 | + free(h); | |
271 | + | |
272 | +} | |
273 | + | |
274 | +sphere& sphere::operator=(const sphere &rhs) | |
275 | +{ | |
276 | + p = rhs.p; | |
277 | + a = rhs.a; | |
278 | + iMaterial = rhs.iMaterial; | |
279 | + Nl = rhs.Nl; | |
280 | + n = rhs.n; | |
281 | + B = rhs.B; | |
282 | + A = rhs.A; | |
283 | + | |
284 | + return *this; | |
285 | +} | |
286 | + | |
287 | +sphere::sphere(const sphere &rhs) | |
288 | +{ | |
289 | + p = rhs.p; | |
290 | + a = rhs.a; | |
291 | + iMaterial = rhs.iMaterial; | |
292 | + Nl = rhs.Nl; | |
293 | + n = rhs.n; | |
294 | + B = rhs.B; | |
295 | + A = rhs.A; | |
98 | 296 | } | ... | ... |
1 | +#include "sphere.h" | |
2 | +#include "rts/math/legendre.h" | |
3 | + | |
//GPU kernel: evaluates  Usp[itheta * rR + ir] = sum over l = 0..Nl of  B[l] * h[ir * (Nl + 1) + l] * P_l
//	where P_l is the Legendre term for cos(theta) produced by rts::init_legendre / rts::shift_legendre
//	(presumably the Legendre polynomial of order l -- confirm against rts/math/legendre.h)
//	launch layout: 2D grid; x indexes ir in [0, rR), y indexes itheta in [0, thetaR);
//	               threads outside that range exit immediately
//	Usp    = output array of thetaR x rR values
//	h      = look-up table of rR x (Nl + 1) values
//	B      = Nl + 1 coefficients
//	theta is sampled uniformly on [0, PI]; a single sample (thetaR == 1) is taken at theta = 0
__global__ void gpuScalarUsp(bsComplex* Usp, bsComplex* h, bsComplex* B, int Nl, int rR, int thetaR)
{
	//get the current coordinate in the plane slice
	int ir = blockIdx.x * blockDim.x + threadIdx.x;
	int itheta = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(itheta >= thetaR || ir >= rR) return;

	//angular step; guarded so that thetaR == 1 does not divide by zero and produce NaN
	ptype dtheta = 0;
	if(thetaR > 1)
		dtheta = (PI) / (thetaR - 1);

	//compute the current angle
	ptype theta = dtheta * itheta;
	ptype cos_theta = cos(theta);

	//initialize the Legendre polynomial
	ptype P[2];
	rts::init_legendre<ptype>(cos_theta, P[0], P[1]);

	//row of the look-up table that belongs to this radial sample
	bsComplex* h_row = h + ir * (Nl + 1);

	//initialize the result
	bsComplex Us((ptype)0, (ptype)0);

	//order 0 uses P[0]
	if(Nl >= 0)
		Us += B[0] * h_row[0] * P[0];

	//orders 1..Nl use P[1], which is advanced for every order above 1
	for(int l = 1; l <= Nl; l++)
	{
		if(l > 1)
			rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);

		Us += B[l] * h_row[l] * P[1];
	}

	Usp[itheta * rR + ir] = Us;
}
55 | + | |
//GPU kernel: evaluates  Uip[itheta * aR + ia] = sum over l = 0..Nl of  A[l] * j[ia * (Nl + 1) + l] * P_l
//	where P_l is the Legendre term for cos(theta) produced by rts::init_legendre / rts::shift_legendre
//	(presumably the Legendre polynomial of order l -- confirm against rts/math/legendre.h)
//	launch layout: 2D grid; x indexes ia in [0, aR), y indexes itheta in [0, thetaR);
//	               threads outside that range exit immediately
//	Uip    = output array of thetaR x aR values
//	j      = look-up table of aR x (Nl + 1) values
//	A      = Nl + 1 coefficients
//	theta is sampled uniformly on [0, PI]; a single sample (thetaR == 1) is taken at theta = 0
__global__ void gpuScalarUip(bsComplex* Uip, bsComplex* j, bsComplex* A, int Nl, int aR, int thetaR)
{
	//get the current coordinate in the plane slice
	int ia = blockIdx.x * blockDim.x + threadIdx.x;
	int itheta = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(itheta >= thetaR || ia >= aR) return;

	//angular step; guarded so that thetaR == 1 does not divide by zero and produce NaN
	ptype dtheta = 0;
	if(thetaR > 1)
		dtheta = (PI) / (thetaR - 1);

	//compute the current angle
	ptype theta = dtheta * itheta;
	ptype cos_theta = cos(theta);

	//initialize the Legendre polynomial
	ptype P[2];
	rts::init_legendre<ptype>(cos_theta, P[0], P[1]);

	//row of the look-up table that belongs to this radial sample
	bsComplex* j_row = j + ia * (Nl + 1);

	//initialize the result
	bsComplex Ui((ptype)0, (ptype)0);

	//order 0 uses P[0]
	if(Nl >= 0)
		Ui += A[0] * j_row[0] * P[0];

	//orders 1..Nl use P[1], which is advanced for every order above 1
	for(int l = 1; l <= Nl; l++)
	{
		if(l > 1)
			rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);

		Ui += A[l] * j_row[l] * P[1];
	}

	Uip[itheta * aR + ia] = Ui;
}
100 | + | |
//Evaluate the Usp field on the GPU.
//	h      = host look-up table of rR x (Nl + 1) values (copied to the device for the kernel)
//	rR     = number of samples along r
//	thetaR = number of samples along theta
//The result is written to Usp.x_hat, which must already be allocated on the device; the launch
//	grid is sized from Usp.R[0] x Usp.R[1].
void sphere::scalarUsp(bsComplex* h, int rR, int thetaR)
{
	//copy the hankel function to the GPU
	size_t lutBytes = sizeof(bsComplex) * (Nl + 1) * rR;
	bsComplex* gpu_h;
	HANDLE_ERROR( cudaMalloc( (void**)&gpu_h, lutBytes ) );
	HANDLE_ERROR( cudaMemcpy( gpu_h, h, lutBytes, cudaMemcpyHostToDevice ) );

	//allocate memory for the scattering coefficients and copy them to the GPU
	size_t coeffBytes = (Nl + 1) * sizeof(bsComplex);
	bsComplex* gpuB;
	HANDLE_ERROR( cudaMalloc( (void**)&gpuB, coeffBytes ) );
	HANDLE_ERROR( cudaMemcpy( gpuB, &B[0], coeffBytes, cudaMemcpyHostToDevice ) );

	//create one thread for each pixel of the field slice
	dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
	dim3 dimGrid((Usp.R[0] + SQRT_BLOCK - 1) / SQRT_BLOCK, (Usp.R[1] + SQRT_BLOCK - 1) / SQRT_BLOCK);

	gpuScalarUsp<<<dimGrid, dimBlock>>>(Usp.x_hat, gpu_h, gpuB, Nl, rR, thetaR);

	//a kernel launch returns no status; query it so a bad launch configuration is reported
	HANDLE_ERROR( cudaGetLastError() );

	//free memory (the return codes are checked so that a pending error is not silently dropped)
	HANDLE_ERROR( cudaFree(gpu_h) );
	HANDLE_ERROR( cudaFree(gpuB) );
}
125 | + | |
//Evaluate the Uip field on the GPU.
//	j      = host look-up table of rR x (Nl + 1) values (copied to the device for the kernel)
//	rR     = number of samples in the table (declared as aR in sphere.h)
//	thetaR = number of samples along theta
//The result is written to Uip.x_hat, which must already be allocated on the device; the launch
//	grid is sized from Uip.R[0] x Uip.R[1].
void sphere::scalarUip(bsComplex* j, int rR, int thetaR)
{
	//copy the bessel LUT to the GPU
	size_t lutBytes = sizeof(bsComplex) * (Nl + 1) * rR;
	bsComplex* gpu_j;
	HANDLE_ERROR( cudaMalloc( (void**)&gpu_j, lutBytes ) );
	HANDLE_ERROR( cudaMemcpy( gpu_j, j, lutBytes, cudaMemcpyHostToDevice ) );

	//allocate memory for the scattering coefficients and copy them to the GPU
	size_t coeffBytes = (Nl + 1) * sizeof(bsComplex);
	bsComplex* gpuA;
	HANDLE_ERROR( cudaMalloc( (void**)&gpuA, coeffBytes ) );
	HANDLE_ERROR( cudaMemcpy( gpuA, &A[0], coeffBytes, cudaMemcpyHostToDevice ) );

	//create one thread for each pixel of the field slice
	dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
	dim3 dimGrid((Uip.R[0] + SQRT_BLOCK - 1) / SQRT_BLOCK, (Uip.R[1] + SQRT_BLOCK - 1) / SQRT_BLOCK);

	gpuScalarUip<<<dimGrid, dimBlock>>>(Uip.x_hat, gpu_j, gpuA, Nl, rR, thetaR);

	//a kernel launch returns no status; query it so a bad launch configuration is reported
	HANDLE_ERROR( cudaGetLastError() );

	//free memory (the return codes are checked so that a pending error is not silently dropped)
	HANDLE_ERROR( cudaFree(gpu_j) );
	HANDLE_ERROR( cudaFree(gpuA) );
}
sphere.h
... | ... | @@ -22,12 +22,12 @@ struct sphere |
22 | 22 | //sphere material index |
23 | 23 | int iMaterial; |
24 | 24 | |
25 | - //rtsPointer to the scattered field produced by a plane wave | |
25 | + //GPU pointer to the scattered field produced by a plane wave | |
26 | 26 | // this is a function of cos(theta) and |r| (distance from sphere center) |
27 | - //fieldslice surface; | |
28 | - | |
29 | - //resolution of the scattered field | |
30 | - int thetaR, rR; | |
27 | + fieldslice Usp; | |
28 | + fieldslice Uip; | |
29 | + ptype d_min; | |
30 | + ptype d_max; | |
31 | 31 | |
32 | 32 | //sphere order |
33 | 33 | int Nl; |
... | ... | @@ -50,6 +50,12 @@ struct sphere |
50 | 50 | //surface = fieldslice(ang, ang/2); |
51 | 51 | } |
52 | 52 | |
53 | + //assignment operator | |
54 | + sphere & operator=(const sphere &rhs); | |
55 | + | |
56 | + //copy constructor | |
57 | + sphere(const sphere &rhs); | |
58 | + | |
53 | 59 | std::string toStr() |
54 | 60 | { |
55 | 61 | std::stringstream ss; |
... | ... | @@ -66,8 +72,19 @@ struct sphere |
66 | 72 | Nl = ceil( (2 * PI * a) / lambda + 4 * pow( (2 * PI * a) / lambda, 1.0/3.0) + 2); |
67 | 73 | } |
68 | 74 | |
69 | - void calcCoeff(ptype lambda, rts::rtsComplex<ptype> n); | |
75 | + //compute the scattering coefficients | |
76 | + void calcCoeff(ptype lambda, bsComplex n); | |
77 | + | |
78 | + //compute the bessel function look-up tables | |
79 | + void calcLut(bsComplex* j, bsComplex* h, ptype lambda, bsComplex n, int aR, int rR); | |
80 | + void calcBesselLut(bsComplex* j, ptype k, bsComplex n, int aR); | |
81 | + void calcHankelLut(bsComplex* h, ptype k, int rR); | |
82 | + | |
83 | + //calculate the scattering domain Us(theta, r) | |
84 | + void calcUp(ptype lambda, bsComplex n, rts::rtsQuad<ptype, 3> nfPlane, unsigned int R); | |
70 | 85 | |
86 | + void scalarUsp(bsComplex* h, int rR, int thetaR); | |
87 | + void scalarUip(bsComplex* j, int aR, int thetaR); | |
71 | 88 | |
72 | 89 | |
73 | 90 | ... | ... |