Commit 51b6469a3ee77583099edb0a57e1bb7859c28fd1
1 parent
b6179de6
added look-up tables
Showing
27 changed files
with
1518 additions
and
588 deletions
Show diff stats
bessjy.cpp
... | ... | @@ -13,7 +13,9 @@ |
13 | 13 | // |
14 | 14 | #define _USE_MATH_DEFINES |
15 | 15 | #include <math.h> |
16 | -#include "bessel.h" | |
16 | +#include "bessel.h" | |
17 | + | |
18 | +#define PI 3.14159 | |
17 | 19 | |
18 | 20 | double gamma(double x); |
19 | 21 | // |
... | ... | @@ -426,7 +428,7 @@ int bessjynb(int n,double x,int &nm,double *jn,double *yn, |
426 | 428 | 0.2775764465332031, |
427 | 429 | -1.993531733751297, |
428 | 430 | 2.724882731126854e1}; |
429 | - | |
431 | + | |
430 | 432 | int i,k,m; |
431 | 433 | nm = n; |
432 | 434 | if ((x < 0.0) || (n < 0)) return 1; |
... | ... | @@ -702,5 +704,26 @@ int bessjyv(double v,double x,double &vm,double *jv,double *yv, |
702 | 704 | } |
703 | 705 | vm = n + v0; |
704 | 706 | return 0; |
707 | +} | |
708 | + | |
709 | +int bessjyv_sph(int v, double z, double &vm, double* cjv, | |
710 | + double* cyv, double* cjvp, double* cyvp) | |
711 | +{ | |
712 | + //first, compute the bessel functions of fractional order | |
713 | + bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); | |
714 | + | |
715 | + //iterate through each and scale | |
716 | + for(int n = 0; n<=v; n++) | |
717 | + { | |
718 | + | |
719 | + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); | |
720 | + cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); | |
721 | + | |
722 | + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(PI / (z * 2.0)); | |
723 | + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(PI / (z * 2.0)); | |
724 | + } | |
725 | + | |
726 | + return 0; | |
727 | + | |
705 | 728 | } |
706 | - | |
729 | + | ... | ... |
cbessjy.cpp
... | ... | @@ -724,6 +724,7 @@ int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv, |
724 | 724 | //iterate through each and scale |
725 | 725 | for(int n = 0; n<=v; n++) |
726 | 726 | { |
727 | + | |
727 | 728 | cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); |
728 | 729 | cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); |
729 | 730 | ... | ... |
colormap.h deleted
1 | -#ifndef RTS_COLORMAP_H | |
2 | -#define RTS_COLORMAP_H | |
3 | - | |
4 | -#include <string> | |
5 | -#include <qimage.h> | |
6 | -#include <qcolor.h> | |
7 | -#include "rts/cuda/error.h" | |
8 | - | |
9 | - | |
10 | -#define BREWER_CTRL_PTS 11 | |
11 | - | |
12 | -#ifdef __CUDACC__ | |
13 | -texture<float4, cudaTextureType1D> cudaTexBrewer; | |
14 | -static cudaArray* gpuBrewer; | |
15 | -#endif | |
16 | - | |
17 | - | |
18 | - | |
19 | -namespace rts{ | |
20 | - namespace colormap{ | |
21 | - | |
22 | -enum colormapType {cmBrewer, cmGrayscale}; | |
23 | - | |
24 | -static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size) | |
25 | -{ | |
26 | - //create an image object | |
27 | - QImage image(x_size, y_size, QImage::Format_RGB32); | |
28 | - | |
29 | - int i; | |
30 | - unsigned char r, g, b; | |
31 | - unsigned int x, y; | |
32 | - for(y=0; y<y_size; y++) | |
33 | - for(x=0; x<x_size; x++) | |
34 | - { | |
35 | - //calculate the 1D index | |
36 | - i = y * x_size + x; | |
37 | - | |
38 | - r = buffer[i * 3 + 0]; | |
39 | - g = buffer[i * 3 + 1]; | |
40 | - b = buffer[i * 3 + 2]; | |
41 | - | |
42 | - //set the image pixel | |
43 | - QColor color(r, g, b); | |
44 | - image.setPixel(x, y, color.rgb()); | |
45 | - } | |
46 | - | |
47 | - image.save(filename.c_str()); | |
48 | -} | |
49 | - | |
50 | -#ifdef __CUDACC__ | |
51 | -static void initBrewer() | |
52 | -{ | |
53 | - //initialize the Brewer colormap | |
54 | - | |
55 | - //allocate CPU space | |
56 | - float4 cpuColorMap[BREWER_CTRL_PTS]; | |
57 | - | |
58 | - //define control rtsPoints | |
59 | - cpuColorMap[0] = make_float4(0.192157f, 0.211765f, 0.584314f, 1.0f); | |
60 | - cpuColorMap[1] = make_float4(0.270588f, 0.458824f, 0.705882f, 1.0f); | |
61 | - cpuColorMap[2] = make_float4(0.454902f, 0.678431f, 0.819608f, 1.0f); | |
62 | - cpuColorMap[3] = make_float4(0.670588f, 0.85098f, 0.913725f, 1.0f); | |
63 | - cpuColorMap[4] = make_float4(0.878431f, 0.952941f, 0.972549f, 1.0f); | |
64 | - cpuColorMap[5] = make_float4(1.0f, 1.0f, 0.74902f, 1.0f); | |
65 | - cpuColorMap[6] = make_float4(0.996078f, 0.878431f, 0.564706f, 1.0f); | |
66 | - cpuColorMap[7] = make_float4(0.992157f, 0.682353f, 0.380392f, 1.0f); | |
67 | - cpuColorMap[8] = make_float4(0.956863f, 0.427451f, 0.262745f, 1.0f); | |
68 | - cpuColorMap[9] = make_float4(0.843137f, 0.188235f, 0.152941f, 1.0f); | |
69 | - cpuColorMap[10] = make_float4(0.647059f, 0.0f, 0.14902f, 1.0f); | |
70 | - | |
71 | - | |
72 | - int width = BREWER_CTRL_PTS; | |
73 | - int height = 0; | |
74 | - | |
75 | - | |
76 | - // allocate array and copy colormap data | |
77 | - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat); | |
78 | - | |
79 | - HANDLE_ERROR(cudaMallocArray(&gpuBrewer, &channelDesc, width, height)); | |
80 | - | |
81 | - HANDLE_ERROR(cudaMemcpyToArray(gpuBrewer, 0, 0, cpuColorMap, sizeof(float4)*width, cudaMemcpyHostToDevice)); | |
82 | - | |
83 | - // set texture parameters | |
84 | - cudaTexBrewer.addressMode[0] = cudaAddressModeClamp; | |
85 | - //texBrewer.addressMode[1] = cudaAddressModeClamp; | |
86 | - cudaTexBrewer.filterMode = cudaFilterModeLinear; | |
87 | - cudaTexBrewer.normalized = true; // access with normalized texture coordinates | |
88 | - | |
89 | - // Bind the array to the texture | |
90 | - HANDLE_ERROR(cudaBindTextureToArray( cudaTexBrewer, gpuBrewer, channelDesc)); | |
91 | - | |
92 | -} | |
93 | - | |
94 | -static void destroyBrewer() | |
95 | -{ | |
96 | - HANDLE_ERROR(cudaFreeArray(gpuBrewer)); | |
97 | - | |
98 | -} | |
99 | - | |
100 | -template<class T> | |
101 | -__global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) | |
102 | -{ | |
103 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | |
104 | - if(i >= N) return; | |
105 | - | |
106 | - //compute the normalized value on [minVal maxVal] | |
107 | - float a = (gpuSource[i] - minVal) / (maxVal - minVal); | |
108 | - | |
109 | - //lookup the color | |
110 | - float shift = 1.0/BREWER_CTRL_PTS; | |
111 | - float4 color = tex1D(cudaTexBrewer, a+shift); | |
112 | - | |
113 | - gpuDest[i * 3 + 0] = 255 * color.x; | |
114 | - gpuDest[i * 3 + 1] = 255 * color.y; | |
115 | - gpuDest[i * 3 + 2] = 255 * color.z; | |
116 | -} | |
117 | - | |
118 | -template<class T> | |
119 | -__global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) | |
120 | -{ | |
121 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | |
122 | - if(i >= N) return; | |
123 | - | |
124 | - //compute the normalized value on [minVal maxVal] | |
125 | - float a = (gpuSource[i] - minVal) / (maxVal - minVal); | |
126 | - | |
127 | - gpuDest[i * 3 + 0] = 255 * a; | |
128 | - gpuDest[i * 3 + 1] = 255 * a; | |
129 | - gpuDest[i * 3 + 2] = 255 * a; | |
130 | -} | |
131 | - | |
132 | -template<class T> | |
133 | -static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T minVal = 0, T maxVal = 1, colormapType cm = cmGrayscale, int blockDim = 128) | |
134 | -{ | |
135 | - //This function converts a scalar field on the GPU to a color image on the GPU | |
136 | - int gridDim = (nVals + blockDim - 1)/blockDim; | |
137 | - if(cm == cmGrayscale) | |
138 | - applyGrayscale<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal); | |
139 | - else if(cm == cmBrewer) | |
140 | - { | |
141 | - initBrewer(); | |
142 | - applyBrewer<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal); | |
143 | - destroyBrewer(); | |
144 | - } | |
145 | - | |
146 | -} | |
147 | - | |
148 | -template<class T> | |
149 | -static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T minVal, T maxVal, colormapType cm = cmGrayscale) | |
150 | -{ | |
151 | - //this function converts a scalar field on the GPU to a color image on the CPU | |
152 | - | |
153 | - //first create the color image on the GPU | |
154 | - | |
155 | - //allocate GPU memory for the color image | |
156 | - unsigned char* gpuDest; | |
157 | - HANDLE_ERROR(cudaMalloc( (void**)&gpuDest, sizeof(unsigned char) * nVals * 3 )); | |
158 | - | |
159 | - //HANDLE_ERROR(cudaMemset(gpuSource, 0, sizeof(T) * nVals)); | |
160 | - | |
161 | - //create the image on the gpu | |
162 | - gpu2gpu(gpuSource, gpuDest, nVals, minVal, maxVal, cm); | |
163 | - | |
164 | - //HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3)); | |
165 | - | |
166 | - //copy the image from the GPU to the CPU | |
167 | - HANDLE_ERROR(cudaMemcpy(cpuDest, gpuDest, sizeof(unsigned char) * nVals * 3, cudaMemcpyDeviceToHost)); | |
168 | - | |
169 | - HANDLE_ERROR(cudaFree( gpuDest )); | |
170 | - | |
171 | -} | |
172 | - | |
173 | -template<typename T> | |
174 | -static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) | |
175 | -{ | |
176 | - //allocate a color buffer | |
177 | - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); | |
178 | - | |
179 | - //do the mapping | |
180 | - gpu2cpu<T>(gpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); | |
181 | - | |
182 | - //copy the buffer to an image | |
183 | - buffer2image(cpuBuffer, fileDest, x_size, y_size); | |
184 | - | |
185 | - free(cpuBuffer); | |
186 | -} | |
187 | - | |
188 | -#endif | |
189 | - | |
190 | -template<class T> | |
191 | -static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T valMin, T valMax, colormapType cm = cmGrayscale) | |
192 | -{ | |
193 | - int i; | |
194 | - float a; | |
195 | - float range = valMax - valMin; | |
196 | - for(i = 0; i<nVals; i++) | |
197 | - { | |
198 | - //normalize to the range [valMin valMax] | |
199 | - a = (cpuSource[i] - valMin) / range; | |
200 | - | |
201 | - cpuDest[i * 3 + 0] = 255 * a; | |
202 | - cpuDest[i * 3 + 1] = 255 * a; | |
203 | - cpuDest[i * 3 + 2] = 255 * a; | |
204 | - } | |
205 | - | |
206 | -} | |
207 | - | |
208 | - | |
209 | - | |
210 | -template<typename T> | |
211 | -static void cpu2image(T* cpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) | |
212 | -{ | |
213 | - //allocate a color buffer | |
214 | - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); | |
215 | - | |
216 | - //do the mapping | |
217 | - cpu2cpu<T>(cpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); | |
218 | - | |
219 | - //copy the buffer to an image | |
220 | - buffer2image(cpuBuffer, fileDest, x_size, y_size); | |
221 | - | |
222 | - free(cpuBuffer); | |
223 | - | |
224 | -} | |
225 | - | |
226 | -}} //end namespace colormap and rts | |
227 | - | |
228 | -#endif | |
229 | - |
dataTypes.h
defaults.h
... | ... | @@ -15,14 +15,14 @@ |
15 | 15 | #define DEFAULT_FOCUS_X 0 |
16 | 16 | #define DEFAULT_FOCUS_Y 0 |
17 | 17 | #define DEFAULT_FOCUS_Z 0 |
18 | -#define DEFAULT_INCIDENT_ORDER 100 | |
18 | +//#define DEFAULT_INCIDENT_ORDER 20 | |
19 | 19 | #define DEFAULT_STABILITY_PARM 1.4 |
20 | 20 | |
21 | 21 | //optics |
22 | -#define DEFAULT_CONDENSER_MIN 0.0 | |
22 | +#define DEFAULT_CONDENSER_MIN 0 | |
23 | 23 | #define DEFAULT_CONDENSER_MAX 1 |
24 | 24 | |
25 | -#define DEFAULT_OBJECTIVE_MIN 0.0 | |
25 | +#define DEFAULT_OBJECTIVE_MIN 0 | |
26 | 26 | #define DEFAULT_OBJECTIVE_MAX 1 |
27 | 27 | |
28 | 28 | //incident light direction |
... | ... | @@ -36,17 +36,20 @@ |
36 | 36 | //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective |
37 | 37 | |
38 | 38 | |
39 | -#define DEFAULT_SLICE_MIN_X -5 | |
40 | -#define DEFAULT_SLICE_MIN_Y 0 | |
41 | -#define DEFAULT_SLICE_MIN_Z -5 | |
39 | +#define DEFAULT_PLANE_MIN_X -5 | |
40 | +#define DEFAULT_PLANE_MIN_Y 0 | |
41 | +#define DEFAULT_PLANE_MIN_Z -5 | |
42 | 42 | |
43 | -#define DEFAULT_SLICE_MAX_X 5 | |
44 | -#define DEFAULT_SLICE_MAX_Y 0 | |
45 | -#define DEFAULT_SLICE_MAX_Z 5 | |
43 | +#define DEFAULT_PLANE_MAX_X 5 | |
44 | +#define DEFAULT_PLANE_MAX_Y 0 | |
45 | +#define DEFAULT_PLANE_MAX_Z 5 | |
46 | 46 | |
47 | -#define DEFAULT_SLICE_NORM_X 0 | |
48 | -#define DEFAULT_SLICE_NORM_Y 1 | |
49 | -#define DEFAULT_SLICE_NORM_Z 0 | |
47 | +#define DEFAULT_PLANE_NORM_X 0 | |
48 | +#define DEFAULT_PLANE_NORM_Y 1 | |
49 | +#define DEFAULT_PLANE_NORM_Z 0 | |
50 | + | |
51 | +#define DEFAULT_PLANE_SIZE 40 | |
52 | +#define DEFAULT_PLANE_POSITION 0 | |
50 | 53 | |
51 | 54 | |
52 | 55 | /* |
... | ... | @@ -64,21 +67,23 @@ |
64 | 67 | */ |
65 | 68 | |
66 | 69 | |
67 | -#define DEFAULT_FIELD_ORDER 200 | |
70 | +#define DEFAULT_FIELD_ORDER 10 | |
68 | 71 | |
69 | -#define DEFAULT_SAMPLES 200 | |
72 | +#define DEFAULT_SAMPLES 400 | |
70 | 73 | |
71 | 74 | #define DEFAULT_SLICE_RES 256 |
72 | 75 | |
76 | +#define DEFAULT_SPHERE_THETA_R 1000 | |
77 | + | |
73 | 78 | #define DEFAULT_PADDING 1 |
74 | 79 | #define DEFAULT_SUPERSAMPLE 1 |
75 | 80 | |
76 | -#define DEFAULT_INTENSITY_FILE "testappend" | |
81 | +#define DEFAULT_INTENSITY_FILE "out_i.bmp" | |
77 | 82 | #define DEFAULT_TRANSMITTANCE_FILE "" |
78 | -#define DEFAULT_ABSORBANCE_FILE "out_a" | |
83 | +#define DEFAULT_ABSORBANCE_FILE "out_a.bmp" | |
79 | 84 | #define DEFAULT_NEAR_FILE "out_n.bmp" |
80 | 85 | #define DEFAULT_FAR_FILE "out_f.bmp" |
81 | -#define DEFAULT_EXTENDED_SOURCE "einstein_small.jpg" | |
86 | +#define DEFAULT_EXTENDED_SOURCE "" | |
82 | 87 | #define DEFAULT_FIELD_TYPE "magnitude" |
83 | 88 | #define DEFAULT_FORMAT fileoutStruct::formatImage |
84 | 89 | #define DEFAULT_COLORMAP "brewer" | ... | ... |
fieldslice.cpp
... | ... | @@ -8,14 +8,16 @@ |
8 | 8 | using namespace std; |
9 | 9 | |
10 | 10 | fieldslice::fieldslice(unsigned int x_size, unsigned int y_size) |
11 | -{ | |
11 | +{ | |
12 | + x_hat = y_hat = z_hat = NULL; | |
13 | + | |
12 | 14 | //save the slice resolution |
13 | 15 | R[0] = x_size; |
14 | 16 | R[1] = x_size; |
15 | 17 | |
16 | 18 | scalarField = true; |
17 | 19 | |
18 | - //init_gpu(); | |
20 | + init_gpu(); | |
19 | 21 | |
20 | 22 | |
21 | 23 | } |
... | ... | @@ -101,5 +103,5 @@ fieldslice::fieldslice() |
101 | 103 | |
102 | 104 | fieldslice::~fieldslice() |
103 | 105 | { |
104 | - //kill_gpu(); | |
106 | + kill_gpu(); | |
105 | 107 | } | ... | ... |
fieldslice.cu
1 | 1 | #include "fieldslice.h" |
2 | 2 | #include "dataTypes.h" |
3 | -#include "rts/cuda/error.h" | |
3 | +#include "rts/cuda/error.h" | |
4 | +#include "rts/cuda/threads.h" | |
4 | 5 | |
5 | 6 | |
6 | 7 | __global__ void field_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* I, unsigned int N) |
7 | 8 | { |
8 | 9 | //compute the index for this thread |
9 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | |
10 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; | |
11 | + int i = ThreadIndex1D(); | |
12 | + | |
10 | 13 | if(i >= N) return; |
11 | 14 | |
12 | 15 | ptype xm = x[i].abs(); |
... | ... | @@ -66,7 +69,8 @@ __global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, pty |
66 | 69 | __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) |
67 | 70 | { |
68 | 71 | //compute the index for this thread |
69 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | |
72 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; | |
73 | + int i = ThreadIndex1D(); | |
70 | 74 | if(i >= N) return; |
71 | 75 | |
72 | 76 | V[i] = field_component[i].real(); |
... | ... | @@ -75,7 +79,8 @@ __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) |
75 | 79 | __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N) |
76 | 80 | { |
77 | 81 | //compute the index for this thread |
78 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | |
82 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; | |
83 | + int i = ThreadIndex1D(); | |
79 | 84 | if(i >= N) return; |
80 | 85 | |
81 | 86 | V[i] = field_component[i].imag(); |
... | ... | @@ -84,7 +89,8 @@ __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned i |
84 | 89 | __global__ void field_sqrt(ptype* input, ptype* output, unsigned int N) |
85 | 90 | { |
86 | 91 | //compute the index for this thread |
87 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | |
92 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; | |
93 | + int i = ThreadIndex1D(); | |
88 | 94 | if(i >= N) return; |
89 | 95 | |
90 | 96 | output[i] = sqrt(input[i]); |
... | ... | @@ -115,7 +121,8 @@ scalarslice fieldslice::Mag() |
115 | 121 | |
116 | 122 | //compute the total number of values in the slice |
117 | 123 | unsigned int N = R[0] * R[1]; |
118 | - int gridDim = (N+BLOCK-1)/BLOCK; | |
124 | + //int gridDim = (N+BLOCK-1)/BLOCK; | |
125 | + dim3 gridDim = GenGrid1D(N, BLOCK); | |
119 | 126 | |
120 | 127 | field_intensity<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, result->S, N); |
121 | 128 | field_sqrt<<<gridDim, BLOCK>>>(result->S, result->S, N); |
... | ... | @@ -132,7 +139,8 @@ scalarslice fieldslice::Real() |
132 | 139 | |
133 | 140 | //compute the total number of values in the slice |
134 | 141 | unsigned int N = R[0] * R[1]; |
135 | - int gridDim = (N+BLOCK-1)/BLOCK; | |
142 | + //int gridDim = (N+BLOCK-1)/BLOCK; | |
143 | + dim3 gridDim = GenGrid1D(N, BLOCK); | |
136 | 144 | |
137 | 145 | field_real<<<gridDim, BLOCK>>>(x_hat, result->S, N); |
138 | 146 | |
... | ... | @@ -148,7 +156,8 @@ scalarslice fieldslice::Imag() |
148 | 156 | |
149 | 157 | //compute the total number of values in the slice |
150 | 158 | unsigned int N = R[0] * R[1]; |
151 | - int gridDim = (N+BLOCK-1)/BLOCK; | |
159 | + //int gridDim = (N+BLOCK-1)/BLOCK; | |
160 | + dim3 gridDim = GenGrid1D(N, BLOCK); | |
152 | 161 | |
153 | 162 | field_imaginary<<<gridDim, BLOCK>>>(x_hat, result->S, N); |
154 | 163 | |
... | ... | @@ -192,7 +201,6 @@ void fieldslice::ScaleField(ptype v) |
192 | 201 | |
193 | 202 | //compute the total number of values in the slice |
194 | 203 | unsigned int N = R[0] * R[1]; |
195 | - //cout<<"Size of mag field: "<<N<<endl; | |
196 | 204 | int gridDim = (N+BLOCK-1)/BLOCK; |
197 | 205 | |
198 | 206 | field_scale<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, N, v); |
... | ... | @@ -200,19 +208,23 @@ void fieldslice::ScaleField(ptype v) |
200 | 208 | } |
201 | 209 | |
202 | 210 | void fieldslice::init_gpu() |
203 | -{ | |
211 | +{ | |
212 | + //if the field has no size, return | |
213 | + if(R[0] == 0 || R[1] == 0) | |
214 | + return; | |
215 | + | |
216 | + //free any previous memory allocations | |
217 | + if(x_hat) | |
218 | + HANDLE_ERROR(cudaFree(x_hat)); | |
219 | + if(y_hat) | |
220 | + HANDLE_ERROR(cudaFree(y_hat)); | |
221 | + if(z_hat) | |
222 | + HANDLE_ERROR(cudaFree(z_hat)); | |
223 | + | |
204 | 224 | //allocate space on the GPU for the field slice |
205 | 225 | HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex))); |
206 | - //HANDLE_ERROR(cudaMemset(x_hat, 0, R[0] * R[1] * sizeof(bsComplex))); | |
207 | 226 | |
208 | - //if the field is scalar, y_hat and z_hat are unused | |
209 | - if(scalarField) | |
210 | - { | |
211 | - y_hat = NULL; | |
212 | - z_hat = NULL; | |
213 | - | |
214 | - } | |
215 | - else | |
227 | + if(!scalarField) | |
216 | 228 | { |
217 | 229 | HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex))); |
218 | 230 | //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex))); |
... | ... | @@ -233,6 +245,8 @@ void fieldslice::kill_gpu() |
233 | 245 | if(z_hat != NULL) |
234 | 246 | HANDLE_ERROR(cudaFree(z_hat)); |
235 | 247 | |
248 | + x_hat = y_hat = z_hat = NULL; | |
249 | + | |
236 | 250 | } |
237 | 251 | |
238 | 252 | void fieldslice::clear_gpu() |
... | ... | @@ -275,7 +289,7 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) |
275 | 289 | result.scalarField = scalarField; |
276 | 290 | |
277 | 291 | //allocate space for the new field |
278 | - result.init_gpu(); | |
292 | + //result.init_gpu(); | |
279 | 293 | |
280 | 294 | //create one thread for each pixel of the field slice |
281 | 295 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); |
... | ... | @@ -291,3 +305,57 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) |
291 | 305 | |
292 | 306 | return result; |
293 | 307 | } |
308 | + | |
309 | +fieldslice::fieldslice(const fieldslice& rhs) | |
310 | +{ | |
311 | + R[0] = rhs.R[0]; | |
312 | + R[1] = rhs.R[1]; | |
313 | + scalarField = rhs.scalarField; | |
314 | + | |
315 | + x_hat = y_hat = z_hat = NULL; | |
316 | + | |
317 | + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1]; | |
318 | + if(rhs.x_hat != NULL) | |
319 | + { | |
320 | + HANDLE_ERROR(cudaMalloc( (void**)&x_hat, bytes)); | |
321 | + HANDLE_ERROR(cudaMemcpy( x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice)); | |
322 | + } | |
323 | + if(rhs.y_hat != NULL) | |
324 | + { | |
325 | + HANDLE_ERROR(cudaMalloc( (void**)&y_hat, bytes)); | |
326 | + HANDLE_ERROR(cudaMemcpy( y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice)); | |
327 | + } | |
328 | + if(rhs.z_hat != NULL) | |
329 | + { | |
330 | + HANDLE_ERROR(cudaMalloc( (void**)&z_hat, bytes)); | |
331 | + HANDLE_ERROR(cudaMemcpy( z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice)); | |
332 | + } | |
333 | + | |
334 | +} | |
335 | + | |
336 | +fieldslice& fieldslice::operator=(const fieldslice& rhs) | |
337 | +{ | |
338 | + //make sure this isn't a self-allocation | |
339 | + if(this != &rhs) | |
340 | + { | |
341 | + //make a shallow copy | |
342 | + R[0] = rhs.R[0]; | |
343 | + R[1] = rhs.R[1]; | |
344 | + scalarField = rhs.scalarField; | |
345 | + | |
346 | + //initialize to new parameters | |
347 | + init_gpu(); | |
348 | + | |
349 | + //make a deep copy | |
350 | + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1]; | |
351 | + if(x_hat != NULL) | |
352 | + HANDLE_ERROR(cudaMemcpy(x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice)); | |
353 | + if(y_hat != NULL) | |
354 | + HANDLE_ERROR(cudaMemcpy(y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice)); | |
355 | + if(z_hat != NULL) | |
356 | + HANDLE_ERROR(cudaMemcpy(z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice)); | |
357 | + } | |
358 | + | |
359 | + return *this; | |
360 | + | |
361 | +} | ... | ... |
fieldslice.h
... | ... | @@ -31,6 +31,9 @@ struct fieldslice |
31 | 31 | |
32 | 32 | ~fieldslice(); |
33 | 33 | |
34 | + //copy constructor | |
35 | + fieldslice(const fieldslice& rhs); | |
36 | + | |
34 | 37 | //void setPos(bsPoint pMin, bsPoint pMax, bsVector N); |
35 | 38 | |
36 | 39 | scalarslice Mag(); |
... | ... | @@ -47,6 +50,7 @@ struct fieldslice |
47 | 50 | |
48 | 51 | //crop a region from the field |
49 | 52 | fieldslice crop(int u, int v, int su, int sv); |
53 | + fieldslice& operator=(const fieldslice& rhs); | |
50 | 54 | |
51 | 55 | void init_gpu(); |
52 | 56 | void kill_gpu(); | ... | ... |
fileout.cu
... | ... | @@ -186,11 +186,21 @@ void fileoutStruct::Save(microscopeStruct* scope) |
186 | 186 | //save images of the fields in the microscope |
187 | 187 | |
188 | 188 | //if the user specifies an extended source |
189 | - if(scope->focalPoints.size() > 1) | |
189 | + if(scope->focalPoints.size() > 0) | |
190 | 190 | { |
191 | 191 | //simulate the extended source and output the detector image |
192 | 192 | scope->SimulateExtendedSource(); |
193 | 193 | |
194 | + //saveNearField(&scope->nf); | |
195 | + saveFarField(scope); | |
196 | + | |
197 | + //save the detector images | |
198 | + saveDetector(scope); | |
199 | + | |
200 | + //simulate scattering for the last point (so that you have a near field image) | |
201 | + scope->SimulateScattering(); | |
202 | + saveNearField(&scope->nf); | |
203 | + | |
194 | 204 | } |
195 | 205 | else |
196 | 206 | { |
... | ... | @@ -203,12 +213,15 @@ void fileoutStruct::Save(microscopeStruct* scope) |
203 | 213 | //run the far-field simulation |
204 | 214 | scope->SimulateImaging(); |
205 | 215 | |
216 | + //saveNearField(&scope->nf); | |
206 | 217 | saveFarField(scope); |
207 | 218 | |
219 | + //save the detector images | |
220 | + saveDetector(scope); | |
221 | + | |
208 | 222 | } |
209 | 223 | |
210 | - //save the detector images | |
211 | - saveDetector(scope); | |
224 | + | |
212 | 225 | |
213 | 226 | |
214 | 227 | } | ... | ... |
fileout.h
... | ... | @@ -5,7 +5,7 @@ |
5 | 5 | //#include "defaults.h" |
6 | 6 | #include "dataTypes.h" |
7 | 7 | |
8 | -#include "colormap.h" | |
8 | +#include "rts/graphics/colormap.h" | |
9 | 9 | #include "fieldslice.h" |
10 | 10 | #include "nearfield.h" |
11 | 11 | #include "microscope.h" |
... | ... | @@ -34,7 +34,7 @@ struct fileoutStruct{ |
34 | 34 | //image_source source; |
35 | 35 | |
36 | 36 | //color map info |
37 | - rts::colormap::colormapType colormap; | |
37 | + rts::colormapType colormap; | |
38 | 38 | ptype colorMax; |
39 | 39 | |
40 | 40 | void Save(microscopeStruct* scope); | ... | ... |
main.cpp
... | ... | @@ -24,6 +24,7 @@ microscopeStruct* SCOPE; |
24 | 24 | #include "warnings.h" |
25 | 25 | |
26 | 26 | fileoutStruct gFileOut; |
27 | +bool verbose = false; | |
27 | 28 | using namespace std; |
28 | 29 | |
29 | 30 | int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, |
... | ... | @@ -31,32 +32,19 @@ int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, |
31 | 32 | |
32 | 33 | int main(int argc, char *argv[]) |
33 | 34 | { |
34 | - //test Envi loading and saving | |
35 | - //EnviFile envi("testenvi", "w"); | |
36 | - | |
37 | - //float* data = (float*)malloc(sizeof(float) * 100 * 100); | |
38 | - //envi.addBand(data, 100, 100, 100); | |
39 | - | |
40 | - //envi.close(); | |
41 | - | |
42 | - //return 0; | |
43 | 35 | |
44 | 36 | SCOPE = new microscopeStruct(); |
45 | 37 | |
46 | - cout<<SCOPE->nf.Uf.R[0]<<endl; | |
47 | - | |
48 | 38 | LoadParameters(argc, argv); |
49 | 39 | |
50 | - //TestSimulation(NF, SCOPE, &gFileOut); | |
51 | - | |
52 | 40 | //initialize GPU memory for fields |
53 | 41 | SCOPE->init(); |
54 | 42 | |
55 | - OutputOptions(); | |
56 | - | |
57 | 43 | gFileOut.Save(SCOPE); |
58 | 44 | |
59 | - //NF->destroy(); | |
45 | + if(verbose) | |
46 | + OutputOptions(); | |
47 | + | |
60 | 48 | SCOPE->destroy(); |
61 | 49 | |
62 | 50 | ... | ... |
microscope.cu
... | ... | @@ -4,7 +4,7 @@ |
4 | 4 | #include "rts/tools/progressbar.h" |
5 | 5 | #include "rts/cuda/timer.h" |
6 | 6 | #include "dataTypes.h" |
7 | -#include "colormap.h" | |
7 | +#include "rts/graphics/colormap.h" | |
8 | 8 | |
9 | 9 | #include <QImage> |
10 | 10 | |
... | ... | @@ -112,8 +112,8 @@ void microscopeStruct::getFarField() |
112 | 112 | //Compute the Far Field image of the focal plane |
113 | 113 | |
114 | 114 | //clear the memory from previous detector fields |
115 | - Ud.kill_gpu(); | |
116 | - Ufd.kill_gpu(); | |
115 | + //Ud.kill_gpu(); | |
116 | + //Ufd.kill_gpu(); | |
117 | 117 | |
118 | 118 | //first crop the filtered near-field image of the source and scattered fields |
119 | 119 | Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]); |
... | ... | @@ -261,9 +261,14 @@ void microscopeStruct::SimulateExtendedSource() |
261 | 261 | t += gpuStopTimer(); |
262 | 262 | |
263 | 263 | rtsProgressBar((double)(i+1)/(double)npts * 100); |
264 | + //unsigned char c; | |
265 | + //cin>>c; | |
264 | 266 | } |
265 | - cout<<endl; | |
266 | - cout<<"Time per source: "<<t/npts<<"ms"<<endl; | |
267 | + if(verbose) | |
268 | + { | |
269 | + cout<<endl; | |
270 | + cout<<"Time per source: "<<t/npts<<"ms"<<endl; | |
271 | + } | |
267 | 272 | |
268 | 273 | } |
269 | 274 | |
... | ... | @@ -304,3 +309,15 @@ void microscopeStruct::LoadExtendedSource(std::string filename) |
304 | 309 | } |
305 | 310 | } |
306 | 311 | } |
312 | + | |
313 | +std::string microscopeStruct::toStr() | |
314 | +{ | |
315 | + stringstream ss; | |
316 | + ss<<nf.toStr(); | |
317 | + | |
318 | + ss<<"----------Optics--------------"<<endl<<endl; | |
319 | + ss<<"Objective NA: "<<objective[0]<<" to "<<objective[1]<<endl; | |
320 | + return ss.str(); | |
321 | + | |
322 | + | |
323 | +} | ... | ... |
microscope.h
montecarlo.cpp
... | ... | @@ -35,18 +35,12 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) |
35 | 35 | ptype inPhi = asin(NAin); |
36 | 36 | ptype outPhi = asin(NAout); |
37 | 37 | |
38 | - //cout<<"inPhi: "<<inPhi<<endl; | |
39 | - //cout<<"outPhi: "<<outPhi<<endl; | |
40 | - | |
41 | 38 | //calculate the z-values associated with these angles |
42 | 39 | ptype inZ = cos(inPhi); |
43 | 40 | ptype outZ = cos(outPhi); |
44 | 41 | |
45 | 42 | ptype rangeZ = inZ - outZ; |
46 | 43 | |
47 | - //cout<<"inZ: "<<inZ<<endl; | |
48 | - //cout<<"outZ: "<<outZ<<endl; | |
49 | - | |
50 | 44 | //draw a distribution of random phi, z values |
51 | 45 | ptype z, phi, theta; |
52 | 46 | for(int i=0; i<N; i++) |
... | ... | @@ -58,7 +52,6 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) |
58 | 52 | phi = acos(z); |
59 | 53 | |
60 | 54 | //compute and store cartesian coordinates |
61 | - //bsVector spherical(1, theta + kSph[1], phi + kSph[2]); | |
62 | 55 | bsVector spherical(1, theta, phi); |
63 | 56 | bsVector cart = spherical.sph2cart(); |
64 | 57 | samples[i] = rotation * cart; | ... | ... |
nearfield.cpp
1 | 1 | #include "nearfield.h" |
2 | +#include <time.h> | |
3 | +#include <math.h> | |
4 | + | |
5 | +#ifdef _WIN32 | |
6 | +#define isnan(x) _isnan(x) | |
7 | +#define isinf(x) (!_finite(x)) | |
8 | +#endif | |
9 | + | |
10 | +int bessjyv_sph(int v, double z, double &vm, double* cjv, | |
11 | + double* cyv, double* cjvp, double* cyvp); | |
2 | 12 | |
3 | 13 | nearfieldStruct::nearfieldStruct() |
4 | 14 | { |
5 | 15 | scalarSim = true; |
6 | 16 | planeWave = false; |
17 | + lut_us = true; | |
18 | + lut_uf = false; | |
7 | 19 | |
8 | 20 | nWaves = 0; |
9 | 21 | } |
... | ... | @@ -46,6 +58,8 @@ std::string nearfieldStruct::toStr() |
46 | 58 | ss<<"Condenser NA: "<<condenser[0]<<" to "<<condenser[1]<<std::endl; |
47 | 59 | ss<<"Focal Point: "<<focus[0]<<", "<<focus[1]<<", "<<focus[2]<<std::endl; |
48 | 60 | ss<<"Field Slice: "<<std::endl; |
61 | + if(lut_us) | |
62 | + ss<<"LUT Parameters --- min: "<<d_min<<" max: "<<d_max<<std::endl; | |
49 | 63 | ss<<pos<<std::endl; |
50 | 64 | |
51 | 65 | ss<<std::endl<<"---------Materials-----------"<<std::endl; |
... | ... | @@ -61,6 +75,10 @@ std::string nearfieldStruct::toStr() |
61 | 75 | for(unsigned int s=0; s<sVector.size(); s++) |
62 | 76 | ss<<sVector[s].toStr()<<std::endl; |
63 | 77 | |
78 | + ss<<"---------Timings-------------"<<std::endl; | |
79 | + ss<<"Uf = "<<t_Uf<<"ms"<<std::endl; | |
80 | + ss<<"Us = "<<t_Us<<"ms"<<std::endl; | |
81 | + | |
64 | 82 | return ss.str(); |
65 | 83 | } |
66 | 84 | |
... | ... | @@ -70,7 +88,8 @@ void nearfieldStruct::calcWaves() |
70 | 88 | inWaves.resize(nWaves); |
71 | 89 | |
72 | 90 | //re-seed the random number generator |
73 | - //srand(seed); | |
91 | + //srand(time(NULL)); | |
92 | + srand(NULL); | |
74 | 93 | |
75 | 94 | //calculate the monte-carlo samples |
76 | 95 | mcSampleNA(&inWaves[0], nWaves, k, condenser[0], condenser[1]); |
... | ... | @@ -84,6 +103,8 @@ void nearfieldStruct::calcSpheres() |
84 | 103 | //calculate all of the constants necessary to evaluate the scattered field |
85 | 104 | //estimate the order required to represent the scattered field for each sphere |
86 | 105 | |
106 | + | |
107 | + | |
87 | 108 | //for each sphere |
88 | 109 | for(int i=0; i<sVector.size(); i++) |
89 | 110 | { |
... | ... | @@ -91,12 +112,10 @@ void nearfieldStruct::calcSpheres() |
91 | 112 | |
92 | 113 | //calculate the required order |
93 | 114 | sVector[i].calcNl(lambda); |
94 | - //std::cout<<sVector[i].Nl<<std::endl; | |
95 | 115 | |
96 | 116 | //set the refractive index for the sphere |
97 | 117 | int imat = sVector[i].iMaterial; |
98 | 118 | rts::rtsComplex<ptype> n = mVector[imat](lambda); |
99 | - //std::cout<<"Sphere refractive index: "<<n<<std::endl; | |
100 | 119 | |
101 | 120 | //calculate the scattering coefficients |
102 | 121 | sVector[i].calcCoeff(lambda, n); |
... | ... | @@ -104,18 +123,109 @@ void nearfieldStruct::calcSpheres() |
104 | 123 | //save the refractive index |
105 | 124 | sVector[i].n = n; |
106 | 125 | |
126 | + //if the LUT is used, calculate Usp(theta, r) | |
127 | + if(lut_us) | |
128 | + { | |
129 | + sVector[i].calcUp(lambda, n, pos, max(U.R[0], U.R[1])); | |
130 | + } | |
131 | + | |
132 | + | |
107 | 133 | } |
108 | 134 | |
109 | 135 | } |
110 | 136 | |
137 | +void nearfieldStruct::calcUs() | |
138 | +{ | |
139 | + | |
140 | + | |
141 | + if(lut_us) | |
142 | + scalarUpLut(); | |
143 | + else | |
144 | + scalarUs(); | |
145 | +} | |
146 | + | |
147 | +void nearfieldStruct::calcUf() | |
148 | +{ | |
149 | + if(lut_uf) | |
150 | + scalarUfLut(); | |
151 | + else | |
152 | + scalarUf(); | |
153 | +} | |
154 | + | |
111 | 155 | void nearfieldStruct::Simulate() |
112 | 156 | { |
157 | + //initialize timings | |
158 | + t_Uf = 0; | |
159 | + t_Us = 0; | |
160 | + | |
113 | 161 | //compute a set of plane waves for Monte-Carlo simulation |
114 | 162 | calcWaves(); |
115 | 163 | |
116 | 164 | //the near field has to be simulated no matter what the output rtsPoint is |
117 | - scalarUf(); | |
165 | + calcUf(); | |
118 | 166 | calcSpheres(); |
119 | - scalarUs(); | |
167 | + calcUs(); | |
120 | 168 | sumUf(); |
169 | + | |
170 | + //U.Mag().toImage("testU.bmp"); | |
171 | +} | |
172 | + | |
173 | +void nearfieldStruct::calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR) | |
174 | +{ | |
175 | + /*Compute the look-up-table for spherical bessel functions used for the incident field | |
176 | + j = (Nl + 1) x aR array of values | |
177 | + aR = resolution of j | |
178 | + */ | |
179 | + | |
180 | + //compute the wavenumber | |
181 | + ptype k = 2 * PI / lambda; | |
182 | + unsigned int Nl = m; | |
183 | + | |
184 | + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored) | |
185 | + int bytes = sizeof(double) * (Nl + 1); | |
186 | + double* cjv_kd = (double*)malloc(bytes); | |
187 | + double* cyv_kd = (double*)malloc(bytes); | |
188 | + double* cjvp_kd = (double*)malloc(bytes); | |
189 | + double* cyvp_kd = (double*)malloc(bytes); | |
190 | + | |
191 | + //compute the bessel functions using the CPU-based algorithm | |
192 | + double vm; | |
193 | + | |
194 | + //for each sample along r | |
195 | + ptype dr = (d_max - d_min) / (dR - 1); | |
196 | + ptype d; | |
197 | + ptype jv; | |
198 | + for(int id = 0; id < dR; id++) | |
199 | + { | |
200 | + d = id * dr + d_min; | |
201 | + double kd = k*d; | |
202 | + bessjyv_sph(Nl, kd, vm, cjv_kd, cyv_kd, cjvp_kd, cyvp_kd); | |
203 | + | |
204 | + //copy the double data to the bsComplex array | |
205 | + for(int l=0; l<=Nl; l++) | |
206 | + { | |
207 | + jv = cjv_kd[l]; | |
208 | + if(isnan(jv) || isinf(jv)) | |
209 | + { | |
210 | + if(kd == 0 && l == 0) | |
211 | + jv = 1; | |
212 | + else | |
213 | + jv = 0; | |
214 | + } | |
215 | + j[id * (Nl+1) + l] = jv; | |
216 | + } | |
217 | + } | |
218 | + | |
219 | + /*ofstream outfile("uf_besselout.txt"); | |
220 | + for(int ir = 0; ir < dR; ir++) | |
221 | + { | |
222 | + outfile<<ir*dr + d_min<<endl; | |
223 | + for(int l = 0; l<=Nl; l++) | |
224 | + { | |
225 | + outfile<<j[ir * (Nl+1) + l]<<" --"; | |
226 | + } | |
227 | + outfile<<endl; | |
228 | + } | |
229 | + outfile.close();*/ | |
230 | + | |
121 | 231 | } | ... | ... |
nearfield.h
... | ... | @@ -31,6 +31,8 @@ struct nearfieldStruct |
31 | 31 | |
32 | 32 | //slices for the focused field |
33 | 33 | fieldslice Uf; |
34 | + ptype d_min, d_max; | |
35 | + | |
34 | 36 | // and total field: Uf + sum(Us) |
35 | 37 | fieldslice U; |
36 | 38 | |
... | ... | @@ -43,6 +45,14 @@ struct nearfieldStruct |
43 | 45 | //flag for a plane wave |
44 | 46 | bool planeWave; |
45 | 47 | |
48 | + //flag for using a LUT | |
49 | + bool lut_uf; | |
50 | + bool lut_us; | |
51 | + | |
52 | + //timings | |
53 | + float t_Uf; | |
54 | + float t_Us; | |
55 | + | |
46 | 56 | |
47 | 57 | |
48 | 58 | //---------Scatterers------------ |
... | ... | @@ -78,10 +88,17 @@ struct nearfieldStruct |
78 | 88 | void setPos(bsPoint pMin, bsPoint pMax, bsVector normal); |
79 | 89 | |
80 | 90 | //this function re-computes the focused field |
91 | + void calcUf(); | |
81 | 92 | void scalarUf(); |
93 | + void scalarUfLut(); | |
94 | + | |
95 | + void calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR); | |
82 | 96 | |
83 | 97 | //compute the field scattered by all of the materials |
98 | + void calcUs(); | |
84 | 99 | void scalarUs(); |
100 | + void scalarUpLut(); | |
101 | + | |
85 | 102 | |
86 | 103 | //add the incident field to the sum of scattered fields |
87 | 104 | void sumUf(); | ... | ... |
nfScalarUf.cu
... | ... | @@ -5,7 +5,7 @@ |
5 | 5 | #include "rts/cuda/error.h" |
6 | 6 | #include "rts/cuda/timer.h" |
7 | 7 | |
8 | - | |
8 | +//Incident field for a single plane wave | |
9 | 9 | __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR) |
10 | 10 | { |
11 | 11 | /*Compute the scalar focused field using Debye focusing |
... | ... | @@ -41,7 +41,8 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p |
41 | 41 | Uf[i] = exp(d) * A; |
42 | 42 | |
43 | 43 | } |
44 | - | |
44 | + | |
45 | +//Incident field for a focused point source | |
45 | 46 | __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4) |
46 | 47 | { |
47 | 48 | /*Compute the scalar focused field using Debye focusing |
... | ... | @@ -151,7 +152,6 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt |
151 | 152 | } |
152 | 153 | |
153 | 154 | sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); |
154 | - //sumUf += il * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); | |
155 | 155 | |
156 | 156 | il *= im; |
157 | 157 | } |
... | ... | @@ -162,21 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt |
162 | 162 | |
163 | 163 | void nearfieldStruct::scalarUf() |
164 | 164 | { |
165 | - //Compute the incident field via a scalar simulation | |
166 | - //This method uses Debye focusing to approximate the field analytically | |
167 | - | |
168 | - //time the calculation of the focused field | |
169 | - //gpuStartTimer(); | |
170 | - | |
171 | - //set the field slice to a scalar field | |
172 | - //Uf.scalarField = true; | |
173 | - | |
174 | - //initialize the GPU arrays | |
175 | - //Uf.init_gpu(); | |
165 | + | |
166 | + gpuStartTimer(); | |
176 | 167 | |
177 | 168 | //create one thread for each pixel of the field slice |
178 | 169 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); |
179 | - dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
170 | + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
180 | 171 | |
181 | 172 | //if we are computing a plane wave, call the gpuScalarUfp function |
182 | 173 | if(planeWave) |
... | ... | @@ -191,10 +182,7 @@ void nearfieldStruct::scalarUf() |
191 | 182 | ptype cosBeta = cos(asin(condenser[1])); |
192 | 183 | //compute the scalar Uf field (this will be in the x_hat channel of Uf) |
193 | 184 | gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); |
194 | - } | |
195 | - | |
196 | - //float t = gpuStopTimer(); | |
197 | - //std::cout<<"Scalar Uf Time: "<<t<<"ms"<<std::endl; | |
198 | - //std::cout<<focus<<std::endl; | |
199 | - | |
185 | + } | |
186 | + | |
187 | + t_Uf = gpuStopTimer(); | |
200 | 188 | } | ... | ... |
1 | +#include "nearfield.h" | |
2 | + | |
3 | +#include "rts/math/legendre.h" | |
4 | +#include "rts/cuda/error.h" | |
5 | +#include "rts/cuda/timer.h" | |
6 | + | |
7 | +texture<float, cudaTextureType2D> texJ; | |
8 | + | |
9 | +__global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR); | |
10 | + | |
11 | +__global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoint f, bsVector k, ptype A, ptype cosAlpha, ptype cosBeta, int nl, ptype dmin, ptype dmax, int dR) | |
12 | +{ | |
13 | + /*This function computes the focused field for a 2D slice | |
14 | + | |
15 | + Uf = destination field slice | |
16 | + ABCD = plane representing the field slice in world space | |
17 | + uR, vR = resolution of the Uf field | |
18 | + f = focal point of the condenser | |
19 | + k = direction of the incident light | |
20 | + A = amplitude of the incident field | |
21 | + cosAlpha= cosine of the solid angle subtended by the condenser obscuration | |
22 | + cosBeta = cosine of the solid angle subtended by the condenser aperature | |
23 | + nl = number of orders used to compute the field | |
24 | + dR = number of Bessel function values in the look-up texture | |
25 | + | |
26 | + */ | |
27 | + | |
28 | + //get the current coordinate in the plane slice | |
29 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
30 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
31 | + | |
32 | + //make sure that the thread indices are in-bounds | |
33 | + if(iu >= uR || iv >= vR) return; | |
34 | + | |
35 | + //compute the index (easier access to the scalar field array) | |
36 | + int i = iv*uR + iu; | |
37 | + | |
38 | + //compute the parameters for u and v | |
39 | + ptype u = (ptype)iu / (uR); | |
40 | + ptype v = (ptype)iv / (vR); | |
41 | + | |
42 | + | |
43 | + | |
44 | + //get the rtsPoint in world space and then the r vector | |
45 | + bsPoint p = ABCD(u, v); | |
46 | + bsVector r = p - f; | |
47 | + ptype d = r.len(); | |
48 | + | |
49 | + if(d == 0) | |
50 | + { | |
51 | + Uf[i] = A * 2 * PI * (cosAlpha - cosBeta); | |
52 | + return; | |
53 | + } | |
54 | + | |
55 | + //get info for the light direction and frequency | |
56 | + r = r.norm(); | |
57 | + | |
58 | + //compute the imaginary factor i^l | |
59 | + bsComplex im = bsComplex(0, 1); | |
60 | + bsComplex il = bsComplex(1, 0); | |
61 | + | |
62 | + //Legendre functions are computed dynamically to save memory | |
63 | + //initialize the Legendre functions | |
64 | + | |
65 | + ptype P[2]; | |
66 | + //get the angle between k and r (light direction and position vector) | |
67 | + ptype cosTheta; | |
68 | + cosTheta = k.dot(r); | |
69 | + | |
70 | + rts::init_legendre<ptype>(cosTheta, P[0], P[1]); | |
71 | + | |
72 | + //initialize legendre functions for the cassegrain angles | |
73 | + ptype Palpha[3]; | |
74 | + rts::init_legendre<ptype>(cosAlpha, Palpha[0], Palpha[1]); | |
75 | + Palpha[2] = 1; | |
76 | + | |
77 | + ptype Pbeta[3]; | |
78 | + rts::init_legendre<ptype>(cosBeta, Pbeta[0], Pbeta[1]); | |
79 | + Pbeta[2] = 1; | |
80 | + | |
81 | + //for each order l | |
82 | + bsComplex sumUf(0.0, 0.0); | |
83 | + ptype jl = 0.0; | |
84 | + ptype Pl; | |
85 | + ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1); | |
86 | + for(int l = 0; l<=nl; l++) | |
87 | + { | |
88 | + jl = tex2D(texJ, l + 0.5, di + 0.5); | |
89 | + if(l==0) | |
90 | + Pl = P[0]; | |
91 | + else if(l==1) | |
92 | + { | |
93 | + Pl = P[1]; | |
94 | + | |
95 | + //adjust the cassegrain Legendre function | |
96 | + Palpha[2] = Palpha[0]; | |
97 | + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]); | |
98 | + Pbeta[2] = Pbeta[0]; | |
99 | + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]); | |
100 | + } | |
101 | + else | |
102 | + { | |
103 | + rts::shift_legendre<ptype>(l, cosTheta, P[0], P[1]); | |
104 | + | |
105 | + Pl = P[1]; | |
106 | + | |
107 | + //adjust the cassegrain outer Legendre function | |
108 | + Palpha[2] = Palpha[0]; | |
109 | + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]); | |
110 | + Pbeta[2] = Pbeta[0]; | |
111 | + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]); | |
112 | + } | |
113 | + | |
114 | + sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); | |
115 | + //sumUf += jl; | |
116 | + | |
117 | + il *= im; | |
118 | + } | |
119 | + | |
120 | + Uf[i] = sumUf * 2 * PI * A; | |
121 | + //Uf[i] = u; | |
122 | + //return; | |
123 | +} | |
124 | + | |
125 | +void nearfieldStruct::scalarUfLut() | |
126 | +{ | |
127 | + gpuStartTimer(); | |
128 | + | |
129 | + //calculate the minimum and maximum points in the focused field | |
130 | + d_min = pos.dist(focus); | |
131 | + d_max = pos.dist_max(focus); | |
132 | + | |
133 | + //allocate space for the Bessel function | |
134 | + int dR = 2 * max(Uf.R[0], Uf.R[1]); | |
135 | + ptype* j = NULL; | |
136 | + j = (ptype*) malloc(sizeof(ptype) * dR * (m+1)); | |
137 | + | |
138 | + //calculate Bessel function LUT | |
139 | + calcBesselLut(j, d_min, d_max, dR); | |
140 | + | |
141 | + //create a CUDA array structure and specify the format description | |
142 | + cudaArray* arrayJ; | |
143 | + cudaChannelFormatDesc channelDesc = | |
144 | + cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); | |
145 | + | |
146 | + //allocate memory | |
147 | + HANDLE_ERROR(cudaMallocArray(&arrayJ, &channelDesc, m+1, dR)); | |
148 | + | |
149 | + //specify texture properties | |
150 | + texJ.addressMode[0] = cudaAddressModeMirror; | |
151 | + texJ.addressMode[1] = cudaAddressModeMirror; | |
152 | + texJ.filterMode = cudaFilterModeLinear; | |
153 | + texJ.normalized = false; | |
154 | + | |
155 | + //bind the texture to the array | |
156 | + HANDLE_ERROR(cudaBindTextureToArray(texJ, arrayJ, channelDesc)); | |
157 | + | |
158 | + //copy the CPU Bessel LUT to the GPU-based array | |
159 | + HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice)); | |
160 | + | |
161 | + //----------------Compute the focused field | |
162 | + //create one thread for each pixel of the field slice | |
163 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
164 | + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
165 | + | |
166 | + //if we are computing a plane wave, call the gpuScalarUfp function | |
167 | + if(planeWave) | |
168 | + { | |
169 | + gpuScalarUfp<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]); | |
170 | + } | |
171 | + //otherwise compute the condenser info and create a focused field | |
172 | + else | |
173 | + { | |
174 | + //pre-compute the cosine of the obscuration and objective angles | |
175 | + ptype cosAlpha = cos(asin(condenser[0])); | |
176 | + ptype cosBeta = cos(asin(condenser[1])); | |
177 | + //compute the scalar Uf field (this will be in the x_hat channel of Uf) | |
178 | + gpuScalarUfLut<<<dimGrid, dimBlock>>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR); | |
179 | + } | |
180 | + | |
181 | + | |
182 | + //free everything | |
183 | + free(j); | |
184 | + | |
185 | + HANDLE_ERROR(cudaFreeArray(arrayJ)); | |
186 | + | |
187 | + t_Uf = gpuStopTimer(); | |
188 | +} | ... | ... |
1 | +#include "nearfield.h" | |
2 | +#include "rts/math/spherical_bessel.h" | |
3 | +#include "rts/math/legendre.h" | |
4 | +#include <stdlib.h> | |
5 | +#include "rts/cuda/error.h" | |
6 | +#include "rts/cuda/timer.h" | |
7 | + | |
8 | +texture<float2, cudaTextureType2D> texUsp; | |
9 | +texture<float2, cudaTextureType2D> texUip; | |
10 | + | |
11 | +__global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR) | |
12 | +{ | |
13 | + /*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field | |
14 | + produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field. | |
15 | + | |
16 | + Us = final scattered field | |
17 | + k = list of incoming plane waves (Monte-Carlo samples) | |
18 | + nk = number of incoming MC samples | |
19 | + kmag= magnitude of the incoming field 2pi/lambda | |
20 | + dmin= minimum distance of the Usp texture | |
21 | + dmax= maximum distance of the Usp texture | |
22 | + f = position of the focus | |
23 | + ps = position of the sphere | |
24 | + A = total amplitude of the incident field arriving at the focal spot | |
25 | + ABCD= rectangle representing the field slice | |
26 | + uR = resolution of the field slice in the u direction | |
27 | + vR = resolution of the field slice in the v direction | |
28 | + dR = resolution of the Usp texture in the d direction | |
29 | + thetaR= resolution of the Usp texture in the theta direction | |
30 | + */ | |
31 | + | |
32 | + //get the current coordinate in the plane slice | |
33 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
34 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
35 | + | |
36 | + //make sure that the thread indices are in-bounds | |
37 | + if(iu >= uR || iv >= vR) return; | |
38 | + | |
39 | + //compute the index (easier access to the scalar field array) | |
40 | + int i = iv*uR + iu; | |
41 | + | |
42 | + //compute the parameters for u and v | |
43 | + ptype u = (ptype)iu / (uR); | |
44 | + ptype v = (ptype)iv / (vR); | |
45 | + | |
46 | + //get the rtsPoint in world space and then the r vector | |
47 | + bsPoint p = ABCD(u, v); | |
48 | + bsVector r = p - ps; | |
49 | + ptype d = r.len(); | |
50 | + float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1); | |
51 | + float ai = ( (d - dmin)/(a - dmin)) * (aR - 1); | |
52 | + | |
53 | + bsComplex sumUs(0, 0); | |
54 | + //for each plane wave in the wave list | |
55 | + for(int iw = 0; iw < nk; iw++) | |
56 | + { | |
57 | + //normalize the direction vectors and find their inner product | |
58 | + r = r.norm(); | |
59 | + ptype cos_theta = k[iw].dot(r); | |
60 | + if(cos_theta < -1) | |
61 | + cos_theta = -1; | |
62 | + if(cos_theta > 1) | |
63 | + cos_theta = 1; | |
64 | + float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1); | |
65 | + | |
66 | + //compute the phase factor for spheres that are not at the origin | |
67 | + bsVector c = ps - f; | |
68 | + bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c))); | |
69 | + | |
70 | + //compute the internal field if we are inside a sphere | |
71 | + if(d < a) | |
72 | + { | |
73 | + float2 Uip = tex2D(texUip, ai + 0.5, thetai + 0.5); | |
74 | + sumUs += (1.0/nk) * A * phase * bsComplex(Uip.x, Uip.y); | |
75 | + } | |
76 | + //otherwise compute the scattered field | |
77 | + else | |
78 | + { | |
79 | + float2 Usp = tex2D(texUsp, di + 0.5, thetai + 0.5); | |
80 | + sumUs += (1.0/nk) * A * phase * bsComplex(Usp.x, Usp.y); | |
81 | + } | |
82 | + | |
83 | + } | |
84 | + | |
85 | + Us[i] += sumUs; | |
86 | +} | |
87 | + | |
88 | +void nearfieldStruct::scalarUpLut() | |
89 | +{ | |
90 | + //get the number of spheres | |
91 | + int nSpheres = sVector.size(); | |
92 | + | |
93 | + //if there are no spheres, nothing to do here | |
94 | + if(nSpheres == 0) | |
95 | + return; | |
96 | + | |
97 | + //time the calculation of the focused field | |
98 | + gpuStartTimer(); | |
99 | + | |
100 | + //clear the scattered field | |
101 | + U.clear_gpu(); | |
102 | + | |
103 | + //create one thread for each pixel of the field slice | |
104 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
105 | + dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
106 | + | |
107 | + //copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff) | |
108 | + bsVector* gpuk; | |
109 | + int nWaves; | |
110 | + ptype subA; | |
111 | + if(planeWave) | |
112 | + { | |
113 | + nWaves = 1; | |
114 | + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) ); | |
115 | + HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice)); | |
116 | + subA = A; | |
117 | + } | |
118 | + else | |
119 | + { | |
120 | + nWaves = inWaves.size(); | |
121 | + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nWaves ) ); | |
122 | + HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nWaves, cudaMemcpyHostToDevice)); | |
123 | + //compute the amplitude that makes it through the condenser | |
124 | + subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) ); | |
125 | + } | |
126 | + | |
127 | + //for each sphere | |
128 | + for(int s = 0; s<nSpheres; s++) | |
129 | + { | |
130 | + //get the current sphere | |
131 | + //sphere S = sVector[s]; | |
132 | + | |
133 | + //allocate space for the Usp and Uip textures | |
134 | + //allocate the cuda array | |
135 | + cudaArray* arrayUsp; | |
136 | + cudaArray* arrayUip; | |
137 | + cudaChannelFormatDesc channelDescUsp = | |
138 | + cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat); | |
139 | + cudaChannelFormatDesc channelDescUip = | |
140 | + cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat); | |
141 | + int dR = sVector[s].Usp.R[0]; | |
142 | + int thetaR = sVector[s].Usp.R[1]; | |
143 | + int aR = sVector[s].Uip.R[0]; | |
144 | + HANDLE_ERROR(cudaMallocArray(&arrayUsp, &channelDescUsp, dR, thetaR)); | |
145 | + HANDLE_ERROR(cudaMallocArray(&arrayUip, &channelDescUip, aR, thetaR)); | |
146 | + | |
147 | + texUsp.addressMode[0] = cudaAddressModeMirror; | |
148 | + texUsp.addressMode[1] = cudaAddressModeMirror; | |
149 | + texUsp.filterMode = cudaFilterModeLinear; | |
150 | + texUsp.normalized = false; | |
151 | + | |
152 | + texUip.addressMode[0] = cudaAddressModeMirror; | |
153 | + texUip.addressMode[1] = cudaAddressModeMirror; | |
154 | + texUip.filterMode = cudaFilterModeLinear; | |
155 | + texUip.normalized = false; | |
156 | + HANDLE_ERROR(cudaBindTextureToArray(texUsp, arrayUsp, channelDescUsp)); | |
157 | + HANDLE_ERROR(cudaBindTextureToArray(texUip, arrayUip, channelDescUip)); | |
158 | + | |
159 | + //copy the LUT to the Usp texture | |
160 | + HANDLE_ERROR( cudaMemcpy2DToArray(arrayUsp, 0, 0, sVector[s].Usp.x_hat, dR*sizeof(float2), dR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice)); | |
161 | + HANDLE_ERROR( cudaMemcpy2DToArray(arrayUip, 0, 0, sVector[s].Uip.x_hat, aR*sizeof(float2), aR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice)); | |
162 | + | |
163 | + gpuScalarUpLut<<<dimGrid, dimBlock>>>(U.x_hat, | |
164 | + gpuk, | |
165 | + nWaves, | |
166 | + 2 * PI / lambda, | |
167 | + sVector[s].a, | |
168 | + sVector[s].d_min, | |
169 | + sVector[s].d_max, | |
170 | + focus, | |
171 | + sVector[s].p, | |
172 | + subA, | |
173 | + pos, | |
174 | + U.R[0], | |
175 | + U.R[1], | |
176 | + dR, | |
177 | + aR, | |
178 | + thetaR); | |
179 | + | |
180 | + cudaFreeArray(arrayUsp); | |
181 | + cudaFreeArray(arrayUip); | |
182 | + | |
183 | + } | |
184 | + | |
185 | + | |
186 | + //store the time to compute the scattered field | |
187 | + t_Us = gpuStopTimer(); | |
188 | + | |
189 | + //free monte-carlo samples | |
190 | + cudaFree(gpuk); | |
191 | + | |
192 | +} | ... | ... |
nfScalarUs.cu
... | ... | @@ -163,7 +163,7 @@ void nearfieldStruct::scalarUs() |
163 | 163 | return; |
164 | 164 | |
165 | 165 | //time the calculation of the focused field |
166 | - //gpuStartTimer(); | |
166 | + gpuStartTimer(); | |
167 | 167 | |
168 | 168 | //clear the scattered field |
169 | 169 | U.clear_gpu(); |
... | ... | @@ -251,9 +251,8 @@ void nearfieldStruct::scalarUs() |
251 | 251 | } |
252 | 252 | |
253 | 253 | |
254 | + //store the time to compute the scattered field | |
255 | + t_Us = gpuStopTimer(); | |
254 | 256 | |
255 | - //float t = gpuStopTimer(); | |
256 | - //std::cout<<"Scalar Us Time: "<<t<<"ms"<<std::endl; | |
257 | - //std::cout<<focus<<std::endl; | |
258 | 257 | |
259 | 258 | } | ... | ... |
nfSumUf.cu
... | ... | @@ -32,7 +32,7 @@ __global__ void gpuScalarUsp(bsComplex* Ufx, bsComplex* Ufy, bsComplex* Ufz, |
32 | 32 | { |
33 | 33 | r = p - ps[is]; |
34 | 34 | d = r.len(); |
35 | - if(d <= as[is]) | |
35 | + if(d < as[is]) | |
36 | 36 | return; |
37 | 37 | } |
38 | 38 | |
... | ... | @@ -110,8 +110,5 @@ void nearfieldStruct::sumUf() |
110 | 110 | HANDLE_ERROR(cudaFree(gpu_p)); |
111 | 111 | HANDLE_ERROR(cudaFree(gpu_a)); |
112 | 112 | |
113 | - //float t = gpuStopTimer(); | |
114 | - //std::cout<<"Add Us Time: "<<t<<"ms"<<std::endl; | |
115 | - //std::cout<<focus<<std::endl; | |
116 | 113 | |
117 | 114 | } | ... | ... |
options.h
... | ... | @@ -5,7 +5,7 @@ |
5 | 5 | |
6 | 6 | #include "nearfield.h" |
7 | 7 | #include "microscope.h" |
8 | -#include "colormap.h" | |
8 | +#include "rts/graphics/colormap.h" | |
9 | 9 | #include "fileout.h" |
10 | 10 | //extern nearfieldStruct* NF; |
11 | 11 | extern microscopeStruct* SCOPE; |
... | ... | @@ -23,7 +23,179 @@ using namespace std; |
23 | 23 | #include <boost/program_options.hpp> |
24 | 24 | namespace po = boost::program_options; |
25 | 25 | |
26 | -static void loadSpheres(string sphereList) | |
26 | +extern bool verbose; | |
27 | + | |
28 | + | |
29 | + | |
30 | +static void lNearfield(po::variables_map vm) | |
31 | +{ | |
32 | + //test to see if we are simulating a plane wave | |
33 | + bool planeWave = DEFAULT_PLANEWAVE; | |
34 | + if(vm.count("plane-wave")) | |
35 | + planeWave = !planeWave; | |
36 | + SCOPE->nf.planeWave = planeWave; | |
37 | + | |
38 | + //get the incident field amplitude | |
39 | + SCOPE->nf.A = vm["amplitude"].as<ptype>(); | |
40 | + | |
41 | + //get the condenser parameters | |
42 | + SCOPE->nf.condenser[0] = DEFAULT_CONDENSER_MIN; | |
43 | + SCOPE->nf.condenser[1] = DEFAULT_CONDENSER_MAX; | |
44 | + | |
45 | + if(vm.count("condenser")) | |
46 | + { | |
47 | + vector<ptype> cparams = vm["condenser"].as< vector<ptype> >(); | |
48 | + | |
49 | + if(cparams.size() == 1) | |
50 | + SCOPE->nf.condenser[1] = cparams[0]; | |
51 | + else | |
52 | + { | |
53 | + SCOPE->nf.condenser[0] = cparams[0]; | |
54 | + SCOPE->nf.condenser[1] = cparams[1]; | |
55 | + } | |
56 | + } | |
57 | + | |
58 | + | |
59 | + //get the focal rtsPoint position | |
60 | + SCOPE->nf.focus[0] = DEFAULT_FOCUS_X; | |
61 | + SCOPE->nf.focus[1] = DEFAULT_FOCUS_Y; | |
62 | + SCOPE->nf.focus[2] = DEFAULT_FOCUS_Z; | |
63 | + if(vm.count("focus")) | |
64 | + { | |
65 | + vector<ptype> fpos = vm["focus"].as< vector<ptype> >(); | |
66 | + if(fpos.size() != 3) | |
67 | + { | |
68 | + cout<<"BIMSIM Error - the incident focal point is incorrectly specified; it must have three components."<<endl; | |
69 | + exit(1); | |
70 | + } | |
71 | + SCOPE->nf.focus[0] = fpos[0]; | |
72 | + SCOPE->nf.focus[1] = fpos[1]; | |
73 | + SCOPE->nf.focus[2] = fpos[2]; | |
74 | + } | |
75 | + | |
76 | + //get the incident light direction (k-vector) | |
77 | + bsVector spherical(1, 0, 0); | |
78 | + | |
79 | + //if a k-vector is specified | |
80 | + if(vm.count("k")) | |
81 | + { | |
82 | + vector<ptype> kvec = vm["k"].as< vector<ptype> >(); | |
83 | + if(kvec.size() != 2) | |
84 | + { | |
85 | + cout<<"BIMSIM Error - k-vector is not specified correctly: it must contain two elements"<<endl; | |
86 | + exit(1); | |
87 | + } | |
88 | + spherical[1] = kvec[0]; | |
89 | + spherical[2] = kvec[1]; | |
90 | + } | |
91 | + SCOPE->nf.k = spherical.sph2cart(); | |
92 | + | |
93 | + | |
94 | + //incident field order | |
95 | + SCOPE->nf.m = vm["field-order"].as<int>(); | |
96 | + | |
97 | + //number of Monte-Carlo samples | |
98 | + SCOPE->nf.nWaves = vm["samples"].as<int>(); | |
99 | + | |
100 | + //random number seed for Monte-Carlo samples | |
101 | + if(vm.count("seed")) | |
102 | + srand(vm["seed"].as<unsigned int>()); | |
103 | + | |
104 | + | |
105 | + | |
106 | +} | |
107 | + | |
108 | + | |
109 | +static void loadOutputParams(po::variables_map vm) | |
110 | +{ | |
111 | + //append simulation results to previous binary files | |
112 | + gFileOut.append = DEFAULT_APPEND; | |
113 | + if(vm.count("append")) | |
114 | + gFileOut.append = true; | |
115 | + | |
116 | + //image parameters | |
117 | + //component of the field to be saved | |
118 | + std::string fieldStr; | |
119 | + fieldStr = vm["output-type"].as<string>(); | |
120 | + | |
121 | + if(fieldStr == "magnitude") | |
122 | + gFileOut.field = fileoutStruct::fieldMag; | |
123 | + else if(fieldStr == "intensity") | |
124 | + gFileOut.field = fileoutStruct::fieldIntensity; | |
125 | + else if(fieldStr == "polarization") | |
126 | + gFileOut.field = fileoutStruct::fieldPolar; | |
127 | + else if(fieldStr == "imaginary") | |
128 | + gFileOut.field = fileoutStruct::fieldImag; | |
129 | + else if(fieldStr == "real") | |
130 | + gFileOut.field = fileoutStruct::fieldReal; | |
131 | + else if(fieldStr == "angular-spectrum") | |
132 | + gFileOut.field = fileoutStruct::fieldAngularSpectrum; | |
133 | + | |
134 | + | |
135 | + //image file names | |
136 | + gFileOut.intFile = vm["intensity"].as<string>(); | |
137 | + gFileOut.absFile = vm["absorbance"].as<string>(); | |
138 | + gFileOut.transFile = vm["transmittance"].as<string>(); | |
139 | + gFileOut.nearFile = vm["near-field"].as<string>(); | |
140 | + gFileOut.farFile = vm["far-field"].as<string>(); | |
141 | + | |
142 | + //colormap | |
143 | + std::string cmapStr; | |
144 | + cmapStr = vm["colormap"].as<string>(); | |
145 | + if(cmapStr == "brewer") | |
146 | + gFileOut.colormap = rts::cmBrewer; | |
147 | + else if(cmapStr == "gray") | |
148 | + gFileOut.colormap = rts::cmGrayscale; | |
149 | + else | |
150 | + cout<<"color-map value not recognized (using default): "<<cmapStr<<endl; | |
151 | +} | |
152 | + | |
153 | +void lFlags(po::variables_map vm, po::options_description desc) | |
154 | +{ | |
155 | + //display help and exit | |
156 | + if(vm.count("help")) | |
157 | + { | |
158 | + cout<<desc<<endl; | |
159 | + exit(1); | |
160 | + } | |
161 | + | |
162 | + //flag for verbose output | |
163 | + if(vm.count("verbose")) | |
164 | + verbose = true; | |
165 | + | |
166 | + if(vm.count("recursive")) | |
167 | + { | |
168 | + SCOPE->nf.lut_us = false; | |
169 | + SCOPE->nf.lut_uf = false; | |
170 | + } | |
171 | + else if(vm.count("recursive-us")) | |
172 | + { | |
173 | + SCOPE->nf.lut_us = false; | |
174 | + } | |
175 | + else if(vm.count("lut-uf")) | |
176 | + { | |
177 | + SCOPE->nf.lut_uf = true; | |
178 | + } | |
179 | +} | |
180 | + | |
181 | +void lWavelength(po::variables_map vm) | |
182 | +{ | |
183 | + //load the wavelength | |
184 | + if(vm.count("nu")) | |
185 | + { | |
186 | + //wavelength is given in wavenumber - transform and flag | |
187 | + SCOPE->nf.lambda = 10000/vm["nu"].as<ptype>(); | |
188 | + gFileOut.wavenumber = true; | |
189 | + } | |
190 | + //otherwise we are using lambda = wavelength | |
191 | + else | |
192 | + { | |
193 | + SCOPE->nf.lambda = vm["lambda"].as<ptype>(); | |
194 | + gFileOut.wavenumber = false; | |
195 | + } | |
196 | +} | |
197 | + | |
198 | +static void lSpheres(string sphereList) | |
27 | 199 | { |
28 | 200 | /*This function loads a list of sphere given in the string sphereList |
29 | 201 | The format is: |
... | ... | @@ -58,17 +230,60 @@ static void loadSpheres(string sphereList) |
58 | 230 | //check out the next element (this should set the EOF error flag) |
59 | 231 | ss.peek(); |
60 | 232 | } |
233 | +} | |
61 | 234 | |
235 | +void lSpheres(po::variables_map vm) | |
236 | +{ | |
237 | + //if a sphere is specified at the command line | |
238 | + if(vm.count("spheres")) | |
239 | + { | |
240 | + //convert the sphere to a string | |
241 | + vector<ptype> sdesc = vm["spheres"].as< vector<ptype> >(); | |
62 | 242 | |
243 | + //compute the number of spheres specified | |
244 | + unsigned int nS; | |
245 | + if(sdesc.size() <= 5) | |
246 | + nS = 1; | |
247 | + else | |
248 | + { | |
249 | + //if the number of parameters is divisible by 4, compute the number of spheres | |
250 | + if(sdesc.size() % 5 == 0) | |
251 | + nS = sdesc.size() / 5; | |
252 | + else | |
253 | + { | |
254 | + cout<<"BIMSIM Error: Invalid number of sphere parameters."<<endl; | |
255 | + exit(1); | |
256 | + } | |
257 | + } | |
63 | 258 | |
64 | -} | |
259 | + stringstream ss; | |
260 | + | |
261 | + //for each sphere | |
262 | + for(unsigned int s=0; s<nS; s++) | |
263 | + { | |
264 | + //compute the number of sphere parameters | |
265 | + unsigned int nP; | |
266 | + if(nS == 1) nP = sdesc.size(); | |
267 | + else nP = 5; | |
268 | + | |
269 | + //store each parameter as a string | |
270 | + for(unsigned int i=0; i<nP; i++) | |
271 | + { | |
272 | + ss<<sdesc[s*5 + i]<<" "; | |
273 | + } | |
274 | + ss<<endl; | |
275 | + } | |
276 < |