Commit 51b6469a3ee77583099edb0a57e1bb7859c28fd1
1 parent
b6179de6
added look-up tables
Showing
27 changed files
with
1518 additions
and
588 deletions
Show diff stats
bessjy.cpp
@@ -13,7 +13,9 @@ | @@ -13,7 +13,9 @@ | ||
13 | // | 13 | // |
14 | #define _USE_MATH_DEFINES | 14 | #define _USE_MATH_DEFINES |
15 | #include <math.h> | 15 | #include <math.h> |
16 | -#include "bessel.h" | 16 | +#include "bessel.h" |
17 | + | ||
// pi to full double precision; the short form 3.14159 limits every scaled
// Bessel value to roughly six significant digits
#define PI 3.14159265358979323846
17 | 19 | ||
18 | double gamma(double x); | 20 | double gamma(double x); |
19 | // | 21 | // |
@@ -426,7 +428,7 @@ int bessjynb(int n,double x,int &nm,double *jn,double *yn, | @@ -426,7 +428,7 @@ int bessjynb(int n,double x,int &nm,double *jn,double *yn, | ||
426 | 0.2775764465332031, | 428 | 0.2775764465332031, |
427 | -1.993531733751297, | 429 | -1.993531733751297, |
428 | 2.724882731126854e1}; | 430 | 2.724882731126854e1}; |
429 | - | 431 | + |
430 | int i,k,m; | 432 | int i,k,m; |
431 | nm = n; | 433 | nm = n; |
432 | if ((x < 0.0) || (n < 0)) return 1; | 434 | if ((x < 0.0) || (n < 0)) return 1; |
@@ -702,5 +704,26 @@ int bessjyv(double v,double x,double &vm,double *jv,double *yv, | @@ -702,5 +704,26 @@ int bessjyv(double v,double x,double &vm,double *jv,double *yv, | ||
702 | } | 704 | } |
703 | vm = n + v0; | 705 | vm = n + v0; |
704 | return 0; | 706 | return 0; |
707 | +} | ||
708 | + | ||
709 | +int bessjyv_sph(int v, double z, double &vm, double* cjv, | ||
710 | + double* cyv, double* cjvp, double* cyvp) | ||
711 | +{ | ||
712 | + //first, compute the bessel functions of fractional order | ||
713 | + bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); | ||
714 | + | ||
715 | + //iterate through each and scale | ||
716 | + for(int n = 0; n<=v; n++) | ||
717 | + { | ||
718 | + | ||
719 | + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); | ||
720 | + cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); | ||
721 | + | ||
722 | + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(PI / (z * 2.0)); | ||
723 | + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(PI / (z * 2.0)); | ||
724 | + } | ||
725 | + | ||
726 | + return 0; | ||
727 | + | ||
705 | } | 728 | } |
706 | - | 729 | + |
cbessjy.cpp
@@ -724,6 +724,7 @@ int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv, | @@ -724,6 +724,7 @@ int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv, | ||
724 | //iterate through each and scale | 724 | //iterate through each and scale |
725 | for(int n = 0; n<=v; n++) | 725 | for(int n = 0; n<=v; n++) |
726 | { | 726 | { |
727 | + | ||
727 | cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); | 728 | cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); |
728 | cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); | 729 | cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); |
729 | 730 |
colormap.h deleted
1 | -#ifndef RTS_COLORMAP_H | ||
2 | -#define RTS_COLORMAP_H | ||
3 | - | ||
4 | -#include <string> | ||
5 | -#include <qimage.h> | ||
6 | -#include <qcolor.h> | ||
7 | -#include "rts/cuda/error.h" | ||
8 | - | ||
9 | - | ||
10 | -#define BREWER_CTRL_PTS 11 | ||
11 | - | ||
12 | -#ifdef __CUDACC__ | ||
13 | -texture<float4, cudaTextureType1D> cudaTexBrewer; | ||
14 | -static cudaArray* gpuBrewer; | ||
15 | -#endif | ||
16 | - | ||
17 | - | ||
18 | - | ||
19 | -namespace rts{ | ||
20 | - namespace colormap{ | ||
21 | - | ||
22 | -enum colormapType {cmBrewer, cmGrayscale}; | ||
23 | - | ||
24 | -static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size) | ||
25 | -{ | ||
26 | - //create an image object | ||
27 | - QImage image(x_size, y_size, QImage::Format_RGB32); | ||
28 | - | ||
29 | - int i; | ||
30 | - unsigned char r, g, b; | ||
31 | - unsigned int x, y; | ||
32 | - for(y=0; y<y_size; y++) | ||
33 | - for(x=0; x<x_size; x++) | ||
34 | - { | ||
35 | - //calculate the 1D index | ||
36 | - i = y * x_size + x; | ||
37 | - | ||
38 | - r = buffer[i * 3 + 0]; | ||
39 | - g = buffer[i * 3 + 1]; | ||
40 | - b = buffer[i * 3 + 2]; | ||
41 | - | ||
42 | - //set the image pixel | ||
43 | - QColor color(r, g, b); | ||
44 | - image.setPixel(x, y, color.rgb()); | ||
45 | - } | ||
46 | - | ||
47 | - image.save(filename.c_str()); | ||
48 | -} | ||
49 | - | ||
50 | -#ifdef __CUDACC__ | ||
51 | -static void initBrewer() | ||
52 | -{ | ||
53 | - //initialize the Brewer colormap | ||
54 | - | ||
55 | - //allocate CPU space | ||
56 | - float4 cpuColorMap[BREWER_CTRL_PTS]; | ||
57 | - | ||
58 | - //define control rtsPoints | ||
59 | - cpuColorMap[0] = make_float4(0.192157f, 0.211765f, 0.584314f, 1.0f); | ||
60 | - cpuColorMap[1] = make_float4(0.270588f, 0.458824f, 0.705882f, 1.0f); | ||
61 | - cpuColorMap[2] = make_float4(0.454902f, 0.678431f, 0.819608f, 1.0f); | ||
62 | - cpuColorMap[3] = make_float4(0.670588f, 0.85098f, 0.913725f, 1.0f); | ||
63 | - cpuColorMap[4] = make_float4(0.878431f, 0.952941f, 0.972549f, 1.0f); | ||
64 | - cpuColorMap[5] = make_float4(1.0f, 1.0f, 0.74902f, 1.0f); | ||
65 | - cpuColorMap[6] = make_float4(0.996078f, 0.878431f, 0.564706f, 1.0f); | ||
66 | - cpuColorMap[7] = make_float4(0.992157f, 0.682353f, 0.380392f, 1.0f); | ||
67 | - cpuColorMap[8] = make_float4(0.956863f, 0.427451f, 0.262745f, 1.0f); | ||
68 | - cpuColorMap[9] = make_float4(0.843137f, 0.188235f, 0.152941f, 1.0f); | ||
69 | - cpuColorMap[10] = make_float4(0.647059f, 0.0f, 0.14902f, 1.0f); | ||
70 | - | ||
71 | - | ||
72 | - int width = BREWER_CTRL_PTS; | ||
73 | - int height = 0; | ||
74 | - | ||
75 | - | ||
76 | - // allocate array and copy colormap data | ||
77 | - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat); | ||
78 | - | ||
79 | - HANDLE_ERROR(cudaMallocArray(&gpuBrewer, &channelDesc, width, height)); | ||
80 | - | ||
81 | - HANDLE_ERROR(cudaMemcpyToArray(gpuBrewer, 0, 0, cpuColorMap, sizeof(float4)*width, cudaMemcpyHostToDevice)); | ||
82 | - | ||
83 | - // set texture parameters | ||
84 | - cudaTexBrewer.addressMode[0] = cudaAddressModeClamp; | ||
85 | - //texBrewer.addressMode[1] = cudaAddressModeClamp; | ||
86 | - cudaTexBrewer.filterMode = cudaFilterModeLinear; | ||
87 | - cudaTexBrewer.normalized = true; // access with normalized texture coordinates | ||
88 | - | ||
89 | - // Bind the array to the texture | ||
90 | - HANDLE_ERROR(cudaBindTextureToArray( cudaTexBrewer, gpuBrewer, channelDesc)); | ||
91 | - | ||
92 | -} | ||
93 | - | ||
94 | -static void destroyBrewer() | ||
95 | -{ | ||
96 | - HANDLE_ERROR(cudaFreeArray(gpuBrewer)); | ||
97 | - | ||
98 | -} | ||
99 | - | ||
100 | -template<class T> | ||
101 | -__global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) | ||
102 | -{ | ||
103 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | ||
104 | - if(i >= N) return; | ||
105 | - | ||
106 | - //compute the normalized value on [minVal maxVal] | ||
107 | - float a = (gpuSource[i] - minVal) / (maxVal - minVal); | ||
108 | - | ||
109 | - //lookup the color | ||
110 | - float shift = 1.0/BREWER_CTRL_PTS; | ||
111 | - float4 color = tex1D(cudaTexBrewer, a+shift); | ||
112 | - | ||
113 | - gpuDest[i * 3 + 0] = 255 * color.x; | ||
114 | - gpuDest[i * 3 + 1] = 255 * color.y; | ||
115 | - gpuDest[i * 3 + 2] = 255 * color.z; | ||
116 | -} | ||
117 | - | ||
118 | -template<class T> | ||
119 | -__global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) | ||
120 | -{ | ||
121 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | ||
122 | - if(i >= N) return; | ||
123 | - | ||
124 | - //compute the normalized value on [minVal maxVal] | ||
125 | - float a = (gpuSource[i] - minVal) / (maxVal - minVal); | ||
126 | - | ||
127 | - gpuDest[i * 3 + 0] = 255 * a; | ||
128 | - gpuDest[i * 3 + 1] = 255 * a; | ||
129 | - gpuDest[i * 3 + 2] = 255 * a; | ||
130 | -} | ||
131 | - | ||
132 | -template<class T> | ||
133 | -static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T minVal = 0, T maxVal = 1, colormapType cm = cmGrayscale, int blockDim = 128) | ||
134 | -{ | ||
135 | - //This function converts a scalar field on the GPU to a color image on the GPU | ||
136 | - int gridDim = (nVals + blockDim - 1)/blockDim; | ||
137 | - if(cm == cmGrayscale) | ||
138 | - applyGrayscale<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal); | ||
139 | - else if(cm == cmBrewer) | ||
140 | - { | ||
141 | - initBrewer(); | ||
142 | - applyBrewer<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal); | ||
143 | - destroyBrewer(); | ||
144 | - } | ||
145 | - | ||
146 | -} | ||
147 | - | ||
148 | -template<class T> | ||
149 | -static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T minVal, T maxVal, colormapType cm = cmGrayscale) | ||
150 | -{ | ||
151 | - //this function converts a scalar field on the GPU to a color image on the CPU | ||
152 | - | ||
153 | - //first create the color image on the GPU | ||
154 | - | ||
155 | - //allocate GPU memory for the color image | ||
156 | - unsigned char* gpuDest; | ||
157 | - HANDLE_ERROR(cudaMalloc( (void**)&gpuDest, sizeof(unsigned char) * nVals * 3 )); | ||
158 | - | ||
159 | - //HANDLE_ERROR(cudaMemset(gpuSource, 0, sizeof(T) * nVals)); | ||
160 | - | ||
161 | - //create the image on the gpu | ||
162 | - gpu2gpu(gpuSource, gpuDest, nVals, minVal, maxVal, cm); | ||
163 | - | ||
164 | - //HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3)); | ||
165 | - | ||
166 | - //copy the image from the GPU to the CPU | ||
167 | - HANDLE_ERROR(cudaMemcpy(cpuDest, gpuDest, sizeof(unsigned char) * nVals * 3, cudaMemcpyDeviceToHost)); | ||
168 | - | ||
169 | - HANDLE_ERROR(cudaFree( gpuDest )); | ||
170 | - | ||
171 | -} | ||
172 | - | ||
173 | -template<typename T> | ||
174 | -static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) | ||
175 | -{ | ||
176 | - //allocate a color buffer | ||
177 | - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); | ||
178 | - | ||
179 | - //do the mapping | ||
180 | - gpu2cpu<T>(gpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); | ||
181 | - | ||
182 | - //copy the buffer to an image | ||
183 | - buffer2image(cpuBuffer, fileDest, x_size, y_size); | ||
184 | - | ||
185 | - free(cpuBuffer); | ||
186 | -} | ||
187 | - | ||
188 | -#endif | ||
189 | - | ||
190 | -template<class T> | ||
191 | -static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T valMin, T valMax, colormapType cm = cmGrayscale) | ||
192 | -{ | ||
193 | - int i; | ||
194 | - float a; | ||
195 | - float range = valMax - valMin; | ||
196 | - for(i = 0; i<nVals; i++) | ||
197 | - { | ||
198 | - //normalize to the range [valMin valMax] | ||
199 | - a = (cpuSource[i] - valMin) / range; | ||
200 | - | ||
201 | - cpuDest[i * 3 + 0] = 255 * a; | ||
202 | - cpuDest[i * 3 + 1] = 255 * a; | ||
203 | - cpuDest[i * 3 + 2] = 255 * a; | ||
204 | - } | ||
205 | - | ||
206 | -} | ||
207 | - | ||
208 | - | ||
209 | - | ||
210 | -template<typename T> | ||
211 | -static void cpu2image(T* cpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) | ||
212 | -{ | ||
213 | - //allocate a color buffer | ||
214 | - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); | ||
215 | - | ||
216 | - //do the mapping | ||
217 | - cpu2cpu<T>(cpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); | ||
218 | - | ||
219 | - //copy the buffer to an image | ||
220 | - buffer2image(cpuBuffer, fileDest, x_size, y_size); | ||
221 | - | ||
222 | - free(cpuBuffer); | ||
223 | - | ||
224 | -} | ||
225 | - | ||
226 | -}} //end namespace colormap and rts | ||
227 | - | ||
228 | -#endif | ||
229 | - |
dataTypes.h
@@ -24,6 +24,8 @@ typedef double ptype; | @@ -24,6 +24,8 @@ typedef double ptype; | ||
24 | 24 | ||
25 | typedef ptype fieldPoint; | 25 | typedef ptype fieldPoint; |
26 | 26 | ||
27 | +extern bool verbose; | ||
28 | + | ||
27 | //hybrid GPU/CPU complex data type | 29 | //hybrid GPU/CPU complex data type |
28 | #include "rts/math/complex.h" | 30 | #include "rts/math/complex.h" |
29 | #include "rts/math/vector.h" | 31 | #include "rts/math/vector.h" |
defaults.h
@@ -15,14 +15,14 @@ | @@ -15,14 +15,14 @@ | ||
15 | #define DEFAULT_FOCUS_X 0 | 15 | #define DEFAULT_FOCUS_X 0 |
16 | #define DEFAULT_FOCUS_Y 0 | 16 | #define DEFAULT_FOCUS_Y 0 |
17 | #define DEFAULT_FOCUS_Z 0 | 17 | #define DEFAULT_FOCUS_Z 0 |
18 | -#define DEFAULT_INCIDENT_ORDER 100 | 18 | +//#define DEFAULT_INCIDENT_ORDER 20 |
19 | #define DEFAULT_STABILITY_PARM 1.4 | 19 | #define DEFAULT_STABILITY_PARM 1.4 |
20 | 20 | ||
21 | //optics | 21 | //optics |
22 | -#define DEFAULT_CONDENSER_MIN 0.0 | 22 | +#define DEFAULT_CONDENSER_MIN 0 |
23 | #define DEFAULT_CONDENSER_MAX 1 | 23 | #define DEFAULT_CONDENSER_MAX 1 |
24 | 24 | ||
25 | -#define DEFAULT_OBJECTIVE_MIN 0.0 | 25 | +#define DEFAULT_OBJECTIVE_MIN 0 |
26 | #define DEFAULT_OBJECTIVE_MAX 1 | 26 | #define DEFAULT_OBJECTIVE_MAX 1 |
27 | 27 | ||
28 | //incident light direction | 28 | //incident light direction |
@@ -36,17 +36,20 @@ | @@ -36,17 +36,20 @@ | ||
36 | //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective | 36 | //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective |
37 | 37 | ||
38 | 38 | ||
39 | -#define DEFAULT_SLICE_MIN_X -5 | ||
40 | -#define DEFAULT_SLICE_MIN_Y 0 | ||
41 | -#define DEFAULT_SLICE_MIN_Z -5 | 39 | +#define DEFAULT_PLANE_MIN_X -5 |
40 | +#define DEFAULT_PLANE_MIN_Y 0 | ||
41 | +#define DEFAULT_PLANE_MIN_Z -5 | ||
42 | 42 | ||
43 | -#define DEFAULT_SLICE_MAX_X 5 | ||
44 | -#define DEFAULT_SLICE_MAX_Y 0 | ||
45 | -#define DEFAULT_SLICE_MAX_Z 5 | 43 | +#define DEFAULT_PLANE_MAX_X 5 |
44 | +#define DEFAULT_PLANE_MAX_Y 0 | ||
45 | +#define DEFAULT_PLANE_MAX_Z 5 | ||
46 | 46 | ||
47 | -#define DEFAULT_SLICE_NORM_X 0 | ||
48 | -#define DEFAULT_SLICE_NORM_Y 1 | ||
49 | -#define DEFAULT_SLICE_NORM_Z 0 | 47 | +#define DEFAULT_PLANE_NORM_X 0 |
48 | +#define DEFAULT_PLANE_NORM_Y 1 | ||
49 | +#define DEFAULT_PLANE_NORM_Z 0 | ||
50 | + | ||
51 | +#define DEFAULT_PLANE_SIZE 40 | ||
52 | +#define DEFAULT_PLANE_POSITION 0 | ||
50 | 53 | ||
51 | 54 | ||
52 | /* | 55 | /* |
@@ -64,21 +67,23 @@ | @@ -64,21 +67,23 @@ | ||
64 | */ | 67 | */ |
65 | 68 | ||
66 | 69 | ||
67 | -#define DEFAULT_FIELD_ORDER 200 | 70 | +#define DEFAULT_FIELD_ORDER 10 |
68 | 71 | ||
69 | -#define DEFAULT_SAMPLES 200 | 72 | +#define DEFAULT_SAMPLES 400 |
70 | 73 | ||
71 | #define DEFAULT_SLICE_RES 256 | 74 | #define DEFAULT_SLICE_RES 256 |
72 | 75 | ||
76 | +#define DEFAULT_SPHERE_THETA_R 1000 | ||
77 | + | ||
73 | #define DEFAULT_PADDING 1 | 78 | #define DEFAULT_PADDING 1 |
74 | #define DEFAULT_SUPERSAMPLE 1 | 79 | #define DEFAULT_SUPERSAMPLE 1 |
75 | 80 | ||
76 | -#define DEFAULT_INTENSITY_FILE "testappend" | 81 | +#define DEFAULT_INTENSITY_FILE "out_i.bmp" |
77 | #define DEFAULT_TRANSMITTANCE_FILE "" | 82 | #define DEFAULT_TRANSMITTANCE_FILE "" |
78 | -#define DEFAULT_ABSORBANCE_FILE "out_a" | 83 | +#define DEFAULT_ABSORBANCE_FILE "out_a.bmp" |
79 | #define DEFAULT_NEAR_FILE "out_n.bmp" | 84 | #define DEFAULT_NEAR_FILE "out_n.bmp" |
80 | #define DEFAULT_FAR_FILE "out_f.bmp" | 85 | #define DEFAULT_FAR_FILE "out_f.bmp" |
81 | -#define DEFAULT_EXTENDED_SOURCE "einstein_small.jpg" | 86 | +#define DEFAULT_EXTENDED_SOURCE "" |
82 | #define DEFAULT_FIELD_TYPE "magnitude" | 87 | #define DEFAULT_FIELD_TYPE "magnitude" |
83 | #define DEFAULT_FORMAT fileoutStruct::formatImage | 88 | #define DEFAULT_FORMAT fileoutStruct::formatImage |
84 | #define DEFAULT_COLORMAP "brewer" | 89 | #define DEFAULT_COLORMAP "brewer" |
fieldslice.cpp
@@ -8,14 +8,16 @@ | @@ -8,14 +8,16 @@ | ||
8 | using namespace std; | 8 | using namespace std; |
9 | 9 | ||
10 | fieldslice::fieldslice(unsigned int x_size, unsigned int y_size) | 10 | fieldslice::fieldslice(unsigned int x_size, unsigned int y_size) |
11 | -{ | 11 | +{ |
12 | + x_hat = y_hat = z_hat = NULL; | ||
13 | + | ||
12 | //save the slice resolution | 14 | //save the slice resolution |
13 | R[0] = x_size; | 15 | R[0] = x_size; |
14 | R[1] = x_size; | 16 | R[1] = x_size; |
15 | 17 | ||
16 | scalarField = true; | 18 | scalarField = true; |
17 | 19 | ||
18 | - //init_gpu(); | 20 | + init_gpu(); |
19 | 21 | ||
20 | 22 | ||
21 | } | 23 | } |
@@ -101,5 +103,5 @@ fieldslice::fieldslice() | @@ -101,5 +103,5 @@ fieldslice::fieldslice() | ||
101 | 103 | ||
102 | fieldslice::~fieldslice() | 104 | fieldslice::~fieldslice() |
103 | { | 105 | { |
104 | - //kill_gpu(); | 106 | + kill_gpu(); |
105 | } | 107 | } |
fieldslice.cu
1 | #include "fieldslice.h" | 1 | #include "fieldslice.h" |
2 | #include "dataTypes.h" | 2 | #include "dataTypes.h" |
3 | -#include "rts/cuda/error.h" | 3 | +#include "rts/cuda/error.h" |
4 | +#include "rts/cuda/threads.h" | ||
4 | 5 | ||
5 | 6 | ||
6 | __global__ void field_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* I, unsigned int N) | 7 | __global__ void field_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* I, unsigned int N) |
7 | { | 8 | { |
8 | //compute the index for this thread | 9 | //compute the index for this thread |
9 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | 10 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; |
11 | + int i = ThreadIndex1D(); | ||
12 | + | ||
10 | if(i >= N) return; | 13 | if(i >= N) return; |
11 | 14 | ||
12 | ptype xm = x[i].abs(); | 15 | ptype xm = x[i].abs(); |
@@ -66,7 +69,8 @@ __global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, pty | @@ -66,7 +69,8 @@ __global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, pty | ||
66 | __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) | 69 | __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) |
67 | { | 70 | { |
68 | //compute the index for this thread | 71 | //compute the index for this thread |
69 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | 72 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; |
73 | + int i = ThreadIndex1D(); | ||
70 | if(i >= N) return; | 74 | if(i >= N) return; |
71 | 75 | ||
72 | V[i] = field_component[i].real(); | 76 | V[i] = field_component[i].real(); |
@@ -75,7 +79,8 @@ __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) | @@ -75,7 +79,8 @@ __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) | ||
75 | __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N) | 79 | __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N) |
76 | { | 80 | { |
77 | //compute the index for this thread | 81 | //compute the index for this thread |
78 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | 82 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; |
83 | + int i = ThreadIndex1D(); | ||
79 | if(i >= N) return; | 84 | if(i >= N) return; |
80 | 85 | ||
81 | V[i] = field_component[i].imag(); | 86 | V[i] = field_component[i].imag(); |
@@ -84,7 +89,8 @@ __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned i | @@ -84,7 +89,8 @@ __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned i | ||
84 | __global__ void field_sqrt(ptype* input, ptype* output, unsigned int N) | 89 | __global__ void field_sqrt(ptype* input, ptype* output, unsigned int N) |
85 | { | 90 | { |
86 | //compute the index for this thread | 91 | //compute the index for this thread |
87 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | 92 | + //int i = blockIdx.x * blockDim.x + threadIdx.x; |
93 | + int i = ThreadIndex1D(); | ||
88 | if(i >= N) return; | 94 | if(i >= N) return; |
89 | 95 | ||
90 | output[i] = sqrt(input[i]); | 96 | output[i] = sqrt(input[i]); |
@@ -115,7 +121,8 @@ scalarslice fieldslice::Mag() | @@ -115,7 +121,8 @@ scalarslice fieldslice::Mag() | ||
115 | 121 | ||
116 | //compute the total number of values in the slice | 122 | //compute the total number of values in the slice |
117 | unsigned int N = R[0] * R[1]; | 123 | unsigned int N = R[0] * R[1]; |
118 | - int gridDim = (N+BLOCK-1)/BLOCK; | 124 | + //int gridDim = (N+BLOCK-1)/BLOCK; |
125 | + dim3 gridDim = GenGrid1D(N, BLOCK); | ||
119 | 126 | ||
120 | field_intensity<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, result->S, N); | 127 | field_intensity<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, result->S, N); |
121 | field_sqrt<<<gridDim, BLOCK>>>(result->S, result->S, N); | 128 | field_sqrt<<<gridDim, BLOCK>>>(result->S, result->S, N); |
@@ -132,7 +139,8 @@ scalarslice fieldslice::Real() | @@ -132,7 +139,8 @@ scalarslice fieldslice::Real() | ||
132 | 139 | ||
133 | //compute the total number of values in the slice | 140 | //compute the total number of values in the slice |
134 | unsigned int N = R[0] * R[1]; | 141 | unsigned int N = R[0] * R[1]; |
135 | - int gridDim = (N+BLOCK-1)/BLOCK; | 142 | + //int gridDim = (N+BLOCK-1)/BLOCK; |
143 | + dim3 gridDim = GenGrid1D(N, BLOCK); | ||
136 | 144 | ||
137 | field_real<<<gridDim, BLOCK>>>(x_hat, result->S, N); | 145 | field_real<<<gridDim, BLOCK>>>(x_hat, result->S, N); |
138 | 146 | ||
@@ -148,7 +156,8 @@ scalarslice fieldslice::Imag() | @@ -148,7 +156,8 @@ scalarslice fieldslice::Imag() | ||
148 | 156 | ||
149 | //compute the total number of values in the slice | 157 | //compute the total number of values in the slice |
150 | unsigned int N = R[0] * R[1]; | 158 | unsigned int N = R[0] * R[1]; |
151 | - int gridDim = (N+BLOCK-1)/BLOCK; | 159 | + //int gridDim = (N+BLOCK-1)/BLOCK; |
160 | + dim3 gridDim = GenGrid1D(N, BLOCK); | ||
152 | 161 | ||
153 | field_imaginary<<<gridDim, BLOCK>>>(x_hat, result->S, N); | 162 | field_imaginary<<<gridDim, BLOCK>>>(x_hat, result->S, N); |
154 | 163 | ||
@@ -192,7 +201,6 @@ void fieldslice::ScaleField(ptype v) | @@ -192,7 +201,6 @@ void fieldslice::ScaleField(ptype v) | ||
192 | 201 | ||
193 | //compute the total number of values in the slice | 202 | //compute the total number of values in the slice |
194 | unsigned int N = R[0] * R[1]; | 203 | unsigned int N = R[0] * R[1]; |
195 | - //cout<<"Size of mag field: "<<N<<endl; | ||
196 | int gridDim = (N+BLOCK-1)/BLOCK; | 204 | int gridDim = (N+BLOCK-1)/BLOCK; |
197 | 205 | ||
198 | field_scale<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, N, v); | 206 | field_scale<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, N, v); |
@@ -200,19 +208,23 @@ void fieldslice::ScaleField(ptype v) | @@ -200,19 +208,23 @@ void fieldslice::ScaleField(ptype v) | ||
200 | } | 208 | } |
201 | 209 | ||
202 | void fieldslice::init_gpu() | 210 | void fieldslice::init_gpu() |
203 | -{ | 211 | +{ |
212 | + //if the field has no size, return | ||
213 | + if(R[0] == 0 || R[1] == 0) | ||
214 | + return; | ||
215 | + | ||
216 | + //free any previous memory allocations | ||
217 | + if(x_hat) | ||
218 | + HANDLE_ERROR(cudaFree(x_hat)); | ||
219 | + if(y_hat) | ||
220 | + HANDLE_ERROR(cudaFree(y_hat)); | ||
221 | + if(z_hat) | ||
222 | + HANDLE_ERROR(cudaFree(z_hat)); | ||
223 | + | ||
204 | //allocate space on the GPU for the field slice | 224 | //allocate space on the GPU for the field slice |
205 | HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex))); | 225 | HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex))); |
206 | - //HANDLE_ERROR(cudaMemset(x_hat, 0, R[0] * R[1] * sizeof(bsComplex))); | ||
207 | 226 | ||
208 | - //if the field is scalar, y_hat and z_hat are unused | ||
209 | - if(scalarField) | ||
210 | - { | ||
211 | - y_hat = NULL; | ||
212 | - z_hat = NULL; | ||
213 | - | ||
214 | - } | ||
215 | - else | 227 | + if(!scalarField) |
216 | { | 228 | { |
217 | HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex))); | 229 | HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex))); |
218 | //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex))); | 230 | //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex))); |
@@ -233,6 +245,8 @@ void fieldslice::kill_gpu() | @@ -233,6 +245,8 @@ void fieldslice::kill_gpu() | ||
233 | if(z_hat != NULL) | 245 | if(z_hat != NULL) |
234 | HANDLE_ERROR(cudaFree(z_hat)); | 246 | HANDLE_ERROR(cudaFree(z_hat)); |
235 | 247 | ||
248 | + x_hat = y_hat = z_hat = NULL; | ||
249 | + | ||
236 | } | 250 | } |
237 | 251 | ||
238 | void fieldslice::clear_gpu() | 252 | void fieldslice::clear_gpu() |
@@ -275,7 +289,7 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) | @@ -275,7 +289,7 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) | ||
275 | result.scalarField = scalarField; | 289 | result.scalarField = scalarField; |
276 | 290 | ||
277 | //allocate space for the new field | 291 | //allocate space for the new field |
278 | - result.init_gpu(); | 292 | + //result.init_gpu(); |
279 | 293 | ||
280 | //create one thread for each pixel of the field slice | 294 | //create one thread for each pixel of the field slice |
281 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | 295 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); |
@@ -291,3 +305,57 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) | @@ -291,3 +305,57 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) | ||
291 | 305 | ||
292 | return result; | 306 | return result; |
293 | } | 307 | } |
308 | + | ||
309 | +fieldslice::fieldslice(const fieldslice& rhs) | ||
310 | +{ | ||
311 | + R[0] = rhs.R[0]; | ||
312 | + R[1] = rhs.R[1]; | ||
313 | + scalarField = rhs.scalarField; | ||
314 | + | ||
315 | + x_hat = y_hat = z_hat = NULL; | ||
316 | + | ||
317 | + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1]; | ||
318 | + if(rhs.x_hat != NULL) | ||
319 | + { | ||
320 | + HANDLE_ERROR(cudaMalloc( (void**)&x_hat, bytes)); | ||
321 | + HANDLE_ERROR(cudaMemcpy( x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice)); | ||
322 | + } | ||
323 | + if(rhs.y_hat != NULL) | ||
324 | + { | ||
325 | + HANDLE_ERROR(cudaMalloc( (void**)&y_hat, bytes)); | ||
326 | + HANDLE_ERROR(cudaMemcpy( y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice)); | ||
327 | + } | ||
328 | + if(rhs.z_hat != NULL) | ||
329 | + { | ||
330 | + HANDLE_ERROR(cudaMalloc( (void**)&z_hat, bytes)); | ||
331 | + HANDLE_ERROR(cudaMemcpy( z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice)); | ||
332 | + } | ||
333 | + | ||
334 | +} | ||
335 | + | ||
336 | +fieldslice& fieldslice::operator=(const fieldslice& rhs) | ||
337 | +{ | ||
338 | + //make sure this isn't a self-allocation | ||
339 | + if(this != &rhs) | ||
340 | + { | ||
341 | + //make a shallow copy | ||
342 | + R[0] = rhs.R[0]; | ||
343 | + R[1] = rhs.R[1]; | ||
344 | + scalarField = rhs.scalarField; | ||
345 | + | ||
346 | + //initialize to new parameters | ||
347 | + init_gpu(); | ||
348 | + | ||
349 | + //make a deep copy | ||
350 | + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1]; | ||
351 | + if(x_hat != NULL) | ||
352 | + HANDLE_ERROR(cudaMemcpy(x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice)); | ||
353 | + if(y_hat != NULL) | ||
354 | + HANDLE_ERROR(cudaMemcpy(y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice)); | ||
355 | + if(z_hat != NULL) | ||
356 | + HANDLE_ERROR(cudaMemcpy(z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice)); | ||
357 | + } | ||
358 | + | ||
359 | + return *this; | ||
360 | + | ||
361 | +} |
fieldslice.h
@@ -31,6 +31,9 @@ struct fieldslice | @@ -31,6 +31,9 @@ struct fieldslice | ||
31 | 31 | ||
32 | ~fieldslice(); | 32 | ~fieldslice(); |
33 | 33 | ||
34 | + //copy constructor | ||
35 | + fieldslice(const fieldslice& rhs); | ||
36 | + | ||
34 | //void setPos(bsPoint pMin, bsPoint pMax, bsVector N); | 37 | //void setPos(bsPoint pMin, bsPoint pMax, bsVector N); |
35 | 38 | ||
36 | scalarslice Mag(); | 39 | scalarslice Mag(); |
@@ -47,6 +50,7 @@ struct fieldslice | @@ -47,6 +50,7 @@ struct fieldslice | ||
47 | 50 | ||
48 | //crop a region from the field | 51 | //crop a region from the field |
49 | fieldslice crop(int u, int v, int su, int sv); | 52 | fieldslice crop(int u, int v, int su, int sv); |
53 | + fieldslice& operator=(const fieldslice& rhs); | ||
50 | 54 | ||
51 | void init_gpu(); | 55 | void init_gpu(); |
52 | void kill_gpu(); | 56 | void kill_gpu(); |
fileout.cu
@@ -186,11 +186,21 @@ void fileoutStruct::Save(microscopeStruct* scope) | @@ -186,11 +186,21 @@ void fileoutStruct::Save(microscopeStruct* scope) | ||
186 | //save images of the fields in the microscope | 186 | //save images of the fields in the microscope |
187 | 187 | ||
188 | //if the user specifies an extended source | 188 | //if the user specifies an extended source |
189 | - if(scope->focalPoints.size() > 1) | 189 | + if(scope->focalPoints.size() > 0) |
190 | { | 190 | { |
191 | //simulate the extended source and output the detector image | 191 | //simulate the extended source and output the detector image |
192 | scope->SimulateExtendedSource(); | 192 | scope->SimulateExtendedSource(); |
193 | 193 | ||
194 | + //saveNearField(&scope->nf); | ||
195 | + saveFarField(scope); | ||
196 | + | ||
197 | + //save the detector images | ||
198 | + saveDetector(scope); | ||
199 | + | ||
200 | + //simulate scattering for the last point (so that you have a near field image) | ||
201 | + scope->SimulateScattering(); | ||
202 | + saveNearField(&scope->nf); | ||
203 | + | ||
194 | } | 204 | } |
195 | else | 205 | else |
196 | { | 206 | { |
@@ -203,12 +213,15 @@ void fileoutStruct::Save(microscopeStruct* scope) | @@ -203,12 +213,15 @@ void fileoutStruct::Save(microscopeStruct* scope) | ||
203 | //run the far-field simulation | 213 | //run the far-field simulation |
204 | scope->SimulateImaging(); | 214 | scope->SimulateImaging(); |
205 | 215 | ||
216 | + //saveNearField(&scope->nf); | ||
206 | saveFarField(scope); | 217 | saveFarField(scope); |
207 | 218 | ||
219 | + //save the detector images | ||
220 | + saveDetector(scope); | ||
221 | + | ||
208 | } | 222 | } |
209 | 223 | ||
210 | - //save the detector images | ||
211 | - saveDetector(scope); | 224 | + |
212 | 225 | ||
213 | 226 | ||
214 | } | 227 | } |
fileout.h
@@ -5,7 +5,7 @@ | @@ -5,7 +5,7 @@ | ||
5 | //#include "defaults.h" | 5 | //#include "defaults.h" |
6 | #include "dataTypes.h" | 6 | #include "dataTypes.h" |
7 | 7 | ||
8 | -#include "colormap.h" | 8 | +#include "rts/graphics/colormap.h" |
9 | #include "fieldslice.h" | 9 | #include "fieldslice.h" |
10 | #include "nearfield.h" | 10 | #include "nearfield.h" |
11 | #include "microscope.h" | 11 | #include "microscope.h" |
@@ -34,7 +34,7 @@ struct fileoutStruct{ | @@ -34,7 +34,7 @@ struct fileoutStruct{ | ||
34 | //image_source source; | 34 | //image_source source; |
35 | 35 | ||
36 | //color map info | 36 | //color map info |
37 | - rts::colormap::colormapType colormap; | 37 | + rts::colormapType colormap; |
38 | ptype colorMax; | 38 | ptype colorMax; |
39 | 39 | ||
40 | void Save(microscopeStruct* scope); | 40 | void Save(microscopeStruct* scope); |
main.cpp
@@ -24,6 +24,7 @@ microscopeStruct* SCOPE; | @@ -24,6 +24,7 @@ microscopeStruct* SCOPE; | ||
24 | #include "warnings.h" | 24 | #include "warnings.h" |
25 | 25 | ||
26 | fileoutStruct gFileOut; | 26 | fileoutStruct gFileOut; |
27 | +bool verbose = false; | ||
27 | using namespace std; | 28 | using namespace std; |
28 | 29 | ||
29 | int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, | 30 | int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, |
@@ -31,32 +32,19 @@ int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, | @@ -31,32 +32,19 @@ int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, | ||
31 | 32 | ||
32 | int main(int argc, char *argv[]) | 33 | int main(int argc, char *argv[]) |
33 | { | 34 | { |
34 | - //test Envi loading and saving | ||
35 | - //EnviFile envi("testenvi", "w"); | ||
36 | - | ||
37 | - //float* data = (float*)malloc(sizeof(float) * 100 * 100); | ||
38 | - //envi.addBand(data, 100, 100, 100); | ||
39 | - | ||
40 | - //envi.close(); | ||
41 | - | ||
42 | - //return 0; | ||
43 | 35 | ||
44 | SCOPE = new microscopeStruct(); | 36 | SCOPE = new microscopeStruct(); |
45 | 37 | ||
46 | - cout<<SCOPE->nf.Uf.R[0]<<endl; | ||
47 | - | ||
48 | LoadParameters(argc, argv); | 38 | LoadParameters(argc, argv); |
49 | 39 | ||
50 | - //TestSimulation(NF, SCOPE, &gFileOut); | ||
51 | - | ||
52 | //initialize GPU memory for fields | 40 | //initialize GPU memory for fields |
53 | SCOPE->init(); | 41 | SCOPE->init(); |
54 | 42 | ||
55 | - OutputOptions(); | ||
56 | - | ||
57 | gFileOut.Save(SCOPE); | 43 | gFileOut.Save(SCOPE); |
58 | 44 | ||
59 | - //NF->destroy(); | 45 | + if(verbose) |
46 | + OutputOptions(); | ||
47 | + | ||
60 | SCOPE->destroy(); | 48 | SCOPE->destroy(); |
61 | 49 | ||
62 | 50 |
microscope.cu
@@ -4,7 +4,7 @@ | @@ -4,7 +4,7 @@ | ||
4 | #include "rts/tools/progressbar.h" | 4 | #include "rts/tools/progressbar.h" |
5 | #include "rts/cuda/timer.h" | 5 | #include "rts/cuda/timer.h" |
6 | #include "dataTypes.h" | 6 | #include "dataTypes.h" |
7 | -#include "colormap.h" | 7 | +#include "rts/graphics/colormap.h" |
8 | 8 | ||
9 | #include <QImage> | 9 | #include <QImage> |
10 | 10 | ||
@@ -112,8 +112,8 @@ void microscopeStruct::getFarField() | @@ -112,8 +112,8 @@ void microscopeStruct::getFarField() | ||
112 | //Compute the Far Field image of the focal plane | 112 | //Compute the Far Field image of the focal plane |
113 | 113 | ||
114 | //clear the memory from previous detector fields | 114 | //clear the memory from previous detector fields |
115 | - Ud.kill_gpu(); | ||
116 | - Ufd.kill_gpu(); | 115 | + //Ud.kill_gpu(); |
116 | + //Ufd.kill_gpu(); | ||
117 | 117 | ||
118 | //first crop the filtered near-field image of the source and scattered fields | 118 | //first crop the filtered near-field image of the source and scattered fields |
119 | Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]); | 119 | Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]); |
@@ -261,9 +261,14 @@ void microscopeStruct::SimulateExtendedSource() | @@ -261,9 +261,14 @@ void microscopeStruct::SimulateExtendedSource() | ||
261 | t += gpuStopTimer(); | 261 | t += gpuStopTimer(); |
262 | 262 | ||
263 | rtsProgressBar((double)(i+1)/(double)npts * 100); | 263 | rtsProgressBar((double)(i+1)/(double)npts * 100); |
264 | + //unsigned char c; | ||
265 | + //cin>>c; | ||
264 | } | 266 | } |
265 | - cout<<endl; | ||
266 | - cout<<"Time per source: "<<t/npts<<"ms"<<endl; | 267 | + if(verbose) |
268 | + { | ||
269 | + cout<<endl; | ||
270 | + cout<<"Time per source: "<<t/npts<<"ms"<<endl; | ||
271 | + } | ||
267 | 272 | ||
268 | } | 273 | } |
269 | 274 | ||
@@ -304,3 +309,15 @@ void microscopeStruct::LoadExtendedSource(std::string filename) | @@ -304,3 +309,15 @@ void microscopeStruct::LoadExtendedSource(std::string filename) | ||
304 | } | 309 | } |
305 | } | 310 | } |
306 | } | 311 | } |
312 | + | ||
313 | +std::string microscopeStruct::toStr() | ||
314 | +{ | ||
315 | + stringstream ss; | ||
316 | + ss<<nf.toStr(); | ||
317 | + | ||
318 | + ss<<"----------Optics--------------"<<endl<<endl; | ||
319 | + ss<<"Objective NA: "<<objective[0]<<" to "<<objective[1]<<endl; | ||
320 | + return ss.str(); | ||
321 | + | ||
322 | + | ||
323 | +} |
microscope.h
montecarlo.cpp
@@ -35,18 +35,12 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) | @@ -35,18 +35,12 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) | ||
35 | ptype inPhi = asin(NAin); | 35 | ptype inPhi = asin(NAin); |
36 | ptype outPhi = asin(NAout); | 36 | ptype outPhi = asin(NAout); |
37 | 37 | ||
38 | - //cout<<"inPhi: "<<inPhi<<endl; | ||
39 | - //cout<<"outPhi: "<<outPhi<<endl; | ||
40 | - | ||
41 | //calculate the z-values associated with these angles | 38 | //calculate the z-values associated with these angles |
42 | ptype inZ = cos(inPhi); | 39 | ptype inZ = cos(inPhi); |
43 | ptype outZ = cos(outPhi); | 40 | ptype outZ = cos(outPhi); |
44 | 41 | ||
45 | ptype rangeZ = inZ - outZ; | 42 | ptype rangeZ = inZ - outZ; |
46 | 43 | ||
47 | - //cout<<"inZ: "<<inZ<<endl; | ||
48 | - //cout<<"outZ: "<<outZ<<endl; | ||
49 | - | ||
50 | //draw a distribution of random phi, z values | 44 | //draw a distribution of random phi, z values |
51 | ptype z, phi, theta; | 45 | ptype z, phi, theta; |
52 | for(int i=0; i<N; i++) | 46 | for(int i=0; i<N; i++) |
@@ -58,7 +52,6 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) | @@ -58,7 +52,6 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) | ||
58 | phi = acos(z); | 52 | phi = acos(z); |
59 | 53 | ||
60 | //compute and store cartesian coordinates | 54 | //compute and store cartesian coordinates |
61 | - //bsVector spherical(1, theta + kSph[1], phi + kSph[2]); | ||
62 | bsVector spherical(1, theta, phi); | 55 | bsVector spherical(1, theta, phi); |
63 | bsVector cart = spherical.sph2cart(); | 56 | bsVector cart = spherical.sph2cart(); |
64 | samples[i] = rotation * cart; | 57 | samples[i] = rotation * cart; |
nearfield.cpp
1 | #include "nearfield.h" | 1 | #include "nearfield.h" |
2 | +#include <time.h> | ||
3 | +#include <math.h> | ||
4 | + | ||
5 | +#ifdef _WIN32 | ||
6 | +#define isnan(x) _isnan(x) | ||
7 | +#define isinf(x) (!_finite(x)) | ||
8 | +#endif | ||
9 | + | ||
10 | +int bessjyv_sph(int v, double z, double &vm, double* cjv, | ||
11 | + double* cyv, double* cjvp, double* cyvp); | ||
2 | 12 | ||
3 | nearfieldStruct::nearfieldStruct() | 13 | nearfieldStruct::nearfieldStruct() |
4 | { | 14 | { |
5 | scalarSim = true; | 15 | scalarSim = true; |
6 | planeWave = false; | 16 | planeWave = false; |
17 | + lut_us = true; | ||
18 | + lut_uf = false; | ||
7 | 19 | ||
8 | nWaves = 0; | 20 | nWaves = 0; |
9 | } | 21 | } |
@@ -46,6 +58,8 @@ std::string nearfieldStruct::toStr() | @@ -46,6 +58,8 @@ std::string nearfieldStruct::toStr() | ||
46 | ss<<"Condenser NA: "<<condenser[0]<<" to "<<condenser[1]<<std::endl; | 58 | ss<<"Condenser NA: "<<condenser[0]<<" to "<<condenser[1]<<std::endl; |
47 | ss<<"Focal Point: "<<focus[0]<<", "<<focus[1]<<", "<<focus[2]<<std::endl; | 59 | ss<<"Focal Point: "<<focus[0]<<", "<<focus[1]<<", "<<focus[2]<<std::endl; |
48 | ss<<"Field Slice: "<<std::endl; | 60 | ss<<"Field Slice: "<<std::endl; |
61 | + if(lut_us) | ||
62 | + ss<<"LUT Parameters --- min: "<<d_min<<" max: "<<d_max<<std::endl; | ||
49 | ss<<pos<<std::endl; | 63 | ss<<pos<<std::endl; |
50 | 64 | ||
51 | ss<<std::endl<<"---------Materials-----------"<<std::endl; | 65 | ss<<std::endl<<"---------Materials-----------"<<std::endl; |
@@ -61,6 +75,10 @@ std::string nearfieldStruct::toStr() | @@ -61,6 +75,10 @@ std::string nearfieldStruct::toStr() | ||
61 | for(unsigned int s=0; s<sVector.size(); s++) | 75 | for(unsigned int s=0; s<sVector.size(); s++) |
62 | ss<<sVector[s].toStr()<<std::endl; | 76 | ss<<sVector[s].toStr()<<std::endl; |
63 | 77 | ||
78 | + ss<<"---------Timings-------------"<<std::endl; | ||
79 | + ss<<"Uf = "<<t_Uf<<"ms"<<std::endl; | ||
80 | + ss<<"Us = "<<t_Us<<"ms"<<std::endl; | ||
81 | + | ||
64 | return ss.str(); | 82 | return ss.str(); |
65 | } | 83 | } |
66 | 84 | ||
@@ -70,7 +88,8 @@ void nearfieldStruct::calcWaves() | @@ -70,7 +88,8 @@ void nearfieldStruct::calcWaves() | ||
70 | inWaves.resize(nWaves); | 88 | inWaves.resize(nWaves); |
71 | 89 | ||
72 | //re-seed the random number generator | 90 | //re-seed the random number generator |
73 | - //srand(seed); | 91 | + //srand(time(NULL)); |
92 | + srand(NULL); | ||
74 | 93 | ||
75 | //calculate the monte-carlo samples | 94 | //calculate the monte-carlo samples |
76 | mcSampleNA(&inWaves[0], nWaves, k, condenser[0], condenser[1]); | 95 | mcSampleNA(&inWaves[0], nWaves, k, condenser[0], condenser[1]); |
@@ -84,6 +103,8 @@ void nearfieldStruct::calcSpheres() | @@ -84,6 +103,8 @@ void nearfieldStruct::calcSpheres() | ||
84 | //calculate all of the constants necessary to evaluate the scattered field | 103 | //calculate all of the constants necessary to evaluate the scattered field |
85 | //estimate the order required to represent the scattered field for each sphere | 104 | //estimate the order required to represent the scattered field for each sphere |
86 | 105 | ||
106 | + | ||
107 | + | ||
87 | //for each sphere | 108 | //for each sphere |
88 | for(int i=0; i<sVector.size(); i++) | 109 | for(int i=0; i<sVector.size(); i++) |
89 | { | 110 | { |
@@ -91,12 +112,10 @@ void nearfieldStruct::calcSpheres() | @@ -91,12 +112,10 @@ void nearfieldStruct::calcSpheres() | ||
91 | 112 | ||
92 | //calculate the required order | 113 | //calculate the required order |
93 | sVector[i].calcNl(lambda); | 114 | sVector[i].calcNl(lambda); |
94 | - //std::cout<<sVector[i].Nl<<std::endl; | ||
95 | 115 | ||
96 | //set the refractive index for the sphere | 116 | //set the refractive index for the sphere |
97 | int imat = sVector[i].iMaterial; | 117 | int imat = sVector[i].iMaterial; |
98 | rts::rtsComplex<ptype> n = mVector[imat](lambda); | 118 | rts::rtsComplex<ptype> n = mVector[imat](lambda); |
99 | - //std::cout<<"Sphere refractive index: "<<n<<std::endl; | ||
100 | 119 | ||
101 | //calculate the scattering coefficients | 120 | //calculate the scattering coefficients |
102 | sVector[i].calcCoeff(lambda, n); | 121 | sVector[i].calcCoeff(lambda, n); |
@@ -104,18 +123,109 @@ void nearfieldStruct::calcSpheres() | @@ -104,18 +123,109 @@ void nearfieldStruct::calcSpheres() | ||
104 | //save the refractive index | 123 | //save the refractive index |
105 | sVector[i].n = n; | 124 | sVector[i].n = n; |
106 | 125 | ||
126 | + //if the LUT is used, calculate Usp(theta, r) | ||
127 | + if(lut_us) | ||
128 | + { | ||
129 | + sVector[i].calcUp(lambda, n, pos, max(U.R[0], U.R[1])); | ||
130 | + } | ||
131 | + | ||
132 | + | ||
107 | } | 133 | } |
108 | 134 | ||
109 | } | 135 | } |
110 | 136 | ||
137 | +void nearfieldStruct::calcUs() | ||
138 | +{ | ||
139 | + | ||
140 | + | ||
141 | + if(lut_us) | ||
142 | + scalarUpLut(); | ||
143 | + else | ||
144 | + scalarUs(); | ||
145 | +} | ||
146 | + | ||
147 | +void nearfieldStruct::calcUf() | ||
148 | +{ | ||
149 | + if(lut_uf) | ||
150 | + scalarUfLut(); | ||
151 | + else | ||
152 | + scalarUf(); | ||
153 | +} | ||
154 | + | ||
111 | void nearfieldStruct::Simulate() | 155 | void nearfieldStruct::Simulate() |
112 | { | 156 | { |
157 | + //initialize timings | ||
158 | + t_Uf = 0; | ||
159 | + t_Us = 0; | ||
160 | + | ||
113 | //compute a set of plane waves for Monte-Carlo simulation | 161 | //compute a set of plane waves for Monte-Carlo simulation |
114 | calcWaves(); | 162 | calcWaves(); |
115 | 163 | ||
116 | //the near field has to be simulated no matter what the output rtsPoint is | 164 | //the near field has to be simulated no matter what the output rtsPoint is |
117 | - scalarUf(); | 165 | + calcUf(); |
118 | calcSpheres(); | 166 | calcSpheres(); |
119 | - scalarUs(); | 167 | + calcUs(); |
120 | sumUf(); | 168 | sumUf(); |
169 | + | ||
170 | + //U.Mag().toImage("testU.bmp"); | ||
171 | +} | ||
172 | + | ||
173 | +void nearfieldStruct::calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR) | ||
174 | +{ | ||
175 | + /*Compute the look-up-table for spherical bessel functions used for the incident field | ||
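176 | + j = dR x (Nl + 1) array of values (one row of Nl + 1 orders for each distance sample) | |
177 | + dR = resolution of j along the distance axis (number of samples from d_min to d_max) | |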
178 | + */ | ||
179 | + | ||
180 | + //compute the wavenumber | ||
181 | + ptype k = 2 * PI / lambda; | ||
182 | + unsigned int Nl = m; | ||
183 | + | ||
184 | + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored) | ||
185 | + int bytes = sizeof(double) * (Nl + 1); | ||
186 | + double* cjv_kd = (double*)malloc(bytes); | ||
187 | + double* cyv_kd = (double*)malloc(bytes); | ||
188 | + double* cjvp_kd = (double*)malloc(bytes); | ||
189 | + double* cyvp_kd = (double*)malloc(bytes); | ||
190 | + | ||
191 | + //compute the bessel functions using the CPU-based algorithm | ||
192 | + double vm; | ||
193 | + | ||
194 | + //for each sample along r | ||
195 | + ptype dr = (d_max - d_min) / (dR - 1); | ||
196 | + ptype d; | ||
197 | + ptype jv; | ||
198 | + for(int id = 0; id < dR; id++) | ||
199 | + { | ||
200 | + d = id * dr + d_min; | ||
201 | + double kd = k*d; | ||
202 | + bessjyv_sph(Nl, kd, vm, cjv_kd, cyv_kd, cjvp_kd, cyvp_kd); | ||
203 | + | ||
204 | + //copy the double-precision results into the ptype look-up table | |
205 | + for(int l=0; l<=Nl; l++) | ||
206 | + { | ||
207 | + jv = cjv_kd[l]; | ||
208 | + if(isnan(jv) || isinf(jv)) | ||
209 | + { | ||
210 | + if(kd == 0 && l == 0) | ||
211 | + jv = 1; | ||
212 | + else | ||
213 | + jv = 0; | ||
214 | + } | ||
215 | + j[id * (Nl+1) + l] = jv; | ||
216 | + } | ||
217 | + } | ||
218 | + | ||
219 | + /*ofstream outfile("uf_besselout.txt"); | ||
220 | + for(int ir = 0; ir < dR; ir++) | ||
221 | + { | ||
222 | + outfile<<ir*dr + d_min<<endl; | ||
223 | + for(int l = 0; l<=Nl; l++) | ||
224 | + { | ||
225 | + outfile<<j[ir * (Nl+1) + l]<<" --"; | ||
226 | + } | ||
227 | + outfile<<endl; | ||
228 | + } | ||
229 | + outfile.close();*/ | ||
230 | + | ||
121 | } | 231 | } |
nearfield.h
@@ -31,6 +31,8 @@ struct nearfieldStruct | @@ -31,6 +31,8 @@ struct nearfieldStruct | ||
31 | 31 | ||
32 | //slices for the focused field | 32 | //slices for the focused field |
33 | fieldslice Uf; | 33 | fieldslice Uf; |
34 | + ptype d_min, d_max; | ||
35 | + | ||
34 | // and total field: Uf + sum(Us) | 36 | // and total field: Uf + sum(Us) |
35 | fieldslice U; | 37 | fieldslice U; |
36 | 38 | ||
@@ -43,6 +45,14 @@ struct nearfieldStruct | @@ -43,6 +45,14 @@ struct nearfieldStruct | ||
43 | //flag for a plane wave | 45 | //flag for a plane wave |
44 | bool planeWave; | 46 | bool planeWave; |
45 | 47 | ||
48 | + //flag for using a LUT | ||
49 | + bool lut_uf; | ||
50 | + bool lut_us; | ||
51 | + | ||
52 | + //timings | ||
53 | + float t_Uf; | ||
54 | + float t_Us; | ||
55 | + | ||
46 | 56 | ||
47 | 57 | ||
48 | //---------Scatterers------------ | 58 | //---------Scatterers------------ |
@@ -78,10 +88,17 @@ struct nearfieldStruct | @@ -78,10 +88,17 @@ struct nearfieldStruct | ||
78 | void setPos(bsPoint pMin, bsPoint pMax, bsVector normal); | 88 | void setPos(bsPoint pMin, bsPoint pMax, bsVector normal); |
79 | 89 | ||
80 | //this function re-computes the focused field | 90 | //this function re-computes the focused field |
91 | + void calcUf(); | ||
81 | void scalarUf(); | 92 | void scalarUf(); |
93 | + void scalarUfLut(); | ||
94 | + | ||
95 | + void calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR); | ||
82 | 96 | ||
83 | //compute the field scattered by all of the materials | 97 | //compute the field scattered by all of the materials |
98 | + void calcUs(); | ||
84 | void scalarUs(); | 99 | void scalarUs(); |
100 | + void scalarUpLut(); | ||
101 | + | ||
85 | 102 | ||
86 | //add the incident field to the sum of scattered fields | 103 | //add the incident field to the sum of scattered fields |
87 | void sumUf(); | 104 | void sumUf(); |
nfScalarUf.cu
@@ -5,7 +5,7 @@ | @@ -5,7 +5,7 @@ | ||
5 | #include "rts/cuda/error.h" | 5 | #include "rts/cuda/error.h" |
6 | #include "rts/cuda/timer.h" | 6 | #include "rts/cuda/timer.h" |
7 | 7 | ||
8 | - | 8 | +//Incident field for a single plane wave |
9 | __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR) | 9 | __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR) |
10 | { | 10 | { |
11 | /*Compute the scalar focused field using Debye focusing | 11 | /*Compute the scalar focused field using Debye focusing |
@@ -41,7 +41,8 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p | @@ -41,7 +41,8 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p | ||
41 | Uf[i] = exp(d) * A; | 41 | Uf[i] = exp(d) * A; |
42 | 42 | ||
43 | } | 43 | } |
44 | - | 44 | + |
45 | +//Incident field for a focused point source | ||
45 | __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4) | 46 | __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4) |
46 | { | 47 | { |
47 | /*Compute the scalar focused field using Debye focusing | 48 | /*Compute the scalar focused field using Debye focusing |
@@ -151,7 +152,6 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt | @@ -151,7 +152,6 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt | ||
151 | } | 152 | } |
152 | 153 | ||
153 | sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); | 154 | sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); |
154 | - //sumUf += il * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); | ||
155 | 155 | ||
156 | il *= im; | 156 | il *= im; |
157 | } | 157 | } |
@@ -162,21 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt | @@ -162,21 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt | ||
162 | 162 | ||
163 | void nearfieldStruct::scalarUf() | 163 | void nearfieldStruct::scalarUf() |
164 | { | 164 | { |
165 | - //Compute the incident field via a scalar simulation | ||
166 | - //This method uses Debye focusing to approximate the field analytically | ||
167 | - | ||
168 | - //time the calculation of the focused field | ||
169 | - //gpuStartTimer(); | ||
170 | - | ||
171 | - //set the field slice to a scalar field | ||
172 | - //Uf.scalarField = true; | ||
173 | - | ||
174 | - //initialize the GPU arrays | ||
175 | - //Uf.init_gpu(); | 165 | + |
166 | + gpuStartTimer(); | ||
176 | 167 | ||
177 | //create one thread for each pixel of the field slice | 168 | //create one thread for each pixel of the field slice |
178 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | 169 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); |
179 | - dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | 170 | + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); |
180 | 171 | ||
181 | //if we are computing a plane wave, call the gpuScalarUfp function | 172 | //if we are computing a plane wave, call the gpuScalarUfp function |
182 | if(planeWave) | 173 | if(planeWave) |
@@ -191,10 +182,7 @@ void nearfieldStruct::scalarUf() | @@ -191,10 +182,7 @@ void nearfieldStruct::scalarUf() | ||
191 | ptype cosBeta = cos(asin(condenser[1])); | 182 | ptype cosBeta = cos(asin(condenser[1])); |
192 | //compute the scalar Uf field (this will be in the x_hat channel of Uf) | 183 | //compute the scalar Uf field (this will be in the x_hat channel of Uf) |
193 | gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); | 184 | gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); |
194 | - } | ||
195 | - | ||
196 | - //float t = gpuStopTimer(); | ||
197 | - //std::cout<<"Scalar Uf Time: "<<t<<"ms"<<std::endl; | ||
198 | - //std::cout<<focus<<std::endl; | ||
199 | - | 185 | + } |
186 | + | ||
187 | + t_Uf = gpuStopTimer(); | ||
200 | } | 188 | } |
1 | +#include "nearfield.h" | ||
2 | + | ||
3 | +#include "rts/math/legendre.h" | ||
4 | +#include "rts/cuda/error.h" | ||
5 | +#include "rts/cuda/timer.h" | ||
6 | + | ||
7 | +texture<float, cudaTextureType2D> texJ; | ||
8 | + | ||
9 | +__global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR); | ||
10 | + | ||
11 | +__global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoint f, bsVector k, ptype A, ptype cosAlpha, ptype cosBeta, int nl, ptype dmin, ptype dmax, int dR) | ||
12 | +{ | ||
13 | + /*This function computes the focused field for a 2D slice | ||
14 | + | ||
15 | + Uf = destination field slice | ||
16 | + ABCD = plane representing the field slice in world space | ||
17 | + uR, vR = resolution of the Uf field | ||
18 | + f = focal point of the condenser | ||
19 | + k = direction of the incident light | ||
20 | + A = amplitude of the incident field | ||
21 | + cosAlpha= cosine of the half-angle of the condenser obscuration, cos(asin(condenser[0])) | |
22 | + cosBeta = cosine of the half-angle of the condenser aperture, cos(asin(condenser[1])) | |
23 | + nl = number of orders used to compute the field | ||
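24 | + dmin, dmax = distances from the focal point that map to the first and last rows of the look-up texture; dR = number of rows (distance samples) of Bessel function values in that texture | |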
25 | + | ||
26 | + */ | ||
27 | + | ||
28 | + //get the current coordinate in the plane slice | ||
29 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
30 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
31 | + | ||
32 | + //make sure that the thread indices are in-bounds | ||
33 | + if(iu >= uR || iv >= vR) return; | ||
34 | + | ||
35 | + //compute the index (easier access to the scalar field array) | ||
36 | + int i = iv*uR + iu; | ||
37 | + | ||
38 | + //compute the parameters for u and v | ||
39 | + ptype u = (ptype)iu / (uR); | ||
40 | + ptype v = (ptype)iv / (vR); | ||
41 | + | ||
42 | + | ||
43 | + | ||
44 | + //get the rtsPoint in world space and then the r vector | ||
45 | + bsPoint p = ABCD(u, v); | ||
46 | + bsVector r = p - f; | ||
47 | + ptype d = r.len(); | ||
48 | + | ||
49 | + if(d == 0) | ||
50 | + { | ||
51 | + Uf[i] = A * 2 * PI * (cosAlpha - cosBeta); | ||
52 | + return; | ||
53 | + } | ||
54 | + | ||
55 | + //get info for the light direction and frequency | ||
56 | + r = r.norm(); | ||
57 | + | ||
58 | + //compute the imaginary factor i^l | ||
59 | + bsComplex im = bsComplex(0, 1); | ||
60 | + bsComplex il = bsComplex(1, 0); | ||
61 | + | ||
62 | + //Legendre functions are computed dynamically to save memory | ||
63 | + //initialize the Legendre functions | ||
64 | + | ||
65 | + ptype P[2]; | ||
66 | + //get the angle between k and r (light direction and position vector) | ||
67 | + ptype cosTheta; | ||
68 | + cosTheta = k.dot(r); | ||
69 | + | ||
70 | + rts::init_legendre<ptype>(cosTheta, P[0], P[1]); | ||
71 | + | ||
72 | + //initialize legendre functions for the cassegrain angles | ||
73 | + ptype Palpha[3]; | ||
74 | + rts::init_legendre<ptype>(cosAlpha, Palpha[0], Palpha[1]); | ||
75 | + Palpha[2] = 1; | ||
76 | + | ||
77 | + ptype Pbeta[3]; | ||
78 | + rts::init_legendre<ptype>(cosBeta, Pbeta[0], Pbeta[1]); | ||
79 | + Pbeta[2] = 1; | ||
80 | + | ||
81 | + //for each order l | ||
82 | + bsComplex sumUf(0.0, 0.0); | ||
83 | + ptype jl = 0.0; | ||
84 | + ptype Pl; | ||
85 | + ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1); | ||
86 | + for(int l = 0; l<=nl; l++) | ||
87 | + { | ||
88 | + jl = tex2D(texJ, l + 0.5, di + 0.5); | ||
89 | + if(l==0) | ||
90 | + Pl = P[0]; | ||
91 | + else if(l==1) | ||
92 | + { | ||
93 | + Pl = P[1]; | ||
94 | + | ||
95 | + //adjust the cassegrain Legendre function | ||
96 | + Palpha[2] = Palpha[0]; | ||
97 | + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]); | ||
98 | + Pbeta[2] = Pbeta[0]; | ||
99 | + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]); | ||
100 | + } | ||
101 | + else | ||
102 | + { | ||
103 | + rts::shift_legendre<ptype>(l, cosTheta, P[0], P[1]); | ||
104 | + | ||
105 | + Pl = P[1]; | ||
106 | + | ||
107 | + //adjust the cassegrain outer Legendre function | ||
108 | + Palpha[2] = Palpha[0]; | ||
109 | + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]); | ||
110 | + Pbeta[2] = Pbeta[0]; | ||
111 | + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]); | ||
112 | + } | ||
113 | + | ||
114 | + sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); | ||
115 | + //sumUf += jl; | ||
116 | + | ||
117 | + il *= im; | ||
118 | + } | ||
119 | + | ||
120 | + Uf[i] = sumUf * 2 * PI * A; | ||
121 | + //Uf[i] = u; | ||
122 | + //return; | ||
123 | +} | ||
124 | + | ||
125 | +void nearfieldStruct::scalarUfLut() | ||
126 | +{ | ||
127 | + gpuStartTimer(); | ||
128 | + | ||
129 | + //calculate the minimum and maximum distances between the field slice and the focal point (the range covered by the Bessel LUT) | |
130 | + d_min = pos.dist(focus); | ||
131 | + d_max = pos.dist_max(focus); | ||
132 | + | ||
133 | + //allocate space for the Bessel function | ||
134 | + int dR = 2 * max(Uf.R[0], Uf.R[1]); | ||
135 | + ptype* j = NULL; | ||
136 | + j = (ptype*) malloc(sizeof(ptype) * dR * (m+1)); | ||
137 | + | ||
138 | + //calculate Bessel function LUT | ||
139 | + calcBesselLut(j, d_min, d_max, dR); | ||
140 | + | ||
141 | + //create a CUDA array structure and specify the format description | ||
142 | + cudaArray* arrayJ; | ||
143 | + cudaChannelFormatDesc channelDesc = | ||
144 | + cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); | ||
145 | + | ||
146 | + //allocate memory | ||
147 | + HANDLE_ERROR(cudaMallocArray(&arrayJ, &channelDesc, m+1, dR)); | ||
148 | + | ||
149 | + //specify texture properties | ||
150 | + texJ.addressMode[0] = cudaAddressModeMirror; | ||
151 | + texJ.addressMode[1] = cudaAddressModeMirror; | ||
152 | + texJ.filterMode = cudaFilterModeLinear; | ||
153 | + texJ.normalized = false; | ||
154 | + | ||
155 | + //bind the texture to the array | ||
156 | + HANDLE_ERROR(cudaBindTextureToArray(texJ, arrayJ, channelDesc)); | ||
157 | + | ||
158 | + //copy the CPU Bessel LUT to the GPU-based array | ||
159 | + HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice)); | ||
160 | + | ||
161 | + //----------------Compute the focused field | ||
162 | + //create one thread for each pixel of the field slice | ||
163 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
164 | + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
165 | + | ||
166 | + //if we are computing a plane wave, call the gpuScalarUfp function | ||
167 | + if(planeWave) | ||
168 | + { | ||
169 | + gpuScalarUfp<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]); | ||
170 | + } | ||
171 | + //otherwise compute the condenser info and create a focused field | ||
172 | + else | ||
173 | + { | ||
174 | + //pre-compute the cosines of the condenser obscuration and aperture angles | |
175 | + ptype cosAlpha = cos(asin(condenser[0])); | ||
176 | + ptype cosBeta = cos(asin(condenser[1])); | ||
177 | + //compute the scalar Uf field (this will be in the x_hat channel of Uf) | ||
178 | + gpuScalarUfLut<<<dimGrid, dimBlock>>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR); | ||
179 | + } | ||
180 | + | ||
181 | + | ||
182 | + //free everything | ||
183 | + free(j); | ||
184 | + | ||
185 | + HANDLE_ERROR(cudaFreeArray(arrayJ)); | ||
186 | + | ||
187 | + t_Uf = gpuStopTimer(); | ||
188 | +} |
1 | +#include "nearfield.h" | ||
2 | +#include "rts/math/spherical_bessel.h" | ||
3 | +#include "rts/math/legendre.h" | ||
4 | +#include <stdlib.h> | ||
5 | +#include "rts/cuda/error.h" | ||
6 | +#include "rts/cuda/timer.h" | ||
7 | + | ||
8 | +texture<float2, cudaTextureType2D> texUsp; | ||
9 | +texture<float2, cudaTextureType2D> texUip; | ||
10 | + | ||
11 | +__global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR) | ||
12 | +{ | ||
13 | + /*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field | ||
14 | + produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field. | ||
15 | + | ||
16 | + Us = final scattered field | ||
17 | + k = list of incoming plane waves (Monte-Carlo samples) | ||
18 | + nk = number of incoming MC samples | ||
19 | + kmag= magnitude of the incoming field 2pi/lambda | ||
20 | + dmin= minimum distance of the Usp texture | ||
21 | + dmax= maximum distance of the Usp texture | ||
22 | + f = position of the focus | ||
23 | + ps = position of the sphere | ||
24 | + A = total amplitude of the incident field arriving at the focal spot | ||
25 | + ABCD= rectangle representing the field slice | ||
26 | + uR = resolution of the field slice in the u direction | ||
27 | + vR = resolution of the field slice in the v direction | ||
28 | + dR = resolution of the Usp texture in the d direction | ||
29 | + thetaR= resolution of the Usp texture in the theta direction | ||
30 | + */ | ||
31 | + | ||
32 | + //get the current coordinate in the plane slice | ||
33 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
34 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
35 | + | ||
36 | + //make sure that the thread indices are in-bounds | ||
37 | + if(iu >= uR || iv >= vR) return; | ||
38 | + | ||
39 | + //compute the index (easier access to the scalar field array) | ||
40 | + int i = iv*uR + iu; | ||