Commit 51b6469a3ee77583099edb0a57e1bb7859c28fd1

Authored by dmayerich
1 parent b6179de6

added look-up tables

@@ -13,7 +13,9 @@ @@ -13,7 +13,9 @@
13 // 13 //
14 #define _USE_MATH_DEFINES 14 #define _USE_MATH_DEFINES
15 #include <math.h> 15 #include <math.h>
16 -#include "bessel.h" 16 +#include "bessel.h"
  17 +
  18 +#define PI 3.14159
17 19
18 double gamma(double x); 20 double gamma(double x);
19 // 21 //
@@ -426,7 +428,7 @@ int bessjynb(int n,double x,int &amp;nm,double *jn,double *yn, @@ -426,7 +428,7 @@ int bessjynb(int n,double x,int &amp;nm,double *jn,double *yn,
426 0.2775764465332031, 428 0.2775764465332031,
427 -1.993531733751297, 429 -1.993531733751297,
428 2.724882731126854e1}; 430 2.724882731126854e1};
429 - 431 +
430 int i,k,m; 432 int i,k,m;
431 nm = n; 433 nm = n;
432 if ((x < 0.0) || (n < 0)) return 1; 434 if ((x < 0.0) || (n < 0)) return 1;
@@ -702,5 +704,26 @@ int bessjyv(double v,double x,double &amp;vm,double *jv,double *yv, @@ -702,5 +704,26 @@ int bessjyv(double v,double x,double &amp;vm,double *jv,double *yv,
702 } 704 }
703 vm = n + v0; 705 vm = n + v0;
704 return 0; 706 return 0;
  707 +}
  708 +
  709 +int bessjyv_sph(int v, double z, double &vm, double* cjv,
  710 + double* cyv, double* cjvp, double* cyvp)
  711 +{
  712 + //first, compute the bessel functions of fractional order
  713 + bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
  714 +
  715 + //iterate through each and scale
  716 + for(int n = 0; n<=v; n++)
  717 + {
  718 +
  719 + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0));
  720 + cyv[n] = cyv[n] * sqrt(PI/(z * 2.0));
  721 +
  722 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(PI / (z * 2.0));
  723 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(PI / (z * 2.0));
  724 + }
  725 +
  726 + return 0;
  727 +
705 } 728 }
706 - 729 +
@@ -724,6 +724,7 @@ int cbessjyva_sph(int v,complex&lt;double&gt; z,double &amp;vm,complex&lt;double&gt;*cjv, @@ -724,6 +724,7 @@ int cbessjyva_sph(int v,complex&lt;double&gt; z,double &amp;vm,complex&lt;double&gt;*cjv,
724 //iterate through each and scale 724 //iterate through each and scale
725 for(int n = 0; n<=v; n++) 725 for(int n = 0; n<=v; n++)
726 { 726 {
  727 +
727 cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); 728 cjv[n] = cjv[n] * sqrt(PI/(z * 2.0));
728 cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); 729 cyv[n] = cyv[n] * sqrt(PI/(z * 2.0));
729 730
colormap.h deleted
1 -#ifndef RTS_COLORMAP_H  
2 -#define RTS_COLORMAP_H  
3 -  
4 -#include <string>  
5 -#include <qimage.h>  
6 -#include <qcolor.h>  
7 -#include "rts/cuda/error.h"  
8 -  
9 -  
10 -#define BREWER_CTRL_PTS 11  
11 -  
12 -#ifdef __CUDACC__  
13 -texture<float4, cudaTextureType1D> cudaTexBrewer;  
14 -static cudaArray* gpuBrewer;  
15 -#endif  
16 -  
17 -  
18 -  
19 -namespace rts{  
20 - namespace colormap{  
21 -  
22 -enum colormapType {cmBrewer, cmGrayscale};  
23 -  
24 -static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size)  
25 -{  
26 - //create an image object  
27 - QImage image(x_size, y_size, QImage::Format_RGB32);  
28 -  
29 - int i;  
30 - unsigned char r, g, b;  
31 - unsigned int x, y;  
32 - for(y=0; y<y_size; y++)  
33 - for(x=0; x<x_size; x++)  
34 - {  
35 - //calculate the 1D index  
36 - i = y * x_size + x;  
37 -  
38 - r = buffer[i * 3 + 0];  
39 - g = buffer[i * 3 + 1];  
40 - b = buffer[i * 3 + 2];  
41 -  
42 - //set the image pixel  
43 - QColor color(r, g, b);  
44 - image.setPixel(x, y, color.rgb());  
45 - }  
46 -  
47 - image.save(filename.c_str());  
48 -}  
49 -  
50 -#ifdef __CUDACC__  
51 -static void initBrewer()  
52 -{  
53 - //initialize the Brewer colormap  
54 -  
55 - //allocate CPU space  
56 - float4 cpuColorMap[BREWER_CTRL_PTS];  
57 -  
58 - //define control rtsPoints  
59 - cpuColorMap[0] = make_float4(0.192157f, 0.211765f, 0.584314f, 1.0f);  
60 - cpuColorMap[1] = make_float4(0.270588f, 0.458824f, 0.705882f, 1.0f);  
61 - cpuColorMap[2] = make_float4(0.454902f, 0.678431f, 0.819608f, 1.0f);  
62 - cpuColorMap[3] = make_float4(0.670588f, 0.85098f, 0.913725f, 1.0f);  
63 - cpuColorMap[4] = make_float4(0.878431f, 0.952941f, 0.972549f, 1.0f);  
64 - cpuColorMap[5] = make_float4(1.0f, 1.0f, 0.74902f, 1.0f);  
65 - cpuColorMap[6] = make_float4(0.996078f, 0.878431f, 0.564706f, 1.0f);  
66 - cpuColorMap[7] = make_float4(0.992157f, 0.682353f, 0.380392f, 1.0f);  
67 - cpuColorMap[8] = make_float4(0.956863f, 0.427451f, 0.262745f, 1.0f);  
68 - cpuColorMap[9] = make_float4(0.843137f, 0.188235f, 0.152941f, 1.0f);  
69 - cpuColorMap[10] = make_float4(0.647059f, 0.0f, 0.14902f, 1.0f);  
70 -  
71 -  
72 - int width = BREWER_CTRL_PTS;  
73 - int height = 0;  
74 -  
75 -  
76 - // allocate array and copy colormap data  
77 - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);  
78 -  
79 - HANDLE_ERROR(cudaMallocArray(&gpuBrewer, &channelDesc, width, height));  
80 -  
81 - HANDLE_ERROR(cudaMemcpyToArray(gpuBrewer, 0, 0, cpuColorMap, sizeof(float4)*width, cudaMemcpyHostToDevice));  
82 -  
83 - // set texture parameters  
84 - cudaTexBrewer.addressMode[0] = cudaAddressModeClamp;  
85 - //texBrewer.addressMode[1] = cudaAddressModeClamp;  
86 - cudaTexBrewer.filterMode = cudaFilterModeLinear;  
87 - cudaTexBrewer.normalized = true; // access with normalized texture coordinates  
88 -  
89 - // Bind the array to the texture  
90 - HANDLE_ERROR(cudaBindTextureToArray( cudaTexBrewer, gpuBrewer, channelDesc));  
91 -  
92 -}  
93 -  
94 -static void destroyBrewer()  
95 -{  
96 - HANDLE_ERROR(cudaFreeArray(gpuBrewer));  
97 -  
98 -}  
99 -  
100 -template<class T>  
101 -__global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1)  
102 -{  
103 - int i = blockIdx.x * blockDim.x + threadIdx.x;  
104 - if(i >= N) return;  
105 -  
106 - //compute the normalized value on [minVal maxVal]  
107 - float a = (gpuSource[i] - minVal) / (maxVal - minVal);  
108 -  
109 - //lookup the color  
110 - float shift = 1.0/BREWER_CTRL_PTS;  
111 - float4 color = tex1D(cudaTexBrewer, a+shift);  
112 -  
113 - gpuDest[i * 3 + 0] = 255 * color.x;  
114 - gpuDest[i * 3 + 1] = 255 * color.y;  
115 - gpuDest[i * 3 + 2] = 255 * color.z;  
116 -}  
117 -  
118 -template<class T>  
119 -__global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1)  
120 -{  
121 - int i = blockIdx.x * blockDim.x + threadIdx.x;  
122 - if(i >= N) return;  
123 -  
124 - //compute the normalized value on [minVal maxVal]  
125 - float a = (gpuSource[i] - minVal) / (maxVal - minVal);  
126 -  
127 - gpuDest[i * 3 + 0] = 255 * a;  
128 - gpuDest[i * 3 + 1] = 255 * a;  
129 - gpuDest[i * 3 + 2] = 255 * a;  
130 -}  
131 -  
132 -template<class T>  
133 -static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T minVal = 0, T maxVal = 1, colormapType cm = cmGrayscale, int blockDim = 128)  
134 -{  
135 - //This function converts a scalar field on the GPU to a color image on the GPU  
136 - int gridDim = (nVals + blockDim - 1)/blockDim;  
137 - if(cm == cmGrayscale)  
138 - applyGrayscale<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal);  
139 - else if(cm == cmBrewer)  
140 - {  
141 - initBrewer();  
142 - applyBrewer<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal);  
143 - destroyBrewer();  
144 - }  
145 -  
146 -}  
147 -  
148 -template<class T>  
149 -static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T minVal, T maxVal, colormapType cm = cmGrayscale)  
150 -{  
151 - //this function converts a scalar field on the GPU to a color image on the CPU  
152 -  
153 - //first create the color image on the GPU  
154 -  
155 - //allocate GPU memory for the color image  
156 - unsigned char* gpuDest;  
157 - HANDLE_ERROR(cudaMalloc( (void**)&gpuDest, sizeof(unsigned char) * nVals * 3 ));  
158 -  
159 - //HANDLE_ERROR(cudaMemset(gpuSource, 0, sizeof(T) * nVals));  
160 -  
161 - //create the image on the gpu  
162 - gpu2gpu(gpuSource, gpuDest, nVals, minVal, maxVal, cm);  
163 -  
164 - //HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3));  
165 -  
166 - //copy the image from the GPU to the CPU  
167 - HANDLE_ERROR(cudaMemcpy(cpuDest, gpuDest, sizeof(unsigned char) * nVals * 3, cudaMemcpyDeviceToHost));  
168 -  
169 - HANDLE_ERROR(cudaFree( gpuDest ));  
170 -  
171 -}  
172 -  
173 -template<typename T>  
174 -static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale)  
175 -{  
176 - //allocate a color buffer  
177 - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size);  
178 -  
179 - //do the mapping  
180 - gpu2cpu<T>(gpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm);  
181 -  
182 - //copy the buffer to an image  
183 - buffer2image(cpuBuffer, fileDest, x_size, y_size);  
184 -  
185 - free(cpuBuffer);  
186 -}  
187 -  
188 -#endif  
189 -  
190 -template<class T>  
191 -static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T valMin, T valMax, colormapType cm = cmGrayscale)  
192 -{  
193 - int i;  
194 - float a;  
195 - float range = valMax - valMin;  
196 - for(i = 0; i<nVals; i++)  
197 - {  
198 - //normalize to the range [valMin valMax]  
199 - a = (cpuSource[i] - valMin) / range;  
200 -  
201 - cpuDest[i * 3 + 0] = 255 * a;  
202 - cpuDest[i * 3 + 1] = 255 * a;  
203 - cpuDest[i * 3 + 2] = 255 * a;  
204 - }  
205 -  
206 -}  
207 -  
208 -  
209 -  
210 -template<typename T>  
211 -static void cpu2image(T* cpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale)  
212 -{  
213 - //allocate a color buffer  
214 - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size);  
215 -  
216 - //do the mapping  
217 - cpu2cpu<T>(cpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm);  
218 -  
219 - //copy the buffer to an image  
220 - buffer2image(cpuBuffer, fileDest, x_size, y_size);  
221 -  
222 - free(cpuBuffer);  
223 -  
224 -}  
225 -  
226 -}} //end namespace colormap and rts  
227 -  
228 -#endif  
229 -  
@@ -24,6 +24,8 @@ typedef double ptype; @@ -24,6 +24,8 @@ typedef double ptype;
24 24
25 typedef ptype fieldPoint; 25 typedef ptype fieldPoint;
26 26
  27 +extern bool verbose;
  28 +
27 //hybrid GPU/CPU complex data typ 29 //hybrid GPU/CPU complex data typ
28 #include "rts/math/complex.h" 30 #include "rts/math/complex.h"
29 #include "rts/math/vector.h" 31 #include "rts/math/vector.h"
@@ -15,14 +15,14 @@ @@ -15,14 +15,14 @@
15 #define DEFAULT_FOCUS_X 0 15 #define DEFAULT_FOCUS_X 0
16 #define DEFAULT_FOCUS_Y 0 16 #define DEFAULT_FOCUS_Y 0
17 #define DEFAULT_FOCUS_Z 0 17 #define DEFAULT_FOCUS_Z 0
18 -#define DEFAULT_INCIDENT_ORDER 100 18 +//#define DEFAULT_INCIDENT_ORDER 20
19 #define DEFAULT_STABILITY_PARM 1.4 19 #define DEFAULT_STABILITY_PARM 1.4
20 20
21 //optics 21 //optics
22 -#define DEFAULT_CONDENSER_MIN 0.0 22 +#define DEFAULT_CONDENSER_MIN 0
23 #define DEFAULT_CONDENSER_MAX 1 23 #define DEFAULT_CONDENSER_MAX 1
24 24
25 -#define DEFAULT_OBJECTIVE_MIN 0.0 25 +#define DEFAULT_OBJECTIVE_MIN 0
26 #define DEFAULT_OBJECTIVE_MAX 1 26 #define DEFAULT_OBJECTIVE_MAX 1
27 27
28 //incident light direction 28 //incident light direction
@@ -36,17 +36,20 @@ @@ -36,17 +36,20 @@
36 //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective 36 //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective
37 37
38 38
39 -#define DEFAULT_SLICE_MIN_X -5  
40 -#define DEFAULT_SLICE_MIN_Y 0  
41 -#define DEFAULT_SLICE_MIN_Z -5 39 +#define DEFAULT_PLANE_MIN_X -5
  40 +#define DEFAULT_PLANE_MIN_Y 0
  41 +#define DEFAULT_PLANE_MIN_Z -5
42 42
43 -#define DEFAULT_SLICE_MAX_X 5  
44 -#define DEFAULT_SLICE_MAX_Y 0  
45 -#define DEFAULT_SLICE_MAX_Z 5 43 +#define DEFAULT_PLANE_MAX_X 5
  44 +#define DEFAULT_PLANE_MAX_Y 0
  45 +#define DEFAULT_PLANE_MAX_Z 5
46 46
47 -#define DEFAULT_SLICE_NORM_X 0  
48 -#define DEFAULT_SLICE_NORM_Y 1  
49 -#define DEFAULT_SLICE_NORM_Z 0 47 +#define DEFAULT_PLANE_NORM_X 0
  48 +#define DEFAULT_PLANE_NORM_Y 1
  49 +#define DEFAULT_PLANE_NORM_Z 0
  50 +
  51 +#define DEFAULT_PLANE_SIZE 40
  52 +#define DEFAULT_PLANE_POSITION 0
50 53
51 54
52 /* 55 /*
@@ -64,21 +67,23 @@ @@ -64,21 +67,23 @@
64 */ 67 */
65 68
66 69
67 -#define DEFAULT_FIELD_ORDER 200 70 +#define DEFAULT_FIELD_ORDER 10
68 71
69 -#define DEFAULT_SAMPLES 200 72 +#define DEFAULT_SAMPLES 400
70 73
71 #define DEFAULT_SLICE_RES 256 74 #define DEFAULT_SLICE_RES 256
72 75
  76 +#define DEFAULT_SPHERE_THETA_R 1000
  77 +
73 #define DEFAULT_PADDING 1 78 #define DEFAULT_PADDING 1
74 #define DEFAULT_SUPERSAMPLE 1 79 #define DEFAULT_SUPERSAMPLE 1
75 80
76 -#define DEFAULT_INTENSITY_FILE "testappend" 81 +#define DEFAULT_INTENSITY_FILE "out_i.bmp"
77 #define DEFAULT_TRANSMITTANCE_FILE "" 82 #define DEFAULT_TRANSMITTANCE_FILE ""
78 -#define DEFAULT_ABSORBANCE_FILE "out_a" 83 +#define DEFAULT_ABSORBANCE_FILE "out_a.bmp"
79 #define DEFAULT_NEAR_FILE "out_n.bmp" 84 #define DEFAULT_NEAR_FILE "out_n.bmp"
80 #define DEFAULT_FAR_FILE "out_f.bmp" 85 #define DEFAULT_FAR_FILE "out_f.bmp"
81 -#define DEFAULT_EXTENDED_SOURCE "einstein_small.jpg" 86 +#define DEFAULT_EXTENDED_SOURCE ""
82 #define DEFAULT_FIELD_TYPE "magnitude" 87 #define DEFAULT_FIELD_TYPE "magnitude"
83 #define DEFAULT_FORMAT fileoutStruct::formatImage 88 #define DEFAULT_FORMAT fileoutStruct::formatImage
84 #define DEFAULT_COLORMAP "brewer" 89 #define DEFAULT_COLORMAP "brewer"
@@ -8,14 +8,16 @@ @@ -8,14 +8,16 @@
8 using namespace std; 8 using namespace std;
9 9
10 fieldslice::fieldslice(unsigned int x_size, unsigned int y_size) 10 fieldslice::fieldslice(unsigned int x_size, unsigned int y_size)
11 -{ 11 +{
  12 + x_hat = y_hat = z_hat = NULL;
  13 +
12 //save the slice resolution 14 //save the slice resolution
13 R[0] = x_size; 15 R[0] = x_size;
14 R[1] = x_size; 16 R[1] = x_size;
15 17
16 scalarField = true; 18 scalarField = true;
17 19
18 - //init_gpu(); 20 + init_gpu();
19 21
20 22
21 } 23 }
@@ -101,5 +103,5 @@ fieldslice::fieldslice() @@ -101,5 +103,5 @@ fieldslice::fieldslice()
101 103
102 fieldslice::~fieldslice() 104 fieldslice::~fieldslice()
103 { 105 {
104 - //kill_gpu(); 106 + kill_gpu();
105 } 107 }
1 #include "fieldslice.h" 1 #include "fieldslice.h"
2 #include "dataTypes.h" 2 #include "dataTypes.h"
3 -#include "rts/cuda/error.h" 3 +#include "rts/cuda/error.h"
  4 +#include "rts/cuda/threads.h"
4 5
5 6
6 __global__ void field_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* I, unsigned int N) 7 __global__ void field_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* I, unsigned int N)
7 { 8 {
8 //compute the index for this thread 9 //compute the index for this thread
9 - int i = blockIdx.x * blockDim.x + threadIdx.x; 10 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  11 + int i = ThreadIndex1D();
  12 +
10 if(i >= N) return; 13 if(i >= N) return;
11 14
12 ptype xm = x[i].abs(); 15 ptype xm = x[i].abs();
@@ -66,7 +69,8 @@ __global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, pty @@ -66,7 +69,8 @@ __global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, pty
66 __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) 69 __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N)
67 { 70 {
68 //compute the index for this thread 71 //compute the index for this thread
69 - int i = blockIdx.x * blockDim.x + threadIdx.x; 72 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  73 + int i = ThreadIndex1D();
70 if(i >= N) return; 74 if(i >= N) return;
71 75
72 V[i] = field_component[i].real(); 76 V[i] = field_component[i].real();
@@ -75,7 +79,8 @@ __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N) @@ -75,7 +79,8 @@ __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N)
75 __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N) 79 __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N)
76 { 80 {
77 //compute the index for this thread 81 //compute the index for this thread
78 - int i = blockIdx.x * blockDim.x + threadIdx.x; 82 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  83 + int i = ThreadIndex1D();
79 if(i >= N) return; 84 if(i >= N) return;
80 85
81 V[i] = field_component[i].imag(); 86 V[i] = field_component[i].imag();
@@ -84,7 +89,8 @@ __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned i @@ -84,7 +89,8 @@ __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned i
84 __global__ void field_sqrt(ptype* input, ptype* output, unsigned int N) 89 __global__ void field_sqrt(ptype* input, ptype* output, unsigned int N)
85 { 90 {
86 //compute the index for this thread 91 //compute the index for this thread
87 - int i = blockIdx.x * blockDim.x + threadIdx.x; 92 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  93 + int i = ThreadIndex1D();
88 if(i >= N) return; 94 if(i >= N) return;
89 95
90 output[i] = sqrt(input[i]); 96 output[i] = sqrt(input[i]);
@@ -115,7 +121,8 @@ scalarslice fieldslice::Mag() @@ -115,7 +121,8 @@ scalarslice fieldslice::Mag()
115 121
116 //compute the total number of values in the slice 122 //compute the total number of values in the slice
117 unsigned int N = R[0] * R[1]; 123 unsigned int N = R[0] * R[1];
118 - int gridDim = (N+BLOCK-1)/BLOCK; 124 + //int gridDim = (N+BLOCK-1)/BLOCK;
  125 + dim3 gridDim = GenGrid1D(N, BLOCK);
119 126
120 field_intensity<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, result->S, N); 127 field_intensity<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, result->S, N);
121 field_sqrt<<<gridDim, BLOCK>>>(result->S, result->S, N); 128 field_sqrt<<<gridDim, BLOCK>>>(result->S, result->S, N);
@@ -132,7 +139,8 @@ scalarslice fieldslice::Real() @@ -132,7 +139,8 @@ scalarslice fieldslice::Real()
132 139
133 //compute the total number of values in the slice 140 //compute the total number of values in the slice
134 unsigned int N = R[0] * R[1]; 141 unsigned int N = R[0] * R[1];
135 - int gridDim = (N+BLOCK-1)/BLOCK; 142 + //int gridDim = (N+BLOCK-1)/BLOCK;
  143 + dim3 gridDim = GenGrid1D(N, BLOCK);
136 144
137 field_real<<<gridDim, BLOCK>>>(x_hat, result->S, N); 145 field_real<<<gridDim, BLOCK>>>(x_hat, result->S, N);
138 146
@@ -148,7 +156,8 @@ scalarslice fieldslice::Imag() @@ -148,7 +156,8 @@ scalarslice fieldslice::Imag()
148 156
149 //compute the total number of values in the slice 157 //compute the total number of values in the slice
150 unsigned int N = R[0] * R[1]; 158 unsigned int N = R[0] * R[1];
151 - int gridDim = (N+BLOCK-1)/BLOCK; 159 + //int gridDim = (N+BLOCK-1)/BLOCK;
  160 + dim3 gridDim = GenGrid1D(N, BLOCK);
152 161
153 field_imaginary<<<gridDim, BLOCK>>>(x_hat, result->S, N); 162 field_imaginary<<<gridDim, BLOCK>>>(x_hat, result->S, N);
154 163
@@ -192,7 +201,6 @@ void fieldslice::ScaleField(ptype v) @@ -192,7 +201,6 @@ void fieldslice::ScaleField(ptype v)
192 201
193 //compute the total number of values in the slice 202 //compute the total number of values in the slice
194 unsigned int N = R[0] * R[1]; 203 unsigned int N = R[0] * R[1];
195 - //cout<<"Size of mag field: "<<N<<endl;  
196 int gridDim = (N+BLOCK-1)/BLOCK; 204 int gridDim = (N+BLOCK-1)/BLOCK;
197 205
198 field_scale<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, N, v); 206 field_scale<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, N, v);
@@ -200,19 +208,23 @@ void fieldslice::ScaleField(ptype v) @@ -200,19 +208,23 @@ void fieldslice::ScaleField(ptype v)
200 } 208 }
201 209
202 void fieldslice::init_gpu() 210 void fieldslice::init_gpu()
203 -{ 211 +{
  212 + //if the field has no size, return
  213 + if(R[0] == 0 || R[1] == 0)
  214 + return;
  215 +
  216 + //free any previous memory allocations
  217 + if(x_hat)
  218 + HANDLE_ERROR(cudaFree(x_hat));
  219 + if(y_hat)
  220 + HANDLE_ERROR(cudaFree(y_hat));
  221 + if(z_hat)
  222 + HANDLE_ERROR(cudaFree(z_hat));
  223 +
204 //allocate space on the GPU for the field slice 224 //allocate space on the GPU for the field slice
205 HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex))); 225 HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex)));
206 - //HANDLE_ERROR(cudaMemset(x_hat, 0, R[0] * R[1] * sizeof(bsComplex)));  
207 226
208 - //if the field is scalar, y_hat and z_hat are unused  
209 - if(scalarField)  
210 - {  
211 - y_hat = NULL;  
212 - z_hat = NULL;  
213 -  
214 - }  
215 - else 227 + if(!scalarField)
216 { 228 {
217 HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex))); 229 HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex)));
218 //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex))); 230 //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex)));
@@ -233,6 +245,8 @@ void fieldslice::kill_gpu() @@ -233,6 +245,8 @@ void fieldslice::kill_gpu()
233 if(z_hat != NULL) 245 if(z_hat != NULL)
234 HANDLE_ERROR(cudaFree(z_hat)); 246 HANDLE_ERROR(cudaFree(z_hat));
235 247
  248 + x_hat = y_hat = z_hat = NULL;
  249 +
236 } 250 }
237 251
238 void fieldslice::clear_gpu() 252 void fieldslice::clear_gpu()
@@ -275,7 +289,7 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) @@ -275,7 +289,7 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv)
275 result.scalarField = scalarField; 289 result.scalarField = scalarField;
276 290
277 //allocate space for the new field 291 //allocate space for the new field
278 - result.init_gpu(); 292 + //result.init_gpu();
279 293
280 //create one thread for each pixel of the field slice 294 //create one thread for each pixel of the field slice
281 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); 295 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
@@ -291,3 +305,57 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv) @@ -291,3 +305,57 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv)
291 305
292 return result; 306 return result;
293 } 307 }
  308 +
  309 +fieldslice::fieldslice(const fieldslice& rhs)
  310 +{
  311 + R[0] = rhs.R[0];
  312 + R[1] = rhs.R[1];
  313 + scalarField = rhs.scalarField;
  314 +
  315 + x_hat = y_hat = z_hat = NULL;
  316 +
  317 + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1];
  318 + if(rhs.x_hat != NULL)
  319 + {
  320 + HANDLE_ERROR(cudaMalloc( (void**)&x_hat, bytes));
  321 + HANDLE_ERROR(cudaMemcpy( x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice));
  322 + }
  323 + if(rhs.y_hat != NULL)
  324 + {
  325 + HANDLE_ERROR(cudaMalloc( (void**)&y_hat, bytes));
  326 + HANDLE_ERROR(cudaMemcpy( y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice));
  327 + }
  328 + if(rhs.z_hat != NULL)
  329 + {
  330 + HANDLE_ERROR(cudaMalloc( (void**)&z_hat, bytes));
  331 + HANDLE_ERROR(cudaMemcpy( z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice));
  332 + }
  333 +
  334 +}
  335 +
  336 +fieldslice& fieldslice::operator=(const fieldslice& rhs)
  337 +{
  338 + //make sure this isn't a self-allocation
  339 + if(this != &rhs)
  340 + {
  341 + //make a shallow copy
  342 + R[0] = rhs.R[0];
  343 + R[1] = rhs.R[1];
  344 + scalarField = rhs.scalarField;
  345 +
  346 + //initialize to new parameters
  347 + init_gpu();
  348 +
  349 + //make a deep copy
  350 + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1];
  351 + if(x_hat != NULL)
  352 + HANDLE_ERROR(cudaMemcpy(x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice));
  353 + if(y_hat != NULL)
  354 + HANDLE_ERROR(cudaMemcpy(y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice));
  355 + if(z_hat != NULL)
  356 + HANDLE_ERROR(cudaMemcpy(z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice));
  357 + }
  358 +
  359 + return *this;
  360 +
  361 +}
@@ -31,6 +31,9 @@ struct fieldslice @@ -31,6 +31,9 @@ struct fieldslice
31 31
32 ~fieldslice(); 32 ~fieldslice();
33 33
  34 + //copy constructor
  35 + fieldslice(const fieldslice& rhs);
  36 +
34 //void setPos(bsPoint pMin, bsPoint pMax, bsVector N); 37 //void setPos(bsPoint pMin, bsPoint pMax, bsVector N);
35 38
36 scalarslice Mag(); 39 scalarslice Mag();
@@ -47,6 +50,7 @@ struct fieldslice @@ -47,6 +50,7 @@ struct fieldslice
47 50
48 //crop a region from the field 51 //crop a region from the field
49 fieldslice crop(int u, int v, int su, int sv); 52 fieldslice crop(int u, int v, int su, int sv);
  53 + fieldslice& operator=(const fieldslice& rhs);
50 54
51 void init_gpu(); 55 void init_gpu();
52 void kill_gpu(); 56 void kill_gpu();
@@ -186,11 +186,21 @@ void fileoutStruct::Save(microscopeStruct* scope) @@ -186,11 +186,21 @@ void fileoutStruct::Save(microscopeStruct* scope)
186 //save images of the fields in the microscope 186 //save images of the fields in the microscope
187 187
188 //if the user specifies an extended source 188 //if the user specifies an extended source
189 - if(scope->focalPoints.size() > 1) 189 + if(scope->focalPoints.size() > 0)
190 { 190 {
191 //simulate the extended source and output the detector image 191 //simulate the extended source and output the detector image
192 scope->SimulateExtendedSource(); 192 scope->SimulateExtendedSource();
193 193
  194 + //saveNearField(&scope->nf);
  195 + saveFarField(scope);
  196 +
  197 + //save the detector images
  198 + saveDetector(scope);
  199 +
  200 + //simulate scattering for the last point (so that you have a near field image)
  201 + scope->SimulateScattering();
  202 + saveNearField(&scope->nf);
  203 +
194 } 204 }
195 else 205 else
196 { 206 {
@@ -203,12 +213,15 @@ void fileoutStruct::Save(microscopeStruct* scope) @@ -203,12 +213,15 @@ void fileoutStruct::Save(microscopeStruct* scope)
203 //run the far-field simulation 213 //run the far-field simulation
204 scope->SimulateImaging(); 214 scope->SimulateImaging();
205 215
  216 + //saveNearField(&scope->nf);
206 saveFarField(scope); 217 saveFarField(scope);
207 218
  219 + //save the detector images
  220 + saveDetector(scope);
  221 +
208 } 222 }
209 223
210 - //save the detector images  
211 - saveDetector(scope); 224 +
212 225
213 226
214 } 227 }
@@ -5,7 +5,7 @@ @@ -5,7 +5,7 @@
5 //#include "defaults.h" 5 //#include "defaults.h"
6 #include "dataTypes.h" 6 #include "dataTypes.h"
7 7
8 -#include "colormap.h" 8 +#include "rts/graphics/colormap.h"
9 #include "fieldslice.h" 9 #include "fieldslice.h"
10 #include "nearfield.h" 10 #include "nearfield.h"
11 #include "microscope.h" 11 #include "microscope.h"
@@ -34,7 +34,7 @@ struct fileoutStruct{ @@ -34,7 +34,7 @@ struct fileoutStruct{
34 //image_source source; 34 //image_source source;
35 35
36 //color map info 36 //color map info
37 - rts::colormap::colormapType colormap; 37 + rts::colormapType colormap;
38 ptype colorMax; 38 ptype colorMax;
39 39
40 void Save(microscopeStruct* scope); 40 void Save(microscopeStruct* scope);
@@ -24,6 +24,7 @@ microscopeStruct* SCOPE; @@ -24,6 +24,7 @@ microscopeStruct* SCOPE;
24 #include "warnings.h" 24 #include "warnings.h"
25 25
26 fileoutStruct gFileOut; 26 fileoutStruct gFileOut;
  27 +bool verbose = false;
27 using namespace std; 28 using namespace std;
28 29
29 int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, 30 int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv,
@@ -31,32 +32,19 @@ int cbessjyva(double v,complex&lt;double&gt; z,double &amp;vm,complex&lt;double&gt;*cjv, @@ -31,32 +32,19 @@ int cbessjyva(double v,complex&lt;double&gt; z,double &amp;vm,complex&lt;double&gt;*cjv,
31 32
32 int main(int argc, char *argv[]) 33 int main(int argc, char *argv[])
33 { 34 {
34 - //test Envi loading and saving  
35 - //EnviFile envi("testenvi", "w");  
36 -  
37 - //float* data = (float*)malloc(sizeof(float) * 100 * 100);  
38 - //envi.addBand(data, 100, 100, 100);  
39 -  
40 - //envi.close();  
41 -  
42 - //return 0;  
43 35
44 SCOPE = new microscopeStruct(); 36 SCOPE = new microscopeStruct();
45 37
46 - cout<<SCOPE->nf.Uf.R[0]<<endl;  
47 -  
48 LoadParameters(argc, argv); 38 LoadParameters(argc, argv);
49 39
50 - //TestSimulation(NF, SCOPE, &gFileOut);  
51 -  
52 //initialize GPU memory for fields 40 //initialize GPU memory for fields
53 SCOPE->init(); 41 SCOPE->init();
54 42
55 - OutputOptions();  
56 -  
57 gFileOut.Save(SCOPE); 43 gFileOut.Save(SCOPE);
58 44
59 - //NF->destroy(); 45 + if(verbose)
  46 + OutputOptions();
  47 +
60 SCOPE->destroy(); 48 SCOPE->destroy();
61 49
62 50
@@ -4,7 +4,7 @@ @@ -4,7 +4,7 @@
4 #include "rts/tools/progressbar.h" 4 #include "rts/tools/progressbar.h"
5 #include "rts/cuda/timer.h" 5 #include "rts/cuda/timer.h"
6 #include "dataTypes.h" 6 #include "dataTypes.h"
7 -#include "colormap.h" 7 +#include "rts/graphics/colormap.h"
8 8
9 #include <QImage> 9 #include <QImage>
10 10
@@ -112,8 +112,8 @@ void microscopeStruct::getFarField() @@ -112,8 +112,8 @@ void microscopeStruct::getFarField()
112 //Compute the Far Field image of the focal plane 112 //Compute the Far Field image of the focal plane
113 113
114 //clear the memory from previous detector fields 114 //clear the memory from previous detector fields
115 - Ud.kill_gpu();  
116 - Ufd.kill_gpu(); 115 + //Ud.kill_gpu();
  116 + //Ufd.kill_gpu();
117 117
118 //first crop the filtered near-field image of the source and scattered fields 118 //first crop the filtered near-field image of the source and scattered fields
119 Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]); 119 Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]);
@@ -261,9 +261,14 @@ void microscopeStruct::SimulateExtendedSource() @@ -261,9 +261,14 @@ void microscopeStruct::SimulateExtendedSource()
261 t += gpuStopTimer(); 261 t += gpuStopTimer();
262 262
263 rtsProgressBar((double)(i+1)/(double)npts * 100); 263 rtsProgressBar((double)(i+1)/(double)npts * 100);
  264 + //unsigned char c;
  265 + //cin>>c;
264 } 266 }
265 - cout<<endl;  
266 - cout<<"Time per source: "<<t/npts<<"ms"<<endl; 267 + if(verbose)
  268 + {
  269 + cout<<endl;
  270 + cout<<"Time per source: "<<t/npts<<"ms"<<endl;
  271 + }
267 272
268 } 273 }
269 274
@@ -304,3 +309,15 @@ void microscopeStruct::LoadExtendedSource(std::string filename) @@ -304,3 +309,15 @@ void microscopeStruct::LoadExtendedSource(std::string filename)
304 } 309 }
305 } 310 }
306 } 311 }
  312 +
  313 +std::string microscopeStruct::toStr()
  314 +{
  315 + stringstream ss;
  316 + ss<<nf.toStr();
  317 +
  318 + ss<<"----------Optics--------------"<<endl<<endl;
  319 + ss<<"Objective NA: "<<objective[0]<<" to "<<objective[1]<<endl;
  320 + return ss.str();
  321 +
  322 +
  323 +}
@@ -63,6 +63,8 @@ struct microscopeStruct @@ -63,6 +63,8 @@ struct microscopeStruct
63 scalarslice getTransmittance(); 63 scalarslice getTransmittance();
64 scalarslice getIntensity(); 64 scalarslice getIntensity();
65 65
  66 + string toStr();
  67 +
66 68
67 69
68 }; 70 };
@@ -35,18 +35,12 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) @@ -35,18 +35,12 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout)
35 ptype inPhi = asin(NAin); 35 ptype inPhi = asin(NAin);
36 ptype outPhi = asin(NAout); 36 ptype outPhi = asin(NAout);
37 37
38 - //cout<<"inPhi: "<<inPhi<<endl;  
39 - //cout<<"outPhi: "<<outPhi<<endl;  
40 -  
41 //calculate the z-values associated with these angles 38 //calculate the z-values associated with these angles
42 ptype inZ = cos(inPhi); 39 ptype inZ = cos(inPhi);
43 ptype outZ = cos(outPhi); 40 ptype outZ = cos(outPhi);
44 41
45 ptype rangeZ = inZ - outZ; 42 ptype rangeZ = inZ - outZ;
46 43
47 - //cout<<"inZ: "<<inZ<<endl;  
48 - //cout<<"outZ: "<<outZ<<endl;  
49 -  
50 //draw a distribution of random phi, z values 44 //draw a distribution of random phi, z values
51 ptype z, phi, theta; 45 ptype z, phi, theta;
52 for(int i=0; i<N; i++) 46 for(int i=0; i<N; i++)
@@ -58,7 +52,6 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) @@ -58,7 +52,6 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout)
58 phi = acos(z); 52 phi = acos(z);
59 53
60 //compute and store cartesian coordinates 54 //compute and store cartesian coordinates
61 - //bsVector spherical(1, theta + kSph[1], phi + kSph[2]);  
62 bsVector spherical(1, theta, phi); 55 bsVector spherical(1, theta, phi);
63 bsVector cart = spherical.sph2cart(); 56 bsVector cart = spherical.sph2cart();
64 samples[i] = rotation * cart; 57 samples[i] = rotation * cart;
1 #include "nearfield.h" 1 #include "nearfield.h"
  2 +#include <time.h>
  3 +#include <math.h>
  4 +
  5 +#ifdef _WIN32
  6 +#define isnan(x) _isnan(x)
  7 +#define isinf(x) (!_finite(x))
  8 +#endif
  9 +
  10 +int bessjyv_sph(int v, double z, double &vm, double* cjv,
  11 + double* cyv, double* cjvp, double* cyvp);
2 12
3 nearfieldStruct::nearfieldStruct() 13 nearfieldStruct::nearfieldStruct()
4 { 14 {
5 scalarSim = true; 15 scalarSim = true;
6 planeWave = false; 16 planeWave = false;
  17 + lut_us = true;
  18 + lut_uf = false;
7 19
8 nWaves = 0; 20 nWaves = 0;
9 } 21 }
@@ -46,6 +58,8 @@ std::string nearfieldStruct::toStr() @@ -46,6 +58,8 @@ std::string nearfieldStruct::toStr()
46 ss<<"Condenser NA: "<<condenser[0]<<" to "<<condenser[1]<<std::endl; 58 ss<<"Condenser NA: "<<condenser[0]<<" to "<<condenser[1]<<std::endl;
47 ss<<"Focal Point: "<<focus[0]<<", "<<focus[1]<<", "<<focus[2]<<std::endl; 59 ss<<"Focal Point: "<<focus[0]<<", "<<focus[1]<<", "<<focus[2]<<std::endl;
48 ss<<"Field Slice: "<<std::endl; 60 ss<<"Field Slice: "<<std::endl;
  61 + if(lut_us)
  62 + ss<<"LUT Parameters --- min: "<<d_min<<" max: "<<d_max<<std::endl;
49 ss<<pos<<std::endl; 63 ss<<pos<<std::endl;
50 64
51 ss<<std::endl<<"---------Materials-----------"<<std::endl; 65 ss<<std::endl<<"---------Materials-----------"<<std::endl;
@@ -61,6 +75,10 @@ std::string nearfieldStruct::toStr() @@ -61,6 +75,10 @@ std::string nearfieldStruct::toStr()
61 for(unsigned int s=0; s<sVector.size(); s++) 75 for(unsigned int s=0; s<sVector.size(); s++)
62 ss<<sVector[s].toStr()<<std::endl; 76 ss<<sVector[s].toStr()<<std::endl;
63 77
  78 + ss<<"---------Timings-------------"<<std::endl;
  79 + ss<<"Uf = "<<t_Uf<<"ms"<<std::endl;
  80 + ss<<"Us = "<<t_Us<<"ms"<<std::endl;
  81 +
64 return ss.str(); 82 return ss.str();
65 } 83 }
66 84
@@ -70,7 +88,8 @@ void nearfieldStruct::calcWaves() @@ -70,7 +88,8 @@ void nearfieldStruct::calcWaves()
70 inWaves.resize(nWaves); 88 inWaves.resize(nWaves);
71 89
72 //re-seed the random number generator 90 //re-seed the random number generator
73 - //srand(seed); 91 + //srand(time(NULL));
  92 + srand(NULL);
74 93
75 //calculate the monte-carlo samples 94 //calculate the monte-carlo samples
76 mcSampleNA(&inWaves[0], nWaves, k, condenser[0], condenser[1]); 95 mcSampleNA(&inWaves[0], nWaves, k, condenser[0], condenser[1]);
@@ -84,6 +103,8 @@ void nearfieldStruct::calcSpheres() @@ -84,6 +103,8 @@ void nearfieldStruct::calcSpheres()
84 //calculate all of the constants necessary to evaluate the scattered field 103 //calculate all of the constants necessary to evaluate the scattered field
85 //estimate the order required to represent the scattered field for each sphere 104 //estimate the order required to represent the scattered field for each sphere
86 105
  106 +
  107 +
87 //for each sphere 108 //for each sphere
88 for(int i=0; i<sVector.size(); i++) 109 for(int i=0; i<sVector.size(); i++)
89 { 110 {
@@ -91,12 +112,10 @@ void nearfieldStruct::calcSpheres() @@ -91,12 +112,10 @@ void nearfieldStruct::calcSpheres()
91 112
92 //calculate the required order 113 //calculate the required order
93 sVector[i].calcNl(lambda); 114 sVector[i].calcNl(lambda);
94 - //std::cout<<sVector[i].Nl<<std::endl;  
95 115
96 //set the refractive index for the sphere 116 //set the refractive index for the sphere
97 int imat = sVector[i].iMaterial; 117 int imat = sVector[i].iMaterial;
98 rts::rtsComplex<ptype> n = mVector[imat](lambda); 118 rts::rtsComplex<ptype> n = mVector[imat](lambda);
99 - //std::cout<<"Sphere refractive index: "<<n<<std::endl;  
100 119
101 //calculate the scattering coefficients 120 //calculate the scattering coefficients
102 sVector[i].calcCoeff(lambda, n); 121 sVector[i].calcCoeff(lambda, n);
@@ -104,18 +123,109 @@ void nearfieldStruct::calcSpheres() @@ -104,18 +123,109 @@ void nearfieldStruct::calcSpheres()
104 //save the refractive index 123 //save the refractive index
105 sVector[i].n = n; 124 sVector[i].n = n;
106 125
  126 + //if the LUT is used, calculate Usp(theta, r)
  127 + if(lut_us)
  128 + {
  129 + sVector[i].calcUp(lambda, n, pos, max(U.R[0], U.R[1]));
  130 + }
  131 +
  132 +
107 } 133 }
108 134
109 } 135 }
110 136
  137 +void nearfieldStruct::calcUs()
  138 +{
  139 +
  140 +
  141 + if(lut_us)
  142 + scalarUpLut();
  143 + else
  144 + scalarUs();
  145 +}
  146 +
  147 +void nearfieldStruct::calcUf()
  148 +{
  149 + if(lut_uf)
  150 + scalarUfLut();
  151 + else
  152 + scalarUf();
  153 +}
  154 +
111 void nearfieldStruct::Simulate() 155 void nearfieldStruct::Simulate()
112 { 156 {
  157 + //initialize timings
  158 + t_Uf = 0;
  159 + t_Us = 0;
  160 +
113 //compute a set of plane waves for Monte-Carlo simulation 161 //compute a set of plane waves for Monte-Carlo simulation
114 calcWaves(); 162 calcWaves();
115 163
116 //the near field has to be simulated no matter what the output rtsPoint is 164 //the near field has to be simulated no matter what the output rtsPoint is
117 - scalarUf(); 165 + calcUf();
118 calcSpheres(); 166 calcSpheres();
119 - scalarUs(); 167 + calcUs();
120 sumUf(); 168 sumUf();
  169 +
  170 + //U.Mag().toImage("testU.bmp");
  171 +}
  172 +
  173 +void nearfieldStruct::calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR)
  174 +{
  175 + /*Compute the look-up-table for spherical bessel functions used for the incident field
  176 + j = (Nl + 1) x aR array of values
  177 + aR = resolution of j
  178 + */
  179 +
  180 + //compute the wavenumber
  181 + ptype k = 2 * PI / lambda;
  182 + unsigned int Nl = m;
  183 +
  184 + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored)
  185 + int bytes = sizeof(double) * (Nl + 1);
  186 + double* cjv_kd = (double*)malloc(bytes);
  187 + double* cyv_kd = (double*)malloc(bytes);
  188 + double* cjvp_kd = (double*)malloc(bytes);
  189 + double* cyvp_kd = (double*)malloc(bytes);
  190 +
  191 + //compute the bessel functions using the CPU-based algorithm
  192 + double vm;
  193 +
  194 + //for each sample along r
  195 + ptype dr = (d_max - d_min) / (dR - 1);
  196 + ptype d;
  197 + ptype jv;
  198 + for(int id = 0; id < dR; id++)
  199 + {
  200 + d = id * dr + d_min;
  201 + double kd = k*d;
  202 + bessjyv_sph(Nl, kd, vm, cjv_kd, cyv_kd, cjvp_kd, cyvp_kd);
  203 +
  204 + //copy the double data to the bsComplex array
  205 + for(int l=0; l<=Nl; l++)
  206 + {
  207 + jv = cjv_kd[l];
  208 + if(isnan(jv) || isinf(jv))
  209 + {
  210 + if(kd == 0 && l == 0)
  211 + jv = 1;
  212 + else
  213 + jv = 0;
  214 + }
  215 + j[id * (Nl+1) + l] = jv;
  216 + }
  217 + }
  218 +
  219 + /*ofstream outfile("uf_besselout.txt");
  220 + for(int ir = 0; ir < dR; ir++)
  221 + {
  222 + outfile<<ir*dr + d_min<<endl;
  223 + for(int l = 0; l<=Nl; l++)
  224 + {
  225 + outfile<<j[ir * (Nl+1) + l]<<" --";
  226 + }
  227 + outfile<<endl;
  228 + }
  229 + outfile.close();*/
  230 +
121 } 231 }
@@ -31,6 +31,8 @@ struct nearfieldStruct @@ -31,6 +31,8 @@ struct nearfieldStruct
31 31
32 //slices for the focused field 32 //slices for the focused field
33 fieldslice Uf; 33 fieldslice Uf;
  34 + ptype d_min, d_max;
  35 +
34 // and total field: Uf + sum(Us) 36 // and total field: Uf + sum(Us)
35 fieldslice U; 37 fieldslice U;
36 38
@@ -43,6 +45,14 @@ struct nearfieldStruct @@ -43,6 +45,14 @@ struct nearfieldStruct
43 //flag for a plane wave 45 //flag for a plane wave
44 bool planeWave; 46 bool planeWave;
45 47
  48 + //flag for using a LUT
  49 + bool lut_uf;
  50 + bool lut_us;
  51 +
  52 + //timings
  53 + float t_Uf;
  54 + float t_Us;
  55 +
46 56
47 57
48 //---------Scatterers------------ 58 //---------Scatterers------------
@@ -78,10 +88,17 @@ struct nearfieldStruct @@ -78,10 +88,17 @@ struct nearfieldStruct
78 void setPos(bsPoint pMin, bsPoint pMax, bsVector normal); 88 void setPos(bsPoint pMin, bsPoint pMax, bsVector normal);
79 89
80 //this function re-computes the focused field 90 //this function re-computes the focused field
  91 + void calcUf();
81 void scalarUf(); 92 void scalarUf();
  93 + void scalarUfLut();
  94 +
  95 + void calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR);
82 96
83 //compute the field scattered by all of the materials 97 //compute the field scattered by all of the materials
  98 + void calcUs();
84 void scalarUs(); 99 void scalarUs();
  100 + void scalarUpLut();
  101 +
85 102
86 //add the incident field to the sum of scattered fields 103 //add the incident field to the sum of scattered fields
87 void sumUf(); 104 void sumUf();
@@ -5,7 +5,7 @@ @@ -5,7 +5,7 @@
5 #include "rts/cuda/error.h" 5 #include "rts/cuda/error.h"
6 #include "rts/cuda/timer.h" 6 #include "rts/cuda/timer.h"
7 7
8 - 8 +//Incident field for a single plane wave
9 __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR) 9 __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR)
10 { 10 {
11 /*Compute the scalar focused field using Debye focusing 11 /*Compute the scalar focused field using Debye focusing
@@ -41,7 +41,8 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p @@ -41,7 +41,8 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p
41 Uf[i] = exp(d) * A; 41 Uf[i] = exp(d) * A;
42 42
43 } 43 }
44 - 44 +
  45 +//Incident field for a focused point source
45 __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4) 46 __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4)
46 { 47 {
47 /*Compute the scalar focused field using Debye focusing 48 /*Compute the scalar focused field using Debye focusing
@@ -151,7 +152,6 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt @@ -151,7 +152,6 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt
151 } 152 }
152 153
153 sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); 154 sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);
154 - //sumUf += il * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);  
155 155
156 il *= im; 156 il *= im;
157 } 157 }
@@ -162,21 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt @@ -162,21 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt
162 162
163 void nearfieldStruct::scalarUf() 163 void nearfieldStruct::scalarUf()
164 { 164 {
165 - //Compute the incident field via a scalar simulation  
166 - //This method uses Debye focusing to approximate the field analytically  
167 -  
168 - //time the calculation of the focused field  
169 - //gpuStartTimer();  
170 -  
171 - //set the field slice to a scalar field  
172 - //Uf.scalarField = true;  
173 -  
174 - //initialize the GPU arrays  
175 - //Uf.init_gpu(); 165 +
  166 + gpuStartTimer();
176 167
177 //create one thread for each pixel of the field slice 168 //create one thread for each pixel of the field slice
178 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); 169 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
179 - dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); 170 + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
180 171
181 //if we are computing a plane wave, call the gpuScalarUfp function 172 //if we are computing a plane wave, call the gpuScalarUfp function
182 if(planeWave) 173 if(planeWave)
@@ -191,10 +182,7 @@ void nearfieldStruct::scalarUf() @@ -191,10 +182,7 @@ void nearfieldStruct::scalarUf()
191 ptype cosBeta = cos(asin(condenser[1])); 182 ptype cosBeta = cos(asin(condenser[1]));
192 //compute the scalar Uf field (this will be in the x_hat channel of Uf) 183 //compute the scalar Uf field (this will be in the x_hat channel of Uf)
193 gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); 184 gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m);
194 - }  
195 -  
196 - //float t = gpuStopTimer();  
197 - //std::cout<<"Scalar Uf Time: "<<t<<"ms"<<std::endl;  
198 - //std::cout<<focus<<std::endl;  
199 - 185 + }
  186 +
  187 + t_Uf = gpuStopTimer();
200 } 188 }
nfScalarUfLut.cu 0 → 100644
  1 +#include "nearfield.h"
  2 +
  3 +#include "rts/math/legendre.h"
  4 +#include "rts/cuda/error.h"
  5 +#include "rts/cuda/timer.h"
  6 +
  7 +texture<float, cudaTextureType2D> texJ;
  8 +
  9 +__global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR);
  10 +
  11 +__global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoint f, bsVector k, ptype A, ptype cosAlpha, ptype cosBeta, int nl, ptype dmin, ptype dmax, int dR)
  12 +{
  13 + /*This function computes the focused field for a 2D slice
  14 +
  15 + Uf = destination field slice
  16 + ABCD = plane representing the field slice in world space
  17 + uR, vR = resolution of the Uf field
  18 + f = focal point of the condenser
  19 + k = direction of the incident light
  20 + A = amplitude of the incident field
  21 + cosAlpha= cosine of the solid angle subtended by the condenser obscuration
  22 + cosBeta = cosine of the solid angle subtended by the condenser aperature
  23 + nl = number of orders used to compute the field
  24 + dR = number of Bessel function values in the look-up texture
  25 +
  26 + */
  27 +
  28 + //get the current coordinate in the plane slice
  29 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  30 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  31 +
  32 + //make sure that the thread indices are in-bounds
  33 + if(iu >= uR || iv >= vR) return;
  34 +
  35 + //compute the index (easier access to the scalar field array)
  36 + int i = iv*uR + iu;
  37 +
  38 + //compute the parameters for u and v
  39 + ptype u = (ptype)iu / (uR);
  40 + ptype v = (ptype)iv / (vR);
  41 +
  42 +
  43 +
  44 + //get the rtsPoint in world space and then the r vector
  45 + bsPoint p = ABCD(u, v);
  46 + bsVector r = p - f;
  47 + ptype d = r.len();
  48 +
  49 + if(d == 0)
  50 + {
  51 + Uf[i] = A * 2 * PI * (cosAlpha - cosBeta);
  52 + return;
  53 + }
  54 +
  55 + //get info for the light direction and frequency
  56 + r = r.norm();
  57 +
  58 + //compute the imaginary factor i^l
  59 + bsComplex im = bsComplex(0, 1);
  60 + bsComplex il = bsComplex(1, 0);
  61 +
  62 + //Legendre functions are computed dynamically to save memory
  63 + //initialize the Legendre functions
  64 +
  65 + ptype P[2];
  66 + //get the angle between k and r (light direction and position vector)
  67 + ptype cosTheta;
  68 + cosTheta = k.dot(r);
  69 +
  70 + rts::init_legendre<ptype>(cosTheta, P[0], P[1]);
  71 +
  72 + //initialize legendre functions for the cassegrain angles
  73 + ptype Palpha[3];
  74 + rts::init_legendre<ptype>(cosAlpha, Palpha[0], Palpha[1]);
  75 + Palpha[2] = 1;
  76 +
  77 + ptype Pbeta[3];
  78 + rts::init_legendre<ptype>(cosBeta, Pbeta[0], Pbeta[1]);
  79 + Pbeta[2] = 1;
  80 +
  81 + //for each order l
  82 + bsComplex sumUf(0.0, 0.0);
  83 + ptype jl = 0.0;
  84 + ptype Pl;
  85 + ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1);
  86 + for(int l = 0; l<=nl; l++)
  87 + {
  88 + jl = tex2D(texJ, l + 0.5, di + 0.5);
  89 + if(l==0)
  90 + Pl = P[0];
  91 + else if(l==1)
  92 + {
  93 + Pl = P[1];
  94 +
  95 + //adjust the cassegrain Legendre function
  96 + Palpha[2] = Palpha[0];
  97 + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
  98 + Pbeta[2] = Pbeta[0];
  99 + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
  100 + }
  101 + else
  102 + {
  103 + rts::shift_legendre<ptype>(l, cosTheta, P[0], P[1]);
  104 +
  105 + Pl = P[1];
  106 +
  107 + //adjust the cassegrain outer Legendre function
  108 + Palpha[2] = Palpha[0];
  109 + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
  110 + Pbeta[2] = Pbeta[0];
  111 + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
  112 + }
  113 +
  114 + sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);
  115 + //sumUf += jl;
  116 +
  117 + il *= im;
  118 + }
  119 +
  120 + Uf[i] = sumUf * 2 * PI * A;
  121 + //Uf[i] = u;
  122 + //return;
  123 +}
  124 +
  125 +void nearfieldStruct::scalarUfLut()
  126 +{
  127 + gpuStartTimer();
  128 +
  129 + //calculate the minimum and maximum points in the focused field
  130 + d_min = pos.dist(focus);
  131 + d_max = pos.dist_max(focus);
  132 +
  133 + //allocate space for the Bessel function
  134 + int dR = 2 * max(Uf.R[0], Uf.R[1]);
  135 + ptype* j = NULL;
  136 + j = (ptype*) malloc(sizeof(ptype) * dR * (m+1));
  137 +
  138 + //calculate Bessel function LUT
  139 + calcBesselLut(j, d_min, d_max, dR);
  140 +
  141 + //create a CUDA array structure and specify the format description
  142 + cudaArray* arrayJ;
  143 + cudaChannelFormatDesc channelDesc =
  144 + cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
  145 +
  146 + //allocate memory
  147 + HANDLE_ERROR(cudaMallocArray(&arrayJ, &channelDesc, m+1, dR));
  148 +
  149 + //specify texture properties
  150 + texJ.addressMode[0] = cudaAddressModeMirror;
  151 + texJ.addressMode[1] = cudaAddressModeMirror;
  152 + texJ.filterMode = cudaFilterModeLinear;
  153 + texJ.normalized = false;
  154 +
  155 + //bind the texture to the array
  156 + HANDLE_ERROR(cudaBindTextureToArray(texJ, arrayJ, channelDesc));
  157 +
  158 + //copy the CPU Bessel LUT to the GPU-based array
  159 + HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice));
  160 +
  161 + //----------------Compute the focused field
  162 + //create one thread for each pixel of the field slice
  163 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  164 + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  165 +
  166 + //if we are computing a plane wave, call the gpuScalarUfp function
  167 + if(planeWave)
  168 + {
  169 + gpuScalarUfp<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]);
  170 + }
  171 + //otherwise compute the condenser info and create a focused field
  172 + else
  173 + {
  174 + //pre-compute the cosine of the obscuration and objective angles
  175 + ptype cosAlpha = cos(asin(condenser[0]));
  176 + ptype cosBeta = cos(asin(condenser[1]));
  177 + //compute the scalar Uf field (this will be in the x_hat channel of Uf)
  178 + gpuScalarUfLut<<<dimGrid, dimBlock>>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR);
  179 + }
  180 +
  181 +
  182 + //free everything
  183 + free(j);
  184 +
  185 + HANDLE_ERROR(cudaFreeArray(arrayJ));
  186 +
  187 + t_Uf = gpuStopTimer();
  188 +}
nfScalarUpLut.cu 0 → 100644
  1 +#include "nearfield.h"
  2 +#include "rts/math/spherical_bessel.h"
  3 +#include "rts/math/legendre.h"
  4 +#include <stdlib.h>
  5 +#include "rts/cuda/error.h"
  6 +#include "rts/cuda/timer.h"
  7 +
  8 +texture<float2, cudaTextureType2D> texUsp;
  9 +texture<float2, cudaTextureType2D> texUip;
  10 +
  11 +__global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR)
  12 +{
  13 + /*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field
  14 + produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field.
  15 +
  16 + Us = final scattered field
  17 + k = list of incoming plane waves (Monte-Carlo samples)
  18 + nk = number of incoming MC samples
  19 + kmag= magnitude of the incoming field 2pi/lambda
  20 + dmin= minimum distance of the Usp texture
  21 + dmax= maximum distance of the Usp texture
  22 + f = position of the focus
  23 + ps = position of the sphere
  24 + A = total amplitude of the incident field arriving at the focal spot
  25 + ABCD= rectangle representing the field slice
  26 + uR = resolution of the field slice in the u direction
  27 + vR = resolution of the field slice in the v direction
  28 + dR = resolution of the Usp texture in the d direction
  29 + thetaR= resolution of the Usp texture in the theta direction
  30 + */
  31 +
  32 + //get the current coordinate in the plane slice
  33 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  34 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  35 +
  36 + //make sure that the thread indices are in-bounds
  37 + if(iu >= uR || iv >= vR) return;
  38 +
  39 + //compute the index (easier access to the scalar field array)
  40 + int i = iv*uR + iu;