51b6469a
dmayerich
added look-up tables
|
1
2
3
|
#include "nearfield.h"
#include "rts/math/legendre.h"
|
396a5f12
David Mayerich
added custom code...
|
4
|
#include "rts/cuda/error.h"
|
51b6469a
dmayerich
added look-up tables
|
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
#include "rts/cuda/timer.h"
texture<float, cudaTextureType2D> texJ;
__global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR);
__global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoint f, bsVector k, ptype A, ptype cosAlpha, ptype cosBeta, int nl, ptype dmin, ptype dmax, int dR)
{
/*This function computes the focused field for a 2D slice
Uf = destination field slice
ABCD = plane representing the field slice in world space
uR, vR = resolution of the Uf field
f = focal point of the condenser
k = direction of the incident light
A = amplitude of the incident field
cosAlpha= cosine of the solid angle subtended by the condenser obscuration
cosBeta = cosine of the solid angle subtended by the condenser aperature
nl = number of orders used to compute the field
dR = number of Bessel function values in the look-up texture
*/
|
396a5f12
David Mayerich
added custom code...
|
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
|
//get the current coordinate in the plane slice
int iu = blockIdx.x * blockDim.x + threadIdx.x;
int iv = blockIdx.y * blockDim.y + threadIdx.y;
//make sure that the thread indices are in-bounds
if(iu >= uR || iv >= vR) return;
//compute the index (easier access to the scalar field array)
int i = iv*uR + iu;
//compute the parameters for u and v
ptype u = (ptype)iu / (uR);
ptype v = (ptype)iv / (vR);
//get the rtsPoint in world space and then the r vector
bsPoint p = ABCD(u, v);
bsVector r = p - f;
|
51b6469a
dmayerich
added look-up tables
|
47
48
49
50
51
52
|
ptype d = r.len();
if(d == 0)
{
Uf[i] = A * 2 * PI * (cosAlpha - cosBeta);
return;
|
396a5f12
David Mayerich
added custom code...
|
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
}
//get info for the light direction and frequency
r = r.norm();
//compute the imaginary factor i^l
bsComplex im = bsComplex(0, 1);
bsComplex il = bsComplex(1, 0);
//Legendre functions are computed dynamically to save memory
//initialize the Legendre functions
ptype P[2];
//get the angle between k and r (light direction and position vector)
ptype cosTheta;
|
51b6469a
dmayerich
added look-up tables
|
68
69
|
cosTheta = k.dot(r);
|
396a5f12
David Mayerich
added custom code...
|
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
rts::init_legendre<ptype>(cosTheta, P[0], P[1]);
//initialize legendre functions for the cassegrain angles
ptype Palpha[3];
rts::init_legendre<ptype>(cosAlpha, Palpha[0], Palpha[1]);
Palpha[2] = 1;
ptype Pbeta[3];
rts::init_legendre<ptype>(cosBeta, Pbeta[0], Pbeta[1]);
Pbeta[2] = 1;
//for each order l
bsComplex sumUf(0, 0);
ptype jl = 0;
|
51b6469a
dmayerich
added look-up tables
|
84
|
ptype Pl;
|
396a5f12
David Mayerich
added custom code...
|
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1);
for(int l = 0; l<=nl; l++)
{
jl = tex2D(texJ, l + 0.5f, di + 0.5f);
if(l==0)
Pl = P[0];
else if(l==1)
{
Pl = P[1];
//adjust the cassegrain Legendre function
Palpha[2] = Palpha[0];
rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
Pbeta[2] = Pbeta[0];
rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
}
else
{
rts::shift_legendre<ptype>(l, cosTheta, P[0], P[1]);
Pl = P[1];
//adjust the cassegrain outer Legendre function
Palpha[2] = Palpha[0];
rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
Pbeta[2] = Pbeta[0];
rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
}
|
51b6469a
dmayerich
added look-up tables
|
114
115
|
sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);
//sumUf += jl;
|
396a5f12
David Mayerich
added custom code...
|
116
117
118
119
|
il *= im;
}
|
51b6469a
dmayerich
added look-up tables
|
120
|
Uf[i] = sumUf * 2 * PI * A;
|
396a5f12
David Mayerich
added custom code...
|
121
|
//Uf[i] = u;
|
51b6469a
dmayerich
added look-up tables
|
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
|
//return;
}
void nearfieldStruct::scalarUfLut()
{
gpuStartTimer();
//calculate the minimum and maximum points in the focused field
d_min = pos.dist(focus);
d_max = pos.dist_max(focus);
//allocate space for the Bessel function
int dR = 2 * max(Uf.R[0], Uf.R[1]);
ptype* j = NULL;
j = (ptype*) malloc(sizeof(ptype) * dR * (m+1));
//calculate Bessel function LUT
calcBesselLut(j, d_min, d_max, dR);
//create a CUDA array structure and specify the format description
cudaArray* arrayJ;
cudaChannelFormatDesc channelDesc =
cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
//allocate memory
HANDLE_ERROR(cudaMallocArray(&arrayJ, &channelDesc, m+1, dR));
//specify texture properties
texJ.addressMode[0] = cudaAddressModeMirror;
texJ.addressMode[1] = cudaAddressModeMirror;
texJ.filterMode = cudaFilterModeLinear;
texJ.normalized = false;
//bind the texture to the array
HANDLE_ERROR(cudaBindTextureToArray(texJ, arrayJ, channelDesc));
//copy the CPU Bessel LUT to the GPU-based array
HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice));
//----------------Compute the focused field
|
396a5f12
David Mayerich
added custom code...
|
162
163
|
//create one thread for each pixel of the field slice
dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
|
51b6469a
dmayerich
added look-up tables
|
164
|
dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
|
396a5f12
David Mayerich
added custom code...
|
165
166
167
168
169
170
171
172
|
//if we are computing a plane wave, call the gpuScalarUfp function
if(planeWave)
{
gpuScalarUfp<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]);
}
//otherwise compute the condenser info and create a focused field
else
|
51b6469a
dmayerich
added look-up tables
|
173
174
175
|
{
//pre-compute the cosine of the obscuration and objective angles
ptype cosAlpha = cos(asin(condenser[0]));
|
396a5f12
David Mayerich
added custom code...
|
176
177
178
|
ptype cosBeta = cos(asin(condenser[1]));
//compute the scalar Uf field (this will be in the x_hat channel of Uf)
gpuScalarUfLut<<<dimGrid, dimBlock>>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR);
|
51b6469a
dmayerich
added look-up tables
|
179
180
181
182
183
184
185
186
187
188
|
}
//free everything
free(j);
HANDLE_ERROR(cudaFreeArray(arrayJ));
t_Uf = gpuStopTimer();
}
|