nfScalarUs.cu
6.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
#include "nearfield.h"
#include "rts/math/spherical_bessel.h"
#include "rts/math/legendre.h"
#include <stdlib.h>
#include "rts/cuda/error.h"
#include "rts/cuda/timer.h"
__device__ bsComplex calc_Us(ptype kd, ptype cos_theta, int Nl, bsComplex* B)
{
//initialize the spherical Bessel functions
ptype j[2];
rts::init_sbesselj<ptype>(kd, j);
ptype y[2];
rts::init_sbessely<ptype>(kd, y);
//initialize the Legendre polynomial
ptype P[2];
rts::init_legendre<ptype>(cos_theta, P[0], P[1]);
//initialize the spherical Hankel function
bsComplex h((ptype)0, (ptype)0);
//initialize the result
bsComplex Us((ptype)0, (ptype)0);
//for each order up to Nl
for(int l=0; l<=Nl; l++)
{
if(l == 0)
{
h.r = j[0];
h.i = y[0];
Us += B[0] * h * P[0];
}
else
{
//shift the bessel functions and legendre polynomials
if(l > 1)
{
rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);
rts::shift_sbesselj<ptype>(l, kd, j);
rts::shift_sbessely<ptype>(l, kd, y);
}
h.r = j[1];
h.i = y[1];
Us += B[l] * h * P[1];
}
}
return Us;
}
__device__ bsComplex calc_Ui(bsComplex knd, ptype cos_theta, int Nl, bsComplex* A)
{
//calculate the internal field of a sphere
bsComplex Ui((ptype)0, (ptype)0);
//deal with rtsPoints near zero
if(real(knd) < EPSILON_FLOAT)
{
//for(int l=0; l<Nl; l++)
Ui = A[0];
return Ui;
}
//initialize the spherical Bessel functions
bsComplex j[2];
rts::init_sbesselj<bsComplex>(knd, j);
//initialize the Legendre polynomial
ptype P[2];
rts::init_legendre<ptype>(cos_theta, P[0], P[1]);
//for each order up to Nl
for(int l=0; l<=Nl; l++)
{
if(l == 0)
{
Ui += A[0] * j[0] * P[0];
}
else
{
//shift the bessel functions and legendre polynomials
if(l > 1)
{
rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);
rts::shift_sbesselj<bsComplex>(l, knd, j);
}
Ui += A[l] * j[1] * P[1];
}
}
return Ui;
}
__global__ void gpuScalarUsp(bsComplex* Us, bsVector* k, int nk, ptype kmag, bsPoint f, bsPoint ps, ptype a, bsComplex n, bsComplex* Beta, bsComplex* Alpha, int Nl, ptype A, bsRect ABCD, int uR, int vR)
{
//get the current coordinate in the plane slice
int iu = blockIdx.x * blockDim.x + threadIdx.x;
int iv = blockIdx.y * blockDim.y + threadIdx.y;
//make sure that the thread indices are in-bounds
if(iu >= uR || iv >= vR) return;
//compute the index (easier access to the scalar field array)
int i = iv*uR + iu;
//compute the parameters for u and v
ptype u = (ptype)iu / (uR);
ptype v = (ptype)iv / (vR);
//get the rtsPoint in world space and then the r vector
bsPoint p = ABCD(u, v);
bsVector r = p - ps;
ptype d = r.len();
bsComplex sumUs(0, 0);
//for each plane wave in the wave list
for(int iw = 0; iw < nk; iw++)
{
//normalize the direction vectors and find their inner product
r = r.norm();
ptype cos_theta = k[iw].dot(r);
//compute the phase factor for spheres that are not at the origin
bsVector c = ps - f;
bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));
//compute the internal field if we are inside a sphere
if(d <= a)
{
bsComplex knd = kmag * d * n;
sumUs += (1.0/nk) * A * phase * calc_Ui(knd, cos_theta, Nl, Alpha);
}
//otherwise compute the scattered field
else
{
//compute the argument for the spherical Hankel function
ptype kd = kmag * d;
sumUs += (1.0/nk) * A * phase * calc_Us(kd, cos_theta, Nl, Beta);
}
}
Us[i] += sumUs;
}
void nearfieldStruct::scalarUs()
{
//get the number of spheres
int nSpheres = sVector.size();
//if there are no spheres, nothing to do here
if(nSpheres == 0)
return;
//time the calculation of the focused field
gpuStartTimer();
//clear the scattered field
U.clear_gpu();
//create one thread for each pixel of the field slice
dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
//for each sphere
int Nl;
for(int s = 0; s<nSpheres; s++)
{
//get the number of orders for this sphere
Nl = sVector[s].Nl;
//allocate memory for the scattering coefficients
bsComplex* gpuB;
HANDLE_ERROR(cudaMalloc((void**) &gpuB, (Nl+1) * sizeof(bsComplex)));
bsComplex* gpuA;
HANDLE_ERROR(cudaMalloc((void**) &gpuA, (Nl+1) * sizeof(bsComplex)));
//copy the scattering coefficients to the GPU
HANDLE_ERROR(cudaMemcpy(gpuB, &sVector[s].B[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice));
HANDLE_ERROR(cudaMemcpy(gpuA, &sVector[s].A[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice));
//if we are computing a plane wave, call the gpuScalarUfp function
sphere S = sVector[s];
bsVector* gpuk;
if(planeWave)
{
//if this is a single plane wave, assume it goes along direction k (copy the k vector to the GPU)
HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ));
HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice));
gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat,
gpuk,
1,
2 * PI / lambda,
focus,
sVector[s].p,
sVector[s].a,
sVector[s].n,
gpuB,
gpuA,
Nl,
A,
pos,
U.R[0],
U.R[1]);
HANDLE_ERROR(cudaFree(gpuk));
}
//otherwise copy all of the monte-carlo samples to the GPU and compute
else
{
HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * inWaves.size() ));
HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * inWaves.size(), cudaMemcpyHostToDevice));
//compute the amplitude that makes it through the condenser
ptype subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );
gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat,
gpuk,
inWaves.size(),
2 * PI / lambda,
focus,
sVector[s].p,
sVector[s].a,
sVector[s].n,
gpuB,
gpuA,
Nl,
subA,
pos,
U.R[0],
U.R[1]);
HANDLE_ERROR(cudaFree(gpuk));
}
//free memory for scattering coefficients
HANDLE_ERROR(cudaFree(gpuA));
HANDLE_ERROR(cudaFree(gpuB));
}
//store the time to compute the scattered field
t_Us = gpuStopTimer();
}