3f56f1f9
dmayerich
initial commit
|
1
|
#include "nearfield.h"
|
d6f53e68
dmayerich
rts organization
|
2
3
|
#include "rts/math/spherical_bessel.h"
#include "rts/math/legendre.h"
|
3f56f1f9
dmayerich
initial commit
|
4
|
#include <stdlib.h>
|
d6f53e68
dmayerich
rts organization
|
5
6
|
#include "rts/cuda/error.h"
#include "rts/cuda/timer.h"
|
3f56f1f9
dmayerich
initial commit
|
7
8
9
|
__device__ bsComplex calc_Us(ptype kd, ptype cos_theta, int Nl, bsComplex* B)
{
|
396a5f12
David Mayerich
added custom code...
|
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
|
//initialize the spherical Bessel functions
ptype j[2];
rts::init_sbesselj<ptype>(kd, j);
ptype y[2];
rts::init_sbessely<ptype>(kd, y);
//initialize the Legendre polynomial
ptype P[2];
rts::init_legendre<ptype>(cos_theta, P[0], P[1]);
//initialize the spherical Hankel function
bsComplex h((ptype)0, (ptype)0);
//initialize the result
bsComplex Us((ptype)0, (ptype)0);
//for each order up to Nl
for(int l=0; l<=Nl; l++)
{
if(l == 0)
{
h.r = j[0];
h.i = y[0];
Us += B[0] * h * P[0];
}
else
{
//shift the bessel functions and legendre polynomials
if(l > 1)
{
rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);
rts::shift_sbesselj<ptype>(l, kd, j);
rts::shift_sbessely<ptype>(l, kd, y);
}
h.r = j[1];
h.i = y[1];
Us += B[l] * h * P[1];
}
|
3f56f1f9
dmayerich
initial commit
|
51
52
53
54
55
56
57
58
59
60
61
|
}
return Us;
}
__device__ bsComplex calc_Ui(bsComplex knd, ptype cos_theta, int Nl, bsComplex* A)
{
//calculate the internal field of a sphere
bsComplex Ui((ptype)0, (ptype)0);
//deal with rtsPoints near zero
|
396a5f12
David Mayerich
added custom code...
|
62
63
64
65
66
|
if(real(knd) < EPSILON_FLOAT)
{
//for(int l=0; l<Nl; l++)
Ui = A[0];
return Ui;
|
3f56f1f9
dmayerich
initial commit
|
67
68
|
}
|
396a5f12
David Mayerich
added custom code...
|
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
//initialize the spherical Bessel functions
bsComplex j[2];
rts::init_sbesselj<bsComplex>(knd, j);
//initialize the Legendre polynomial
ptype P[2];
rts::init_legendre<ptype>(cos_theta, P[0], P[1]);
//for each order up to Nl
for(int l=0; l<=Nl; l++)
{
if(l == 0)
{
Ui += A[0] * j[0] * P[0];
}
else
{
//shift the bessel functions and legendre polynomials
if(l > 1)
{
rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);
rts::shift_sbesselj<bsComplex>(l, knd, j);
}
Ui += A[l] * j[1] * P[1];
}
|
3f56f1f9
dmayerich
initial commit
|
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
}
return Ui;
}
__global__ void gpuScalarUsp(bsComplex* Us, bsVector* k, int nk, ptype kmag, bsPoint f, bsPoint ps, ptype a, bsComplex n, bsComplex* Beta, bsComplex* Alpha, int Nl, ptype A, bsRect ABCD, int uR, int vR)
{
//get the current coordinate in the plane slice
int iu = blockIdx.x * blockDim.x + threadIdx.x;
int iv = blockIdx.y * blockDim.y + threadIdx.y;
//make sure that the thread indices are in-bounds
if(iu >= uR || iv >= vR) return;
//compute the index (easier access to the scalar field array)
int i = iv*uR + iu;
//compute the parameters for u and v
ptype u = (ptype)iu / (uR);
ptype v = (ptype)iv / (vR);
//get the rtsPoint in world space and then the r vector
bsPoint p = ABCD(u, v);
bsVector r = p - ps;
|
396a5f12
David Mayerich
added custom code...
|
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
|
ptype d = r.len();
bsComplex sumUs(0, 0);
//for each plane wave in the wave list
for(int iw = 0; iw < nk; iw++)
{
//normalize the direction vectors and find their inner product
r = r.norm();
ptype cos_theta = k[iw].dot(r);
//compute the phase factor for spheres that are not at the origin
bsVector c = ps - f;
bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));
//compute the internal field if we are inside a sphere
|
3f56f1f9
dmayerich
initial commit
|
136
137
138
|
if(d <= a)
{
bsComplex knd = kmag * d * n;
|
396a5f12
David Mayerich
added custom code...
|
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
|
sumUs += (1.0f/nk) * A * phase * calc_Ui(knd, cos_theta, Nl, Alpha);
}
//otherwise compute the scattered field
else
{
//compute the argument for the spherical Hankel function
ptype kd = kmag * d;
sumUs += (1.0f/nk) * A * phase * calc_Us(kd, cos_theta, Nl, Beta);
}
}
Us[i] += sumUs;
|
3f56f1f9
dmayerich
initial commit
|
154
155
156
157
158
159
160
161
162
163
164
165
|
}
void nearfieldStruct::scalarUs()
{
//get the number of spheres
int nSpheres = sVector.size();
//if there are no spheres, nothing to do here
if(nSpheres == 0)
return;
//time the calculation of the focused field
|
51b6469a
dmayerich
added look-up tables
|
166
|
gpuStartTimer();
|
3f56f1f9
dmayerich
initial commit
|
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
|
//clear the scattered field
U.clear_gpu();
//create one thread for each pixel of the field slice
dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
//for each sphere
int Nl;
for(int s = 0; s<nSpheres; s++)
{
//get the number of orders for this sphere
Nl = sVector[s].Nl;
//allocate memory for the scattering coefficients
bsComplex* gpuB;
HANDLE_ERROR(cudaMalloc((void**) &gpuB, (Nl+1) * sizeof(bsComplex)));
bsComplex* gpuA;
HANDLE_ERROR(cudaMalloc((void**) &gpuA, (Nl+1) * sizeof(bsComplex)));
//copy the scattering coefficients to the GPU
HANDLE_ERROR(cudaMemcpy(gpuB, &sVector[s].B[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice));
HANDLE_ERROR(cudaMemcpy(gpuA, &sVector[s].A[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice));
|
396a5f12
David Mayerich
added custom code...
|
193
194
195
|
//if we are computing a plane wave, call the gpuScalarUfp function
sphere S = sVector[s];
bsVector* gpuk;
|
3f56f1f9
dmayerich
initial commit
|
196
197
|
if(planeWave)
|
396a5f12
David Mayerich
added custom code...
|
198
199
200
|
{
//if this is a single plane wave, assume it goes along direction k (copy the k vector to the GPU)
HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ));
|
3f56f1f9
dmayerich
initial commit
|
201
202
|
HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice));
gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat,
|
396a5f12
David Mayerich
added custom code...
|
203
|
gpuk,
|
3f56f1f9
dmayerich
initial commit
|
204
205
206
207
208
209
210
211
212
213
214
215
|
1,
2 * PI / lambda,
focus,
sVector[s].p,
sVector[s].a,
sVector[s].n,
gpuB,
gpuA,
Nl,
A,
pos,
U.R[0],
|
396a5f12
David Mayerich
added custom code...
|
216
|
U.R[1]);
|
3f56f1f9
dmayerich
initial commit
|
217
|
HANDLE_ERROR(cudaFree(gpuk));
|
396a5f12
David Mayerich
added custom code...
|
218
219
220
221
222
223
224
225
226
227
|
}
//otherwise copy all of the monte-carlo samples to the GPU and compute
else
{
HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * inWaves.size() ));
HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * inWaves.size(), cudaMemcpyHostToDevice));
//compute the amplitude that makes it through the condenser
ptype subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );
|
3f56f1f9
dmayerich
initial commit
|
228
|
gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat,
|
396a5f12
David Mayerich
added custom code...
|
229
|
gpuk,
|
3f56f1f9
dmayerich
initial commit
|
230
231
232
233
234
235
236
237
238
239
240
241
|
inWaves.size(),
2 * PI / lambda,
focus,
sVector[s].p,
sVector[s].a,
sVector[s].n,
gpuB,
gpuA,
Nl,
subA,
pos,
U.R[0],
|
396a5f12
David Mayerich
added custom code...
|
242
243
244
245
246
247
248
249
|
U.R[1]);
HANDLE_ERROR(cudaFree(gpuk));
}
//free memory for scattering coefficients
HANDLE_ERROR(cudaFree(gpuA));
|
3f56f1f9
dmayerich
initial commit
|
250
|
HANDLE_ERROR(cudaFree(gpuB));
|
396a5f12
David Mayerich
added custom code...
|
251
252
|
}
|
3f56f1f9
dmayerich
initial commit
|
253
|
|
51b6469a
dmayerich
added look-up tables
|
254
255
|
//store the time to compute the scattered field
t_Us = gpuStopTimer();
|
3f56f1f9
dmayerich
initial commit
|
256
|
|
3f56f1f9
dmayerich
initial commit
|
257
258
|
}
|