nfScalarUpLut.cu
7.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#include "nearfield.h"
#include "rts/math/spherical_bessel.h"
#include "rts/math/legendre.h"
#include <stdlib.h>
#include "rts/cuda/error.h"
#include "rts/cuda/timer.h"
texture<float2, cudaTextureType2D> texUsp;
texture<float2, cudaTextureType2D> texUip;
__global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR)
{
/*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field
produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field.
Us = final scattered field
k = list of incoming plane waves (Monte-Carlo samples)
nk = number of incoming MC samples
kmag= magnitude of the incoming field 2pi/lambda
dmin= minimum distance of the Usp texture
dmax= maximum distance of the Usp texture
f = position of the focus
ps = position of the sphere
A = total amplitude of the incident field arriving at the focal spot
ABCD= rectangle representing the field slice
uR = resolution of the field slice in the u direction
vR = resolution of the field slice in the v direction
dR = resolution of the Usp texture in the d direction
thetaR= resolution of the Usp texture in the theta direction
*/
//get the current coordinate in the plane slice
int iu = blockIdx.x * blockDim.x + threadIdx.x;
int iv = blockIdx.y * blockDim.y + threadIdx.y;
//make sure that the thread indices are in-bounds
if(iu >= uR || iv >= vR) return;
//compute the index (easier access to the scalar field array)
int i = iv*uR + iu;
//compute the parameters for u and v
ptype u = (ptype)iu / (uR);
ptype v = (ptype)iv / (vR);
//get the rtsPoint in world space and then the r vector
bsPoint p = ABCD(u, v);
bsVector r = p - ps;
ptype d = r.len();
float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1);
float ai = ( (d - dmin)/(a - dmin)) * (aR - 1);
bsComplex sumUs(0, 0);
//for each plane wave in the wave list
for(int iw = 0; iw < nk; iw++)
{
//normalize the direction vectors and find their inner product
r = r.norm();
ptype cos_theta = k[iw].dot(r);
if(cos_theta < -1)
cos_theta = -1;
if(cos_theta > 1)
cos_theta = 1;
float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1);
//compute the phase factor for spheres that are not at the origin
bsVector c = ps - f;
bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));
//compute the internal field if we are inside a sphere
if(d < a)
{
float2 Uip = tex2D(texUip, ai + 0.5, thetai + 0.5);
sumUs += (1.0/nk) * A * phase * bsComplex(Uip.x, Uip.y);
}
//otherwise compute the scattered field
else
{
float2 Usp = tex2D(texUsp, di + 0.5, thetai + 0.5);
sumUs += (1.0/nk) * A * phase * bsComplex(Usp.x, Usp.y);
}
}
Us[i] += sumUs;
}
void nearfieldStruct::scalarUpLut()
{
//get the number of spheres
int nSpheres = sVector.size();
//if there are no spheres, nothing to do here
if(nSpheres == 0)
return;
//time the calculation of the focused field
gpuStartTimer();
//clear the scattered field
U.clear_gpu();
//create one thread for each pixel of the field slice
dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
//copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff)
bsVector* gpuk;
int nWaves;
ptype subA;
if(planeWave)
{
nWaves = 1;
HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) );
HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice));
subA = A;
}
else
{
nWaves = inWaves.size();
HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nWaves ) );
HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nWaves, cudaMemcpyHostToDevice));
//compute the amplitude that makes it through the condenser
subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );
}
//for each sphere
for(int s = 0; s<nSpheres; s++)
{
//get the current sphere
//sphere S = sVector[s];
//allocate space for the Usp and Uip textures
//allocate the cuda array
cudaArray* arrayUsp;
cudaArray* arrayUip;
cudaChannelFormatDesc channelDescUsp =
cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);
cudaChannelFormatDesc channelDescUip =
cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);
int dR = sVector[s].Usp.R[0];
int thetaR = sVector[s].Usp.R[1];
int aR = sVector[s].Uip.R[0];
HANDLE_ERROR(cudaMallocArray(&arrayUsp, &channelDescUsp, dR, thetaR));
HANDLE_ERROR(cudaMallocArray(&arrayUip, &channelDescUip, aR, thetaR));
texUsp.addressMode[0] = cudaAddressModeMirror;
texUsp.addressMode[1] = cudaAddressModeMirror;
texUsp.filterMode = cudaFilterModeLinear;
texUsp.normalized = false;
texUip.addressMode[0] = cudaAddressModeMirror;
texUip.addressMode[1] = cudaAddressModeMirror;
texUip.filterMode = cudaFilterModeLinear;
texUip.normalized = false;
HANDLE_ERROR(cudaBindTextureToArray(texUsp, arrayUsp, channelDescUsp));
HANDLE_ERROR(cudaBindTextureToArray(texUip, arrayUip, channelDescUip));
//copy the LUT to the Usp texture
HANDLE_ERROR( cudaMemcpy2DToArray(arrayUsp, 0, 0, sVector[s].Usp.x_hat, dR*sizeof(float2), dR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));
HANDLE_ERROR( cudaMemcpy2DToArray(arrayUip, 0, 0, sVector[s].Uip.x_hat, aR*sizeof(float2), aR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));
gpuScalarUpLut<<<dimGrid, dimBlock>>>(U.x_hat,
gpuk,
nWaves,
2 * PI / lambda,
sVector[s].a,
sVector[s].d_min,
sVector[s].d_max,
focus,
sVector[s].p,
subA,
pos,
U.R[0],
U.R[1],
dR,
aR,
thetaR);
cudaFreeArray(arrayUsp);
cudaFreeArray(arrayUip);
}
//store the time to compute the scattered field
t_Us = gpuStopTimer();
//free monte-carlo samples
cudaFree(gpuk);
}