Blame view

nfScalarUs.cu 6.59 KB
3f56f1f9   dmayerich   initial commit
1
  #include "nearfield.h"

d6f53e68   dmayerich   rts organization
2
3
  #include "rts/math/spherical_bessel.h"

  #include "rts/math/legendre.h"

3f56f1f9   dmayerich   initial commit
4
  #include <stdlib.h>

d6f53e68   dmayerich   rts organization
5
6
  #include "rts/cuda/error.h"

  #include "rts/cuda/timer.h"

3f56f1f9   dmayerich   initial commit
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
  

  __device__ bsComplex calc_Us(ptype kd, ptype cos_theta, int Nl, bsComplex* B)

  {

  	//initialize the spherical Bessel functions
  	ptype j[2];
  	rts::init_sbesselj<ptype>(kd, j);
  	ptype y[2];
  	rts::init_sbessely<ptype>(kd, y);
  
  	//initialize the Legendre polynomial
  	ptype P[2];
  	rts::init_legendre<ptype>(cos_theta, P[0], P[1]);
  
  	//initialize the spherical Hankel function
  	bsComplex h((ptype)0, (ptype)0);
  
  	//initialize the result
  	bsComplex Us((ptype)0, (ptype)0);
  
  	//for each order up to Nl
  	for(int l=0; l<=Nl; l++)
  	{
  		if(l == 0)
  		{
  			h.r = j[0];
  			h.i = y[0];
  			Us += B[0] * h * P[0];
  		}
  		else
  		{
  			//shift the bessel functions and legendre polynomials
  			if(l > 1)
  			{
  				rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);
  				rts::shift_sbesselj<ptype>(l, kd, j);
  				rts::shift_sbessely<ptype>(l, kd, y);
  			}
  
  			h.r = j[1];
  			h.i = y[1];
  			Us += B[l] * h * P[1];
  
  
  		}
  	}

  	return Us;

  }

  

  __device__ bsComplex calc_Ui(bsComplex knd, ptype cos_theta, int Nl, bsComplex* A)

  {

  	//calculate the internal field of a sphere

  

  	bsComplex Ui((ptype)0, (ptype)0);

  

  	//deal with rtsPoints near zero

  	if(real(knd) < EPSILON_FLOAT)
  	{
  		//for(int l=0; l<Nl; l++)
  		Ui = A[0];
          return Ui;
      }

  

  	//initialize the spherical Bessel functions
  	bsComplex j[2];
  	rts::init_sbesselj<bsComplex>(knd, j);
  
  	//initialize the Legendre polynomial
  	ptype P[2];
  	rts::init_legendre<ptype>(cos_theta, P[0], P[1]);
  
  	//for each order up to Nl
  	for(int l=0; l<=Nl; l++)
  	{
  		if(l == 0)
  		{
  			Ui += A[0] * j[0] * P[0];
  		}
  		else
  		{
  			//shift the bessel functions and legendre polynomials
  			if(l > 1)
  			{
  				rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);
  				rts::shift_sbesselj<bsComplex>(l, knd, j);
  			}
  
  			Ui += A[l] * j[1] * P[1];
  
  
  		}
  	}

  	return Ui;

  }

  

  __global__ void gpuScalarUsp(bsComplex* Us, bsVector* k, int nk, ptype kmag, bsPoint f, bsPoint ps, ptype a, bsComplex n, bsComplex* Beta, bsComplex* Alpha, int Nl, ptype A, bsRect ABCD, int uR, int vR)

  {

  

  	//get the current coordinate in the plane slice

  	int iu = blockIdx.x * blockDim.x + threadIdx.x;

  	int iv = blockIdx.y * blockDim.y + threadIdx.y;

  

  	//make sure that the thread indices are in-bounds

  	if(iu >= uR || iv >= vR) return;

  

  	//compute the index (easier access to the scalar field array)

  	int i = iv*uR + iu;

  

  	//compute the parameters for u and v

  	ptype u = (ptype)iu / (uR);

  	ptype v = (ptype)iv / (vR);

  

  	//get the rtsPoint in world space and then the r vector

  	bsPoint p = ABCD(u, v);

  	bsVector r = p - ps;

  	ptype d = r.len();
  
      bsComplex sumUs(0, 0);
      //for each plane wave in the wave list
      for(int iw = 0; iw < nk; iw++)
      {
          //normalize the direction vectors and find their inner product
          r = r.norm();
          ptype cos_theta = k[iw].dot(r);
  
          //compute the phase factor for spheres that are not at the origin
          bsVector c = ps - f;
          bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));
  
          //compute the internal field if we are inside a sphere
          if(d <= a)

          {

              bsComplex knd = kmag * d * n;

              sumUs += (1.0/nk) * A * phase * calc_Ui(knd, cos_theta, Nl, Alpha);
          }
          //otherwise compute the scattered field
          else
          {
              //compute the argument for the spherical Hankel function
              ptype kd = kmag * d;
              sumUs += (1.0/nk) * A * phase * calc_Us(kd, cos_theta, Nl, Beta);
          }
  
      }
  
      Us[i] += sumUs;
  
  
  }

  

  void nearfieldStruct::scalarUs()

  {

  	//get the number of spheres

  	int nSpheres = sVector.size();

  

  	//if there are no spheres, nothing to do here

  	if(nSpheres == 0)

  		return;

  

  	//time the calculation of the focused field

  	//gpuStartTimer();

  

  	//clear the scattered field

  	U.clear_gpu();

  

  	//create one thread for each pixel of the field slice

  	dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);

  	dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

  

  	//for each sphere

  	int Nl;

  	for(int s = 0; s<nSpheres; s++)

  	{

  		//get the number of orders for this sphere

  		Nl = sVector[s].Nl;

  

  		//allocate memory for the scattering coefficients

  		bsComplex* gpuB;

  		HANDLE_ERROR(cudaMalloc((void**) &gpuB, (Nl+1) * sizeof(bsComplex)));

  

  		bsComplex* gpuA;

  		HANDLE_ERROR(cudaMalloc((void**) &gpuA, (Nl+1) * sizeof(bsComplex)));

  

  		//copy the scattering coefficients to the GPU

  		HANDLE_ERROR(cudaMemcpy(gpuB, &sVector[s].B[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice));

  		HANDLE_ERROR(cudaMemcpy(gpuA, &sVector[s].A[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice));

  

  		//if we are computing a plane wave, call the gpuScalarUfp function
  		sphere S = sVector[s];
  		bsVector* gpuk;
  

  		if(planeWave)

  		{
              //if this is a single plane wave, assume it goes along direction k (copy the k vector to the GPU)
              HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ));
              HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice));

  			gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat,

  												gpuk,
  												1,

  												2 * PI / lambda,

  												focus,

  												sVector[s].p,

  												sVector[s].a,

  												sVector[s].n,

  												gpuB,

  												gpuA,

  												Nl,

  												A,

  												pos,

  												U.R[0],

  												U.R[1]);
              HANDLE_ERROR(cudaFree(gpuk));

  		}
  		//otherwise copy all of the monte-carlo samples to the GPU and compute
  		else
  		{
              HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * inWaves.size() ));
              HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * inWaves.size(), cudaMemcpyHostToDevice));
  
              //compute the amplitude that makes it through the condenser
              ptype subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );
  
              gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat,

  												gpuk,
  												inWaves.size(),

  												2 * PI / lambda,

  												focus,

  												sVector[s].p,

  												sVector[s].a,

  												sVector[s].n,

  												gpuB,

  												gpuA,

  												Nl,

  												subA,

  												pos,

  												U.R[0],

  												U.R[1]);
              HANDLE_ERROR(cudaFree(gpuk));
  
  
  		}
  
  		//free memory for scattering coefficients
          HANDLE_ERROR(cudaFree(gpuA));
          HANDLE_ERROR(cudaFree(gpuB));

  	}
  
  

  

  	//float t = gpuStopTimer();

  	//std::cout<<"Scalar Us Time: "<<t<<"ms"<<std::endl;
  	//std::cout<<focus<<std::endl;

  

  }