Blame view

nfScalarUs.cu 6.68 KB
3f56f1f9   dmayerich   initial commit
1
  #include "nearfield.h"

d6f53e68   dmayerich   rts organization
2
3
  #include "rts/math/spherical_bessel.h"

  #include "rts/math/legendre.h"

3f56f1f9   dmayerich   initial commit
4
  #include <stdlib.h>

d6f53e68   dmayerich   rts organization
5
6
  #include "rts/cuda/error.h"

  #include "rts/cuda/timer.h"

3f56f1f9   dmayerich   initial commit
7
8
9
  

  __device__ bsComplex calc_Us(ptype kd, ptype cos_theta, int Nl, bsComplex* B)

  {

396a5f12   David Mayerich   added custom code...
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
  	//initialize the spherical Bessel functions

  	ptype j[2];

  	rts::init_sbesselj<ptype>(kd, j);

  	ptype y[2];

  	rts::init_sbessely<ptype>(kd, y);

  

  	//initialize the Legendre polynomial

  	ptype P[2];

  	rts::init_legendre<ptype>(cos_theta, P[0], P[1]);

  

  	//initialize the spherical Hankel function

  	bsComplex h((ptype)0, (ptype)0);

  

  	//initialize the result

  	bsComplex Us((ptype)0, (ptype)0);

  

  	//for each order up to Nl

  	for(int l=0; l<=Nl; l++)

  	{

  		if(l == 0)

  		{

  			h.r = j[0];

  			h.i = y[0];

  			Us += B[0] * h * P[0];

  		}

  		else

  		{

  			//shift the bessel functions and legendre polynomials

  			if(l > 1)

  			{

  				rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);

  				rts::shift_sbesselj<ptype>(l, kd, j);

  				rts::shift_sbessely<ptype>(l, kd, y);

  			}

  

  			h.r = j[1];

  			h.i = y[1];

  			Us += B[l] * h * P[1];

  

  

  		}

3f56f1f9   dmayerich   initial commit
51
52
53
54
55
56
57
58
59
60
61
  	}

  	return Us;

  }

  

  __device__ bsComplex calc_Ui(bsComplex knd, ptype cos_theta, int Nl, bsComplex* A)

  {

  	//calculate the internal field of a sphere

  

  	bsComplex Ui((ptype)0, (ptype)0);

  

  	//deal with rtsPoints near zero

396a5f12   David Mayerich   added custom code...
62
63
64
65
66
  	if(real(knd) < EPSILON_FLOAT)

  	{

  		//for(int l=0; l<Nl; l++)

  		Ui = A[0];

          return Ui;

3f56f1f9   dmayerich   initial commit
67
68
      }

  

396a5f12   David Mayerich   added custom code...
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
  	//initialize the spherical Bessel functions

  	bsComplex j[2];

  	rts::init_sbesselj<bsComplex>(knd, j);

  

  	//initialize the Legendre polynomial

  	ptype P[2];

  	rts::init_legendre<ptype>(cos_theta, P[0], P[1]);

  

  	//for each order up to Nl

  	for(int l=0; l<=Nl; l++)

  	{

  		if(l == 0)

  		{

  			Ui += A[0] * j[0] * P[0];

  		}

  		else

  		{

  			//shift the bessel functions and legendre polynomials

  			if(l > 1)

  			{

  				rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);

  				rts::shift_sbesselj<bsComplex>(l, knd, j);

  			}

  

  			Ui += A[l] * j[1] * P[1];

  

  

  		}

3f56f1f9   dmayerich   initial commit
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
  	}

  	return Ui;

  }

  

  __global__ void gpuScalarUsp(bsComplex* Us, bsVector* k, int nk, ptype kmag, bsPoint f, bsPoint ps, ptype a, bsComplex n, bsComplex* Beta, bsComplex* Alpha, int Nl, ptype A, bsRect ABCD, int uR, int vR)

  {

  

  	//get the current coordinate in the plane slice

  	int iu = blockIdx.x * blockDim.x + threadIdx.x;

  	int iv = blockIdx.y * blockDim.y + threadIdx.y;

  

  	//make sure that the thread indices are in-bounds

  	if(iu >= uR || iv >= vR) return;

  

  	//compute the index (easier access to the scalar field array)

  	int i = iv*uR + iu;

  

  	//compute the parameters for u and v

  	ptype u = (ptype)iu / (uR);

  	ptype v = (ptype)iv / (vR);

  

  	//get the rtsPoint in world space and then the r vector

  	bsPoint p = ABCD(u, v);

  	bsVector r = p - ps;

396a5f12   David Mayerich   added custom code...
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
  	ptype d = r.len();

  

      bsComplex sumUs(0, 0);

      //for each plane wave in the wave list

      for(int iw = 0; iw < nk; iw++)

      {

          //normalize the direction vectors and find their inner product

          r = r.norm();

          ptype cos_theta = k[iw].dot(r);

  

          //compute the phase factor for spheres that are not at the origin

          bsVector c = ps - f;

          bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));

  

          //compute the internal field if we are inside a sphere

3f56f1f9   dmayerich   initial commit
136
137
138
          if(d <= a)

          {

              bsComplex knd = kmag * d * n;

396a5f12   David Mayerich   added custom code...
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
              sumUs += (1.0f/nk) * A * phase * calc_Ui(knd, cos_theta, Nl, Alpha);

          }

          //otherwise compute the scattered field

          else

          {

              //compute the argument for the spherical Hankel function

              ptype kd = kmag * d;

              sumUs += (1.0f/nk) * A * phase * calc_Us(kd, cos_theta, Nl, Beta);

          }

  

      }

  

      Us[i] += sumUs;

  

  

3f56f1f9   dmayerich   initial commit
154
155
156
157
158
159
160
161
162
163
164
165
  }

  

  void nearfieldStruct::scalarUs()

  {

  	//get the number of spheres

  	int nSpheres = sVector.size();

  

  	//if there are no spheres, nothing to do here

  	if(nSpheres == 0)

  		return;

  

  	//time the calculation of the focused field

51b6469a   dmayerich   added look-up tables
166
  	gpuStartTimer();

3f56f1f9   dmayerich   initial commit
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
  

  	//clear the scattered field

  	U.clear_gpu();

  

  	//create one thread for each pixel of the field slice

  	dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);

  	dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

  

  	//for each sphere

  	int Nl;

  	for(int s = 0; s<nSpheres; s++)

  	{

  		//get the number of orders for this sphere

  		Nl = sVector[s].Nl;

  

  		//allocate memory for the scattering coefficients

  		bsComplex* gpuB;

  		HANDLE_ERROR(cudaMalloc((void**) &gpuB, (Nl+1) * sizeof(bsComplex)));

  

  		bsComplex* gpuA;

  		HANDLE_ERROR(cudaMalloc((void**) &gpuA, (Nl+1) * sizeof(bsComplex)));

  

  		//copy the scattering coefficients to the GPU

  		HANDLE_ERROR(cudaMemcpy(gpuB, &sVector[s].B[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice));

  		HANDLE_ERROR(cudaMemcpy(gpuA, &sVector[s].A[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice));

  

396a5f12   David Mayerich   added custom code...
193
194
195
  		//if we are computing a plane wave, call the gpuScalarUfp function

  		sphere S = sVector[s];

  		bsVector* gpuk;

3f56f1f9   dmayerich   initial commit
196
197
  

  		if(planeWave)

396a5f12   David Mayerich   added custom code...
198
199
200
  		{

              //if this is a single plane wave, assume it goes along direction k (copy the k vector to the GPU)

              HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ));

3f56f1f9   dmayerich   initial commit
201
202
              HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice));

  			gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat,

396a5f12   David Mayerich   added custom code...
203
  												gpuk,

3f56f1f9   dmayerich   initial commit
204
205
206
207
208
209
210
211
212
213
214
215
  												1,

  												2 * PI / lambda,

  												focus,

  												sVector[s].p,

  												sVector[s].a,

  												sVector[s].n,

  												gpuB,

  												gpuA,

  												Nl,

  												A,

  												pos,

  												U.R[0],

396a5f12   David Mayerich   added custom code...
216
  												U.R[1]);

3f56f1f9   dmayerich   initial commit
217
              HANDLE_ERROR(cudaFree(gpuk));

396a5f12   David Mayerich   added custom code...
218
219
220
221
222
223
224
225
226
227
  		}

  		//otherwise copy all of the monte-carlo samples to the GPU and compute

  		else

  		{

              HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * inWaves.size() ));

              HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * inWaves.size(), cudaMemcpyHostToDevice));

  

              //compute the amplitude that makes it through the condenser

              ptype subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );

  

3f56f1f9   dmayerich   initial commit
228
              gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat,

396a5f12   David Mayerich   added custom code...
229
  												gpuk,

3f56f1f9   dmayerich   initial commit
230
231
232
233
234
235
236
237
238
239
240
241
  												inWaves.size(),

  												2 * PI / lambda,

  												focus,

  												sVector[s].p,

  												sVector[s].a,

  												sVector[s].n,

  												gpuB,

  												gpuA,

  												Nl,

  												subA,

  												pos,

  												U.R[0],

396a5f12   David Mayerich   added custom code...
242
243
244
245
246
247
248
249
  												U.R[1]);

              HANDLE_ERROR(cudaFree(gpuk));

  

  

  		}

  

  		//free memory for scattering coefficients

          HANDLE_ERROR(cudaFree(gpuA));

3f56f1f9   dmayerich   initial commit
250
          HANDLE_ERROR(cudaFree(gpuB));

396a5f12   David Mayerich   added custom code...
251
252
  	}

  

3f56f1f9   dmayerich   initial commit
253
  

51b6469a   dmayerich   added look-up tables
254
255
      //store the time to compute the scattered field

  	t_Us = gpuStopTimer();

3f56f1f9   dmayerich   initial commit
256
  

3f56f1f9   dmayerich   initial commit
257
258
  

  }