Blame view

nfScalarUpLut.cu 7.16 KB
51b6469a   dmayerich   added look-up tables
1
2
3
4
5
  #include "nearfield.h"

  #include "rts/math/spherical_bessel.h"

  #include "rts/math/legendre.h"

  #include <stdlib.h>

  #include "rts/cuda/error.h"

396a5f12   David Mayerich   added custom code...
6
7
8
9
10
  #include "rts/cuda/timer.h"

  

  texture<float2, cudaTextureType2D> texUsp;

  texture<float2, cudaTextureType2D> texUip;

  

51b6469a   dmayerich   added look-up tables
11
  __global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR)

396a5f12   David Mayerich   added custom code...
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
  {

      /*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field

          produced by a plane wave through a sphere.  The MC sampling is used to approximate a focused field.

  

          Us  =   final scattered field

          k   =   list of incoming plane waves (Monte-Carlo samples)

          nk  =   number of incoming MC samples

          kmag=   magnitude of the incoming field 2pi/lambda

          dmin=   minimum distance of the Usp texture

          dmax=   maximum distance of the Usp texture

          f   =   position of the focus

          ps  =   position of the sphere

          A   =   total amplitude of the incident field arriving at the focal spot

          ABCD=   rectangle representing the field slice

          uR  =   resolution of the field slice in the u direction

          vR  =   resolution of the field slice in the v direction

          dR  =   resolution of the Usp texture in the d direction

          thetaR= resolution of the Usp texture in the theta direction

51b6469a   dmayerich   added look-up tables
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
      */

  

  	//get the current coordinate in the plane slice

  	int iu = blockIdx.x * blockDim.x + threadIdx.x;

  	int iv = blockIdx.y * blockDim.y + threadIdx.y;

  

  	//make sure that the thread indices are in-bounds

  	if(iu >= uR || iv >= vR) return;

  

  	//compute the index (easier access to the scalar field array)

  	int i = iv*uR + iu;

  

  	//compute the parameters for u and v

  	ptype u = (ptype)iu / (uR);

  	ptype v = (ptype)iv / (vR);

  

  	//get the rtsPoint in world space and then the r vector

  	bsPoint p = ABCD(u, v);

  	bsVector r = p - ps;

396a5f12   David Mayerich   added custom code...
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
  	ptype d = r.len();

  	float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1);

  	float ai = ( (d - dmin)/(a - dmin)) * (aR - 1);

  

      bsComplex sumUs(0, 0);

      //for each plane wave in the wave list

      for(int iw = 0; iw < nk; iw++)

      {

          //normalize the direction vectors and find their inner product

          r = r.norm();

          ptype cos_theta = k[iw].dot(r);

          if(cos_theta < -1)

              cos_theta = -1;

          if(cos_theta > 1)

              cos_theta = 1;

          float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1);

  

          //compute the phase factor for spheres that are not at the origin

          bsVector c = ps - f;

          bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));

  

          //compute the internal field if we are inside a sphere

51b6469a   dmayerich   added look-up tables
71
72
          if(d < a)

          {

396a5f12   David Mayerich   added custom code...
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
  			float2 Uip = tex2D(texUip, ai + 0.5f, thetai + 0.5f);

  			sumUs += (1.0f/nk) * A * phase * bsComplex(Uip.x, Uip.y);

          }

          //otherwise compute the scattered field

          else

          {

              float2 Usp = tex2D(texUsp, di + 0.5f, thetai + 0.5f);

              sumUs += (1.0f/nk) * A * phase * bsComplex(Usp.x, Usp.y);

          }

  

      }

  

      Us[i] += sumUs;

  }

  

  void nearfieldStruct::scalarUpLut()

  {

51b6469a   dmayerich   added look-up tables
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
      //get the number of spheres

  	int nSpheres = sVector.size();

  

  	//if there are no spheres, nothing to do here

  	if(nSpheres == 0)

  		return;

  

  	//time the calculation of the focused field

  	gpuStartTimer();

  

  	//clear the scattered field

  	U.clear_gpu();

  

  	//create one thread for each pixel of the field slice

  	dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);

  	dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

396a5f12   David Mayerich   added custom code...
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
  

      //copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff)

      bsVector* gpuk;

      int nWaves;

      ptype subA;

      if(planeWave)

      {

          nWaves = 1;

          HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) );

          HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice));

          subA = A;

      }

      else

      {

          nWaves = inWaves.size();

          HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nWaves ) );

          HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nWaves, cudaMemcpyHostToDevice));

          //compute the amplitude that makes it through the condenser

          subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );

      }

51b6469a   dmayerich   added look-up tables
126
127
128
  

  	//for each sphere

  	for(int s = 0; s<nSpheres; s++)

396a5f12   David Mayerich   added custom code...
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
  	{

          //get the current sphere

  		//sphere S = sVector[s];

  

          //allocate space for the Usp and Uip textures

          //allocate the cuda array

          cudaArray* arrayUsp;

  		cudaArray* arrayUip;

          cudaChannelFormatDesc channelDescUsp =

              cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);

  		cudaChannelFormatDesc channelDescUip =

              cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);

          int dR = sVector[s].Usp.R[0];

          int thetaR = sVector[s].Usp.R[1];

  		int aR = sVector[s].Uip.R[0];

          HANDLE_ERROR(cudaMallocArray(&arrayUsp, &channelDescUsp, dR, thetaR));

  		HANDLE_ERROR(cudaMallocArray(&arrayUip, &channelDescUip, aR, thetaR));

  

          texUsp.addressMode[0] = cudaAddressModeMirror;

          texUsp.addressMode[1] = cudaAddressModeMirror;

          texUsp.filterMode     = cudaFilterModeLinear;

          texUsp.normalized     = false;

  

  		texUip.addressMode[0] = cudaAddressModeMirror;

          texUip.addressMode[1] = cudaAddressModeMirror;

          texUip.filterMode     = cudaFilterModeLinear;

          texUip.normalized     = false;

          HANDLE_ERROR(cudaBindTextureToArray(texUsp, arrayUsp, channelDescUsp));

  		HANDLE_ERROR(cudaBindTextureToArray(texUip, arrayUip, channelDescUip));

  

          //copy the LUT to the Usp texture

          HANDLE_ERROR( cudaMemcpy2DToArray(arrayUsp, 0, 0, sVector[s].Usp.x_hat, dR*sizeof(float2), dR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));

  		HANDLE_ERROR( cudaMemcpy2DToArray(arrayUip, 0, 0, sVector[s].Uip.x_hat, aR*sizeof(float2), aR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));

  

51b6469a   dmayerich   added look-up tables
163
          gpuScalarUpLut<<<dimGrid, dimBlock>>>(U.x_hat,

396a5f12   David Mayerich   added custom code...
164
                                              gpuk,

51b6469a   dmayerich   added look-up tables
165
                                              nWaves,

396a5f12   David Mayerich   added custom code...
166
167
168
                                              2 * PI / lambda,

                                              sVector[s].a,

                                              sVector[s].d_min,

51b6469a   dmayerich   added look-up tables
169
170
                                              sVector[s].d_max,

                                              focus,

396a5f12   David Mayerich   added custom code...
171
172
                                              sVector[s].p,

                                              subA,

51b6469a   dmayerich   added look-up tables
173
174
                                              pos,

                                              U.R[0],

396a5f12   David Mayerich   added custom code...
175
176
177
178
179
180
181
182
183
184
                                              U.R[1],

                                              dR,

                                              aR,

  											thetaR);

  

          cudaFreeArray(arrayUsp);

          cudaFreeArray(arrayUip);

  

  	}

  

51b6469a   dmayerich   added look-up tables
185
186
  

      //store the time to compute the scattered field

396a5f12   David Mayerich   added custom code...
187
188
189
190
191
192
  	t_Us = gpuStopTimer();

  

  	//free monte-carlo samples

  	cudaFree(gpuk);

  

  }