Blame view

nfScalarUpLut.cu 7.02 KB
51b6469a   dmayerich   added look-up tables
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
  #include "nearfield.h"

  #include "rts/math/spherical_bessel.h"

  #include "rts/math/legendre.h"

  #include <stdlib.h>

  #include "rts/cuda/error.h"

  #include "rts/cuda/timer.h"
  
  texture<float2, cudaTextureType2D> texUsp;
  texture<float2, cudaTextureType2D> texUip;
  
  __global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR)

  {
      /*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field
          produced by a plane wave through a sphere.  The MC sampling is used to approximate a focused field.
  
          Us  =   final scattered field
          k   =   list of incoming plane waves (Monte-Carlo samples)
          nk  =   number of incoming MC samples
          kmag=   magnitude of the incoming field 2pi/lambda
          dmin=   minimum distance of the Usp texture
          dmax=   maximum distance of the Usp texture
          f   =   position of the focus
          ps  =   position of the sphere
          A   =   total amplitude of the incident field arriving at the focal spot
          ABCD=   rectangle representing the field slice
          uR  =   resolution of the field slice in the u direction
          vR  =   resolution of the field slice in the v direction
          dR  =   resolution of the Usp texture in the d direction
          thetaR= resolution of the Usp texture in the theta direction
      */

  

  	//get the current coordinate in the plane slice

  	int iu = blockIdx.x * blockDim.x + threadIdx.x;

  	int iv = blockIdx.y * blockDim.y + threadIdx.y;

  

  	//make sure that the thread indices are in-bounds

  	if(iu >= uR || iv >= vR) return;

  

  	//compute the index (easier access to the scalar field array)

  	int i = iv*uR + iu;

  

  	//compute the parameters for u and v

  	ptype u = (ptype)iu / (uR);

  	ptype v = (ptype)iv / (vR);

  

  	//get the rtsPoint in world space and then the r vector

  	bsPoint p = ABCD(u, v);

  	bsVector r = p - ps;

  	ptype d = r.len();
  	float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1);
  	float ai = ( (d - dmin)/(a - dmin)) * (aR - 1);
  
      bsComplex sumUs(0, 0);
      //for each plane wave in the wave list
      for(int iw = 0; iw < nk; iw++)
      {
          //normalize the direction vectors and find their inner product
          r = r.norm();
          ptype cos_theta = k[iw].dot(r);
          if(cos_theta < -1)
              cos_theta = -1;
          if(cos_theta > 1)
              cos_theta = 1;
          float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1);
  
          //compute the phase factor for spheres that are not at the origin
          bsVector c = ps - f;
          bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));
  
          //compute the internal field if we are inside a sphere
          if(d < a)

          {

  			float2 Uip = tex2D(texUip, ai + 0.5, thetai + 0.5);
  			sumUs += (1.0/nk) * A * phase * bsComplex(Uip.x, Uip.y);
          }
          //otherwise compute the scattered field
          else
          {
              float2 Usp = tex2D(texUsp, di + 0.5, thetai + 0.5);
              sumUs += (1.0/nk) * A * phase * bsComplex(Usp.x, Usp.y);
          }
  
      }
  
      Us[i] += sumUs;
  }
  
  void nearfieldStruct::scalarUpLut()
  {
      //get the number of spheres

  	int nSpheres = sVector.size();

  

  	//if there are no spheres, nothing to do here

  	if(nSpheres == 0)

  		return;

  

  	//time the calculation of the focused field

  	gpuStartTimer();

  

  	//clear the scattered field

  	U.clear_gpu();

  

  	//create one thread for each pixel of the field slice

  	dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);

  	dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

  
      //copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff)
      bsVector* gpuk;
      int nWaves;
      ptype subA;
      if(planeWave)
      {
          nWaves = 1;
          HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) );
          HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice));
          subA = A;
      }
      else
      {
          nWaves = inWaves.size();
          HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nWaves ) );
          HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nWaves, cudaMemcpyHostToDevice));
          //compute the amplitude that makes it through the condenser
          subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );
      }
  

  	//for each sphere

  	for(int s = 0; s<nSpheres; s++)

  	{
          //get the current sphere
  		//sphere S = sVector[s];
  
          //allocate space for the Usp and Uip textures
          //allocate the cuda array
          cudaArray* arrayUsp;
  		cudaArray* arrayUip;
          cudaChannelFormatDesc channelDescUsp =
              cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);
  		cudaChannelFormatDesc channelDescUip =
              cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);
          int dR = sVector[s].Usp.R[0];
          int thetaR = sVector[s].Usp.R[1];
  		int aR = sVector[s].Uip.R[0];
          HANDLE_ERROR(cudaMallocArray(&arrayUsp, &channelDescUsp, dR, thetaR));
  		HANDLE_ERROR(cudaMallocArray(&arrayUip, &channelDescUip, aR, thetaR));
  
          texUsp.addressMode[0] = cudaAddressModeMirror;
          texUsp.addressMode[1] = cudaAddressModeMirror;
          texUsp.filterMode     = cudaFilterModeLinear;
          texUsp.normalized     = false;
  
  		texUip.addressMode[0] = cudaAddressModeMirror;
          texUip.addressMode[1] = cudaAddressModeMirror;
          texUip.filterMode     = cudaFilterModeLinear;
          texUip.normalized     = false;
          HANDLE_ERROR(cudaBindTextureToArray(texUsp, arrayUsp, channelDescUsp));
  		HANDLE_ERROR(cudaBindTextureToArray(texUip, arrayUip, channelDescUip));
  
          //copy the LUT to the Usp texture
          HANDLE_ERROR( cudaMemcpy2DToArray(arrayUsp, 0, 0, sVector[s].Usp.x_hat, dR*sizeof(float2), dR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));
  		HANDLE_ERROR( cudaMemcpy2DToArray(arrayUip, 0, 0, sVector[s].Uip.x_hat, aR*sizeof(float2), aR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));
  
          gpuScalarUpLut<<<dimGrid, dimBlock>>>(U.x_hat,

                                              gpuk,
                                              nWaves,

                                              2 * PI / lambda,
                                              sVector[s].a,
                                              sVector[s].d_min,
                                              sVector[s].d_max,

                                              focus,

                                              sVector[s].p,
                                              subA,
                                              pos,

                                              U.R[0],

                                              U.R[1],
                                              dR,
                                              aR,
  											thetaR);
  
          cudaFreeArray(arrayUsp);
          cudaFreeArray(arrayUip);
  
  	}
  
  

      //store the time to compute the scattered field

  	t_Us = gpuStopTimer();
  
  	//free monte-carlo samples
  	cudaFree(gpuk);
  
  }