Blame view

nfScalarUfLut.cu 5.46 KB
51b6469a   dmayerich   added look-up tables
1
2
3
  #include "nearfield.h"
  
  #include "rts/math/legendre.h"
396a5f12   David Mayerich   added custom code...
4
  #include "rts/cuda/error.h"
51b6469a   dmayerich   added look-up tables
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
  #include "rts/cuda/timer.h"
  
  texture<float, cudaTextureType2D> texJ;
  
  __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR);
  
  __global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoint f, bsVector k, ptype A, ptype cosAlpha, ptype cosBeta, int nl, ptype dmin, ptype dmax, int dR)
  {
      /*This function computes the focused field for a 2D slice
  
      Uf      =   destination field slice
      ABCD    =   plane representing the field slice in world space
      uR, vR  =   resolution of the Uf field
      f       =   focal point of the condenser
      k       =   direction of the incident light
      A       =   amplitude of the incident field
      cosAlpha=   cosine of the solid angle subtended by the condenser obscuration
      cosBeta =   cosine of the solid angle subtended by the condenser aperature
      nl      =   number of orders used to compute the field
      dR      =   number of Bessel function values in the look-up texture
  
      */
  
396a5f12   David Mayerich   added custom code...
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
      //get the current coordinate in the plane slice
  	int iu = blockIdx.x * blockDim.x + threadIdx.x;
  	int iv = blockIdx.y * blockDim.y + threadIdx.y;
  
  	//make sure that the thread indices are in-bounds
  	if(iu >= uR || iv >= vR) return;
  
  	//compute the index (easier access to the scalar field array)
  	int i = iv*uR + iu;
  
  	//compute the parameters for u and v
  	ptype u = (ptype)iu / (uR);
  	ptype v = (ptype)iv / (vR);
  
  	
  
  	//get the rtsPoint in world space and then the r vector
  	bsPoint p = ABCD(u, v);
  	bsVector r = p - f;
51b6469a   dmayerich   added look-up tables
47
48
49
50
51
52
  	ptype d = r.len();
  
  	if(d == 0)
  	{
          Uf[i] = A * 2 * PI * (cosAlpha - cosBeta);
          return;
396a5f12   David Mayerich   added custom code...
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
      }
  
  	//get info for the light direction and frequency
  	r = r.norm();
  
  	//compute the imaginary factor i^l
  	bsComplex im = bsComplex(0, 1);
  	bsComplex il = bsComplex(1, 0);
  
  	//Legendre functions are computed dynamically to save memory
  	//initialize the Legendre functions
  
  	ptype P[2];
  	//get the angle between k and r (light direction and position vector)
  	ptype cosTheta;
51b6469a   dmayerich   added look-up tables
68
69
  	cosTheta = k.dot(r);
  
396a5f12   David Mayerich   added custom code...
70
71
72
73
74
75
76
77
78
79
80
81
82
83
  	rts::init_legendre<ptype>(cosTheta, P[0], P[1]);
  
  	//initialize legendre functions for the cassegrain angles
  	ptype Palpha[3];
  	rts::init_legendre<ptype>(cosAlpha, Palpha[0], Palpha[1]);
  	Palpha[2] = 1;
  
  	ptype Pbeta[3];
  	rts::init_legendre<ptype>(cosBeta, Pbeta[0], Pbeta[1]);
  	Pbeta[2] = 1;
  
  	//for each order l
  	bsComplex sumUf(0, 0);
  	ptype jl = 0;
51b6469a   dmayerich   added look-up tables
84
  	ptype Pl;
396a5f12   David Mayerich   added custom code...
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
  	ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1);
  	for(int l = 0; l<=nl; l++)
  	{
          jl = tex2D(texJ, l + 0.5f, di + 0.5f);
  		if(l==0)
  			Pl = P[0];
  		else if(l==1)
  		{
  			Pl = P[1];
  
  			//adjust the cassegrain Legendre function
  			Palpha[2] = Palpha[0];
  			rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
  			Pbeta[2] = Pbeta[0];
  			rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
  		}
  		else
  		{
  			rts::shift_legendre<ptype>(l, cosTheta, P[0], P[1]);
  
  			Pl = P[1];
  
  			//adjust the cassegrain outer Legendre function
  			Palpha[2] = Palpha[0];
  			rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
  			Pbeta[2] = Pbeta[0];
  			rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
  		}
  
51b6469a   dmayerich   added look-up tables
114
115
  		sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);
  		//sumUf += jl;
396a5f12   David Mayerich   added custom code...
116
117
118
119
  
  		il *= im;
  	}
  
51b6469a   dmayerich   added look-up tables
120
  	Uf[i] = sumUf * 2 * PI * A;
396a5f12   David Mayerich   added custom code...
121
  	//Uf[i] = u;
51b6469a   dmayerich   added look-up tables
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
  	//return;
  }
  
  void nearfieldStruct::scalarUfLut()
  {
      gpuStartTimer();
  	
      //calculate the minimum and maximum points in the focused field
      d_min = pos.dist(focus);
      d_max = pos.dist_max(focus);
  
      //allocate space for the Bessel function
      int dR = 2 * max(Uf.R[0], Uf.R[1]);
      ptype* j = NULL;
  	j = (ptype*) malloc(sizeof(ptype) * dR * (m+1));
  
  	//calculate Bessel function LUT
  	calcBesselLut(j, d_min, d_max, dR);
  	
      //create a CUDA array structure and specify the format description
  	cudaArray* arrayJ;
      cudaChannelFormatDesc channelDesc =
          cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
  	
      //allocate memory
      HANDLE_ERROR(cudaMallocArray(&arrayJ, &channelDesc, m+1, dR));
  	
      //specify texture properties
      texJ.addressMode[0] = cudaAddressModeMirror;
      texJ.addressMode[1] = cudaAddressModeMirror;
      texJ.filterMode     = cudaFilterModeLinear;
      texJ.normalized     = false;
  
      //bind the texture to the array
      HANDLE_ERROR(cudaBindTextureToArray(texJ, arrayJ, channelDesc));
  
      //copy the CPU Bessel LUT to the GPU-based array
      HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice));
  
      //----------------Compute the focused field
396a5f12   David Mayerich   added custom code...
162
163
      //create one thread for each pixel of the field slice
  	dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
51b6469a   dmayerich   added look-up tables
164
  	dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
396a5f12   David Mayerich   added custom code...
165
166
167
168
169
170
171
172
  
  	//if we are computing a plane wave, call the gpuScalarUfp function
  	if(planeWave)
  	{
  		gpuScalarUfp<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]);
  	}
  	//otherwise compute the condenser info and create a focused field
  	else
51b6469a   dmayerich   added look-up tables
173
174
175
  	{
  		//pre-compute the cosine of the obscuration and objective angles
  		ptype cosAlpha = cos(asin(condenser[0]));
396a5f12   David Mayerich   added custom code...
176
177
178
  		ptype cosBeta = cos(asin(condenser[1]));
  		//compute the scalar Uf field (this will be in the x_hat channel of Uf)
  		gpuScalarUfLut<<<dimGrid, dimBlock>>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR);
51b6469a   dmayerich   added look-up tables
179
180
181
182
183
184
185
186
187
188
  	}
  
  	
      //free everything
  	free(j);
  	
  	HANDLE_ERROR(cudaFreeArray(arrayJ));
  
  	t_Uf = gpuStopTimer();
  }