#include "nearfield.h" #include "rts/math/legendre.h" #include "rts/cuda/error.h" #include "rts/cuda/timer.h" texture texJ; __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR); __global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoint f, bsVector k, ptype A, ptype cosAlpha, ptype cosBeta, int nl, ptype dmin, ptype dmax, int dR) { /*This function computes the focused field for a 2D slice Uf = destination field slice ABCD = plane representing the field slice in world space uR, vR = resolution of the Uf field f = focal point of the condenser k = direction of the incident light A = amplitude of the incident field cosAlpha= cosine of the solid angle subtended by the condenser obscuration cosBeta = cosine of the solid angle subtended by the condenser aperature nl = number of orders used to compute the field dR = number of Bessel function values in the look-up texture */ //get the current coordinate in the plane slice int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= uR || iv >= vR) return; //compute the index (easier access to the scalar field array) int i = iv*uR + iu; //compute the parameters for u and v ptype u = (ptype)iu / (uR); ptype v = (ptype)iv / (vR); //get the rtsPoint in world space and then the r vector bsPoint p = ABCD(u, v); bsVector r = p - f; ptype d = r.len(); if(d == 0) { Uf[i] = A * 2 * PI * (cosAlpha - cosBeta); return; } //get info for the light direction and frequency r = r.norm(); //compute the imaginary factor i^l bsComplex im = bsComplex(0, 1); bsComplex il = bsComplex(1, 0); //Legendre functions are computed dynamically to save memory //initialize the Legendre functions ptype P[2]; //get the angle between k and r (light direction and position vector) ptype cosTheta; cosTheta = k.dot(r); rts::init_legendre(cosTheta, P[0], P[1]); //initialize legendre functions for the cassegrain angles ptype Palpha[3]; rts::init_legendre(cosAlpha, Palpha[0], Palpha[1]); Palpha[2] = 1; ptype Pbeta[3]; rts::init_legendre(cosBeta, Pbeta[0], Pbeta[1]); Pbeta[2] = 1; //for each order l bsComplex sumUf(0, 0); ptype jl = 0; ptype Pl; ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1); for(int l = 0; l<=nl; l++) { jl = tex2D(texJ, l + 0.5f, di + 0.5f); if(l==0) Pl = P[0]; else if(l==1) { Pl = P[1]; //adjust the cassegrain Legendre function Palpha[2] = Palpha[0]; rts::shift_legendre(l+1, cosAlpha, Palpha[0], Palpha[1]); Pbeta[2] = Pbeta[0]; rts::shift_legendre(l+1, cosBeta, Pbeta[0], Pbeta[1]); } else { rts::shift_legendre(l, cosTheta, P[0], P[1]); Pl = P[1]; //adjust the cassegrain outer Legendre function Palpha[2] = Palpha[0]; rts::shift_legendre(l+1, cosAlpha, Palpha[0], Palpha[1]); Pbeta[2] = Pbeta[0]; rts::shift_legendre(l+1, cosBeta, Pbeta[0], Pbeta[1]); } sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); //sumUf += jl; il *= im; } Uf[i] = sumUf * 2 * PI * A; //Uf[i] = u; //return; } void nearfieldStruct::scalarUfLut() { gpuStartTimer(); //calculate the minimum and maximum points in the focused field d_min = pos.dist(focus); d_max = pos.dist_max(focus); //allocate space for the Bessel function int dR = 2 * max(Uf.R[0], Uf.R[1]); ptype* j = NULL; j = (ptype*) malloc(sizeof(ptype) * dR * (m+1)); //calculate Bessel function LUT calcBesselLut(j, d_min, d_max, dR); //create a CUDA array structure and specify the format description cudaArray* arrayJ; cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); //allocate memory HANDLE_ERROR(cudaMallocArray(&arrayJ, &channelDesc, m+1, dR)); //specify texture properties texJ.addressMode[0] = cudaAddressModeMirror; texJ.addressMode[1] = cudaAddressModeMirror; texJ.filterMode = cudaFilterModeLinear; texJ.normalized = false; //bind the texture to the array HANDLE_ERROR(cudaBindTextureToArray(texJ, arrayJ, channelDesc)); //copy the CPU Bessel LUT to the GPU-based array HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice)); //----------------Compute the focused field //create one thread for each pixel of the field slice dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); //if we are computing a plane wave, call the gpuScalarUfp function if(planeWave) { gpuScalarUfp<<>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]); } //otherwise compute the condenser info and create a focused field else { //pre-compute the cosine of the obscuration and objective angles ptype cosAlpha = cos(asin(condenser[0])); ptype cosBeta = cos(asin(condenser[1])); //compute the scalar Uf field (this will be in the x_hat channel of Uf) gpuScalarUfLut<<>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR); } //free everything free(j); HANDLE_ERROR(cudaFreeArray(arrayJ)); t_Uf = gpuStopTimer(); }