#include "nearfield.h" #include "rts/math/spherical_bessel.h" #include "rts/math/legendre.h" #include #include "rts/cuda/error.h" #include "rts/cuda/timer.h" texture texUsp; texture texUip; __global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR) { /*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field. Us = final scattered field k = list of incoming plane waves (Monte-Carlo samples) nk = number of incoming MC samples kmag= magnitude of the incoming field 2pi/lambda dmin= minimum distance of the Usp texture dmax= maximum distance of the Usp texture f = position of the focus ps = position of the sphere A = total amplitude of the incident field arriving at the focal spot ABCD= rectangle representing the field slice uR = resolution of the field slice in the u direction vR = resolution of the field slice in the v direction dR = resolution of the Usp texture in the d direction thetaR= resolution of the Usp texture in the theta direction */ //get the current coordinate in the plane slice int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= uR || iv >= vR) return; //compute the index (easier access to the scalar field array) int i = iv*uR + iu; //compute the parameters for u and v ptype u = (ptype)iu / (uR); ptype v = (ptype)iv / (vR); //get the rtsPoint in world space and then the r vector bsPoint p = ABCD(u, v); bsVector r = p - ps; ptype d = r.len(); float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1); float ai = ( (d - dmin)/(a - dmin)) * (aR - 1); bsComplex sumUs(0, 0); //for each plane wave in the wave list for(int iw = 0; iw < nk; iw++) { //normalize the direction vectors and find their inner product r = r.norm(); ptype cos_theta = k[iw].dot(r); if(cos_theta < -1) cos_theta = -1; if(cos_theta > 1) cos_theta = 1; float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1); //compute the phase factor for spheres that are not at the origin bsVector c = ps - f; bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c))); //compute the internal field if we are inside a sphere if(d < a) { float2 Uip = tex2D(texUip, ai + 0.5, thetai + 0.5); sumUs += (1.0/nk) * A * phase * bsComplex(Uip.x, Uip.y); } //otherwise compute the scattered field else { float2 Usp = tex2D(texUsp, di + 0.5, thetai + 0.5); sumUs += (1.0/nk) * A * phase * bsComplex(Usp.x, Usp.y); } } Us[i] += sumUs; } void nearfieldStruct::scalarUpLut() { //get the number of spheres int nSpheres = sVector.size(); //if there are no spheres, nothing to do here if(nSpheres == 0) return; //time the calculation of the focused field gpuStartTimer(); //clear the scattered field U.clear_gpu(); //create one thread for each pixel of the field slice dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); //copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff) bsVector* gpuk; int nWaves; ptype subA; if(planeWave) { nWaves = 1; HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) ); HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice)); subA = A; } else { nWaves = inWaves.size(); HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nWaves ) ); HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nWaves, cudaMemcpyHostToDevice)); //compute the amplitude that makes it through the condenser subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) ); } //for each sphere for(int s = 0; s>>(U.x_hat, gpuk, nWaves, 2 * PI / lambda, sVector[s].a, sVector[s].d_min, sVector[s].d_max, focus, sVector[s].p, subA, pos, U.R[0], U.R[1], dR, aR, thetaR); cudaFreeArray(arrayUsp); cudaFreeArray(arrayUip); } //store the time to compute the scattered field t_Us = gpuStopTimer(); //free monte-carlo samples cudaFree(gpuk); }