#include "nearfield.h" #include "rts/math/spherical_bessel.h" #include "rts/math/legendre.h" #include #include "rts/cuda/error.h" #include "rts/cuda/timer.h" __device__ bsComplex calc_Us(ptype kd, ptype cos_theta, int Nl, bsComplex* B) { //initialize the spherical Bessel functions ptype j[2]; rts::init_sbesselj(kd, j); ptype y[2]; rts::init_sbessely(kd, y); //initialize the Legendre polynomial ptype P[2]; rts::init_legendre(cos_theta, P[0], P[1]); //initialize the spherical Hankel function bsComplex h((ptype)0, (ptype)0); //initialize the result bsComplex Us((ptype)0, (ptype)0); //for each order up to Nl for(int l=0; l<=Nl; l++) { if(l == 0) { h.r = j[0]; h.i = y[0]; Us += B[0] * h * P[0]; } else { //shift the bessel functions and legendre polynomials if(l > 1) { rts::shift_legendre(l, cos_theta, P[0], P[1]); rts::shift_sbesselj(l, kd, j); rts::shift_sbessely(l, kd, y); } h.r = j[1]; h.i = y[1]; Us += B[l] * h * P[1]; } } return Us; } __device__ bsComplex calc_Ui(bsComplex knd, ptype cos_theta, int Nl, bsComplex* A) { //calculate the internal field of a sphere bsComplex Ui((ptype)0, (ptype)0); //deal with rtsPoints near zero if(real(knd) < EPSILON_FLOAT) { //for(int l=0; l(knd, j); //initialize the Legendre polynomial ptype P[2]; rts::init_legendre(cos_theta, P[0], P[1]); //for each order up to Nl for(int l=0; l<=Nl; l++) { if(l == 0) { Ui += A[0] * j[0] * P[0]; } else { //shift the bessel functions and legendre polynomials if(l > 1) { rts::shift_legendre(l, cos_theta, P[0], P[1]); rts::shift_sbesselj(l, knd, j); } Ui += A[l] * j[1] * P[1]; } } return Ui; } __global__ void gpuScalarUsp(bsComplex* Us, bsVector* k, int nk, ptype kmag, bsPoint f, bsPoint ps, ptype a, bsComplex n, bsComplex* Beta, bsComplex* Alpha, int Nl, ptype A, bsRect ABCD, int uR, int vR) { //get the current coordinate in the plane slice int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= uR || iv >= vR) return; //compute the index (easier access to the scalar field array) int i = iv*uR + iu; //compute the parameters for u and v ptype u = (ptype)iu / (uR); ptype v = (ptype)iv / (vR); //get the rtsPoint in world space and then the r vector bsPoint p = ABCD(u, v); bsVector r = p - ps; ptype d = r.len(); bsComplex sumUs(0, 0); //for each plane wave in the wave list for(int iw = 0; iw < nk; iw++) { //normalize the direction vectors and find their inner product r = r.norm(); ptype cos_theta = k[iw].dot(r); //compute the phase factor for spheres that are not at the origin bsVector c = ps - f; bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c))); //compute the internal field if we are inside a sphere if(d <= a) { bsComplex knd = kmag * d * n; sumUs += (1.0/nk) * A * phase * calc_Ui(knd, cos_theta, Nl, Alpha); } //otherwise compute the scattered field else { //compute the argument for the spherical Hankel function ptype kd = kmag * d; sumUs += (1.0/nk) * A * phase * calc_Us(kd, cos_theta, Nl, Beta); } } Us[i] += sumUs; } void nearfieldStruct::scalarUs() { //get the number of spheres int nSpheres = sVector.size(); //if there are no spheres, nothing to do here if(nSpheres == 0) return; //time the calculation of the focused field //gpuStartTimer(); //clear the scattered field U.clear_gpu(); //create one thread for each pixel of the field slice dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); //for each sphere int Nl; for(int s = 0; s>>(U.x_hat, gpuk, 1, 2 * PI / lambda, focus, sVector[s].p, sVector[s].a, sVector[s].n, gpuB, gpuA, Nl, A, pos, U.R[0], U.R[1]); HANDLE_ERROR(cudaFree(gpuk)); } //otherwise copy all of the monte-carlo samples to the GPU and compute else { HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * inWaves.size() )); HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * inWaves.size(), cudaMemcpyHostToDevice)); //compute the amplitude that makes it through the condenser ptype subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) ); gpuScalarUsp<<>>(U.x_hat, gpuk, inWaves.size(), 2 * PI / lambda, focus, sVector[s].p, sVector[s].a, sVector[s].n, gpuB, gpuA, Nl, subA, pos, U.R[0], U.R[1]); HANDLE_ERROR(cudaFree(gpuk)); } //free memory for scattering coefficients HANDLE_ERROR(cudaFree(gpuA)); HANDLE_ERROR(cudaFree(gpuB)); } //float t = gpuStopTimer(); //std::cout<<"Scalar Us Time: "<