__device__ double g(double v0, double v1) { return (v0 + v1)*log(abs(v0+v1)) + (v0-v1)*log(abs(v0-v1)); } __device__ double hfin(double v0, double v1, double dv) { double e = 0.001; double t0 = g(v0+e, v1-dv)/dv; double t1 = 2*g(v0+e, v1)/dv; double t2 = g(v0+e, v1+dv)/dv; return -1.0/PI * (t0 - t1 + t2); } __global__ void devKramersKronig(double* gpuN, double* gpuK, int numVals, double nuStart, double nuEnd, double nOffset) { int i = blockIdx.x * blockDim.x + threadIdx.x; if(i >= numVals) return; double nuDelta = (nuEnd - nuStart)/(numVals - 1); double nu = nuStart + i*nuDelta; double n = 0.0; double jNu; double jK; for(int j=1; j>>(gpuN, gpuK, nVals, nuStart, nuEnd, nOffset); HANDLE_ERROR(cudaMemcpy(cpuN, gpuN, sizeof(double)*nVals, cudaMemcpyDeviceToHost)); //free resources HANDLE_ERROR(cudaFree(gpuK)); HANDLE_ERROR(cudaFree(gpuN)); } __global__ void devComputeSpectrum(double* I, double2* B, double* alpha, int Nl, int nSamples, double oThetaI, double oThetaO, double cThetaI, double cThetaO) { int i = blockIdx.x * blockDim.x + threadIdx.x; //compute the delta-theta value double dTheta = (oThetaO - oThetaI)/nSamples; //allocate space for the Legendre polynomials double Ptheta[2]; double cosTheta, theta; cuDoubleComplex Us; cuDoubleComplex UsSample; cuDoubleComplex U; //cuComplex Ui; //Ui.x = 2*PI; //Ui.y = 0.0; cuDoubleComplex numer; numer.x = 0.0; cuDoubleComplex exp_numer; cuDoubleComplex iL; cuDoubleComplex imag; imag.x = 0.0; imag.y = 1.0; double realFac; cuDoubleComplex complexFac; double PlTheta; double Isum = 0.0; //float maxVal = 0; //float val; for(int iTheta = 0; iTheta < nSamples; iTheta++) { //calculate theta theta = iTheta * dTheta + oThetaI; cosTheta = cos(theta); //initialize the theta Legendre polynomial Ptheta[0] = 1.0; Ptheta[1] = cosTheta; //initialize the scattered field Us.x = Us.y = 0.0; iL.x = 1.0; iL.y = 0.0; for(int l = 0; l= cThetaI && theta <= cThetaO) U = cAdd(Us, 2*PI); else U = Us; Isum += (U.x*U.x + U.y*U.y) * sin(theta) * 2 * PI * dTheta; } I[i] = Isum; } void cudaComputeSpectrum(double* cpuI, double* cpuB, double* cpuAlpha, int Nl, int nLambda, double oThetaI, double oThetaO, double cThetaI, double cThetaO, int nSamples) { //copy everything to the GPU double2* gpuB; HANDLE_ERROR(cudaMalloc(&gpuB, sizeof(double2) * nLambda * Nl)); HANDLE_ERROR(cudaMemcpy(gpuB, cpuB, sizeof(double2) * nLambda * Nl, cudaMemcpyHostToDevice)); double* gpuAlpha; HANDLE_ERROR(cudaMalloc(&gpuAlpha, sizeof(double) * Nl)); HANDLE_ERROR(cudaMemcpy(gpuAlpha, cpuAlpha, sizeof(double) * Nl, cudaMemcpyHostToDevice)); double* gpuI; HANDLE_ERROR(cudaMalloc(&gpuI, sizeof(double) * nLambda)); //call the kernel to compute the spectrum dim3 block(BLOCK_SIZE*BLOCK_SIZE); dim3 grid(nLambda/block.x + 1); //devComputeSpectrum devComputeSpectrum<<>>(gpuI, (double2*)gpuB, gpuAlpha, Nl, nSamples, oThetaI, oThetaO, cThetaI, cThetaO); HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, sizeof(double) * nLambda, cudaMemcpyDeviceToHost)); HANDLE_ERROR(cudaFree(gpuB)); HANDLE_ERROR(cudaFree(gpuAlpha)); HANDLE_ERROR(cudaFree(gpuI)); }