#include "fieldslice.h" #include "dataTypes.h" #include "rts/cuda/error.h" #include "rts/cuda/threads.h" __global__ void field_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* I, unsigned int N) { //compute the index for this thread //int i = blockIdx.x * blockDim.x + threadIdx.x; int i = ThreadIndex1D(); if(i >= N) return; ptype xm = x[i].abs(); if(y != NULL && z != NULL) { ptype ym = y[i].abs(); ptype zm = z[i].abs(); I[i] = xm*xm + ym*ym + zm*zm; } else { I[i] = xm*xm; } } __global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* D, int uR, int vR, int ss) { //get the current coordinate in the plane slice int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= uR || iv >= vR) return; //compute the index into the detector int i = iv*uR + iu; //compute the index into the field int fi; //initialize the intensity for the pixel to zero ptype I = 0; ptype xm = 0; ptype ym = 0; ptype zm = 0; int ix, iy; for(ix = 0; ix= N) return; V[i] = field_component[i].real(); } __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N) { //compute the index for this thread //int i = blockIdx.x * blockDim.x + threadIdx.x; int i = ThreadIndex1D(); if(i >= N) return; V[i] = field_component[i].imag(); } __global__ void field_sqrt(ptype* input, ptype* output, unsigned int N) { //compute the index for this thread //int i = blockIdx.x * blockDim.x + threadIdx.x; int i = ThreadIndex1D(); if(i >= N) return; output[i] = sqrt(input[i]); } __global__ void field_scale(bsComplex* x, bsComplex* y, bsComplex* z, unsigned int N, ptype v) { //compute the index for this thread int i = blockIdx.x * blockDim.x + threadIdx.x; if(i >= N) return; if(x != NULL) x[i] *= v; if(y != NULL) y[i] *= v; if(z != NULL) z[i] *= v; } scalarslice fieldslice::Mag() { //compute the magnitude of the field at each rtsPoint in the slice scalarslice* result = new scalarslice(R[0], R[1]); //compute the total number of values in the slice unsigned int N = R[0] * R[1]; //int gridDim = (N+BLOCK-1)/BLOCK; dim3 gridDim = GenGrid1D(N, BLOCK); field_intensity<<>>(x_hat, y_hat, z_hat, result->S, N); field_sqrt<<>>(result->S, result->S, N); return *result; } scalarslice fieldslice::Real() { //compute the magnitude of the field at each rtsPoint in the slice //create a scalar slice at the same resolution as the field scalarslice* result = new scalarslice(R[0], R[1]); //compute the total number of values in the slice unsigned int N = R[0] * R[1]; //int gridDim = (N+BLOCK-1)/BLOCK; dim3 gridDim = GenGrid1D(N, BLOCK); field_real<<>>(x_hat, result->S, N); return *result; } scalarslice fieldslice::Imag() { //compute the magnitude of the field at each rtsPoint in the slice //create a scalar slice at the same resolution as the field scalarslice* result = new scalarslice(R[0], R[1]); //compute the total number of values in the slice unsigned int N = R[0] * R[1]; //int gridDim = (N+BLOCK-1)/BLOCK; dim3 gridDim = GenGrid1D(N, BLOCK); field_imaginary<<>>(x_hat, result->S, N); return *result; } void fieldslice::IntegrateAndResample(scalarslice* detector, unsigned int supersample) { //compute the intensity and resample at the detector resolution unsigned int D[2]; D[0] = detector->R[0]; D[1] = detector->R[1]; //create one thread for each detector pixel dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((D[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (D[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); resample_intensity<<>>(x_hat, y_hat, z_hat, detector->S, D[0], D[1], supersample); } scalarslice fieldslice::Intensity() { //compute the magnitude of the field at each rtsPoint in the slice //create a scalar slice at the same resolution as the field scalarslice* result = new scalarslice(R[0], R[1]); //compute the total number of values in the slice unsigned int N = R[0] * R[1]; int gridDim = (N+BLOCK-1)/BLOCK; field_intensity<<>>(x_hat, y_hat, z_hat, result->S, N); return *result; } void fieldslice::ScaleField(ptype v) { //This function scales the field by some constant value v //This is mostly used for the inverse FFT, which has to divide the field by R^2 //compute the total number of values in the slice unsigned int N = R[0] * R[1]; int gridDim = (N+BLOCK-1)/BLOCK; field_scale<<>>(x_hat, y_hat, z_hat, N, v); } void fieldslice::init_gpu() { //if the field has no size, return if(R[0] == 0 || R[1] == 0) return; //free any previous memory allocations if(x_hat) HANDLE_ERROR(cudaFree(x_hat)); if(y_hat) HANDLE_ERROR(cudaFree(y_hat)); if(z_hat) HANDLE_ERROR(cudaFree(z_hat)); //allocate space on the GPU for the field slice HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex))); if(!scalarField) { HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex))); //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex))); HANDLE_ERROR(cudaMalloc((void**)&z_hat, R[0] * R[1] * sizeof(bsComplex))); //HANDLE_ERROR(cudaMemset(z_hat, 0, R[0] * R[1] * sizeof(bsComplex))); } clear_gpu(); } void fieldslice::kill_gpu() { if(x_hat != NULL) HANDLE_ERROR(cudaFree(x_hat)); if(y_hat != NULL) HANDLE_ERROR(cudaFree(y_hat)); if(z_hat != NULL) HANDLE_ERROR(cudaFree(z_hat)); x_hat = y_hat = z_hat = NULL; } void fieldslice::clear_gpu() { int memsize = R[0] * R[1] * sizeof(bsComplex); if(x_hat != NULL) HANDLE_ERROR(cudaMemset(x_hat, 0, memsize)); if(y_hat != NULL) HANDLE_ERROR(cudaMemset(y_hat, 0, memsize)); if(z_hat != NULL) HANDLE_ERROR(cudaMemset(z_hat, 0, memsize)); } __global__ void copy_crop(bsComplex* source, bsComplex* dest, int u, int v, int su, int sv, int uR, int vR) { //get the current coordinate in the plane slice int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= su || iv >= sv) return; //compute the destination index int i = iv*su + iu; //compute the source index int sourceV = v + iv; int sourceU = u + iu; int is = sourceV * uR + sourceU; dest[i] = source[is]; } fieldslice fieldslice::crop(int u, int v, int su, int sv) { //create a new field slice with the appropriate settings fieldslice result(su, sv); result.scalarField = scalarField; //allocate space for the new field //result.init_gpu(); //create one thread for each pixel of the field slice dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((su + SQRT_BLOCK -1)/SQRT_BLOCK, (sv + SQRT_BLOCK - 1)/SQRT_BLOCK); //call a kernel to copy the cropped to the new field slice if(x_hat != NULL) copy_crop<<>>(x_hat, result.x_hat, u, v, su, sv, R[0], R[1]); if(y_hat != NULL) copy_crop<<>>(y_hat, result.y_hat, u, v, su, sv, R[0], R[1]); if(z_hat != NULL) copy_crop<<>>(z_hat, result.z_hat, u, v, su, sv, R[0], R[1]); return result; } fieldslice::fieldslice(const fieldslice& rhs) { R[0] = rhs.R[0]; R[1] = rhs.R[1]; scalarField = rhs.scalarField; x_hat = y_hat = z_hat = NULL; unsigned int bytes = sizeof(bsComplex) * R[0] * R[1]; if(rhs.x_hat != NULL) { HANDLE_ERROR(cudaMalloc( (void**)&x_hat, bytes)); HANDLE_ERROR(cudaMemcpy( x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice)); } if(rhs.y_hat != NULL) { HANDLE_ERROR(cudaMalloc( (void**)&y_hat, bytes)); HANDLE_ERROR(cudaMemcpy( y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice)); } if(rhs.z_hat != NULL) { HANDLE_ERROR(cudaMalloc( (void**)&z_hat, bytes)); HANDLE_ERROR(cudaMemcpy( z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice)); } } fieldslice& fieldslice::operator=(const fieldslice& rhs) { //make sure this isn't a self-allocation if(this != &rhs) { //make a shallow copy R[0] = rhs.R[0]; R[1] = rhs.R[1]; scalarField = rhs.scalarField; //initialize to new parameters init_gpu(); //make a deep copy unsigned int bytes = sizeof(bsComplex) * R[0] * R[1]; if(x_hat != NULL) HANDLE_ERROR(cudaMemcpy(x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice)); if(y_hat != NULL) HANDLE_ERROR(cudaMemcpy(y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice)); if(z_hat != NULL) HANDLE_ERROR(cudaMemcpy(z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice)); } return *this; }