#ifndef RTS_EFIELD #define RTS_EFIELD #include "../math/complex.h" #include "../math/realfield.cuh" #include "../visualization/colormap.h" #include "../optics/planewave.h" #include "../cuda/devices.h" #include "../optics/beam.h" #include "../math/rect.h" namespace stim{ template __global__ void gpu_planewave2efield(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, planewave w, rect q) { int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= r0 || iv >= r1) return; //compute the index into the field int i = iv*r0 + iu; //get the current position vec p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); vec r(p[0], p[1], p[2]); complex x( 0.0f, w.k.dot(r) ); if(Y == NULL) //if this is a scalar simulation X[i] += w.E0.len() * exp(x); //use the vector magnitude as the plane wave amplitude else { X[i] += w.E0[0] * exp(x); Y[i] += w.E0[1] * exp(x); Z[i] += w.E0[2] * exp(x); } } template __global__ void gpu_efield_magnitude(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, T* M) { int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= r0 || iv >= r1) return; //compute the index into the field int i = iv*r0 + iu; if(Y == NULL) //if this is a scalar simulation M[i] = X[i].abs(); //compute the magnitude of the X component else { M[i] = rts::vec(X[i].abs(), Y[i].abs(), Z[i].abs()).len(); //M[i] = Z[i].abs(); } } template __global__ void gpu_efield_real_magnitude(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, T* M) { int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= r0 || iv >= r1) return; //compute the index into the field int i = iv*r0 + iu; if(Y == NULL) //if this is a scalar simulation M[i] = X[i].abs(); //compute the magnitude of the X component else { M[i] = rts::vec(X[i].real(), Y[i].real(), Z[i].real()).len(); //M[i] = Z[i].abs(); } } template __global__ void gpu_efield_polarization(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, T* Px, T* Py, T* Pz) { int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= r0 || iv >= r1) return; //compute the index into the field int i = iv*r0 + iu; //compute the field polarization Px[i] = X[i].abs(); Py[i] = Y[i].abs(); Pz[i] = Z[i].abs(); } /* This function computes the sum of two complex fields and stores the result in *dest */ template __global__ void gpu_efield_sum(complex* dest, complex* src, unsigned int r0, unsigned int r1) { int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= r0 || iv >= r1) return; //compute the index into the field int i = iv*r0 + iu; //sum the fields dest[i] += src[i]; } /* This class implements a discrete representation of an electromagnetic field in 2D. The majority of this representation is done on the GPU. */ template class efield { protected: bool vector; //gpu pointer to the field data rts::complex* X; rts::complex* Y; rts::complex* Z; //resolution of the discrete field unsigned int R[2]; //shape of the 2D field rect pos; void from_planewave(planewave p) { unsigned int SQRT_BLOCK = 16; //create one thread for each detector pixel dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); gpu_planewave2efield <<>> (X, Y, Z, R[0], R[1], p, pos); } void init(unsigned int res0, unsigned int res1, bool _vector) { vector = _vector; //initialize field type X = Y = Z = NULL; //initialize all pointers to NULL R[0] = res0; R[1] = res1; //allocate memory on the gpu cudaMalloc(&X, sizeof(rts::complex) * R[0] * R[1]); cudaMemset(X, 0, sizeof(rts::complex) * R[0] * R[1]); if(vector) { cudaMalloc(&Y, sizeof(rts::complex) * R[0] * R[1]); cudaMemset(Y, 0, sizeof(rts::complex) * R[0] * R[1]); cudaMalloc(&Z, sizeof(rts::complex) * R[0] * R[1]); cudaMemset(Z, 0, sizeof(rts::complex) * R[0] * R[1]); } } void destroy() { if(X != NULL) cudaFree(X); if(Y != NULL) cudaFree(Y); if(Z != NULL) cudaFree(Z); } void shallowcpy(const rts::efield & src) { vector = src.vector; R[0] = src.R[0]; R[1] = src.R[1]; } void deepcpy(const rts::efield & src) { //perform a shallow copy shallowcpy(src); //allocate memory on the gpu if(src.X != NULL) { cudaMalloc(&X, sizeof(rts::complex) * R[0] * R[1]); cudaMemcpy(X, src.X, sizeof(rts::complex) * R[0] * R[1], cudaMemcpyDeviceToDevice); } if(src.Y != NULL) { cudaMalloc(&Y, sizeof(rts::complex) * R[0] * R[1]); cudaMemcpy(Y, src.Y, sizeof(rts::complex) * R[0] * R[1], cudaMemcpyDeviceToDevice); } if(src.Z != NULL) { cudaMalloc(&Z, sizeof(rts::complex) * R[0] * R[1]); cudaMemcpy(Z, src.Z, sizeof(rts::complex) * R[0] * R[1], cudaMemcpyDeviceToDevice); } } public: efield(unsigned int res0, unsigned int res1, bool _vector = true) { init(res0, res1, _vector); //pos = rts::rect(rts::vec(-10, 0, -10), rts::vec(-10, 0, 10), rts::vec(10, 0, 10)); } //destructor ~efield() { destroy(); } ///Clear the field - set all points to zero void clear() { cudaMemset(X, 0, sizeof(rts::complex) * R[0] * R[1]); if(vector) { cudaMemset(Y, 0, sizeof(rts::complex) * R[0] * R[1]); cudaMemset(Z, 0, sizeof(rts::complex) * R[0] * R[1]); } } void position(rect _p) { pos = _p; } //access functions complex* x(){ return X; } complex* y(){ return Y; } complex* z(){ return Z; } unsigned int Ru(){ return R[0]; } unsigned int Rv(){ return R[1]; } rect p(){ return pos; } std::string str() { stringstream ss; ss< & operator= (const efield & rhs) { destroy(); //destroy any previous information about this field deepcpy(rhs); //create a deep copy return *this; //return the current object } //assignment operator: build an electric field from a plane wave efield & operator= (const planewave & rhs) { clear(); //clear any previous field data from_planewave(rhs); //create a field from the planewave return *this; //return the current object } //assignment operator: add an existing electric field efield & operator+= (const efield & rhs) { //if this field and the source field represent the same regions in space if(R[0] == rhs.R[0] && R[1] == rhs.R[1] && pos == rhs.pos) { int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); //create one thread for each detector pixel dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); //sum the fields gpu_efield_sum <<>> (X, rhs.X, R[0], R[1]); if(Y != NULL) { gpu_efield_sum <<>> (Y, rhs.Y, R[0], R[1]); gpu_efield_sum <<>> (Z, rhs.Z, R[0], R[1]); } } else { std::cout<<"ERROR in efield: The two summed fields do not represent the same geometry."< & operator= (const rts::beam & rhs) { //get a vector of monte-carlo samples std::vector< rts::planewave > p_list = rhs.mc(); clear(); //clear any previous field data for(unsigned int i = 0; i < p_list.size(); i++) from_planewave(p_list[i]); return *this; } //return a scalar field representing field magnitude realfield mag() { int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); //create a scalar field to store the result realfield M(R[0], R[1]); //create one thread for each detector pixel dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); //compute the magnitude and store it in a scalar field gpu_efield_magnitude <<>> (X, Y, Z, R[0], R[1], M.ptr(0)); return M; } //return a sacalar field representing the field magnitude at an infinitely small point in time realfield real_mag() { int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); //create a scalar field to store the result realfield M(R[0], R[1]); //create one thread for each detector pixel dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); //compute the magnitude and store it in a scalar field gpu_efield_real_magnitude <<>> (X, Y, Z, R[0], R[1], M.ptr(0)); return M; } //return a vector field representing field polarization realfield polarization() { if(!vector) { std::cout<<"ERROR: Cannot compute polarization of a scalar field."< Pol(R[0], R[1]); //create a vector field to store the result //compute the polarization and store it in the vector field gpu_efield_polarization <<>> (X, Y, Z, R[0], R[1], Pol.ptr(0), Pol.ptr(1), Pol.ptr(2)); return Pol; //return the vector field } ////////FRIENDSHIP //friend void operator<< (rts::efield &ef, rts::halfspace hs); }; } //end namespace rts #endif