efield.cuh 11.5 KB
``````#ifndef	RTS_EFIELD
#define RTS_EFIELD

#include "../math/complex.h"
#include "../math/realfield.cuh"
#include "../visualization/colormap.h"
#include "../optics/planewave.h"
#include "../cuda/devices.h"
#include "../optics/beam.h"
#include "../math/rect.h"

namespace stim{

template<typename T>
__global__ void gpu_planewave2efield(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1,
planewave<T> w, rect<T> q)
{
int iu = blockIdx.x * blockDim.x + threadIdx.x;
int iv = blockIdx.y * blockDim.y + threadIdx.y;

//make sure that the thread indices are in-bounds
if(iu >= r0 || iv >= r1) return;

//compute the index into the field
int i = iv*r0 + iu;

//get the current position
vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 );
vec<T> r(p[0], p[1], p[2]);

complex<T> x( 0.0f, w.k.dot(r) );

if(Y == NULL)                       //if this is a scalar simulation
X[i] += w.E0.len() * exp(x);    //use the vector magnitude as the plane wave amplitude
else
{
X[i] += w.E0[0] * exp(x);
Y[i] += w.E0[1] * exp(x);
Z[i] += w.E0[2] * exp(x);
}
}

template<typename T>
__global__ void gpu_efield_magnitude(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, T* M)
{
int iu = blockIdx.x * blockDim.x + threadIdx.x;
int iv = blockIdx.y * blockDim.y + threadIdx.y;

//make sure that the thread indices are in-bounds
if(iu >= r0 || iv >= r1) return;

//compute the index into the field
int i = iv*r0 + iu;

if(Y == NULL)                      //if this is a scalar simulation
M[i] = X[i].abs();              //compute the magnitude of the X component
else
{
M[i] = rts::vec<T>(X[i].abs(), Y[i].abs(), Z[i].abs()).len();
//M[i] = Z[i].abs();
}
}

template<typename T>
__global__ void gpu_efield_real_magnitude(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, T* M)
{
int iu = blockIdx.x * blockDim.x + threadIdx.x;
int iv = blockIdx.y * blockDim.y + threadIdx.y;

//make sure that the thread indices are in-bounds
if(iu >= r0 || iv >= r1) return;

//compute the index into the field
int i = iv*r0 + iu;

if(Y == NULL)                      //if this is a scalar simulation
M[i] = X[i].abs();              //compute the magnitude of the X component
else
{
M[i] = rts::vec<T>(X[i].real(), Y[i].real(), Z[i].real()).len();
//M[i] = Z[i].abs();
}
}

template<typename T>
__global__ void gpu_efield_polarization(complex<T>* X, complex<T>* Y, complex<T>* Z,
unsigned int r0, unsigned int r1,
T* Px, T* Py, T* Pz)
{
int iu = blockIdx.x * blockDim.x + threadIdx.x;
int iv = blockIdx.y * blockDim.y + threadIdx.y;

//make sure that the thread indices are in-bounds
if(iu >= r0 || iv >= r1) return;

//compute the index into the field
int i = iv*r0 + iu;

//compute the field polarization
Px[i] = X[i].abs();
Py[i] = Y[i].abs();
Pz[i] = Z[i].abs();

}

/*	This function computes the sum of two complex fields and stores the result in *dest
*/
template<typename T>
__global__ void gpu_efield_sum(complex<T>* dest, complex<T>* src, unsigned int r0, unsigned int r1)
{
int iu = blockIdx.x * blockDim.x + threadIdx.x;
int iv = blockIdx.y * blockDim.y + threadIdx.y;

//make sure that the thread indices are in-bounds
if(iu >= r0 || iv >= r1) return;

//compute the index into the field
int i = iv*r0 + iu;

//sum the fields
dest[i] += src[i];
}

/*  This class implements a discrete representation of an electromagnetic field
in 2D. The majority of this representation is done on the GPU.
*/
template<typename T>
class efield
{
protected:

bool vector;

//gpu pointer to the field data
rts::complex<T>* X;
rts::complex<T>* Y;
rts::complex<T>* Z;

//resolution of the discrete field
unsigned int R[2];

//shape of the 2D field
rect<T> pos;

void from_planewave(planewave<T> p)
{
unsigned int SQRT_BLOCK = 16;
//create one thread for each detector pixel
dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

gpu_planewave2efield<T> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], p, pos);
}

void init(unsigned int res0, unsigned int res1, bool _vector)
{
vector = _vector;           //initialize field type

X = Y = Z = NULL;           //initialize all pointers to NULL
R[0] = res0;
R[1] = res1;

//allocate memory on the gpu
cudaMalloc(&X, sizeof(rts::complex<T>) * R[0] * R[1]);
cudaMemset(X, 0, sizeof(rts::complex<T>) * R[0] * R[1]);

if(vector)
{
cudaMalloc(&Y, sizeof(rts::complex<T>) * R[0] * R[1]);
cudaMemset(Y, 0, sizeof(rts::complex<T>) * R[0] * R[1]);

cudaMalloc(&Z, sizeof(rts::complex<T>) * R[0] * R[1]);
cudaMemset(Z, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
}
}

void destroy()
{
if(X != NULL) cudaFree(X);
if(Y != NULL) cudaFree(Y);
if(Z != NULL) cudaFree(Z);
}

void shallowcpy(const rts::efield<T> & src)
{
vector = src.vector;
R[0] = src.R[0];
R[1] = src.R[1];
}

void deepcpy(const rts::efield<T> & src)
{
//perform a shallow copy
shallowcpy(src);

//allocate memory on the gpu
if(src.X != NULL)
{
cudaMalloc(&X, sizeof(rts::complex<T>) * R[0] * R[1]);
cudaMemcpy(X, src.X, sizeof(rts::complex<T>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
}
if(src.Y != NULL)
{
cudaMalloc(&Y, sizeof(rts::complex<T>) * R[0] * R[1]);
cudaMemcpy(Y, src.Y, sizeof(rts::complex<T>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
}
if(src.Z != NULL)
{
cudaMalloc(&Z, sizeof(rts::complex<T>) * R[0] * R[1]);
cudaMemcpy(Z, src.Z, sizeof(rts::complex<T>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
}
}

public:
efield(unsigned int res0, unsigned int res1, bool _vector = true)
{
init(res0, res1, _vector);
//pos = rts::rect<T>(rts::vec<T>(-10, 0, -10), rts::vec<T>(-10, 0, 10), rts::vec<T>(10, 0, 10));
}

//destructor
~efield()
{
destroy();
}

///Clear the field - set all points to zero
void clear()
{
cudaMemset(X, 0, sizeof(rts::complex<T>) * R[0] * R[1]);

if(vector)
{
cudaMemset(Y, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
cudaMemset(Z, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
}
}

void position(rect<T> _p)
{
pos = _p;
}

//access functions
complex<T>* x(){
return X;
}
complex<T>* y(){
return Y;
}
complex<T>* z(){
return Z;
}
unsigned int Ru(){
return R[0];
}
unsigned int Rv(){
return R[1];
}
rect<T> p(){
return pos;
}

std::string str()
{
stringstream ss;
ss<<pos<<std::endl;
ss<<"X: "<<X<<std::endl;
ss<<"Y: "<<Y<<std::endl;
ss<<"Z: "<<Z;

return ss.str();
}

//assignment operator: assignment from another electric field
efield<T> & operator= (const efield<T> & rhs)
{
deepcpy(rhs);			//create a deep copy
return *this;			//return the current object
}

//assignment operator: build an electric field from a plane wave
efield<T> & operator= (const planewave<T> & rhs)
{

clear();				//clear any previous field data
from_planewave(rhs);	//create a field from the planewave
return *this;			//return the current object
}

//assignment operator: add an existing electric field
efield<T> & operator+= (const efield<T> & rhs)
{
//if this field and the source field represent the same regions in space
if(R[0] == rhs.R[0] && R[1] == rhs.R[1] && pos == rhs.pos)
{

//create one thread for each detector pixel
dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

//sum the fields
gpu_efield_sum <<<dimGrid, dimBlock>>> (X, rhs.X, R[0], R[1]);
if(Y != NULL)
{
gpu_efield_sum <<<dimGrid, dimBlock>>> (Y, rhs.Y, R[0], R[1]);
gpu_efield_sum <<<dimGrid, dimBlock>>> (Z, rhs.Z, R[0], R[1]);
}
}
else
{
std::cout<<"ERROR in efield: The two summed fields do not represent the same geometry."<<std::endl;
exit(1);
}

return *this;		//return this object
}

//assignment operator: build an electric field from a beam
efield<T> & operator= (const rts::beam<T> & rhs)
{
//get a vector of monte-carlo samples
std::vector< rts::planewave<T> > p_list = rhs.mc();

clear();                //clear any previous field data
for(unsigned int i = 0; i < p_list.size(); i++)
from_planewave(p_list[i]);
return *this;
}

//return a scalar field representing field magnitude
realfield<T, 1, true> mag()
{

//create a scalar field to store the result
realfield<T, 1, true> M(R[0], R[1]);

//create one thread for each detector pixel
dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

//compute the magnitude and store it in a scalar field
gpu_efield_magnitude<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.ptr(0));

return M;
}

//return a sacalar field representing the field magnitude at an infinitely small point in time
realfield<T, 1, true> real_mag()
{

//create a scalar field to store the result
realfield<T, 1, true> M(R[0], R[1]);

//create one thread for each detector pixel
dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

//compute the magnitude and store it in a scalar field
gpu_efield_real_magnitude<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.ptr(0));

return M;
}

//return a vector field representing field polarization
realfield<T, 3, true> polarization()
{
if(!vector)
{
std::cout<<"ERROR: Cannot compute polarization of a scalar field."<<std::endl;
exit(1);
}

//create one thread for each detector pixel
dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

realfield<T, 3, true> Pol(R[0], R[1]);     //create a vector field to store the result

//compute the polarization and store it in the vector field
gpu_efield_polarization<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], Pol.ptr(0), Pol.ptr(1), Pol.ptr(2));

return Pol;                         //return the vector field
}

////////FRIENDSHIP
//friend void operator<< <T>(rts::efield<T> &ef, rts::halfspace<T> hs);

};

}   //end namespace rts

#endif``````