stim/optics/efield.cuh

#ifndef	RTS_EFIELD
#define RTS_EFIELD
#include "../math/complex.h"
#include "../math/realfield.cuh"
#include "../visualization/colormap.h"
#include "../optics/planewave.h"
#include "../cuda/devices.h"
#include "../optics/beam.h"
#include "../math/rect.h"
namespace stim{
template<typename T>
__global__ void gpu_planewave2efield(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, 
									 planewave<T> w, rect<T> q)
{
    int iu = blockIdx.x * blockDim.x + threadIdx.x;
    int iv = blockIdx.y * blockDim.y + threadIdx.y;
    //make sure that the thread indices are in-bounds
    if(iu >= r0 || iv >= r1) return;
    //compute the index into the field
    int i = iv*r0 + iu;
	//get the current position
	vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 );
	vec<T> r(p[0], p[1], p[2]);
	complex<T> x( 0.0f, w.k.dot(r) );
    if(Y == NULL)                       //if this is a scalar simulation
        X[i] += w.E0.len() * exp(x);    //use the vector magnitude as the plane wave amplitude
    else
    {
        X[i] += w.E0[0] * exp(x);
        Y[i] += w.E0[1] * exp(x);
        Z[i] += w.E0[2] * exp(x);
    }
}
template<typename T>
__global__ void gpu_efield_magnitude(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, T* M)
{
	int iu = blockIdx.x * blockDim.x + threadIdx.x;
    int iv = blockIdx.y * blockDim.y + threadIdx.y;
    //make sure that the thread indices are in-bounds
    if(iu >= r0 || iv >= r1) return;
    //compute the index into the field
    int i = iv*r0 + iu;
    if(Y == NULL)                      //if this is a scalar simulation
	   M[i] = X[i].abs();              //compute the magnitude of the X component
    else
    {
        M[i] = rts::vec<T>(X[i].abs(), Y[i].abs(), Z[i].abs()).len();
        //M[i] = Z[i].abs();
    }
}
template<typename T>
__global__ void gpu_efield_real_magnitude(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, T* M)
{
    int iu = blockIdx.x * blockDim.x + threadIdx.x;
    int iv = blockIdx.y * blockDim.y + threadIdx.y;
    //make sure that the thread indices are in-bounds
    if(iu >= r0 || iv >= r1) return;
    //compute the index into the field
    int i = iv*r0 + iu;
    if(Y == NULL)                      //if this is a scalar simulation
       M[i] = X[i].abs();              //compute the magnitude of the X component
    else
    {
        M[i] = rts::vec<T>(X[i].real(), Y[i].real(), Z[i].real()).len();
        //M[i] = Z[i].abs();
    }
}
template<typename T>
__global__ void gpu_efield_polarization(complex<T>* X, complex<T>* Y, complex<T>* Z, 
                                        unsigned int r0, unsigned int r1,
                                        T* Px, T* Py, T* Pz)
{
    int iu = blockIdx.x * blockDim.x + threadIdx.x;
    int iv = blockIdx.y * blockDim.y + threadIdx.y;
    //make sure that the thread indices are in-bounds
    if(iu >= r0 || iv >= r1) return;
    //compute the index into the field
    int i = iv*r0 + iu;
    //compute the field polarization
    Px[i] = X[i].abs();
    Py[i] = Y[i].abs();
    Pz[i] = Z[i].abs();
}
/*	This function computes the sum of two complex fields and stores the result in *dest
*/
template<typename T>
__global__ void gpu_efield_sum(complex<T>* dest, complex<T>* src, unsigned int r0, unsigned int r1)
{
	int iu = blockIdx.x * blockDim.x + threadIdx.x;
    int iv = blockIdx.y * blockDim.y + threadIdx.y;
    //make sure that the thread indices are in-bounds
    if(iu >= r0 || iv >= r1) return;
    //compute the index into the field
    int i = iv*r0 + iu;
    //sum the fields
    dest[i] += src[i];
}
/*  This class implements a discrete representation of an electromagnetic field
    in 2D. The majority of this representation is done on the GPU.
*/
template<typename T>
class efield
{
protected:
    bool vector;
    //gpu pointer to the field data
    rts::complex<T>* X;
    rts::complex<T>* Y;
    rts::complex<T>* Z;
    //resolution of the discrete field
    unsigned int R[2];
    //shape of the 2D field
    rect<T> pos;
	void from_planewave(planewave<T> p)
	{
        unsigned int SQRT_BLOCK = 16;
        //create one thread for each detector pixel
        dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
        dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
        
        gpu_planewave2efield<T> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], p, pos);
    }
    void init(unsigned int res0, unsigned int res1, bool _vector)
    {
    	vector = _vector;           //initialize field type
        
        X = Y = Z = NULL;           //initialize all pointers to NULL
        R[0] = res0;
        R[1] = res1;
        //allocate memory on the gpu
        cudaMalloc(&X, sizeof(rts::complex<T>) * R[0] * R[1]);
		cudaMemset(X, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
        if(vector)
        {
            cudaMalloc(&Y, sizeof(rts::complex<T>) * R[0] * R[1]);
			cudaMemset(Y, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
            cudaMalloc(&Z, sizeof(rts::complex<T>) * R[0] * R[1]);
			cudaMemset(Z, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
        }
    }
    void destroy()
    {
		if(X != NULL) cudaFree(X);
		if(Y != NULL) cudaFree(Y);
		if(Z != NULL) cudaFree(Z);
    }
    void shallowcpy(const rts::efield<T> & src)
    {
    	vector = src.vector;
    	R[0] = src.R[0];
    	R[1] = src.R[1];
    }
    void deepcpy(const rts::efield<T> & src)
    {
    	//perform a shallow copy
    	shallowcpy(src);
    	//allocate memory on the gpu
    	if(src.X != NULL)
    	{
    		cudaMalloc(&X, sizeof(rts::complex<T>) * R[0] * R[1]);
    		cudaMemcpy(X, src.X, sizeof(rts::complex<T>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
    	}
    	if(src.Y != NULL)
    	{
    		cudaMalloc(&Y, sizeof(rts::complex<T>) * R[0] * R[1]);
    		cudaMemcpy(Y, src.Y, sizeof(rts::complex<T>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
    	}
    	if(src.Z != NULL)
    	{
    		cudaMalloc(&Z, sizeof(rts::complex<T>) * R[0] * R[1]);
    		cudaMemcpy(Z, src.Z, sizeof(rts::complex<T>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
    	}
    }
public:
    efield(unsigned int res0, unsigned int res1, bool _vector = true)
    {
        init(res0, res1, _vector);
        //pos = rts::rect<T>(rts::vec<T>(-10, 0, -10), rts::vec<T>(-10, 0, 10), rts::vec<T>(10, 0, 10));
    }
    //destructor
    ~efield()
    {
    	destroy();
    }
    ///Clear the field - set all points to zero
    void clear()
    {
        cudaMemset(X, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
        if(vector)
        {
            cudaMemset(Y, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
            cudaMemset(Z, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
        }
    }
    void position(rect<T> _p)
    {
        pos = _p;
    }
    //access functions
    complex<T>* x(){
        return X;
    }
    complex<T>* y(){
        return Y;
    }
    complex<T>* z(){
        return Z;
    }
    unsigned int Ru(){
        return R[0];
    }
    unsigned int Rv(){
        return R[1];
    }
    rect<T> p(){
        return pos;
    }
    std::string str()
    {
        stringstream ss;
        ss<<pos<<std::endl;
        ss<<"X: "<<X<<std::endl;
        ss<<"Y: "<<Y<<std::endl;
        ss<<"Z: "<<Z;
        return ss.str();
    }
    //assignment operator: assignment from another electric field
    efield<T> & operator= (const efield<T> & rhs)
    {
    	destroy();				//destroy any previous information about this field
    	deepcpy(rhs);			//create a deep copy
    	return *this;			//return the current object
    }
    //assignment operator: build an electric field from a plane wave
    efield<T> & operator= (const planewave<T> & rhs)
	{
		
		clear();				//clear any previous field data
		from_planewave(rhs);	//create a field from the planewave
		return *this;			//return the current object
	}
	//assignment operator: add an existing electric field
	efield<T> & operator+= (const efield<T> & rhs)
	{
		//if this field and the source field represent the same regions in space
		if(R[0] == rhs.R[0] && R[1] == rhs.R[1] && pos == rhs.pos)
		{
			int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
			int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
			//create one thread for each detector pixel
			dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
			dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
			//sum the fields
			gpu_efield_sum <<<dimGrid, dimBlock>>> (X, rhs.X, R[0], R[1]);
			if(Y != NULL)
			{
				gpu_efield_sum <<<dimGrid, dimBlock>>> (Y, rhs.Y, R[0], R[1]);
				gpu_efield_sum <<<dimGrid, dimBlock>>> (Z, rhs.Z, R[0], R[1]);
			}
		}
		else
		{
			std::cout<<"ERROR in efield: The two summed fields do not represent the same geometry."<<std::endl;
			exit(1);
		}
		return *this;		//return this object
	}
    //assignment operator: build an electric field from a beam
    efield<T> & operator= (const rts::beam<T> & rhs)
    {
        //get a vector of monte-carlo samples
        std::vector< rts::planewave<T> > p_list = rhs.mc();
        clear();                //clear any previous field data
        for(unsigned int i = 0; i < p_list.size(); i++)
            from_planewave(p_list[i]);
        return *this;
    }
	//return a scalar field representing field magnitude
    realfield<T, 1, true> mag()
    {
		int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
		int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
		//create a scalar field to store the result
		realfield<T, 1, true> M(R[0], R[1]);
		//create one thread for each detector pixel
		dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
		dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
		//compute the magnitude and store it in a scalar field
		gpu_efield_magnitude<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.ptr(0));
		return M;
    }
    //return a sacalar field representing the field magnitude at an infinitely small point in time
    realfield<T, 1, true> real_mag()
    {
        int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
        int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
        //create a scalar field to store the result
        realfield<T, 1, true> M(R[0], R[1]);
        //create one thread for each detector pixel
        dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
        dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
        //compute the magnitude and store it in a scalar field
        gpu_efield_real_magnitude<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.ptr(0));
        return M;
    }
    //return a vector field representing field polarization
    realfield<T, 3, true> polarization()
    {
        if(!vector)
        {
            std::cout<<"ERROR: Cannot compute polarization of a scalar field."<<std::endl;
            exit(1);
        }
        int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
        int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);        
        //create one thread for each detector pixel
        dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
        dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
        
        realfield<T, 3, true> Pol(R[0], R[1]);     //create a vector field to store the result
        //compute the polarization and store it in the vector field
        gpu_efield_polarization<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], Pol.ptr(0), Pol.ptr(1), Pol.ptr(2));
        return Pol;                         //return the vector field
    }
    ////////FRIENDSHIP
    //friend void operator<< <T>(rts::efield<T> &ef, rts::halfspace<T> hs);
};
}   //end namespace rts
#endif