efield.cuh 9.67 KB
#ifndef	RTS_EFIELD
#define RTS_EFIELD

#include "../math/complex.h"
#include "../math/realfield.cuh"
#include "../visualization/colormap.h"
#include "../optics/planewave.h"
#include "../cuda/devices.h"
#include "../optics/beam.h"



namespace rts{

template<typename T>
__global__ void gpu_planewave2efield(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, 
									 planewave<T> w, quad<T> q)
{
    int iu = blockIdx.x * blockDim.x + threadIdx.x;
    int iv = blockIdx.y * blockDim.y + threadIdx.y;

    //make sure that the thread indices are in-bounds
    if(iu >= r0 || iv >= r1) return;

    //compute the index into the field
    int i = iv*r0 + iu;

	//get the current position
	vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 );
	vec<T> r(p[0], p[1], p[2]);

	complex<T> x( 0.0f, w.k.dot(r) );

    if(Y == NULL)                       //if this is a scalar simulation
        X[i] += w.E0.len() * exp(x);    //use the vector magnitude as the plane wave amplitude
    else
    {
        X[i] += w.E0[0] * exp(x);
        Y[i] += w.E0[1] * exp(x);
        Z[i] += w.E0[2] * exp(x);
    }
}

template<typename T>
__global__ void gpu_efield_magnitude(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, T* M)
{
	int iu = blockIdx.x * blockDim.x + threadIdx.x;
    int iv = blockIdx.y * blockDim.y + threadIdx.y;

    //make sure that the thread indices are in-bounds
    if(iu >= r0 || iv >= r1) return;

    //compute the index into the field
    int i = iv*r0 + iu;

    if(Y == NULL)                      //if this is a scalar simulation
	   M[i] = X[i].abs();              //compute the magnitude of the X component
    else
    {
        M[i] = rts::vec<T>(X[i].abs(), Y[i].abs(), Z[i].abs()).len();
        //M[i] = Z[i].abs();
    }
}

template<typename T>
__global__ void gpu_efield_polarization(complex<T>* X, complex<T>* Y, complex<T>* Z, 
                                        unsigned int r0, unsigned int r1,
                                        T* Px, T* Py, T* Pz)
{
    int iu = blockIdx.x * blockDim.x + threadIdx.x;
    int iv = blockIdx.y * blockDim.y + threadIdx.y;

    //make sure that the thread indices are in-bounds
    if(iu >= r0 || iv >= r1) return;

    //compute the index into the field
    int i = iv*r0 + iu;

    //compute the field polarization
    Px[i] = X[i].abs();
    Py[i] = Y[i].abs();
    Pz[i] = Z[i].abs();
}

/*	This function computes the sum of two complex fields and stores the result in *dest
*/
template<typename T>
__global__ void gpu_efield_sum(complex<T>* dest, complex<T>* src, unsigned int r0, unsigned int r1)
{
	int iu = blockIdx.x * blockDim.x + threadIdx.x;
    int iv = blockIdx.y * blockDim.y + threadIdx.y;

    //make sure that the thread indices are in-bounds
    if(iu >= r0 || iv >= r1) return;

    //compute the index into the field
    int i = iv*r0 + iu;

    //sum the fields
    dest[i] += src[i];
}

/*  This class implements a discrete representation of an electromagnetic field
    in 2D. The majority of this representation is done on the GPU.
*/
template<typename P>
class efield
{
protected:

    bool vector;

    //gpu pointer to the field data
    rts::complex<P>* X;
    rts::complex<P>* Y;
    rts::complex<P>* Z;

    //resolution of the discrete field
    unsigned int R[2];

    //shape of the 2D field
    quad<P> pos;

	void from_planewave(planewave<P> p)
	{
        unsigned int SQRT_BLOCK = 16;
        //create one thread for each detector pixel
        dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
        dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
        

        gpu_planewave2efield<P> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], p, pos);
    }

    void init(unsigned int res0, unsigned int res1, bool _vector)
    {
    	vector = _vector;           //initialize field type
        
        X = Y = Z = NULL;           //initialize all pointers to NULL
        R[0] = res0;
        R[1] = res1;

        //allocate memory on the gpu
        cudaMalloc(&X, sizeof(rts::complex<P>) * R[0] * R[1]);
		cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]);

        if(vector)
        {
            std::cout<<"vector:";
            cudaMalloc(&Y, sizeof(rts::complex<P>) * R[0] * R[1]);
			cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]);

            cudaMalloc(&Z, sizeof(rts::complex<P>) * R[0] * R[1]);
			cudaMemset(Z, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
        }
    }

    void destroy()
    {
		if(X != NULL) cudaFree(X);
		if(Y != NULL) cudaFree(Y);
		if(Z != NULL) cudaFree(Z);
    }

    void shallowcpy(const rts::efield<P> & src)
    {
    	vector = src.vector;
    	R[0] = src.R[0];
    	R[1] = src.R[1];
    }

    void deepcpy(const rts::efield<P> & src)
    {
    	//perform a shallow copy
    	shallowcpy(src);

    	//allocate memory on the gpu
    	if(src.X != NULL)
    	{
    		cudaMalloc(&X, sizeof(rts::complex<P>) * R[0] * R[1]);
    		cudaMemcpy(X, src.X, sizeof(rts::complex<P>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
    	}
    	if(src.Y != NULL)
    	{
    		cudaMalloc(&Y, sizeof(rts::complex<P>) * R[0] * R[1]);
    		cudaMemcpy(Y, src.Y, sizeof(rts::complex<P>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
    	}
    	if(src.Z != NULL)
    	{
    		cudaMalloc(&Z, sizeof(rts::complex<P>) * R[0] * R[1]);
    		cudaMemcpy(Z, src.Z, sizeof(rts::complex<P>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
    	}
    }

public:
    efield(unsigned int res0, unsigned int res1, bool _vector = true)
    {
        init(res0, res1, _vector);
        pos = rts::quad<P>(rts::vec<P>(-10, 0, -10), rts::vec<P>(-10, 0, 10), rts::vec<P>(10, 0, 10));
    }

    //destructor
    ~efield()
    {
    	destroy();
    }

    ///Clear the field - set all points to zero
    void clear()
    {
        cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]);

        if(vector)
        {
            cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
            cudaMemset(Z, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
        }
    }

    void position(quad<P> _p)
    {
        pos = _p;
    }

    std::string str()
    {
        stringstream ss;
        ss<<pos<<std::endl;
        ss<<"X: "<<X<<std::endl;
        ss<<"Y: "<<Y<<std::endl;
        ss<<"Z: "<<Z;

        return ss.str();
    }

    //assignment operator: assignment from another electric field
    efield<P> & operator= (const efield<P> & rhs)
    {
    	destroy();				//destroy any previous information about this field
    	deepcpy(rhs);			//create a deep copy
    	return *this;			//return the current object
    }

    //assignment operator: build an electric field from a plane wave
    efield<P> & operator= (const planewave<P> & rhs)
	{
		
		clear();				//clear any previous field data
		from_planewave(rhs);	//create a field from the planewave
		return *this;			//return the current object
	}

	//assignment operator: add an existing electric field
	efield<P> & operator+= (const efield<P> & rhs)
	{
		//if this field and the source field represent the same regions in space
		if(R[0] == rhs.R[0] && R[1] == rhs.R[1] && pos == rhs.pos)
		{
			int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
			int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);

			//create one thread for each detector pixel
			dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
			dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

			//sum the fields
			gpu_efield_sum <<<dimGrid, dimBlock>>> (X, rhs.X, R[0], R[1]);
			if(Y != NULL)
			{
				gpu_efield_sum <<<dimGrid, dimBlock>>> (Y, rhs.Y, R[0], R[1]);
				gpu_efield_sum <<<dimGrid, dimBlock>>> (Z, rhs.Z, R[0], R[1]);
			}
		}
		else
		{
			std::cout<<"ERROR in efield: The two summed fields do not represent the same geometry."<<std::endl;
			exit(1);
		}

		return *this;		//return this object
	}

    efield<P> & operator= (const rts::beam<P> & rhs)
    {
        //get a vector of monte-carlo samples
        std::vector< rts::planewave<P> > p_list = rhs.mc();

        clear();                //clear any previous field data
        for(unsigned int i = 0; i < p_list.size(); i++)
            from_planewave(p_list[i]);
        return *this;
    }


	//return a scalar field representing field magnitude
    realfield<P, 1, true> mag()
    {
		int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
		int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);

		//create a scalar field to store the result
		realfield<P, 1, true> M(R[0], R[1]);

		//create one thread for each detector pixel
		dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
		dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

		//compute the magnitude and store it in a scalar field
		gpu_efield_magnitude<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.ptr(0));

		return M;
    }

    //return a vector field representing field polarization
    realfield<P, 3, true> polarization()
    {
        if(!vector)
        {
            std::cout<<"ERROR: Cannot compute polarization of a scalar field."<<std::endl;
            exit(1);
        }
        int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
        int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);        

        //create one thread for each detector pixel
        dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
        dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

        
        realfield<P, 3, true> Pol(R[0], R[1]);     //create a vector field to store the result

        //compute the polarization and store it in the vector field
        gpu_efield_polarization<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], Pol.ptr(0), Pol.ptr(1), Pol.ptr(2));

        return Pol;                         //return the vector field
    }



};



}   //end namespace rts

#endif