added field and complexfield classes

David Mayerich
1 parent ecfd14df
Showing 4 changed files with 340 additions and 5 deletions Show diff stats
math/complexfield.cuh
math/field.cuh
math/realfield.cuh
math/rect.h
+#ifndef	RTS_COMPLEXFIELD_H
+#define RTS_COMPLEXFIELD_H
+
+#include "cublas_v2.h"
+#include <cuda_runtime.h>
+
+#include "../math/field.cuh"
+#include "../math/complex.h"
+
+namespace rts{
+
+/*Complex-valued field. Extends rts::field (with rts::complex<T> samples) by
+  operations that are specific to complex data.
+
+  T = floating-point type of the real and imaginary parts; find_max() supports
+      the 4-byte (cuComplex) and 8-byte (cuDoubleComplex) layouts
+  D = number of components stored by the field
+*/
+template<typename T, unsigned int D>
+class complexfield : public field< rts::complex<T>, D >{
+	using field< rts::complex<T>, D >::R;
+	using field< rts::complex<T>, D >::X;
+	using field< rts::complex<T>, D >::shape;
+
+public:
+
+	//constructor (no parameters)
+	complexfield() : field<rts::complex<T>, D>(){}
+
+	//constructor (resolution specified)
+	complexfield(unsigned int r0, unsigned int r1) : field<rts::complex<T>, D>(r0, r1){}
+
+	//find the maximum value of component n
+	//	The search is done by cublasIcamax / cublasIzamax, which select the element with
+	//	the largest |real| + |imag|; the complex value of that element is returned.
+	//	The program is terminated if n is not a valid component, the component holds
+	//	no data, or a CUBLAS call fails.
+	rts::complex<T> find_max(unsigned int n){
+
+		//an invalid or empty component would make the copy below read outside of the slice
+		if(n >= D || X[n] == NULL || R[0] == 0 || R[1] == 0){
+			std::cout<<"CUBLAS Error: no field data available for the requested component."<<std::endl;
+			exit(1);
+		}
+
+		//create a CUBLAS handle
+		cublasHandle_t handle;
+		cublasStatus_t stat = cublasCreate(&handle);
+		if(stat != CUBLAS_STATUS_SUCCESS){
+			std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
+			exit(1);
+		}
+
+		int L = R[0] * R[1];    //compute the number of discrete points in a slice
+		int index = 0;          //result of the max operation (1-based)
+
+		//select the routine that matches the precision of T
+		if(sizeof(T) == 4)
+			stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index);
+		else
+			stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index);
+
+		//the handle is only needed for the search, release it before any exit path
+		cublasDestroy(handle);
+
+		//if there was a GPU error, terminate
+		if(stat != CUBLAS_STATUS_SUCCESS){
+			std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
+			exit(1);
+		}
+
+		index -= 1;        //adjust for 1-based indexing
+
+		//retrieve the maximum value for this slice
+		rts::complex<T> result;
+		HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(rts::complex<T>), cudaMemcpyDeviceToHost));
+		return result;
+	}
+
+	//assignment operator (scalar value): forwards to field::operator=(const T)
+	complexfield & operator= (const complex<T> rhs){
+
+		field< complex<T>, D >::operator=(rhs);
+		return *this;
+	}
+
+	//assignment operator (vector value): forwards to field::operator=(const vec<T, D>)
+	complexfield & operator= (const vec< complex<T>, D > rhs){
+
+		field< complex<T>, D >::operator=(rhs);
+		return *this;
+	}
+
+	//save component n as an image
+	//	NOTE: not implemented yet, the function currently does nothing
+	void toImage(std::string filename, unsigned int n){
+
+	}
+
+
+};
+
+
+}	//end namespace rts
+
+
+#endif
 \ No newline at end of file
+#ifndef RTS_FIELD_CUH
+#define RTS_FIELD_CUH
+
+#include <cmath>
+#include <cstdlib>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "../math/rect.h"
+#include "../cuda/threads.h"
+#include "../cuda/error.h"
+#include "../cuda/devices.h"
+
+
+namespace rts{
+
+//multiply R = X * Y (element-wise) for one slice of r0 x r1 samples
+//	R       = buffer that receives the products
+//	X, Y    = buffers holding the two operands
+//	r0, r1  = number of samples along the first and second dimension;
+//	          sample (u, v) is stored at index v * r0 + u
+//	Expects a 2D launch that provides at least one thread per sample; threads
+//	that fall outside of the slice return without touching memory.
+template<typename T>
+__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){
+
+	unsigned int iu = blockIdx.x * blockDim.x + threadIdx.x;
+	unsigned int iv = blockIdx.y * blockDim.y + threadIdx.y;
+
+	//make sure that the thread indices are in-bounds
+	if(iu >= r0 || iv >= r1) return;
+
+	//compute the index into the field (in size_t, so that large slices do not wrap at 32 bits)
+	size_t i = (size_t)iv * r0 + iu;
+
+	//calculate and store the result
+	R[i] = X[i] * Y[i];
+}
+
+//assign a constant value to all points of one slice of r0 x r1 samples
+//	ptr     = buffer that is filled
+//	val     = value written to every sample
+//	r0, r1  = number of samples along the first and second dimension;
+//	          sample (u, v) is stored at index v * r0 + u
+//	Expects a 2D launch that provides at least one thread per sample; threads
+//	that fall outside of the slice return without touching memory.
+template<typename T>
+__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){
+
+	unsigned int iu = blockIdx.x * blockDim.x + threadIdx.x;
+	unsigned int iv = blockIdx.y * blockDim.y + threadIdx.y;
+
+	//make sure that the thread indices are in-bounds
+	if(iu >= r0 || iv >= r1) return;
+
+	//compute the index into the field (in size_t, so that large slices do not wrap at 32 bits)
+	size_t i = (size_t)iv * r0 + iu;
+
+	//store the value
+	ptr[i] = val;
+}
+
+//Stores a D-component field sampled on a 2D grid of R[0] x R[1] points.
+//	Every component n is kept in its own buffer X[n], which is allocated with
+//	cudaMalloc and released with cudaFree by this class.
+//	T = type of a single sample
+//	D = number of components
+template<typename T, unsigned int D>
+class field{
+
+protected:
+
+	T* X[D];			//pointer to the field data (one buffer per component, NULL when not allocated)
+	unsigned int R[2];	//field resolution
+	rts::rect<T> shape;		//position and shape of the field slice
+
+private:
+
+	//print an error message and terminate the program
+	static void fail(const char* msg){
+		std::cout<<"rts::field Error: "<<msg<<std::endl;
+		std::exit(1);
+	}
+
+	//size (in bytes) of the buffer of a single component
+	size_t slice_bytes() const{
+		return sizeof(T) * R[0] * R[1];
+	}
+
+	//edge length of the square thread block used for all kernel launches
+	unsigned int block_edge() const{
+		int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
+		return (unsigned int)std::sqrt((float)maxThreads);
+	}
+
+	//number of blocks with edge length b that are required to cover r samples
+	static unsigned int blocks_for(unsigned int r, unsigned int b){
+		return (r + b - 1) / b;
+	}
+
+	//make a deep copy of every component that is allocated in rhs
+	//	R has to hold the resolution of rhs already; the previous content of X is
+	//	overwritten without being released
+	void copy_slices(const field &rhs){
+		const size_t bytes = slice_bytes();
+
+		for(unsigned int n=0; n<D; n++){
+			X[n] = NULL;
+
+			//do we have to make a deep copy?
+			if(rhs.X[n] == NULL || bytes == 0) continue;	//no
+
+			//allocate the necessary memory
+			HANDLE_ERROR(cudaMalloc((void**)&X[n], bytes));
+			//copy the slice
+			HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], bytes, cudaMemcpyDeviceToDevice));
+		}
+	}
+
+public:
+
+	//returns a list of D file names given an input string with wild cards
+	//	The run of '?' characters in the name (everything from the first to the last '?')
+	//	is replaced by the zero-padded component index, e.g. "img???.bmp" produces
+	//	"img000.bmp", "img001.bmp", ...
+	//	If the name contains no wild card, the index is inserted in front of the extension.
+	//	The extension is optional and starts at the last '.' of the file name itself.
+	std::vector<std::string> process_filename(std::string name){
+
+		//separate the extension; a '.' inside of the directory part (ex. "./out/a") does not count
+		std::string ext;
+		size_t sep = name.find_last_of("/\\");
+		size_t dot = name.find_last_of('.');
+		if(dot != std::string::npos && (sep == std::string::npos || dot > sep)){
+			ext = name.substr(dot);             //file extension including the period (ex. .bmp, .png)
+			name = name.substr(0, dot);
+		}
+
+		//default: no wild cards, the index follows the complete name
+		std::string prefix = name;
+		std::string postfix;
+		unsigned int digits = 1;
+
+		size_t i0 = name.find_first_of('?');    //find the positions of the first and last wildcard ('?')
+		if(i0 != std::string::npos){
+			size_t i1 = name.find_last_of('?');
+			prefix = name.substr(0, i0);
+			postfix = name.substr(i1 + 1);
+			digits = (unsigned int)(i1 - i0 + 1);   //compute the number of wildcards
+		}
+
+		std::vector<std::string> flist;			//create a vector of file names
+		//fill the list
+		for(unsigned int d=0; d<D; d++){
+			std::stringstream fname;            //assemble the file name
+			fname<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext;
+			flist.push_back(fname.str());
+		}
+
+		return flist;
+	}
+
+	//mark all components as unallocated (does not release any memory)
+	void init(){
+		for(unsigned int n=0; n<D; n++)
+			X[n] = NULL;
+	}
+
+	//release the memory of all components
+	//	the pointers are reset, so that calling destroy() again is harmless
+	void destroy(){
+		for(unsigned int n=0; n<D; n++){
+			if(X[n] != NULL){
+				HANDLE_ERROR(cudaFree(X[n]));
+				X[n] = NULL;
+			}
+		}
+	}
+
+
+public:
+	//field constructor (empty field, nothing is allocated)
+	field(){
+		R[0] = R[1] = 0;
+		init();
+	}
+
+	//field constructor (resolution specified): allocates and zeroes all D components
+	field(unsigned int x, unsigned int y){
+		//set the resolution
+		R[0] = x;
+		R[1] = y;
+		init();
+
+		//allocate memory on the GPU
+		const size_t bytes = slice_bytes();
+		if(bytes > 0){
+			for(unsigned int n=0; n<D; n++){
+				HANDLE_ERROR(cudaMalloc( (void**)&X[n], bytes ));
+			}
+		}
+		clear();		//zero the field
+	}
+
+	///copy constructor (deep copy of all allocated components)
+	field(const field &rhs){
+		R[0] = rhs.R[0];
+		R[1] = rhs.R[1];
+		copy_slices(rhs);
+	}
+
+	~field(){
+		destroy();
+	}
+
+	//assignment operator (deep copy of all allocated components)
+	field & operator= (const field & rhs){
+
+		//assigning a field to itself must not release the data that is being copied
+		if(this == &rhs) return *this;
+
+		//de-allocate any existing GPU memory
+		destroy();
+
+		//copy the slice resolution
+		R[0] = rhs.R[0];
+		R[1] = rhs.R[1];
+
+		copy_slices(rhs);
+		return *this;
+	}
+
+	//assignment of a constant value to all positions and components
+	field & operator= (const T rhs){
+
+		//nothing to do for an empty field (a launch with an empty grid is invalid)
+		if(R[0] == 0 || R[1] == 0) return *this;
+
+		//create one thread for each detector pixel
+		const unsigned int B = block_edge();
+		dim3 dimBlock(B, B);
+		dim3 dimGrid(blocks_for(R[0], B), blocks_for(R[1], B));
+
+		//assign the constant value to all positions and dimensions
+		for(unsigned int n=0; n<D; n++){
+			if(X[n] == NULL) continue;
+			rts::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]);
+			HANDLE_ERROR(cudaGetLastError());		//report invalid launches
+		}
+
+		return *this;
+	}
+
+	//assignment of vector component: component n of rhs is written to all positions of component n
+	field & operator= (const vec<T, D> rhs){
+
+		//nothing to do for an empty field (a launch with an empty grid is invalid)
+		if(R[0] == 0 || R[1] == 0) return *this;
+
+		//create one thread for each detector pixel
+		const unsigned int B = block_edge();
+		dim3 dimBlock(B, B);
+		dim3 dimGrid(blocks_for(R[0], B), blocks_for(R[1], B));
+
+		//assign the constant value to all positions and dimensions
+		for(unsigned int n=0; n<D; n++){
+			if(X[n] == NULL) continue;
+			rts::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]);
+			HANDLE_ERROR(cudaGetLastError());		//report invalid launches
+		}
+
+		return *this;
+
+	}
+
+	//multiply two fields (element-wise multiplication)
+	//	both fields must have the same resolution, otherwise the program is terminated
+	field<T, D> operator* (const field & rhs){
+
+		//the kernel indexes both operands with the resolution of this field
+		if(rhs.R[0] != R[0] || rhs.R[1] != R[1])
+			fail("fields with different resolutions cannot be multiplied.");
+
+		//create a field to store the result (zeroed by its constructor)
+		field<T, D> result(R[0], R[1]);
+		if(R[0] == 0 || R[1] == 0) return result;
+
+		//create one thread for each detector pixel
+		const unsigned int B = block_edge();
+		dim3 dimBlock(B, B);
+		dim3 dimGrid(blocks_for(R[0], B), blocks_for(R[1], B));
+
+		for(unsigned int n=0; n<D; n++){
+			//a component that is not allocated in one of the operands is left zeroed in the result
+			if(X[n] == NULL || rhs.X[n] == NULL) continue;
+			rts::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]);
+			HANDLE_ERROR(cudaGetLastError());		//report invalid launches
+		}
+
+		return result;
+	}
+
+	//return the buffer of component n (NULL if n is not a valid component)
+	T* ptr(unsigned int n = 0){
+		if(n < D)
+			return X[n];
+		else return NULL;
+	}
+
+	//return the vector component at position (u, v)
+	//	the program is terminated if the position is outside of the field
+	vec<T, D> get(unsigned int u, unsigned int v){
+
+		if(u >= R[0] || v >= R[1])
+			fail("position passed to get() is outside of the field.");
+
+		//offset of the sample inside of each component buffer
+		const size_t i = (size_t)v * R[0] + u;
+
+		vec<T, D> result;
+		for(unsigned int d=0; d<D; d++){
+			if(X[d] == NULL)
+				fail("get() called on a component without allocated data.");
+			HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + i, sizeof(T), cudaMemcpyDeviceToHost));
+		}
+
+		return result;
+	}
+
+	//set all components of the field to zero
+	void clear(){
+		for(unsigned int n=0; n<D; n++)
+			if(X[n] != NULL)
+				HANDLE_ERROR(cudaMemset(X[n], 0, slice_bytes()));
+	}
+
+};
+
+}		//end namespace rts
+#endif
 \ No newline at end of file
@@ -247,8 +247,6 @@ public:
         	rts::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]);
  
         return result;
-
-
     }
  
 	///copy constructor
@@ -154,9 +154,9 @@ public:
 	{
 		std::stringstream ss;
 		vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
-		ss<<std::left<<"B="<<setfill('-')<<setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
-		ss<<setfill(' ')<<setw(23)<<"|"<<"|"<<std::endl<<setw(23)<<"|"<<"|"<<std::endl;
-		ss<<std::left<<"A="<<setfill('-')<<setw(20)<<A<<">"<<"D="<<A + X;
+		ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
+		ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl;
+		ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X;
  
         return ss.str();