Commit 24aab7c9a3fce4b4e78bf210874c7efc0d1e66f8

Authored by David Mayerich
1 parent ecfd14df

added field and complexfield classes

math/complexfield.cuh 0 → 100644
  1 +#ifndef RTS_COMPLEXFIELD_H
  2 +#define RTS_COMPLEXFIELD_H
  3 +
  4 +#include "cublas_v2.h"
  5 +#include <cuda_runtime.h>
  6 +
  7 +#include "../math/field.cuh"
  8 +#include "../math/complex.h"
  9 +
  10 +namespace rts{
  11 +
  12 +/*This class stores functions for saving images of complex fields
  13 +*/
  14 +template<typename T, unsigned int D>
  15 +class complexfield : public field< rts::complex<T>, D >{
  16 + using field< rts::complex<T>, D >::R;
  17 + using field< rts::complex<T>, D >::X;
  18 + using field< rts::complex<T>, D >::shape;
  19 +
  20 +public:
  21 +
  22 + //find the maximum value of component n
  23 + rts::complex<T> find_max(unsigned int n){
  24 + cublasStatus_t stat;
  25 + cublasHandle_t handle;
  26 +
  27 + //create a CUBLAS handle
  28 + stat = cublasCreate(&handle);
  29 + if(stat != CUBLAS_STATUS_SUCCESS){
  30 + std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
  31 + exit(1);
  32 + }
  33 +
  34 + int L = R[0] * R[1]; //compute the number of discrete points in a slice
  35 + int index; //result of the max operation
  36 + rts::complex<T> result;
  37 +
  38 + if(sizeof(T) == 4)
  39 + stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index);
  40 + else
  41 + stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index);
  42 +
  43 + index -= 1; //adjust for 1-based indexing
  44 +
  45 + //if there was a GPU error, terminate
  46 + if(stat != CUBLAS_STATUS_SUCCESS){
  47 + std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
  48 + exit(1);
  49 + }
  50 +
  51 + //retrieve the maximum value for this slice and store it in the maxVal array
  52 + std::cout<<X[n]<<std::endl;
  53 + HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(rts::complex<T>), cudaMemcpyDeviceToHost));
  54 + return result;
  55 + }
  56 +
  57 +public:
  58 +
  59 + //constructor (no parameters)
  60 + complexfield() : field<rts::complex<T>, D>(){};
  61 +
  62 + //constructor (resolution specified)
  63 + complexfield(unsigned int r0, unsigned int r1) : field<rts::complex<T>, D>(r0, r1){};
  64 +
  65 + //assignment operator (scalar value)
  66 + complexfield & operator= (const complex<T> rhs){
  67 +
  68 + field< complex<T>, D >::operator=(rhs);
  69 + return *this;
  70 + }
  71 +
  72 + //assignment operator (vector value)
  73 + complexfield & operator= (const vec< complex<T>, D > rhs){
  74 +
  75 + field< complex<T>, D >::operator=(rhs);
  76 + return *this;
  77 + }
  78 +
  79 + void toImage(std::string filename, unsigned int n){
  80 +
  81 + }
  82 +
  83 +
  84 +};
  85 +
  86 +
  87 +} //end namespace rts
  88 +
  89 +
  90 +#endif
0 91 \ No newline at end of file
... ...
math/field.cuh 0 → 100644
  1 +#ifndef RTS_FIELD_CUH
  2 +#define RTS_FIELD_CUH
  3 +
  4 +#include <vector>
  5 +#include <string>
  6 +#include <sstream>
  7 +
  8 +#include "../math/rect.h"
  9 +#include "../cuda/threads.h"
  10 +#include "../cuda/error.h"
  11 +#include "../cuda/devices.h"
  12 +
  13 +
  14 +namespace rts{
  15 +
  16 +//multiply R = X * Y
  17 +template<typename T>
  18 +__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){
  19 +
  20 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  21 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  22 +
  23 + //make sure that the thread indices are in-bounds
  24 + if(iu >= r0 || iv >= r1) return;
  25 +
  26 + //compute the index into the field
  27 + int i = iv*r0 + iu;
  28 +
  29 + //calculate and store the result
  30 + R[i] = X[i] * Y[i];
  31 +}
  32 +
  33 +//assign a constant value to all points
  34 +template<typename T>
  35 +__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){
  36 +
  37 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  38 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  39 +
  40 + //make sure that the thread indices are in-bounds
  41 + if(iu >= r0 || iv >= r1) return;
  42 +
  43 + //compute the index into the field
  44 + int i = iv*r0 + iu;
  45 +
  46 + //calculate and store the result
  47 + ptr[i] = val;
  48 +}
  49 +
  50 +template<typename T, unsigned int D>
  51 +class field{
  52 +
  53 +protected:
  54 +
  55 + T* X[D]; //pointer to the field data
  56 + unsigned int R[2]; //field resolution
  57 + rts::rect<T> shape; //position and shape of the field slice
  58 +
  59 +public:
  60 +
  61 + //returns a list of file names given an input string with wild cards
  62 + std::vector<std::string> process_filename(std::string name){
  63 + std::stringstream ss(name);
  64 + std::string item;
  65 + std::vector<std::string> elems;
  66 + while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension)
  67 + {
  68 + elems.push_back(item);
  69 + }
  70 +
  71 + std::string prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters)
  72 + std::string ext = elems[1]; //file extension (ex. .bmp, .png)
  73 + ext = std::string(".") + ext; //add a period back into the extension
  74 +
  75 + size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'')
  76 + size_t i1 = prefix.find_last_of("?");
  77 +
  78 + std::string postfix = prefix.substr(i1+1);
  79 + prefix = prefix.substr(0, i0);
  80 +
  81 + unsigned int digits = i1 - i0 + 1; //compute the number of wildcards
  82 +
  83 + std::vector<std::string> flist; //create a vector of file names
  84 + //fill the list
  85 + for(unsigned int d=0; d<D; d++){
  86 + std::stringstream ss; //assemble the file name
  87 + ss<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext;
  88 + flist.push_back(ss.str());
  89 + }
  90 +
  91 + return flist;
  92 + }
  93 +
  94 + void init(){
  95 + for(unsigned int n=0; n<D; n++)
  96 + X[n] = NULL;
  97 + }
  98 + void destroy(){
  99 + for(unsigned int n=0; n<D; n++)
  100 + if(X[n] != NULL)
  101 + HANDLE_ERROR(cudaFree(X[n]));
  102 + }
  103 +
  104 +
  105 +public:
  106 + //field constructor
  107 + field(){
  108 + R[0] = R[1] = 0;
  109 + init();
  110 + }
  111 +
  112 + field(unsigned int x, unsigned int y){
  113 + //set the resolution
  114 + R[0] = x;
  115 + R[1] = y;
  116 + //allocate memory on the GPU
  117 + for(unsigned int n=0; n<D; n++){
  118 + HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(T) * R[0] * R[1] ));
  119 + }
  120 + clear(); //zero the field
  121 + }
  122 +
  123 + ///copy constructor
  124 + field(const field &rhs){
  125 + //first make a shallow copy
  126 + R[0] = rhs.R[0];
  127 + R[1] = rhs.R[1];
  128 +
  129 + for(unsigned int n=0; n<D; n++){
  130 + //do we have to make a deep copy?
  131 + if(rhs.X[n] == NULL)
  132 + X[n] = NULL; //no
  133 + else{
  134 + //allocate the necessary memory
  135 + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1]));
  136 +
  137 + //copy the slice
  138 + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice));
  139 + }
  140 + }
  141 + }
  142 +
  143 + ~field(){
  144 + destroy();
  145 + }
  146 +
  147 + //assignment operator
  148 + field & operator= (const field & rhs){
  149 +
  150 + //de-allocate any existing GPU memory
  151 + destroy();
  152 +
  153 + //copy the slice resolution
  154 + R[0] = rhs.R[0];
  155 + R[1] = rhs.R[1];
  156 +
  157 + for(unsigned int n=0; n<D; n++)
  158 + {
  159 + //allocate the necessary memory
  160 + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1]));
  161 + //copy the slice
  162 + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice));
  163 + }
  164 + return *this;
  165 + }
  166 +
  167 + field & operator= (const T rhs){
  168 +
  169 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  170 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  171 +
  172 + //create one thread for each detector pixel
  173 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  174 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  175 +
  176 + //assign the constant value to all positions and dimensions
  177 + for(int n=0; n<D; n++)
  178 + rts::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]);
  179 +
  180 + return *this;
  181 + }
  182 +
  183 + //assignment of vector component
  184 + field & operator= (const vec<T, D> rhs){
  185 +
  186 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  187 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  188 +
  189 + //create one thread for each detector pixel
  190 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  191 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  192 +
  193 + //assign the constant value to all positions and dimensions
  194 + for(unsigned int n=0; n<D; n++)
  195 + rts::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]);
  196 +
  197 + return *this;
  198 +
  199 + }
  200 +
  201 + //multiply two fields (element-wise multiplication)
  202 + field<T, D> operator* (const field & rhs){
  203 +
  204 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  205 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  206 +
  207 + //create one thread for each detector pixel
  208 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  209 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  210 +
  211 + //create a scalar field to store the result
  212 + field<T, D> result(R[0], R[1]);
  213 +
  214 + for(int n=0; n<D; n++)
  215 + rts::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]);
  216 +
  217 + return result;
  218 + }
  219 +
  220 + T* ptr(unsigned int n = 0){
  221 + if(n < D)
  222 + return X[n];
  223 + else return NULL;
  224 + }
  225 +
  226 + //return the vector component at position (u, v)
  227 + vec<T, D> get(unsigned int u, unsigned int v){
  228 +
  229 + vec<T, D> result;
  230 + for(unsigned int d=0; d<D; d++){
  231 + HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + v*R[0] + u, sizeof(T), cudaMemcpyDeviceToHost));
  232 + }
  233 +
  234 + return result;
  235 + }
  236 +
  237 + //set all components of the field to zero
  238 + void clear(){
  239 + for(unsigned int n=0; n<D; n++)
  240 + if(X[n] != NULL)
  241 + HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1]));
  242 + }
  243 +
  244 +};
  245 +
  246 +} //end namespace rts
  247 +#endif
0 248 \ No newline at end of file
... ...
math/realfield.cuh
... ... @@ -247,8 +247,6 @@ public:
247 247 rts::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]);
248 248  
249 249 return result;
250   -
251   -
252 250 }
253 251  
254 252 ///copy constructor
... ...
math/rect.h
... ... @@ -154,9 +154,9 @@ public:
154 154 {
155 155 std::stringstream ss;
156 156 vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
157   - ss<<std::left<<"B="<<setfill('-')<<setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
158   - ss<<setfill(' ')<<setw(23)<<"|"<<"|"<<std::endl<<setw(23)<<"|"<<"|"<<std::endl;
159   - ss<<std::left<<"A="<<setfill('-')<<setw(20)<<A<<">"<<"D="<<A + X;
  157 + ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
  158 + ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl;
  159 + ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X;
160 160  
161 161 return ss.str();
162 162  
... ...