#ifndef RTS_FIELD_CUH #define RTS_FIELD_CUH #include #include #include #include "cublas_v2.h" #include #include "../math/rect.h" #include "../cuda/threads.h" #include "../cuda/error.h" #include "../cuda/devices.h" #include "../visualization/colormap.h" namespace stim{ //multiply R = X * Y template __global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){ int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= r0 || iv >= r1) return; //compute the index into the field int i = iv*r0 + iu; //calculate and store the result R[i] = X[i] * Y[i]; } //assign a constant value to all points template __global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){ int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= r0 || iv >= r1) return; //compute the index into the field int i = iv*r0 + iu; //calculate and store the result ptr[i] = val; } //crop the field to the new dimensions (width x height) template __global__ void gpu_field_crop(T* dest, T* source, unsigned int r0, unsigned int r1, unsigned int width, unsigned int height){ int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= width || iv >= height) return; //compute the index into the field int is = iv*r0 + iu; int id = iv*width + iu; //calculate and store the result dest[id] = source[is]; } template class field{ protected: T* X[D]; //pointer to the field data unsigned int R[2]; //field resolution stim::rect shape; //position and shape of the field slice //calculates the optimal block and grid sizes using information from the GPU void cuda_params(dim3& grids, dim3& blocks){ int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); //create one thread for each detector pixel blocks = dim3(SQRT_BLOCK, SQRT_BLOCK); grids = dim3((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); } //find the maximum value of component n T find_max(unsigned int n){ cublasStatus_t stat; cublasHandle_t handle; //create a CUBLAS handle stat = cublasCreate(&handle); if(stat != CUBLAS_STATUS_SUCCESS){ std::cout<<"CUBLAS Error: initialization failed"< process_filename(std::string name){ std::stringstream ss(name); std::string item; std::vector elems; while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension) { elems.push_back(item); } std::string prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters) std::string ext = elems[1]; //file extension (ex. .bmp, .png) ext = std::string(".") + ext; //add a period back into the extension size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'') size_t i1 = prefix.find_last_of("?"); std::string postfix = prefix.substr(i1+1); prefix = prefix.substr(0, i0); unsigned int digits = i1 - i0 + 1; //compute the number of wildcards std::vector flist; //create a vector of file names //fill the list for(unsigned int d=0; d>> (X[n], rhs, R[0], R[1]); return *this; } //assignment of vector component field & operator= (const vec rhs){ int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); //create one thread for each detector pixel dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); //assign the constant value to all positions and dimensions for(unsigned int n=0; n>> (X[n], rhs.v[n], R[0], R[1]); return *this; } //multiply two fields (element-wise multiplication) field operator* (const field & rhs){ int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); //create one thread for each detector pixel dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); //create a scalar field to store the result field result(R[0], R[1]); for(int n=0; n>> (result.X[n], X[n], rhs.X[n], R[0], R[1]); return result; } T* ptr(unsigned int n = 0){ if(n < D) return X[n]; else return NULL; } //return the vector component at position (u, v) vec get(unsigned int u, unsigned int v){ vec result; for(unsigned int d=0; d crop(unsigned int width, unsigned int height){ int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); //create one thread for each detector pixel dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((width + SQRT_BLOCK -1)/SQRT_BLOCK, (height + SQRT_BLOCK - 1)/SQRT_BLOCK); //create a scalar field to store the result field result(width, height); for(int n=0; n>> (result.X[n], X[n], R[0], R[1], width, height); return result; } //save an image representing component n void toImage(std::string filename, unsigned int n = 0, bool positive = false, stim::colormapType cmap = stim::cmBrewer){ T max_val = find_max(n); //find the maximum value if(positive) //if the field is positive, use the range [0 max_val] stim::gpu2image(X[n], filename, R[0], R[1], 0, max_val, cmap); else stim::gpu2image(X[n], filename, R[0], R[1], -max_val, max_val, cmap); } }; } //end namespace rts #endif