From 2e5e3a2645ea3b53494717fc90ac414c0d9ef448 Mon Sep 17 00:00:00 2001 From: David Date: Wed, 5 Oct 2016 18:06:46 -0500 Subject: [PATCH] added 2D convolution algorithm --- stim/image/image.h | 126 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------- stim/math/filters/conv2.h | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+), 51 deletions(-) create mode 100644 stim/math/filters/conv2.h diff --git a/stim/image/image.h b/stim/image/image.h index 9a85584..7a461b9 100644 --- a/stim/image/image.h +++ b/stim/image/image.h @@ -1,8 +1,10 @@ #ifndef STIM_IMAGE_H #define STIM_IMAGE_H -#include -#include +#ifdef USING_OPENCV + #include + #include +#endif #include #include #include @@ -18,7 +20,6 @@ namespace stim{ template class image{ - //cimg_library::CImg img; T* img; //pointer to the image data (interleaved RGB for color) size_t R[3]; @@ -57,18 +58,8 @@ class image{ return y * C() * X() + x * C() + c; } - +#ifdef USING_OPENCV int cv_type(){ - // The following is C++ 11 code, but causes problems on some compilers (ex. nvcc). Below is my best approximation to a solution - - //if(std::is_same::value) return CV_MAKETYPE(CV_8U, (int)C()); - //if(std::is_same::value) return CV_MAKETYPE(CV_8S, (int)C()); - //if(std::is_same::value) return CV_MAKETYPE(CV_16U, (int)C()); - //if(std::is_same::value) return CV_MAKETYPE(CV_16S, (int)C()); - //if(std::is_same::value) return CV_MAKETYPE(CV_32S, (int)C()); - //if(std::is_same::value) return CV_MAKETYPE(CV_32F, (int)C()); - //if(std::is_same::value) return CV_MAKETYPE(CV_64F, (int)C()); - if(typeid(T) == typeid(unsigned char)) return CV_MAKETYPE(CV_8U, (int)C()); if(typeid(T) == typeid(char)) return CV_MAKETYPE(CV_8S, (int)C()); if(typeid(T) == typeid(unsigned short)) return CV_MAKETYPE(CV_16U, (int)C()); @@ -80,18 +71,9 @@ class image{ std::cout<<"ERROR in stim::image::cv_type - no valid data type found"<::value) return UCHAR_MAX; - //if(std::is_same::value) return SHRT_MAX; - //if(std::is_same::value) return UINT_MAX; - //if(std::is_same::value) return ULONG_MAX; - //if(std::is_same::value) return ULLONG_MAX; - //if(std::is_same::value) return 1.0f; - //if(std::is_same::value) return 1.0; if(typeid(T) == typeid(unsigned char)) return UCHAR_MAX; if(typeid(T) == typeid(unsigned short)) return SHRT_MAX; @@ -164,23 +146,31 @@ public: exit(1); } - size_t c; //allocate space for the number of channels - unsigned char format[2]; //allocate space to hold the image format tag + size_t nc; //allocate space for the number of channels + char format[2]; //allocate space to hold the image format tag infile.read(format, 2); //read the image format tag - infile.seekg(1); //skip the newline character + infile.seekg(1, std::ios::cur); //skip the newline character if (format[0] != 'P') { std::cout << "Error in image::load_netpbm() - file format tag is invalid: " << format[0] << format[1] << std::endl; exit(1); } - if (format[1] == '5') c = 1; //get the number of channels from the format flag - else if (format[1] == '6') c = 3; + if (format[1] == '5') nc = 1; //get the number of channels from the format flag + else if (format[1] == '6') nc = 3; else { std::cout << "Error in image::load_netpbm() - file format tag is invalid: " << format[0] << format[1] << std::endl; exit(1); } + + unsigned char c; //stores a character + while (infile.peek() == '#') { //if the next character indicates the start of a comment + while (true) { + c = infile.get(); + if (c == 0x0A) break; + } + } + std::string sw; //create a string to store the width of the image - unsigned char c; while(true){ c = infile.get(); //get a single character if (c == ' ') break; //exit if we've encountered a space @@ -194,18 +184,38 @@ public: if (c == 0x0A) break; sh.push_back(c); } - size_t h = atoi(ah.c_str()); //convert the string into an integer - allocate(w, h, c); //allocate space for the image - infile.read(img, size()); //copy the binary data from the file to the image + while (true) { //skip the maximum value + c = infile.get(); + if (c == 0x0A) break; + } + size_t h = atoi(sh.c_str()); //convert the string into an integer + + allocate(w, h, nc); //allocate space for the image + infile.read((char*)img, size()); //copy the binary data from the file to the image infile.close(); } - +#ifdef USING_OPENCV + void from_opencv(unsigned char* buffer, size_t width, size_t height) { + allocate(width, height, 3); + T value; + size_t i; + for (size_t c = 0; c < C(); c++) { //copy directly + for (size_t y = 0; y < Y(); y++) { + for (size_t x = 0; x < X(); x++) { + i = y * X() * C() + x * C() + (2 - c); + value = buffer[i]; + img[idx(x, y, c)] = value; + } + } + } + } +#endif /// Load an image from a file void load(std::string filename){ - +#ifdef USING_OPENCV cv::Mat cvImage = cv::imread(filename, CV_LOAD_IMAGE_UNCHANGED); //use OpenCV to open the image file if(!cvImage.data){ std::cout<<"ERROR stim::image::load() - unable to find image "< + operator image() { + //create a new image + //image r(X(), Y(), C()); + V* dst = (V*)malloc(size() * sizeof(V)); + + for (size_t x = 0; x < X(); x++) { + for (size_t y = 0; y < Y(); y++) { + for (size_t c = 0; c < C(); c++) { + dst[idx(x, y, c)] = (V)img[idx(x, y, c)]; + } + } + } + + image r(dst, X(), Y(), C()); + return r; + } + }; }; //end namespace stim diff --git a/stim/math/filters/conv2.h b/stim/math/filters/conv2.h new file mode 100644 index 0000000..760dea4 --- /dev/null +++ b/stim/math/filters/conv2.h @@ -0,0 +1,106 @@ +#ifndef STIM_CUDA_CONV2_H +#define STIM_CUDA_CONV2_H +//#define __CUDACC__ + +#ifdef __CUDACC__ +#include +#endif + +namespace stim { + + //Kernel function that performs the 2D convolution. + template + __global__ void kernel_conv2(T* out, T* in, T* kernel, size_t sx, size_t sy, size_t kx, size_t ky) { + size_t xi = blockIdx.x * blockDim.x + threadIdx.x; //threads correspond to indices into the output image + size_t yi = blockIdx.y * blockDim.y + threadIdx.y; + + size_t X = sx - kx + 1; //calculate the size of the output image + size_t Y = sy - ky + 1; + + if (xi >= X || yi >= Y) return; //returns if the thread is outside of the output image + + //loop through the kernel + size_t kxi, kyi; + T v = 0; + for (kyi = 0; kyi < ky; kyi++) { + for (kxi = 0; kxi < kx; kxi++) { + v += in[(yi + kyi) * sx + xi + kxi] * kernel[kyi * kx + kxi]; + } + } + out[yi * X + xi] = v; //write the result to global memory + + } + + //Performs a convolution of a 2D image using the GPU. All pointers are assumed to be to memory on the current device. + //@param out is a pointer to the output image + //@param in is a pointer to the input image + //@param sx is the size of the input image along X + //@param sy is the size of the input image along Y + //@param kx is the size of the kernel along X + //@param ky is the size of the kernel along Y + template + void gpu_conv2(T* out, T* in, T* kernel, size_t sx, size_t sy, size_t kx, size_t ky) { + cudaDeviceProp p; + HANDLE_ERROR(cudaGetDeviceProperties(&p, 0)); + size_t tmax = p.maxThreadsPerBlock; + dim3 tn(sqrt(tmax), sqrt(tmax)); //calculate the block dimensions + size_t X = sx - kx + 1; //calculate the size of the output image + size_t Y = sy - ky + 1; + dim3 bn(X / tn.x + 1, Y / tn.y + 1); //calculate the grid dimensions + kernel_conv2 <<>> (out, in, kernel, sx, sy, kx, ky); //launch the kernel + } + + //Performs a convolution of a 2D image. Only valid pixels based on the kernel are returned. + // As a result, the output image will be smaller than the input image by (kx-1, ky-1) + //@param out is a pointer to the output image + //@param in is a pointer to the input image + //@param sx is the size of the input image along X + //@param sy is the size of the input image along Y + //@param kx is the size of the kernel along X + //@param ky is the size of the kernel along Y + template + void cpu_conv2(T* out, T* in, T* kernel, size_t sx, size_t sy, size_t kx, size_t ky) { + size_t X = sx - kx + 1; //x size of the output image + size_t Y = sy - ky + 1; //y size of the output image + +#ifdef __CUDACC__ + //allocate memory and copy everything to the GPU + T* gpu_in; + HANDLE_ERROR(cudaMalloc(&gpu_in, sx * sy * sizeof(T))); + HANDLE_ERROR(cudaMemcpy(gpu_in, in, sx * sy * sizeof(T), cudaMemcpyHostToDevice)); + T* gpu_kernel; + HANDLE_ERROR(cudaMalloc(&gpu_kernel, kx * ky * sizeof(T))); + HANDLE_ERROR(cudaMemcpy(gpu_kernel, kernel, kx * ky * sizeof(T), cudaMemcpyHostToDevice)); + T* gpu_out; + HANDLE_ERROR(cudaMalloc(&gpu_out, X * Y * sizeof(T))); + gpu_conv2(gpu_out, gpu_in, gpu_kernel, sx, sy, kx, ky); //execute the GPU kernel + HANDLE_ERROR(cudaMemcpy(out, gpu_out, X * Y * sizeof(T), cudaMemcpyDeviceToHost)); //copy the result to the host + HANDLE_ERROR(cudaFree(gpu_in)); + HANDLE_ERROR(cudaFree(gpu_kernel)); + HANDLE_ERROR(cudaFree(gpu_out)); +#else + + + T v; //register stores the integral of the current pixel value + size_t yi, xi, kyi, kxi, yi_kyi_sx; + for (yi = 0; yi < Y; yi++) { //for each pixel in the output image + for (xi = 0; xi < X; xi++) { + v = 0; + for (kyi = 0; kyi < ky; kyi++) { //for each pixel in the kernel + yi_kyi_sx = (yi + kyi) * sx; + for (kxi = 0; kxi < kx; kxi++) { + v += in[yi_kyi_sx + xi + kxi] * kernel[kyi * kx + kxi]; + } + } + out[yi * X + xi] = v; //save the result to the output array + } + } + +#endif + } + + +} + + +#endif \ No newline at end of file -- libgit2 0.21.4