From 41acaf5d09dec699149063551ea2864fd819021e Mon Sep 17 00:00:00 2001 From: David Mayerich Date: Tue, 21 Jul 2015 18:27:16 -0500 Subject: [PATCH] added a CUDA Gaussian blur function for 2D images --- stim/cuda/imageproc/gaussian_blur.cuh | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ stim/image/image.h | 16 ++++++++++++++++ 2 files changed, 157 insertions(+), 0 deletions(-) create mode 100644 stim/cuda/imageproc/gaussian_blur.cuh diff --git a/stim/cuda/imageproc/gaussian_blur.cuh b/stim/cuda/imageproc/gaussian_blur.cuh new file mode 100644 index 0000000..99056af --- /dev/null +++ b/stim/cuda/imageproc/gaussian_blur.cuh @@ -0,0 +1,141 @@ +#ifndef STIM_CUDA_GAUSSIAN_BLUR_H +#define STIM_CUDA_GAUSSIAN_BLUR_H + +#include +#include +#include "gaussian_blur.cuh" +#include + +#define pi 3.14159 + +// This CUDA kernel applies a Gaussian blur along the x axis +template +__global__ void gaussian_blur_x(T* out, T* in, T sigma, unsigned int x, unsigned int y){ + + //calculate the 1D image index for this thread + int i = blockIdx.x * blockDim.x + threadIdx.x; + + //convert this to 2D pixel coordinates + int yi = i / x; + int xi = i - (yi * x); + + int k_size = sigma * 4; //calculate the kernel size + + int gx, gy, gi; //variables to store global coordinates + T sum = 0; //variable stores the integral of the kernel times the image + T G; //stores the value of the gaussian kernel + + out[i] = 0; + //for each element of the kernel + for(int ki = -k_size; ki <= k_size; ki++){ + + //calculate the gaussian value + G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma)); + + //calculate the global coordinates for this point in the kernel + gx = xi + ki; + gy = yi; + + //make sure we are inside the bounds of the image + if(gx >= 0 && gy >= 0 && gx < x && gy < y){ + gi = gy * x + gx; + + //perform the integration + sum += G * in[gi]; + } + } + + //set the output value to the integral of the function times the kernel + out[i] = sum; +} + +// This CUDA kernel applies a Gaussian blur along the y axis. +template +__global__ void gaussian_blur_y(T* out, T* in, T sigma, unsigned int x, unsigned int y){ + + //calculate the 1D image index for this thread + int i = blockIdx.x * blockDim.x + threadIdx.x; + + //convert this to 2D pixel coordinates + int yi = i / x; + int xi = i - (yi * x); + + int k_size = sigma * 4; //calculate the kernel size + + int gx, gy, gi; //variables to store global coordinates + T sum = 0; //variable stores the integral of the kernel times the image + T G; //stores the value of the gaussian kernel + + out[i] = 0; + //for each element of the kernel + for(int ki = -k_size; ki <= k_size; ki++){ + + //calculate the gaussian value + G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma)); + + //calculate the global coordinates for this point in the kernel + gx = xi; + gy = yi + ki; + + //make sure we are inside the bounds of the image + if(gx >= 0 && gy >= 0 && gx < x && gy < y){ + gi = gy * x + gx; + + //perform the integration + sum += G * in[gi]; + } + } + + //set the output value to the integral of the function times the kernel + out[i] = sum; +} + +/// Applies a Gaussian blur to a 2D image stored on the GPU +template +void gpu_gaussian_blur_2d(T* image, T sigma, unsigned int x, unsigned int y){ + + //get the number of pixels in the image + unsigned int pixels = x * y; + unsigned int bytes = sizeof(T) * pixels; + + //allocate temporary space on the GPU + T* gpuI1; + cudaMalloc(&gpuI1, bytes); + + //get the maximum number of threads per block for the CUDA device + int threads = stim::maxThreadsPerBlock(); + + //calculate the number of blocks + int blocks = pixels / threads + (pixels%threads == 0 ? 0:1); + + //run the kernel for the x dimension + gaussian_blur_x <<< blocks, threads >>>(gpuI1, image, sigma, x, y); + + //run the kernel for the y dimension + gaussian_blur_y <<< blocks, threads >>>(image, gpuI1, sigma, x, y); + +} + +/// Applies a Gaussian blur to a 2D image stored on the CPU +template +void cpu_gaussian_blur_2d(T* image, T sigma, unsigned int x, unsigned int y){ + + //get the number of pixels in the image + unsigned int pixels = x * y; + unsigned int bytes = sizeof(T) * pixels; + + //allocate space on the GPU + T* gpuI0; + cudaMalloc(&gpuI0, bytes); + + //copy the image data to the GPU + cudaMemcpy(gpuI0, image, bytes, cudaMemcpyHostToDevice); + + //run the GPU-based version of the algorithm + gpu_gaussian_blur_2d(gpuI0, sigma, x, y); + + //copy the image data from the device + cudaMemcpy(image, gpuI0, bytes, cudaMemcpyDeviceToHost); +} + +#endif diff --git a/stim/image/image.h b/stim/image/image.h index 6b763dc..c093405 100644 --- a/stim/image/image.h +++ b/stim/image/image.h @@ -28,6 +28,11 @@ public: img.load(filename.c_str()); } + /// Constructor initializes an image to a given size + image(unsigned int x, unsigned int y = 1, unsigned int z = 1){ + img = cimg_library::CImg(x, y, z); + } + //Load an image from a file void load(std::string filename){ img.load(filename.c_str()); @@ -72,6 +77,17 @@ public: data[x * C + c] = ptr[c * X + x]; } + image channel(unsigned int c){ + + //create a new image + image single; + + single.img = img.channel(c); + + return single; + + } + unsigned int channels(){ return (unsigned int)img.spectrum(); } -- libgit2 0.21.4