Commit 41acaf5d09dec699149063551ea2864fd819021e

Authored by David Mayerich
1 parent df036f4c

added a CUDA Gaussian blur function for 2D images

Showing 2 changed files with 157 additions and 0 deletions   Show diff stats
stim/cuda/imageproc/gaussian_blur.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_GAUSSIAN_BLUR_H
  2 +#define STIM_CUDA_GAUSSIAN_BLUR_H
  3 +
  4 +#include <iostream>
  5 +#include <cuda.h>
  6 +#include "gaussian_blur.cuh"
  7 +#include <stim/cuda/devices.h>
  8 +
  9 +#define pi 3.14159
  10 +
  11 +// This CUDA kernel applies a Gaussian blur along the x axis
  12 +template<typename T>
  13 +__global__ void gaussian_blur_x(T* out, T* in, T sigma, unsigned int x, unsigned int y){
  14 +
  15 + //calculate the 1D image index for this thread
  16 + int i = blockIdx.x * blockDim.x + threadIdx.x;
  17 +
  18 + //convert this to 2D pixel coordinates
  19 + int yi = i / x;
  20 + int xi = i - (yi * x);
  21 +
  22 + int k_size = sigma * 4; //calculate the kernel size
  23 +
  24 + int gx, gy, gi; //variables to store global coordinates
  25 + T sum = 0; //variable stores the integral of the kernel times the image
  26 + T G; //stores the value of the gaussian kernel
  27 +
  28 + out[i] = 0;
  29 + //for each element of the kernel
  30 + for(int ki = -k_size; ki <= k_size; ki++){
  31 +
  32 + //calculate the gaussian value
  33 + G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma));
  34 +
  35 + //calculate the global coordinates for this point in the kernel
  36 + gx = xi + ki;
  37 + gy = yi;
  38 +
  39 + //make sure we are inside the bounds of the image
  40 + if(gx >= 0 && gy >= 0 && gx < x && gy < y){
  41 + gi = gy * x + gx;
  42 +
  43 + //perform the integration
  44 + sum += G * in[gi];
  45 + }
  46 + }
  47 +
  48 + //set the output value to the integral of the function times the kernel
  49 + out[i] = sum;
  50 +}
  51 +
  52 +// This CUDA kernel applies a Gaussian blur along the y axis.
  53 +template<typename T>
  54 +__global__ void gaussian_blur_y(T* out, T* in, T sigma, unsigned int x, unsigned int y){
  55 +
  56 + //calculate the 1D image index for this thread
  57 + int i = blockIdx.x * blockDim.x + threadIdx.x;
  58 +
  59 + //convert this to 2D pixel coordinates
  60 + int yi = i / x;
  61 + int xi = i - (yi * x);
  62 +
  63 + int k_size = sigma * 4; //calculate the kernel size
  64 +
  65 + int gx, gy, gi; //variables to store global coordinates
  66 + T sum = 0; //variable stores the integral of the kernel times the image
  67 + T G; //stores the value of the gaussian kernel
  68 +
  69 + out[i] = 0;
  70 + //for each element of the kernel
  71 + for(int ki = -k_size; ki <= k_size; ki++){
  72 +
  73 + //calculate the gaussian value
  74 + G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma));
  75 +
  76 + //calculate the global coordinates for this point in the kernel
  77 + gx = xi;
  78 + gy = yi + ki;
  79 +
  80 + //make sure we are inside the bounds of the image
  81 + if(gx >= 0 && gy >= 0 && gx < x && gy < y){
  82 + gi = gy * x + gx;
  83 +
  84 + //perform the integration
  85 + sum += G * in[gi];
  86 + }
  87 + }
  88 +
  89 + //set the output value to the integral of the function times the kernel
  90 + out[i] = sum;
  91 +}
  92 +
  93 +/// Applies a Gaussian blur to a 2D image stored on the GPU
  94 +template<typename T>
  95 +void gpu_gaussian_blur_2d(T* image, T sigma, unsigned int x, unsigned int y){
  96 +
  97 + //get the number of pixels in the image
  98 + unsigned int pixels = x * y;
  99 + unsigned int bytes = sizeof(T) * pixels;
  100 +
  101 + //allocate temporary space on the GPU
  102 + T* gpuI1;
  103 + cudaMalloc(&gpuI1, bytes);
  104 +
  105 + //get the maximum number of threads per block for the CUDA device
  106 + int threads = stim::maxThreadsPerBlock();
  107 +
  108 + //calculate the number of blocks
  109 + int blocks = pixels / threads + (pixels%threads == 0 ? 0:1);
  110 +
  111 + //run the kernel for the x dimension
  112 + gaussian_blur_x <<< blocks, threads >>>(gpuI1, image, sigma, x, y);
  113 +
  114 + //run the kernel for the y dimension
  115 + gaussian_blur_y <<< blocks, threads >>>(image, gpuI1, sigma, x, y);
  116 +
  117 +}
  118 +
  119 +/// Applies a Gaussian blur to a 2D image stored on the CPU
  120 +template<typename T>
  121 +void cpu_gaussian_blur_2d(T* image, T sigma, unsigned int x, unsigned int y){
  122 +
  123 + //get the number of pixels in the image
  124 + unsigned int pixels = x * y;
  125 + unsigned int bytes = sizeof(T) * pixels;
  126 +
  127 + //allocate space on the GPU
  128 + T* gpuI0;
  129 + cudaMalloc(&gpuI0, bytes);
  130 +
  131 + //copy the image data to the GPU
  132 + cudaMemcpy(gpuI0, image, bytes, cudaMemcpyHostToDevice);
  133 +
  134 + //run the GPU-based version of the algorithm
  135 + gpu_gaussian_blur_2d<T>(gpuI0, sigma, x, y);
  136 +
  137 + //copy the image data from the device
  138 + cudaMemcpy(image, gpuI0, bytes, cudaMemcpyDeviceToHost);
  139 +}
  140 +
  141 +#endif
... ...
stim/image/image.h
... ... @@ -28,6 +28,11 @@ public:
28 28 img.load(filename.c_str());
29 29 }
30 30  
  31 + /// Constructor initializes an image to a given size
  32 + image(unsigned int x, unsigned int y = 1, unsigned int z = 1){
  33 + img = cimg_library::CImg<T>(x, y, z);
  34 + }
  35 +
31 36 //Load an image from a file
32 37 void load(std::string filename){
33 38 img.load(filename.c_str());
... ... @@ -72,6 +77,17 @@ public:
72 77 data[x * C + c] = ptr[c * X + x];
73 78 }
74 79  
  80 + image<T> channel(unsigned int c){
  81 +
  82 + //create a new image
  83 + image<T> single;
  84 +
  85 + single.img = img.channel(c);
  86 +
  87 + return single;
  88 +
  89 + }
  90 +
75 91 unsigned int channels(){
76 92 return (unsigned int)img.spectrum();
77 93 }
... ...