Commit 41acaf5d09dec699149063551ea2864fd819021e
1 parent
df036f4c
added a CUDA Gaussian blur function for 2D images
Showing
2 changed files
with
157 additions
and
0 deletions
Show diff stats
1 | +#ifndef STIM_CUDA_GAUSSIAN_BLUR_H | ||
2 | +#define STIM_CUDA_GAUSSIAN_BLUR_H | ||
3 | + | ||
4 | +#include <iostream> | ||
5 | +#include <cuda.h> | ||
6 | +#include "gaussian_blur.cuh" | ||
7 | +#include <stim/cuda/devices.h> | ||
8 | + | ||
9 | +#define pi 3.14159 | ||
10 | + | ||
11 | +// This CUDA kernel applies a Gaussian blur along the x axis | ||
12 | +template<typename T> | ||
13 | +__global__ void gaussian_blur_x(T* out, T* in, T sigma, unsigned int x, unsigned int y){ | ||
14 | + | ||
15 | + //calculate the 1D image index for this thread | ||
16 | + int i = blockIdx.x * blockDim.x + threadIdx.x; | ||
17 | + | ||
18 | + //convert this to 2D pixel coordinates | ||
19 | + int yi = i / x; | ||
20 | + int xi = i - (yi * x); | ||
21 | + | ||
22 | + int k_size = sigma * 4; //calculate the kernel size | ||
23 | + | ||
24 | + int gx, gy, gi; //variables to store global coordinates | ||
25 | + T sum = 0; //variable stores the integral of the kernel times the image | ||
26 | + T G; //stores the value of the gaussian kernel | ||
27 | + | ||
28 | + out[i] = 0; | ||
29 | + //for each element of the kernel | ||
30 | + for(int ki = -k_size; ki <= k_size; ki++){ | ||
31 | + | ||
32 | + //calculate the gaussian value | ||
33 | + G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma)); | ||
34 | + | ||
35 | + //calculate the global coordinates for this point in the kernel | ||
36 | + gx = xi + ki; | ||
37 | + gy = yi; | ||
38 | + | ||
39 | + //make sure we are inside the bounds of the image | ||
40 | + if(gx >= 0 && gy >= 0 && gx < x && gy < y){ | ||
41 | + gi = gy * x + gx; | ||
42 | + | ||
43 | + //perform the integration | ||
44 | + sum += G * in[gi]; | ||
45 | + } | ||
46 | + } | ||
47 | + | ||
48 | + //set the output value to the integral of the function times the kernel | ||
49 | + out[i] = sum; | ||
50 | +} | ||
51 | + | ||
52 | +// This CUDA kernel applies a Gaussian blur along the y axis. | ||
53 | +template<typename T> | ||
54 | +__global__ void gaussian_blur_y(T* out, T* in, T sigma, unsigned int x, unsigned int y){ | ||
55 | + | ||
56 | + //calculate the 1D image index for this thread | ||
57 | + int i = blockIdx.x * blockDim.x + threadIdx.x; | ||
58 | + | ||
59 | + //convert this to 2D pixel coordinates | ||
60 | + int yi = i / x; | ||
61 | + int xi = i - (yi * x); | ||
62 | + | ||
63 | + int k_size = sigma * 4; //calculate the kernel size | ||
64 | + | ||
65 | + int gx, gy, gi; //variables to store global coordinates | ||
66 | + T sum = 0; //variable stores the integral of the kernel times the image | ||
67 | + T G; //stores the value of the gaussian kernel | ||
68 | + | ||
69 | + out[i] = 0; | ||
70 | + //for each element of the kernel | ||
71 | + for(int ki = -k_size; ki <= k_size; ki++){ | ||
72 | + | ||
73 | + //calculate the gaussian value | ||
74 | + G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma)); | ||
75 | + | ||
76 | + //calculate the global coordinates for this point in the kernel | ||
77 | + gx = xi; | ||
78 | + gy = yi + ki; | ||
79 | + | ||
80 | + //make sure we are inside the bounds of the image | ||
81 | + if(gx >= 0 && gy >= 0 && gx < x && gy < y){ | ||
82 | + gi = gy * x + gx; | ||
83 | + | ||
84 | + //perform the integration | ||
85 | + sum += G * in[gi]; | ||
86 | + } | ||
87 | + } | ||
88 | + | ||
89 | + //set the output value to the integral of the function times the kernel | ||
90 | + out[i] = sum; | ||
91 | +} | ||
92 | + | ||
93 | +/// Applies a Gaussian blur to a 2D image stored on the GPU | ||
94 | +template<typename T> | ||
95 | +void gpu_gaussian_blur_2d(T* image, T sigma, unsigned int x, unsigned int y){ | ||
96 | + | ||
97 | + //get the number of pixels in the image | ||
98 | + unsigned int pixels = x * y; | ||
99 | + unsigned int bytes = sizeof(T) * pixels; | ||
100 | + | ||
101 | + //allocate temporary space on the GPU | ||
102 | + T* gpuI1; | ||
103 | + cudaMalloc(&gpuI1, bytes); | ||
104 | + | ||
105 | + //get the maximum number of threads per block for the CUDA device | ||
106 | + int threads = stim::maxThreadsPerBlock(); | ||
107 | + | ||
108 | + //calculate the number of blocks | ||
109 | + int blocks = pixels / threads + (pixels%threads == 0 ? 0:1); | ||
110 | + | ||
111 | + //run the kernel for the x dimension | ||
112 | + gaussian_blur_x <<< blocks, threads >>>(gpuI1, image, sigma, x, y); | ||
113 | + | ||
114 | + //run the kernel for the y dimension | ||
115 | + gaussian_blur_y <<< blocks, threads >>>(image, gpuI1, sigma, x, y); | ||
116 | + | ||
117 | +} | ||
118 | + | ||
119 | +/// Applies a Gaussian blur to a 2D image stored on the CPU | ||
120 | +template<typename T> | ||
121 | +void cpu_gaussian_blur_2d(T* image, T sigma, unsigned int x, unsigned int y){ | ||
122 | + | ||
123 | + //get the number of pixels in the image | ||
124 | + unsigned int pixels = x * y; | ||
125 | + unsigned int bytes = sizeof(T) * pixels; | ||
126 | + | ||
127 | + //allocate space on the GPU | ||
128 | + T* gpuI0; | ||
129 | + cudaMalloc(&gpuI0, bytes); | ||
130 | + | ||
131 | + //copy the image data to the GPU | ||
132 | + cudaMemcpy(gpuI0, image, bytes, cudaMemcpyHostToDevice); | ||
133 | + | ||
134 | + //run the GPU-based version of the algorithm | ||
135 | + gpu_gaussian_blur_2d<T>(gpuI0, sigma, x, y); | ||
136 | + | ||
137 | + //copy the image data from the device | ||
138 | + cudaMemcpy(image, gpuI0, bytes, cudaMemcpyDeviceToHost); | ||
139 | +} | ||
140 | + | ||
141 | +#endif |
stim/image/image.h
@@ -28,6 +28,11 @@ public: | @@ -28,6 +28,11 @@ public: | ||
28 | img.load(filename.c_str()); | 28 | img.load(filename.c_str()); |
29 | } | 29 | } |
30 | 30 | ||
31 | + /// Constructor initializes an image to a given size | ||
32 | + image(unsigned int x, unsigned int y = 1, unsigned int z = 1){ | ||
33 | + img = cimg_library::CImg<T>(x, y, z); | ||
34 | + } | ||
35 | + | ||
31 | //Load an image from a file | 36 | //Load an image from a file |
32 | void load(std::string filename){ | 37 | void load(std::string filename){ |
33 | img.load(filename.c_str()); | 38 | img.load(filename.c_str()); |
@@ -72,6 +77,17 @@ public: | @@ -72,6 +77,17 @@ public: | ||
72 | data[x * C + c] = ptr[c * X + x]; | 77 | data[x * C + c] = ptr[c * X + x]; |
73 | } | 78 | } |
74 | 79 | ||
80 | + image<T> channel(unsigned int c){ | ||
81 | + | ||
82 | + //create a new image | ||
83 | + image<T> single; | ||
84 | + | ||
85 | + single.img = img.channel(c); | ||
86 | + | ||
87 | + return single; | ||
88 | + | ||
89 | + } | ||
90 | + | ||
75 | unsigned int channels(){ | 91 | unsigned int channels(){ |
76 | return (unsigned int)img.spectrum(); | 92 | return (unsigned int)img.spectrum(); |
77 | } | 93 | } |