Commit 41acaf5d09dec699149063551ea2864fd819021e
1 parent
df036f4c
added a CUDA Gaussian blur function for 2D images
Showing
2 changed files
with
157 additions
and
0 deletions
Show diff stats
1 | +#ifndef STIM_CUDA_GAUSSIAN_BLUR_H | |
2 | +#define STIM_CUDA_GAUSSIAN_BLUR_H | |
3 | + | |
4 | +#include <iostream> | |
5 | +#include <cuda.h> | |
6 | +#include "gaussian_blur.cuh" | |
7 | +#include <stim/cuda/devices.h> | |
8 | + | |
9 | +#define pi 3.14159 | |
10 | + | |
11 | +// This CUDA kernel applies a Gaussian blur along the x axis | |
12 | +template<typename T> | |
13 | +__global__ void gaussian_blur_x(T* out, T* in, T sigma, unsigned int x, unsigned int y){ | |
14 | + | |
15 | + //calculate the 1D image index for this thread | |
16 | + int i = blockIdx.x * blockDim.x + threadIdx.x; | |
17 | + | |
18 | + //convert this to 2D pixel coordinates | |
19 | + int yi = i / x; | |
20 | + int xi = i - (yi * x); | |
21 | + | |
22 | + int k_size = sigma * 4; //calculate the kernel size | |
23 | + | |
24 | + int gx, gy, gi; //variables to store global coordinates | |
25 | + T sum = 0; //variable stores the integral of the kernel times the image | |
26 | + T G; //stores the value of the gaussian kernel | |
27 | + | |
28 | + out[i] = 0; | |
29 | + //for each element of the kernel | |
30 | + for(int ki = -k_size; ki <= k_size; ki++){ | |
31 | + | |
32 | + //calculate the gaussian value | |
33 | + G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma)); | |
34 | + | |
35 | + //calculate the global coordinates for this point in the kernel | |
36 | + gx = xi + ki; | |
37 | + gy = yi; | |
38 | + | |
39 | + //make sure we are inside the bounds of the image | |
40 | + if(gx >= 0 && gy >= 0 && gx < x && gy < y){ | |
41 | + gi = gy * x + gx; | |
42 | + | |
43 | + //perform the integration | |
44 | + sum += G * in[gi]; | |
45 | + } | |
46 | + } | |
47 | + | |
48 | + //set the output value to the integral of the function times the kernel | |
49 | + out[i] = sum; | |
50 | +} | |
51 | + | |
52 | +// This CUDA kernel applies a Gaussian blur along the y axis. | |
53 | +template<typename T> | |
54 | +__global__ void gaussian_blur_y(T* out, T* in, T sigma, unsigned int x, unsigned int y){ | |
55 | + | |
56 | + //calculate the 1D image index for this thread | |
57 | + int i = blockIdx.x * blockDim.x + threadIdx.x; | |
58 | + | |
59 | + //convert this to 2D pixel coordinates | |
60 | + int yi = i / x; | |
61 | + int xi = i - (yi * x); | |
62 | + | |
63 | + int k_size = sigma * 4; //calculate the kernel size | |
64 | + | |
65 | + int gx, gy, gi; //variables to store global coordinates | |
66 | + T sum = 0; //variable stores the integral of the kernel times the image | |
67 | + T G; //stores the value of the gaussian kernel | |
68 | + | |
69 | + out[i] = 0; | |
70 | + //for each element of the kernel | |
71 | + for(int ki = -k_size; ki <= k_size; ki++){ | |
72 | + | |
73 | + //calculate the gaussian value | |
74 | + G = 1.0 / (sigma * sqrt(2 * pi)) * exp(-(ki*ki) / (2*sigma*sigma)); | |
75 | + | |
76 | + //calculate the global coordinates for this point in the kernel | |
77 | + gx = xi; | |
78 | + gy = yi + ki; | |
79 | + | |
80 | + //make sure we are inside the bounds of the image | |
81 | + if(gx >= 0 && gy >= 0 && gx < x && gy < y){ | |
82 | + gi = gy * x + gx; | |
83 | + | |
84 | + //perform the integration | |
85 | + sum += G * in[gi]; | |
86 | + } | |
87 | + } | |
88 | + | |
89 | + //set the output value to the integral of the function times the kernel | |
90 | + out[i] = sum; | |
91 | +} | |
92 | + | |
93 | +/// Applies a Gaussian blur to a 2D image stored on the GPU | |
94 | +template<typename T> | |
95 | +void gpu_gaussian_blur_2d(T* image, T sigma, unsigned int x, unsigned int y){ | |
96 | + | |
97 | + //get the number of pixels in the image | |
98 | + unsigned int pixels = x * y; | |
99 | + unsigned int bytes = sizeof(T) * pixels; | |
100 | + | |
101 | + //allocate temporary space on the GPU | |
102 | + T* gpuI1; | |
103 | + cudaMalloc(&gpuI1, bytes); | |
104 | + | |
105 | + //get the maximum number of threads per block for the CUDA device | |
106 | + int threads = stim::maxThreadsPerBlock(); | |
107 | + | |
108 | + //calculate the number of blocks | |
109 | + int blocks = pixels / threads + (pixels%threads == 0 ? 0:1); | |
110 | + | |
111 | + //run the kernel for the x dimension | |
112 | + gaussian_blur_x <<< blocks, threads >>>(gpuI1, image, sigma, x, y); | |
113 | + | |
114 | + //run the kernel for the y dimension | |
115 | + gaussian_blur_y <<< blocks, threads >>>(image, gpuI1, sigma, x, y); | |
116 | + | |
117 | +} | |
118 | + | |
119 | +/// Applies a Gaussian blur to a 2D image stored on the CPU | |
120 | +template<typename T> | |
121 | +void cpu_gaussian_blur_2d(T* image, T sigma, unsigned int x, unsigned int y){ | |
122 | + | |
123 | + //get the number of pixels in the image | |
124 | + unsigned int pixels = x * y; | |
125 | + unsigned int bytes = sizeof(T) * pixels; | |
126 | + | |
127 | + //allocate space on the GPU | |
128 | + T* gpuI0; | |
129 | + cudaMalloc(&gpuI0, bytes); | |
130 | + | |
131 | + //copy the image data to the GPU | |
132 | + cudaMemcpy(gpuI0, image, bytes, cudaMemcpyHostToDevice); | |
133 | + | |
134 | + //run the GPU-based version of the algorithm | |
135 | + gpu_gaussian_blur_2d<T>(gpuI0, sigma, x, y); | |
136 | + | |
137 | + //copy the image data from the device | |
138 | + cudaMemcpy(image, gpuI0, bytes, cudaMemcpyDeviceToHost); | |
139 | +} | |
140 | + | |
141 | +#endif | ... | ... |
stim/image/image.h
... | ... | @@ -28,6 +28,11 @@ public: |
28 | 28 | img.load(filename.c_str()); |
29 | 29 | } |
30 | 30 | |
31 | + /// Constructor initializes an image to a given size | |
32 | + image(unsigned int x, unsigned int y = 1, unsigned int z = 1){ | |
33 | + img = cimg_library::CImg<T>(x, y, z); | |
34 | + } | |
35 | + | |
31 | 36 | //Load an image from a file |
32 | 37 | void load(std::string filename){ |
33 | 38 | img.load(filename.c_str()); |
... | ... | @@ -72,6 +77,17 @@ public: |
72 | 77 | data[x * C + c] = ptr[c * X + x]; |
73 | 78 | } |
74 | 79 | |
80 | + image<T> channel(unsigned int c){ | |
81 | + | |
82 | + //create a new image | |
83 | + image<T> single; | |
84 | + | |
85 | + single.img = img.channel(c); | |
86 | + | |
87 | + return single; | |
88 | + | |
89 | + } | |
90 | + | |
75 | 91 | unsigned int channels(){ |
76 | 92 | return (unsigned int)img.spectrum(); |
77 | 93 | } | ... | ... |