gaussian_blur.cuh
2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#ifndef STIM_CUDA_GAUSSIAN_BLUR_H
#define STIM_CUDA_GAUSSIAN_BLUR_H
#include <iostream>
#include <cuda.h>
#include <stim/cuda/cudatools.h>
#include <stim/cuda/sharedmem.cuh>
#include <stim/cuda/templates/conv2sep.cuh> //GPU-based separable convolution algorithm
namespace stim{
namespace cuda{
/// Fills out[0..width-1] with samples of a 1D Gaussian of standard deviation sigma.
///
/// Sample i is evaluated at position i - width/2 (integer division), so the
/// peak lands on index width/2. Each value is the analytic density
/// 1/(sigma*sqrt(2*pi)) * exp(-x^2 / (2*sigma^2)) with pi taken as 3.14159;
/// the samples are not rescaled afterwards to sum to one.
///
/// @param out    destination array with room for at least width elements
/// @param sigma  standard deviation of the Gaussian
/// @param width  number of samples to generate
template<typename T>
void gen_gaussian(T* out, T sigma, unsigned int width){

	//index of the sample that sits on the center of the Gaussian
	const float center = (float)(width/2);

	unsigned int idx = 0;
	while(idx < width){

		//signed distance of this sample from the center
		float offset = (float)idx - center;

		//evaluate the Gaussian at this offset (result is stored through a float)
		float value = 1.0 / (sigma * sqrt(2 * 3.14159)) * exp( - (offset*offset) / (2*sigma*sigma) );

		out[idx] = value;
		++idx;
	}
}
template<typename T>
void tex_gaussian_blur2(T* out, T sigma, unsigned int x, unsigned int y, cudaTextureObject_t texObj, cudaArray* cuArray){
//allocate space for the kernel
unsigned int kwidth = sigma * 8 + 1;
float* kernel0 = (float*) malloc( kwidth * sizeof(float) );
//fill the kernel with a gaussian
gen_gaussian(kernel0, sigma, kwidth);
//copy the kernel to the GPU
T* gpuKernel0;
HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth*sizeof(T)));
HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
//perform the gaussian blur as a separable convolution
stim::cuda::tex_conv2sep(out, x, y, texObj, cuArray, gpuKernel0, kwidth, gpuKernel0, kwidth);
HANDLE_ERROR(cudaFree(gpuKernel0));
free(kernel0);
}
template<typename T>
void gpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){
//allocate space for the kernel
unsigned int kwidth = sigma * 8 + 1;
float* kernel0 = (float*) malloc( kwidth * sizeof(float) );
//fill the kernel with a gaussian
gen_gaussian(kernel0, sigma, kwidth);
//copy the kernel to the GPU
T* gpuKernel0;
HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth*sizeof(T)));
HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
//perform the gaussian blur as a separable convolution
stim::cuda::gpu_conv2sep<float>(image, x, y, gpuKernel0, kwidth, gpuKernel0, kwidth);
HANDLE_ERROR(cudaFree(gpuKernel0));
}
/// Applies a Gaussian blur to a 2D image stored on the CPU
///
/// A 1D Gaussian kernel of sigma * 8 + 1 samples (truncated to an unsigned
/// integer) is generated on the host and handed to stim::cuda::cpu_conv2sep
/// for both passes of the separable convolution.
///
/// @param image  forwarded to cpu_conv2sep as the image to process (presumably
///               blurred in place - confirm against conv2sep.cuh)
/// @param sigma  standard deviation of the Gaussian
/// @param x      forwarded to cpu_conv2sep (presumably the image width - confirm)
/// @param y      forwarded to cpu_conv2sep (presumably the image height - confirm)
template<typename T>
void cpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){

	//number of samples in the 1D kernel
	unsigned int kwidth = sigma * 8 + 1;

	//allocate space for the kernel (float, matching the cpu_conv2sep<float> call below)
	float* kernel0 = (float*) malloc( kwidth * sizeof(float) );

	//fill the kernel with a gaussian
	gen_gaussian(kernel0, sigma, kwidth);

	//perform the gaussian blur as a separable convolution (same kernel for both passes)
	stim::cuda::cpu_conv2sep<float>(image, x, y, kernel0, kwidth, kernel0, kwidth);

	//release the kernel; the original never freed it, leaking one kernel per call
	free(kernel0);
}
};
};
#endif