5cc0976c
David Mayerich
added separable c...
|
1
2
3
4
5
6
7
8
9
|
#ifndef STIM_CUDA_GAUSSIAN_BLUR_H
#define STIM_CUDA_GAUSSIAN_BLUR_H
#include <cmath>
#include <iostream>
#include <vector>
#include <cuda.h>
#include <stim/cuda/cudatools.h>
#include <stim/cuda/sharedmem.cuh>
#include <stim/cuda/templates/conv2sep.cuh>		//GPU-based separable convolution algorithm
|
5cc0976c
David Mayerich
added separable c...
|
10
11
|
namespace stim{
|
8da0df3e
Jiabing Li
whatever
|
12
|
namespace cuda {
|
5cc0976c
David Mayerich
added separable c...
|
13
14
|
/// Fills a 1D array with samples of a Gaussian (normal) probability density function.
/// Sample xi is evaluated at position (xi - width/2), using integer division for the
/// center, so the peak lands on index width/2. The samples are the raw density values;
/// they are NOT rescaled to sum to one.
/// @param out is the destination array; it must have room for at least "width" values
/// @param sigma is the standard deviation of the Gaussian; it must be greater than zero,
///        because the expression divides by sigma
/// @param width is the number of samples to generate
template<typename T>
void gen_gaussian(T* out, T sigma, unsigned int width) {

	//all intermediate values are kept in T so that a double kernel is not truncated to float
	const T two_pi = (T)6.283185307179586476925286766559;

	//loop-invariant terms: the peak height and the denominator of the exponent
	const T peak = (T)1 / (sigma * sqrt(two_pi));
	const T two_var = (T)2 * sigma * sigma;
	const T center = (T)(width / 2);

	//fill the kernel with a gaussian
	for (unsigned int xi = 0; xi < width; xi++) {
		const T x = (T)xi - center;					//calculate the x position of the gaussian
		out[xi] = peak * exp(-(x * x) / two_var);
	}
}
/// Applies a Gaussian blur as a separable convolution to a 2D image that is accessed
/// through a CUDA texture object. The same 1D kernel is used for both passes.
/// @param out is the output buffer handed to stim::cuda::tex_conv2sep
///        (presumably device memory - confirm against conv2sep.cuh)
/// @param sigma is the standard deviation of the Gaussian; the kernel holds (sigma * 8 + 1) samples
/// @param x is forwarded to tex_conv2sep (presumably the image size along X - confirm)
/// @param y is forwarded to tex_conv2sep (presumably the image size along Y - confirm)
/// @param texObj is the texture object forwarded to tex_conv2sep
/// @param cuArray is the CUDA array forwarded to tex_conv2sep
template<typename T>
void tex_gaussian_blur2(T* out, T sigma, unsigned int x, unsigned int y, cudaTextureObject_t texObj, cudaArray* cuArray) {

	//calculate the number of samples in the kernel (truncated toward zero)
	unsigned int kwidth = (unsigned int)(sigma * 8 + 1);

	//allocate the host kernel with the same element type (T) that is copied to the GPU below,
	//	so the copy never reads past the end of the host buffer; the vector releases the
	//	memory on every exit path
	std::vector<T> kernel0(kwidth);

	//fill the kernel with a gaussian
	gen_gaussian(&kernel0[0], sigma, kwidth);

	//copy the kernel to the GPU
	T* gpuKernel0;
	HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T)));
	HANDLE_ERROR(cudaMemcpy(gpuKernel0, &kernel0[0], kwidth * sizeof(T), cudaMemcpyHostToDevice));

	//perform the gaussian blur as a separable convolution
	stim::cuda::tex_conv2sep(out, x, y, texObj, cuArray, gpuKernel0, kwidth, gpuKernel0, kwidth);

	HANDLE_ERROR(cudaFree(gpuKernel0));
}
/// Applies a Gaussian blur as a separable convolution to a 2D image by calling
/// stim::cuda::gpu_conv2sep. The same 1D kernel is used for both passes.
/// @param image is the image buffer handed to gpu_conv2sep
///        (presumably device memory that is blurred in place - confirm against conv2sep.cuh)
/// @param sigma is the standard deviation of the Gaussian; the kernel holds (sigma * 8 + 1) samples
/// @param x is forwarded to gpu_conv2sep (presumably the image size along X - confirm)
/// @param y is forwarded to gpu_conv2sep (presumably the image size along Y - confirm)
template<typename T>
void gpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y) {

	//calculate the number of samples in the kernel (truncated toward zero)
	unsigned int kwidth = (unsigned int)(sigma * 8 + 1);

	//allocate the host kernel with the same element type (T) that is copied to the GPU below;
	//	the vector owns the memory, so the host copy is released when the function returns
	//	(the previous malloc'd buffer was never freed)
	std::vector<T> kernel0(kwidth);

	//fill the kernel with a gaussian
	gen_gaussian(&kernel0[0], sigma, kwidth);

	//copy the kernel to the GPU
	T* gpuKernel0;
	HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T)));
	HANDLE_ERROR(cudaMemcpy(gpuKernel0, &kernel0[0], kwidth * sizeof(T), cudaMemcpyHostToDevice));

	//perform the gaussian blur as a separable convolution
	//	(instantiated with T so that the image and kernel element types agree)
	stim::cuda::gpu_conv2sep<T>(image, x, y, gpuKernel0, kwidth, gpuKernel0, kwidth);

	HANDLE_ERROR(cudaFree(gpuKernel0));
}
/// Applies a Gaussian blur to a 2D image stored on the CPU
/// The blur is performed as a separable convolution by stim::cuda::cpu_conv2sep, using the
/// same 1D kernel for both passes. The whole call is bracketed by gpuTimer_start() and
/// gpuTimer_end().
/// @param image is the image buffer handed to cpu_conv2sep
///        (presumably blurred in place - confirm against conv2sep.cuh)
/// @param sigma is the standard deviation of the Gaussian; the kernel holds (sigma * 8 + 1) samples
/// @param x is forwarded to cpu_conv2sep (presumably the image size along X - confirm)
/// @param y is forwarded to cpu_conv2sep (presumably the image size along Y - confirm)
/// @param gpu_time receives the value returned by gpuTimer_end()
///        (units are defined by the timer implementation - confirm)
template<typename T>
void cpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y, float &gpu_time) {

	gpuTimer_start();

	//calculate the number of samples in the kernel (truncated toward zero)
	unsigned int kwidth = (unsigned int)(sigma * 8 + 1);

	//allocate the kernel in the image's element type; the vector owns the memory, so it is
	//	released when the function returns (the previous malloc'd buffer was never freed)
	std::vector<T> kernel0(kwidth);

	//fill the kernel with a gaussian
	gen_gaussian(&kernel0[0], sigma, kwidth);

	//perform the gaussian blur as a separable convolution
	//	(instantiated with T so that the image and kernel element types agree)
	stim::cuda::cpu_conv2sep<T>(image, x, y, &kernel0[0], kwidth, &kernel0[0], kwidth);

	gpu_time = gpuTimer_end();
}
|
8da0df3e
Jiabing Li
whatever
|
89
|
|
5cc0976c
David Mayerich
added separable c...
|
90
|
|
8da0df3e
Jiabing Li
whatever
|
91
92
|
}
}
|
5cc0976c
David Mayerich
added separable c...
|
93
|
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
94
|
#endif
|