Blame view

stim/cuda/templates/gaussian_blur.cuh 2.73 KB
5cc0976c   David Mayerich   added separable c...
1
2
3
4
5
6
7
8
9
  #ifndef STIM_CUDA_GAUSSIAN_BLUR_H
  #define STIM_CUDA_GAUSSIAN_BLUR_H
  
  #include <iostream>
  #include <cuda.h>
  #include <stim/cuda/cudatools.h>
  #include <stim/cuda/sharedmem.cuh>
  #include <stim/cuda/templates/conv2sep.cuh>		//GPU-based separable convolution algorithm
  
5cc0976c   David Mayerich   added separable c...
10
11
  
  namespace stim{
8da0df3e   Jiabing Li   whatever
12
  	namespace cuda {
5cc0976c   David Mayerich   added separable c...
13
14
  
  		/// Fills a 1D buffer with samples of a Gaussian centered on element (width / 2).
  		///
  		/// Sample xi is evaluated at x = xi - width/2 (integer division) as
  		///		g(x) = 1 / (sigma * sqrt(2 * pi)) * exp(-x^2 / (2 * sigma^2))
  		/// The samples are not re-normalized, so their sum is generally not exactly 1.
  		///
  		/// @param out		destination buffer; must hold at least "width" elements of type T
  		/// @param sigma	standard deviation of the Gaussian in samples; must be non-zero (it is used as a divisor)
  		/// @param width	number of samples to generate
  		template<typename T>
  		void gen_gaussian(T* out, T sigma, unsigned int width) {

  			//all intermediate math is done in T, so a double-precision kernel is not truncated to float
  			const T pi = (T)3.14159265358979323846;
  			const T center = (T)(width / 2);					//integer division: index of the sample at x = 0
  			const T amplitude = (T)1 / (sigma * sqrt((T)2 * pi));	//loop-invariant normalization factor
  			const T denom = (T)2 * sigma * sigma;				//loop-invariant denominator of the exponent

  			//fill the kernel with a gaussian
  			for (unsigned int xi = 0; xi < width; xi++) {
  				const T x = (T)xi - center;						//calculate the x position of the gaussian
  				out[xi] = amplitude * exp(-(x * x) / denom);
  			}

  		}
  
  		/// Applies a 2D Gaussian blur as a separable convolution using stim::cuda::tex_conv2sep.
  		///
  		/// A 1D Gaussian with (sigma * 8 + 1) samples (truncated to an integer) is generated on the host,
  		/// copied to the GPU, and passed as both kernels of the separable convolution.
  		///
  		/// @param out		output buffer, forwarded unchanged to tex_conv2sep
  		/// @param sigma	standard deviation of the Gaussian (see gen_gaussian)
  		/// @param x		first image dimension forwarded to tex_conv2sep (presumably the width - confirm in conv2sep.cuh)
  		/// @param y		second image dimension forwarded to tex_conv2sep (presumably the height - confirm in conv2sep.cuh)
  		/// @param texObj	texture object forwarded to tex_conv2sep
  		/// @param cuArray	CUDA array forwarded to tex_conv2sep
  		template<typename T>
  		void tex_gaussian_blur2(T* out, T sigma, unsigned int x, unsigned int y, cudaTextureObject_t texObj, cudaArray* cuArray) {

  			//allocate space for the kernel
  			unsigned int kwidth = sigma * 8 + 1;
  			//the host buffer is typed T (not float) so it agrees with gen_gaussian<T> and with the sizeof(T) copy below
  			T* kernel0 = (T*)malloc(kwidth * sizeof(T));
  			if (!kernel0) {
  				std::cerr << "stim::cuda::tex_gaussian_blur2: unable to allocate the host kernel" << std::endl;
  				return;
  			}

  			//fill the kernel with a gaussian
  			gen_gaussian(kernel0, sigma, kwidth);

  			//copy the kernel to the GPU
  			T* gpuKernel0;
  			HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T)));
  			HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));

  			//the host copy is no longer needed once the blocking cudaMemcpy has returned
  			free(kernel0);

  			//perform the gaussian blur as a separable convolution (the same kernel is used for both passes)
  			stim::cuda::tex_conv2sep(out, x, y, texObj, cuArray, gpuKernel0, kwidth, gpuKernel0, kwidth);

  			HANDLE_ERROR(cudaFree(gpuKernel0));

  		}
  
  		/// Applies a 2D Gaussian blur as a separable convolution using stim::cuda::gpu_conv2sep.
  		///
  		/// A 1D Gaussian with (sigma * 8 + 1) samples (truncated to an integer) is generated on the host,
  		/// copied to the GPU, and passed as both kernels of the separable convolution.
  		///
  		/// @param image	image buffer forwarded to gpu_conv2sep (presumably a device pointer that is blurred in place - confirm in conv2sep.cuh)
  		/// @param sigma	standard deviation of the Gaussian (see gen_gaussian)
  		/// @param x		first image dimension forwarded to gpu_conv2sep (presumably the width - confirm in conv2sep.cuh)
  		/// @param y		second image dimension forwarded to gpu_conv2sep (presumably the height - confirm in conv2sep.cuh)
  		template<typename T>
  		void gpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y) {

  			//allocate space for the kernel
  			unsigned int kwidth = sigma * 8 + 1;
  			//the host buffer is typed T (not float) so it agrees with gen_gaussian<T> and with the sizeof(T) copy below
  			T* kernel0 = (T*)malloc(kwidth * sizeof(T));
  			if (!kernel0) {
  				std::cerr << "stim::cuda::gpu_gaussian_blur2: unable to allocate the host kernel" << std::endl;
  				return;
  			}

  			//fill the kernel with a gaussian
  			gen_gaussian(kernel0, sigma, kwidth);

  			//copy the kernel to the GPU
  			T* gpuKernel0;
  			HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T)));
  			HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));

  			//release the host copy (previously leaked on every call); the blocking cudaMemcpy has already returned
  			free(kernel0);

  			//perform the gaussian blur as a separable convolution (the same kernel is used for both passes)
  			stim::cuda::gpu_conv2sep<T>(image, x, y, gpuKernel0, kwidth, gpuKernel0, kwidth);

  			HANDLE_ERROR(cudaFree(gpuKernel0));

  		}
  
  		/// Applies a Gaussian blur to a 2D image stored on the CPU
  		///
  		/// A 1D Gaussian with (sigma * 8 + 1) samples (truncated to an integer) is generated on the host
  		/// and passed as both kernels of the separable convolution stim::cuda::cpu_conv2sep.
  		///
  		/// @param image	image buffer forwarded to cpu_conv2sep (presumably blurred in place - confirm in conv2sep.cuh)
  		/// @param sigma	standard deviation of the Gaussian (see gen_gaussian)
  		/// @param x		first image dimension forwarded to cpu_conv2sep (presumably the width - confirm in conv2sep.cuh)
  		/// @param y		second image dimension forwarded to cpu_conv2sep (presumably the height - confirm in conv2sep.cuh)
  		/// @param gpu_time	receives the value returned by gpuTimer_end(); the timed region covers kernel
  		///					allocation, kernel generation and the convolution (units not visible here - confirm in cudatools)
  		template<typename T>
  		void cpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y, float &gpu_time) {

  			gpuTimer_start();
  			//allocate space for the kernel
  			unsigned int kwidth = sigma * 8 + 1;
  			//the host buffer is typed T (not float) so it agrees with gen_gaussian<T> and cpu_conv2sep<T>
  			T* kernel0 = (T*)malloc(kwidth * sizeof(T));
  			if (!kernel0) {
  				std::cerr << "stim::cuda::cpu_gaussian_blur2: unable to allocate the host kernel" << std::endl;
  				gpu_time = gpuTimer_end();		//stop the timer that was started above before bailing out
  				return;
  			}

  			//fill the kernel with a gaussian
  			gen_gaussian(kernel0, sigma, kwidth);

  			//perform the gaussian blur as a separable convolution (the same kernel is used for both passes)
  			stim::cuda::cpu_conv2sep<T>(image, x, y, kernel0, kwidth, kernel0, kwidth);
  			gpu_time = gpuTimer_end();

  			//release the host kernel (previously leaked on every call); done after the timer is read so the timed region is unchanged
  			free(kernel0);

  		}
8da0df3e   Jiabing Li   whatever
89
  	
5cc0976c   David Mayerich   added separable c...
90
  		
8da0df3e   Jiabing Li   whatever
91
92
  	}
  }
5cc0976c   David Mayerich   added separable c...
93
  
84eff8b1   Pavel Govyadinov   Merged only the n...
94
  #endif