Blame view

stim/cuda/templates/gaussian_blur.cuh 2.66 KB
5cc0976c   David Mayerich   added separable c...
1
2
3
4
5
6
7
8
9
  #ifndef STIM_CUDA_GAUSSIAN_BLUR_H
  #define STIM_CUDA_GAUSSIAN_BLUR_H
  
  #include <cmath>
  #include <iostream>
  #include <vector>
  
  #include <cuda.h>
  #include <stim/cuda/cudatools.h>
  #include <stim/cuda/sharedmem.cuh>
  #include <stim/cuda/templates/conv2sep.cuh>		//GPU-based separable convolution algorithm
  
5cc0976c   David Mayerich   added separable c...
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
  
  namespace stim{
  	namespace cuda{
  
  		/// Samples a 1D Gaussian into a caller-provided array.
  		///
  		/// Entry i receives 1/(sigma*sqrt(2*pi)) * exp(-d*d / (2*sigma*sigma)), where
  		/// d = i - width/2 (integer division) and pi is approximated as 3.14159. Each
  		/// sample passes through a float before being stored, and the samples are not
  		/// rescaled to sum to one.
  		///
  		/// @param out		destination array; entries [0, width) are written
  		/// @param sigma	standard deviation of the Gaussian, in samples
  		/// @param width	number of samples to generate
  		template<typename T>
  		void gen_gaussian(T* out, T sigma, unsigned int width){
  
  			//index of the sample that lies at the peak of the curve
  			const float center = (float)(width/2);
  
  			unsigned int i = 0;
  			while(i < width){
  
  				//signed distance of this sample from the peak
  				const float d = (float)i - center;
  
  				//evaluate the Gaussian at d; the float local fixes the stored precision
  				const float sample = 1.0 / (sigma * sqrt(2 * 3.14159)) * exp( - (d*d) / (2*sigma*sigma) );
  
  				out[i] = sample;
  				++i;
  			}
  
  		}
  
  		template<typename T>
  		void tex_gaussian_blur2(T* out, T sigma, unsigned int x, unsigned int y, cudaTextureObject_t texObj, cudaArray* cuArray){
  
  			//allocate space for the kernel
  			unsigned int kwidth = sigma * 8 + 1;
  			float* kernel0 = (float*) malloc( kwidth * sizeof(float) );
  
  			//fill the kernel with a gaussian
  			gen_gaussian(kernel0, sigma, kwidth);
  
  			//copy the kernel to the GPU
  			T* gpuKernel0;
84eff8b1   Pavel Govyadinov   Merged only the n...
39
  			HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth*sizeof(T)));
5cc0976c   David Mayerich   added separable c...
40
41
42
43
44
45
  			HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
  
  			//perform the gaussian blur as a separable convolution
  			stim::cuda::tex_conv2sep(out, x, y, texObj, cuArray, gpuKernel0, kwidth, gpuKernel0, kwidth);
  
  			HANDLE_ERROR(cudaFree(gpuKernel0));
59781ee3   Pavel Govyadinov   fixed a stask bug...
46
  			free(kernel0);
5cc0976c   David Mayerich   added separable c...
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
  
  		}
  
  		template<typename T>
  		void gpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){
  
  			//allocate space for the kernel
  			unsigned int kwidth = sigma * 8 + 1;
  			float* kernel0 = (float*) malloc( kwidth * sizeof(float) );
  
  			//fill the kernel with a gaussian
  			gen_gaussian(kernel0, sigma, kwidth);
  
  			//copy the kernel to the GPU
  			T* gpuKernel0;
59781ee3   Pavel Govyadinov   fixed a stask bug...
62
  			HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth*sizeof(T)));
5cc0976c   David Mayerich   added separable c...
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
  			HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
  
  			//perform the gaussian blur as a separable convolution
  			stim::cuda::gpu_conv2sep<float>(image, x, y, gpuKernel0, kwidth, gpuKernel0, kwidth);
  
  			HANDLE_ERROR(cudaFree(gpuKernel0));
  
  		}
  
  		/// Applies a Gaussian blur to a 2D image stored on the CPU
  		///
  		/// A 1D Gaussian of (sigma * 8 + 1) samples (converted to an unsigned integer) is
  		/// generated on the host and passed to stim::cuda::cpu_conv2sep as the kernel
  		/// for both passes.
  		///
  		/// @param image	image buffer forwarded to cpu_conv2sep (presumably blurred in place -- confirm against conv2sep.cuh)
  		/// @param sigma	standard deviation of the Gaussian; should be positive, since gen_gaussian divides by it
  		/// @param x		forwarded to cpu_conv2sep (presumably the image width -- confirm)
  		/// @param y		forwarded to cpu_conv2sep (presumably the image height -- confirm)
  		template<typename T>
  		void cpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){
  
  			//number of samples in the 1D kernel
  			unsigned int kwidth = sigma * 8 + 1;
  
  			//the vector owns the host kernel and releases it when the function returns
  			//	(the previous malloc'ed buffer was never freed, leaking on every call)
  			std::vector<float> kernel0(kwidth);
  
  			//fill the kernel with a gaussian
  			gen_gaussian(&kernel0[0], sigma, kwidth);
  
  			//perform the gaussian blur as a separable convolution (same kernel for both passes)
  			stim::cuda::cpu_conv2sep<float>(image, x, y, &kernel0[0], kwidth, &kernel0[0], kwidth);
  
  		}
  		
  	};
  };
  
84eff8b1   Pavel Govyadinov   Merged only the n...
91
  #endif