Blame view

stim/cuda/templates/gaussian_blur.cuh 2.6 KB
5cc0976c   David Mayerich   added separable c...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
  #ifndef STIM_CUDA_GAUSSIAN_BLUR_H
  #define STIM_CUDA_GAUSSIAN_BLUR_H
  
  #include <iostream>
  #include <cuda.h>
  #include <stim/cuda/cudatools.h>
  #include <stim/cuda/sharedmem.cuh>
  #include <stim/cuda/templates/conv2sep.cuh>		//GPU-based separable convolution algorithm
  
  #define pi	3.14159
  
  namespace stim{
  	namespace cuda{
  
  		/// Fills a buffer with samples of a 1D Gaussian centered in the buffer.
  		///
  		/// Sample xi is evaluated at x = xi - width/2 (integer division), so for an odd
  		/// width the peak lands on the middle element. The values are samples of the
  		/// continuous probability density 1/(sigma*sqrt(2*pi)) * exp(-x^2 / (2*sigma^2));
  		/// they are NOT re-normalized, so their discrete sum is only approximately 1.
  		///
  		/// @param out		host buffer that receives "width" samples
  		/// @param sigma	standard deviation of the Gaussian, in samples (must be non-zero)
  		/// @param width	number of samples to generate
  		template<typename T>
  		void gen_gaussian(T* out, T sigma, unsigned int width){
  
  			//evaluate in double precision regardless of T so that a double-precision
  			//	kernel is not truncated through float intermediates
  			const double two_pi = 6.283185307179586;
  			const double s = (double)sigma;
  
  			//loop-invariant factors of the Gaussian
  			const double norm = 1.0 / (s * sqrt(two_pi));		//peak height
  			const double inv_two_var = 1.0 / (2.0 * s * s);		//1 / (2 * sigma^2)
  			const double center = (double)(width / 2);			//index of the x = 0 sample
  
  			//fill the kernel with a gaussian
  			for(unsigned int xi = 0; xi < width; xi++){
  
  				const double x = (double)xi - center;			//calculate the x position of the gaussian
  				out[xi] = (T)(norm * exp( -(x * x) * inv_two_var ));
  			}
  
  		}
  
  		/// Applies a 2D Gaussian blur using the texture-based separable convolution.
  		///
  		/// Builds a 1D Gaussian kernel of width (8 * sigma + 1) on the host, uploads it to
  		/// the GPU and passes it to stim::cuda::tex_conv2sep as both the first and the
  		/// second 1D kernel.
  		///
  		/// @param out		destination buffer handed to tex_conv2sep (presumably device memory - confirm against tex_conv2sep)
  		/// @param sigma	standard deviation of the Gaussian, in pixels
  		/// @param x		image size along the first dimension, forwarded to tex_conv2sep
  		/// @param y		image size along the second dimension, forwarded to tex_conv2sep
  		/// @param texObj	texture object forwarded to tex_conv2sep
  		/// @param cuArray	CUDA array forwarded to tex_conv2sep
  		template<typename T>
  		void tex_gaussian_blur2(T* out, T sigma, unsigned int x, unsigned int y, cudaTextureObject_t texObj, cudaArray* cuArray){
  
  			//allocate space for the kernel (host side); the same byte count is used for
  			//	the host buffer, the device buffer and the copy so they cannot disagree
  			unsigned int kwidth = sigma * 8 + 1;
  			size_t kbytes = kwidth * sizeof(T);
  			T* kernel0 = (T*) malloc( kbytes );
  
  			//fill the kernel with a gaussian
  			gen_gaussian(kernel0, sigma, kwidth);
  
  			//allocate the kernel on the GPU and copy it over
  			//	(the device buffer must exist before cudaMemcpy can write to it)
  			T* gpuKernel0;
  			HANDLE_ERROR(cudaMalloc(&gpuKernel0, kbytes));
  			HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kbytes, cudaMemcpyHostToDevice));
  
  			//perform the gaussian blur as a separable convolution
  			stim::cuda::tex_conv2sep(out, x, y, texObj, cuArray, gpuKernel0, kwidth, gpuKernel0, kwidth);
  
  			//release both copies of the kernel
  			HANDLE_ERROR(cudaFree(gpuKernel0));
  			free(kernel0);
  
  		}
  
  		template<typename T>
  		void gpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){
  
  			//allocate space for the kernel
  			unsigned int kwidth = sigma * 8 + 1;
  			float* kernel0 = (float*) malloc( kwidth * sizeof(float) );
  
  			//fill the kernel with a gaussian
  			gen_gaussian(kernel0, sigma, kwidth);
  
  			//copy the kernel to the GPU
  			T* gpuKernel0;
96f9b10f   Laila Saadatifard   change the header...
61
  			HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T)));
5cc0976c   David Mayerich   added separable c...
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
  			HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
  
  			//perform the gaussian blur as a separable convolution
  			stim::cuda::gpu_conv2sep<float>(image, x, y, gpuKernel0, kwidth, gpuKernel0, kwidth);
  
  			HANDLE_ERROR(cudaFree(gpuKernel0));
  
  		}
  
  		/// Applies a Gaussian blur to a 2D image stored on the CPU
  		///
  		/// Builds a 1D Gaussian kernel of width (8 * sigma + 1) on the host and passes it
  		/// to stim::cuda::cpu_conv2sep<float> as both the first and the second 1D kernel.
  		/// Because the convolution is instantiated for float, this function is only
  		/// usable with T = float.
  		///
  		/// @param image	host image buffer handed to cpu_conv2sep (presumably blurred in place - confirm against cpu_conv2sep)
  		/// @param sigma	standard deviation of the Gaussian, in pixels
  		/// @param x		image size along the first dimension, forwarded to cpu_conv2sep
  		/// @param y		image size along the second dimension, forwarded to cpu_conv2sep
  		template<typename T>
  		void cpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){
  
  			//allocate space for the kernel
  			unsigned int kwidth = sigma * 8 + 1;
  			T* kernel0 = (T*) malloc( kwidth * sizeof(T) );
  
  			//fill the kernel with a gaussian
  			gen_gaussian(kernel0, sigma, kwidth);
  
  			//perform the gaussian blur as a separable convolution
  			stim::cuda::cpu_conv2sep<float>(image, x, y, kernel0, kwidth, kernel0, kwidth);
  
  			//release the host kernel (previously leaked on every call)
  			//	NOTE(review): assumes cpu_conv2sep has finished reading the kernel when it returns - confirm
  			free(kernel0);
  
  		}
  		
  	};
  };
  
  #endif