// gaussian_blur.cuh (2.66 KB)
#ifndef STIM_CUDA_GAUSSIAN_BLUR_H
#define STIM_CUDA_GAUSSIAN_BLUR_H

#include <iostream>
#include <cuda.h>
#include <stim/cuda/cudatools.h>
#include <stim/cuda/sharedmem.cuh>
#include <stim/cuda/templates/conv2sep.cuh>		//GPU-based separable convolution algorithm


namespace stim{
	namespace cuda{

		/// Fills a 1D array with samples of a Gaussian centered on the array.
		///
		/// Sample xi receives the normal density with standard deviation sigma evaluated at the
		/// offset (xi - width/2), where width/2 is an integer division. The samples are written
		/// as computed and are not rescaled afterwards.
		///
		/// @param out		destination array; width elements are written
		/// @param sigma	standard deviation of the Gaussian, in samples; it is used as a divisor,
		///					so a value of zero produces non-finite output
		/// @param width	number of samples to generate
		template<typename T>
		void gen_gaussian(T* out, T sigma, unsigned int width){

			//evaluate everything in double precision and convert to T only on the final store,
			//	so a double-precision kernel is not silently truncated to float accuracy
			const double pi = 3.14159265358979323846;
			const double s = (double)sigma;
			const double center = (double)(width/2);			//index of the sample that sits at x = 0
			const double scale = 1.0 / (s * sqrt(2.0 * pi));	//normalization factor, identical for every sample

			//fill the kernel with a gaussian
			for(unsigned int xi = 0; xi < width; xi++){

				const double x = (double)xi - center;			//calculate the x position of the gaussian
				out[xi] = (T)( scale * exp( - (x*x) / (2.0*s*s) ) );
			}

		}

		/// Runs a Gaussian blur through stim::cuda::tex_conv2sep, using one Gaussian kernel for both passes.
		///
		/// A kernel of (sigma * 8 + 1) samples is generated on the host with gen_gaussian, uploaded
		/// to the GPU, and handed to tex_conv2sep as both the first and the second kernel.
		/// The host buffer is a float array while the device buffer is sized with sizeof(T), and
		/// gen_gaussian receives a float* together with a T sigma, so T has to be float here.
		///
		/// @param out		forwarded to tex_conv2sep as its output argument
		/// @param sigma	standard deviation of the Gaussian; also determines the kernel length
		/// @param x		forwarded to tex_conv2sep (presumably the image width - confirm in conv2sep.cuh)
		/// @param y		forwarded to tex_conv2sep (presumably the image height - confirm in conv2sep.cuh)
		/// @param texObj	texture object forwarded to tex_conv2sep
		/// @param cuArray	CUDA array forwarded to tex_conv2sep
		template<typename T>
		void tex_gaussian_blur2(T* out, T sigma, unsigned int x, unsigned int y, cudaTextureObject_t texObj, cudaArray* cuArray){

			//number of kernel samples and the size of the device copy in bytes
			unsigned int taps = sigma * 8 + 1;
			size_t device_bytes = taps * sizeof(T);

			//build the gaussian in host memory
			float* host_taps = (float*) malloc( taps * sizeof(float) );
			gen_gaussian(host_taps, sigma, taps);

			//mirror the host kernel in GPU memory
			T* device_taps;
			HANDLE_ERROR(cudaMalloc(&device_taps, device_bytes));
			HANDLE_ERROR(cudaMemcpy(device_taps, host_taps, device_bytes, cudaMemcpyHostToDevice));

			//the same kernel is supplied twice, once for each pass of the separable convolution
			stim::cuda::tex_conv2sep(out, x, y, texObj, cuArray, device_taps, taps, device_taps, taps);

			//release the device copy first, then the host copy
			HANDLE_ERROR(cudaFree(device_taps));
			free(host_taps);

		}

		/// Applies a Gaussian blur to a 2D image by calling stim::cuda::gpu_conv2sep<float>.
		///
		/// A kernel of (sigma * 8 + 1) samples is generated on the host with gen_gaussian, copied
		/// to the GPU, and passed to gpu_conv2sep as both the first and the second kernel.
		/// The host buffer is a float array and gpu_conv2sep is instantiated for float, so T has
		/// to be float here.
		///
		/// @param image	image pointer handed to gpu_conv2sep (presumably a device pointer that is
		///					blurred in place - confirm in conv2sep.cuh)
		/// @param sigma	standard deviation of the Gaussian; also determines the kernel length
		/// @param x		forwarded to gpu_conv2sep (presumably the image width - confirm in conv2sep.cuh)
		/// @param y		forwarded to gpu_conv2sep (presumably the image height - confirm in conv2sep.cuh)
		template<typename T>
		void gpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){

			//allocate space for the kernel
			unsigned int kwidth = sigma * 8 + 1;
			float* kernel0 = (float*) malloc( kwidth * sizeof(float) );

			//fill the kernel with a gaussian
			gen_gaussian(kernel0, sigma, kwidth);

			//copy the kernel to the GPU
			T* gpuKernel0;
			HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth*sizeof(T)));
			HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));

			//perform the gaussian blur as a separable convolution
			stim::cuda::gpu_conv2sep<float>(image, x, y, gpuKernel0, kwidth, gpuKernel0, kwidth);

			HANDLE_ERROR(cudaFree(gpuKernel0));
			free(kernel0);				//release the host copy of the kernel (it was previously leaked on every call)

		}

		/// Applies a Gaussian blur to a 2D image stored on the CPU
		///
		/// A kernel of (sigma * 8 + 1) samples is generated with gen_gaussian and passed to
		/// stim::cuda::cpu_conv2sep<float> as both the first and the second kernel.
		/// The kernel buffer is a float array and cpu_conv2sep is instantiated for float, so T has
		/// to be float here.
		///
		/// @param image	image pointer handed to cpu_conv2sep (presumably blurred in place - confirm in conv2sep.cuh)
		/// @param sigma	standard deviation of the Gaussian; also determines the kernel length
		/// @param x		forwarded to cpu_conv2sep (presumably the image width - confirm in conv2sep.cuh)
		/// @param y		forwarded to cpu_conv2sep (presumably the image height - confirm in conv2sep.cuh)
		template<typename T>
		void cpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){

			//allocate space for the kernel
			unsigned int kwidth = sigma * 8 + 1;
			float* kernel0 = (float*) malloc( kwidth * sizeof(float) );

			//fill the kernel with a gaussian
			gen_gaussian(kernel0, sigma, kwidth);

			//perform the gaussian blur as a separable convolution
			stim::cuda::cpu_conv2sep<float>(image, x, y, kernel0, kwidth, kernel0, kwidth);

			//release the kernel now that the convolution has returned (it was previously leaked on every call)
			//	NOTE(review): assumes cpu_conv2sep does not keep the kernel pointer after returning - confirm
			free(kernel0);

		}
		
	};
};

#endif