// gaussian_blur.cuh
#ifndef STIM_CUDA_GAUSSIAN_BLUR_H
#define STIM_CUDA_GAUSSIAN_BLUR_H

#include <iostream>
#include <cuda.h>
#include <stim/cuda/cudatools.h>
#include <stim/cuda/sharedmem.cuh>
#include <stim/cuda/templates/conv2sep.cuh>		//GPU-based separable convolution algorithm


namespace stim{
	namespace cuda {

		/// Fills an array with samples of a Gaussian probability density function.
		///
		/// Sample xi holds the value of 1 / (sigma * sqrt(2 * pi)) * exp(-x^2 / (2 * sigma^2))
		/// at x = xi - (width / 2), using integer division for the center index. The samples
		/// are not re-normalized, so they do not necessarily sum to one.
		/// @param out		destination array with room for at least width elements (host memory)
		/// @param sigma	standard deviation of the Gaussian, in samples; must be non-zero
		/// @param width	number of samples to generate
		template<typename T>
		void gen_gaussian(T* out, T sigma, unsigned int width) {

			//evaluate in double precision so that the result is accurate for every T;
			//	the value is narrowed to T only once, when it is stored
			const double pi = 3.14159265358979323846;
			const double s = (double)sigma;
			const double scale = 1.0 / (s * sqrt(2.0 * pi));	//amplitude of the Gaussian
			const double two_var = 2.0 * s * s;					//denominator of the exponent
			const double center = (double)(width / 2);			//index of the sample at x = 0

			//fill the kernel with a gaussian
			for (unsigned int xi = 0; xi < width; xi++) {

				const double x = (double)xi - center;			//calculate the x position of the gaussian
				out[xi] = (T)(scale * exp(-(x * x) / two_var));
			}

		}

		/// Applies a Gaussian blur to a 2D image that is read through a CUDA texture object.
		///
		/// A Gaussian kernel of (8 * sigma + 1) samples is generated on the host, copied to
		/// the device, and handed to stim::cuda::tex_conv2sep as both of its kernels.
		/// @param out		destination buffer forwarded to tex_conv2sep
		///					(presumably device memory holding x * y elements -- confirm against conv2sep.cuh)
		/// @param sigma	standard deviation of the Gaussian, in samples
		/// @param x		image size forwarded to tex_conv2sep (first dimension)
		/// @param y		image size forwarded to tex_conv2sep (second dimension)
		/// @param texObj	texture object forwarded to tex_conv2sep
		/// @param cuArray	CUDA array forwarded to tex_conv2sep
		template<typename T>
		void tex_gaussian_blur2(T* out, T sigma, unsigned int x, unsigned int y, cudaTextureObject_t texObj, cudaArray* cuArray) {

			//allocate space for the kernel
			unsigned int kwidth = (unsigned int)(sigma * 8 + 1);

			//the host copy uses the same element type as the device copy because
			//	kwidth * sizeof(T) bytes are transferred between them below
			T* kernel0 = (T*)malloc(kwidth * sizeof(T));
			if (!kernel0) {
				std::cerr << "stim::cuda::tex_gaussian_blur2: unable to allocate the Gaussian kernel" << std::endl;
				return;
			}

			//fill the kernel with a gaussian
			gen_gaussian(kernel0, sigma, kwidth);

			//copy the kernel to the GPU
			T* gpuKernel0;
			HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T)));
			HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));

			//perform the gaussian blur as a separable convolution (the same kernel is passed twice)
			stim::cuda::tex_conv2sep(out, x, y, texObj, cuArray, gpuKernel0, kwidth, gpuKernel0, kwidth);

			//release both copies of the kernel
			HANDLE_ERROR(cudaFree(gpuKernel0));
			free(kernel0);

		}

		template<typename T>
		void gpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y) {

			//allocate space for the kernel
			unsigned int kwidth = sigma * 8 + 1;
			float* kernel0 = (float*)malloc(kwidth * sizeof(float));

			//fill the kernel with a gaussian
			gen_gaussian(kernel0, sigma, kwidth);

			//copy the kernel to the GPU
			T* gpuKernel0;
			HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T)));
			HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));

			//perform the gaussian blur as a separable convolution
			stim::cuda::gpu_conv2sep<float>(image, x, y, gpuKernel0, kwidth, gpuKernel0, kwidth);

			HANDLE_ERROR(cudaFree(gpuKernel0));

		}

		/// Applies a Gaussian blur to a 2D image stored on the CPU
		template<typename T>
		void cpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y, float &gpu_time) {

			gpuTimer_start();
			//allocate space for the kernel
			unsigned int kwidth = sigma * 8 + 1;
			float* kernel0 = (float*)malloc(kwidth * sizeof(float));

			//fill the kernel with a gaussian
			gen_gaussian(kernel0, sigma, kwidth);

			//perform the gaussian blur as a separable convolution
			stim::cuda::cpu_conv2sep<float>(image, x, y, kernel0, kwidth, kernel0, kwidth);
			gpu_time = gpuTimer_end();

		}
	
		
	}
}

#endif