#ifndef STIM_CUDA_GAUSSIAN_BLUR_H
#define STIM_CUDA_GAUSSIAN_BLUR_H

// NOTE(review): the include targets below were lost from the copy of this file
// under review (only bare "#include" tokens remained). The standard headers are
// the ones this code demonstrably needs; the project headers are a best-effort
// reconstruction and must be confirmed against the repository.
#include <cmath>
#include <iostream>
#include <vector>
#include <cuda.h>
#include <cuda_runtime.h>
#include <stim/cuda/cudatools.h>                // presumably provides HANDLE_ERROR - TODO confirm
#include <stim/cuda/sharedmem.cuh>
#include <stim/cuda/templates/conv2sep.cuh>     //GPU-based separable convolution algorithm

#define pi 3.14159

namespace stim{
    namespace cuda{

        /// Fills a 1D array with samples of a Gaussian centered on the array.
        ///
        /// Sample xi is evaluated at position (xi - width/2) (integer division),
        /// using g(x) = 1 / (sigma * sqrt(2 * pi)) * exp(-x^2 / (2 * sigma^2)).
        /// The samples are not re-normalized to sum to one.
        ///
        /// @param out   destination array with room for at least `width` values
        /// @param sigma standard deviation of the Gaussian; must be non-zero,
        ///              otherwise the expression divides by zero
        /// @param width number of samples to generate
        template<typename T>
        void gen_gaussian(T* out, T sigma, unsigned int width){

            //the normalization factor does not depend on the sample position, so compute it once
            //(the literal matches the value of the `pi` macro above and keeps results unchanged)
            const double norm = 1.0 / (sigma * sqrt(2 * 3.14159));
            const T center = (T)(width / 2);

            //fill the kernel with a gaussian
            for(unsigned int xi = 0; xi < width; xi++){
                T x = (T)xi - center;                               //calculate the x position of the gaussian
                T g = (T)(norm * exp( - (x*x) / (2*sigma*sigma) ));
                out[xi] = g;
            }
        }

        /// Applies a Gaussian blur as a separable convolution via stim::cuda::tex_conv2sep.
        ///
        /// A kernel of (sigma * 8 + 1) samples is generated on the host, uploaded to the
        /// device, and used for both convolution passes.
        ///
        /// @param out     output buffer forwarded to tex_conv2sep
        /// @param sigma   standard deviation of the Gaussian; expected to be positive
        /// @param x       forwarded to tex_conv2sep (presumably the image width - TODO confirm)
        /// @param y       forwarded to tex_conv2sep (presumably the image height - TODO confirm)
        /// @param texObj  texture object forwarded to tex_conv2sep
        /// @param cuArray CUDA array forwarded to tex_conv2sep
        template<typename T>
        void tex_gaussian_blur2(T* out, T sigma, unsigned int x, unsigned int y, cudaTextureObject_t texObj, cudaArray* cuArray){

            //allocate space for the kernel (released automatically when the vector goes out of scope)
            unsigned int kwidth = (unsigned int)(sigma * 8 + 1);
            std::vector<T> kernel0(kwidth);

            //fill the kernel with a gaussian
            gen_gaussian<T>(kernel0.data(), sigma, kwidth);

            //copy the kernel to the GPU
            //(the device buffer has to be allocated before it can be the target of a copy)
            T* gpuKernel0 = NULL;
            HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T)));
            HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0.data(), kwidth * sizeof(T), cudaMemcpyHostToDevice));

            //perform the gaussian blur as a separable convolution
            stim::cuda::tex_conv2sep(out, x, y, texObj, cuArray, gpuKernel0, kwidth, gpuKernel0, kwidth);

            HANDLE_ERROR(cudaFree(gpuKernel0));
        }

        /// Applies a Gaussian blur as a separable convolution via stim::cuda::gpu_conv2sep.
        ///
        /// A kernel of (sigma * 8 + 1) samples is generated on the host, uploaded to the
        /// device, and used for both convolution passes.
        ///
        /// @param image image buffer forwarded to gpu_conv2sep
        ///              (presumably a device pointer that is blurred in place - TODO confirm)
        /// @param sigma standard deviation of the Gaussian; expected to be positive
        /// @param x     forwarded to gpu_conv2sep (presumably the image width - TODO confirm)
        /// @param y     forwarded to gpu_conv2sep (presumably the image height - TODO confirm)
        template<typename T>
        void gpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){

            //allocate space for the kernel (released automatically when the vector goes out of scope)
            unsigned int kwidth = (unsigned int)(sigma * 8 + 1);
            std::vector<T> kernel0(kwidth);

            //fill the kernel with a gaussian
            gen_gaussian<T>(kernel0.data(), sigma, kwidth);

            //copy the kernel to the GPU
            T* gpuKernel0 = NULL;
            HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T)));
            HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0.data(), kwidth * sizeof(T), cudaMemcpyHostToDevice));

            //perform the gaussian blur as a separable convolution
            stim::cuda::gpu_conv2sep(image, x, y, gpuKernel0, kwidth, gpuKernel0, kwidth);

            HANDLE_ERROR(cudaFree(gpuKernel0));
        }

        /// Applies a Gaussian blur to a 2D image stored on the CPU
        ///
        /// A kernel of (sigma * 8 + 1) samples is generated on the host and passed to
        /// stim::cuda::cpu_conv2sep for both convolution passes.
        ///
        /// @param image host image buffer forwarded to cpu_conv2sep
        /// @param sigma standard deviation of the Gaussian; expected to be positive
        /// @param x     forwarded to cpu_conv2sep (presumably the image width - TODO confirm)
        /// @param y     forwarded to cpu_conv2sep (presumably the image height - TODO confirm)
        template<typename T>
        void cpu_gaussian_blur2(T* image, T sigma, unsigned int x, unsigned int y){

            //allocate space for the kernel
            //NOTE(review): the buffer is released when this function returns, which assumes
            //cpu_conv2sep does not keep the kernel pointer after it returns - confirm
            unsigned int kwidth = (unsigned int)(sigma * 8 + 1);
            std::vector<T> kernel0(kwidth);

            //fill the kernel with a gaussian
            gen_gaussian<T>(kernel0.data(), sigma, kwidth);

            //perform the gaussian blur as a separable convolution
            stim::cuda::cpu_conv2sep(image, x, y, kernel0.data(), kwidth, kernel0.data(), kwidth);
        }

    }
}

#endif