filter.h 2.15 KB
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <stim/visualization/colormap.h>
#include <sstream>

#define IMAD(a,b,c) ( __mul24((a), (b)) + (c) )

int kr;
int kl;
float sigma;
float* LoG;
float* result;
cudaArray* srcArray;
texture<uchar, cudaTextureType2D, cudaReadModeElementType> texIn;


__device__ float filterKernel ()
{
	float t = 0;
	idx = j*kl+i;
	for(int i = 0; i < kl; i++){
		for(int j = 0; j < kl; j++){
			x = i - floor(kl);
			y = j - floor(kl);
			LoG(idx) = (-1/M_PI/sigma^4)* (1 - (x^2+y^2)/2/sigma^2)
					*exp(-(x^2+y^2)/2/sigma^2);	
			t +=LoG(idx);
		}
	}
	LoG =/ t;
}

void initArray(cudaGraphicsResource_t src, int DIM_X, int DIM_Y)
{
	HANDLE_ERROR(
		cudaGraphicsMapResources(1, &src)
	);
	HANDLE_ERROR(
		cudaGraphicsSubResourceGetMappedArray(&srcArray, src, 0,0)
		);
	HANDLE_ERROR(
		cudaBindTertureToArray(texIn, srcArray)
		);
	cudaMalloc( (void**) &LoG, kl*kl*sizeof(float));
	checkCUDAerrors("Memory Allocation, LoG");
	cudaMalloc( (void**) &result, DIM_Y*DIM_X*sizeof(float));
	checkCUDAerrors("Memory Allocation, Result");
}

void cleanUp(cudaGraphicsResource_t src);
{
	HANDLE_ERROR(
		cudaUnbindTexture(texIn)
	);
	HANDLE_ERROR(
		cudaFree(LoG)
	);
	HANDLE_ERROR(
		cudaFree(result)
	);
	HANDLE_ERROR(
		cudaGraphicsUnmapResources(1, &src)
	);
}

//Shared memory would be better.
__global__
void
applyFilter(unsigned int DIM_X, unsigned int DIM_Y){
//R = floor(size/2)
//THIS IS A NAIVE WAY TO DO IT, and there is a better way)
	//__shared__ float shared[(DIM_X+2*R), (DIM_Y+2*R)];
	
	const	 int x = IMAD(blockDim.x, blockIdx.x, threadIdx.x);
	const	 int y = IMAD(blockDim.y, blockIdx.y, threadIdx.y);
	float val = 0;
	//x = max(0,x);
	//x = min(x, width-1);
	//y = max(y, 0);
	//y = min(y, height - 1);

	int idx = y*DIM_X+x;
	//unsigned int bindex = threadIdx.y * blockDim.y + threadIdx.x;

	//float valIn		= tex2D(texIn, x, y);
	for (int i = -kr; i <= kr; i++){	//rows
		for (int j = -kr; i <= kr; j++){	//colls
			k_idx = (j+kr)+(i+kr)*kl;
			xi = max(0, x+i);
			xi = min(x+i, DIM_X-1);
			yj = max(y+j, 0);
			yj = min(y+j, DIM_Y-1);
			val += LoG(k_idx)*tex2D(texIn,x+i, y+j);	
		}
	}

	result[idx] = val;
}