Blame view

stim/cuda/filter.h 2.15 KB
d46f9ac6   Pavel Govyadinov   added first suppo...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
  #include <assert.h>
  #include <cuda.h>
  #include <cuda_runtime.h>
  #include <stdio.h>
  #include <stim/visualization/colormap.h>
  #include <sstream>
  
  #define IMAD(a,b,c) ( __mul24((a), (b)) + (c) )
  
  int kr;
  int kl;
  float sigma;
  float* LoG;
  float* result;
  cudaArray* srcArray;
  texture<uchar, cudaTextureType2D, cudaReadModeElementType> texIn;
  
  
  __device__ float filterKernel ()
  {
  	float t = 0;
3bd72370   Pavel Govyadinov   very minor indexi...
22
  	idx = j*kl+i;
d46f9ac6   Pavel Govyadinov   added first suppo...
23
24
25
26
  	for(int i = 0; i < kl; i++){
  		for(int j = 0; j < kl; j++){
  			x = i - floor(kl);
  			y = j - floor(kl);
3bd72370   Pavel Govyadinov   very minor indexi...
27
  			LoG(idx) = (-1/M_PI/sigma^4)* (1 - (x^2+y^2)/2/sigma^2)
d46f9ac6   Pavel Govyadinov   added first suppo...
28
  					*exp(-(x^2+y^2)/2/sigma^2);	
3bd72370   Pavel Govyadinov   very minor indexi...
29
  			t +=LoG(idx);
d46f9ac6   Pavel Govyadinov   added first suppo...
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
  		}
  	}
  	LoG =/ t;
  }
  
  void initArray(cudaGraphicsResource_t src, int DIM_X, int DIM_Y)
  {
  	HANDLE_ERROR(
  		cudaGraphicsMapResources(1, &src)
  	);
  	HANDLE_ERROR(
  		cudaGraphicsSubResourceGetMappedArray(&srcArray, src, 0,0)
  		);
  	HANDLE_ERROR(
  		cudaBindTertureToArray(texIn, srcArray)
  		);
  	cudaMalloc( (void**) &LoG, kl*kl*sizeof(float));
  	checkCUDAerrors("Memory Allocation, LoG");
  	cudaMalloc( (void**) &result, DIM_Y*DIM_X*sizeof(float));
  	checkCUDAerrors("Memory Allocation, Result");
  }
  
  void cleanUp(cudaGraphicsResource_t src);
  {
  	HANDLE_ERROR(
  		cudaUnbindTexture(texIn)
  	);
  	HANDLE_ERROR(
  		cudaFree(LoG)
  	);
  	HANDLE_ERROR(
  		cudaFree(result)
  	);
  	HANDLE_ERROR(
  		cudaGraphicsUnmapResources(1, &src)
  	);
  }
  
  //Shared memory would be better.
  __global__
  void
  applyFilter(unsigned int DIM_X, unsigned int DIM_Y){
  //R = floor(size/2)
  //THIS IS A NAIVE WAY TO DO IT, and there is a better way)
  	//__shared__ float shared[(DIM_X+2*R), (DIM_Y+2*R)];
  	
  	const	 int x = IMAD(blockDim.x, blockIdx.x, threadIdx.x);
  	const	 int y = IMAD(blockDim.y, blockIdx.y, threadIdx.y);
  	float val = 0;
  	//x = max(0,x);
  	//x = min(x, width-1);
  	//y = max(y, 0);
  	//y = min(y, height - 1);
  
  	int idx = y*DIM_X+x;
  	//unsigned int bindex = threadIdx.y * blockDim.y + threadIdx.x;
  
  	//float valIn		= tex2D(texIn, x, y);
  	for (int i = -kr; i <= kr; i++){	//rows
  		for (int j = -kr; i <= kr; j++){	//colls
  			k_idx = (j+kr)+(i+kr)*kl;
  			xi = max(0, x+i);
  			xi = min(x+i, DIM_X-1);
  			yj = max(y+j, 0);
  			yj = min(y+j, DIM_Y-1);
  			val += LoG(k_idx)*tex2D(texIn,x+i, y+j);	
  		}
  	}
  
  	result[idx] = val;
  }