Blame view

stim/cuda/filter.cuh 4.45 KB
27194b56   Pavel Govyadinov   major bug fixes, ...
1
2
3
4
5
6
7
8
9
  #ifndef STIM_FILTER_H
  #define STIM_FILTER_H
  
  #include <assert.h>
  #include <cuda.h>
  #include <cuda_runtime.h>
  #include <stdio.h>
  #include <stim/visualization/colormap.h>
  #include <sstream>
6ada8448   Pavel Govyadinov   Reverted to 40db1...
10
  #include <stim/math/constants.h>
27194b56   Pavel Govyadinov   major bug fixes, ...
11
12
13
  #include <stim/cuda/cudatools/devices.h>
  #include <stim/cuda/cudatools/threads.h>
  #include <stim/cuda/cuda_texture.cuh>
7f1ab3c2   Pavel Govyadinov   fixed problems wi...
14
  #include <stim/iVote/ivote2.cuh>
27194b56   Pavel Govyadinov   major bug fixes, ...
15
16
17
  #include <stim/cuda/arraymath.cuh>
  
  #define IMAD(a,b,c) ( __mul24((a), (b)) + (c) )
27194b56   Pavel Govyadinov   major bug fixes, ...
18
19
20
21
22
23
24
25
26
27
28
  
  
  namespace stim
  {
  	namespace cuda
  	{
  
  	float* gpuLoG;
  	float* LoG;
  	float* res;
  	float* centers;
2eefb035   Pavel Govyadinov   added debugging c...
29
30
31
32
33
  
  //#ifdef DEBUG
  	float* print;
  //#endif
  
27194b56   Pavel Govyadinov   major bug fixes, ...
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
  	stim::cuda::cuda_texture tx;
  
  
  
  	void initArray(int DIM_X, int DIM_Y, int kl)
  	{
  		
  			LoG =  (float*) malloc(kl*kl*sizeof(float));
  		HANDLE_ERROR(
  			cudaMalloc( (void**) &gpuLoG, kl*kl*sizeof(float))
  		);
  	//	checkCUDAerrors("Memory Allocation, LoG");
  		HANDLE_ERROR(
  			cudaMalloc( (void**) &res, DIM_Y*DIM_X*sizeof(float))
  		);
  		HANDLE_ERROR(
  			cudaMalloc( (void**) &centers, DIM_Y*DIM_X*sizeof(float))
  		);
2eefb035   Pavel Govyadinov   added debugging c...
52
53
54
55
56
57
58
  
  //#ifdef DEBUG
  		HANDLE_ERROR(
  			cudaMalloc( (void**) &print, DIM_Y*DIM_X*sizeof(float))
  		);
  //#endif
  
27194b56   Pavel Govyadinov   major bug fixes, ...
59
60
61
  	//	checkCUDAerrors("Memory Allocation, Result");
  	}
  
c37611a6   Pavel Govyadinov   removed the time ...
62
  	void cleanUP()
27194b56   Pavel Govyadinov   major bug fixes, ...
63
64
65
66
67
68
69
70
71
72
  	{
  		HANDLE_ERROR(
  			cudaFree(gpuLoG)
  		);
  		HANDLE_ERROR(
  			cudaFree(res)
  		);
  		HANDLE_ERROR(
  			cudaFree(centers)
  		);
2eefb035   Pavel Govyadinov   added debugging c...
73
74
75
76
77
  //#ifdef DEBUG
  		HANDLE_ERROR(
  			cudaFree(print)
  		);
  //#endif
27194b56   Pavel Govyadinov   major bug fixes, ...
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
  			free(LoG);
  	}
  
  	void
  	filterKernel (float kl, float sigma, float *LoG)
  	{
  		float t = 0.0;
  		float kr = kl/2; 
  		float x, y;
  		int idx;
  		for(int i = 0; i < kl; i++){
  			for(int j = 0; j < kl; j++){
  				idx = j*kl+i;
  				x = i - kr - 0.5;
  				y = j - kr - 0.5;
6ada8448   Pavel Govyadinov   Reverted to 40db1...
93
  				LoG[idx] = (-1.0/PI/powf(sigma, 4))* (1 - (powf(x,2)+powf(y,2))/2.0/powf(sigma, 2))
27194b56   Pavel Govyadinov   major bug fixes, ...
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
  						*expf(-(powf(x,2)+powf(y,2))/2/powf(sigma,2));	
  				t +=LoG[idx];
  			}
  		}
  		
  		for(int i = 0; i < kl*kl; i++)
  		{
  			LoG[i] = LoG[i]/t;
  		}
  		
  	}
  
  	//Shared memory would be better.
  	__global__
  	void
2eefb035   Pavel Govyadinov   added debugging c...
109
  	applyFilter(cudaTextureObject_t texIn, unsigned int DIM_X, unsigned int DIM_Y, int kr, int kl, float *res, float* gpuLoG, float* p){
27194b56   Pavel Govyadinov   major bug fixes, ...
110
111
112
113
114
115
116
117
  	//R = floor(size/2)
  	//THIS IS A NAIVE WAY TO DO IT, and there is a better way)
  		
  		__shared__ float shared[7][7];
  		int x = blockIdx.x;
  		int y = blockIdx.y;
  		int xi = threadIdx.x;
  		int yi = threadIdx.y;
6ada8448   Pavel Govyadinov   Reverted to 40db1...
118
  	//	float val = 0;
27194b56   Pavel Govyadinov   major bug fixes, ...
119
120
  		float tu = (x-kr+xi)/(float)DIM_X;
  		float tv = (y-kr+yi)/(float)DIM_Y;
2eefb035   Pavel Govyadinov   added debugging c...
121
  		int idx = y*DIM_X+x;
27194b56   Pavel Govyadinov   major bug fixes, ...
122
123
  		shared[xi][yi] = gpuLoG[yi*kl+xi]*(255.0-(float)tex2D<unsigned char>(texIn, tu, tv));
  		__syncthreads();
27194b56   Pavel Govyadinov   major bug fixes, ...
124
125
126
127
128
129
  		
  		//x = max(0,x);
  		//x = min(x, width-1);
  		//y = max(y, 0);
  		//y = min(y, height - 1);
  
6ada8448   Pavel Govyadinov   Reverted to 40db1...
130
  	//	int k_idx;
27194b56   Pavel Govyadinov   major bug fixes, ...
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
                  for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1)
                  {
                          __syncthreads();
                          if (xi < step)
                          {
                                  shared[xi][yi] += shared[xi + step][yi];
                          }
                  __syncthreads();
                  }
                  __syncthreads();
  
                  for(unsigned int step = blockDim.y/2; step >= 1; step >>= 1)
                  {
                          __syncthreads();
                          if(yi < step)
                          {
                                  shared[xi][yi] += shared[xi][yi + step];
                          }
                  __syncthreads();
                  }
                  __syncthreads();
                  if(xi == 0 && yi == 0)
                          res[idx] = shared[0][0];
2eefb035   Pavel Govyadinov   added debugging c...
154
  
27194b56   Pavel Govyadinov   major bug fixes, ...
155
156
157
158
  	}
  
  	extern "C"
  	float *
7f1ab3c2   Pavel Govyadinov   fixed problems wi...
159
  	get_centers(GLint texbufferID, GLenum texType, int DIM_X, int DIM_Y, int sizeK, float sigma, float conn, int iter = 0)
27194b56   Pavel Govyadinov   major bug fixes, ...
160
161
162
163
164
165
166
167
168
169
170
171
172
  	{
  		tx.SetTextureCoordinates(1);
  		tx.SetAddressMode(1, 3);
  		tx.MapCudaTexture(texbufferID, texType);
  		float* result =  (float*) malloc(DIM_X*DIM_Y*sizeof(float));
  		
  		initArray(DIM_X, DIM_Y, sizeK);
  
  		filterKernel(sizeK, sigma, LoG);
  		cudaMemcpy(gpuLoG, LoG, sizeK*sizeK*sizeof(float), cudaMemcpyHostToDevice);
  		dim3 numBlocks(DIM_X, DIM_Y);
  		dim3 threadsPerBlock(sizeK, sizeK);
  
2eefb035   Pavel Govyadinov   added debugging c...
173
174
175
176
177
178
179
180
  		applyFilter <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), DIM_X, DIM_Y, floor(sizeK/2), sizeK, res, gpuLoG, print);
  
  		#ifdef DEBUG
  			stringstream name;
  			name.str("");
  			name << "Fiber Cylinder " << iter << ".bmp";
  			stim::gpu2image<float>(res, name.str(), DIM_X, DIM_Y, 0, 255);
  		#endif
27194b56   Pavel Govyadinov   major bug fixes, ...
181
182
  
  
7f1ab3c2   Pavel Govyadinov   fixed problems wi...
183
  		stim::cuda::gpu_local_max<float>(centers, res, conn, DIM_X, DIM_Y);
27194b56   Pavel Govyadinov   major bug fixes, ...
184
185
186
187
188
189
190
191
192
193
194
195
196
197
  		
  		cudaDeviceSynchronize();
  
  
  		cudaMemcpy(result, centers, DIM_X*DIM_Y*sizeof(float), cudaMemcpyDeviceToHost);
  
  		tx.UnmapCudaTexture();
  		cleanUP();
  		return result;
  	}
  
  	}
  }
  #endif