27194b56
Pavel Govyadinov
major bug fixes, ...
|
1
2
3
4
5
6
7
8
9
|
#ifndef STIM_FILTER_H
#define STIM_FILTER_H
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <stim/visualization/colormap.h>
#include <sstream>
|
6ada8448
Pavel Govyadinov
Reverted to 40db1...
|
10
|
#include <stim/math/constants.h>
|
27194b56
Pavel Govyadinov
major bug fixes, ...
|
11
12
13
14
15
16
17
|
#include <stim/cuda/cudatools/devices.h>
#include <stim/cuda/cudatools/threads.h>
#include <stim/cuda/cuda_texture.cuh>
#include <stim/cuda/ivote.cuh>
#include <stim/cuda/arraymath.cuh>
#define IMAD(a,b,c) ( __mul24((a), (b)) + (c) )
|
27194b56
Pavel Govyadinov
major bug fixes, ...
|
18
19
20
21
22
23
24
25
26
27
28
|
namespace stim
{
namespace cuda
{
float* gpuLoG;
float* LoG;
float* res;
float* centers;
|
2eefb035
Pavel Govyadinov
added debugging c...
|
29
30
31
32
33
|
//#ifdef DEBUG
float* print;
//#endif
|
27194b56
Pavel Govyadinov
major bug fixes, ...
|
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
stim::cuda::cuda_texture tx;
void initArray(int DIM_X, int DIM_Y, int kl)
{
LoG = (float*) malloc(kl*kl*sizeof(float));
HANDLE_ERROR(
cudaMalloc( (void**) &gpuLoG, kl*kl*sizeof(float))
);
// checkCUDAerrors("Memory Allocation, LoG");
HANDLE_ERROR(
cudaMalloc( (void**) &res, DIM_Y*DIM_X*sizeof(float))
);
HANDLE_ERROR(
cudaMalloc( (void**) ¢ers, DIM_Y*DIM_X*sizeof(float))
);
|
2eefb035
Pavel Govyadinov
added debugging c...
|
52
53
54
55
56
57
58
|
//#ifdef DEBUG
HANDLE_ERROR(
cudaMalloc( (void**) &print, DIM_Y*DIM_X*sizeof(float))
);
//#endif
|
27194b56
Pavel Govyadinov
major bug fixes, ...
|
59
60
61
|
// checkCUDAerrors("Memory Allocation, Result");
}
|
c37611a6
Pavel Govyadinov
removed the time ...
|
62
|
void cleanUP()
|
27194b56
Pavel Govyadinov
major bug fixes, ...
|
63
64
65
66
67
68
69
70
71
72
|
{
HANDLE_ERROR(
cudaFree(gpuLoG)
);
HANDLE_ERROR(
cudaFree(res)
);
HANDLE_ERROR(
cudaFree(centers)
);
|
2eefb035
Pavel Govyadinov
added debugging c...
|
73
74
75
76
77
|
//#ifdef DEBUG
HANDLE_ERROR(
cudaFree(print)
);
//#endif
|
27194b56
Pavel Govyadinov
major bug fixes, ...
|
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
free(LoG);
}
void
filterKernel (float kl, float sigma, float *LoG)
{
float t = 0.0;
float kr = kl/2;
float x, y;
int idx;
for(int i = 0; i < kl; i++){
for(int j = 0; j < kl; j++){
idx = j*kl+i;
x = i - kr - 0.5;
y = j - kr - 0.5;
|
6ada8448
Pavel Govyadinov
Reverted to 40db1...
|
93
|
LoG[idx] = (-1.0/PI/powf(sigma, 4))* (1 - (powf(x,2)+powf(y,2))/2.0/powf(sigma, 2))
|
27194b56
Pavel Govyadinov
major bug fixes, ...
|
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
*expf(-(powf(x,2)+powf(y,2))/2/powf(sigma,2));
t +=LoG[idx];
}
}
for(int i = 0; i < kl*kl; i++)
{
LoG[i] = LoG[i]/t;
}
}
//Shared memory would be better.
__global__
void
|
2eefb035
Pavel Govyadinov
added debugging c...
|
109
|
applyFilter(cudaTextureObject_t texIn, unsigned int DIM_X, unsigned int DIM_Y, int kr, int kl, float *res, float* gpuLoG, float* p){
|
27194b56
Pavel Govyadinov
major bug fixes, ...
|
110
111
112
113
114
115
116
117
|
//R = floor(size/2)
//THIS IS A NAIVE WAY TO DO IT, and there is a better way)
__shared__ float shared[7][7];
int x = blockIdx.x;
int y = blockIdx.y;
int xi = threadIdx.x;
int yi = threadIdx.y;
|
6ada8448
Pavel Govyadinov
Reverted to 40db1...
|
118
|
// float val = 0;
|
27194b56
Pavel Govyadinov
major bug fixes, ...
|
119
120
|
float tu = (x-kr+xi)/(float)DIM_X;
float tv = (y-kr+yi)/(float)DIM_Y;
|
2eefb035
Pavel Govyadinov
added debugging c...
|
121
|
int idx = y*DIM_X+x;
|
27194b56
Pavel Govyadinov
major bug fixes, ...
|
122
123
|
shared[xi][yi] = gpuLoG[yi*kl+xi]*(255.0-(float)tex2D<unsigned char>(texIn, tu, tv));
__syncthreads();
|
27194b56
Pavel Govyadinov
major bug fixes, ...
|
124
125
126
127
128
129
|
//x = max(0,x);
//x = min(x, width-1);
//y = max(y, 0);
//y = min(y, height - 1);
|
6ada8448
Pavel Govyadinov
Reverted to 40db1...
|
130
|
// int k_idx;
|
27194b56
Pavel Govyadinov
major bug fixes, ...
|
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
|
for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1)
{
__syncthreads();
if (xi < step)
{
shared[xi][yi] += shared[xi + step][yi];
}
__syncthreads();
}
__syncthreads();
for(unsigned int step = blockDim.y/2; step >= 1; step >>= 1)
{
__syncthreads();
if(yi < step)
{
shared[xi][yi] += shared[xi][yi + step];
}
__syncthreads();
}
__syncthreads();
if(xi == 0 && yi == 0)
res[idx] = shared[0][0];
|
2eefb035
Pavel Govyadinov
added debugging c...
|
154
|
|
27194b56
Pavel Govyadinov
major bug fixes, ...
|
155
156
157
158
|
}
extern "C"
float *
|
2eefb035
Pavel Govyadinov
added debugging c...
|
159
|
get_centers(GLint texbufferID, GLenum texType, int DIM_X, int DIM_Y, int sizeK, float sigma, float conn, float threshold, int iter = 0)
|
27194b56
Pavel Govyadinov
major bug fixes, ...
|
160
161
162
163
164
165
166
167
168
169
170
171
172
|
{
tx.SetTextureCoordinates(1);
tx.SetAddressMode(1, 3);
tx.MapCudaTexture(texbufferID, texType);
float* result = (float*) malloc(DIM_X*DIM_Y*sizeof(float));
initArray(DIM_X, DIM_Y, sizeK);
filterKernel(sizeK, sigma, LoG);
cudaMemcpy(gpuLoG, LoG, sizeK*sizeK*sizeof(float), cudaMemcpyHostToDevice);
dim3 numBlocks(DIM_X, DIM_Y);
dim3 threadsPerBlock(sizeK, sizeK);
|
2eefb035
Pavel Govyadinov
added debugging c...
|
173
174
175
176
177
178
179
180
|
applyFilter <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), DIM_X, DIM_Y, floor(sizeK/2), sizeK, res, gpuLoG, print);
#ifdef DEBUG
stringstream name;
name.str("");
name << "Fiber Cylinder " << iter << ".bmp";
stim::gpu2image<float>(res, name.str(), DIM_X, DIM_Y, 0, 255);
#endif
|
27194b56
Pavel Govyadinov
major bug fixes, ...
|
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
|
stim::cuda::gpu_local_max<float>(centers, res, threshold, conn, DIM_X, DIM_Y);
cudaDeviceSynchronize();
cudaMemcpy(result, centers, DIM_X*DIM_Y*sizeof(float), cudaMemcpyDeviceToHost);
tx.UnmapCudaTexture();
cleanUP();
return result;
}
}
}
#endif
|