84eff8b1
Pavel Govyadinov
Merged only the n...
|
1
2
3
4
5
|
#ifndef STIM_SPIDER_COST_H
#define STIM_SPIDER_COST_H
#include <assert.h>
#include <cuda.h>
|
9b766f1f
Pavel Govyadinov
completed merge f...
|
6
|
//#include <cuda_runtime.h>
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
7
8
9
10
|
#include <stdio.h>
#include <stim/visualization/colormap.h>
#include <sstream>
#include <stim/math/vector.h>
|
ef5cebe5
Pavel Govyadinov
stable version
|
11
|
#include <stim/cuda/cudatools/timer.h>
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
12
13
14
15
16
17
18
|
#include <stim/cuda/cudatools/devices.h>
#include <stim/cuda/cudatools/threads.h>
#include <stim/cuda/cuda_texture.cuh>
namespace stim{
namespace cuda
{
|
c37611a6
Pavel Govyadinov
removed the time ...
|
19
|
// float* result;
|
c0e09133
Pavel Govyadinov
STABLE: made temp...
|
20
|
// float* print;
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
21
22
23
24
|
///Initialization function, allocates the memory and passes the necessary
///handles from OpenGL and Cuda.
///@param DIM_Y --integer controlling how much memory to allocate.
|
c37611a6
Pavel Govyadinov
removed the time ...
|
25
26
|
// void initArray(int DIM_Y)
// {
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
27
|
// cudaMalloc( (void**) &print, DIM_Y*16*sizeof(float)); ///temporary
|
c37611a6
Pavel Govyadinov
removed the time ...
|
28
29
|
// cudaMalloc( (void**) &result, DIM_Y*sizeof(float));
// }
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
30
31
32
|
///Deinit function that frees the memery used and releases the texture resource
///back to OpenGL.
|
c37611a6
Pavel Govyadinov
removed the time ...
|
33
34
35
|
// void cleanUP()
// {
// cudaFree(result);
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
36
|
// cudaFree(print); ///temporary
|
c37611a6
Pavel Govyadinov
removed the time ...
|
37
|
// }
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
38
39
40
41
42
|
///A virtual representation of a uniform template.
///Returns the value of the template pixel.
///@param int x --location of a pixel.
__device__
|
efe7b7cc
Pavel Govyadinov
Added a detailed ...
|
43
|
float Template(int x, int max_x)
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
44
|
{
|
efe7b7cc
Pavel Govyadinov
Added a detailed ...
|
45
46
|
if(x < max_x/6 || x > max_x*5/6 || (x > max_x*2/6 && x < max_x*4/6))
{
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
return 1.0;
}else{
return 0.0;
}
}
///Find the difference of the given set of samples and the template
///using cuda acceleration.
///@param stim::cuda::cuda_texture t --stim texture that holds all the references
/// to the data.
///@param float* result --a pointer to the memory that stores the result.
__global__
//void get_diff (float *result)
|
c0e09133
Pavel Govyadinov
STABLE: made temp...
|
61
|
void get_diff (cudaTextureObject_t texIn, float *result, int dx, int dy)
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
62
|
{
|
c0e09133
Pavel Govyadinov
STABLE: made temp...
|
63
64
|
// __shared__ float shared[32][16];
extern __shared__ float shared[];
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
65
66
67
68
|
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int x_t = threadIdx.x;
int y_t = threadIdx.y;
|
c0e09133
Pavel Govyadinov
STABLE: made temp...
|
69
|
int idx = y_t*dx+x_t;
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
70
71
72
|
int g_idx = blockIdx.y;
float valIn = tex2D<unsigned char>(texIn, x, y)/255.0;
|
efe7b7cc
Pavel Govyadinov
Added a detailed ...
|
73
|
float valTemp = Template(x, dx);
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
74
75
76
|
// print[idx] = abs(valIn); ///temporary
|
c0e09133
Pavel Govyadinov
STABLE: made temp...
|
77
|
shared[idx] = abs(valIn-valTemp);
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
78
79
80
81
82
83
84
85
|
__syncthreads();
for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1)
{
__syncthreads();
if (x_t < step)
{
|
c0e09133
Pavel Govyadinov
STABLE: made temp...
|
86
87
|
// shared[x_t][y_t] += shared[x_t + step][y_t];
shared[idx] += shared[y_t*dx+x_t+step];
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
88
89
90
91
92
93
94
95
96
97
|
}
__syncthreads();
}
__syncthreads();
for(unsigned int step = blockDim.y/2; step >= 1; step >>= 1)
{
__syncthreads();
if(y_t < step)
{
|
c0e09133
Pavel Govyadinov
STABLE: made temp...
|
98
99
|
// shared[x_t][y_t] += shared[x_t][y_t + step];
shared[idx] += shared[(y_t+step)*dx+x_t];
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
100
101
102
103
104
|
}
__syncthreads();
}
__syncthreads();
if(x_t == 0 && y_t == 0)
|
c0e09133
Pavel Govyadinov
STABLE: made temp...
|
105
|
result[g_idx] = shared[0];
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
106
107
108
109
110
111
112
113
114
115
116
117
118
|
// //result[idx] = abs(valIn);
}
///External access-point to the cuda function
///@param GLuint texbufferID --GLtexture (most be contained in a framebuffer object)
/// that holds the data that will be handed to cuda.
///@param GLenum texType --either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
/// may work with other gl texture types, but untested.
///@param DIM_Y, the number of samples in the template.
extern "C"
|
c37611a6
Pavel Govyadinov
removed the time ...
|
119
120
|
//stim::vec<int> get_cost(GLint texbufferID, GLenum texType, int DIM_Y,int dx = 16, int dy = 8)
stim::vec<int> get_cost(cudaTextureObject_t tObj, float* result, int DIM_Y,int dx = 16, int dy = 8)
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
121
122
123
|
{
//Bind the Texture in GL and allow access to cuda.
|
ef5cebe5
Pavel Govyadinov
stable version
|
124
125
126
|
// #ifdef TIMING
// gpuStartTimer();
// #endif
|
c37611a6
Pavel Govyadinov
removed the time ...
|
127
|
// t.MapCudaTexture(texbufferID, texType);
|
ef5cebe5
Pavel Govyadinov
stable version
|
128
129
130
|
// #ifdef TIMING
// std::cout << " " << gpuStopTimer();
// #endif
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
131
132
|
//initialize the return arrays.
|
ef5cebe5
Pavel Govyadinov
stable version
|
133
134
135
|
// #ifdef TIMING
// gpuStartTimer();
// #endif
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
136
137
138
139
|
float* output;
output = (float* ) malloc(DIM_Y*sizeof(float));
stim::vec<int> ret(0, 0);
|
c37611a6
Pavel Govyadinov
removed the time ...
|
140
|
// initArray(DIM_Y);
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
141
142
143
144
145
|
//variables for finding the min.
float mini = 10000000000000000.0;
int idx = 0;
|
ef5cebe5
Pavel Govyadinov
stable version
|
146
147
148
|
// #ifdef TIMING
// std::cout << " " << gpuStopTimer();
// #endif
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
149
150
|
//cuda launch variables.
|
ef5cebe5
Pavel Govyadinov
stable version
|
151
152
153
|
// #ifdef TIMING
// gpuStartTimer();
// #endif
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
154
|
dim3 numBlocks(1, DIM_Y);
|
c0e09133
Pavel Govyadinov
STABLE: made temp...
|
155
|
dim3 threadsPerBlock(dx, dy);
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
156
|
|
c37611a6
Pavel Govyadinov
removed the time ...
|
157
|
get_diff <<< numBlocks, threadsPerBlock, dx*dy*sizeof(float) >>> (tObj, result, dx, dy);
|
ef5cebe5
Pavel Govyadinov
stable version
|
158
159
160
161
162
163
164
165
|
cudaDeviceSynchronize();
// #ifdef TIMING
// std::cout << " " << gpuStopTimer();
// #endif
// #ifdef TIMING
// gpuStartTimer();
// #endif
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
166
167
168
169
170
171
172
|
HANDLE_ERROR(
cudaMemcpy(output, result, DIM_Y*sizeof(float), cudaMemcpyDeviceToHost)
);
for( int i = 0; i<DIM_Y; i++){
if(output[i] < mini){
mini = output[i];
|
efe7b7cc
Pavel Govyadinov
Added a detailed ...
|
173
|
idx = i;
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
174
175
|
}
}
|
ef5cebe5
Pavel Govyadinov
stable version
|
176
177
178
|
// #ifdef TIMING
// std::cout << " " << gpuStopTimer();
// #endif
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
179
|
|
ef5cebe5
Pavel Govyadinov
stable version
|
180
181
182
|
// #ifdef TIMING
// gpuStartTimer();
// #endif
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
183
184
|
// stringstream name; //for debugging
// name << "Test.bmp";
|
035d798f
Pavel Govyadinov
modified the spid...
|
185
|
// stim::gpu2image<float>(print, name.str(),16,218,0,256);
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
186
|
|
c37611a6
Pavel Govyadinov
removed the time ...
|
187
188
|
// t.UnmapCudaTexture();
// cleanUP();
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
189
|
ret[0] = idx; ret[1] = (int) output[idx];
|
c0e09133
Pavel Govyadinov
STABLE: made temp...
|
190
|
// std::cout << "The cost is " << output[idx] << std::endl;
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
191
|
free(output);
|
ef5cebe5
Pavel Govyadinov
stable version
|
192
193
194
|
// #ifdef TIMING
// std::cout << " " << gpuStopTimer() << std::endl;
// #endif
|
84eff8b1
Pavel Govyadinov
Merged only the n...
|
195
196
197
198
199
200
201
202
|
return ret;
}
}
}
#endif
|