Blame view

stim/cuda/spider_cost.cuh 5.12 KB
84eff8b1   Pavel Govyadinov   Merged only the n...
1
2
3
4
5
  #ifndef STIM_SPIDER_COST_H
  #define STIM_SPIDER_COST_H
  
  #include <assert.h>
  #include <cuda.h>
9b766f1f   Pavel Govyadinov   completed merge f...
6
  //#include <cuda_runtime.h>
84eff8b1   Pavel Govyadinov   Merged only the n...
7
8
9
10
  #include <stdio.h>
  #include <stim/visualization/colormap.h>
  #include <sstream>
  #include <stim/math/vector.h>
ef5cebe5   Pavel Govyadinov   stable version
11
  #include <stim/cuda/cudatools/timer.h>
84eff8b1   Pavel Govyadinov   Merged only the n...
12
13
14
15
16
17
18
  #include <stim/cuda/cudatools/devices.h>
  #include <stim/cuda/cudatools/threads.h>
  #include <stim/cuda/cuda_texture.cuh>
  namespace stim{
  	namespace cuda
  	{
  	
c37611a6   Pavel Govyadinov   removed the time ...
19
  //	float* result;
c0e09133   Pavel Govyadinov   STABLE: made temp...
20
  //	float* print;
84eff8b1   Pavel Govyadinov   Merged only the n...
21
22
23
24
  	
  	///Initialization function, allocates the memory and passes the necessary
  	///handles from OpenGL and Cuda.
  	///@param DIM_Y			--integer controlling how much memory to allocate.
c37611a6   Pavel Govyadinov   removed the time ...
25
26
  //	void initArray(int DIM_Y)
  //	{
84eff8b1   Pavel Govyadinov   Merged only the n...
27
  //			cudaMalloc( (void**) &print, DIM_Y*16*sizeof(float));     ///temporary
c37611a6   Pavel Govyadinov   removed the time ...
28
29
  //			cudaMalloc( (void**) &result, DIM_Y*sizeof(float));
  //	}
84eff8b1   Pavel Govyadinov   Merged only the n...
30
31
32
  
  	///Deinit function that frees the memery used and releases the texture resource
  	///back to OpenGL.
c37611a6   Pavel Govyadinov   removed the time ...
33
34
35
  //	void cleanUP()
  //	{
  //			cudaFree(result);
84eff8b1   Pavel Govyadinov   Merged only the n...
36
  //			cudaFree(print);         ///temporary
c37611a6   Pavel Govyadinov   removed the time ...
37
  //	}  
84eff8b1   Pavel Govyadinov   Merged only the n...
38
39
40
41
42
  
  	///A virtual representation of a uniform template.
  	///Returns the value of the template pixel.
  	///@param int x			--location of a pixel.
  	__device__
efe7b7cc   Pavel Govyadinov   Added a detailed ...
43
  	float Template(int x, int max_x)
84eff8b1   Pavel Govyadinov   Merged only the n...
44
  	{
efe7b7cc   Pavel Govyadinov   Added a detailed ...
45
46
  	if(x < max_x/6 || x > max_x*5/6 || (x > max_x*2/6 && x < max_x*4/6))
  		{
84eff8b1   Pavel Govyadinov   Merged only the n...
47
48
49
50
51
52
53
54
55
56
57
58
59
60
  			return 1.0;
  		}else{
  			return 0.0;
  		}
  
  	}
  		
  	///Find the difference of the given set of samples and the template
  	///using cuda acceleration.
  	///@param stim::cuda::cuda_texture t	--stim texture that holds all the references
  	///					  to the data.
  	///@param float* result			--a pointer to the memory that stores the result.
  	__global__
  	//void get_diff (float *result)
c0e09133   Pavel Govyadinov   STABLE: made temp...
61
  	void get_diff (cudaTextureObject_t texIn, float *result, int dx, int dy)
84eff8b1   Pavel Govyadinov   Merged only the n...
62
  	{       
c0e09133   Pavel Govyadinov   STABLE: made temp...
63
64
  //		__shared__ float shared[32][16];
  		extern __shared__ float shared[];
84eff8b1   Pavel Govyadinov   Merged only the n...
65
66
67
68
  		int x   = threadIdx.x + blockIdx.x * blockDim.x;
  		int y   = threadIdx.y + blockIdx.y * blockDim.y;
  		int x_t = threadIdx.x;
  		int y_t = threadIdx.y;
c0e09133   Pavel Govyadinov   STABLE: made temp...
69
  		int idx = y_t*dx+x_t;
84eff8b1   Pavel Govyadinov   Merged only the n...
70
71
72
  		int g_idx = blockIdx.y;
  
  		float valIn             = tex2D<unsigned char>(texIn, x, y)/255.0;
efe7b7cc   Pavel Govyadinov   Added a detailed ...
73
  		float valTemp           = Template(x, dx);
84eff8b1   Pavel Govyadinov   Merged only the n...
74
75
76
  
  //		print[idx]              = abs(valIn);             ///temporary
  
c0e09133   Pavel Govyadinov   STABLE: made temp...
77
  		shared[idx]        = abs(valIn-valTemp);
84eff8b1   Pavel Govyadinov   Merged only the n...
78
79
80
81
82
83
84
85
  
  		__syncthreads();
  
  		for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1)
  		{
  			__syncthreads();
  			if (x_t < step)
  			{
c0e09133   Pavel Govyadinov   STABLE: made temp...
86
87
  //				shared[x_t][y_t] += shared[x_t + step][y_t];
  				shared[idx] += shared[y_t*dx+x_t+step];
84eff8b1   Pavel Govyadinov   Merged only the n...
88
89
90
91
92
93
94
95
96
97
  			}
  		__syncthreads();
  		}
  		__syncthreads();
  
  		for(unsigned int step = blockDim.y/2; step >= 1; step >>= 1)
  		{
  			__syncthreads();
  			if(y_t < step)
  			{
c0e09133   Pavel Govyadinov   STABLE: made temp...
98
99
  //				shared[x_t][y_t] += shared[x_t][y_t + step];
  				shared[idx] += shared[(y_t+step)*dx+x_t];
84eff8b1   Pavel Govyadinov   Merged only the n...
100
101
102
103
104
  			}
  		__syncthreads();
  		}
  		__syncthreads();
  		if(x_t == 0 && y_t == 0)
c0e09133   Pavel Govyadinov   STABLE: made temp...
105
  			result[g_idx] = shared[0];
84eff8b1   Pavel Govyadinov   Merged only the n...
106
107
108
109
110
111
112
113
114
115
116
117
118
  
  
  	//      //result[idx]           = abs(valIn);
  	}
  
  
  	///External access-point to the cuda function
  	///@param GLuint texbufferID 	--GLtexture (most be contained in a framebuffer object)
  	///				  that holds the data that will be handed to cuda.
  	///@param GLenum texType	--either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
  	///				  may work with other gl texture types, but untested.
  	///@param DIM_Y, the number of samples in the template.
  	extern "C"
c37611a6   Pavel Govyadinov   removed the time ...
119
120
  	//stim::vec<int> get_cost(GLint texbufferID, GLenum texType, int DIM_Y,int dx = 16, int dy = 8)
  	stim::vec<int> get_cost(cudaTextureObject_t tObj, float* result, int DIM_Y,int dx = 16, int dy = 8)
84eff8b1   Pavel Govyadinov   Merged only the n...
121
122
123
  	{
  
  		//Bind the Texture in GL and allow access to cuda.
ef5cebe5   Pavel Govyadinov   stable version
124
125
126
  //		#ifdef TIMING
  //			gpuStartTimer();
  //		#endif
c37611a6   Pavel Govyadinov   removed the time ...
127
  //		t.MapCudaTexture(texbufferID, texType);
ef5cebe5   Pavel Govyadinov   stable version
128
129
130
  //		#ifdef TIMING
  //			std::cout << "      " << gpuStopTimer();
  //		#endif
84eff8b1   Pavel Govyadinov   Merged only the n...
131
132
  
  		//initialize the return arrays.
ef5cebe5   Pavel Govyadinov   stable version
133
134
135
  //		#ifdef TIMING
  //			gpuStartTimer();
  //		#endif
84eff8b1   Pavel Govyadinov   Merged only the n...
136
137
138
139
  		float* output;	
  		output = (float* ) malloc(DIM_Y*sizeof(float));
  
  		stim::vec<int> ret(0, 0);
c37611a6   Pavel Govyadinov   removed the time ...
140
  //		initArray(DIM_Y);
84eff8b1   Pavel Govyadinov   Merged only the n...
141
142
143
144
145
  		
  
  		//variables for finding the min.
  		float mini = 10000000000000000.0;
  		int idx = 0;
ef5cebe5   Pavel Govyadinov   stable version
146
147
148
  //		#ifdef TIMING
  //			std::cout << "      " << gpuStopTimer();
  //		#endif
84eff8b1   Pavel Govyadinov   Merged only the n...
149
150
  	
  		//cuda launch variables.
ef5cebe5   Pavel Govyadinov   stable version
151
152
153
  //		#ifdef TIMING
  //			gpuStartTimer();
  //		#endif
84eff8b1   Pavel Govyadinov   Merged only the n...
154
  		dim3 numBlocks(1, DIM_Y);
c0e09133   Pavel Govyadinov   STABLE: made temp...
155
  		dim3 threadsPerBlock(dx, dy);
84eff8b1   Pavel Govyadinov   Merged only the n...
156
  
c37611a6   Pavel Govyadinov   removed the time ...
157
  		get_diff <<< numBlocks, threadsPerBlock, dx*dy*sizeof(float) >>> (tObj, result, dx, dy);
ef5cebe5   Pavel Govyadinov   stable version
158
159
160
161
162
163
164
165
  		cudaDeviceSynchronize();
  //		#ifdef TIMING
  //			std::cout << "      " << gpuStopTimer();
  //		#endif
  
  //		#ifdef TIMING
  //			gpuStartTimer();
  //		#endif
84eff8b1   Pavel Govyadinov   Merged only the n...
166
167
168
169
170
171
172
  		HANDLE_ERROR(
  			cudaMemcpy(output, result, DIM_Y*sizeof(float), cudaMemcpyDeviceToHost)
  			);
  
  		for( int i = 0; i<DIM_Y; i++){
  			if(output[i] < mini){
  				mini = output[i];
efe7b7cc   Pavel Govyadinov   Added a detailed ...
173
  				idx = i;                                          
84eff8b1   Pavel Govyadinov   Merged only the n...
174
175
  			}
  		}
ef5cebe5   Pavel Govyadinov   stable version
176
177
178
  //		#ifdef TIMING
  //			std::cout << "      " << gpuStopTimer();
  //		#endif
84eff8b1   Pavel Govyadinov   Merged only the n...
179
  
ef5cebe5   Pavel Govyadinov   stable version
180
181
182
  //		#ifdef TIMING
  //			gpuStartTimer();
  //		#endif
84eff8b1   Pavel Govyadinov   Merged only the n...
183
184
  //		stringstream name;      //for debugging
  //		name << "Test.bmp";
035d798f   Pavel Govyadinov   modified the spid...
185
  //		stim::gpu2image<float>(print, name.str(),16,218,0,256);
84eff8b1   Pavel Govyadinov   Merged only the n...
186
  	  
c37611a6   Pavel Govyadinov   removed the time ...
187
188
  //		t.UnmapCudaTexture();
  //		cleanUP();
84eff8b1   Pavel Govyadinov   Merged only the n...
189
  		ret[0] = idx; ret[1] = (int) output[idx];
c0e09133   Pavel Govyadinov   STABLE: made temp...
190
  //		std::cout << "The cost is " << output[idx] << std::endl;
84eff8b1   Pavel Govyadinov   Merged only the n...
191
  		free(output);
ef5cebe5   Pavel Govyadinov   stable version
192
193
194
  //		#ifdef TIMING
  //			std::cout << "      " << gpuStopTimer() << std::endl;
  //		#endif
84eff8b1   Pavel Govyadinov   Merged only the n...
195
196
197
198
199
200
201
202
  		return ret;
  	}
  
  	}
  }
  
  
  #endif