Blame view

stim/cuda/spider_cost.cuh 4.49 KB
84eff8b1   Pavel Govyadinov   Merged only the n...
1
2
3
4
5
  #ifndef STIM_SPIDER_COST_H
  #define STIM_SPIDER_COST_H
  
  #include <assert.h>
  #include <cuda.h>
9b766f1f   Pavel Govyadinov   completed merge f...
6
  //#include <cuda_runtime.h>
84eff8b1   Pavel Govyadinov   Merged only the n...
7
8
9
10
11
12
13
14
15
16
17
18
19
  #include <stdio.h>
  #include <stim/visualization/colormap.h>
  #include <sstream>
  #include <stim/math/vector.h>
  #include <stim/cuda/cudatools/devices.h>
  #include <stim/cuda/cudatools/threads.h>
  #include <stim/cuda/cuda_texture.cuh>
  namespace stim{
  	namespace cuda
  	{
  	
  	//Texture wrapper shared by this file: get_cost() maps the OpenGL texture
  	//through it, passes its texture object to the kernel and unmaps it again.
  	stim::cuda::cuda_texture t;  //texture object.
  	//Device buffer allocated by initArray() and released by cleanUP();
  	//get_cost() hands it to the get_diff kernel as the output array and copies
  	//it back to the host afterwards.
  	float* result;
c0e09133   Pavel Govyadinov   STABLE: made temp...
20
  //	float* print;
84eff8b1   Pavel Govyadinov   Merged only the n...
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
  	
  	///Initialization function: allocates the device buffer (the file-level
  	///pointer `result`) that get_cost() passes to the get_diff kernel.
  	///No OpenGL handles are touched here; only device memory is allocated.
  	///@param DIM_Y			--number of floats to allocate.
  	void initArray(int DIM_Y)
  	{
  		//cudaMalloc reports failure only through its return code. Route it
  		//through HANDLE_ERROR, as the cudaMemcpy in get_cost() already does,
  		//so a failed allocation is not silently ignored.
  		HANDLE_ERROR(
  			cudaMalloc( (void**) &result, DIM_Y*sizeof(float))
  			);
  	}
  
  	///Deinit function that frees the device memory allocated by initArray().
  	///The pointer is cleared afterwards, so calling cleanUP() again before the
  	///next initArray() does not free the same allocation twice.
  	void cleanUP()
  	{
  		//check the return code instead of dropping it.
  		HANDLE_ERROR(
  			cudaFree(result)
  			);
  		//cudaFree(NULL) performs no operation, so a cleared pointer makes a
  		//repeated cleanUP() harmless.
  		result = NULL;
  	}
  
  	///A virtual representation of a uniform template.
  	///Returns the value of the template pixel: 1 when x lies before sx/6, after
  	///sx*5/6, or strictly between sx*2/6 and sx*4/6; 0 otherwise.
  	///All boundaries use integer division, exactly like the former hard-coded
  	///32/6, 32*2/6, 32*4/6 and 32*5/6.
  	///Callable from both host and device so the values can be checked on the CPU.
  	///@param int x			--location of a pixel.
  	///@param int sx			--width of the template; defaults to 32, the value
  	///				  that used to be hard-coded, so Template(x) is unchanged.
  	__host__ __device__
  	inline float Template(int x, int sx = 32)
  	{
  		//band boundaries (integer division on purpose, see above).
  		const int lowEdge   = sx/6;
  		const int midStart  = sx*2/6;
  		const int midEnd    = sx*4/6;
  		const int highEdge  = sx*5/6;

  		const bool inOuter  = (x < lowEdge) || (x > highEdge);
  		const bool inMiddle = (x > midStart) && (x < midEnd);

  		//float literals: the function returns float, no double round-trip needed.
  		return (inOuter || inMiddle) ? 1.0f : 0.0f;
  	}
  		
  	///Find the difference of the given set of samples and the template
  	///using cuda acceleration.
  	///Every thread fetches one texel, scales it by 1/255 and stores
  	///|texel - Template(x)| in shared memory; the block then sums those values
  	///and thread (0,0) writes the total to result[blockIdx.y].
  	///Launch requirements:
  	///  - dynamic shared memory of at least dx*blockDim.y floats,
  	///  - dx >= blockDim.x (dx is the row pitch of the shared array),
  	///  - result is indexed by blockIdx.y only, so blocks sharing a blockIdx.y
  	///    write the same element (get_cost() launches a grid one block wide).
  	///Block dimensions do not have to be powers of two.
  	///@param cudaTextureObject_t texIn	--texture object the samples are read from.
  	///@param float* result			--a pointer to the memory that stores the result,
  	///					  one float per blockIdx.y.
  	///@param int dx				--row pitch used to index the shared array.
  	///@param int dy				--not read by the kernel; kept for the callers.
  	__global__
  	void get_diff (cudaTextureObject_t texIn, float *result, int dx, int dy)
  	{
  		extern __shared__ float shared[];

  		int x     = threadIdx.x + blockIdx.x * blockDim.x;
  		int y     = threadIdx.y + blockIdx.y * blockDim.y;
  		int x_t   = threadIdx.x;
  		int y_t   = threadIdx.y;
  		int idx   = y_t*dx+x_t;
  		int g_idx = blockIdx.y;

  		//float literal and fabsf keep the arithmetic in single precision.
  		float valIn   = tex2D<unsigned char>(texIn, x, y)/255.0f;
  		float valTemp = Template(x);

  		shared[idx] = fabsf(valIn-valTemp);

  		//every thread must have stored its value before any partial sum is read.
  		__syncthreads();

  		//Sum each row into its first element. `n` is the number of live
  		//elements in the row; the upper part [half, n) is folded onto the lower
  		//part. Rounding `half` up keeps the middle element of an odd-sized
  		//range alive, so no value is dropped for non power-of-two widths.
  		//For power-of-two widths this is the same sequence of additions as
  		//halving the step each round.
  		for(int n = (int)blockDim.x; n > 1; n = (n + 1)/2)
  		{
  			int half = (n + 1)/2;
  			if(x_t < n - half)
  				shared[idx] += shared[idx + half];
  			//barrier outside the branch: all threads of the block reach it.
  			__syncthreads();
  		}

  		//Sum the row totals (column 0) into shared[0]. Only column 0 is
  		//consumed below, so the other columns do not need to be reduced.
  		for(int n = (int)blockDim.y; n > 1; n = (n + 1)/2)
  		{
  			int half = (n + 1)/2;
  			if(x_t == 0 && y_t < n - half)
  				shared[idx] += shared[idx + half*dx];
  			__syncthreads();
  		}

  		//a single thread publishes the block total.
  		if(x_t == 0 && y_t == 0)
  			result[g_idx] = shared[0];
  	}
  
  
  	///External access-point to the cuda function
  	///Maps the GL texture into CUDA, launches get_diff with one block per sample
  	///(grid 1 x DIM_Y, block dx x dy), copies the per-sample costs back to the
  	///host and searches them for the smallest one.
  	///@param GLint texbufferID 	--GLtexture (must be contained in a framebuffer object)
  	///				  that holds the data that will be handed to cuda.
  	///@param GLenum texType	--either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
  	///				  may work with other gl texture types, but untested.
  	///@param int DIM_Y		--the number of samples in the template.
  	///@param int dx		--threads per block in x (default 16).
  	///@param int dy		--threads per block in y (default 8).
  	///@return a 2-vector: [0] is the index of the sample with the smallest cost,
  	///	   [1] is that cost converted to int. (0, 0) is returned when DIM_Y <= 0
  	///	   or when the host buffer cannot be allocated.
  	extern "C"
  	stim::vec<int> get_cost(GLint texbufferID, GLenum texType, int DIM_Y,int dx = 16, int dy = 8)
  	{
  		stim::vec<int> ret(0, 0);

  		//Nothing to evaluate. Without this guard an empty launch would be
  		//issued and output[0] would be read from a zero-sized buffer.
  		if(DIM_Y <= 0)
  			return ret;

  		//host copy of the per-sample costs.
  		float* output = (float* ) malloc(DIM_Y*sizeof(float));
  		if(output == NULL)
  		{
  			fprintf(stderr, "get_cost: unable to allocate %d floats on the host\n", DIM_Y);
  			return ret;
  		}

  		//Bind the Texture in GL and allow access to cuda.
  		t.MapCudaTexture(texbufferID, texType);

  		//allocate the device-side result array.
  		initArray(DIM_Y);

  		//cuda launch variables: one block per sample, dx*dy threads per block,
  		//one float of dynamic shared memory per thread.
  		dim3 numBlocks(1, DIM_Y);
  		dim3 threadsPerBlock(dx, dy);

  		get_diff <<< numBlocks, threadsPerBlock, dx*dy*sizeof(float) >>> (t.getTexture(), result, dx, dy);

  		//a kernel launch returns no status; an invalid configuration (for
  		//example too many threads per block) only shows up here.
  		HANDLE_ERROR(
  			cudaGetLastError()
  			);

  		HANDLE_ERROR(
  			cudaMemcpy(output, result, DIM_Y*sizeof(float), cudaMemcpyDeviceToHost)
  			);

  		//find the first sample with the smallest cost.
  		float mini = 10000000000000000.0f;
  		int idx = 0;
  		for( int i = 0; i<DIM_Y; i++){
  			if(output[i] < mini){
  				mini = output[i];
  				idx = i;
  			}
  		}

  		t.UnmapCudaTexture();
  		cleanUP();
  		ret[0] = idx; ret[1] = (int) output[idx];
  		free(output);
  		return ret;
  	}
  
  	}
  }
  
  
  #endif