Blame view

stim/cuda/spider_cost.cuh 4.11 KB
84eff8b1   Pavel Govyadinov   Merged only the n...
1
2
3
4
5
  #ifndef STIM_SPIDER_COST_H
  #define STIM_SPIDER_COST_H
  
  #include <assert.h>
  #include <cuda.h>
9b766f1f   Pavel Govyadinov   completed merge f...
6
  //#include <cuda_runtime.h>
84eff8b1   Pavel Govyadinov   Merged only the n...
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
  #include <stdio.h>
  #include <stim/visualization/colormap.h>
  #include <sstream>
  #include <stim/math/vector.h>
  #include <stim/cuda/cudatools/devices.h>
  #include <stim/cuda/cudatools/threads.h>
  #include <stim/cuda/cuda_texture.cuh>
  namespace stim{
  	namespace cuda
  	{
  	
  	///Texture wrapper that get_cost() uses to map the OpenGL texture, fetch the
  	///texture object handed to the kernel, and unmap it again.
  	stim::cuda::cuda_texture t;  //texture object.
  	///Buffer allocated with cudaMalloc in initArray() and released in cleanUP();
  	///get_diff writes one float per block into it.
  	float* result;
  	///Only referenced by the commented-out debugging code in this file.
  	float* print;
  	
  	///Initialization function, allocates the device memory for the global
  	///"result" buffer. The status returned by cudaMalloc is passed to
  	///HANDLE_ERROR instead of being discarded.
  	///@param DIM_Y			--number of floats to allocate.
  	void initArray(int DIM_Y)
  	{
  		HANDLE_ERROR(
  			cudaMalloc( (void**) &result, DIM_Y*sizeof(float))
  			);
  	}
  
  	///Deinit function that frees the device memory held by the global
  	///"result" buffer. The status returned by cudaFree is passed to
  	///HANDLE_ERROR, and the pointer is cleared so that calling this function
  	///again does not free the same allocation twice.
  	void cleanUP()
  	{
  		HANDLE_ERROR( cudaFree(result) );
  		result = NULL;		//cudaFree on a null pointer performs no operation.
  	}
  
  	///Virtual uniform template, evaluated one pixel at a time.
  	///The value is 1 inside three bands and 0 elsewhere; the band limits are the
  	///integer-division results of 16/6, 16*2/6, 16*4/6 and 16*5/6.
  	///@param int x			--location of a pixel.
  	///@return 1 when x lies in one of the bands, 0 otherwise.
  	__device__
  	float Template(int x)
  	{
  		const bool inLeftBand   = x < 16/6;
  		const bool inMiddleBand = x > 16*2/6 && x < 16*4/6;
  		const bool inRightBand  = x > 16*5/6;

  		return (inLeftBand || inRightBand || inMiddleBand) ? 1.0f : 0.0f;
  	}
  		
  	///Find the difference of the given set of samples and the template
  	///using cuda acceleration.
  	///Every thread samples one texel, and the block then adds up the absolute
  	///differences with a shared-memory reduction (first along x, then along y).
  	///Launch layout: blockDim has to be (16, 8) to match the shared tile. Each
  	///block stores its sum in result[blockIdx.y], so result must hold at least
  	///gridDim.y floats and blocks that share a blockIdx.y write the same slot.
  	///@param cudaTextureObject_t texIn	--texture object that is read with tex2D<unsigned char>.
  	///@param float* result			--a pointer to the memory that stores the result.
  	__global__
  	void get_diff (cudaTextureObject_t texIn, float *result)
  	{
  		__shared__ float shared[16][8];

  		//the tile is indexed directly with threadIdx, so any other block shape
  		//would read or write outside of it.
  		assert(blockDim.x == 16 && blockDim.y == 8);

  		int x   = threadIdx.x + blockIdx.x * blockDim.x;
  		int y   = threadIdx.y + blockIdx.y * blockDim.y;
  		int x_t = threadIdx.x;
  		int y_t = threadIdx.y;
  		int g_idx = blockIdx.y;

  		//float literal and fabsf keep the whole computation in single precision.
  		float valIn             = tex2D<unsigned char>(texIn, x, y)/255.0f;
  		float valTemp           = Template(x);

  		shared[x_t][y_t]        = fabsf(valIn-valTemp);

  		//every difference has to be stored before any thread starts adding.
  		__syncthreads();

  		//fold the x dimension in halves; afterwards shared[0][y_t] holds the row sums.
  		for(int step = blockDim.x/2; step >= 1; step >>= 1)
  		{
  			if (x_t < step)
  			{
  				shared[x_t][y_t] += shared[x_t + step][y_t];
  			}
  			//kept outside the branch so that all threads of the block reach it.
  			__syncthreads();
  		}

  		//fold the y dimension the same way; afterwards shared[0][0] holds the block total.
  		for(int step = blockDim.y/2; step >= 1; step >>= 1)
  		{
  			if(y_t < step)
  			{
  				shared[x_t][y_t] += shared[x_t][y_t + step];
  			}
  			__syncthreads();
  		}

  		//one thread per block publishes the total.
  		if(x_t == 0 && y_t == 0)
  			result[g_idx] = shared[0][0];
  	}
  
  
  	///External access-point to the cuda function.
  	///Maps the GL texture for cuda, launches get_diff with one 16x8 block per
  	///sample, copies the per-sample costs back to the host and picks the smallest.
  	///@param GLint texbufferID 	--GLtexture (must be contained in a framebuffer object)
  	///				  that holds the data that will be handed to cuda.
  	///@param GLenum texType	--either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
  	///				  may work with other gl texture types, but untested.
  	///@param DIM_Y, the number of samples in the template.
  	///@return a two-element vector: [0] is the index of the sample with the lowest
  	///	   cost (the first one on ties), [1] is that cost truncated to int.
  	///	   (0, 0) is returned when DIM_Y <= 0 or the host buffer cannot be allocated.
  	extern "C"
  	stim::vec<int> get_cost(GLint texbufferID, GLenum texType, int DIM_Y)
  	{
  		stim::vec<int> ret(0, 0);

  		//without samples there is nothing to launch and no output[0] to read.
  		if(DIM_Y <= 0)
  			return ret;

  		//host copy of the per-sample costs; allocated before any GPU resource
  		//is touched so that a failure here needs no further cleanup.
  		float* output = (float* ) malloc(DIM_Y*sizeof(float));
  		if(output == NULL)
  		{
  			fprintf(stderr, "get_cost: unable to allocate %d floats on the host\n", DIM_Y);
  			return ret;
  		}

  		//Bind the Texture in GL and allow access to cuda.
  		t.MapCudaTexture(texbufferID, texType);

  		//allocate the device-side result array.
  		initArray(DIM_Y);

  		//cuda launch variables: one block per sample, block shape expected by get_diff.
  		dim3 numBlocks(1, DIM_Y);
  		dim3 threadsPerBlock(16, 8);

  		get_diff <<< numBlocks, threadsPerBlock >>> (t.getTexture(), result);

  		//a kernel launch returns no status of its own; ask for it explicitly.
  		HANDLE_ERROR( cudaGetLastError() );

  		HANDLE_ERROR(
  			cudaMemcpy(output, result, DIM_Y*sizeof(float), cudaMemcpyDeviceToHost)
  			);

  		//find the min, starting from the first sample.
  		int idx = 0;
  		float mini = output[0];
  		for( int i = 1; i<DIM_Y; i++){
  			if(output[i] < mini){
  				mini = output[i];
  				idx = i;
  			}
  		}

  		t.UnmapCudaTexture();
  		cleanUP();
  		ret[0] = idx; ret[1] = (int) output[idx];
  		free(output);
  		return ret;
  	}
  
  	}
  }
  
  
  #endif