Blame view

stim/cuda/cost.h 4.75 KB
7e099e80   Pavel Govyadinov   "lots of stuff, t...
1
2
3
4
  #include <assert.h>
  #include <cuda.h>
  #include <cuda_runtime.h>
  #include <cublas_v2.h>
7e099e80   Pavel Govyadinov   "lots of stuff, t...
5
  #include <stdio.h>
7d1d153a   Pavel Govyadinov   fixed the include...
6
  #include <stim/visualization/colormap.h>
42145f38   Pavel Govyadinov   Fixed the issues ...
7
  #include <sstream>
7d1d153a   Pavel Govyadinov   fixed the include...
8
9
10
  #include <stim/math/vector.h>
  #include <stim/cuda/cudatools/devices.h>
  #include <stim/cuda/cudatools/threads.h>
556c4e15   Pavel Govyadinov   Changed the handl...
11
12
  
  ///Cost function that works with the gl-spider class to find index of the item with min-cost.
7e099e80   Pavel Govyadinov   "lots of stuff, t...
13
  typedef unsigned char uchar;
32c433c7   Pavel Govyadinov   recovered the maj...
14
  texture<uchar, cudaTextureType2D, cudaReadModeElementType> texIn;
7e099e80   Pavel Govyadinov   "lots of stuff, t...
15
  float *result;
7e099e80   Pavel Govyadinov   "lots of stuff, t...
16
  cudaArray* srcArray;
4f5b240a   Pavel Govyadinov   minor change to i...
17
  bool testing = false;
7e099e80   Pavel Govyadinov   "lots of stuff, t...
18
19
20
21
22
23
24
25
26
27
  
  inline void checkCUDAerrors(const char *msg)
  {
  	cudaError_t err = cudaGetLastError();
  	if (cudaSuccess != err){
  		fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(err) );
  		exit(1);
  		}
  }
  
556c4e15   Pavel Govyadinov   Changed the handl...
28
29
30
  ///A virtual representation of a uniform template.
  ///Returns the value of the template pixel.
  ///@param x, location of a pixel.
7e099e80   Pavel Govyadinov   "lots of stuff, t...
31
32
  __device__ float Template(int x)
  {
79a9bf3f   Pavel Govyadinov   new implementatio...
33
  	if(x < 16/6 || x > 16*5/6 || (x > 16*2/6 && x < 16*4/6)){
7e099e80   Pavel Govyadinov   "lots of stuff, t...
34
35
36
37
38
39
40
  		return 1.0;
  	}else{
  		return 0.0;
  	}
  
  }
  
556c4e15   Pavel Govyadinov   Changed the handl...
41
42
43
  ///Find the difference of the given set of samples and the template
  ///using cuda acceleration.
  ///@param *result, a pointer to the memory that stores the result.
7e099e80   Pavel Govyadinov   "lots of stuff, t...
44
45
46
  __global__
  void get_diff (float *result)
  {	
79a9bf3f   Pavel Govyadinov   new implementatio...
47
48
  	//float* shared = SharedMemory();
  	__shared__ float shared[16][8];
7e099e80   Pavel Govyadinov   "lots of stuff, t...
49
50
  	int x 	= threadIdx.x + blockIdx.x * blockDim.x;
  	int y 	= threadIdx.y + blockIdx.y * blockDim.y;
79a9bf3f   Pavel Govyadinov   new implementatio...
51
52
53
54
  	int x_t = threadIdx.x;
  	int y_t = threadIdx.y;
  	//int idx = y*16+x;
  	int g_idx = blockIdx.y;
7e099e80   Pavel Govyadinov   "lots of stuff, t...
55
  
a9f956be   Pavel Govyadinov   Fixed the cost fu...
56
  	float valIn		= tex2D(texIn, x, y)/255.0;
7e099e80   Pavel Govyadinov   "lots of stuff, t...
57
  	float valTemp		= Template(x);
79a9bf3f   Pavel Govyadinov   new implementatio...
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
  	shared[x_t][y_t]	= abs(valIn-valTemp);
  
  	__syncthreads();
  
  	for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1)
  	{
  		__syncthreads();
  		if (x_t < step)
  		{
  			shared[x_t][y_t] += shared[x_t + step][y_t];
  		}
  	__syncthreads();
  	}
  	__syncthreads();
  
  	for(unsigned int step = blockDim.y/2; step >= 1; step >>= 1)
  	{
  		__syncthreads();
  		if(y_t < step)
  		{
  			shared[x_t][y_t] += shared[x_t][y_t + step];
  		}
  	__syncthreads();
  	}
  	__syncthreads();
79a9bf3f   Pavel Govyadinov   new implementatio...
83
84
85
86
87
  	if(x_t == 0 && y_t == 0)
  		result[g_idx] = shared[0][0];
  
  
  //	//result[idx]		= abs(valIn);
7e099e80   Pavel Govyadinov   "lots of stuff, t...
88
89
90
91
  }
  
  
  
556c4e15   Pavel Govyadinov   Changed the handl...
92
93
94
95
  ///Initialization function, allocates the memory and passes the necessary
  ///handles from OpenGL and Cuda.
  ///@param src, cudaGraphicsResource that handles the shared OpenGL/Cuda Texture
  ///@param DIM_Y, integer controlling how much memory to allocate.
5e7c7581   Pavel Govyadinov   Debugging build f...
96
  void initArray(cudaGraphicsResource_t src, int DIM_Y)
7e099e80   Pavel Govyadinov   "lots of stuff, t...
97
  {
7e099e80   Pavel Govyadinov   "lots of stuff, t...
98
99
100
101
  	HANDLE_ERROR(
  		cudaGraphicsMapResources(1, &src)	
  	);
  	HANDLE_ERROR(
c4887649   Pavel Govyadinov   fixed a significa...
102
  		cudaGraphicsSubResourceGetMappedArray(&srcArray, src, 0, 0)
7e099e80   Pavel Govyadinov   "lots of stuff, t...
103
104
105
106
  		);
  	HANDLE_ERROR(
  		cudaBindTextureToArray(texIn, srcArray)
  		);
79a9bf3f   Pavel Govyadinov   new implementatio...
107
  	cudaMalloc( (void**) &result, DIM_Y*sizeof(float));
7e099e80   Pavel Govyadinov   "lots of stuff, t...
108
          checkCUDAerrors("Memory Allocation Issue 1");	
7e099e80   Pavel Govyadinov   "lots of stuff, t...
109
110
111
112
  	//HANDLE_ERROR(
  	//	cudaBindTextureToArray(texIn, ptr, &channelDesc)
  	//	    );		
  }
556c4e15   Pavel Govyadinov   Changed the handl...
113
114
115
  ///Deinit function that frees the memery used and releases the texture resource
  ///back to OpenGL.
  ///@param src, cudaGraphicsResource that handles the shared OpenGL/Cuda Texture
7e099e80   Pavel Govyadinov   "lots of stuff, t...
116
117
118
  void cleanUP(cudaGraphicsResource_t src)
  {
  	HANDLE_ERROR(
7e099e80   Pavel Govyadinov   "lots of stuff, t...
119
120
121
122
123
  		cudaFree(result)
  	);
  	HANDLE_ERROR(
  		cudaGraphicsUnmapResources(1,&src)
  	);
a9f956be   Pavel Govyadinov   Fixed the cost fu...
124
  	HANDLE_ERROR(
c4887649   Pavel Govyadinov   fixed a significa...
125
126
  		cudaUnbindTexture(texIn)
  	);
7e099e80   Pavel Govyadinov   "lots of stuff, t...
127
  }
c4887649   Pavel Govyadinov   fixed a significa...
128
129
130
  
  
  
556c4e15   Pavel Govyadinov   Changed the handl...
131
132
133
134
  ///External access-point to the cuda function
  ///@param src, cudaGraphicsResource that handles the shared OpenGL/Cuda Texture
  ///@param DIM_Y, the number of samples in the template.
  ///@inter temporary paramenter that tracks the number of times cost.h was called.
7e099e80   Pavel Govyadinov   "lots of stuff, t...
135
  extern "C"
edd4ab2d   Pavel Govyadinov   Cleaned up more a...
136
  stim::vec<int> get_cost(cudaGraphicsResource_t src, int DIM_Y)
7e099e80   Pavel Govyadinov   "lots of stuff, t...
137
  {
c4887649   Pavel Govyadinov   fixed a significa...
138
139
140
141
142
143
  //	int minGridSize;
  //	int blockSize;
  
  //	cudaOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, get_diff, 0, 20*DIM_Y*10);
  //	std::cout << blockSize << std::endl;
  //	std::cout << minGridSize << std::endl;
cce7daf9   Pavel Govyadinov   added glObj.h to ...
144
145
146
147
148
149
150
151
152
153
154
  //	stringstream name;	//for debugging
  //	name << "Test.bmp";
  //	dim3 block(4,4);
  //	dim3 grid(20/4, DIM_Y*10/4);
  //	int gridSize = (DIM_Y*10*20 + 1024 - 1)/1024;
  //	dim3 grid(26, 26);
  //	dim3 grid = GenGrid1D(DIM_Y*10*20);
  //	stim::gpu2image<float>(result, name.str(), 20,DIM_Y*10,0,1);
  //	name.clear();
  //	name << "sample_" << inter << "_" << idx << ".bmp";
  //	stim::gpu2image<float>(v_dif, name.str(), 20,10,0,1);
c4887649   Pavel Govyadinov   fixed a significa...
155
  
79a9bf3f   Pavel Govyadinov   new implementatio...
156
157
158
  	//float output[DIM_Y];
  	float *output;
  	output = (float* ) malloc(DIM_Y*sizeof(float));
edd4ab2d   Pavel Govyadinov   Cleaned up more a...
159
  	stim::vec<int> ret(0, 0);
a9f956be   Pavel Govyadinov   Fixed the cost fu...
160
  	float mini = 10000000000000000.0;
79a9bf3f   Pavel Govyadinov   new implementatio...
161
162
163
164
165
166
167
168
169
170
171
172
  	int idx = 0;
  	initArray(src, DIM_Y*8);
  	dim3 numBlocks(1, DIM_Y);
  	dim3 threadsPerBlock(16, 8);
  
  
  	get_diff <<< numBlocks, threadsPerBlock >>> (result);
  	cudaMemcpy(output, result, DIM_Y*sizeof(float), cudaMemcpyDeviceToHost);
  
  	for( int i = 0; i<DIM_Y; i++){
  //		std::cout << output[i] << std::endl;
  		if(output[i] < mini){
a9f956be   Pavel Govyadinov   Fixed the cost fu...
173
174
  			mini = output[i];
  			idx = i;
385d2447   Pavel Govyadinov   Checkpoint: Conve...
175
  		}
79a9bf3f   Pavel Govyadinov   new implementatio...
176
177
178
179
  	}
    
  //	std::cout << "hello" << std::endl;
  	//output[idx] = get_sum(result+(16*8*idx));
7e099e80   Pavel Govyadinov   "lots of stuff, t...
180
  	cleanUP(src);
edd4ab2d   Pavel Govyadinov   Cleaned up more a...
181
  	ret[0] = idx; ret[1] = (int) output[idx];
c0f3e9f6   Pavel Govyadinov   UPDATE TO CIMG: v...
182
  	std::cout << output[idx] << std::endl;
79a9bf3f   Pavel Govyadinov   new implementatio...
183
  	free(output);
edd4ab2d   Pavel Govyadinov   Cleaned up more a...
184
  	return ret;
7e099e80   Pavel Govyadinov   "lots of stuff, t...
185
  }