Blame view

stim/cuda/cost.h 3.64 KB
7e099e80   Pavel Govyadinov   "lots of stuff, t...
1
2
3
4
  #include <assert.h>
  #include <cuda.h>
  #include <cuda_runtime.h>
  #include <cublas_v2.h>
42145f38   Pavel Govyadinov   Fixed the issues ...
5
6
  //#include "cuPrintf.cu"
  //#include "cuPrintf.cuh"
7e099e80   Pavel Govyadinov   "lots of stuff, t...
7
8
  #include <stdio.h>
  #include "../visualization/colormap.h"
42145f38   Pavel Govyadinov   Fixed the issues ...
9
  #include <sstream>
7e099e80   Pavel Govyadinov   "lots of stuff, t...
10
  
a9f956be   Pavel Govyadinov   Fixed the cost fu...
11
12
  // Extent of the difference buffer: get_diff writes result[y*DIM_X + x] and
  // initArray allocates DIM_X*DIM_Y floats for it.
  #define DIM_Y 10890
  #define DIM_X 20
7e099e80   Pavel Govyadinov   "lots of stuff, t...
13
14
15
16
  // Shorthand for the element type of the input texture (texIn).
  typedef unsigned char uchar;
  // Earlier experiments, kept for reference (not compiled):
  //surface<void, 2> texOut; ///// maybe just do a normal array instead of a surface.
  		//we may not need a surface at all.
  //texture<float, cudaTextureType2D, cudaReadModeElementType> texTemplate 
32c433c7   Pavel Govyadinov   recovered the maj...
17
  // 2D texture reference over uchar texels, read without type conversion
  // (cudaReadModeElementType). initArray binds it to srcArray, get_diff samples it,
  // and cleanUP unbinds it.
  texture<uchar, cudaTextureType2D, cudaReadModeElementType> texIn;
7e099e80   Pavel Govyadinov   "lots of stuff, t...
18
19
20
  // Device buffer of DIM_X*DIM_Y floats (allocated in initArray, freed in cleanUP);
  // get_diff stores one absolute difference per pixel in it.
  float *result;
  // Device scratch buffer of 20*10 floats (allocated in initArray, freed in cleanUP);
  // get_sum copies the segment being summed into it.
  float* v_dif;
  // CUDA array obtained from the mapped graphics resource in initArray; backs texIn.
  cudaArray* srcArray;
4f5b240a   Pavel Govyadinov   minor change to i...
21
  // NOTE(review): in the visible code this flag is only referenced from commented-out
  // lines inside get_cost — confirm whether anything else still uses it.
  bool testing = false;
7e099e80   Pavel Govyadinov   "lots of stuff, t...
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
  
  // Queries the most recent CUDA runtime error (cudaGetLastError also resets it).
  // If an error is pending, prints "Cuda error: <msg>: <description>." to stderr and
  // terminates the process with exit code 1; otherwise returns without side effects.
  //   msg - caller-supplied label identifying the operation being checked.
  inline void checkCUDAerrors(const char *msg)
  {
  	const cudaError_t status = cudaGetLastError();

  	// Nothing pending: fall straight through to the caller.
  	if (status == cudaSuccess)
  		return;

  	fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(status) );
  	exit(1);
  }
  
  
  // Sums the absolute values of one 20*10-float segment with cuBLAS.
  //
  //   diff - pointer to the first of 20*10 consecutive floats. get_cost passes an
  //          offset into the device buffer `result`.
  //
  // Side effect: the segment is copied into the global device buffer v_dif, which
  // get_cost later reads, so the copy is kept even though cublasSasum could read
  // `diff` directly.
  //
  // Returns the sum of |diff[i]| over the segment. Any CUDA or cuBLAS failure is
  // reported on stderr and terminates the process with exit code 1, matching
  // checkCUDAerrors, instead of silently returning an uninitialised value.
  float get_sum(float *diff)
  {
  	const int count = 20*10;

  	cublasHandle_t handle;
  	cublasStatus_t ret = cublasCreate(&handle);
  	if (ret != CUBLAS_STATUS_SUCCESS){
  		fprintf(stderr, "cuBLAS error: cublasCreate failed with status %d.\n", (int)ret);
  		exit(1);
  	}

  	// cublasSetVector is documented as a host-to-device copy, but the source here
  	// lives in device memory; cudaMemcpyDefault infers the direction from the
  	// pointers, so both device and host sources are handled.
  	cudaError_t err = cudaMemcpy(v_dif, diff, count*sizeof(float), cudaMemcpyDefault);
  	if (err != cudaSuccess){
  		fprintf(stderr, "Cuda error: get_sum copy: %s.\n", cudaGetErrorString(err));
  		cublasDestroy(handle);
  		exit(1);
  	}

  	float out = 0.0f;
  	// With the default (host) pointer mode the call returns once `out` is written.
  	ret = cublasSasum(handle, count, v_dif, 1, &out);
  	cublasDestroy(handle);
  	if (ret != CUBLAS_STATUS_SUCCESS){
  		fprintf(stderr, "cuBLAS error: cublasSasum failed with status %d.\n", (int)ret);
  		exit(1);
  	}

  	return out;
  }
  
  // Reference profile along x for a 20-pixel-wide row, compared against the input
  // in get_diff.
  //
  // Returns 1.0 when x lies in one of the two outer bands (x < 3 or x > 16) or in
  // the central band (6 < x < 13), and 0.0 otherwise. The band edges come from
  // integer division of the width by 6, so they are truncated.
  __device__ float Template(int x)
  {
  	const int width     = 20;
  	const int outerLow  = width/6;		// 3
  	const int innerLow  = width*2/6;	// 6
  	const int innerHigh = width*4/6;	// 13
  	const int outerHigh = width*5/6;	// 16

  	const bool inOuterBand   = (x < outerLow) || (x > outerHigh);
  	const bool inCentralBand = (x > innerLow) && (x < innerHigh);

  	return (inOuterBand || inCentralBand) ? 1.0f : 0.0f;
  }
  
  // Kernel: per-pixel absolute difference between the input texture and Template.
  //
  // Launch layout: a 2D launch in which (threadIdx + blockIdx*blockDim) enumerates
  // pixel coordinates (x, y). get_cost launches a 20 x 10890 grid of 1x1 blocks.
  // Threads falling outside DIM_X x DIM_Y do nothing, so an over-covering launch
  // cannot write past the end of `result`.
  //
  //   result - device buffer of at least DIM_X*DIM_Y floats; element y*DIM_X + x
  //            receives |texIn(x, y)/255 - Template(x)|.
  //
  // Requires texIn to be bound (see initArray) before the launch.
  __global__
  void get_diff (float *result)
  {
  	const int x = threadIdx.x + blockIdx.x * blockDim.x;
  	const int y = threadIdx.y + blockIdx.y * blockDim.y;

  	// Bounds guard: ignore threads beyond the image extent.
  	if (x >= DIM_X || y >= DIM_Y)
  		return;

  	const int idx = y*DIM_X + x;

  	// The texel is an unsigned byte; scale it to [0, 1] in single precision
  	// (a bare 255.0 would promote the division to double).
  	const float valIn   = tex2D(texIn, x, y)/255.0f;
  	const float valTemp = Template(x);

  	result[idx] = fabsf(valIn - valTemp);
  }
  
  
  
  
  // Prepares the GPU state used by get_cost:
  //   1. maps the graphics resource `src` and fetches its array (sub-resource 0,
  //      mip level 0) into srcArray,
  //   2. binds texIn to that array,
  //   3. allocates `result` (DIM_X*DIM_Y floats) and `v_dif` (20*10 floats).
  // The graphics/texture calls are wrapped in HANDLE_ERROR; each allocation is
  // followed by checkCUDAerrors, which exits the process if an error is pending.
  void initArray(cudaGraphicsResource_t src)
  {
  	HANDLE_ERROR( cudaGraphicsMapResources(1, &src) );
  	HANDLE_ERROR( cudaGraphicsSubResourceGetMappedArray(&srcArray, src, 0, 0) );
  	HANDLE_ERROR( cudaBindTextureToArray(texIn, srcArray) );

  	// Full-image difference buffer written by get_diff.
  	const size_t diffBytes = DIM_X*DIM_Y*sizeof(float);
  	cudaMalloc( (void**) &result, diffBytes);
  	checkCUDAerrors("Memory Allocation Issue 1");

  	// Scratch buffer holding the single segment that get_sum reduces.
  	const size_t segmentBytes = 20*10*sizeof(float);
  	cudaMalloc( (void**) &v_dif, segmentBytes);
  	checkCUDAerrors("Memory Allocation Issue 2");
  }
  
  // Releases what initArray set up, in this order: unbind texIn, free `result`,
  // unmap the graphics resource `src`, free `v_dif`. Every call is wrapped in
  // HANDLE_ERROR.
  void cleanUP(cudaGraphicsResource_t src)
  {
  	HANDLE_ERROR( cudaUnbindTexture(texIn) );
  	HANDLE_ERROR( cudaFree(result) );
  	HANDLE_ERROR( cudaGraphicsUnmapResources(1, &src) );
  	HANDLE_ERROR( cudaFree(v_dif) );
  }
  
  // Scores every 20x10 segment of the mapped image against Template and returns
  // the index of the segment with the smallest summed absolute difference.
  //
  //   src   - graphics resource that initArray maps and binds to texIn.
  //   inter - caller-supplied number, used only in the output file name
  //           "sample_<inter>_<idx>.bmp".
  //
  // Steps: initArray(src), run get_diff over the 20 x 10890 image, reduce each of
  // the 1089 segments with get_sum, then cleanUP(src). On ties the highest index
  // wins (the comparison is <=). The index starts at 0, so the return value is
  // defined even if no comparison succeeds (e.g. every sum is NaN).
  //
  // Side effects: passes the difference image and the winning segment to
  // stim::gpu2image under the names "test.bmp" and "sample_<inter>_<idx>.bmp"
  // (presumably written to disk — confirm against stim::gpu2image), and prints
  // several sums plus the winning index to std::cout.
  extern "C"
  int get_cost(cudaGraphicsResource_t src, int inter)
  {
  	const int numSegments = 1089;		// 10890 rows / 10 rows per segment
  	const int segmentSize = 20*10;		// floats per segment

  	float output[numSegments];
  	float mini = 10000000000000000.0f;
  	int idx = 0;
  	std::stringstream name;

  	initArray(src);

  	// One single-thread block per pixel; the grid covers the image exactly.
  	dim3 grid(20, 10890);
  	dim3 block(1, 1);
  	get_diff <<< grid, block >>> (result);
  	// Launch-configuration errors only surface through the last-error state.
  	checkCUDAerrors("get_diff kernel launch");

  	stim::gpu2image<float>(result, "test.bmp", 20,10890,0,1);

  	for (int i = 0; i < numSegments; i++){
  		output[i] = get_sum(result+(segmentSize*i));
  		if(output[i] <= mini){
  			mini = output[i];
  			idx = i;
  		}
  	}

  	name << "sample_" << inter << "_" << idx << ".bmp";
  	// Re-run get_sum on the winner so that v_dif holds that segment (after the
  	// loop it holds the last segment) before it is handed to gpu2image.
  	output[idx] = get_sum(result+(segmentSize*idx));
  	stim::gpu2image<float>(v_dif, name.str(), 20,10,0,1);

  	std::cout << output[0] << std::endl;
  	std::cout << output[100] << std::endl;
  	std::cout << output[500] << std::endl;
  	std::cout << output[1000] << std::endl;
  	std::cout << idx << std::endl;

  	cleanUP(src);
  	return idx;
  }