cost.h
3.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
//#include "cuPrintf.cu"
//#include "cuPrintf.cuh"
#include <stdio.h>
#include "../visualization/colormap.h"
#include <sstream>
// Size of the difference image in pixels. get_diff() uses DIM_X as the row
// stride of `result`, and initArray() allocates DIM_X*DIM_Y floats for it.
#define DIM_Y 10890
#define DIM_X 20
typedef unsigned char uchar;
//surface<void, 2> texOut; ///// maybe just do a normal array instead of a surface.
//we may not need a surface at all.
//texture<float, cudaTextureType2D, cudaReadModeElementType> texTemplate
// 2D texture reference over 8-bit texels, returned unconverted
// (cudaReadModeElementType). initArray() binds it to srcArray and get_diff()
// samples it.
texture<uchar, cudaTextureType2D, cudaReadModeElementType> texIn;
// Device buffer allocated in initArray() (DIM_X*DIM_Y floats) and filled by
// get_diff(); freed in cleanUP().
float *result;
// Device buffer allocated in initArray() (20*10 floats). get_sum() copies one
// window into it before summing; freed in cleanUP().
float* v_dif;
// CUDA array of the mapped graphics resource, set in initArray().
cudaArray* srcArray;
// Only referenced from commented-out code in get_cost().
bool testing = false;
// Terminates the program if the CUDA runtime reports a pending error.
//
// msg: context string printed in front of the CUDA error description.
//
// cudaGetLastError() is queried once (which also resets the stored error).
// For anything other than cudaSuccess the message is written to stderr and
// the process exits with status 1; otherwise the function simply returns.
inline void checkCUDAerrors(const char *msg)
{
	const cudaError_t status = cudaGetLastError();
	if (status == cudaSuccess)
		return;

	fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(status) );
	exit(1);
}
// Returns the sum of absolute values of one 20x10 window of the difference
// image.
//
// diff: device pointer to the first of 20*10 contiguous floats (get_cost()
//       passes an offset into `result`).
//
// Side effect: the window is copied into the global device buffer v_dif.
// get_cost() writes v_dif out as an image afterwards, so the copy is part of
// this function's contract and must be kept.
//
// Any cuBLAS failure is reported on stderr and terminates the process with
// status 1, in line with checkCUDAerrors(); previously the statuses were
// ignored and an uninitialised value could be returned.
//
// NOTE(review): a cuBLAS handle is created and destroyed on every call, and
// get_cost() calls this once per window. Sharing one handle would be cheaper
// but needs a file-level owner for it.
float get_sum(float *diff)
{
	const int count = 20*10;	// elements per window; v_dif is allocated with this size
	float out = 0.0f;

	cublasHandle_t handle;
	cublasStatus_t ret = cublasCreate(&handle);
	if (ret != CUBLAS_STATUS_SUCCESS) {
		fprintf(stderr, "cuBLAS error: cublasCreate failed (status %d).\n", (int)ret);
		exit(1);
	}

	// `diff` is device memory, so copy device-to-device. cublasSetVector() is
	// specified for a host-memory source only.
	ret = cublasScopy(handle, count, diff, 1, v_dif, 1);
	if (ret == CUBLAS_STATUS_SUCCESS) {
		// With the default (host) pointer mode the result is written to `out`
		// on the host before the call returns.
		ret = cublasSasum(handle, count, v_dif, 1, &out);
	}

	// Release the handle before deciding whether to bail out.
	cublasDestroy(handle);

	if (ret != CUBLAS_STATUS_SUCCESS) {
		fprintf(stderr, "cuBLAS error: window sum failed (status %d).\n", (int)ret);
		exit(1);
	}
	return out;
}
// Reference stripe pattern for column x of the 20-pixel-wide template.
//
// Returns 1 when x lies in one of the three bands
//   x < 3,   6 < x < 13,   x > 16
// and 0 otherwise. The band edges are the integer quotients 20/6, 20*2/6,
// 20*4/6 and 20*5/6.
__device__ float Template(int x)
{
	const int width = 20;
	const int leftEdge     = width / 6;		// 3
	const int middleStart  = width * 2 / 6;	// 6
	const int middleEnd    = width * 4 / 6;	// 13
	const int rightEdge    = width * 5 / 6;	// 16

	const bool inLeftBand   = x < leftEdge;
	const bool inRightBand  = x > rightEdge;
	const bool inMiddleBand = (x > middleStart) && (x < middleEnd);

	return (inLeftBand || inRightBand || inMiddleBand) ? 1.0f : 0.0f;
}
// Kernel: for every pixel of the DIM_X x DIM_Y image, stores the absolute
// difference between the input texel scaled to [0,1] and Template(x).
//
// result: device buffer with room for DIM_X*DIM_Y floats, written row by row
//         with a row stride of DIM_X.
//
// Launch layout: 2D; each thread handles the pixel at
// (blockIdx*blockDim + threadIdx) in x and y. Threads that fall outside the
// image return without touching memory, so the grid may over-cover the image.
// texIn must be bound to the source image before the launch.
__global__
void get_diff (float *result)
{
	const int x = threadIdx.x + blockIdx.x * blockDim.x;
	const int y = threadIdx.y + blockIdx.y * blockDim.y;

	// Without this guard an x >= DIM_X would alias into the next row and a
	// y >= DIM_Y would write past the end of `result`.
	if (x >= DIM_X || y >= DIM_Y)
		return;

	// texIn yields 8-bit texels; the float literal keeps the divide in
	// single precision.
	const float valIn = tex2D(texIn, x, y) / 255.0f;
	const float valTemp = Template(x);

	result[y*DIM_X + x] = fabsf(valIn - valTemp);
}
// Maps the graphics resource, binds its array to texIn and allocates the
// device buffers used by get_diff() and get_sum().
//
// src: graphics resource to map. It is left mapped on return; cleanUP()
//      unmaps it.
//
// On return:
//   - srcArray is the array at index 0, mip level 0 of src, bound to texIn,
//   - result has room for DIM_X*DIM_Y floats,
//   - v_dif has room for 20*10 floats.
// The two allocations are verified with checkCUDAerrors(), which exits the
// process on failure.
void initArray(cudaGraphicsResource_t src)
{
	// Make the resource's pixels visible to CUDA as a texture.
	HANDLE_ERROR( cudaGraphicsMapResources(1, &src) );
	HANDLE_ERROR( cudaGraphicsSubResourceGetMappedArray(&srcArray, src,0,0) );
	HANDLE_ERROR( cudaBindTextureToArray(texIn, srcArray) );

	// Full difference image.
	const size_t imageBytes = DIM_X*DIM_Y*sizeof(float);
	cudaMalloc( (void**) &result, imageBytes);
	checkCUDAerrors("Memory Allocation Issue 1");

	// Buffer for a single 20x10 window.
	const size_t windowBytes = 20*10*sizeof(float);
	cudaMalloc((void **) &v_dif, windowBytes);
	checkCUDAerrors("Memory Allocation Issue 2");
}
// Releases what initArray() set up.
//
// src: the graphics resource that initArray() mapped.
//
// Steps, in this order, each checked through HANDLE_ERROR:
//   1. unbind texIn,
//   2. free `result`,
//   3. unmap src,
//   4. free `v_dif`.
// The global pointers are not reset afterwards.
void cleanUP(cudaGraphicsResource_t src)
{
	HANDLE_ERROR( cudaUnbindTexture(texIn) );
	HANDLE_ERROR( cudaFree(result) );
	HANDLE_ERROR( cudaGraphicsUnmapResources(1,&src) );
	HANDLE_ERROR( cudaFree(v_dif) );
}
// Finds the 20x10 window of the input image that best matches the template.
//
// src:   graphics resource holding the input image; mapped by initArray() and
//        unmapped again by cleanUP() before returning.
// inter: number embedded in the name of the image written for the winner
//        ("sample_<inter>_<idx>.bmp").
//
// Returns the index of the window with the smallest sum of absolute
// differences. On ties the highest index wins because the comparison is <=.
//
// Side effects: writes "test.bmp" (the whole difference image) and
// "sample_<inter>_<idx>.bmp" (the winning window), and prints a few sums and
// the winning index to std::cout.
extern "C"
int get_cost(cudaGraphicsResource_t src, int inter)
{
	const int windowRows = 10;
	const int windowSize = DIM_X * windowRows;	// 200 floats per window
	const int numWindows = DIM_Y / windowRows;	// 1089 windows

	float output[numWindows];

	initArray(src);

	// One block per window, one thread per pixel. DIM_Y is a multiple of
	// windowRows, so this grid tiles the image exactly and no thread falls
	// outside it.
	dim3 block(DIM_X, windowRows);
	dim3 grid(1, numWindows);
	//texIn.normalized = 1;
	get_diff <<< grid, block >>> (result);
	// A bad launch configuration is only visible through the last-error state.
	checkCUDAerrors("get_diff kernel launch");

	stim::gpu2image<float>(result, "test.bmp", DIM_X, DIM_Y, 0, 1);

	// idx starts at a valid window so it is never read uninitialised, even if
	// no sum passes the comparison below (e.g. NaN sums).
	int idx = 0;
	float mini = 1.0e16f;
	for (int i = 0; i < numWindows; i++) {
		output[i] = get_sum(result + windowSize*i);
		if (output[i] <= mini) {
			mini = output[i];
			idx = i;
		}
	}

	std::stringstream name;
	name << "sample_" << inter << "_" << idx << ".bmp";

	// get_sum() leaves the window it last processed in v_dif; run it once more
	// on the winner so that v_dif holds the window being saved.
	output[idx] = get_sum(result + windowSize*idx);
	stim::gpu2image<float>(v_dif, name.str(), DIM_X, windowRows, 0, 1);

	std::cout << output[0] << std::endl;
	std::cout << output[100] << std::endl;
	std::cout << output[500] << std::endl;
	std::cout << output[1000] << std::endl;
	std::cout << idx << std::endl;

	cleanUP(src);
	return idx;
}