// Template-matching cost for a DIM_X x DIM_Y 8-bit image that is bound to a
// CUDA texture through a mapped graphics resource.
//
// The image is treated as NUM_WINDOWS stacked windows of DIM_X x WINDOW_ROWS
// pixels. Each pixel is compared with a fixed column-stripe template, and
// get_cost() returns the index of the window whose sum of absolute
// differences is smallest.
//
// NOTE(review): in the copy of this file that was reviewed, everything
// written between angle brackets had been stripped (the <...> include
// targets, the template arguments of `texture`, and possibly an explicit
// template argument on stim::gpu2image). The system includes and the texture
// declaration below are reconstructed from the names this file uses --
// confirm them against the original. HANDLE_ERROR is not defined in this
// file and is expected to come from a project header.
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include "cuPrintf.cu"
#include "cuPrintf.cuh"
#include "../visualization/colormap.h"

#define DIM_Y 10890
#define DIM_X 20

// Geometry of one comparison window (previously the literals 10, 20*10 and 1089).
static const int WINDOW_ROWS = 10;                    // rows per window
static const int WINDOW_SIZE = DIM_X * WINDOW_ROWS;   // floats per window (200)
static const int NUM_WINDOWS = DIM_Y / WINDOW_ROWS;   // windows per image (1089)

typedef unsigned char uchar;

// Input image. get_diff() divides the fetched value by 255, so the texture is
// assumed to hold unsigned 8-bit texels read without normalisation -- TODO confirm.
texture<uchar, cudaTextureType2D, cudaReadModeElementType> texIn;

float *result;          // device buffer, DIM_X*DIM_Y floats: per-pixel |image - template|
float* v_dif;           // device buffer, WINDOW_SIZE floats: staging copy of one window
cudaArray* srcArray;    // array backing the mapped graphics resource
bool testing = false;   // set once the first window has been dumped to "sample0.bmp"

// Terminates the program if the most recent CUDA runtime call or kernel launch
// left an error behind. `msg` is a caller-supplied context string.
inline void checkCUDAerrors(const char *msg)
{
    cudaError_t err = cudaGetLastError();
    if (cudaSuccess != err){
        fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(err) );
        exit(1);
    }
}

// Terminates the program if a cuBLAS call did not succeed.
inline void checkCUBLASerrors(cublasStatus_t status, const char *msg)
{
    if (CUBLAS_STATUS_SUCCESS != status){
        fprintf(stderr, "cuBLAS error: %s (status %d).\n", msg, (int)status);
        exit(1);
    }
}

// Sums the WINDOW_SIZE floats starting at device pointer `diff`, using an
// already-created cuBLAS handle. The window is first staged into v_dif; the
// first window ever processed is also written to "sample0.bmp".
static float sum_window(cublasHandle_t handle, const float *diff)
{
    // `diff` points into device memory, so this has to be a device-to-device
    // copy. The previous cublasSetVector call is a host-to-device transfer and
    // was being handed a device pointer as its host source.
    cudaMemcpy(v_dif, diff, WINDOW_SIZE*sizeof(float), cudaMemcpyDeviceToDevice);
    checkCUDAerrors("Copying window into v_dif");

    if(!testing){
        stim::gpu2image(v_dif, "sample0.bmp", DIM_X, WINDOW_ROWS, 0.0f, 1.0f);
        testing = true;
    }

    // cublasSasum returns the sum of absolute values; the entries written by
    // get_diff() are already non-negative. With the default (host) pointer
    // mode the result is available as soon as the call returns.
    float out = 0.0f;
    checkCUBLASerrors(cublasSasum(handle, WINDOW_SIZE, v_dif, 1, &out), "cublasSasum");
    return out;
}

// Returns the sum of the WINDOW_SIZE floats starting at device pointer `diff`.
// Creates and destroys its own cuBLAS handle; code that sums many windows
// should create one handle and call sum_window() instead.
float get_sum(float *diff)
{
    cublasHandle_t handle;
    checkCUBLASerrors(cublasCreate(&handle), "cublasCreate");
    float out = sum_window(handle, diff);
    checkCUBLASerrors(cublasDestroy(handle), "cublasDestroy");
    return out;
}

// Template value for image column x: 1 in the outer and the central column
// bands, 0 in between. For DIM_X == 20 the integer thresholds are 3, 16, 6
// and 13, i.e. columns 0-2, 7-12 and 17-19 return 1.
__device__ float Template(int x)
{
    if(x < DIM_X/6 || x > DIM_X*5/6 || (x > DIM_X*2/6 && x < DIM_X*4/6)){
        return 1.0f;
    }else{
        return 0.0f;
    }
}

// Writes |texIn(x,y)/255 - Template(x)| to result[y*DIM_X + x].
// Launch: any 2D grid/block combination that covers DIM_X x DIM_Y threads;
// threads outside the image return without touching memory.
// `result` must hold DIM_X*DIM_Y floats and texIn must be bound.
__global__ void get_diff (float *result)
{
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    if(x >= DIM_X || y >= DIM_Y){
        return;
    }
    int idx = y*DIM_X+x;
    float valIn = tex2D(texIn, x, y)/255.0f;
    float valTemp = Template(x);
    result[idx] = fabsf(valIn-valTemp);
}

// Maps the graphics resource, binds its first sub-resource array to texIn and
// allocates the two device buffers (result, v_dif). Undone by cleanUP().
void initArray(cudaGraphicsResource_t src)
{
    HANDLE_ERROR(
        cudaGraphicsMapResources(1, &src)
    );
    HANDLE_ERROR(
        cudaGraphicsSubResourceGetMappedArray(&srcArray, src,0,0)
    );
    HANDLE_ERROR(
        cudaBindTextureToArray(texIn, srcArray)
    );
    cudaMalloc( (void**) &result, DIM_X*DIM_Y*sizeof(float));
    checkCUDAerrors("Memory Allocation Issue 1");
    cudaMalloc((void **) &v_dif, WINDOW_SIZE*sizeof(float));
    checkCUDAerrors("Memory Allocation Issue 2");
}

// Releases everything initArray() set up: unbinds the texture, frees both
// device buffers and unmaps the graphics resource.
void cleanUP(cudaGraphicsResource_t src)
{
    HANDLE_ERROR(
        cudaUnbindTexture(texIn)
    );
    HANDLE_ERROR(
        cudaFree(result)
    );
    HANDLE_ERROR(
        cudaGraphicsUnmapResources(1,&src)
    );
    HANDLE_ERROR(
        cudaFree(v_dif)
    );
}

// Computes the template-difference image for the texture behind `src`, sums it
// per window and returns the index (0 .. NUM_WINDOWS-1) of the window with the
// smallest sum. Also writes the debug images "test.bmp" (and, once per process,
// "sample0.bmp") and prints a few sums plus the chosen index to stdout.
extern "C"
int get_cost(cudaGraphicsResource_t src)
{
    float output[NUM_WINDOWS];
    float mini = 10000000000000000.0f;
    int idx = 0;    // was uninitialised if no window beat the sentinel (e.g. NaN sums)
    initArray(src);

    // One block spans the full image width and 16 rows (320 threads) instead
    // of one thread per block; get_diff() guards the partial last block.
    dim3 block(DIM_X, 16);
    dim3 grid(1, (DIM_Y + block.y - 1) / block.y);
    get_diff <<< grid, block >>> (result);
    checkCUDAerrors("get_diff kernel launch");

    stim::gpu2image(result, "test.bmp", DIM_X, DIM_Y, 0.0f, 1.0f);

    // A single cuBLAS handle serves all windows; creating one per window is
    // needlessly expensive.
    cublasHandle_t handle;
    checkCUBLASerrors(cublasCreate(&handle), "cublasCreate");
    for (int i = 0; i < NUM_WINDOWS; i++){
        output[i] = sum_window(handle, result+(WINDOW_SIZE*i));
        if(output[i] < mini){
            mini = output[i];
            idx = i;
        }
    }
    checkCUBLASerrors(cublasDestroy(handle), "cublasDestroy");

    std::cout << output[0] << std::endl;
    std::cout << output[100] << std::endl;
    std::cout << output[500] << std::endl;
    std::cout << output[1000] << std::endl;
    std::cout << idx << std::endl;
    cleanUP(src);
    return idx;
}