#include #include #include #include #include #include "../visualization/colormap.h" #include

///Cost function that works with the gl-spider class to find index of the item with min-cost.

typedef unsigned char uchar;

// Texture reference sampled by get_diff; bound to the mapped OpenGL array in initArray.
texture texIn;
// Device buffer of per-pixel |sample - template| values (20 floats per row),
// allocated in initArray and released in cleanUP.
float *result;
// Device scratch buffer holding one 20x10 sample; filled by get_sum.
float* v_dif;
// CUDA array backing the mapped OpenGL texture (set in initArray).
cudaArray* srcArray;
bool testing = false;

///Checks the CUDA runtime for a pending error.
///If one is found, prints it to stderr together with msg and terminates
///the process with exit code 1.
///@param *msg, text describing the operation that was just attempted.
inline void checkCUDAerrors(const char *msg)
{
	cudaError_t err = cudaGetLastError();
	if (cudaSuccess != err){
		fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(err) );
		exit(1);
	}
}

///Finds the sum of all the pixels in a given template element.
///Returns the sum of the absolute values.
///As a side effect the 20x10 block pointed to by diff is copied into the
///global scratch buffer v_dif, where it stays until the next call.
///Any CUDA or cuBLAS failure is reported on stderr and terminates the process.
///@param *diff, a pointer to the memory block that holds the pixel-differences.
float get_sum(float *diff)
{
	// One template element is 20 pixels wide and 10 pixels tall.
	const int n = 20*10;

	cublasHandle_t handle;
	cublasStatus_t ret = cublasCreate(&handle);
	if (ret != CUBLAS_STATUS_SUCCESS){
		fprintf(stderr, "cuBLAS error: handle creation failed (status %d).\n", (int)ret);
		exit(1);
	}

	// cublasSetVector is a host-to-device copy, but the caller in this file
	// passes a device pointer. cudaMemcpyDefault infers the direction from the
	// pointer itself, so both device and host sources are handled.
	cudaError_t err = cudaMemcpy(v_dif, diff, n * sizeof(*diff), cudaMemcpyDefault);
	if (err != cudaSuccess){
		fprintf(stderr, "Cuda error: %s: %s.\n", "get_sum copy", cudaGetErrorString(err));
		cublasDestroy(handle);
		exit(1);
	}

	float out = 0.0f;
	ret = cublasSasum(handle, n, v_dif, 1, &out);
	cublasDestroy(handle);
	if (ret != CUBLAS_STATUS_SUCCESS){
		fprintf(stderr, "cuBLAS error: cublasSasum failed (status %d).\n", (int)ret);
		exit(1);
	}
	return out;
}

///A virtual representation of a uniform template.
///Returns the value of the template pixel: 1 for x < 3, for x > 16 and
///for 6 < x < 13 (integer division of the 20-pixel width), 0 otherwise.
///@param x, location of a pixel.
__device__ float Template(int x)
{
	const bool lit = x < 20/6 || x > 20*5/6 || (x > 20*2/6 && x < 20*4/6);
	return lit ? 1.0f : 0.0f;
}

///Find the difference of the given set of samples and the template
///using cuda acceleration.
///Each thread handles one pixel: its global x index selects the template
///column (0..19) and its global y index selects the row, so the launch must
///cover exactly the rows that result has room for (20 floats per row).
///@param *result, a pointer to the memory that stores the result.
__global__ void get_diff (float *result)
{
	int x = threadIdx.x + blockIdx.x * blockDim.x;
	int y = threadIdx.y + blockIdx.y * blockDim.y;

	// Rows are 20 floats wide; a wider launch must not spill into the next row.
	if (x >= 20)
		return;

	int idx = y*20+x;

	// Scale the texel from the 0..255 range down to 0..1 before comparing.
	float valIn = tex2D(texIn, x, y)/255.0f;
	float valTemp = Template(x);
	result[idx] = fabsf(valIn-valTemp);
}

///Initialization function, allocates the memory and passes the necessary
///handles from OpenGL and Cuda.
///@param src, cudaGraphicsResource that handles the shared OpenGL/Cuda Texture
///@param DIM_Y, integer controlling how much memory to allocate.
///DIM_Y is the number of 20-float rows allocated for the global result buffer;
///the global v_dif buffer always holds one 20x10 sample.
void initArray(cudaGraphicsResource_t src, int DIM_Y)
{
	// Map the OpenGL-owned resource, fetch the array behind it and bind the
	// texture reference to that array so kernels can sample it.
	HANDLE_ERROR( cudaGraphicsMapResources(1, &src) );
	HANDLE_ERROR( cudaGraphicsSubResourceGetMappedArray(&srcArray, src, 0, 0) );
	HANDLE_ERROR( cudaBindTextureToArray(texIn, srcArray) );

	// Do the size arithmetic in size_t so that the row count is never
	// multiplied in (overflow-prone) int.
	const size_t diffBytes = (size_t)20 * (size_t)DIM_Y * sizeof(float);
	cudaMalloc( (void**) &result, diffBytes );
	checkCUDAerrors("Memory Allocation Issue 1");

	const size_t sampleBytes = (size_t)20 * (size_t)10 * sizeof(float);
	cudaMalloc( (void**) &v_dif, sampleBytes );
	checkCUDAerrors("Memory Allocation Issue 2");
}

///Deinit function that frees the memory used and releases the texture resource
///back to OpenGL.
///The global pointers are reset afterwards so that stale device addresses
///cannot be reused or freed twice.
///@param src, cudaGraphicsResource that handles the shared OpenGL/Cuda Texture
void cleanUP(cudaGraphicsResource_t src)
{
	HANDLE_ERROR( cudaUnbindTexture(texIn) );

	HANDLE_ERROR( cudaFree(result) );
	result = NULL;

	// After unmapping, the array handle obtained in initArray is no longer valid.
	HANDLE_ERROR( cudaGraphicsUnmapResources(1, &src) );
	srcArray = NULL;

	HANDLE_ERROR( cudaFree(v_dif) );
	v_dif = NULL;
}

///External access-point to the cuda function
///@param src, cudaGraphicsResource that handles the shared OpenGL/Cuda Texture
///@param DIM_Y, the number of samples in the template.
///@param inter, temporary parameter that tracks the number of times cost.h was called.
///@return index (0 .. DIM_Y-1) of the sample whose summed absolute difference
///        from the template is smallest (the last such index on ties), or -1
///        when DIM_Y is not positive.
///Each call hands two debug images to stim::gpu2image: "temp_diff_<inter>.bmp"
///(the whole difference buffer) and "sample_<inter>_<idx>.bmp" (the winner).
extern "C"
int get_cost(cudaGraphicsResource_t src, int inter, int DIM_Y)
{
	// Nothing to compare: bail out before mapping resources or launching an
	// empty grid, instead of returning an uninitialized index.
	if (DIM_Y <= 0)
		return -1;

	// Every sample is 10 rows of 20 pixels, stacked vertically.
	initArray(src, DIM_Y*10);

	// One 20x10 thread block per sample; this yields the same global (x, y)
	// coverage as a 20 x (DIM_Y*10) grid of single-thread blocks.
	dim3 block(20, 10);
	dim3 grid(1, DIM_Y);
	get_diff <<< grid, block >>> (result);
	checkCUDAerrors("get_diff kernel launch");

	std::stringstream name;						//for debugging
	name << "temp_diff_" << inter << ".bmp";
	stim::gpu2image(result, name.str(), 20,DIM_Y*10,0,1);

	float mini = 10000000000000000.0f;
	int idx = 0;
	for (int i = 0; i < DIM_Y; i++){
		const float cost = get_sum(result+(20*10*i));
		if(cost <= mini){
			mini = cost;
			idx = i;
		}
	}

	// str("") empties the buffer; clear() alone only resets the error flags
	// and would leave the previous file name as a prefix.
	name.str("");
	name.clear();
	name << "sample_" << inter << "_" << idx << ".bmp";

	// get_sum copies the sample it sums into v_dif, so call it once more on
	// the winner to make v_dif hold that sample for the image dump.
	get_sum(result+(20*10*idx));
	stim::gpu2image(v_dif, name.str(), 20,10,0,1);

	cleanUP(src);
	return idx;
}