// NOTE(review): the header names on the next line are missing from this copy
// of the file; restore them from the original source before building.
#include #include #include #include #include #include #include #include #include #include

///Cost function that works with the gl-spider class to find index of the item with min-cost.

typedef unsigned char uchar;

// Texture reference sampled by get_diff().
// NOTE(review): the template arguments of this declaration appear to be
// missing from this copy of the file; restore them before building.
texture texIn;
// Device buffer that get_diff() writes one float per block into.
float *result;
// CUDA array obtained from the mapped graphics resource.
cudaArray* srcArray;
bool testing = false;

///Checks the most recent CUDA runtime error and aborts the program on failure.
///When cudaGetLastError() reports anything other than cudaSuccess, the message
///and the CUDA error string are printed to stderr and exit(1) is called.
///@param msg, text identifying the call site, echoed in the error message.
inline void checkCUDAerrors(const char *msg)
{
    cudaError_t err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(err) );
        exit(1);
    }
}

///A virtual representation of a uniform template.
///Returns the value of the template pixel.
///The thresholds use integer division and evaluate to 2, 13, 5 and 10, so the
///template is 1 for x < 2, for 5 < x < 10 and for x > 13, and 0 elsewhere.
///@param x, location of a pixel.
__device__ float Template(int x)
{
    if (x < 16/6 || x > 16*5/6 || (x > 16*2/6 && x < 16*4/6)) {
        return 1.0f;
    } else {
        return 0.0f;
    }
}

///Find the difference of the given set of samples and the template
///using cuda acceleration.
///Each thread fetches one texel of texIn, scales it by 1/255 and stores the
///absolute difference to Template(x) in shared memory. The block then sums
///its tile (first along x, then along y) and thread (0,0) writes the total to
///result[blockIdx.y].
///Launch requirements: the block must be exactly 16x8 threads, because the
///shared tile is statically sized shared[16][8] and the halving reduction
///needs power-of-two extents. The output index depends only on blockIdx.y,
///so blocks that differ only in blockIdx.x would write to the same slot.
///@param *result, a pointer to the memory that stores the result.
__global__ void get_diff (float *result)
{
    __shared__ float shared[16][8];

    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int x_t = threadIdx.x;
    int y_t = threadIdx.y;
    int g_idx = blockIdx.y;

    // Single-precision literal and fabsf keep the arithmetic in float
    // instead of silently promoting to double.
    float valIn = tex2D(texIn, x, y) / 255.0f;
    float valTemp = Template(x);
    shared[x_t][y_t] = fabsf(valIn - valTemp);
    __syncthreads();

    // Sum along x: after this loop shared[0][y_t] holds the total of row y_t.
    // One barrier per step is sufficient: a step only reads slots that no
    // thread writes during that same step.
    for (unsigned int step = blockDim.x/2; step >= 1; step >>= 1) {
        if (x_t < step) {
            shared[x_t][y_t] += shared[x_t + step][y_t];
        }
        __syncthreads();
    }

    // Sum along y: after this loop shared[0][0] holds the total of the tile.
    for (unsigned int step = blockDim.y/2; step >= 1; step >>= 1) {
        if (y_t < step) {
            shared[x_t][y_t] += shared[x_t][y_t + step];
        }
        __syncthreads();
    }

    if (x_t == 0 && y_t == 0)
        result[g_idx] = shared[0][0];
}

///Initialization function, allocates the memory and passes the necessary
///handles from OpenGL and Cuda.
///Maps the graphics resource, fetches its CUDA array into srcArray, binds
///texIn to that array and allocates DIM_Y floats of device memory in result.
///@param src, cudaGraphicsResource that handles the shared OpenGL/Cuda Texture
///@param DIM_Y, integer controlling how much memory to allocate.
void initArray(cudaGraphicsResource_t src, int DIM_Y)
{
    HANDLE_ERROR(
        cudaGraphicsMapResources(1, &src)
    );
    HANDLE_ERROR(
        cudaGraphicsSubResourceGetMappedArray(&srcArray, src, 0, 0)
    );
    HANDLE_ERROR(
        cudaBindTextureToArray(texIn, srcArray)
    );
    cudaMalloc( (void**) &result, DIM_Y*sizeof(float));
    checkCUDAerrors("Memory Allocation Issue 1");
}

///Deinit function that frees the memory used and releases the texture resource
///back to OpenGL.
///Resources are released in the reverse order of initArray(): the texture is
///unbound before the graphics resource is unmapped, so texIn is never left
///bound to an array that has already been handed back to OpenGL.
///@param src, cudaGraphicsResource that handles the shared OpenGL/Cuda Texture
void cleanUP(cudaGraphicsResource_t src)
{
    HANDLE_ERROR(
        cudaFree(result)
    );
    // Drop the stale device pointer so a later use cannot touch freed memory.
    result = 0;
    HANDLE_ERROR(
        cudaUnbindTexture(texIn)
    );
    HANDLE_ERROR(
        cudaGraphicsUnmapResources(1, &src)
    );
}

///External access-point to the cuda function
///@param src, cudaGraphicsResource that handles the shared OpenGL/Cuda Texture
///@param DIM_Y, the number of samples in the template.
///@inter temporary parameter that tracks the number of times cost.h was called.
extern "C" stim::vec get_cost(cudaGraphicsResource_t src, int DIM_Y) { // int minGridSize; // int blockSize; // cudaOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, get_diff, 0, 20*DIM_Y*10); // std::cout << blockSize << std::endl; // std::cout << minGridSize << std::endl; // stringstream name; //for debugging // name << "Test.bmp"; // dim3 block(4,4); // dim3 grid(20/4, DIM_Y*10/4); // int gridSize = (DIM_Y*10*20 + 1024 - 1)/1024; // dim3 grid(26, 26); // dim3 grid = GenGrid1D(DIM_Y*10*20); // stim::gpu2image(result, name.str(), 20,DIM_Y*10,0,1); // name.clear(); // name << "sample_" << inter << "_" << idx << ".bmp"; // stim::gpu2image(v_dif, name.str(), 20,10,0,1); //float output[DIM_Y]; float *output; output = (float* ) malloc(DIM_Y*sizeof(float)); stim::vec ret(0, 0); float mini = 10000000000000000.0; int idx = 0; initArray(src, DIM_Y*8); dim3 numBlocks(1, DIM_Y); dim3 threadsPerBlock(16, 8); get_diff <<< numBlocks, threadsPerBlock >>> (result); cudaMemcpy(output, result, DIM_Y*sizeof(float), cudaMemcpyDeviceToHost); for( int i = 0; i