// cost.h 4.17 KB
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <stdio.h>
#include "../visualization/colormap.h"
#include <sstream>


///Cost function that works with the gl-spider class to find index of the item with min-cost.
/// Shorthand for the 8-bit element type read through texIn.
typedef unsigned char uchar;
/// 2D texture reference sampled by get_diff(). initArray() binds it to srcArray and
/// cleanUP() unbinds it. cudaReadModeElementType: fetches return the stored uchar value.
texture<uchar, cudaTextureType2D, cudaReadModeElementType> texIn;
/// Device buffer filled by get_diff(); allocated in initArray() (20*DIM_Y floats)
/// and released in cleanUP().
float *result;
/// Device scratch buffer of 20*10 floats, allocated in initArray() and released in
/// cleanUP(). get_sum() copies one sample into it before summing, and get_cost()
/// reads it afterwards.
float* v_dif;
/// CUDA array obtained from the mapped graphics resource in initArray().
cudaArray* srcArray;
/// Not referenced anywhere in the visible part of this file.
bool testing = false;

///Queries the CUDA runtime for its last recorded error. When one is pending,
///prints it to stderr prefixed by the given message and terminates the process
///with exit status 1; otherwise returns without doing anything.
///@param msg, text printed in front of the CUDA error description.
inline void checkCUDAerrors(const char *msg)
{
	const cudaError_t status = cudaGetLastError();

	// Nothing pending: the common case.
	if (status == cudaSuccess)
		return;

	fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(status));
	exit(1);
}

///Finds the sum of the absolute values of all the pixels in a given template element.
///The 20*10 floats at diff are first copied into the global device buffer v_dif and
///the sum is computed on that copy with cublasSasum, so v_dif still holds the sample
///after the call (get_cost relies on this).
///Any CUDA or cuBLAS failure is reported on stderr and terminates the process with
///exit status 1, in the same way checkCUDAerrors does.
///@param *diff, a pointer to the memory block that holds the pixel-differences.
///@return the sum of absolute values of the 20*10 floats starting at diff.
float get_sum(float *diff)
{
	const int n = 20*10;	//pixels in one template element

	//cublasSetVector only copies host -> device, but the caller in this file hands in
	//a device pointer (result + offset). cudaMemcpyDefault lets the runtime infer the
	//direction from the pointer values (needs unified virtual addressing), so both
	//device and host sources are handled.
	cudaError_t copied = cudaMemcpy(v_dif, diff, n*sizeof(*diff), cudaMemcpyDefault);
	if (copied != cudaSuccess){
		fprintf(stderr, "Cuda error: %s: %s.\n", "get_sum copy", cudaGetErrorString(copied));
		exit(1);
	}

	cublasHandle_t handle;
	cublasStatus_t ret = cublasCreate(&handle);
	if (ret != CUBLAS_STATUS_SUCCESS){
		fprintf(stderr, "cuBLAS error: cublasCreate failed with status %d.\n", (int) ret);
		exit(1);
	}

	float out = 0.0f;
	ret = cublasSasum(handle, n, v_dif, 1, &out);
	//Release the handle before looking at the status so it is never leaked.
	cublasDestroy(handle);
	if (ret != CUBLAS_STATUS_SUCCESS){
		fprintf(stderr, "cuBLAS error: cublasSasum failed with status %d.\n", (int) ret);
		exit(1);
	}
	return out;
}

///A virtual representation of a uniform template that is 20 pixels wide.
///Returns 1.0 when x lies in one of three bands (x < 3, 6 < x < 13, x > 16)
///and 0.0 everywhere else.
///@param x, location of a pixel.
__device__ float Template(int x)
{
	//All bounds use integer division: 20/6 = 3, 20*2/6 = 6, 20*4/6 = 13, 20*5/6 = 16.
	const bool lowBand  = x < 20/6;
	const bool midBand  = x > 20*2/6 && x < 20*4/6;
	const bool highBand = x > 20*5/6;

	return (lowBand || highBand || midBand) ? 1.0f : 0.0f;
}

///Per-pixel cost kernel: reads one texel from texIn, scales it from [0,255] to
///[0,1] and stores its absolute value in result.
///Launch layout: one thread per pixel; the global x index selects the column
///(0..19) and the global y index selects the row. Rows are packed 20 floats wide,
///so result must hold at least 20 * (number of rows launched) floats; y is not
///bounds-checked because the kernel does not know the row count.
///NOTE(review): the comparison against Template() is currently switched off, so
///the stored value is the normalized sample itself rather than a difference -
///confirm whether that is intended.
///@param *result, a pointer to the memory that stores the result.
__global__
void get_diff (float *result)
{
	const int x = threadIdx.x + blockIdx.x * blockDim.x;
	const int y = threadIdx.y + blockIdx.y * blockDim.y;

	//The row stride is fixed at 20: a thread with x >= 20 would land in another
	//row (or past the end of the buffer), so it must not write anything.
	if (x >= 20)
		return;

	const int idx = y*20 + x;

	//Float literal keeps the division in single precision.
	const float valIn = tex2D(texIn, x, y) / 255.0f;

	//Template comparison, currently disabled:
	//result[idx] = fabsf(valIn - Template(x));
	result[idx] = fabsf(valIn);
}



///Initialization function: maps the shared OpenGL/Cuda resource, binds its array
///to the texture reference texIn and allocates the two global device buffers
///(result and v_dif).
///@param src, cudaGraphicsResource that handles the shared OpenGL/Cuda Texture
///@param DIM_Y, number of 20-float rows to allocate for result.
void initArray(cudaGraphicsResource_t src, int DIM_Y)
{
	//Hand the resource over to CUDA and make it readable through texIn.
	HANDLE_ERROR( cudaGraphicsMapResources(1, &src) );
	HANDLE_ERROR( cudaGraphicsSubResourceGetMappedArray(&srcArray, src, 0, 0) );
	HANDLE_ERROR( cudaBindTextureToArray(texIn, srcArray) );

	//Output of get_diff: DIM_Y rows of 20 floats.
	const size_t resultBytes = 20*DIM_Y*sizeof(float);
	cudaMalloc( (void**) &result, resultBytes );
	checkCUDAerrors("Memory Allocation Issue 1");

	//Scratch space for a single 20x10 sample, used by get_sum.
	const size_t sampleBytes = 20*10*sizeof(float);
	cudaMalloc( (void**) &v_dif, sampleBytes );
	checkCUDAerrors("Memory Allocation Issue 2");
}
///Deinit function: undoes initArray by unbinding texIn, freeing the two global
///device buffers and unmapping the graphics resource so that it is released
///back to OpenGL.
///@param src, cudaGraphicsResource that handles the shared OpenGL/Cuda Texture
void cleanUP(cudaGraphicsResource_t src)
{
	//Detach the texture reference before its backing resource is unmapped.
	HANDLE_ERROR( cudaUnbindTexture(texIn) );

	//Output buffer of get_diff.
	HANDLE_ERROR( cudaFree(result) );

	//Give the resource back to OpenGL.
	HANDLE_ERROR( cudaGraphicsUnmapResources(1, &src) );

	//Single-sample scratch buffer.
	HANDLE_ERROR( cudaFree(v_dif) );
}
///External access-point to the cuda function.
///Runs get_diff over DIM_Y samples of 20x10 pixels each, sums every sample with
///get_sum and returns the index of the sample with the smallest sum. When several
///samples share the smallest sum, the one with the highest index wins.
///Two debug images are handed to stim::gpu2image on every call (presumably written
///to disk under the given names): "temp_diff_<inter>.bmp" for the whole result
///buffer and "sample_<inter>_<idx>.bmp" for the winning sample.
///@param src, cudaGraphicsResource that handles the shared OpenGL/Cuda Texture
///@param inter, temporary parameter that tracks the number of times cost.h was called;
///       only used to build the debug file names.
///@param DIM_Y, the number of samples in the template.
///@return index of the min-cost sample, or -1 when DIM_Y <= 0 (nothing to evaluate).
extern "C"
int get_cost(cudaGraphicsResource_t src, int inter, int DIM_Y)
{
	//Without samples there is no valid index; bail out before touching the GPU
	//(a zero-sized grid would be an invalid launch anyway).
	if (DIM_Y <= 0)
		return -1;

	const int rows = DIM_Y*10;	//each sample is 10 rows of 20 pixels
	initArray(src, rows);

	//One 20x10 block per sample; covers exactly x in [0,20) and y in [0,rows).
	dim3 block(20, 10);
	dim3 grid(1, DIM_Y);
	get_diff <<< grid, block >>> (result);
	checkCUDAerrors("get_diff kernel launch");

	std::stringstream name;	//for debugging
	name << "temp_diff_" << inter << ".bmp";
	stim::gpu2image<float>(result, name.str(), 20,rows,0,1);

	//Seed the minimum from the first sample instead of a magic constant, so idx
	//is always a valid sample index once the loop has run.
	float mini = 0.0f;
	int idx = -1;
	for (int i = 0; i < DIM_Y; i++){
		const float cost = get_sum(result+(20*10*i));
		if(idx < 0 || cost <= mini){
			mini = cost;
			idx = i;
		}
	}

	//clear() only resets the stream's state flags; str("") is what actually
	//empties the buffer so the second file name does not get appended to the first.
	name.str("");
	name.clear();
	name << "sample_" << inter << "_" << idx << ".bmp";

	//Called for its side effect: leaves the winning sample in v_dif.
	get_sum(result+(20*10*idx));
	stim::gpu2image<float>(v_dif, name.str(), 20,10,0,1);
	cleanUP(src);
	return idx;
}