// cost.h
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include "cuPrintf.cu"
#include "cuPrintf.cuh"
#include <stdio.h>
#include "../visualization/colormap.h"

// Image extent in texels. DIM_X is the row stride used by get_diff when it
// flattens (x, y) into an index, and DIM_X*DIM_Y floats are allocated for
// `result` in initArray.
#define DIM_Y 10890
#define DIM_X 20
typedef unsigned char uchar;
//surface<void, 2> texOut; ///// maybe just do a normal array instead of a surface.
		//we may not need a surface at all.
//texture<float, cudaTextureType2D, cudaReadModeElementType> texTemplate 
// 2-D texture reference of unsigned bytes, read back unconverted
// (cudaReadModeElementType). initArray binds it to srcArray; get_diff samples it.
texture<uchar, cudaTextureType2D, cudaReadModeElementType> texIn;
// Buffer of DIM_X*DIM_Y floats obtained with cudaMalloc in initArray; written by
// the get_diff kernel and released in cleanUP.
float *result;
// Scratch buffer of 20*10 floats obtained with cudaMalloc in initArray; get_sum
// copies one window into it before summing. Released in cleanUP.
float* v_dif;
// Array handle filled in by cudaGraphicsSubResourceGetMappedArray in initArray.
cudaArray* srcArray;
// One-shot flag: get_sum writes "sample0.bmp" while this is false, then sets it.
bool testing = false;

// Aborts the program if the CUDA runtime has an error pending.
//
// msg: context string printed ahead of the CUDA error description.
//
// The error is fetched with cudaGetLastError(). Anything other than cudaSuccess
// is reported on stderr and the process exits with status 1.
inline void checkCUDAerrors(const char *msg)
{
	const cudaError_t status = cudaGetLastError();
	if (status == cudaSuccess)
		return;

	fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(status) );
	exit(1);
}


// Returns the sum of absolute values of one 20x10 window of floats.
//
// diff: first of 20*10 contiguous floats. The caller in this file passes a
//       pointer into the device buffer `result`.
//
// The window is copied into the scratch buffer v_dif and reduced there with
// cublasSasum. On the first call only (guarded by `testing`) the window is
// also handed to stim::gpu2image with the file name "sample0.bmp".
//
// Any CUDA or cuBLAS failure is reported on stderr and terminates the process
// with status 1, in the same way checkCUDAerrors does.
//
// NOTE(review): a cuBLAS handle is created and destroyed on every call; if this
// shows up in profiles, hoist the handle out to the caller.
float get_sum(float *diff)
{
	const int count = 20*10;	// elements in one window

	cublasHandle_t handle;
	cublasStatus_t ret = cublasCreate(&handle);
	if (ret != CUBLAS_STATUS_SUCCESS) {
		fprintf(stderr, "cuBLAS error: cublasCreate failed (status %d).\n", (int) ret);
		exit(1);
	}

	// cublasSetVector is a host -> device copy, but `diff` points at device
	// memory here. cudaMemcpyDefault lets the runtime infer the direction from
	// the pointer values (requires unified virtual addressing), so a device
	// source is handled correctly and a host source keeps working.
	const cudaError_t err = cudaMemcpy(v_dif, diff, count*sizeof(*diff), cudaMemcpyDefault);
	if (err != cudaSuccess) {
		fprintf(stderr, "Cuda error: get_sum window copy: %s.\n", cudaGetErrorString(err));
		cublasDestroy(handle);
		exit(1);
	}

	// One-time debug dump of the first window that is summed.
	if(!testing){
		stim::gpu2image<float>(v_dif, "sample0.bmp", 20,10,0,1);
		testing = true;
	}

	float out = 0.0f;
	ret = cublasSasum(handle, count, v_dif, 1, &out);
	cublasDestroy(handle);
	if (ret != CUBLAS_STATUS_SUCCESS) {
		fprintf(stderr, "cuBLAS error: cublasSasum failed (status %d).\n", (int) ret);
		exit(1);
	}
	return out;
}

// Evaluates the binary reference template at column x of a 20-wide row.
//
// Returns 1 when x falls in one of three bands and 0 otherwise. With integer
// division the bands are x < 3, 6 < x < 13 and x > 16.
__device__ float Template(int x)
{
	const int width = 20;

	const bool leftBand   = x < width/6;
	const bool rightBand  = x > width*5/6;
	const bool centerBand = (x > width*2/6) && (x < width*4/6);

	return (leftBand || rightBand || centerBand) ? 1.0f : 0.0f;
}

// Kernel: per-texel absolute difference between the input texture and Template.
//
// result: output buffer, written at index y*DIM_X + x. It must hold at least
//         DIM_X*DIM_Y floats (initArray allocates exactly that).
//
// Launch layout: one thread per texel, with (x, y) taken from the 2-D global
// thread index. Threads that land outside DIM_X x DIM_Y return without
// touching memory, so the grid may overshoot the image.
//
// Each texel is read from texIn as an unsigned byte and scaled by 1/255 before
// it is compared with Template(x).
__global__
void get_diff (float *result)
{
	const int x = threadIdx.x + blockIdx.x * blockDim.x;
	const int y = threadIdx.y + blockIdx.y * blockDim.y;

	// Guard the tail: without this, any launch larger than the image writes
	// past the end of `result`.
	if (x >= DIM_X || y >= DIM_Y)
		return;

	// Float literal and fabsf keep the arithmetic in single precision.
	const float valIn   = tex2D(texIn, x, y) / 255.0f;
	const float valTemp = Template(x);
	result[y*DIM_X + x] = fabsf(valIn - valTemp);
}




// Prepares the GPU state used by get_cost.
//
// src: graphics resource to read from.
//
// Steps, in order: map `src`, fetch the array behind its sub-resource (0, 0)
// into srcArray, bind texIn to that array, then cudaMalloc the two buffers
// `result` (DIM_X*DIM_Y floats) and `v_dif` (20*10 floats). Interop and
// binding calls are wrapped in HANDLE_ERROR; each allocation is followed by
// checkCUDAerrors.
void initArray(cudaGraphicsResource_t src)
{
	HANDLE_ERROR( cudaGraphicsMapResources(1, &src) );
	HANDLE_ERROR( cudaGraphicsSubResourceGetMappedArray(&srcArray, src, 0, 0) );
	HANDLE_ERROR( cudaBindTextureToArray(texIn, srcArray) );

	// Full-image difference buffer.
	const size_t resultBytes = DIM_X*DIM_Y*sizeof(float);
	cudaMalloc( (void**) &result, resultBytes);
	checkCUDAerrors("Memory Allocation Issue 1");

	// Scratch buffer for a single 20x10 window.
	const size_t windowBytes = 20*10*sizeof(float);
	cudaMalloc( (void**) &v_dif, windowBytes);
	checkCUDAerrors("Memory Allocation Issue 2");
}

// Releases what initArray set up.
//
// src: the graphics resource that was mapped in initArray.
//
// Order: unbind texIn, free `result`, unmap `src`, free `v_dif`. Every call is
// wrapped in HANDLE_ERROR.
void cleanUP(cudaGraphicsResource_t src)
{
	HANDLE_ERROR( cudaUnbindTexture(texIn) );
	HANDLE_ERROR( cudaFree(result) );
	HANDLE_ERROR( cudaGraphicsUnmapResources(1, &src) );
	HANDLE_ERROR( cudaFree(v_dif) );
}

// Scores every 20x10 window of the mapped image against the template and
// returns the index of the best (lowest-cost) window.
//
// src: graphics resource holding the input image; it is mapped by initArray
//      and unmapped by cleanUP inside this call.
//
// Returns the index in [0, DIM_Y/10) of the window whose summed absolute
// difference is smallest. Returns 0 if no window compares below the initial
// bound (for example when every sum is NaN).
//
// Side effects: hands the full difference image to stim::gpu2image as
// "test.bmp" and prints four sample costs and the winning index to std::cout.
extern "C"
int get_cost(cudaGraphicsResource_t src)
{
	const int windowRows = 10;			// rows per scored window
	const int numWindows = DIM_Y / windowRows;	// 1089
	const int windowSize = DIM_X * windowRows;	// floats per window

	float output[numWindows];
	float mini = 10000000000000000.0;
	int idx = 0;	// defined result even if no window beats `mini`

	initArray(src);

	// One block per window: DIM_X x windowRows threads tile the DIM_X x DIM_Y
	// image exactly, so no thread falls outside the image.
	dim3 block(DIM_X, windowRows);
	dim3 grid(1, numWindows);
	get_diff <<< grid, block >>> (result);
	checkCUDAerrors("get_diff kernel launch");
	// Surface faults raised while the kernel was running before using its output.
	cudaDeviceSynchronize();
	checkCUDAerrors("get_diff kernel execution");

	stim::gpu2image<float>(result, "test.bmp", 20,10890,0,1);

	for (int i = 0; i < numWindows; i++){
		output[i] = get_sum(result + windowSize*i);
		if(output[i] < mini){
			mini = output[i];
			idx = i;
		}
	}

	// Debug output: a few sample costs followed by the winning index.
	std::cout << output[0] << std::endl;
	std::cout << output[100] << std::endl;
	std::cout << output[500] << std::endl;
	std::cout << output[1000] << std::endl;
	std::cout << idx << std::endl;

	cleanUP(src);
	return idx;
}