// cost.h 3.12 KB
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include "cuPrintf.cu"
#include "cuPrintf.cuh"
#include <stdio.h>
#include "../visualization/colormap.h"

// Extent of the 2D work buffer: result and v_dif are each allocated as
// DIM_X*DIM_Y floats in initArray(), and get_cost() launches get_diff on a
// 400 x 200 grid.
#define DIM_X 400
#define DIM_Y 200
typedef unsigned char uchar;
// Disabled experiment: an output surface. A plain array may be enough, so a
// surface may not be needed at all.
//surface<void, 2> texOut;
// Disabled experiment: a second texture holding the template.
//texture<float, cudaTextureType2D, cudaReadModeElementType> texTemplate
// 2D float texture reference; bound to srcArray in initArray() and unbound in
// cleanUP().
texture<float, cudaTextureType2D, cudaReadModeElementType> texIn;
// Buffer allocated with cudaMalloc in initArray(), passed to the get_diff
// kernel by get_cost(), and released in cleanUP().
float *result;
// Buffer allocated with cudaMalloc in initArray(); get_sum() uses it as the
// cuBLAS input vector and frees it.
float* v_dif;
// CUDA array obtained from the mapped graphics resource in initArray().
cudaArray* srcArray;

/*
 * Abort the program if the CUDA runtime has an error pending.
 *
 * Fetches the most recent runtime error with cudaGetLastError() (which also
 * clears it). If it is anything other than cudaSuccess, the caller's label
 * and the runtime's description are written to stderr and the process exits
 * with status 1. On success the function simply returns.
 *
 * msg: short label printed in front of the CUDA error description.
 */
inline void checkCUDAerrors(const char *msg)
{
	const cudaError_t status = cudaGetLastError();
	if (status == cudaSuccess)
		return;

	fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(status) );
	exit(1);
}


/*
 * Sum of absolute values of a DIM_X*DIM_Y float vector, computed with cuBLAS.
 *
 * The input is first copied into the global scratch buffer v_dif (allocated
 * in initArray()), then reduced with cublasSasum.
 *
 * diff: pointer to DIM_X*DIM_Y floats. get_cost() passes a device pointer;
 *       a host pointer is accepted as well because the copy direction is
 *       inferred by the runtime (requires unified virtual addressing).
 *
 * Returns the sum of |diff[i]|.
 *
 * Side effects: v_dif is freed and reset to NULL before returning, so it has
 * to be allocated again (initArray()) before the next call. On any cuBLAS or
 * CUDA failure a message is printed to stderr and the process exits with
 * status 1, matching checkCUDAerrors().
 */
float get_sum(float *diff)
{
	const int count = DIM_X * DIM_Y;
	float out = 0.0f;	// defined value even if the reduction never runs

	cublasHandle_t handle;
	cublasStatus_t ret = cublasCreate(&handle);
	if (ret != CUBLAS_STATUS_SUCCESS) {
		fprintf(stderr, "cuBLAS error: cublasCreate failed (status %d).\n", (int) ret);
		exit(1);
	}

	// cublasSetVector is specified for host -> device copies only, but the
	// caller in this file hands over device memory. cudaMemcpyDefault lets the
	// runtime pick the direction from the pointer values.
	const cudaError_t copyErr =
		cudaMemcpy(v_dif, diff, count * sizeof(*diff), cudaMemcpyDefault);

	if (copyErr == cudaSuccess) {
		// Default (host) pointer mode: the call blocks until `out` is written.
		ret = cublasSasum(handle, count, v_dif, 1, &out);
	}

	// Release resources before reporting, so every path frees them.
	cublasDestroy(handle);
	cudaFree(v_dif);
	v_dif = NULL;	// do not leave a dangling pointer behind

	if (copyErr != cudaSuccess) {
		fprintf(stderr, "Cuda error: get_sum copy: %s.\n", cudaGetErrorString(copyErr));
		exit(1);
	}
	if (ret != CUBLAS_STATUS_SUCCESS) {
		fprintf(stderr, "cuBLAS error: cublasSasum failed (status %d).\n", (int) ret);
		exit(1);
	}
	return out;
}

/*
 * Template value for column x.
 *
 * Evaluates to 1 when x lies strictly below DIM_X/3 or strictly above
 * DIM_X*2/3 (both computed with integer arithmetic), and to 0 for every
 * column in between, bounds included.
 */
__device__ float Template(int x)
{
	const bool outsideMiddle = (x < DIM_X/3) || (x > DIM_X*2/3);
	return outsideMiddle ? 1.0f : 0.0f;
}

/*
 * Kernel: write one float per (x, y) position of a DIM_X x DIM_Y grid.
 *
 * Launch layout: 2D. The global thread coordinate (x, y) selects the element;
 * any grid/block split that covers DIM_X x DIM_Y works, and threads that fall
 * outside that extent return without touching memory. No shared memory.
 *
 * result: device buffer holding at least DIM_X*DIM_Y floats. Elements are
 *         stored row by row, each row DIM_X elements long.
 *
 * The value written is currently just the x coordinate. The texture fetch
 * from texIn and the subtraction of Template(x) are disabled.
 * NOTE(review): presumably the finished kernel should store
 * tex2D(texIn, x, y) - Template(x) -- confirm before re-enabling.
 */
__global__
void get_diff (float *result)
{
	const int x = threadIdx.x + blockIdx.x * blockDim.x;
	const int y = threadIdx.y + blockIdx.y * blockDim.y;

	// Ignore threads outside the buffer so the launch grid may overshoot.
	if (x >= DIM_X || y >= DIM_Y)
		return;

	// A row holds DIM_X elements, so that is the stride between rows. Using
	// DIM_Y here made different (x, y) pairs hit the same element and left
	// part of the buffer unwritten.
	const int idx = y * DIM_X + x;

	const float valIn = x;	// placeholder for tex2D(texIn, x, y)
	result[idx] = valIn;	// template subtraction disabled: - Template(x)
}




/*
 * Prepare the GPU state used by get_cost().
 *
 * Steps, in order:
 *   1. map the graphics resource `src`,
 *   2. fetch its CUDA array (sub-resource 0, level 0) into srcArray,
 *   3. bind srcArray to the texture reference texIn,
 *   4. allocate the global buffers result and v_dif, DIM_X*DIM_Y floats each.
 *
 * The first three calls are wrapped in HANDLE_ERROR; the two allocations are
 * verified through checkCUDAerrors(), which exits the process on failure.
 *
 * src: graphics resource to map.
 */
void initArray(cudaGraphicsResource_t src)
{
	// Size in bytes of one DIM_X x DIM_Y float buffer.
	const size_t bufferBytes = DIM_X*DIM_Y*sizeof(float);

	HANDLE_ERROR( cudaGraphicsMapResources(1, &src) );
	HANDLE_ERROR( cudaGraphicsSubResourceGetMappedArray(&srcArray, src, 0, 0) );
	HANDLE_ERROR( cudaBindTextureToArray(texIn, srcArray) );

	cudaMalloc( (void**) &result, bufferBytes );
	checkCUDAerrors("Memory Allocation Issue 1");

	cudaMalloc( (void**) &v_dif, bufferBytes );
	checkCUDAerrors("Memory Allocation Issue 2");
}

/*
 * Undo the setup done by initArray().
 *
 * Unbinds the texture reference texIn, frees the global buffer `result`, and
 * unmaps the graphics resource, each call wrapped in HANDLE_ERROR. The buffer
 * v_dif is not touched here; get_sum() frees it.
 *
 * src: the graphics resource that was mapped by initArray().
 */
void cleanUP(cudaGraphicsResource_t src)
{
	HANDLE_ERROR( cudaUnbindTexture(texIn) );
	HANDLE_ERROR( cudaFree(result) );
	HANDLE_ERROR( cudaGraphicsUnmapResources(1, &src) );
}

/*
 * Compute the cost value for a mapped graphics resource.
 *
 * Sequence: initArray(src) -> get_diff kernel over a DIM_X x DIM_Y grid ->
 * dump the kernel output to "test2.bmp" through stim::gpu2image ->
 * get_sum(result) -> cleanUP(src).
 *
 * src: graphics resource handed on to initArray() and cleanUP().
 *
 * Returns the value produced by get_sum() for the kernel output.
 *
 * A failed kernel launch is reported through checkCUDAerrors(), which prints
 * the error and exits the process.
 */
extern "C"
float get_cost(cudaGraphicsResource_t src)
{
	initArray(src);

	// 8x8 threads per block instead of a single thread per block. 8 divides
	// both DIM_X and DIM_Y, so the rounded-up grid covers exactly
	// DIM_X x DIM_Y threads; if the dimensions ever stop being multiples of
	// the tile size, the kernel has to ignore out-of-range threads.
	const int tile = 8;
	dim3 block(tile, tile);
	dim3 grid((DIM_X + tile - 1) / tile, (DIM_Y + tile - 1) / tile);

	//texIn.normalized = 1;
	get_diff <<< grid, block >>> (result);
	// Launches return no status; a bad configuration only shows up here.
	checkCUDAerrors("get_diff kernel launch");

	stim::gpu2image<float>(result, "test2.bmp", DIM_X, DIM_Y, 0, 255);
	float output = get_sum(result);
	cleanUP(src);
	return output;
}