// cuda/cost.h
//
// Computes a scalar "cost" for an OpenGL-backed image: the image is mapped
// into CUDA, a per-pixel value is written into a DIM_X x DIM_Y float buffer by
// the get_diff kernel, and the buffer is reduced with cuBLAS (sum of absolute
// values).
#ifndef STIM_CUDA_COST_H
#define STIM_CUDA_COST_H

// NOTE(review): the names of the angle-bracket headers were missing from the
// reviewed text; the five below are reconstructed from the symbols this file
// uses (fprintf, exit, the CUDA runtime, the cuBLAS v2 API) -- confirm.
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include "cuPrintf.cu"
#include "cuPrintf.cuh"
#include <cublas_v2.h>
#include "../visualization/colormap.h"

#define DIM_X 400	// buffer width in pixels
#define DIM_Y 200	// buffer height in pixels
typedef unsigned char uchar;
//surface texOut; ///// maybe just do a normal array instead of a surface.
	//we may not need a surface at all.
//texture texTemplate

// NOTE(review): the template arguments of this texture reference were missing
// from the reviewed text; <uchar, 2D, element-type> is an assumption based on
// the uchar typedef above -- confirm against the format of the GL image.
texture<uchar, cudaTextureType2D, cudaReadModeElementType> texIn;
float *result;		// device buffer written by get_diff (allocated in initArray)
float* v_dif;		// device scratch buffer reduced by get_sum (allocated in initArray)
cudaArray* srcArray;	// CUDA array backing the mapped graphics resource

/// Aborts the program if the CUDA runtime has a pending error.
/// @param msg	text printed in front of the CUDA error string
inline void checkCUDAerrors(const char *msg)
{
	cudaError_t err = cudaGetLastError();
	if (cudaSuccess != err){
		fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(err) );
		exit(1);
	}
}

/// Aborts the program if a cuBLAS call did not succeed.
/// @param status	value returned by the cuBLAS call
/// @param msg		name of the call, printed with the numeric status
static inline void checkCUBLASstatus(cublasStatus_t status, const char *msg)
{
	if (CUBLAS_STATUS_SUCCESS != status){
		fprintf(stderr, "cuBLAS error: %s: status %d.\n", msg, (int) status);
		exit(1);
	}
}

/// Returns the sum of absolute values of the DIM_X*DIM_Y floats in diff.
///
/// The values are first copied into the global scratch buffer v_dif, which
/// must already be allocated (see initArray). v_dif is released before
/// returning, exactly as in the previous implementation, and is reset to NULL
/// so that a later cudaFree on it is harmless.
///
/// @param diff	DIM_X*DIM_Y floats; get_cost passes the device buffer `result`
/// @return	sum of |diff[i]|
float get_sum(float *diff)
{
	const int n = DIM_X * DIM_Y;
	float out = 0.0f;

	cublasHandle_t handle;
	checkCUBLASstatus(cublasCreate(&handle), "cublasCreate");

	// cublasSetVector performs a host-to-device copy, but the caller in this
	// file hands in device memory. cudaMemcpyDefault lets the runtime infer
	// the direction from the pointers, so both host and device sources work
	// (this relies on unified virtual addressing).
	cudaMemcpy(v_dif, diff, n * sizeof(*diff), cudaMemcpyDefault);
	checkCUDAerrors("Copy into reduction buffer");

	checkCUBLASstatus(cublasSasum(handle, n, v_dif, 1, &out), "cublasSasum");
	checkCUBLASstatus(cublasDestroy(handle), "cublasDestroy");

	cudaFree(v_dif);
	checkCUDAerrors("Freeing reduction buffer");
	v_dif = NULL;
	return out;
}

/// Template value for column x: 1 in the outer thirds of the image width,
/// 0 in the middle third.
__device__ float Template(int x)
{
	if(x < DIM_X/3 || x > DIM_X*2/3){
		return 1.0f;
	}else{
		return 0.0f;
	}
}

/// Writes one value per pixel into result, stored row-major with a row
/// length of DIM_X.
///
/// Launch layout: a 2D grid of 2D blocks covering at least DIM_X x DIM_Y
/// threads; threads that fall outside the image return without writing.
/// No shared memory is used.
///
/// @param result	device buffer holding at least DIM_X*DIM_Y floats
__global__
void get_diff (float *result)
{
	int x = threadIdx.x + blockIdx.x * blockDim.x;
	int y = threadIdx.y + blockIdx.y * blockDim.y;

	// The grid is rounded up to whole blocks, so guard the image edges.
	if(x >= DIM_X || y >= DIM_Y)
		return;

	// A row holds DIM_X elements; using DIM_Y as the stride made different
	// pixels share an index and left part of the buffer unwritten.
	int idx = y*DIM_X+x;

	// The texture fetch and the template subtraction are still disabled, as
	// in the original code; for now the column index itself is stored.
	//uchar4 color = tex2D(texIn, x, y);
	//float3 tempcolor = make_float3(color.x, color.y, color.z);
	//float valIn = tempcolor.x + tempcolor.y + tempcolor.z;
	float valIn = (float) x;//tex2D(texIn, x, y);
	result[idx] = valIn;
//			- Template(x);
}

/// Maps src into CUDA, binds its array to texIn and allocates the two
/// DIM_X*DIM_Y float device buffers (result and v_dif).
/// @param src	graphics resource to map; the registration that produced it is
///		not visible in this file
void initArray(cudaGraphicsResource_t src)
{
	//cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc ();
	//cudaMallocArray(&result, &channelDesc, DIM_X, DIM_Y, 0);
	HANDLE_ERROR(
		cudaGraphicsMapResources(1, &src)
	);
	HANDLE_ERROR(
		cudaGraphicsSubResourceGetMappedArray(&srcArray, src,0,0)
	);
	HANDLE_ERROR(
		cudaBindTextureToArray(texIn, srcArray)
	);
	cudaMalloc( (void**) &result, DIM_X*DIM_Y*sizeof(float));
	checkCUDAerrors("Memory Allocation Issue 1");
	cudaMalloc((void **) &v_dif, DIM_X*DIM_Y*sizeof(*result));
	checkCUDAerrors("Memory Allocation Issue 2");
}

/// Undoes initArray: unbinds texIn, frees the device buffers and unmaps src.
/// @param src	the resource that was passed to initArray
void cleanUP(cudaGraphicsResource_t src)
{
	HANDLE_ERROR(
		cudaUnbindTexture(texIn)
	);
	HANDLE_ERROR(
		cudaFree(result)
	);
	result = NULL;
	// get_sum normally releases v_dif and resets it to NULL; freeing NULL is
	// a no-op, so this only matters when get_sum was never reached.
	HANDLE_ERROR(
		cudaFree(v_dif)
	);
	v_dif = NULL;
	HANDLE_ERROR(
		cudaGraphicsUnmapResources(1,&src)
	);
}

/// Computes the cost for the image behind src.
///
/// Runs get_diff over the DIM_X x DIM_Y buffer, writes the buffer to
/// "test2.bmp" through stim::gpu2image, and returns the sum of absolute
/// values of the buffer.
///
/// @param src	graphics resource holding the input image
/// @return	the value produced by get_sum for the filled buffer
extern "C"
float get_cost(cudaGraphicsResource_t src)
{
	initArray(src);

	// 16x16 threads per block with the grid rounded up; the kernel discards
	// the threads that fall outside the image.
	dim3 block(16, 16);
	dim3 grid((DIM_X + block.x - 1) / block.x,
		  (DIM_Y + block.y - 1) / block.y);
	//texIn.normalized = 1;
	get_diff <<< grid, block >>> (result);
	checkCUDAerrors("get_diff kernel launch");
	cudaDeviceSynchronize();
	checkCUDAerrors("get_diff kernel execution");

	// NOTE(review): the value range is passed as floats so it matches the
	// float buffer; confirm against the declaration of stim::gpu2image.
	stim::gpu2image(result, "test2.bmp", DIM_X, DIM_Y, 0.0f, 255.0f);
	float output = get_sum(result);
	cleanUP(src);
	return output;
}

#endif