Commit 3363c667fb4c4b2a82004fb38b9a12ba3439350c

Authored by Pavel Govyadinov
2 parents 33d7d3cf 7e099e80

Merge branch 'Dev'

Showing 1 changed file with 133 additions and 0 deletions   Show diff stats
cuda/cost.h 0 → 100644
  1 +#include <assert.h>
  2 +#include <cuda.h>
  3 +#include <cuda_runtime.h>
  4 +#include <cublas_v2.h>
  5 +#include "cuPrintf.cu"
  6 +#include "cuPrintf.cuh"
  7 +#include <stdio.h>
  8 +#include "../visualization/colormap.h"
  9 +
  10 +#define DIM_X 400
  11 +#define DIM_Y 200
  12 +typedef unsigned char uchar;
  13 +//surface<void, 2> texOut; ///// maybe just do a normal array instead of a surface.
  14 + //we may not need a surface at all.
  15 +//texture<float, cudaTextureType2D, cudaReadModeElementType> texTemplate
  16 +texture<float, cudaTextureType2D, cudaReadModeElementType> texIn;
  17 +float *result;
  18 +float* v_dif;
  19 +cudaArray* srcArray;
  20 +
  21 +inline void checkCUDAerrors(const char *msg)
  22 +{
  23 + cudaError_t err = cudaGetLastError();
  24 + if (cudaSuccess != err){
  25 + fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(err) );
  26 + exit(1);
  27 + }
  28 +}
  29 +
  30 +
  31 +float get_sum(float *diff)
  32 +{
  33 +
  34 + cublasStatus_t ret;
  35 + cublasHandle_t handle;
  36 + ret = cublasCreate(&handle);
  37 +
  38 + ret = cublasSetVector(DIM_X*DIM_Y, sizeof(*diff), diff, 1, v_dif, 1);
  39 + //float* out = (float*) malloc(sizeof(float));
  40 + float out;
  41 + ret = cublasSasum(handle, DIM_X*DIM_Y, v_dif, 1, &out);
  42 + cublasDestroy(handle);
  43 + cudaFree(v_dif);
  44 + return out;
  45 +}
  46 +
  47 +__device__ float Template(int x)
  48 +{
  49 + if(x < DIM_X/3 || x > DIM_X*2/3){
  50 + return 1.0;
  51 + }else{
  52 + return 0.0;
  53 + }
  54 +
  55 +}
  56 +
  57 +__global__
  58 +void get_diff (float *result)
  59 +{
  60 + //cuPrintf("Hello");
  61 + int x = threadIdx.x + blockIdx.x * blockDim.x;
  62 + int y = threadIdx.y + blockIdx.y * blockDim.y;
  63 + int idx = y*DIM_Y+x;
  64 + //int idx = x*DIM_X+y;
  65 +
  66 + //uchar4 color = tex2D(texIn, x, y);
  67 + //float3 tempcolor = make_float3(color.x, color.y, color.z);
  68 + //float valIn = tempcolor.x + tempcolor.y + tempcolor.z;
  69 + float valIn = x;//tex2D(texIn, x, y);
  70 + float valTemp = Template(x);
  71 + result[idx] = valIn;
  72 +// - valTemp;
  73 + //#if __CUDA_ARCH__>=200
  74 + // printf("Value is : %f\n and the result is : %f\n", valIn, result[idx]);
  75 + //#endif
  76 + //cuPrintf("Value is : %f\n and the result is : %f\n", valIn, result[idx]);
  77 +}
  78 +
  79 +
  80 +
  81 +
  82 +void initArray(cudaGraphicsResource_t src)
  83 +{
  84 + //cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar> ();
  85 + //cudaMallocArray(&result, &channelDesc, DIM_X, DIM_Y, 0);
  86 + //HANDLE_ERROR(
  87 + // cudaGraphicsGLRegisterImage(&src,
  88 + // fboID,
  89 + // GL_TEXTURE_2D,
  90 + HANDLE_ERROR(
  91 + cudaGraphicsMapResources(1, &src)
  92 + );
  93 + HANDLE_ERROR(
  94 + cudaGraphicsSubResourceGetMappedArray(&srcArray, src,0,0)
  95 + );
  96 + HANDLE_ERROR(
  97 + cudaBindTextureToArray(texIn, srcArray)
  98 + );
  99 + cudaMalloc( (void**) &result, DIM_X*DIM_Y*sizeof(float));
  100 + checkCUDAerrors("Memory Allocation Issue 1");
  101 + cudaMalloc((void **) &v_dif, DIM_X*DIM_Y*sizeof(*result));
  102 + checkCUDAerrors("Memory Allocation Issue 2");
  103 + //HANDLE_ERROR(
  104 + // cudaBindTextureToArray(texIn, ptr, &channelDesc)
  105 + // );
  106 +}
  107 +
  108 +void cleanUP(cudaGraphicsResource_t src)
  109 +{
  110 + HANDLE_ERROR(
  111 + cudaUnbindTexture(texIn)
  112 + );
  113 + HANDLE_ERROR(
  114 + cudaFree(result)
  115 + );
  116 + HANDLE_ERROR(
  117 + cudaGraphicsUnmapResources(1,&src)
  118 + );
  119 +}
  120 +
  121 +extern "C"
  122 +float get_cost(cudaGraphicsResource_t src)
  123 +{
  124 + initArray(src);
  125 + dim3 grid(400, 200);
  126 + dim3 block(1, 1);
  127 + //texIn.normalized = 1;
  128 + get_diff <<< grid, block >>> (result);
  129 + stim::gpu2image<float>(result, "test2.bmp", 400,200,0,255);
  130 + float output = get_sum(result);
  131 + cleanUP(src);
  132 + return output;
  133 +}
... ...