From 654c8934c91160c2a206f6950546ab93528b0c58 Mon Sep 17 00:00:00 2001 From: David Mayerich Date: Wed, 29 Mar 2017 17:56:38 -0500 Subject: [PATCH] CUDA_CALLABLE fix and CUDA_UNCALLABLE implementation --- stim/cuda/cudatools/callable.h | 6 ++++++ stim/math/vec3.h | 6 +++--- stim/visualization/colormap.h | 8 +------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/stim/cuda/cudatools/callable.h b/stim/cuda/cudatools/callable.h index 52e1c8a..601d852 100644 --- a/stim/cuda/cudatools/callable.h +++ b/stim/cuda/cudatools/callable.h @@ -7,4 +7,10 @@ #define CUDA_CALLABLE #endif +#ifdef __CUDACC__ +#define CUDA_UNCALLABLE __host__ inline +#else +#define CUDA_UNCALLABLE +#endif + #endif diff --git a/stim/math/vec3.h b/stim/math/vec3.h index 457092f..41c2f07 100644 --- a/stim/math/vec3.h +++ b/stim/math/vec3.h @@ -243,9 +243,9 @@ public: return false; } -#ifndef __NVCC__ +//#ifndef __CUDA_ARCH__ /// Outputs the vector as a string - std::string str() const{ +CUDA_UNCALLABLE std::string str() const{ std::stringstream ss; const size_t N = 3; @@ -261,7 +261,7 @@ public: return ss.str(); } -#endif +//#endif size_t size(){ return 3; } diff --git a/stim/visualization/colormap.h b/stim/visualization/colormap.h index b0807f3..6b3b4b4 100644 --- a/stim/visualization/colormap.h +++ b/stim/visualization/colormap.h @@ -166,14 +166,12 @@ static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T gridX = 65535; } dim3 dimGrid(gridX, gridY); - //int gridDim = (nVals + blockDim - 1)/blockDim; if(cm == cmGrayscale) applyGrayscale<<<dimGrid, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal); else if(cm == cmBrewer) { initBrewer(); applyBrewer<<<dimGrid, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal); - //HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3)); destroyBrewer(); } @@ -190,13 +188,9 @@ static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T unsigned char* gpuDest; HANDLE_ERROR(cudaMalloc( (void**)&gpuDest, sizeof(unsigned char) 
* nVals * 3 )); - //HANDLE_ERROR(cudaMemset(gpuSource, 0, sizeof(T) * nVals)); - //create the image on the gpu gpu2gpu(gpuSource, gpuDest, nVals, minVal, maxVal, cm); - - //HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3)); - + //copy the image from the GPU to the CPU HANDLE_ERROR(cudaMemcpy(cpuDest, gpuDest, sizeof(unsigned char) * nVals * 3, cudaMemcpyDeviceToHost)); -- libgit2 0.21.4