From 6ef1dab9e7cfa8e30d297ba698ffab696b112dc7 Mon Sep 17 00:00:00 2001 From: Laila Saadatifard Date: Tue, 8 Dec 2015 12:53:12 -0600 Subject: [PATCH] fix one bug in the gaussian_blur3 code, this ivote3 project works well for the data image size less than 512 by 512 by 256 --- cpp/cudafunc.cu | 8 +++++--- cpp/float_to_half.cuh | 7 ++++--- cpp/gaussian_blur3.cuh | 6 +++--- cpp/main.cpp | 4 ++-- cpp/update_dir3.cuh | 8 ++++---- 5 files changed, 18 insertions(+), 15 deletions(-) diff --git a/cpp/cudafunc.cu b/cpp/cudafunc.cu index 83fc0d5..c34ade6 100644 --- a/cpp/cudafunc.cu +++ b/cpp/cudafunc.cu @@ -1,3 +1,6 @@ +#include "cuda_fp16.h" +#include "float_to_half.cuh" +#include "half_to_float.cuh" #include "gaussian_blur3.cuh" #include "gradient3.cuh" #include "mag3.cuh" @@ -9,7 +12,6 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, unsigned int r[], int iter, float t, unsigned int conn[], unsigned int x, unsigned int y, unsigned int z){ - // compute the number of bytes in the input data unsigned int bytes = x * y * z * sizeof(float); @@ -68,7 +70,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un cudaFree(gpu_grad); cudaFree(gpu_mag); - //cudaMemcpy(center, gpu_vote, bytes, cudaMemcpyDeviceToHost); + cudaMemcpy(center, gpu_vote, bytes, cudaMemcpyDeviceToHost); //allocate space on the gpu for the final detected cells. float* gpu_output; @@ -78,7 +80,7 @@ void ivote3(float* center, float* img, float sigma[], float phi, float d_phi, un gpu_local_max3(gpu_output, gpu_vote, t, conn, x, y, z); //copy the final result to the cpu. - cudaMemcpy(center, gpu_output, bytes, cudaMemcpyDeviceToHost); + //cudaMemcpy(center, gpu_output, bytes, cudaMemcpyDeviceToHost); cudaFree(gpu_vote); diff --git a/cpp/float_to_half.cuh b/cpp/float_to_half.cuh index ed85ef0..ed00b23 100644 --- a/cpp/float_to_half.cuh +++ b/cpp/float_to_half.cuh @@ -6,8 +6,8 @@ #include #include #include -#include "cuda_fp16.h" - +#include +#include __global__ void cuda_f2h(half* gpu_half, float* gpu_float, int x, int y, int z){ @@ -24,7 +24,8 @@ gpu_half[i] = __float2half(gpu_float[i]); - + + } diff --git a/cpp/gaussian_blur3.cuh b/cpp/gaussian_blur3.cuh index f42e8d4..142ac6f 100644 --- a/cpp/gaussian_blur3.cuh +++ b/cpp/gaussian_blur3.cuh @@ -12,7 +12,7 @@ template - __global__ void blur_x(T* out, T* in, T sigma, unsigned int x, unsigned int y, unsigned int z){ + __global__ void blur_x(T* out, T* in, T sigma, int x, int y, int z){ //calculate x,y,z coordinates for this thread int xi = blockIdx.x * blockDim.x + threadIdx.x; @@ -55,7 +55,7 @@ template - __global__ void blur_y(T* out, T* in, T sigma, unsigned int x, unsigned int y, unsigned int z){ + __global__ void blur_y(T* out, T* in, T sigma, int x, int y, int z){ //calculate x,y,z coordinates for this thread int xi = blockIdx.x * blockDim.x + threadIdx.x; @@ -98,7 +98,7 @@ } template - __global__ void blur_z(T* out, T* in, T sigma, unsigned int x, unsigned int y, unsigned int z){ + __global__ void blur_z(T* out, T* in, T sigma, int x, int y, int z){ //calculate x,y,z coordinates for this thread int xi = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/cpp/main.cpp b/cpp/main.cpp index 65c74e1..cb012cc 100644 --- a/cpp/main.cpp +++ b/cpp/main.cpp @@ -104,7 +104,7 @@ int main(int argc, char** argv){ invert_data(cpuI, x, y, z); //write a new file from the cpuI. - std::ofstream original("output/0-original_invert.vol", std::ofstream::out | std::ofstream::binary); + std::ofstream original("output/original_invert--512.vol", std::ofstream::out | std::ofstream::binary); original.write((char*)cpuI, bytes); original.close(); @@ -115,7 +115,7 @@ int main(int argc, char** argv){ ivote3(cpu_out, cpuI, sigma, phi, d_phi, r, iter, t, conn, x, y, z); //write the blurred file from the cpuI. - std::ofstream fblur("output/test0.vol", std::ofstream::out | std::ofstream::binary); + std::ofstream fblur("output/v1--512.vol", std::ofstream::out | std::ofstream::binary); fblur.write((char*)cpuI, bytes); fblur.close(); diff --git a/cpp/update_dir3.cuh b/cpp/update_dir3.cuh index 47f1cc0..fc9213d 100644 --- a/cpp/update_dir3.cuh +++ b/cpp/update_dir3.cuh @@ -5,7 +5,7 @@ # include #include #include - +#include // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area. template @@ -33,9 +33,9 @@ float max = 0; float l_vote = 0; // define local variables for the x, y, and z coordinations where the maximum happened - int id_x = g_v_x; - int id_y = g_v_y; - int id_z = g_v_z; + float id_x = g_v_x; + float id_y = g_v_y; + float id_z = g_v_z; int rx_sq = rx * rx; int ry_sq = ry * ry; -- libgit2 0.21.4