diff --git a/stim/iVote/ivote2.cuh b/stim/iVote/ivote2.cuh index e88dab8..0c97ccb 100644 --- a/stim/iVote/ivote2.cuh +++ b/stim/iVote/ivote2.cuh @@ -6,13 +6,14 @@ #include #include #include -#include +#include +#include #include #include #include -namespace stim { +namespace stim { // this function precomputes the atan2 values template void atan_2(T* cpuTable, unsigned int rmax) { @@ -93,8 +94,8 @@ namespace stim { //this function performs the 2D iterative voting algorithm on the image stored in the gpu template - void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", - int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) { + void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert = false, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", + int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) { size_t pixels = x * y; //compute the size of input image // @@ -118,14 +119,12 @@ namespace stim { float* gpuVote; HANDLE_ERROR(cudaMalloc(&gpuVote, bytes)); //allocate space to store the vote image stim::cuda::gpu_gradient_2d(gpuGrad, gpuI, x, y); //calculate the 2D gradient - //if (invert) stim::cuda::gpu_cart2polar(gpuGrad, x, y, stim::PI); - //else stim::cuda::gpu_cart2polar(gpuGrad, x, y); - stim::cuda::gpu_cart2polar(gpuGrad, x, y); //convert cartesian coordinate of gradient to the polar + stim::cuda::gpu_cart2polar(gpuGrad, x, y); //convert cartesian coordinate of gradient to the polar for (int i = 0; i < iter; i++) { //for each iteration cudaMemset(gpuVote, 0, bytes); //reset the vote image to 0 - stim::cuda::gpu_vote(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //perform voting - stim::cuda::gpu_update_dir(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //update the voter directions + stim::cuda::gpu_vote(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y, debug); //perform voting + stim::cuda::gpu_update_dir(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y, debug); //update the voter directions phi = phi - dphi; //decrement phi } stim::cuda::gpu_local_max(gpuI, gpuVote, conn, x, y); //calculate the local maxima @@ -160,13 +159,13 @@ namespace stim { template - void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", - int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) { + void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert = false, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", + int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) { size_t bytes = x*y * sizeof(T); T* gpuI; //allocate space on the gpu to save the input image HANDLE_ERROR(cudaMalloc(&gpuI, bytes)); HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice)); //copy the image to the gpu - stim::gpu_ivote2(gpuI, rmax, x, y, invert, t, outname_img, outname_txt, iter, phi, conn); //call the gpu version of the ivote + stim::gpu_ivote2(gpuI, rmax, x, y, invert, t, outname_img, outname_txt, iter, phi, conn, debug); //call the gpu version of the ivote HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output to the cpu } } diff --git a/stim/iVote/ivote2/iter_vote2.cuh b/stim/iVote/ivote2/iter_vote2.cuh index 423d916..55f3445 100644 --- a/stim/iVote/ivote2/iter_vote2.cuh +++ b/stim/iVote/ivote2/iter_vote2.cuh @@ -1,9 +1,8 @@ #ifndef STIM_CUDA_ITER_VOTE2_H #define STIM_CUDA_ITER_VOTE2_H -extern bool DEBUG; +//extern bool DEBUG; -#include "local_max.cuh" #include "update_dir_bb.cuh" #include "vote_atomic_bb.cuh" diff --git a/stim/iVote/ivote2/update_dir_bb.cuh b/stim/iVote/ivote2/update_dir_bb.cuh index 43869ec..b9da2be 100644 --- a/stim/iVote/ivote2/update_dir_bb.cuh +++ b/stim/iVote/ivote2/update_dir_bb.cuh @@ -97,7 +97,7 @@ namespace stim{ } template - void gpu_update_dir(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y){ + void gpu_update_dir(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y, bool DEBUG = false){ //calculate the number of bytes in the array size_t bytes = x * y * sizeof(T); diff --git a/stim/iVote/ivote2/vote_atomic_bb.cuh b/stim/iVote/ivote2/vote_atomic_bb.cuh index 5a05001..dd033a6 100644 --- a/stim/iVote/ivote2/vote_atomic_bb.cuh +++ b/stim/iVote/ivote2/vote_atomic_bb.cuh @@ -87,7 +87,7 @@ namespace stim{ /// @param x and y are the spatial dimensions of the gradient image /// @param gradmag defines whether or not the gradient magnitude is taken into account during the vote template - void gpu_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y, bool gradmag = true){ + void gpu_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y, bool DEBUG = false, bool gradmag = true){ unsigned int max_threads = stim::maxThreadsPerBlock(); dim3 threads( (unsigned int)sqrt(max_threads), (unsigned int)sqrt(max_threads) ); dim3 blocks((unsigned int)x/threads.x + 1, (unsigned int)y/threads.y + 1); @@ -96,7 +96,7 @@ namespace stim{ if (DEBUG) std::cout<<"Shared Memory required: "< shared_mem){ - std::cout<<"Error: insufficient shared memory for this implementation of cuda_update_dir()."<