Commit 6f710020edb2fa1144f7676571f6e721b63d4e9b
Merge branch 'ivote2' into 'master'
Fix the 'unresolved external symbol (bool DEBUG)' error in the ivote2.cuh file. See merge request !34
Showing
4 changed files
with
15 additions
and
17 deletions
Show diff stats
stim/iVote/ivote2.cuh
@@ -6,13 +6,14 @@ | @@ -6,13 +6,14 @@ | ||
6 | #include <stim/cuda/cudatools/error.h> | 6 | #include <stim/cuda/cudatools/error.h> |
7 | #include <stim/cuda/templates/gradient.cuh> | 7 | #include <stim/cuda/templates/gradient.cuh> |
8 | #include <stim/cuda/arraymath.cuh> | 8 | #include <stim/cuda/arraymath.cuh> |
9 | -#include <stim/iVote/ivote2/ivote2.cuh> | 9 | +#include <stim/iVote/ivote2/iter_vote2.cuh> |
10 | +#include <stim/iVote/ivote2/local_max.cuh> | ||
10 | #include <stim/math/constants.h> | 11 | #include <stim/math/constants.h> |
11 | #include <stim/math/vector.h> | 12 | #include <stim/math/vector.h> |
12 | #include <stim/visualization/colormap.h> | 13 | #include <stim/visualization/colormap.h> |
13 | 14 | ||
14 | -namespace stim { | ||
15 | 15 | ||
16 | +namespace stim { | ||
16 | // this function precomputes the atan2 values | 17 | // this function precomputes the atan2 values |
17 | template<typename T> | 18 | template<typename T> |
18 | void atan_2(T* cpuTable, unsigned int rmax) { | 19 | void atan_2(T* cpuTable, unsigned int rmax) { |
@@ -93,8 +94,8 @@ namespace stim { | @@ -93,8 +94,8 @@ namespace stim { | ||
93 | 94 | ||
94 | //this function performs the 2D iterative voting algorithm on the image stored in the gpu | 95 | //this function performs the 2D iterative voting algorithm on the image stored in the gpu |
95 | template<typename T> | 96 | template<typename T> |
96 | - void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", | ||
97 | - int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) { | 97 | + void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert = false, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", |
98 | + int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) { | ||
98 | 99 | ||
99 | size_t pixels = x * y; //compute the size of input image | 100 | size_t pixels = x * y; //compute the size of input image |
100 | // | 101 | // |
@@ -118,14 +119,12 @@ namespace stim { | @@ -118,14 +119,12 @@ namespace stim { | ||
118 | float* gpuVote; HANDLE_ERROR(cudaMalloc(&gpuVote, bytes)); //allocate space to store the vote image | 119 | float* gpuVote; HANDLE_ERROR(cudaMalloc(&gpuVote, bytes)); //allocate space to store the vote image |
119 | 120 | ||
120 | stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y); //calculate the 2D gradient | 121 | stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y); //calculate the 2D gradient |
121 | - //if (invert) stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y, stim::PI); | ||
122 | - //else stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); | ||
123 | - stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); //convert cartesian coordinate of gradient to the polar | 122 | + stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); //convert cartesian coordinate of gradient to the polar |
124 | 123 | ||
125 | for (int i = 0; i < iter; i++) { //for each iteration | 124 | for (int i = 0; i < iter; i++) { //for each iteration |
126 | cudaMemset(gpuVote, 0, bytes); //reset the vote image to 0 | 125 | cudaMemset(gpuVote, 0, bytes); //reset the vote image to 0 |
127 | - stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //perform voting | ||
128 | - stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //update the voter directions | 126 | + stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y, debug); //perform voting |
127 | + stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y, debug); //update the voter directions | ||
129 | phi = phi - dphi; //decrement phi | 128 | phi = phi - dphi; //decrement phi |
130 | } | 129 | } |
131 | stim::cuda::gpu_local_max<float>(gpuI, gpuVote, conn, x, y); //calculate the local maxima | 130 | stim::cuda::gpu_local_max<float>(gpuI, gpuVote, conn, x, y); //calculate the local maxima |
@@ -160,13 +159,13 @@ namespace stim { | @@ -160,13 +159,13 @@ namespace stim { | ||
160 | 159 | ||
161 | 160 | ||
162 | template<typename T> | 161 | template<typename T> |
163 | - void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", | ||
164 | - int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) { | 162 | + void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert = false, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", |
163 | + int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) { | ||
165 | size_t bytes = x*y * sizeof(T); | 164 | size_t bytes = x*y * sizeof(T); |
166 | T* gpuI; //allocate space on the gpu to save the input image | 165 | T* gpuI; //allocate space on the gpu to save the input image |
167 | HANDLE_ERROR(cudaMalloc(&gpuI, bytes)); | 166 | HANDLE_ERROR(cudaMalloc(&gpuI, bytes)); |
168 | HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice)); //copy the image to the gpu | 167 | HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice)); //copy the image to the gpu |
169 | - stim::gpu_ivote2<T>(gpuI, rmax, x, y, invert, t, outname_img, outname_txt, iter, phi, conn); //call the gpu version of the ivote | 168 | + stim::gpu_ivote2<T>(gpuI, rmax, x, y, invert, t, outname_img, outname_txt, iter, phi, conn, debug); //call the gpu version of the ivote |
170 | HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output to the cpu | 169 | HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output to the cpu |
171 | } | 170 | } |
172 | } | 171 | } |
stim/iVote/ivote2/iter_vote2.cuh
1 | #ifndef STIM_CUDA_ITER_VOTE2_H | 1 | #ifndef STIM_CUDA_ITER_VOTE2_H |
2 | #define STIM_CUDA_ITER_VOTE2_H | 2 | #define STIM_CUDA_ITER_VOTE2_H |
3 | 3 | ||
4 | -extern bool DEBUG; | 4 | +//extern bool DEBUG; |
5 | 5 | ||
6 | -#include "local_max.cuh" | ||
7 | #include "update_dir_bb.cuh" | 6 | #include "update_dir_bb.cuh" |
8 | #include "vote_atomic_bb.cuh" | 7 | #include "vote_atomic_bb.cuh" |
9 | 8 |
stim/iVote/ivote2/update_dir_bb.cuh
@@ -97,7 +97,7 @@ namespace stim{ | @@ -97,7 +97,7 @@ namespace stim{ | ||
97 | } | 97 | } |
98 | 98 | ||
99 | template<typename T> | 99 | template<typename T> |
100 | - void gpu_update_dir(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y){ | 100 | + void gpu_update_dir(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y, bool DEBUG = false){ |
101 | 101 | ||
102 | //calculate the number of bytes in the array | 102 | //calculate the number of bytes in the array |
103 | size_t bytes = x * y * sizeof(T); | 103 | size_t bytes = x * y * sizeof(T); |
stim/iVote/ivote2/vote_atomic_bb.cuh
@@ -87,7 +87,7 @@ namespace stim{ | @@ -87,7 +87,7 @@ namespace stim{ | ||
87 | /// @param x and y are the spatial dimensions of the gradient image | 87 | /// @param x and y are the spatial dimensions of the gradient image |
88 | /// @param gradmag defines whether or not the gradient magnitude is taken into account during the vote | 88 | /// @param gradmag defines whether or not the gradient magnitude is taken into account during the vote |
89 | template<typename T> | 89 | template<typename T> |
90 | - void gpu_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y, bool gradmag = true){ | 90 | + void gpu_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y, bool DEBUG = false, bool gradmag = true){ |
91 | unsigned int max_threads = stim::maxThreadsPerBlock(); | 91 | unsigned int max_threads = stim::maxThreadsPerBlock(); |
92 | dim3 threads( (unsigned int)sqrt(max_threads), (unsigned int)sqrt(max_threads) ); | 92 | dim3 threads( (unsigned int)sqrt(max_threads), (unsigned int)sqrt(max_threads) ); |
93 | dim3 blocks((unsigned int)x/threads.x + 1, (unsigned int)y/threads.y + 1); | 93 | dim3 blocks((unsigned int)x/threads.x + 1, (unsigned int)y/threads.y + 1); |
@@ -96,7 +96,7 @@ namespace stim{ | @@ -96,7 +96,7 @@ namespace stim{ | ||
96 | if (DEBUG) std::cout<<"Shared Memory required: "<<shared_mem_req<<std::endl; | 96 | if (DEBUG) std::cout<<"Shared Memory required: "<<shared_mem_req<<std::endl; |
97 | size_t shared_mem = stim::sharedMemPerBlock(); | 97 | size_t shared_mem = stim::sharedMemPerBlock(); |
98 | if(shared_mem_req > shared_mem){ | 98 | if(shared_mem_req > shared_mem){ |
99 | - std::cout<<"Error: insufficient shared memory for this implementation of cuda_update_dir()."<<std::endl; | 99 | + std::cout<<"Error: insufficient shared memory for this implementation of cuda_vote()."<<std::endl; |
100 | exit(1); | 100 | exit(1); |
101 | } | 101 | } |
102 | 102 |