Commit 567379052590a9a52c9f3f31a3634a3a777c2adc
Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib
Showing 4 changed files with 15 additions and 17 deletions
Show diff stats
stim/iVote/ivote2.cuh
... | ... | @@ -6,13 +6,14 @@ |
6 | 6 | #include <stim/cuda/cudatools/error.h> |
7 | 7 | #include <stim/cuda/templates/gradient.cuh> |
8 | 8 | #include <stim/cuda/arraymath.cuh> |
9 | -#include <stim/iVote/ivote2/ivote2.cuh> | |
9 | +#include <stim/iVote/ivote2/iter_vote2.cuh> | |
10 | +#include <stim/iVote/ivote2/local_max.cuh> | |
10 | 11 | #include <stim/math/constants.h> |
11 | 12 | #include <stim/math/vector.h> |
12 | 13 | #include <stim/visualization/colormap.h> |
13 | 14 | |
14 | -namespace stim { | |
15 | 15 | |
16 | +namespace stim { | |
16 | 17 | // this function precomputes the atan2 values |
17 | 18 | template<typename T> |
18 | 19 | void atan_2(T* cpuTable, unsigned int rmax) { |
... | ... | @@ -93,8 +94,8 @@ namespace stim { |
93 | 94 | |
94 | 95 | //this function performs the 2D iterative voting algorithm on the image stored in the gpu |
95 | 96 | template<typename T> |
96 | - void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", | |
97 | - int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) { | |
97 | + void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert = false, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", | |
98 | + int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) { | |
98 | 99 | |
99 | 100 | size_t pixels = x * y; //compute the size of input image |
100 | 101 | // |
... | ... | @@ -118,14 +119,12 @@ namespace stim { |
118 | 119 | float* gpuVote; HANDLE_ERROR(cudaMalloc(&gpuVote, bytes)); //allocate space to store the vote image |
119 | 120 | |
120 | 121 | stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y); //calculate the 2D gradient |
121 | - //if (invert) stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y, stim::PI); | |
122 | - //else stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); | |
123 | - stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); //convert cartesian coordinate of gradient to the polar | |
122 | + stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y); //convert cartesian coordinate of gradient to the polar | |
124 | 123 | |
125 | 124 | for (int i = 0; i < iter; i++) { //for each iteration |
126 | 125 | cudaMemset(gpuVote, 0, bytes); //reset the vote image to 0 |
127 | - stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //perform voting | |
128 | - stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y); //update the voter directions | |
126 | + stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y, debug); //perform voting | |
127 | + stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y, debug); //update the voter directions | |
129 | 128 | phi = phi - dphi; //decrement phi |
130 | 129 | } |
131 | 130 | stim::cuda::gpu_local_max<float>(gpuI, gpuVote, conn, x, y); //calculate the local maxima |
... | ... | @@ -160,13 +159,13 @@ namespace stim { |
160 | 159 | |
161 | 160 | |
162 | 161 | template<typename T> |
163 | - void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", | |
164 | - int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8) { | |
162 | + void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert = false, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt", | |
163 | + int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) { | |
165 | 164 | size_t bytes = x*y * sizeof(T); |
166 | 165 | T* gpuI; //allocate space on the gpu to save the input image |
167 | 166 | HANDLE_ERROR(cudaMalloc(&gpuI, bytes)); |
168 | 167 | HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice)); //copy the image to the gpu |
169 | - stim::gpu_ivote2<T>(gpuI, rmax, x, y, invert, t, outname_img, outname_txt, iter, phi, conn); //call the gpu version of the ivote | |
168 | + stim::gpu_ivote2<T>(gpuI, rmax, x, y, invert, t, outname_img, outname_txt, iter, phi, conn, debug); //call the gpu version of the ivote | |
170 | 169 | HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output to the cpu |
171 | 170 | } |
172 | 171 | } | ... | ... |
stim/iVote/ivote2/iter_vote2.cuh
stim/iVote/ivote2/update_dir_bb.cuh
... | ... | @@ -97,7 +97,7 @@ namespace stim{ |
97 | 97 | } |
98 | 98 | |
99 | 99 | template<typename T> |
100 | - void gpu_update_dir(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y){ | |
100 | + void gpu_update_dir(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y, bool DEBUG = false){ | |
101 | 101 | |
102 | 102 | //calculate the number of bytes in the array |
103 | 103 | size_t bytes = x * y * sizeof(T); | ... | ... |
stim/iVote/ivote2/vote_atomic_bb.cuh
... | ... | @@ -87,7 +87,7 @@ namespace stim{ |
87 | 87 | /// @param x and y are the spatial dimensions of the gradient image |
88 | 88 | /// @param gradmag defines whether or not the gradient magnitude is taken into account during the vote |
89 | 89 | template<typename T> |
90 | - void gpu_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y, bool gradmag = true){ | |
90 | + void gpu_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y, bool DEBUG = false, bool gradmag = true){ | |
91 | 91 | unsigned int max_threads = stim::maxThreadsPerBlock(); |
92 | 92 | dim3 threads( (unsigned int)sqrt(max_threads), (unsigned int)sqrt(max_threads) ); |
93 | 93 | dim3 blocks((unsigned int)x/threads.x + 1, (unsigned int)y/threads.y + 1); |
... | ... | @@ -96,7 +96,7 @@ namespace stim{ |
96 | 96 | if (DEBUG) std::cout<<"Shared Memory required: "<<shared_mem_req<<std::endl; |
97 | 97 | size_t shared_mem = stim::sharedMemPerBlock(); |
98 | 98 | if(shared_mem_req > shared_mem){ |
99 | - std::cout<<"Error: insufficient shared memory for this implementation of cuda_update_dir()."<<std::endl; | |
99 | + std::cout<<"Error: insufficient shared memory for this implementation of cuda_vote()."<<std::endl; | |
100 | 100 | exit(1); |
101 | 101 | } |
102 | 102 | ... | ... |