Commit c282a8017431a5838f19de6a91e21aa21d4d4856
Merge branch 'master' of https://git.stim.ee.uh.edu/codebase/stimlib
Showing 2 changed files with 13 additions and 10 deletions
Show diff stats
stim/cuda/ivote/local_max.cuh
... | ... | @@ -10,7 +10,7 @@ namespace stim{ |
10 | 10 | |
11 | 11 | // this kernel calculates the local maximum for finding the cell centers |
12 | 12 | template<typename T> |
13 | - __global__ void cuda_local_max(T* gpuCenters, T* gpuVote, T final_t, int conn, int x, int y){ | |
13 | + __global__ void cuda_local_max(T* gpuCenters, T* gpuVote, int conn, int x, int y){ | |
14 | 14 | |
15 | 15 | // calculate the 2D coordinates for this current thread. |
16 | 16 | int xi = blockIdx.x * blockDim.x + threadIdx.x; |
... | ... | @@ -27,7 +27,7 @@ namespace stim{ |
27 | 27 | T val = gpuVote[i]; |
28 | 28 | |
29 | 29 | //compare to the threshold |
30 | - if(val < final_t) return; | |
30 | + //if(val < final_t) return; | |
31 | 31 | |
32 | 32 | //define an array to store indices with same vote value |
33 | 33 | /*int * IdxEq; |
... | ... | @@ -56,11 +56,12 @@ namespace stim{ |
56 | 56 | return; |
57 | 57 | } |
58 | 58 | } */ |
59 | - gpuCenters[i] = 1; | |
59 | + //gpuCenters[i] = 1; | |
60 | + gpuCenters[i] = gpuVote[i]; | |
60 | 61 | } |
61 | 62 | |
62 | 63 | template<typename T> |
63 | - void gpu_local_max(T* gpuCenters, T* gpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){ | |
64 | + void gpu_local_max(T* gpuCenters, T* gpuVote, unsigned int conn, unsigned int x, unsigned int y){ | |
64 | 65 | |
65 | 66 | unsigned int max_threads = stim::maxThreadsPerBlock(); |
66 | 67 | /*dim3 threads(max_threads, 1); |
... | ... | @@ -69,11 +70,11 @@ namespace stim{ |
69 | 70 | dim3 blocks(x/threads.x + 1, y/threads.y + 1); |
70 | 71 | |
71 | 72 | //call the kernel to find the local maximum. |
72 | - cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, final_t, conn, x, y); | |
73 | + cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, conn, x, y); | |
73 | 74 | } |
74 | 75 | |
75 | 76 | template<typename T> |
76 | - void cpu_local_max(T* cpuCenters, T* cpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){ | |
77 | + void cpu_local_max(T* cpuCenters, T* cpuVote, unsigned int conn, unsigned int x, unsigned int y){ | |
77 | 78 | |
78 | 79 | //calculate the number of bytes in the array |
79 | 80 | unsigned int bytes = x * y * sizeof(T); |
... | ... | @@ -90,7 +91,7 @@ namespace stim{ |
90 | 91 | HANDLE_ERROR(cudaMemcpy(gpuVote, cpuVote, bytes, cudaMemcpyHostToDevice)); |
91 | 92 | |
92 | 93 | //call the GPU version of the local max function |
93 | - gpu_local_max<T>(gpuCenters, gpuVote, final_t, conn, x, y); | |
94 | + gpu_local_max<T>(gpuCenters, gpuVote, conn, x, y); | |
94 | 95 | |
95 | 96 | //copy the cell centers data to the CPU |
96 | 97 | cudaMemcpy(cpuCenters, gpuCenters, bytes, cudaMemcpyDeviceToHost) ; | ... | ... |
stim/cuda/ivote/update_dir_bb.cuh
... | ... | @@ -40,8 +40,9 @@ namespace stim{ |
40 | 40 | bb.insert(xi + ceil(rmax * cos(theta + phi)), yi + ceil(rmax * sin(theta + phi))); //insert the final corner into the bounding box |
41 | 41 | |
42 | 42 | int x_table = 2*rmax +1; |
43 | - int lut_i; | |
44 | 43 | T rmax_sq = rmax * rmax; |
44 | + | |
45 | + int lut_i; | |
45 | 46 | T dx_sq, dy_sq; |
46 | 47 | |
47 | 48 | bb.trim_low(0, 0); //make sure the bounding box doesn't go outside the image |
... | ... | @@ -49,11 +50,12 @@ namespace stim{ |
49 | 50 | |
50 | 51 | int by, bx; |
51 | 52 | int dx, dy; //coordinate relative to (xi, yi) |
53 | + | |
52 | 54 | T v; |
53 | 55 | T max_v = 0; //initialize the maximum vote value to zero |
54 | 56 | T alpha; |
55 | - int max_dx = bb.low[0]; | |
56 | - int max_dy = bb.low[1]; | |
57 | + int max_dx = bb.low[0] - xi; | |
58 | + int max_dy = bb.low[1] - yi; | |
57 | 59 | for(by = bb.low[1]; by <= bb.high[1]; by++){ //for each element in the bounding box |
58 | 60 | dy = by - yi; //calculate the y coordinate of the current point relative to yi |
59 | 61 | dy_sq = dy * dy; | ... | ... |