Commit daacc99c097346e9dc02e5be9a1b5a5945e177a1 (1 parent: 800ff264)
change the local max kernel to not threshold the output
Showing 2 changed files with 13 additions and 10 deletions
Show diff stats
stim/cuda/ivote/local_max.cuh
@@ -10,7 +10,7 @@ namespace stim{ | @@ -10,7 +10,7 @@ namespace stim{ | ||
10 | 10 | ||
11 | // this kernel calculates the local maximum for finding the cell centers | 11 | // this kernel calculates the local maximum for finding the cell centers |
12 | template<typename T> | 12 | template<typename T> |
13 | - __global__ void cuda_local_max(T* gpuCenters, T* gpuVote, T final_t, int conn, int x, int y){ | 13 | + __global__ void cuda_local_max(T* gpuCenters, T* gpuVote, int conn, int x, int y){ |
14 | 14 | ||
15 | // calculate the 2D coordinates for this current thread. | 15 | // calculate the 2D coordinates for this current thread. |
16 | int xi = blockIdx.x * blockDim.x + threadIdx.x; | 16 | int xi = blockIdx.x * blockDim.x + threadIdx.x; |
@@ -27,7 +27,7 @@ namespace stim{ | @@ -27,7 +27,7 @@ namespace stim{ | ||
27 | T val = gpuVote[i]; | 27 | T val = gpuVote[i]; |
28 | 28 | ||
29 | //compare to the threshold | 29 | //compare to the threshold |
30 | - if(val < final_t) return; | 30 | + //if(val < final_t) return; |
31 | 31 | ||
32 | //define an array to store indices with same vote value | 32 | //define an array to store indices with same vote value |
33 | /*int * IdxEq; | 33 | /*int * IdxEq; |
@@ -56,11 +56,12 @@ namespace stim{ | @@ -56,11 +56,12 @@ namespace stim{ | ||
56 | return; | 56 | return; |
57 | } | 57 | } |
58 | } */ | 58 | } */ |
59 | - gpuCenters[i] = 1; | 59 | + //gpuCenters[i] = 1; |
60 | + gpuCenters[i] = gpuVote[i]; | ||
60 | } | 61 | } |
61 | 62 | ||
62 | template<typename T> | 63 | template<typename T> |
63 | - void gpu_local_max(T* gpuCenters, T* gpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){ | 64 | + void gpu_local_max(T* gpuCenters, T* gpuVote, unsigned int conn, unsigned int x, unsigned int y){ |
64 | 65 | ||
65 | unsigned int max_threads = stim::maxThreadsPerBlock(); | 66 | unsigned int max_threads = stim::maxThreadsPerBlock(); |
66 | /*dim3 threads(max_threads, 1); | 67 | /*dim3 threads(max_threads, 1); |
@@ -69,11 +70,11 @@ namespace stim{ | @@ -69,11 +70,11 @@ namespace stim{ | ||
69 | dim3 blocks(x/threads.x + 1, y/threads.y + 1); | 70 | dim3 blocks(x/threads.x + 1, y/threads.y + 1); |
70 | 71 | ||
71 | //call the kernel to find the local maximum. | 72 | //call the kernel to find the local maximum. |
72 | - cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, final_t, conn, x, y); | 73 | + cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, conn, x, y); |
73 | } | 74 | } |
74 | 75 | ||
75 | template<typename T> | 76 | template<typename T> |
76 | - void cpu_local_max(T* cpuCenters, T* cpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){ | 77 | + void cpu_local_max(T* cpuCenters, T* cpuVote, unsigned int conn, unsigned int x, unsigned int y){ |
77 | 78 | ||
78 | //calculate the number of bytes in the array | 79 | //calculate the number of bytes in the array |
79 | unsigned int bytes = x * y * sizeof(T); | 80 | unsigned int bytes = x * y * sizeof(T); |
@@ -90,7 +91,7 @@ namespace stim{ | @@ -90,7 +91,7 @@ namespace stim{ | ||
90 | HANDLE_ERROR(cudaMemcpy(gpuVote, cpuVote, bytes, cudaMemcpyHostToDevice)); | 91 | HANDLE_ERROR(cudaMemcpy(gpuVote, cpuVote, bytes, cudaMemcpyHostToDevice)); |
91 | 92 | ||
92 | //call the GPU version of the local max function | 93 | //call the GPU version of the local max function |
93 | - gpu_local_max<T>(gpuCenters, gpuVote, final_t, conn, x, y); | 94 | + gpu_local_max<T>(gpuCenters, gpuVote, conn, x, y); |
94 | 95 | ||
95 | //copy the cell centers data to the CPU | 96 | //copy the cell centers data to the CPU |
96 | cudaMemcpy(cpuCenters, gpuCenters, bytes, cudaMemcpyDeviceToHost) ; | 97 | cudaMemcpy(cpuCenters, gpuCenters, bytes, cudaMemcpyDeviceToHost) ; |
stim/cuda/ivote/update_dir_bb.cuh
@@ -40,8 +40,9 @@ namespace stim{ | @@ -40,8 +40,9 @@ namespace stim{ | ||
40 | bb.insert(xi + ceil(rmax * cos(theta + phi)), yi + ceil(rmax * sin(theta + phi))); //insert the final corner into the bounding box | 40 | bb.insert(xi + ceil(rmax * cos(theta + phi)), yi + ceil(rmax * sin(theta + phi))); //insert the final corner into the bounding box |
41 | 41 | ||
42 | int x_table = 2*rmax +1; | 42 | int x_table = 2*rmax +1; |
43 | - int lut_i; | ||
44 | T rmax_sq = rmax * rmax; | 43 | T rmax_sq = rmax * rmax; |
44 | + | ||
45 | + int lut_i; | ||
45 | T dx_sq, dy_sq; | 46 | T dx_sq, dy_sq; |
46 | 47 | ||
47 | bb.trim_low(0, 0); //make sure the bounding box doesn't go outside the image | 48 | bb.trim_low(0, 0); //make sure the bounding box doesn't go outside the image |
@@ -49,11 +50,12 @@ namespace stim{ | @@ -49,11 +50,12 @@ namespace stim{ | ||
49 | 50 | ||
50 | int by, bx; | 51 | int by, bx; |
51 | int dx, dy; //coordinate relative to (xi, yi) | 52 | int dx, dy; //coordinate relative to (xi, yi) |
53 | + | ||
52 | T v; | 54 | T v; |
53 | T max_v = 0; //initialize the maximum vote value to zero | 55 | T max_v = 0; //initialize the maximum vote value to zero |
54 | T alpha; | 56 | T alpha; |
55 | - int max_dx = bb.low[0]; | ||
56 | - int max_dy = bb.low[1]; | 57 | + int max_dx = bb.low[0] - xi; |
58 | + int max_dy = bb.low[1] - yi; | ||
57 | for(by = bb.low[1]; by <= bb.high[1]; by++){ //for each element in the bounding box | 59 | for(by = bb.low[1]; by <= bb.high[1]; by++){ //for each element in the bounding box |
58 | dy = by - yi; //calculate the y coordinate of the current point relative to yi | 60 | dy = by - yi; //calculate the y coordinate of the current point relative to yi |
59 | dy_sq = dy * dy; | 61 | dy_sq = dy * dy; |