Commit d6245fabb836666628dc3fca60aa270462310ab2
1 parent
7bca968e
Fix the bug in local_max related to casting size_t to int
Showing 1 changed file with 6 additions and 20 deletions
Show diff stats
stim/iVote/ivote2/local_max.cuh
@@ -10,30 +10,22 @@ namespace stim{ | @@ -10,30 +10,22 @@ namespace stim{ | ||
10 | 10 | ||
11 | // this kernel calculates the local maximum for finding the cell centers | 11 | // this kernel calculates the local maximum for finding the cell centers |
12 | template<typename T> | 12 | template<typename T> |
13 | - __global__ void cuda_local_max(T* gpuCenters, T* gpuVote, int conn, size_t x, size_t y){ | 13 | + __global__ void cuda_local_max(T* gpuCenters, T* gpuVote, int conn, int x, int y){ |
14 | 14 | ||
15 | // calculate the 2D coordinates for this current thread. | 15 | // calculate the 2D coordinates for this current thread. |
16 | - size_t xi = blockIdx.x * blockDim.x + threadIdx.x; | ||
17 | - size_t yi = blockIdx.y * blockDim.y + threadIdx.y; | 16 | + int xi = blockIdx.x * blockDim.x + threadIdx.x; |
17 | + int yi = blockIdx.y * blockDim.y + threadIdx.y; | ||
18 | 18 | ||
19 | if(xi >= x || yi >= y) | 19 | if(xi >= x || yi >= y) |
20 | return; | 20 | return; |
21 | 21 | ||
22 | // convert 2D coordinates to 1D | 22 | // convert 2D coordinates to 1D |
23 | - size_t i = yi * x + xi; | 23 | + int i = yi * x + xi; |
24 | 24 | ||
25 | gpuCenters[i] = 0; //initialize the value at this location to zero | 25 | gpuCenters[i] = 0; //initialize the value at this location to zero |
26 | 26 | ||
27 | T val = gpuVote[i]; | 27 | T val = gpuVote[i]; |
28 | 28 | ||
29 | - //compare to the threshold | ||
30 | - //if(val < final_t) return; | ||
31 | - | ||
32 | - //define an array to store indices with same vote value | ||
33 | - /*int * IdxEq; | ||
34 | - IdxEq = new int [2*conn]; | ||
35 | - int n = 0;*/ | ||
36 | - | ||
37 | for(int xl = xi - conn; xl < xi + conn; xl++){ | 29 | for(int xl = xi - conn; xl < xi + conn; xl++){ |
38 | for(int yl = yi - conn; yl < yi + conn; yl++){ | 30 | for(int yl = yi - conn; yl < yi + conn; yl++){ |
39 | if(xl >= 0 && xl < x && yl >= 0 && yl < y){ | 31 | if(xl >= 0 && xl < x && yl >= 0 && yl < y){ |
@@ -42,8 +34,7 @@ namespace stim{ | @@ -42,8 +34,7 @@ namespace stim{ | ||
42 | return; | 34 | return; |
43 | } | 35 | } |
44 | if (gpuVote[il] == val){ | 36 | if (gpuVote[il] == val){ |
45 | - /*IdxEq[n] = il; | ||
46 | - n = n+1;*/ | 37 | + |
47 | if( il > i){ | 38 | if( il > i){ |
48 | return; | 39 | return; |
49 | } | 40 | } |
@@ -51,12 +42,7 @@ namespace stim{ | @@ -51,12 +42,7 @@ namespace stim{ | ||
51 | } | 42 | } |
52 | } | 43 | } |
53 | } | 44 | } |
54 | - /*if (n!=0){ | ||
55 | - if(IdxEq[n/2] !=i){ | ||
56 | - return; | ||
57 | - } | ||
58 | - } */ | ||
59 | - //gpuCenters[i] = 1; | 45 | + |
60 | gpuCenters[i] = gpuVote[i]; | 46 | gpuCenters[i] = gpuVote[i]; |
61 | } | 47 | } |
62 | 48 |