Commit daacc99c097346e9dc02e5be9a1b5a5945e177a1

Authored by Laila Saadatifard
1 parent 800ff264

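Change the local max kernel to not threshold the output: remove the final_t parameter from cuda_local_max, gpu_local_max and cpu_local_max, and write the vote value (gpuVote[i]) to gpuCenters[i] instead of 1. Also initialize max_dx/max_dy in update_dir_bb.cuh relative to (xi, yi).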

stim/cuda/ivote/local_max.cuh
@@ -10,7 +10,7 @@ namespace stim{ @@ -10,7 +10,7 @@ namespace stim{
10 10
11 // this kernel calculates the local maximum for finding the cell centers 11 // this kernel calculates the local maximum for finding the cell centers
12 template<typename T> 12 template<typename T>
13 - __global__ void cuda_local_max(T* gpuCenters, T* gpuVote, T final_t, int conn, int x, int y){ 13 + __global__ void cuda_local_max(T* gpuCenters, T* gpuVote, int conn, int x, int y){
14 14
15 // calculate the 2D coordinates for this current thread. 15 // calculate the 2D coordinates for this current thread.
16 int xi = blockIdx.x * blockDim.x + threadIdx.x; 16 int xi = blockIdx.x * blockDim.x + threadIdx.x;
@@ -27,7 +27,7 @@ namespace stim{ @@ -27,7 +27,7 @@ namespace stim{
27 T val = gpuVote[i]; 27 T val = gpuVote[i];
28 28
29 //compare to the threshold 29 //compare to the threshold
30 - if(val < final_t) return; 30 + //if(val < final_t) return;
31 31
32 //define an array to store indices with same vote value 32 //define an array to store indices with same vote value
33 /*int * IdxEq; 33 /*int * IdxEq;
@@ -56,11 +56,12 @@ namespace stim{ @@ -56,11 +56,12 @@ namespace stim{
56 return; 56 return;
57 } 57 }
58 } */ 58 } */
59 - gpuCenters[i] = 1; 59 + //gpuCenters[i] = 1;
  60 + gpuCenters[i] = gpuVote[i];
60 } 61 }
61 62
62 template<typename T> 63 template<typename T>
63 - void gpu_local_max(T* gpuCenters, T* gpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){ 64 + void gpu_local_max(T* gpuCenters, T* gpuVote, unsigned int conn, unsigned int x, unsigned int y){
64 65
65 unsigned int max_threads = stim::maxThreadsPerBlock(); 66 unsigned int max_threads = stim::maxThreadsPerBlock();
66 /*dim3 threads(max_threads, 1); 67 /*dim3 threads(max_threads, 1);
@@ -69,11 +70,11 @@ namespace stim{ @@ -69,11 +70,11 @@ namespace stim{
69 dim3 blocks(x/threads.x + 1, y/threads.y + 1); 70 dim3 blocks(x/threads.x + 1, y/threads.y + 1);
70 71
71 //call the kernel to find the local maximum. 72 //call the kernel to find the local maximum.
72 - cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, final_t, conn, x, y); 73 + cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, conn, x, y);
73 } 74 }
74 75
75 template<typename T> 76 template<typename T>
76 - void cpu_local_max(T* cpuCenters, T* cpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){ 77 + void cpu_local_max(T* cpuCenters, T* cpuVote, unsigned int conn, unsigned int x, unsigned int y){
77 78
78 //calculate the number of bytes in the array 79 //calculate the number of bytes in the array
79 unsigned int bytes = x * y * sizeof(T); 80 unsigned int bytes = x * y * sizeof(T);
@@ -90,7 +91,7 @@ namespace stim{ @@ -90,7 +91,7 @@ namespace stim{
90 HANDLE_ERROR(cudaMemcpy(gpuVote, cpuVote, bytes, cudaMemcpyHostToDevice)); 91 HANDLE_ERROR(cudaMemcpy(gpuVote, cpuVote, bytes, cudaMemcpyHostToDevice));
91 92
92 //call the GPU version of the local max function 93 //call the GPU version of the local max function
93 - gpu_local_max<T>(gpuCenters, gpuVote, final_t, conn, x, y); 94 + gpu_local_max<T>(gpuCenters, gpuVote, conn, x, y);
94 95
95 //copy the cell centers data to the CPU 96 //copy the cell centers data to the CPU
96 cudaMemcpy(cpuCenters, gpuCenters, bytes, cudaMemcpyDeviceToHost) ; 97 cudaMemcpy(cpuCenters, gpuCenters, bytes, cudaMemcpyDeviceToHost) ;
stim/cuda/ivote/update_dir_bb.cuh
@@ -40,8 +40,9 @@ namespace stim{ @@ -40,8 +40,9 @@ namespace stim{
40 bb.insert(xi + ceil(rmax * cos(theta + phi)), yi + ceil(rmax * sin(theta + phi))); //insert the final corner into the bounding box 40 bb.insert(xi + ceil(rmax * cos(theta + phi)), yi + ceil(rmax * sin(theta + phi))); //insert the final corner into the bounding box
41 41
42 int x_table = 2*rmax +1; 42 int x_table = 2*rmax +1;
43 - int lut_i;  
44 T rmax_sq = rmax * rmax; 43 T rmax_sq = rmax * rmax;
  44 +
  45 + int lut_i;
45 T dx_sq, dy_sq; 46 T dx_sq, dy_sq;
46 47
47 bb.trim_low(0, 0); //make sure the bounding box doesn't go outside the image 48 bb.trim_low(0, 0); //make sure the bounding box doesn't go outside the image
@@ -49,11 +50,12 @@ namespace stim{ @@ -49,11 +50,12 @@ namespace stim{
49 50
50 int by, bx; 51 int by, bx;
51 int dx, dy; //coordinate relative to (xi, yi) 52 int dx, dy; //coordinate relative to (xi, yi)
  53 +
52 T v; 54 T v;
53 T max_v = 0; //initialize the maximum vote value to zero 55 T max_v = 0; //initialize the maximum vote value to zero
54 T alpha; 56 T alpha;
55 - int max_dx = bb.low[0];  
56 - int max_dy = bb.low[1]; 57 + int max_dx = bb.low[0] - xi;
  58 + int max_dy = bb.low[1] - yi;
57 for(by = bb.low[1]; by <= bb.high[1]; by++){ //for each element in the bounding box 59 for(by = bb.low[1]; by <= bb.high[1]; by++){ //for each element in the bounding box
58 dy = by - yi; //calculate the y coordinate of the current point relative to yi 60 dy = by - yi; //calculate the y coordinate of the current point relative to yi
59 dy_sq = dy * dy; 61 dy_sq = dy * dy;