diff --git a/stim/cuda/ivote/local_max.cuh b/stim/cuda/ivote/local_max.cuh index b342a6f..b65f8a0 100644 --- a/stim/cuda/ivote/local_max.cuh +++ b/stim/cuda/ivote/local_max.cuh @@ -10,7 +10,7 @@ namespace stim{ // this kernel calculates the local maximum for finding the cell centers template - __global__ void cuda_local_max(T* gpuCenters, T* gpuVote, T final_t, int conn, int x, int y){ + __global__ void cuda_local_max(T* gpuCenters, T* gpuVote, int conn, int x, int y){ // calculate the 2D coordinates for this current thread. int xi = blockIdx.x * blockDim.x + threadIdx.x; @@ -27,7 +27,7 @@ namespace stim{ T val = gpuVote[i]; //compare to the threshold - if(val < final_t) return; + //if(val < final_t) return; //define an array to store indices with same vote value /*int * IdxEq; @@ -56,11 +56,12 @@ namespace stim{ return; } } */ - gpuCenters[i] = 1; + //gpuCenters[i] = 1; + gpuCenters[i] = gpuVote[i]; } template - void gpu_local_max(T* gpuCenters, T* gpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){ + void gpu_local_max(T* gpuCenters, T* gpuVote, unsigned int conn, unsigned int x, unsigned int y){ unsigned int max_threads = stim::maxThreadsPerBlock(); /*dim3 threads(max_threads, 1); @@ -69,11 +70,11 @@ namespace stim{ dim3 blocks(x/threads.x + 1, y/threads.y + 1); //call the kernel to find the local maximum. - cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, final_t, conn, x, y); + cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, conn, x, y); } template - void cpu_local_max(T* cpuCenters, T* cpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){ + void cpu_local_max(T* cpuCenters, T* cpuVote, unsigned int conn, unsigned int x, unsigned int y){ //calculate the number of bytes in the array unsigned int bytes = x * y * sizeof(T); @@ -90,7 +91,7 @@ namespace stim{ HANDLE_ERROR(cudaMemcpy(gpuVote, cpuVote, bytes, cudaMemcpyHostToDevice)); //call the GPU version of the local max function - gpu_local_max(gpuCenters, gpuVote, final_t, conn, x, y); + gpu_local_max(gpuCenters, gpuVote, conn, x, y); //copy the cell centers data to the CPU cudaMemcpy(cpuCenters, gpuCenters, bytes, cudaMemcpyDeviceToHost) ; diff --git a/stim/cuda/ivote/update_dir_bb.cuh b/stim/cuda/ivote/update_dir_bb.cuh index bb04f23..f3c324d 100644 --- a/stim/cuda/ivote/update_dir_bb.cuh +++ b/stim/cuda/ivote/update_dir_bb.cuh @@ -40,8 +40,9 @@ namespace stim{ bb.insert(xi + ceil(rmax * cos(theta + phi)), yi + ceil(rmax * sin(theta + phi))); //insert the final corner into the bounding box int x_table = 2*rmax +1; - int lut_i; T rmax_sq = rmax * rmax; + + int lut_i; T dx_sq, dy_sq; bb.trim_low(0, 0); //make sure the bounding box doesn't go outside the image @@ -49,11 +50,12 @@ namespace stim{ int by, bx; int dx, dy; //coordinate relative to (xi, yi) + T v; T max_v = 0; //initialize the maximum vote value to zero T alpha; - int max_dx = bb.low[0]; - int max_dy = bb.low[1]; + int max_dx = bb.low[0] - xi; + int max_dy = bb.low[1] - yi; for(by = bb.low[1]; by <= bb.high[1]; by++){ //for each element in the bounding box dy = by - yi; //calculate the y coordinate of the current point relative to yi dy_sq = dy * dy; -- libgit2 0.21.4