diff --git a/stim/cuda/ivote/local_max.cuh b/stim/cuda/ivote/local_max.cuh
index b342a6f..b65f8a0 100644
--- a/stim/cuda/ivote/local_max.cuh
+++ b/stim/cuda/ivote/local_max.cuh
@@ -10,7 +10,7 @@ namespace stim{
 
 		// this kernel calculates the local maximum for finding the cell centers
 		template<typename T>
-		__global__ void cuda_local_max(T* gpuCenters, T* gpuVote, T final_t, int conn, int x, int y){
+		__global__ void cuda_local_max(T* gpuCenters, T* gpuVote, int conn, int x, int y){
 
 			// calculate the 2D coordinates for this current thread.
 			int xi = blockIdx.x * blockDim.x + threadIdx.x;
@@ -27,7 +27,7 @@ namespace stim{
 			T val = gpuVote[i];
 			
 			//compare to the threshold
-			if(val < final_t) return;
+			//if(val < final_t) return;
 			
 			//define an array to store indices with same vote value
 			/*int * IdxEq;
@@ -56,11 +56,12 @@ namespace stim{
 					return;
 				}
 			}	*/	
-			gpuCenters[i] = 1;
+			//gpuCenters[i] = 1;
+			gpuCenters[i] = gpuVote[i];
 		}
 		
 		template<typename T>
-		void gpu_local_max(T* gpuCenters, T* gpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){
+		void gpu_local_max(T* gpuCenters, T* gpuVote,  unsigned int conn, unsigned int x, unsigned int y){
 
 			unsigned int max_threads = stim::maxThreadsPerBlock();
 			/*dim3 threads(max_threads, 1);
@@ -69,11 +70,11 @@ namespace stim{
 			dim3 blocks(x/threads.x + 1, y/threads.y + 1);
 			
 			//call the kernel to find the local maximum.
-			cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, final_t, conn, x, y);
+			cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, conn, x, y);
 		}
 
 		template<typename T>
-		void cpu_local_max(T* cpuCenters, T* cpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){
+		void cpu_local_max(T* cpuCenters, T* cpuVote, unsigned int conn, unsigned int x, unsigned int y){
 		
 			//calculate the number of bytes in the array
 			unsigned int bytes = x * y * sizeof(T);
@@ -90,7 +91,7 @@ namespace stim{
 			HANDLE_ERROR(cudaMemcpy(gpuVote, cpuVote, bytes, cudaMemcpyHostToDevice));
 						
 			//call the GPU version of the local max function
-			gpu_local_max<T>(gpuCenters, gpuVote, final_t, conn, x, y);
+			gpu_local_max<T>(gpuCenters, gpuVote, conn, x, y);
 							
 			//copy the cell centers data to the CPU
 			cudaMemcpy(cpuCenters, gpuCenters, bytes, cudaMemcpyDeviceToHost) ;
diff --git a/stim/cuda/ivote/update_dir_bb.cuh b/stim/cuda/ivote/update_dir_bb.cuh
index bb04f23..f3c324d 100644
--- a/stim/cuda/ivote/update_dir_bb.cuh
+++ b/stim/cuda/ivote/update_dir_bb.cuh
@@ -40,8 +40,9 @@ namespace stim{
 			bb.insert(xi + ceil(rmax * cos(theta + phi)), yi + ceil(rmax * sin(theta + phi)));		//insert the final corner into the bounding box
 
 			int x_table = 2*rmax +1;
-			int lut_i;
 			T rmax_sq = rmax * rmax;
+
+			int lut_i;
 			T dx_sq, dy_sq;
 
 			bb.trim_low(0, 0);															//make sure the bounding box doesn't go outside the image
@@ -49,11 +50,12 @@ namespace stim{
 
 			int by, bx;
 			int dx, dy;													//coordinate relative to (xi, yi)
+			
 			T v;
 			T max_v = 0;												//initialize the maximum vote value to zero
 			T alpha;
-			int max_dx = bb.low[0];
-			int max_dy = bb.low[1];
+			int max_dx = bb.low[0] - xi;
+			int max_dy = bb.low[1] - yi;
 			for(by = bb.low[1]; by <= bb.high[1]; by++){					//for each element in the bounding box
 				dy = by - yi;											//calculate the y coordinate of the current point relative to yi
 				dy_sq = dy * dy;
--
libgit2 0.21.4