Merge branch 'master' of https://git.stim.ee.uh.edu/codebase/stimlib

David Mayerich
2 parents a19e3c80 d8c97a2b
Showing 2 changed files with 13 additions and 10 deletions
stim/cuda/ivote/local_max.cuh
stim/cuda/ivote/update_dir_bb.cuh
@@ -10,7 +10,7 @@ namespace stim{
  
 		// this kernel calculates the local maximum for finding the cell centers
 		template<typename T>
-		__global__ void cuda_local_max(T* gpuCenters, T* gpuVote, T final_t, int conn, int x, int y){
+		__global__ void cuda_local_max(T* gpuCenters, T* gpuVote, int conn, int x, int y){
  
 			// calculate the 2D coordinates for this current thread.
 			int xi = blockIdx.x * blockDim.x + threadIdx.x;
@@ -27,7 +27,7 @@ namespace stim{
 			T val = gpuVote[i];
  
 			//compare to the threshold
-			if(val < final_t) return;
+			//if(val < final_t) return;
  
 			//define an array to store indices with same vote value
 			/*int * IdxEq;
@@ -56,11 +56,12 @@ namespace stim{
 					return;
 				}
 			}	*/	
-			gpuCenters[i] = 1;
+			//gpuCenters[i] = 1;
+			gpuCenters[i] = gpuVote[i];
 		}
  
 		template<typename T>
-		void gpu_local_max(T* gpuCenters, T* gpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){
+		void gpu_local_max(T* gpuCenters, T* gpuVote,  unsigned int conn, unsigned int x, unsigned int y){
  
 			unsigned int max_threads = stim::maxThreadsPerBlock();
 			/*dim3 threads(max_threads, 1);
@@ -69,11 +70,11 @@ namespace stim{
 			dim3 blocks(x/threads.x + 1, y/threads.y + 1);
  
 			//call the kernel to find the local maximum.
-			cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, final_t, conn, x, y);
+			cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, conn, x, y);
 		}
  
 		template<typename T>
-		void cpu_local_max(T* cpuCenters, T* cpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){
+		void cpu_local_max(T* cpuCenters, T* cpuVote, unsigned int conn, unsigned int x, unsigned int y){
  
 			//calculate the number of bytes in the array
 			unsigned int bytes = x * y * sizeof(T);
@@ -90,7 +91,7 @@ namespace stim{
 			HANDLE_ERROR(cudaMemcpy(gpuVote, cpuVote, bytes, cudaMemcpyHostToDevice));
  
 			//call the GPU version of the local max function
-			gpu_local_max<T>(gpuCenters, gpuVote, final_t, conn, x, y);
+			gpu_local_max<T>(gpuCenters, gpuVote, conn, x, y);
  
 			//copy the cell centers data to the CPU
 			cudaMemcpy(cpuCenters, gpuCenters, bytes, cudaMemcpyDeviceToHost) ;
@@ -40,8 +40,9 @@ namespace stim{
 			bb.insert(xi + ceil(rmax * cos(theta + phi)), yi + ceil(rmax * sin(theta + phi)));		//insert the final corner into the bounding box
  
 			int x_table = 2*rmax +1;
-			int lut_i;
 			T rmax_sq = rmax * rmax;
+
+			int lut_i;
 			T dx_sq, dy_sq;
  
 			bb.trim_low(0, 0);															//make sure the bounding box doesn't go outside the image
@@ -49,11 +50,12 @@ namespace stim{
  
 			int by, bx;
 			int dx, dy;													//coordinate relative to (xi, yi)
+			
 			T v;
 			T max_v = 0;												//initialize the maximum vote value to zero
 			T alpha;
-			int max_dx = bb.low[0];
-			int max_dy = bb.low[1];
+			int max_dx = bb.low[0] - xi;
+			int max_dy = bb.low[1] - yi;
 			for(by = bb.low[1]; by <= bb.high[1]; by++){					//for each element in the bounding box
 				dy = by - yi;											//calculate the y coordinate of the current point relative to yi
 				dy_sq = dy * dy;