Add #define stack_size to the kd-tree header, replacing the hard-coded per-thread stack depth of 50

Jiaming Guo
1 parent fcd2eb7c
Showing 1 changed file with 11 additions and 10 deletions Show diff stats
stim/structures/kdtree.cuh
@@ -7,6 +7,7 @@
  
 #ifndef KDTREE_H
 #define KDTREE_H
+#define stack_size 50
  
 #include "device_launch_parameters.h"
 #include <cuda.h>
@@ -337,13 +338,13 @@ namespace stim {
 		size_t best_index = 0;
  
 		int next_nodes_pos = 0;																				// initialize pop out order index
-		next_nodes[id * 50 + next_nodes_pos] = cur;															// find data that belongs to the very specific thread
+		next_nodes[id * stack_size + next_nodes_pos] = cur;															// find data that belongs to the very specific thread
 		next_nodes_pos++;
  
 		while (next_nodes_pos) {
 			int next_search_nodes_pos = 0;																	// record push back order index
 			while (next_nodes_pos) {
-				cur = next_nodes[id * 50 + next_nodes_pos - 1];												// pop out the last push in one and keep poping out
+				cur = next_nodes[id * stack_size + next_nodes_pos - 1];												// pop out the last push in one and keep poping out
 				next_nodes_pos--;
 				int split_axis = nodes[cur].level % D;
  
@@ -362,20 +363,20 @@ namespace stim {
  
 					if (fabs(d) > range) {
 						if (d < 0) {
-							next_search_nodes[id * 50 + next_search_nodes_pos] = nodes[cur].left;
+							next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].left;
 							next_search_nodes_pos++;
 						}
 						else {
-							next_search_nodes[id * 50 + next_search_nodes_pos] = nodes[cur].right;
+							next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].right;
 							next_search_nodes_pos++;
 						}
 					}
 					else {
-						next_search_nodes[id * 50 + next_search_nodes_pos] = nodes[cur].right;
+						next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].right;
 						next_search_nodes_pos++;
-						next_search_nodes[id * 50 + next_search_nodes_pos] = nodes[cur].left;
+						next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].left;
 						next_search_nodes_pos++;
-						if (next_search_nodes_pos > 50) {
+						if (next_search_nodes_pos > stack_size) {
 							printf("Thread conflict might be caused by thread %d, so please try smaller input max_tree_levels\n", id);
 							(*Judge)++;
 						}
@@ -383,7 +384,7 @@ namespace stim {
 				}
 			}
 			for (int i = 0; i < next_search_nodes_pos; i++)
-				next_nodes[id * 50 + i] = next_search_nodes[id * 50 + i];
+				next_nodes[id * stack_size + i] = next_search_nodes[id * stack_size + i];
 			next_nodes_pos = next_search_nodes_pos;										
 		}
 		*d_distance = best_distance;
@@ -537,8 +538,8 @@ namespace stim {
 			HANDLE_ERROR(cudaMalloc((void**)&d_query_points, sizeof(T) * query_points.size() * D));
 			HANDLE_ERROR(cudaMalloc((void**)&d_indices, sizeof(size_t) * query_points.size()));
 			HANDLE_ERROR(cudaMalloc((void**)&d_distances, sizeof(T) * query_points.size()));
-			HANDLE_ERROR(cudaMalloc((void**)&next_nodes, threads * blocks * 50 * sizeof(int)));																	// STACK size right now is 50, you can change it if you mean to
-			HANDLE_ERROR(cudaMalloc((void**)&next_search_nodes, threads * blocks * 50 * sizeof(int)));	
+			HANDLE_ERROR(cudaMalloc((void**)&next_nodes, threads * blocks * stack_size * sizeof(int)));																	// each thread gets stack_size ints of stack space; change the stack_size macro at the top of this file to resize
+			HANDLE_ERROR(cudaMalloc((void**)&next_search_nodes, threads * blocks * stack_size * sizeof(int)));	
 			HANDLE_ERROR(cudaMemcpy(d_query_points, &query_points[0], sizeof(T) * query_points.size() * D, cudaMemcpyHostToDevice));
  
 			search_batch<<<blocks, threads>>> (d_nodes, d_index, d_reference_points, d_query_points, query_points.size(), d_indices, d_distances, next_nodes, next_search_nodes, Judge);