From a0f09cd529ce6afba149fd491f781ad8dc540e83 Mon Sep 17 00:00:00 2001 From: Jiaming Guo Date: Wed, 7 Dec 2016 15:23:30 -0600 Subject: [PATCH] add #define stack_size in kd-tree --- stim/structures/kdtree.cuh | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/stim/structures/kdtree.cuh b/stim/structures/kdtree.cuh index 2b405e7..b1481ed 100644 --- a/stim/structures/kdtree.cuh +++ b/stim/structures/kdtree.cuh @@ -7,6 +7,7 @@ #ifndef KDTREE_H #define KDTREE_H +#define stack_size 50 #include "device_launch_parameters.h" #include @@ -337,13 +338,13 @@ namespace stim { size_t best_index = 0; int next_nodes_pos = 0; // initialize pop out order index - next_nodes[id * 50 + next_nodes_pos] = cur; // find data that belongs to the very specific thread + next_nodes[id * stack_size + next_nodes_pos] = cur; // find data that belongs to the very specific thread next_nodes_pos++; while (next_nodes_pos) { int next_search_nodes_pos = 0; // record push back order index while (next_nodes_pos) { - cur = next_nodes[id * 50 + next_nodes_pos - 1]; // pop out the last push in one and keep poping out + cur = next_nodes[id * stack_size + next_nodes_pos - 1]; // pop out the last push in one and keep poping out next_nodes_pos--; int split_axis = nodes[cur].level % D; @@ -362,20 +363,20 @@ namespace stim { if (fabs(d) > range) { if (d < 0) { - next_search_nodes[id * 50 + next_search_nodes_pos] = nodes[cur].left; + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].left; next_search_nodes_pos++; } else { - next_search_nodes[id * 50 + next_search_nodes_pos] = nodes[cur].right; + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].right; next_search_nodes_pos++; } } else { - next_search_nodes[id * 50 + next_search_nodes_pos] = nodes[cur].right; + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].right; next_search_nodes_pos++; - next_search_nodes[id * 50 + next_search_nodes_pos] = nodes[cur].left; + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].left; next_search_nodes_pos++; - if (next_search_nodes_pos > 50) { + if (next_search_nodes_pos > stack_size) { printf("Thread conflict might be caused by thread %d, so please try smaller input max_tree_levels\n", id); (*Judge)++; } @@ -383,7 +384,7 @@ namespace stim { } } for (int i = 0; i < next_search_nodes_pos; i++) - next_nodes[id * 50 + i] = next_search_nodes[id * 50 + i]; + next_nodes[id * stack_size + i] = next_search_nodes[id * stack_size + i]; next_nodes_pos = next_search_nodes_pos; } *d_distance = best_distance; @@ -537,8 +538,8 @@ namespace stim { HANDLE_ERROR(cudaMalloc((void**)&d_query_points, sizeof(T) * query_points.size() * D)); HANDLE_ERROR(cudaMalloc((void**)&d_indices, sizeof(size_t) * query_points.size())); HANDLE_ERROR(cudaMalloc((void**)&d_distances, sizeof(T) * query_points.size())); - HANDLE_ERROR(cudaMalloc((void**)&next_nodes, threads * blocks * 50 * sizeof(int))); // STACK size right now is 50, you can change it if you mean to - HANDLE_ERROR(cudaMalloc((void**)&next_search_nodes, threads * blocks * 50 * sizeof(int))); + HANDLE_ERROR(cudaMalloc((void**)&next_nodes, threads * blocks * stack_size * sizeof(int))); // STACK size right now is 50, you can change it if you mean to + HANDLE_ERROR(cudaMalloc((void**)&next_search_nodes, threads * blocks * stack_size * sizeof(int))); HANDLE_ERROR(cudaMemcpy(d_query_points, &query_points[0], sizeof(T) * query_points.size() * D, cudaMemcpyHostToDevice)); search_batch<<>> (d_nodes, d_index, d_reference_points, d_query_points, query_points.size(), d_indices, d_distances, next_nodes, next_search_nodes, Judge); -- libgit2 0.21.4