Commit fdfdeda06e048f9dfe43a60ea8a91e5b8da17ac2

Authored by Jiaming Guo
1 parent d31531d8

fix minor mistakes

Showing 2 changed files with 300 additions and 343 deletions
stim/structures/kdtree.cuh
1   -// right now the size of CUDA STACK is set to 1000, increase it if you mean to make deeper tree
  1 +// right now the CUDA STACK size is set to 50; increase it if you intend to build a deeper tree
2 2 // data should be stored in row-major
3 3 // x1,x2,x3,x4,x5......
4 4 // y1,y2,y3,y4,y5......
... ... @@ -22,16 +22,16 @@
22 22 #include <stim/visualization/aabbn.h>
23 23  
24 24 namespace stim {
25   - namespace kdtree {
  25 + namespace cpu_kdtree {
26 26 template<typename T, int D> // typename refers to float or double while D refers to dimension of points
27 27 struct point {
28 28 T dim[D]; // create a structure to store every one input point
29 29 };
30 30  
31 31 template<typename T>
32   - class kdnode {
  32 + class cpu_kdnode {
33 33 public:
34   - kdnode() { // constructor for initializing a kdnode
  34 + cpu_kdnode() { // constructor for initializing a kdnode
35 35 parent = NULL; // set every node's parent, left and right kdnode pointers to NULL
36 36 left = NULL;
37 37 right = NULL;
... ... @@ -42,258 +42,12 @@ namespace stim {
42 42 }
43 43 int idx; // index of current node
44 44 int parent_idx, left_idx, right_idx; // index of parent, left and right nodes
45   - kdnode *parent, *left, *right; // parent, left and right kdnodes
  45 + cpu_kdnode *parent, *left, *right; // parent, left and right kdnodes
46 46 T split_value; // splitting value of current node
47 47 std::vector <size_t> indices; // it indicates the points' indices that current node has
48 48 size_t level; // tree level of current node
49 49 };
50   - } // end of namespace kdtree
51   -
52   - template <typename T, int D = 3> // set dimension of data to default 3
53   - class cpu_kdtree {
54   - protected:
55   - int current_axis; // current judging axis
56   - int n_id; // store the total number of nodes
57   - std::vector < typename kdtree::point<T, D> > *tmp_points; // transfer or temperary points
58   - std::vector < typename kdtree::point<T, D> > cpu_tmp_points; // for cpu searching
59   - kdtree::kdnode<T> *root; // root node
60   - static cpu_kdtree<T, D> *cur_tree_ptr;
61   - public:
62   - cpu_kdtree() { // constructor for creating a cpu_kdtree
63   - cur_tree_ptr = this; // create a class pointer points to the current class value
64   - n_id = 0; // set total number of points to default 0
65   - }
66   -
67   - ~cpu_kdtree() { // destructor of cpu_kdtree
68   - std::vector <kdtree::kdnode<T>*> next_nodes;
69   - next_nodes.push_back(root);
70   - while (next_nodes.size()) {
71   - std::vector <kdtree::kdnode<T>*> next_search_nodes;
72   - while (next_nodes.size()) {
73   - kdtree::kdnode<T> *cur = next_nodes.back();
74   - next_nodes.pop_back();
75   - if (cur->left)
76   - next_search_nodes.push_back(cur->left);
77   - if (cur->right)
78   - next_search_nodes.push_back(cur->right);
79   - delete cur;
80   - }
81   - next_nodes = next_search_nodes;
82   - }
83   - root = NULL;
84   - }
85   -
86   - void cpu_create(std::vector < typename kdtree::point<T, D> > &reference_points, size_t max_levels) {
87   - tmp_points = &reference_points;
88   - root = new kdtree::kdnode<T>(); // initializing the root node
89   - root->idx = n_id++; // the index of root is 0
90   - root->level = 0; // tree level begins at 0
91   - root->indices.resize(reference_points.size()); // get the number of points
92   - for (size_t i = 0; i < reference_points.size(); i++) {
93   - root->indices[i] = i; // set indices of input points
94   - }
95   - std::vector <kdtree::kdnode<T>*> next_nodes; // next nodes
96   - next_nodes.push_back(root); // push back the root node
97   - while (next_nodes.size()) {
98   - std::vector <kdtree::kdnode<T>*> next_search_nodes; // next search nodes
99   - while (next_nodes.size()) { // two same WHILE is because we need to make a new vector to store nodes for search
100   - kdtree::kdnode<T> *current_node = next_nodes.back(); // handle node one by one (right first)
101   - next_nodes.pop_back(); // pop out current node in order to store next round of nodes
102   - if (current_node->level < max_levels) {
103   - if (current_node->indices.size() > 1) { // split if the nonleaf node contains more than one point
104   - kdtree::kdnode<T> *left = new kdtree::kdnode<T>();
105   - kdtree::kdnode<T> *right = new kdtree::kdnode<T>();
106   - left->idx = n_id++; // set the index of current node's left node
107   - right->idx = n_id++;
108   - split(current_node, left, right); // split left and right and determine a node
109   - std::vector <size_t> temp; // empty vecters of int
110   - //temp.resize(current_node->indices.size());
111   - current_node->indices.swap(temp); // clean up current node's indices
112   - current_node->left = left;
113   - current_node->right = right;
114   - current_node->left_idx = left->idx;
115   - current_node->right_idx = right->idx;
116   - if (right->indices.size())
117   - next_search_nodes.push_back(right); // left pop out first
118   - if (left->indices.size())
119   - next_search_nodes.push_back(left);
120   - }
121   - }
122   - }
123   - next_nodes = next_search_nodes; // go deeper within the tree
124   - }
125   - }
126   -
127   - static bool sort_points(const size_t a, const size_t b) { // create functor for std::sort
128   - std::vector < typename kdtree::point<T, D> > &pts = *cur_tree_ptr->tmp_points; // put cur_tree_ptr to current input points' pointer
129   - return pts[a].dim[cur_tree_ptr->current_axis] < pts[b].dim[cur_tree_ptr->current_axis];
130   - }
131   -
132   - void split(kdtree::kdnode<T> *cur, kdtree::kdnode<T> *left, kdtree::kdnode<T> *right) {
133   - std::vector < typename kdtree::point<T, D> > &pts = *tmp_points;
134   - current_axis = cur->level % D; // indicate the judicative dimension or axis
135   - std::sort(cur->indices.begin(), cur->indices.end(), sort_points); // using SortPoints as comparison function to sort the data
136   - size_t mid_value = cur->indices[cur->indices.size() / 2]; // odd in the mid_value, even take the floor
137   - cur->split_value = pts[mid_value].dim[current_axis]; // get the parent node
138   - left->parent = cur; // set the parent of the next search nodes to current node
139   - right->parent = cur;
140   - left->level = cur->level + 1; // level + 1
141   - right->level = cur->level + 1;
142   - left->parent_idx = cur->idx; // set its parent node's index
143   - right->parent_idx = cur->idx;
144   - for (size_t i = 0; i < cur->indices.size(); i++) { // split into left and right half-space one by one
145   - size_t idx = cur->indices[i];
146   - if (pts[idx].dim[current_axis] < cur->split_value)
147   - left->indices.push_back(idx);
148   - else
149   - right->indices.push_back(idx);
150   - }
151   - }
152   -
153   - void create(T *h_reference_points, size_t reference_count, size_t max_levels) {
154   - std::vector < typename kdtree::point<T, D> > reference_points(reference_count); // restore the reference points in particular way
155   - for (size_t j = 0; j < reference_count; j++)
156   - for (size_t i = 0; i < D; i++)
157   - reference_points[j].dim[i] = h_reference_points[j * D + i];
158   - cpu_create(reference_points, max_levels);
159   - cpu_tmp_points = *tmp_points;
160   - }
161   -
162   - int get_num_nodes() const { // get the total number of nodes
163   - return n_id;
164   - }
165   -
166   - kdtree::kdnode<T>* get_root() const { // get the root node of tree
167   - return root;
168   - }
169   -
170   - T cpu_distance(const kdtree::point<T, D> &a, const kdtree::point<T, D> &b) {
171   - T distance = 0;
172   -
173   - for (size_t i = 0; i < D; i++) {
174   - T d = a.dim[i] - b.dim[i];
175   - distance += d*d;
176   - }
177   - return distance;
178   - }
179   -
180   - void cpu_search_at_node(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, size_t *index, T *distance, kdtree::kdnode<T> **node) {
181   - T best_distance = FLT_MAX; // initialize the best distance to max of floating point
182   - size_t best_index = 0;
183   - std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points;
184   - while (true) {
185   - size_t split_axis = cur->level % D;
186   - if (cur->left == NULL) { // risky but acceptable, same goes for right because left and right are in same pace
187   - *node = cur; // pointer points to a pointer
188   - for (size_t i = 0; i < cur->indices.size(); i++) {
189   - size_t idx = cur->indices[i];
190   - T d = cpu_distance(query, pts[idx]); // compute distances
191   - /// if we want to compute k nearest neighbor, we can input the last resul
192   - /// (last_best_dist < dist < best_dist) to select the next point until reaching to k
193   - if (d < best_distance) {
194   - best_distance = d;
195   - best_index = idx; // record the nearest neighbor index
196   - }
197   - }
198   - break; // find the target point then break the loop
199   - }
200   - else if (query.dim[split_axis] < cur->split_value) { // if it has son node, visit the next node on either left side or right side
201   - cur = cur->left;
202   - }
203   - else {
204   - cur = cur->right;
205   - }
206   - }
207   - *index = best_index;
208   - *distance = best_distance;
209   - }
210   -
211   - void cpu_search_at_node_range(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, T range, size_t *index, T *distance) {
212   - T best_distance = FLT_MAX; // initialize the best distance to max of floating point
213   - size_t best_index = 0;
214   - std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points;
215   - std::vector < typename kdtree::kdnode<T>*> next_node;
216   - next_node.push_back(cur);
217   - while (next_node.size()) {
218   - std::vector<typename kdtree::kdnode<T>*> next_search;
219   - while (next_node.size()) {
220   - cur = next_node.back();
221   - next_node.pop_back();
222   - size_t split_axis = cur->level % D;
223   - if (cur->left == NULL) {
224   - for (size_t i = 0; i < cur->indices.size(); i++) {
225   - size_t idx = cur->indices[i];
226   - T d = cpu_distance(query, pts[idx]);
227   - if (d < best_distance) {
228   - best_distance = d;
229   - best_index = idx;
230   - }
231   - }
232   - }
233   - else {
234   - T d = query.dim[split_axis] - cur->split_value; // computer distance along specific axis or dimension
235   - /// there are three possibilities: on either left or right, and on both left and right
236   - if (fabs(d) > range) { // absolute value of floating point to see if distance will be larger that best_dist
237   - if (d < 0)
238   - next_search.push_back(cur->left); // every left[split_axis] is less and equal to cur->split_value, so it is possible to find the nearest point in this region
239   - else
240   - next_search.push_back(cur->right);
241   - }
242   - else { // it is possible that nereast neighbor will appear on both left and right
243   - next_search.push_back(cur->left);
244   - next_search.push_back(cur->right);
245   - }
246   - }
247   - }
248   - next_node = next_search; // pop out at least one time
249   - }
250   - *index = best_index;
251   - *distance = best_distance;
252   - }
253   -
254   - void cpu_search(T *h_query_points, size_t query_count, size_t *h_indices, T *h_distances) {
255   - /// first convert the input query point into specific type
256   - kdtree::point<T, D> query;
257   - for (size_t j = 0; j < query_count; j++) {
258   - for (size_t i = 0; i < D; i++)
259   - query.dim[i] = h_query_points[j * D + i];
260   - /// find the nearest node, this will be the upper bound for the next time searching
261   - kdtree::kdnode<T> *best_node = NULL;
262   - T best_distance = FLT_MAX;
263   - size_t best_index = 0;
264   - T radius = 0; // radius for range
265   - cpu_search_at_node(root, query, &best_index, &best_distance, &best_node); // simple search to rougly determine a result for next search step
266   - radius = sqrt(best_distance); // It is possible that nearest will appear in another region
267   - /// find other possibilities
268   - kdtree::kdnode<T> *cur = best_node;
269   - while (cur->parent != NULL) { // every node that you pass will be possible to be the best node
270   - /// go up
271   - kdtree::kdnode<T> *parent = cur->parent; // travel back to every node that we pass through
272   - size_t split_axis = (parent->level) % D;
273   - /// search other nodes
274   - size_t tmp_index;
275   - T tmp_distance = FLT_MAX;
276   - if (fabs(parent->split_value - query.dim[split_axis]) <= radius) {
277   - /// search opposite node
278   - if (parent->left != cur)
279   - cpu_search_at_node_range(parent->left, query, radius, &tmp_index, &tmp_distance); // to see whether it is its mother node's left son node
280   - else
281   - cpu_search_at_node_range(parent->right, query, radius, &tmp_index, &tmp_distance);
282   - }
283   - if (tmp_distance < best_distance) {
284   - best_distance = tmp_distance;
285   - best_index = tmp_index;
286   - }
287   - cur = parent;
288   - }
289   - h_indices[j] = best_index;
290   - h_distances[j] = best_distance;
291   - }
292   - }
293   - }; //end class kdtree
294   -
295   - template <typename T, int D>
296   - cpu_kdtree<T, D>* cpu_kdtree<T, D>::cur_tree_ptr = NULL; // definition of cur_tree_ptr pointer points to the current class
  50 + } // end of namespace cpu_kdtree
297 51  
298 52 template <typename T>
299 53 struct cuda_kdnode {
... ... @@ -305,7 +59,7 @@ namespace stim {
305 59 };
306 60  
307 61 template <typename T, int D>
308   - __device__ T gpu_distance(kdtree::point<T, D> &a, kdtree::point<T, D> &b) {
  62 + __device__ T gpu_distance(cpu_kdtree::point<T, D> &a, cpu_kdtree::point<T, D> &b) {
309 63 T distance = 0;
310 64  
311 65 for (size_t i = 0; i < D; i++) {
... ... @@ -316,7 +70,7 @@ namespace stim {
316 70 }
317 71  
318 72 template <typename T, int D>
319   - __device__ void search_at_node(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, int cur, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, int *d_node) {
  73 + __device__ void search_at_node(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, int cur, cpu_kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, int *d_node) {
320 74 T best_distance = FLT_MAX;
321 75 size_t best_index = 0;
322 76  
... ... @@ -346,7 +100,7 @@ namespace stim {
346 100 }
347 101  
348 102 template <typename T, int D>
349   - __device__ void search_at_node_range(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, int cur, T range, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
  103 + __device__ void search_at_node_range(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> &d_query_point, int cur, T range, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
350 104 T best_distance = FLT_MAX;
351 105 size_t best_index = 0;
352 106  
... ... @@ -405,7 +159,7 @@ namespace stim {
405 159 }
406 160  
407 161 template <typename T, int D>
408   - __device__ void search(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
  162 + __device__ void search(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
409 163 int best_node = 0;
410 164 T best_distance = FLT_MAX;
411 165 size_t best_index = 0;
... ... @@ -438,7 +192,7 @@ namespace stim {
438 192 }
439 193  
440 194 template <typename T, int D>
441   - __global__ void search_batch(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> *d_query_points, size_t d_query_count, size_t *d_indices, T *d_distances, int *next_nodes, int *next_search_nodes, int *Judge) {
  195 + __global__ void search_batch(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> *d_query_points, size_t d_query_count, size_t *d_indices, T *d_distances, int *next_nodes, int *next_search_nodes, int *Judge) {
442 196 size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
443 197 if (idx >= d_query_count) return; // avoid segfault
444 198  
... ... @@ -446,11 +200,11 @@ namespace stim {
446 200 }
447 201  
448 202 template <typename T, int D>
449   - void search_stream(cuda_kdnode<T> *d_nodes, size_t *d_index, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> *query_stream_points, size_t stream_count, size_t *indices, T *distances) {
  203 + void search_stream(cuda_kdnode<T> *d_nodes, size_t *d_index, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> *query_stream_points, size_t stream_count, size_t *indices, T *distances) {
450 204 unsigned int threads = (unsigned int)(stream_count > 1024 ? 1024 : stream_count);
451 205 unsigned int blocks = (unsigned int)(stream_count / threads + (stream_count % threads ? 1 : 0));
452 206  
453   - kdtree::point<T, D> *d_query_points;
  207 + cpu_kdtree::point<T, D> *d_query_points;
454 208 size_t *d_indices;
455 209 T *d_distances;
456 210  
... ... @@ -480,26 +234,121 @@ namespace stim {
480 234 HANDLE_ERROR(cudaFree(d_distances));
481 235 }
482 236  
483   - template <typename T, int D = 3>
484   - class cuda_kdtree {
  237 + template <typename T, int D = 3> // the data dimension D defaults to 3
  238 + class kdtree {
485 239 protected:
486   - cuda_kdnode<T> *d_nodes;
487   - size_t *d_index;
488   - kdtree::point<T, D>* d_reference_points;
489   - size_t npts;
490   - int num_nodes;
  240 + int current_axis; // current splitting axis
  241 + int n_id; // store the total number of nodes
  242 + std::vector < typename cpu_kdtree::point<T, D> > *tmp_points; // temporary pointer to the input points used during construction
  243 + std::vector < typename cpu_kdtree::point<T, D> > cpu_tmp_points; // for cpu searching
  244 + cpu_kdtree::cpu_kdnode<T> *root; // root node
  245 + static kdtree<T, D> *cur_tree_ptr;
  246 + #ifdef __CUDACC__
  247 + cuda_kdnode<T> *d_nodes;
  248 + size_t *d_index;
  249 + cpu_kdtree::point<T, D>* d_reference_points;
  250 + size_t npts;
  251 + int num_nodes;
  252 + #endif
491 253 public:
492   - ~cuda_kdtree() {
  254 + kdtree() { // constructor for creating a kdtree
  255 + cur_tree_ptr = this; // point the static class pointer at the current instance
  256 + n_id = 0; // set the total number of nodes to 0
  257 + }
  258 +
  259 + ~kdtree() { // destructor of kdtree
  260 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_nodes;
  261 + next_nodes.push_back(root);
  262 + while (next_nodes.size()) {
  263 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_search_nodes;
  264 + while (next_nodes.size()) {
  265 + cpu_kdtree::cpu_kdnode<T> *cur = next_nodes.back();
  266 + next_nodes.pop_back();
  267 + if (cur->left)
  268 + next_search_nodes.push_back(cur->left);
  269 + if (cur->right)
  270 + next_search_nodes.push_back(cur->right);
  271 + delete cur;
  272 + }
  273 + next_nodes = next_search_nodes;
  274 + }
  275 + root = NULL;
  276 + #ifdef __CUDACC__
493 277 HANDLE_ERROR(cudaFree(d_nodes));
494 278 HANDLE_ERROR(cudaFree(d_index));
495 279 HANDLE_ERROR(cudaFree(d_reference_points));
  280 + #endif
496 281 }
497   -
498   - /// Create a KD-tree given a pointer to an array of reference points and the number of reference points
499   - /// @param h_reference_points is a host array containing the reference points in (x0, y0, z0, ...., ) order
500   - /// @param reference_count is the number of reference point in the array
501   - /// @param max_levels is the deepest number of tree levels allowed
502   - void create(T *h_reference_points, size_t reference_count, size_t max_levels = 3) {
  282 +
  283 + void cpu_create(std::vector < typename cpu_kdtree::point<T, D> > &reference_points, size_t max_levels) {
  284 + tmp_points = &reference_points;
  285 + root = new cpu_kdtree::cpu_kdnode<T>(); // initializing the root node
  286 + root->idx = n_id++; // the index of root is 0
  287 + root->level = 0; // tree level begins at 0
  288 + root->indices.resize(reference_points.size()); // get the number of points
  289 + for (size_t i = 0; i < reference_points.size(); i++) {
  290 + root->indices[i] = i; // set indices of input points
  291 + }
  292 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_nodes; // next nodes
  293 + next_nodes.push_back(root); // push back the root node
  294 + while (next_nodes.size()) {
  295 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_search_nodes; // next search nodes
  296 + while (next_nodes.size()) { // two nested while loops: the inner loop processes the current level while a new vector collects the nodes for the next level
  297 + cpu_kdtree::cpu_kdnode<T> *current_node = next_nodes.back(); // handle nodes one at a time (right child first)
  298 + next_nodes.pop_back(); // pop out current node in order to store next round of nodes
  299 + if (current_node->level < max_levels) {
  300 + if (current_node->indices.size() > 1) { // split if the nonleaf node contains more than one point
  301 + cpu_kdtree::cpu_kdnode<T> *left = new cpu_kdtree::cpu_kdnode<T>();
  302 + cpu_kdtree::cpu_kdnode<T> *right = new cpu_kdtree::cpu_kdnode<T>();
  303 + left->idx = n_id++; // set the index of current node's left node
  304 + right->idx = n_id++;
  305 + split(current_node, left, right); // split left and right and determine a node
  306 + std::vector <size_t> temp; // empty vector of indices
  307 + //temp.resize(current_node->indices.size());
  308 + current_node->indices.swap(temp); // clean up current node's indices
  309 + current_node->left = left;
  310 + current_node->right = right;
  311 + current_node->left_idx = left->idx;
  312 + current_node->right_idx = right->idx;
  313 + if (right->indices.size())
  314 + next_search_nodes.push_back(right); // push right first so the left child is popped first
  315 + if (left->indices.size())
  316 + next_search_nodes.push_back(left);
  317 + }
  318 + }
  319 + }
  320 + next_nodes = next_search_nodes; // go deeper within the tree
  321 + }
  322 + }
  323 +
  324 + static bool sort_points(const size_t a, const size_t b) { // comparison function for std::sort
  325 + std::vector < typename cpu_kdtree::point<T, D> > &pts = *cur_tree_ptr->tmp_points; // access the current tree's input points through the static pointer
  326 + return pts[a].dim[cur_tree_ptr->current_axis] < pts[b].dim[cur_tree_ptr->current_axis];
  327 + }
  328 +
  329 + void split(cpu_kdtree::cpu_kdnode<T> *cur, cpu_kdtree::cpu_kdnode<T> *left, cpu_kdtree::cpu_kdnode<T> *right) {
  330 + std::vector < typename cpu_kdtree::point<T, D> > &pts = *tmp_points;
  331 + current_axis = cur->level % D; // select the splitting dimension (axis) for this level
  332 + std::sort(cur->indices.begin(), cur->indices.end(), sort_points); // sort the point indices along the current axis using sort_points
  333 + size_t mid_value = cur->indices[cur->indices.size() / 2]; // take the median index (integer division)
  334 + cur->split_value = pts[mid_value].dim[current_axis]; // use the median coordinate as the split value
  335 + left->parent = cur; // set the parent of the next search nodes to current node
  336 + right->parent = cur;
  337 + left->level = cur->level + 1; // level + 1
  338 + right->level = cur->level + 1;
  339 + left->parent_idx = cur->idx; // set its parent node's index
  340 + right->parent_idx = cur->idx;
  341 + for (size_t i = 0; i < cur->indices.size(); i++) { // split into left and right half-space one by one
  342 + size_t idx = cur->indices[i];
  343 + if (pts[idx].dim[current_axis] < cur->split_value)
  344 + left->indices.push_back(idx);
  345 + else
  346 + right->indices.push_back(idx);
  347 + }
  348 + }
  349 +
  350 + void create(T *h_reference_points, size_t reference_count, size_t max_levels) {
  351 + #ifdef __CUDACC__
503 352 if (max_levels > 10) {
504 353 std::cout<<"The max_tree_levels should be smaller!"<<std::endl;
505 354 exit(1);
... ... @@ -507,29 +356,28 @@ namespace stim {
507 356 //bb.init(&h_reference_points[0]);
508 357 //aaboundingboxing<T, D>(bb, h_reference_points, reference_count);
509 358  
510   - std::vector < typename kdtree::point<T, D>> reference_points(reference_count); // restore the reference points in particular way
  359 + std::vector < typename cpu_kdtree::point<T, D>> reference_points(reference_count); // store the reference points as point<T, D> structures
511 360 for (size_t j = 0; j < reference_count; j++)
512 361 for (size_t i = 0; i < D; i++)
513   - reference_points[j].dim[i] = h_reference_points[j * D + i];
514   - cpu_kdtree<T, D> tree; // creating a tree on cpu
515   - tree.cpu_create(reference_points, max_levels); // building a tree on cpu
516   - kdtree::kdnode<T> *d_root = tree.get_root();
517   - num_nodes = tree.get_num_nodes();
  362 + reference_points[j].dim[i] = h_reference_points[j * D + i];
  363 + (*this).cpu_create(reference_points, max_levels); // building a tree on cpu
  364 + cpu_kdtree::cpu_kdnode<T> *d_root = (*this).get_root();
  365 + num_nodes = (*this).get_num_nodes();
518 366 npts = reference_count; // also equals to reference_count
519 367  
520 368 HANDLE_ERROR(cudaMalloc((void**)&d_nodes, sizeof(cuda_kdnode<T>) * num_nodes)); // copy data from host to device
521 369 HANDLE_ERROR(cudaMalloc((void**)&d_index, sizeof(size_t) * npts));
522   - HANDLE_ERROR(cudaMalloc((void**)&d_reference_points, sizeof(kdtree::point<T, D>) * npts));
  370 + HANDLE_ERROR(cudaMalloc((void**)&d_reference_points, sizeof(cpu_kdtree::point<T, D>) * npts));
523 371  
524 372 std::vector < cuda_kdnode<T> > tmp_nodes(num_nodes);
525 373 std::vector <size_t> indices(npts);
526   - std::vector <kdtree::kdnode<T>*> next_nodes;
  374 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_nodes;
527 375 size_t cur_pos = 0;
528 376 next_nodes.push_back(d_root);
529 377 while (next_nodes.size()) {
530   - std::vector <typename kdtree::kdnode<T>*> next_search_nodes;
  378 + std::vector <typename cpu_kdtree::cpu_kdnode<T>*> next_search_nodes;
531 379 while (next_nodes.size()) {
532   - kdtree::kdnode<T> *cur = next_nodes.back();
  380 + cpu_kdtree::cpu_kdnode<T> *cur = next_nodes.back();
533 381 next_nodes.pop_back();
534 382 int id = cur->idx; // the nodes at same level are independent
535 383 tmp_nodes[id].level = cur->level;
... ... @@ -559,16 +407,154 @@ namespace stim {
559 407 }
560 408 HANDLE_ERROR(cudaMemcpy(d_nodes, &tmp_nodes[0], sizeof(cuda_kdnode<T>) * tmp_nodes.size(), cudaMemcpyHostToDevice));
561 409 HANDLE_ERROR(cudaMemcpy(d_index, &indices[0], sizeof(size_t) * indices.size(), cudaMemcpyHostToDevice));
562   - HANDLE_ERROR(cudaMemcpy(d_reference_points, &reference_points[0], sizeof(kdtree::point<T, D>) * reference_count, cudaMemcpyHostToDevice));
  410 + HANDLE_ERROR(cudaMemcpy(d_reference_points, &reference_points[0], sizeof(cpu_kdtree::point<T, D>) * reference_count, cudaMemcpyHostToDevice));
  411 +
  412 + #else
  413 + std::vector < typename cpu_kdtree::point<T, D> > reference_points(reference_count); // store the reference points as point<T, D> structures
  414 + for (size_t j = 0; j < reference_count; j++)
  415 + for (size_t i = 0; i < D; i++)
  416 + reference_points[j].dim[i] = h_reference_points[j * D + i];
  417 + cpu_create(reference_points, max_levels);
  418 + cpu_tmp_points = *tmp_points;
  419 +
  420 + #endif
  421 + }
  422 +
  423 + int get_num_nodes() const { // get the total number of nodes
  424 + return n_id;
  425 + }
  426 +
  427 + cpu_kdtree::cpu_kdnode<T>* get_root() const { // get the root node of tree
  428 + return root;
  429 + }
  430 +
  431 + T cpu_distance(const cpu_kdtree::point<T, D> &a, const cpu_kdtree::point<T, D> &b) {
  432 + T distance = 0;
  433 +
  434 + for (size_t i = 0; i < D; i++) {
  435 + T d = a.dim[i] - b.dim[i];
  436 + distance += d*d;
  437 + }
  438 + return distance;
  439 + }
  440 +
  441 + void cpu_search_at_node(cpu_kdtree::cpu_kdnode<T> *cur, const cpu_kdtree::point<T, D> &query, size_t *index, T *distance, cpu_kdtree::cpu_kdnode<T> **node) {
  442 + T best_distance = FLT_MAX; // initialize the best distance to max of floating point
  443 + size_t best_index = 0;
  444 + std::vector < typename cpu_kdtree::point<T, D> > pts = cpu_tmp_points;
  445 + while (true) {
  446 + size_t split_axis = cur->level % D;
  447 + if (cur->left == NULL) { // left and right children are always created together, so checking only the left child is safe
  448 + *node = cur; // return the leaf node through the output pointer
  449 + for (size_t i = 0; i < cur->indices.size(); i++) {
  450 + size_t idx = cur->indices[i];
  451 + T d = cpu_distance(query, pts[idx]); // compute distances
  452 + /// if we want to compute the k nearest neighbors, we can input the last result
  453 + /// (last_best_dist < dist < best_dist) to select the next point until reaching k
  454 + if (d < best_distance) {
  455 + best_distance = d;
  456 + best_index = idx; // record the nearest neighbor index
  457 + }
  458 + }
  459 + break; // find the target point then break the loop
  460 + }
  461 + else if (query.dim[split_axis] < cur->split_value) { // otherwise descend into either the left or the right child
  462 + cur = cur->left;
  463 + }
  464 + else {
  465 + cur = cur->right;
  466 + }
  467 + }
  468 + *index = best_index;
  469 + *distance = best_distance;
  470 + }
  471 +
  472 + void cpu_search_at_node_range(cpu_kdtree::cpu_kdnode<T> *cur, const cpu_kdtree::point<T, D> &query, T range, size_t *index, T *distance) {
  473 + T best_distance = FLT_MAX; // initialize the best distance to max of floating point
  474 + size_t best_index = 0;
  475 + std::vector < typename cpu_kdtree::point<T, D> > pts = cpu_tmp_points;
  476 + std::vector < typename cpu_kdtree::cpu_kdnode<T>*> next_node;
  477 + next_node.push_back(cur);
  478 + while (next_node.size()) {
  479 + std::vector<typename cpu_kdtree::cpu_kdnode<T>*> next_search;
  480 + while (next_node.size()) {
  481 + cur = next_node.back();
  482 + next_node.pop_back();
  483 + size_t split_axis = cur->level % D;
  484 + if (cur->left == NULL) {
  485 + for (size_t i = 0; i < cur->indices.size(); i++) {
  486 + size_t idx = cur->indices[i];
  487 + T d = cpu_distance(query, pts[idx]);
  488 + if (d < best_distance) {
  489 + best_distance = d;
  490 + best_index = idx;
  491 + }
  492 + }
  493 + }
  494 + else {
  495 + T d = query.dim[split_axis] - cur->split_value; // compute the distance along the splitting axis
  496 + /// there are three possibilities: left only, right only, or both left and right
  497 + if (fabs(d) > range) { // check whether the axis distance is larger than the search range
  498 + if (d < 0)
  499 + next_search.push_back(cur->left); // every left[split_axis] is less than or equal to cur->split_value, so it is possible to find the nearest point in this region
  500 + else
  501 + next_search.push_back(cur->right);
  502 + }
  503 + else { // the nearest neighbor could lie in either subtree
  504 + next_search.push_back(cur->left);
  505 + next_search.push_back(cur->right);
  506 + }
  507 + }
  508 + }
  509 + next_node = next_search; // move on to the next level of nodes
  510 + }
  511 + *index = best_index;
  512 + *distance = best_distance;
  513 + }
  514 +
  515 + void cpu_search(T *h_query_points, size_t query_count, size_t *h_indices, T *h_distances) {
  516 + /// first convert the input query point into the point<T, D> type
  517 + cpu_kdtree::point<T, D> query;
  518 + for (size_t j = 0; j < query_count; j++) {
  519 + for (size_t i = 0; i < D; i++)
  520 + query.dim[i] = h_query_points[j * D + i];
  521 + /// find the nearest node; this provides an upper bound for the subsequent search
  522 + cpu_kdtree::cpu_kdnode<T> *best_node = NULL;
  523 + T best_distance = FLT_MAX;
  524 + size_t best_index = 0;
  525 + T radius = 0; // radius for range
  526 + cpu_search_at_node(root, query, &best_index, &best_distance, &best_node); // simple search to roughly determine a result for the next search step
  527 + radius = sqrt(best_distance); // the true nearest neighbor may still lie in another region, so search within this radius
  528 + /// find other possibilities
  529 + cpu_kdtree::cpu_kdnode<T> *cur = best_node;
  530 + while (cur->parent != NULL) { // every node passed on the way up may still contain the best match
  531 + /// go up
  532 + cpu_kdtree::cpu_kdnode<T> *parent = cur->parent; // travel back to every node that we pass through
  533 + size_t split_axis = (parent->level) % D;
  534 + /// search other nodes
  535 + size_t tmp_index;
  536 + T tmp_distance = FLT_MAX;
  537 + if (fabs(parent->split_value - query.dim[split_axis]) <= radius) {
  538 + /// search opposite node
  539 + if (parent->left != cur)
  540 + cpu_search_at_node_range(parent->left, query, radius, &tmp_index, &tmp_distance); // search the sibling subtree (the parent's other child)
  541 + else
  542 + cpu_search_at_node_range(parent->right, query, radius, &tmp_index, &tmp_distance);
  543 + }
  544 + if (tmp_distance < best_distance) {
  545 + best_distance = tmp_distance;
  546 + best_index = tmp_index;
  547 + }
  548 + cur = parent;
  549 + }
  550 + h_indices[j] = best_index;
  551 + h_distances[j] = best_distance;
  552 + }
563 553 }
564 554  
565   - /// Search the KD tree for nearest neighbors to a set of specified query points
566   - /// @param h_query_points an array of query points in (x0, y0, z0, ...) order
567   - /// @param query_count is the number of query points
568   - /// @param indices are the indices to the nearest reference point for each query points
569   - /// @param distances is an array containing the distance between each query point and the nearest reference point
570 555 void search(T *h_query_points, size_t query_count, size_t *indices, T *distances) {
571   - std::vector < typename kdtree::point<T, D> > query_points(query_count);
  556 + #ifdef __CUDACC__
  557 + std::vector < typename cpu_kdtree::point<T, D> > query_points(query_count);
572 558 for (size_t j = 0; j < query_count; j++)
573 559 for (size_t i = 0; i < D; i++)
574 560 query_points[j].dim[i] = h_query_points[j * D + i];
... ... @@ -595,7 +581,7 @@ namespace stim {
595 581 unsigned int threads = (unsigned int)(query_count > 1024 ? 1024 : query_count);
596 582 unsigned int blocks = (unsigned int)(query_count / threads + (query_count % threads ? 1 : 0));
597 583  
598   - kdtree::point<T, D> *d_query_points; // create a pointer pointing to query points on gpu
  584 + cpu_kdtree::point<T, D> *d_query_points; // create a pointer pointing to query points on gpu
599 585 size_t *d_indices;
600 586 T *d_distances;
601 587  
... ... @@ -624,64 +610,18 @@ namespace stim {
624 610 HANDLE_ERROR(cudaFree(d_indices));
625 611 HANDLE_ERROR(cudaFree(d_distances));
626 612 }
627   - }
628   -
629   - /// Return the number of points in the KD tree
630   - size_t num_points() {
631   - return npts;
632   - }
633 613  
634   - stim::aabbn<T, D> getbox() {
635   - size_t N = npts;
636   - //std::vector < typename kdtree::point<T, D> > cpu_ref(npts); //allocate space on the CPU for the reference points
637   - T* cpu_ref = (T*)malloc(N * D * sizeof(T)); //allocate space on the CPU for the reference points
638   - HANDLE_ERROR(cudaMemcpy(cpu_ref, d_reference_points, N * D * sizeof(T), cudaMemcpyDeviceToHost)); //copy from GPU to CPU
  614 + #else
  615 + cpu_search(h_query_points, query_count, indices, distances);
639 616  
640   - stim::aabbn<T, D> bb(cpu_ref);
  617 + #endif
641 618  
642   - for (size_t i = 1; i < N; i++) { //for each reference point
643   - //std::cout << "( " << cpu_ref[i * D + 0] << ", " << cpu_ref[i * D + 1] << ", " << cpu_ref[i * D + 2] << ")" << std::endl;
644   - bb.insert(&cpu_ref[i * D]);
645   - }
646   - return bb;
647 619 }
648 620  
649   - //generate an implicit distance field for the KD-tree
650   - void dist_field3(T* dist, size_t* dims, stim::aabbn<T, 3> bb) {
651   - size_t N = 1; //number of query points that make up the distance field
652   - for (size_t d = 0; d < 3; d++) N *= dims[d]; //calculate the total number of query points
653   -
654   - //calculate the grid spatial parameters
655   - T dx = 0;
656   - if (dims[0] > 1) dx = bb.length(0) / dims[0];
657   - T dy = 0;
658   - if (dims[1] > 1) dy = bb.length(1) / dims[1];
659   - T dz = 0;
660   - if (dims[2] > 1) dz = bb.length(2) / dims[2];
661   -
662   - T* Q = (T*)malloc(N * 3 * sizeof(T)); //allocate space for the query points
663   - size_t i;
664   - for (size_t z = 0; z < dims[2]; z++) { //for each query point (which is a point in the grid)
665   - for (size_t y = 0; y < dims[1]; y++) {
666   - for (size_t x = 0; x < dims[0]; x++) {
667   - i = z * dims[1] * dims[0] + y * dims[0] + x;
668   - Q[i * 3 + 0] = bb.low[0] + x * dx + dx / 2;
669   - Q[i * 3 + 1] = bb.low[1] + y * dy + dy / 2;
670   - Q[i * 3 + 2] = bb.low[2] + z * dz + dz / 2;
671   - //std::cout << i<<" "<<Q[i * 3 + 0] << " " << Q[i * 3 + 1] << " " << Q[i * 3 + 2] << std::endl;
672   - }
673   - }
674   - }
675   - size_t* temp = (size_t*)malloc(N * sizeof(size_t)); //allocate space to store the indices (unused)
676   - search(Q, N, temp, dist);
677   - }
  621 + }; //end class kdtree
678 622  
679   - //generate an implicit distance field for the KD-tree
680   - void dist_field3(T* dist, size_t* dims) {
681   - stim::aabbn<T, D> bb = getbox(); //get a bounding box around the tree
682   - dist_field3(dist, dims, bb);
683   - }
  623 + template <typename T, int D>
  624 + kdtree<T, D>* kdtree<T, D>::cur_tree_ptr = NULL; // definition of cur_tree_ptr pointer points to the current class
684 625  
685   - };
686 626 } //end namespace stim
687 627 #endif
688 628 \ No newline at end of file
... ...
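For reference, below is a minimal usage sketch of the merged stim::kdtree class in this file, based on the create() and search() signatures shown in the diff above. The file name, point values, and counts are illustrative assumptions, not part of the commit; on the CPU path the returned distances are the squared Euclidean distances computed by cpu_distance().

// kdtree_example.cu -- illustrative sketch only; assumes kdtree.cuh is on the include path
#include <vector>
#include <cstdio>
#include <stim/structures/kdtree.cuh>

int main() {
    // reference points stored interleaved per point: x0,y0,z0, x1,y1,z1, ...
    std::vector<float> ref = { 0,0,0,  1,0,0,  0,1,0,  5,5,5 };
    size_t ref_count = ref.size() / 3;

    stim::kdtree<float, 3> tree;              // D defaults to 3
    tree.create(ref.data(), ref_count, 3);    // the GPU path requires max_levels <= 10

    std::vector<float> q = { 0.9f, 0.1f, 0.0f,  4,4,4 };
    size_t q_count = q.size() / 3;
    std::vector<size_t> idx(q_count);
    std::vector<float> dist(q_count);

    // dispatches to the GPU when compiled under __CUDACC__, otherwise falls back to cpu_search()
    tree.search(q.data(), q_count, idx.data(), dist.data());

    for (size_t i = 0; i < q_count; i++)
        printf("query %zu -> nearest reference %zu (distance value %f)\n", i, idx[i], dist[i]);
    return 0;
}

Under this sketch, building the tree on the host and querying on the device is handled entirely inside create() and search() by the #ifdef __CUDACC__ branches added in this commit.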
stim/visualization/gl_network.h
... ... @@ -44,6 +44,23 @@ public:
44 44 }
45 45  
46 46 /// Render the network centerline as a series of line strips.
  47 + /// glCenterline0 renders the centerline with a single constant texture coordinate (no per-vertex magnitude)
  48 + void glCenterline0(){
  49 + if (!glIsList(dlist)) { //if dlist isn't a display list, create it
  50 + dlist = glGenLists(1); //generate a display list
  51 + glNewList(dlist, GL_COMPILE); //start a new display list
  52 + for (unsigned e = 0; e < E.size(); e++) { //for each edge in the network
  53 + glBegin(GL_LINE_STRIP);
  54 + for (unsigned p = 0; p < E[e].size(); p++) { //for each point on that edge
  55 + glTexCoord1f(0); //set a constant texture coordinate before emitting the vertex
  56 + glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]); //set the vertex position based on the current point
  57 + }
  58 + glEnd();
  59 + }
  60 + glEndList(); //end the display list
  61 + }
  62 + glCallList(dlist); // render the display list
  63 + }
47 64  
48 65 /// @param m specifies the magnitude value used as the vertex weight (radius, error, etc.)
49 66 void glCenterline(unsigned m = 0){
... ...
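For context, here is a minimal sketch of how the new glCenterline0() might be driven from a GLUT display callback. The window setup and the global 'net' object are assumptions for illustration only; the commit does not show how the network is loaded, so 'net' is assumed to be populated elsewhere by the application.

// gl_network_example.cpp -- illustrative sketch, not part of the commit
#include <GL/glut.h>
#include <stim/visualization/gl_network.h>

stim::gl_network<float> net;                  // assumed to already contain the network edges E

void render_scene() {
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    net.glCenterline0();                      // first call compiles the display list, later calls just replay it
    glutSwapBuffers();
}

int main(int argc, char** argv) {
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA | GLUT_DEPTH);
    glutCreateWindow("network centerline");
    glutDisplayFunc(render_scene);
    glutMainLoop();
    return 0;
}

The display list is the design choice here: the line strips are compiled once on the first call and replayed on every subsequent frame, so the per-frame cost is a single glCallList().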