Commit fdfdeda06e048f9dfe43a60ea8a91e5b8da17ac2

Authored by Jiaming Guo
1 parent d31531d8

fix minor mistakes

Showing 2 changed files with 300 additions and 343 deletions
stim/structures/kdtree.cuh
1 -// right now the size of CUDA STACK is set to 1000, increase it if you intend to build a deeper tree 1 +// right now the size of CUDA STACK is set to 50, increase it if you intend to build a deeper tree
2 // data should be stored in row-major 2 // data should be stored in row-major
3 // x1,x2,x3,x4,x5...... 3 // x1,x2,x3,x4,x5......
4 // y1,y2,y3,y4,y5...... 4 // y1,y2,y3,y4,y5......
@@ -22,16 +22,16 @@ @@ -22,16 +22,16 @@
22 #include <stim/visualization/aabbn.h> 22 #include <stim/visualization/aabbn.h>
23 23
24 namespace stim { 24 namespace stim {
25 - namespace kdtree { 25 + namespace cpu_kdtree {
26 template<typename T, int D> // typename refers to float or double while D refers to dimension of points 26 template<typename T, int D> // typename refers to float or double while D refers to dimension of points
27 struct point { 27 struct point {
28 T dim[D]; // create a structure to store every one input point 28 T dim[D]; // create a structure to store every one input point
29 }; 29 };
30 30
31 template<typename T> 31 template<typename T>
32 - class kdnode { 32 + class cpu_kdnode {
33 public: 33 public:
34 - kdnode() { // constructor for initializing a kdnode 34 + cpu_kdnode() { // constructor for initializing a kdnode
35 parent = NULL; // set every node's parent, left and right kdnode pointers to NULL 35 parent = NULL; // set every node's parent, left and right kdnode pointers to NULL
36 left = NULL; 36 left = NULL;
37 right = NULL; 37 right = NULL;
@@ -42,258 +42,12 @@ namespace stim { @@ -42,258 +42,12 @@ namespace stim {
42 } 42 }
43 int idx; // index of current node 43 int idx; // index of current node
44 int parent_idx, left_idx, right_idx; // index of parent, left and right nodes 44 int parent_idx, left_idx, right_idx; // index of parent, left and right nodes
45 - kdnode *parent, *left, *right; // parent, left and right kdnodes 45 + cpu_kdnode *parent, *left, *right; // parent, left and right kdnodes
46 T split_value; // splitting value of current node 46 T split_value; // splitting value of current node
47 std::vector <size_t> indices; // it indicates the points' indices that current node has 47 std::vector <size_t> indices; // it indicates the points' indices that current node has
48 size_t level; // tree level of current node 48 size_t level; // tree level of current node
49 }; 49 };
50 - } // end of namespace kdtree  
51 -  
52 - template <typename T, int D = 3> // set dimension of data to default 3  
53 - class cpu_kdtree {  
54 - protected:  
55 - int current_axis; // current judging axis  
56 - int n_id; // store the total number of nodes  
57 - std::vector < typename kdtree::point<T, D> > *tmp_points; // transfer or temporary points  
58 - std::vector < typename kdtree::point<T, D> > cpu_tmp_points; // for cpu searching  
59 - kdtree::kdnode<T> *root; // root node  
60 - static cpu_kdtree<T, D> *cur_tree_ptr;  
61 - public:  
62 - cpu_kdtree() { // constructor for creating a cpu_kdtree  
63 - cur_tree_ptr = this; // point the static class pointer at the current instance  
64 - n_id = 0; // initialize the total number of nodes to 0  
65 - }  
66 -  
67 - ~cpu_kdtree() { // destructor of cpu_kdtree  
68 - std::vector <kdtree::kdnode<T>*> next_nodes;  
69 - next_nodes.push_back(root);  
70 - while (next_nodes.size()) {  
71 - std::vector <kdtree::kdnode<T>*> next_search_nodes;  
72 - while (next_nodes.size()) {  
73 - kdtree::kdnode<T> *cur = next_nodes.back();  
74 - next_nodes.pop_back();  
75 - if (cur->left)  
76 - next_search_nodes.push_back(cur->left);  
77 - if (cur->right)  
78 - next_search_nodes.push_back(cur->right);  
79 - delete cur;  
80 - }  
81 - next_nodes = next_search_nodes;  
82 - }  
83 - root = NULL;  
84 - }  
85 -  
86 - void cpu_create(std::vector < typename kdtree::point<T, D> > &reference_points, size_t max_levels) {  
87 - tmp_points = &reference_points;  
88 - root = new kdtree::kdnode<T>(); // initializing the root node  
89 - root->idx = n_id++; // the index of root is 0  
90 - root->level = 0; // tree level begins at 0  
91 - root->indices.resize(reference_points.size()); // get the number of points  
92 - for (size_t i = 0; i < reference_points.size(); i++) {  
93 - root->indices[i] = i; // set indices of input points  
94 - }  
95 - std::vector <kdtree::kdnode<T>*> next_nodes; // next nodes  
96 - next_nodes.push_back(root); // push back the root node  
97 - while (next_nodes.size()) {  
98 - std::vector <kdtree::kdnode<T>*> next_search_nodes; // next search nodes  
99 - while (next_nodes.size()) { // two nested while loops: a new vector is needed to store the nodes for the next search level  
100 - kdtree::kdnode<T> *current_node = next_nodes.back(); // handle node one by one (right first)  
101 - next_nodes.pop_back(); // pop out current node in order to store next round of nodes  
102 - if (current_node->level < max_levels) {  
103 - if (current_node->indices.size() > 1) { // split if the nonleaf node contains more than one point  
104 - kdtree::kdnode<T> *left = new kdtree::kdnode<T>();  
105 - kdtree::kdnode<T> *right = new kdtree::kdnode<T>();  
106 - left->idx = n_id++; // set the index of current node's left node  
107 - right->idx = n_id++;  
108 - split(current_node, left, right); // split left and right and determine a node  
109 - std::vector <size_t> temp; // empty vector of indices  
110 - //temp.resize(current_node->indices.size());  
111 - current_node->indices.swap(temp); // clean up current node's indices  
112 - current_node->left = left;  
113 - current_node->right = right;  
114 - current_node->left_idx = left->idx;  
115 - current_node->right_idx = right->idx;  
116 - if (right->indices.size())  
117 - next_search_nodes.push_back(right); // push right first so that left pops out first  
118 - if (left->indices.size())  
119 - next_search_nodes.push_back(left);  
120 - }  
121 - }  
122 - }  
123 - next_nodes = next_search_nodes; // go deeper within the tree  
124 - }  
125 - }  
126 -  
127 - static bool sort_points(const size_t a, const size_t b) { // create functor for std::sort  
128 - std::vector < typename kdtree::point<T, D> > &pts = *cur_tree_ptr->tmp_points; // access the current tree's input points through cur_tree_ptr  
129 - return pts[a].dim[cur_tree_ptr->current_axis] < pts[b].dim[cur_tree_ptr->current_axis];  
130 - }  
131 -  
132 - void split(kdtree::kdnode<T> *cur, kdtree::kdnode<T> *left, kdtree::kdnode<T> *right) {  
133 - std::vector < typename kdtree::point<T, D> > &pts = *tmp_points;  
134 - current_axis = cur->level % D; // select the splitting dimension (axis)  
135 - std::sort(cur->indices.begin(), cur->indices.end(), sort_points); // using SortPoints as comparison function to sort the data  
136 - size_t mid_value = cur->indices[cur->indices.size() / 2]; // take the middle element of the sorted indices as the median  
137 - cur->split_value = pts[mid_value].dim[current_axis]; // use the median's coordinate as the split value  
138 - left->parent = cur; // set the parent of the next search nodes to current node  
139 - right->parent = cur;  
140 - left->level = cur->level + 1; // level + 1  
141 - right->level = cur->level + 1;  
142 - left->parent_idx = cur->idx; // set its parent node's index  
143 - right->parent_idx = cur->idx;  
144 - for (size_t i = 0; i < cur->indices.size(); i++) { // split into left and right half-space one by one  
145 - size_t idx = cur->indices[i];  
146 - if (pts[idx].dim[current_axis] < cur->split_value)  
147 - left->indices.push_back(idx);  
148 - else  
149 - right->indices.push_back(idx);  
150 - }  
151 - }  
152 -  
153 - void create(T *h_reference_points, size_t reference_count, size_t max_levels) {  
154 - std::vector < typename kdtree::point<T, D> > reference_points(reference_count); // store the reference points in the required layout  
155 - for (size_t j = 0; j < reference_count; j++)  
156 - for (size_t i = 0; i < D; i++)  
157 - reference_points[j].dim[i] = h_reference_points[j * D + i];  
158 - cpu_create(reference_points, max_levels);  
159 - cpu_tmp_points = *tmp_points;  
160 - }  
161 -  
162 - int get_num_nodes() const { // get the total number of nodes  
163 - return n_id;  
164 - }  
165 -  
166 - kdtree::kdnode<T>* get_root() const { // get the root node of tree  
167 - return root;  
168 - }  
169 -  
170 - T cpu_distance(const kdtree::point<T, D> &a, const kdtree::point<T, D> &b) {  
171 - T distance = 0;  
172 -  
173 - for (size_t i = 0; i < D; i++) {  
174 - T d = a.dim[i] - b.dim[i];  
175 - distance += d*d;  
176 - }  
177 - return distance;  
178 - }  
179 -  
180 - void cpu_search_at_node(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, size_t *index, T *distance, kdtree::kdnode<T> **node) {  
181 - T best_distance = FLT_MAX; // initialize the best distance to max of floating point  
182 - size_t best_index = 0;  
183 - std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points;  
184 - while (true) {  
185 - size_t split_axis = cur->level % D;  
186 - if (cur->left == NULL) { // checking only left is safe because left and right children are always created together  
187 - *node = cur; // pointer points to a pointer  
188 - for (size_t i = 0; i < cur->indices.size(); i++) {  
189 - size_t idx = cur->indices[i];  
190 - T d = cpu_distance(query, pts[idx]); // compute distances  
191 - /// if we want to compute the k nearest neighbors, we can feed in the last result  
192 - /// (last_best_dist < dist < best_dist) to select the next point until reaching k  
193 - if (d < best_distance) {  
194 - best_distance = d;  
195 - best_index = idx; // record the nearest neighbor index  
196 - }  
197 - }  
198 - break; // find the target point then break the loop  
199 - }  
200 - else if (query.dim[split_axis] < cur->split_value) { // if it has children, descend to the left or right child  
201 - cur = cur->left;  
202 - }  
203 - else {  
204 - cur = cur->right;  
205 - }  
206 - }  
207 - *index = best_index;  
208 - *distance = best_distance;  
209 - }  
210 -  
211 - void cpu_search_at_node_range(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, T range, size_t *index, T *distance) {  
212 - T best_distance = FLT_MAX; // initialize the best distance to max of floating point  
213 - size_t best_index = 0;  
214 - std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points;  
215 - std::vector < typename kdtree::kdnode<T>*> next_node;  
216 - next_node.push_back(cur);  
217 - while (next_node.size()) {  
218 - std::vector<typename kdtree::kdnode<T>*> next_search;  
219 - while (next_node.size()) {  
220 - cur = next_node.back();  
221 - next_node.pop_back();  
222 - size_t split_axis = cur->level % D;  
223 - if (cur->left == NULL) {  
224 - for (size_t i = 0; i < cur->indices.size(); i++) {  
225 - size_t idx = cur->indices[i];  
226 - T d = cpu_distance(query, pts[idx]);  
227 - if (d < best_distance) {  
228 - best_distance = d;  
229 - best_index = idx;  
230 - }  
231 - }  
232 - }  
233 - else {  
234 - T d = query.dim[split_axis] - cur->split_value; // compute the distance along the splitting axis or dimension  
235 - /// there are three possibilities: on either left or right, and on both left and right  
236 - if (fabs(d) > range) { // take the absolute value to see if the distance will be larger than best_dist  
237 - if (d < 0)  
238 - next_search.push_back(cur->left); // every left[split_axis] is less than or equal to cur->split_value, so it is possible to find the nearest point in this region  
239 - else  
240 - next_search.push_back(cur->right);  
241 - }  
242 - else { // it is possible that the nearest neighbor will appear on both left and right  
243 - next_search.push_back(cur->left);  
244 - next_search.push_back(cur->right);  
245 - }  
246 - }  
247 - }  
248 - next_node = next_search; // pop out at least one time  
249 - }  
250 - *index = best_index;  
251 - *distance = best_distance;  
252 - }  
253 -  
254 - void cpu_search(T *h_query_points, size_t query_count, size_t *h_indices, T *h_distances) {  
255 - /// first convert the input query point into specific type  
256 - kdtree::point<T, D> query;  
257 - for (size_t j = 0; j < query_count; j++) {  
258 - for (size_t i = 0; i < D; i++)  
259 - query.dim[i] = h_query_points[j * D + i];  
260 - /// find the nearest node, this will be the upper bound for the next time searching  
261 - kdtree::kdnode<T> *best_node = NULL;  
262 - T best_distance = FLT_MAX;  
263 - size_t best_index = 0;  
264 - T radius = 0; // radius for range  
265 - cpu_search_at_node(root, query, &best_index, &best_distance, &best_node); // simple search to roughly determine a result for the next search step  
266 - radius = sqrt(best_distance); // It is possible that nearest will appear in another region  
267 - /// find other possibilities  
268 - kdtree::kdnode<T> *cur = best_node;  
269 - while (cur->parent != NULL) { // any node passed on the way back up may still contain the nearest neighbor  
270 - /// go up  
271 - kdtree::kdnode<T> *parent = cur->parent; // travel back to every node that we pass through  
272 - size_t split_axis = (parent->level) % D;  
273 - /// search other nodes  
274 - size_t tmp_index;  
275 - T tmp_distance = FLT_MAX;  
276 - if (fabs(parent->split_value - query.dim[split_axis]) <= radius) {  
277 - /// search opposite node  
278 - if (parent->left != cur)  
279 - cpu_search_at_node_range(parent->left, query, radius, &tmp_index, &tmp_distance); // if cur is not its parent's left child, search the left (sibling) subtree  
280 - else  
281 - cpu_search_at_node_range(parent->right, query, radius, &tmp_index, &tmp_distance);  
282 - }  
283 - if (tmp_distance < best_distance) {  
284 - best_distance = tmp_distance;  
285 - best_index = tmp_index;  
286 - }  
287 - cur = parent;  
288 - }  
289 - h_indices[j] = best_index;  
290 - h_distances[j] = best_distance;  
291 - }  
292 - }  
293 - }; //end class kdtree  
294 -  
295 - template <typename T, int D>  
296 - cpu_kdtree<T, D>* cpu_kdtree<T, D>::cur_tree_ptr = NULL; // definition of cur_tree_ptr pointer points to the current class 50 + } // end of namespace cpu_kdtree
297 51
298 template <typename T> 52 template <typename T>
299 struct cuda_kdnode { 53 struct cuda_kdnode {
@@ -305,7 +59,7 @@ namespace stim { @@ -305,7 +59,7 @@ namespace stim {
305 }; 59 };
306 60
307 template <typename T, int D> 61 template <typename T, int D>
308 - __device__ T gpu_distance(kdtree::point<T, D> &a, kdtree::point<T, D> &b) { 62 + __device__ T gpu_distance(cpu_kdtree::point<T, D> &a, cpu_kdtree::point<T, D> &b) {
309 T distance = 0; 63 T distance = 0;
310 64
311 for (size_t i = 0; i < D; i++) { 65 for (size_t i = 0; i < D; i++) {
@@ -316,7 +70,7 @@ namespace stim { @@ -316,7 +70,7 @@ namespace stim {
316 } 70 }
317 71
318 template <typename T, int D> 72 template <typename T, int D>
319 - __device__ void search_at_node(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, int cur, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, int *d_node) { 73 + __device__ void search_at_node(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, int cur, cpu_kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, int *d_node) {
320 T best_distance = FLT_MAX; 74 T best_distance = FLT_MAX;
321 size_t best_index = 0; 75 size_t best_index = 0;
322 76
@@ -346,7 +100,7 @@ namespace stim { @@ -346,7 +100,7 @@ namespace stim {
346 } 100 }
347 101
348 template <typename T, int D> 102 template <typename T, int D>
349 - __device__ void search_at_node_range(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, int cur, T range, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) { 103 + __device__ void search_at_node_range(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> &d_query_point, int cur, T range, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
350 T best_distance = FLT_MAX; 104 T best_distance = FLT_MAX;
351 size_t best_index = 0; 105 size_t best_index = 0;
352 106
@@ -405,7 +159,7 @@ namespace stim { @@ -405,7 +159,7 @@ namespace stim {
405 } 159 }
406 160
407 template <typename T, int D> 161 template <typename T, int D>
408 - __device__ void search(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) { 162 + __device__ void search(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
409 int best_node = 0; 163 int best_node = 0;
410 T best_distance = FLT_MAX; 164 T best_distance = FLT_MAX;
411 size_t best_index = 0; 165 size_t best_index = 0;
@@ -438,7 +192,7 @@ namespace stim { @@ -438,7 +192,7 @@ namespace stim {
438 } 192 }
439 193
440 template <typename T, int D> 194 template <typename T, int D>
441 - __global__ void search_batch(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> *d_query_points, size_t d_query_count, size_t *d_indices, T *d_distances, int *next_nodes, int *next_search_nodes, int *Judge) { 195 + __global__ void search_batch(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> *d_query_points, size_t d_query_count, size_t *d_indices, T *d_distances, int *next_nodes, int *next_search_nodes, int *Judge) {
442 size_t idx = blockIdx.x * blockDim.x + threadIdx.x; 196 size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
443 if (idx >= d_query_count) return; // avoid segfault 197 if (idx >= d_query_count) return; // avoid segfault
444 198
@@ -446,11 +200,11 @@ namespace stim { @@ -446,11 +200,11 @@ namespace stim {
446 } 200 }
447 201
448 template <typename T, int D> 202 template <typename T, int D>
449 - void search_stream(cuda_kdnode<T> *d_nodes, size_t *d_index, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> *query_stream_points, size_t stream_count, size_t *indices, T *distances) { 203 + void search_stream(cuda_kdnode<T> *d_nodes, size_t *d_index, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> *query_stream_points, size_t stream_count, size_t *indices, T *distances) {
450 unsigned int threads = (unsigned int)(stream_count > 1024 ? 1024 : stream_count); 204 unsigned int threads = (unsigned int)(stream_count > 1024 ? 1024 : stream_count);
451 unsigned int blocks = (unsigned int)(stream_count / threads + (stream_count % threads ? 1 : 0)); 205 unsigned int blocks = (unsigned int)(stream_count / threads + (stream_count % threads ? 1 : 0));
452 206
453 - kdtree::point<T, D> *d_query_points; 207 + cpu_kdtree::point<T, D> *d_query_points;
454 size_t *d_indices; 208 size_t *d_indices;
455 T *d_distances; 209 T *d_distances;
456 210
@@ -480,26 +234,121 @@ namespace stim { @@ -480,26 +234,121 @@ namespace stim {
480 HANDLE_ERROR(cudaFree(d_distances)); 234 HANDLE_ERROR(cudaFree(d_distances));
481 } 235 }
482 236
483 - template <typename T, int D = 3>  
484 - class cuda_kdtree { 237 + template <typename T, int D = 3> // set dimension of data to default 3
  238 + class kdtree {
485 protected: 239 protected:
486 - cuda_kdnode<T> *d_nodes;  
487 - size_t *d_index;  
488 - kdtree::point<T, D>* d_reference_points;  
489 - size_t npts;  
490 - int num_nodes; 240 + int current_axis; // current judging axis
  241 + int n_id; // store the total number of nodes
  242 + std::vector < typename cpu_kdtree::point<T, D> > *tmp_points; // transfer or temporary points
  243 + std::vector < typename cpu_kdtree::point<T, D> > cpu_tmp_points; // for cpu searching
  244 + cpu_kdtree::cpu_kdnode<T> *root; // root node
  245 + static kdtree<T, D> *cur_tree_ptr;
  246 + #ifdef __CUDACC__
  247 + cuda_kdnode<T> *d_nodes;
  248 + size_t *d_index;
  249 + cpu_kdtree::point<T, D>* d_reference_points;
  250 + size_t npts;
  251 + int num_nodes;
  252 + #endif
491 public: 253 public:
492 - ~cuda_kdtree() { 254 + kdtree() { // constructor for creating a cpu_kdtree
  255 + cur_tree_ptr = this; // point the static class pointer at the current instance
  256 + n_id = 0; // initialize the total number of nodes to 0
  257 + }
  258 +
  259 + ~kdtree() { // destructor of cpu_kdtree
  260 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_nodes;
  261 + next_nodes.push_back(root);
  262 + while (next_nodes.size()) {
  263 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_search_nodes;
  264 + while (next_nodes.size()) {
  265 + cpu_kdtree::cpu_kdnode<T> *cur = next_nodes.back();
  266 + next_nodes.pop_back();
  267 + if (cur->left)
  268 + next_search_nodes.push_back(cur->left);
  269 + if (cur->right)
  270 + next_search_nodes.push_back(cur->right);
  271 + delete cur;
  272 + }
  273 + next_nodes = next_search_nodes;
  274 + }
  275 + root = NULL;
  276 + #ifdef __CUDACC__
493 HANDLE_ERROR(cudaFree(d_nodes)); 277 HANDLE_ERROR(cudaFree(d_nodes));
494 HANDLE_ERROR(cudaFree(d_index)); 278 HANDLE_ERROR(cudaFree(d_index));
495 HANDLE_ERROR(cudaFree(d_reference_points)); 279 HANDLE_ERROR(cudaFree(d_reference_points));
  280 + #endif
496 } 281 }
497 -  
498 - /// Create a KD-tree given a pointer to an array of reference points and the number of reference points  
499 - /// @param h_reference_points is a host array containing the reference points in (x0, y0, z0, ...., ) order  
500 - /// @param reference_count is the number of reference point in the array  
501 - /// @param max_levels is the deepest number of tree levels allowed  
502 - void create(T *h_reference_points, size_t reference_count, size_t max_levels = 3) { 282 +
  283 + void cpu_create(std::vector < typename cpu_kdtree::point<T, D> > &reference_points, size_t max_levels) {
  284 + tmp_points = &reference_points;
  285 + root = new cpu_kdtree::cpu_kdnode<T>(); // initializing the root node
  286 + root->idx = n_id++; // the index of root is 0
  287 + root->level = 0; // tree level begins at 0
  288 + root->indices.resize(reference_points.size()); // get the number of points
  289 + for (size_t i = 0; i < reference_points.size(); i++) {
  290 + root->indices[i] = i; // set indices of input points
  291 + }
  292 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_nodes; // next nodes
  293 + next_nodes.push_back(root); // push back the root node
  294 + while (next_nodes.size()) {
  295 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_search_nodes; // next search nodes
  296 + while (next_nodes.size()) { // two nested while loops: a new vector is needed to store the nodes for the next search level
  297 + cpu_kdtree::cpu_kdnode<T> *current_node = next_nodes.back(); // handle node one by one (right first)
  298 + next_nodes.pop_back(); // pop out current node in order to store next round of nodes
  299 + if (current_node->level < max_levels) {
  300 + if (current_node->indices.size() > 1) { // split if the nonleaf node contains more than one point
  301 + cpu_kdtree::cpu_kdnode<T> *left = new cpu_kdtree::cpu_kdnode<T>();
  302 + cpu_kdtree::cpu_kdnode<T> *right = new cpu_kdtree::cpu_kdnode<T>();
  303 + left->idx = n_id++; // set the index of current node's left node
  304 + right->idx = n_id++;
  305 + split(current_node, left, right); // split left and right and determine a node
  306 + std::vector <size_t> temp; // empty vector of indices
  307 + //temp.resize(current_node->indices.size());
  308 + current_node->indices.swap(temp); // clean up current node's indices
  309 + current_node->left = left;
  310 + current_node->right = right;
  311 + current_node->left_idx = left->idx;
  312 + current_node->right_idx = right->idx;
  313 + if (right->indices.size())
  314 + next_search_nodes.push_back(right); // push right first so that left pops out first
  315 + if (left->indices.size())
  316 + next_search_nodes.push_back(left);
  317 + }
  318 + }
  319 + }
  320 + next_nodes = next_search_nodes; // go deeper within the tree
  321 + }
  322 + }
  323 +
  324 + static bool sort_points(const size_t a, const size_t b) { // create functor for std::sort
  325 + std::vector < typename cpu_kdtree::point<T, D> > &pts = *cur_tree_ptr->tmp_points; // access the current tree's input points through cur_tree_ptr
  326 + return pts[a].dim[cur_tree_ptr->current_axis] < pts[b].dim[cur_tree_ptr->current_axis];
  327 + }
  328 +
  329 + void split(cpu_kdtree::cpu_kdnode<T> *cur, cpu_kdtree::cpu_kdnode<T> *left, cpu_kdtree::cpu_kdnode<T> *right) {
  330 + std::vector < typename cpu_kdtree::point<T, D> > &pts = *tmp_points;
  331 + current_axis = cur->level % D; // select the splitting dimension (axis)
  332 + std::sort(cur->indices.begin(), cur->indices.end(), sort_points); // using SortPoints as comparison function to sort the data
  333 + size_t mid_value = cur->indices[cur->indices.size() / 2]; // take the middle element of the sorted indices as the median
  334 + cur->split_value = pts[mid_value].dim[current_axis]; // use the median's coordinate as the split value
  335 + left->parent = cur; // set the parent of the next search nodes to current node
  336 + right->parent = cur;
  337 + left->level = cur->level + 1; // level + 1
  338 + right->level = cur->level + 1;
  339 + left->parent_idx = cur->idx; // set its parent node's index
  340 + right->parent_idx = cur->idx;
  341 + for (size_t i = 0; i < cur->indices.size(); i++) { // split into left and right half-space one by one
  342 + size_t idx = cur->indices[i];
  343 + if (pts[idx].dim[current_axis] < cur->split_value)
  344 + left->indices.push_back(idx);
  345 + else
  346 + right->indices.push_back(idx);
  347 + }
  348 + }
  349 +
  350 + void create(T *h_reference_points, size_t reference_count, size_t max_levels) {
  351 + #ifdef __CUDACC__
503 if (max_levels > 10) { 352 if (max_levels > 10) {
504 std::cout<<"The max_tree_levels should be smaller!"<<std::endl; 353 std::cout<<"The max_tree_levels should be smaller!"<<std::endl;
505 exit(1); 354 exit(1);
@@ -507,29 +356,28 @@ namespace stim { @@ -507,29 +356,28 @@ namespace stim {
507 //bb.init(&h_reference_points[0]); 356 //bb.init(&h_reference_points[0]);
508 //aaboundingboxing<T, D>(bb, h_reference_points, reference_count); 357 //aaboundingboxing<T, D>(bb, h_reference_points, reference_count);
509 358
510 - std::vector < typename kdtree::point<T, D>> reference_points(reference_count); // store the reference points in the required layout 359 + std::vector < typename cpu_kdtree::point<T, D>> reference_points(reference_count); // store the reference points in the required layout
511 for (size_t j = 0; j < reference_count; j++) 360 for (size_t j = 0; j < reference_count; j++)
512 for (size_t i = 0; i < D; i++) 361 for (size_t i = 0; i < D; i++)
513 - reference_points[j].dim[i] = h_reference_points[j * D + i];  
514 - cpu_kdtree<T, D> tree; // creating a tree on cpu  
515 - tree.cpu_create(reference_points, max_levels); // building a tree on cpu  
516 - kdtree::kdnode<T> *d_root = tree.get_root();  
517 - num_nodes = tree.get_num_nodes(); 362 + reference_points[j].dim[i] = h_reference_points[j * D + i]; // creating a tree on cpu
  363 + (*this).cpu_create(reference_points, max_levels); // building a tree on cpu
  364 + cpu_kdtree::cpu_kdnode<T> *d_root = (*this).get_root();
  365 + num_nodes = (*this).get_num_nodes();
518 npts = reference_count; // also equals to reference_count 366 npts = reference_count; // also equals to reference_count
519 367
520 HANDLE_ERROR(cudaMalloc((void**)&d_nodes, sizeof(cuda_kdnode<T>) * num_nodes)); // copy data from host to device 368 HANDLE_ERROR(cudaMalloc((void**)&d_nodes, sizeof(cuda_kdnode<T>) * num_nodes)); // copy data from host to device
521 HANDLE_ERROR(cudaMalloc((void**)&d_index, sizeof(size_t) * npts)); 369 HANDLE_ERROR(cudaMalloc((void**)&d_index, sizeof(size_t) * npts));
522 - HANDLE_ERROR(cudaMalloc((void**)&d_reference_points, sizeof(kdtree::point<T, D>) * npts)); 370 + HANDLE_ERROR(cudaMalloc((void**)&d_reference_points, sizeof(cpu_kdtree::point<T, D>) * npts));
523 371
524 std::vector < cuda_kdnode<T> > tmp_nodes(num_nodes); 372 std::vector < cuda_kdnode<T> > tmp_nodes(num_nodes);
525 std::vector <size_t> indices(npts); 373 std::vector <size_t> indices(npts);
526 - std::vector <kdtree::kdnode<T>*> next_nodes; 374 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_nodes;
527 size_t cur_pos = 0; 375 size_t cur_pos = 0;
528 next_nodes.push_back(d_root); 376 next_nodes.push_back(d_root);
529 while (next_nodes.size()) { 377 while (next_nodes.size()) {
530 - std::vector <typename kdtree::kdnode<T>*> next_search_nodes; 378 + std::vector <typename cpu_kdtree::cpu_kdnode<T>*> next_search_nodes;
531 while (next_nodes.size()) { 379 while (next_nodes.size()) {
532 - kdtree::kdnode<T> *cur = next_nodes.back(); 380 + cpu_kdtree::cpu_kdnode<T> *cur = next_nodes.back();
533 next_nodes.pop_back(); 381 next_nodes.pop_back();
534 int id = cur->idx; // the nodes at same level are independent 382 int id = cur->idx; // the nodes at same level are independent
535 tmp_nodes[id].level = cur->level; 383 tmp_nodes[id].level = cur->level;
@@ -559,16 +407,154 @@ namespace stim { @@ -559,16 +407,154 @@ namespace stim {
559 } 407 }
560 HANDLE_ERROR(cudaMemcpy(d_nodes, &tmp_nodes[0], sizeof(cuda_kdnode<T>) * tmp_nodes.size(), cudaMemcpyHostToDevice)); 408 HANDLE_ERROR(cudaMemcpy(d_nodes, &tmp_nodes[0], sizeof(cuda_kdnode<T>) * tmp_nodes.size(), cudaMemcpyHostToDevice));
561 HANDLE_ERROR(cudaMemcpy(d_index, &indices[0], sizeof(size_t) * indices.size(), cudaMemcpyHostToDevice)); 409 HANDLE_ERROR(cudaMemcpy(d_index, &indices[0], sizeof(size_t) * indices.size(), cudaMemcpyHostToDevice));
562 - HANDLE_ERROR(cudaMemcpy(d_reference_points, &reference_points[0], sizeof(kdtree::point<T, D>) * reference_count, cudaMemcpyHostToDevice)); 410 + HANDLE_ERROR(cudaMemcpy(d_reference_points, &reference_points[0], sizeof(cpu_kdtree::point<T, D>) * reference_count, cudaMemcpyHostToDevice));
  411 +
  412 + #else
  413 + std::vector < typename cpu_kdtree::point<T, D> > reference_points(reference_count); // store the reference points in the required layout
  414 + for (size_t j = 0; j < reference_count; j++)
  415 + for (size_t i = 0; i < D; i++)
  416 + reference_points[j].dim[i] = h_reference_points[j * D + i];
  417 + cpu_create(reference_points, max_levels);
  418 + cpu_tmp_points = *tmp_points;
  419 +
  420 + #endif
  421 + }
  422 +
  423 + int get_num_nodes() const { // get the total number of nodes
  424 + return n_id;
  425 + }
  426 +
  427 + cpu_kdtree::cpu_kdnode<T>* get_root() const { // get the root node of tree
  428 + return root;
  429 + }
  430 +
  431 + T cpu_distance(const cpu_kdtree::point<T, D> &a, const cpu_kdtree::point<T, D> &b) {
  432 + T distance = 0;
  433 +
  434 + for (size_t i = 0; i < D; i++) {
  435 + T d = a.dim[i] - b.dim[i];
  436 + distance += d*d;
  437 + }
  438 + return distance;
  439 + }
  440 +
  441 + void cpu_search_at_node(cpu_kdtree::cpu_kdnode<T> *cur, const cpu_kdtree::point<T, D> &query, size_t *index, T *distance, cpu_kdtree::cpu_kdnode<T> **node) {
  442 + T best_distance = FLT_MAX; // initialize the best distance to max of floating point
  443 + size_t best_index = 0;
  444 + std::vector < typename cpu_kdtree::point<T, D> > pts = cpu_tmp_points;
  445 + while (true) {
  446 + size_t split_axis = cur->level % D;
  447 + if (cur->left == NULL) { // checking only left is safe because left and right children are always created together
  448 + *node = cur; // pointer points to a pointer
  449 + for (size_t i = 0; i < cur->indices.size(); i++) {
  450 + size_t idx = cur->indices[i];
  451 + T d = cpu_distance(query, pts[idx]); // compute distances
  452 + /// if we want to compute the k nearest neighbors, we can feed in the last result
  453 + /// (last_best_dist < dist < best_dist) to select the next point until reaching k
  454 + if (d < best_distance) {
  455 + best_distance = d;
  456 + best_index = idx; // record the nearest neighbor index
  457 + }
  458 + }
  459 + break; // find the target point then break the loop
  460 + }
  461 + else if (query.dim[split_axis] < cur->split_value) { // if it has children, descend to the left or right child
  462 + cur = cur->left;
  463 + }
  464 + else {
  465 + cur = cur->right;
  466 + }
  467 + }
  468 + *index = best_index;
  469 + *distance = best_distance;
  470 + }
  471 +
  472 + void cpu_search_at_node_range(cpu_kdtree::cpu_kdnode<T> *cur, const cpu_kdtree::point<T, D> &query, T range, size_t *index, T *distance) {
  473 + T best_distance = FLT_MAX; // initialize the best distance to max of floating point
  474 + size_t best_index = 0;
  475 + std::vector < typename cpu_kdtree::point<T, D> > pts = cpu_tmp_points;
  476 + std::vector < typename cpu_kdtree::cpu_kdnode<T>*> next_node;
  477 + next_node.push_back(cur);
  478 + while (next_node.size()) {
  479 + std::vector<typename cpu_kdtree::cpu_kdnode<T>*> next_search;
  480 + while (next_node.size()) {
  481 + cur = next_node.back();
  482 + next_node.pop_back();
  483 + size_t split_axis = cur->level % D;
  484 + if (cur->left == NULL) {
  485 + for (size_t i = 0; i < cur->indices.size(); i++) {
  486 + size_t idx = cur->indices[i];
  487 + T d = cpu_distance(query, pts[idx]);
  488 + if (d < best_distance) {
  489 + best_distance = d;
  490 + best_index = idx;
  491 + }
  492 + }
  493 + }
  494 + else {
  495 + T d = query.dim[split_axis] - cur->split_value; // compute the distance along the splitting axis or dimension
  496 + /// there are three possibilities: on either left or right, and on both left and right
  497 + if (fabs(d) > range) { // take the absolute value to see if the distance will be larger than best_dist
  498 + if (d < 0)
  499 + next_search.push_back(cur->left); // every left[split_axis] is less than or equal to cur->split_value, so it is possible to find the nearest point in this region
  500 + else
  501 + next_search.push_back(cur->right);
  502 + }
  503 + else { // it is possible that the nearest neighbor will appear on both left and right
  504 + next_search.push_back(cur->left);
  505 + next_search.push_back(cur->right);
  506 + }
  507 + }
  508 + }
  509 + next_node = next_search; // pop out at least one time
  510 + }
  511 + *index = best_index;
  512 + *distance = best_distance;
  513 + }
  514 +
  515 + void cpu_search(T *h_query_points, size_t query_count, size_t *h_indices, T *h_distances) {
  516 + /// first convert the input query point into specific type
  517 + cpu_kdtree::point<T, D> query;
  518 + for (size_t j = 0; j < query_count; j++) {
  519 + for (size_t i = 0; i < D; i++)
  520 + query.dim[i] = h_query_points[j * D + i];
  521 + /// find the nearest node, this will be the upper bound for the next time searching
  522 + cpu_kdtree::cpu_kdnode<T> *best_node = NULL;
  523 + T best_distance = FLT_MAX;
  524 + size_t best_index = 0;
  525 + T radius = 0; // radius for range
  526 + cpu_search_at_node(root, query, &best_index, &best_distance, &best_node); // simple search to roughly determine a result for the next search step
  527 + radius = sqrt(best_distance); // It is possible that nearest will appear in another region
  528 + /// find other possibilities
  529 + cpu_kdtree::cpu_kdnode<T> *cur = best_node;
  530 + while (cur->parent != NULL) { // any node passed on the way back up may still contain the nearest neighbor
  531 + /// go up
  532 + cpu_kdtree::cpu_kdnode<T> *parent = cur->parent; // travel back to every node that we pass through
  533 + size_t split_axis = (parent->level) % D;
  534 + /// search other nodes
  535 + size_t tmp_index;
  536 + T tmp_distance = FLT_MAX;
  537 + if (fabs(parent->split_value - query.dim[split_axis]) <= radius) {
  538 + /// search opposite node
  539 + if (parent->left != cur)
  540 + cpu_search_at_node_range(parent->left, query, radius, &tmp_index, &tmp_distance); // if cur is not its parent's left child, search the left (sibling) subtree
  541 + else
  542 + cpu_search_at_node_range(parent->right, query, radius, &tmp_index, &tmp_distance);
  543 + }
  544 + if (tmp_distance < best_distance) {
  545 + best_distance = tmp_distance;
  546 + best_index = tmp_index;
  547 + }
  548 + cur = parent;
  549 + }
  550 + h_indices[j] = best_index;
  551 + h_distances[j] = best_distance;
  552 + }
563 } 553 }
564 554
565 - /// Search the KD tree for nearest neighbors to a set of specified query points  
566 - /// @param h_query_points an array of query points in (x0, y0, z0, ...) order  
567 - /// @param query_count is the number of query points  
568 - /// @param indices are the indices to the nearest reference point for each query points  
569 - /// @param distances is an array containing the distance between each query point and the nearest reference point  
570 void search(T *h_query_points, size_t query_count, size_t *indices, T *distances) { 555 void search(T *h_query_points, size_t query_count, size_t *indices, T *distances) {
571 - std::vector < typename kdtree::point<T, D> > query_points(query_count); 556 + #ifdef __CUDACC__
  557 + std::vector < typename cpu_kdtree::point<T, D> > query_points(query_count);
572 for (size_t j = 0; j < query_count; j++) 558 for (size_t j = 0; j < query_count; j++)
573 for (size_t i = 0; i < D; i++) 559 for (size_t i = 0; i < D; i++)
574 query_points[j].dim[i] = h_query_points[j * D + i]; 560 query_points[j].dim[i] = h_query_points[j * D + i];
@@ -595,7 +581,7 @@ namespace stim { @@ -595,7 +581,7 @@ namespace stim {
595 unsigned int threads = (unsigned int)(query_count > 1024 ? 1024 : query_count); 581 unsigned int threads = (unsigned int)(query_count > 1024 ? 1024 : query_count);
596 unsigned int blocks = (unsigned int)(query_count / threads + (query_count % threads ? 1 : 0)); 582 unsigned int blocks = (unsigned int)(query_count / threads + (query_count % threads ? 1 : 0));
597 583
598 - kdtree::point<T, D> *d_query_points; // create a pointer pointing to query points on gpu 584 + cpu_kdtree::point<T, D> *d_query_points; // create a pointer pointing to query points on gpu
599 size_t *d_indices; 585 size_t *d_indices;
600 T *d_distances; 586 T *d_distances;
601 587
@@ -624,64 +610,18 @@ namespace stim { @@ -624,64 +610,18 @@ namespace stim {
624 HANDLE_ERROR(cudaFree(d_indices)); 610 HANDLE_ERROR(cudaFree(d_indices));
625 HANDLE_ERROR(cudaFree(d_distances)); 611 HANDLE_ERROR(cudaFree(d_distances));
626 } 612 }
627 - }  
628 -  
629 - /// Return the number of points in the KD tree  
630 - size_t num_points() {  
631 - return npts;  
632 - }  
633 613
634 - stim::aabbn<T, D> getbox() {  
635 - size_t N = npts;  
636 - //std::vector < typename kdtree::point<T, D> > cpu_ref(npts); //allocate space on the CPU for the reference points  
637 - T* cpu_ref = (T*)malloc(N * D * sizeof(T)); //allocate space on the CPU for the reference points  
638 - HANDLE_ERROR(cudaMemcpy(cpu_ref, d_reference_points, N * D * sizeof(T), cudaMemcpyDeviceToHost)); //copy from GPU to CPU 614 + #else
  615 + cpu_search(h_query_points, query_count, indices, distances);
639 616
640 - stim::aabbn<T, D> bb(cpu_ref); 617 + #endif
641 618
642 - for (size_t i = 1; i < N; i++) { //for each reference point  
643 - //std::cout << "( " << cpu_ref[i * D + 0] << ", " << cpu_ref[i * D + 1] << ", " << cpu_ref[i * D + 2] << ")" << std::endl;  
644 - bb.insert(&cpu_ref[i * D]);  
645 - }  
646 - return bb;  
647 } 619 }
648 620
649 - //generate an implicit distance field for the KD-tree  
650 - void dist_field3(T* dist, size_t* dims, stim::aabbn<T, 3> bb) {  
651 - size_t N = 1; //number of query points that make up the distance field  
652 - for (size_t d = 0; d < 3; d++) N *= dims[d]; //calculate the total number of query points  
653 -  
654 - //calculate the grid spatial parameters  
655 - T dx = 0;  
656 - if (dims[0] > 1) dx = bb.length(0) / dims[0];  
657 - T dy = 0;  
658 - if (dims[1] > 1) dy = bb.length(1) / dims[1];  
659 - T dz = 0;  
660 - if (dims[2] > 1) dz = bb.length(2) / dims[2];  
661 -  
662 - T* Q = (T*)malloc(N * 3 * sizeof(T)); //allocate space for the query points  
663 - size_t i;  
664 - for (size_t z = 0; z < dims[2]; z++) { //for each query point (which is a point in the grid)  
665 - for (size_t y = 0; y < dims[1]; y++) {  
666 - for (size_t x = 0; x < dims[0]; x++) {  
667 - i = z * dims[1] * dims[0] + y * dims[0] + x;  
668 - Q[i * 3 + 0] = bb.low[0] + x * dx + dx / 2;  
669 - Q[i * 3 + 1] = bb.low[1] + y * dy + dy / 2;  
670 - Q[i * 3 + 2] = bb.low[2] + z * dz + dz / 2;  
671 - //std::cout << i<<" "<<Q[i * 3 + 0] << " " << Q[i * 3 + 1] << " " << Q[i * 3 + 2] << std::endl;  
672 - }  
673 - }  
674 - }  
675 - size_t* temp = (size_t*)malloc(N * sizeof(size_t)); //allocate space to store the indices (unused)  
676 - search(Q, N, temp, dist);  
677 - } 621 + }; //end class kdtree
678 622
679 - //generate an implicit distance field for the KD-tree  
680 - void dist_field3(T* dist, size_t* dims) {  
681 - stim::aabbn<T, D> bb = getbox(); //get a bounding box around the tree  
682 - dist_field3(dist, dims, bb);  
683 - } 623 + template <typename T, int D>
  624 + kdtree<T, D>* kdtree<T, D>::cur_tree_ptr = NULL; // definition of cur_tree_ptr pointer points to the current class
684 625
685 - };  
686 } //end namespace stim 626 } //end namespace stim
687 #endif 627 #endif
688 \ No newline at end of file 628 \ No newline at end of file
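For reference, a minimal usage sketch of the merged stim::kdtree class after this change, assuming a host-only (non-nvcc) build in which create() and search() take the CPU code path; the reference/query arrays, the counts, and the max_levels value below are made up for illustration:

    #include <stim/structures/kdtree.cuh>
    #include <iostream>

    int main() {
        float ref[] = { 0, 0, 0,   1, 0, 0,   0, 1, 0,   5, 5, 5 };   // 4 reference points, (x, y, z) interleaved (row-major)
        float qry[] = { 0.9f, 0.1f, 0.0f,   4.0f, 5.0f, 5.0f };       // 2 query points in the same layout

        stim::kdtree<float, 3> tree;        // D defaults to 3
        tree.create(ref, 4, 3);             // build the tree with max_levels = 3
        size_t idx[2];                      // index of the nearest reference point for each query
        float dist[2];                      // distance reported by the search
        tree.search(qry, 2, idx, dist);     // dispatches to cpu_search() when __CUDACC__ is not defined
        for (int i = 0; i < 2; i++)
            std::cout << "query " << i << " -> reference " << idx[i] << " (distance " << dist[i] << ")" << std::endl;
        return 0;
    }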
stim/visualization/gl_network.h
@@ -44,6 +44,23 @@ public: @@ -44,6 +44,23 @@ public:
44 } 44 }
45 45
46 /// Render the network centerline as a series of line strips. 46 /// Render the network centerline as a series of line strips.
  47 + /// glCenterline0 renders the centerline when only a single network is used as input (constant white color)
  48 + void glCenterline0(){
  49 + if (!glIsList(dlist)) { //if dlist isn't a display list, create it
  50 + dlist = glGenLists(1); //generate a display list
  51 + glNewList(dlist, GL_COMPILE); //start a new display list
  52 + for (unsigned e = 0; e < E.size(); e++) { //for each edge in the network
  53 + glBegin(GL_LINE_STRIP);
  54 + for (unsigned p = 0; p < E[e].size(); p++) { //for each point on that edge
  55 + glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]); //set the vertex position based on the current point
  56 + glTexCoord1f(0); //set white color
  57 + }
  58 + glEnd();
  59 + }
  60 + glEndList(); //end the display list
  61 + }
  62 + glCallList(dlist); // render the display list
  63 + }
47 64
48 /// @param m specifies the magnitude value used as the vertex weight (radius, error, etc.) 65 /// @param m specifies the magnitude value used as the vertex weight (radius, error, etc.)
49 void glCenterline(unsigned m = 0){ 66 void glCenterline(unsigned m = 0){
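The new glCenterline0() compiles the edge centerlines into an OpenGL display list on the first call and simply replays the list afterwards. A small sketch of how it might be invoked, assuming the enclosing class is instantiated as a stim::gl_network<float> named net (the class definition and network loading are not shown in this hunk) and that an OpenGL context with a colormap texture is already set up:

    void display() {                                         // hypothetical GLUT display callback
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);  // clear the frame
        net.glCenterline0();                                 // builds the display list once, then renders it every frame
        glutSwapBuffers();                                   // present the frame
    }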