Commit fdfdeda06e048f9dfe43a60ea8a91e5b8da17ac2
1 parent
d31531d8
fix minor mistakes
Showing
2 changed files
with
300 additions
and
343 deletions
Show diff stats
stim/structures/kdtree.cuh
1 | -// right now the size of CUDA STACK is set to 1000, increase it if you mean to make deeper tree | |
1 | +// right now the size of CUDA STACK is set to 50, increase it if you mean to make deeper tree | |
2 | 2 | // data should be stored in row-major |
3 | 3 | // x1,x2,x3,x4,x5...... |
4 | 4 | // y1,y2,y3,y4,y5...... |
... | ... | @@ -22,16 +22,16 @@ |
22 | 22 | #include <stim/visualization/aabbn.h> |
23 | 23 | |
24 | 24 | namespace stim { |
25 | - namespace kdtree { | |
25 | + namespace cpu_kdtree { | |
26 | 26 | template<typename T, int D> // typename refers to float or double while D refers to dimension of points |
27 | 27 | struct point { |
28 | 28 | T dim[D]; // create a structure to store every one input point |
29 | 29 | }; |
30 | 30 | |
31 | 31 | template<typename T> |
32 | - class kdnode { | |
32 | + class cpu_kdnode { | |
33 | 33 | public: |
34 | - kdnode() { // constructor for initializing a kdnode | |
34 | + cpu_kdnode() { // constructor for initializing a kdnode | |
35 | 35 | parent = NULL; // set every node's parent, left and right kdnode pointers to NULL |
36 | 36 | left = NULL; |
37 | 37 | right = NULL; |
... | ... | @@ -42,258 +42,12 @@ namespace stim { |
42 | 42 | } |
43 | 43 | int idx; // index of current node |
44 | 44 | int parent_idx, left_idx, right_idx; // index of parent, left and right nodes |
45 | - kdnode *parent, *left, *right; // parent, left and right kdnodes | |
45 | + cpu_kdnode *parent, *left, *right; // parent, left and right kdnodes | |
46 | 46 | T split_value; // splitting value of current node |
47 | 47 | std::vector <size_t> indices; // it indicates the points' indices that current node has |
48 | 48 | size_t level; // tree level of current node |
49 | 49 | }; |
50 | - } // end of namespace kdtree | |
51 | - | |
52 | - template <typename T, int D = 3> // set dimension of data to default 3 | |
53 | - class cpu_kdtree { | |
54 | - protected: | |
55 | - int current_axis; // current judging axis | |
56 | - int n_id; // store the total number of nodes | |
57 | - std::vector < typename kdtree::point<T, D> > *tmp_points; // transfer or temperary points | |
58 | - std::vector < typename kdtree::point<T, D> > cpu_tmp_points; // for cpu searching | |
59 | - kdtree::kdnode<T> *root; // root node | |
60 | - static cpu_kdtree<T, D> *cur_tree_ptr; | |
61 | - public: | |
62 | - cpu_kdtree() { // constructor for creating a cpu_kdtree | |
63 | - cur_tree_ptr = this; // create a class pointer points to the current class value | |
64 | - n_id = 0; // set total number of points to default 0 | |
65 | - } | |
66 | - | |
67 | - ~cpu_kdtree() { // destructor of cpu_kdtree | |
68 | - std::vector <kdtree::kdnode<T>*> next_nodes; | |
69 | - next_nodes.push_back(root); | |
70 | - while (next_nodes.size()) { | |
71 | - std::vector <kdtree::kdnode<T>*> next_search_nodes; | |
72 | - while (next_nodes.size()) { | |
73 | - kdtree::kdnode<T> *cur = next_nodes.back(); | |
74 | - next_nodes.pop_back(); | |
75 | - if (cur->left) | |
76 | - next_search_nodes.push_back(cur->left); | |
77 | - if (cur->right) | |
78 | - next_search_nodes.push_back(cur->right); | |
79 | - delete cur; | |
80 | - } | |
81 | - next_nodes = next_search_nodes; | |
82 | - } | |
83 | - root = NULL; | |
84 | - } | |
85 | - | |
86 | - void cpu_create(std::vector < typename kdtree::point<T, D> > &reference_points, size_t max_levels) { | |
87 | - tmp_points = &reference_points; | |
88 | - root = new kdtree::kdnode<T>(); // initializing the root node | |
89 | - root->idx = n_id++; // the index of root is 0 | |
90 | - root->level = 0; // tree level begins at 0 | |
91 | - root->indices.resize(reference_points.size()); // get the number of points | |
92 | - for (size_t i = 0; i < reference_points.size(); i++) { | |
93 | - root->indices[i] = i; // set indices of input points | |
94 | - } | |
95 | - std::vector <kdtree::kdnode<T>*> next_nodes; // next nodes | |
96 | - next_nodes.push_back(root); // push back the root node | |
97 | - while (next_nodes.size()) { | |
98 | - std::vector <kdtree::kdnode<T>*> next_search_nodes; // next search nodes | |
99 | - while (next_nodes.size()) { // two same WHILE is because we need to make a new vector to store nodes for search | |
100 | - kdtree::kdnode<T> *current_node = next_nodes.back(); // handle node one by one (right first) | |
101 | - next_nodes.pop_back(); // pop out current node in order to store next round of nodes | |
102 | - if (current_node->level < max_levels) { | |
103 | - if (current_node->indices.size() > 1) { // split if the nonleaf node contains more than one point | |
104 | - kdtree::kdnode<T> *left = new kdtree::kdnode<T>(); | |
105 | - kdtree::kdnode<T> *right = new kdtree::kdnode<T>(); | |
106 | - left->idx = n_id++; // set the index of current node's left node | |
107 | - right->idx = n_id++; | |
108 | - split(current_node, left, right); // split left and right and determine a node | |
109 | - std::vector <size_t> temp; // empty vecters of int | |
110 | - //temp.resize(current_node->indices.size()); | |
111 | - current_node->indices.swap(temp); // clean up current node's indices | |
112 | - current_node->left = left; | |
113 | - current_node->right = right; | |
114 | - current_node->left_idx = left->idx; | |
115 | - current_node->right_idx = right->idx; | |
116 | - if (right->indices.size()) | |
117 | - next_search_nodes.push_back(right); // left pop out first | |
118 | - if (left->indices.size()) | |
119 | - next_search_nodes.push_back(left); | |
120 | - } | |
121 | - } | |
122 | - } | |
123 | - next_nodes = next_search_nodes; // go deeper within the tree | |
124 | - } | |
125 | - } | |
126 | - | |
127 | - static bool sort_points(const size_t a, const size_t b) { // create functor for std::sort | |
128 | - std::vector < typename kdtree::point<T, D> > &pts = *cur_tree_ptr->tmp_points; // put cur_tree_ptr to current input points' pointer | |
129 | - return pts[a].dim[cur_tree_ptr->current_axis] < pts[b].dim[cur_tree_ptr->current_axis]; | |
130 | - } | |
131 | - | |
132 | - void split(kdtree::kdnode<T> *cur, kdtree::kdnode<T> *left, kdtree::kdnode<T> *right) { | |
133 | - std::vector < typename kdtree::point<T, D> > &pts = *tmp_points; | |
134 | - current_axis = cur->level % D; // indicate the judicative dimension or axis | |
135 | - std::sort(cur->indices.begin(), cur->indices.end(), sort_points); // using SortPoints as comparison function to sort the data | |
136 | - size_t mid_value = cur->indices[cur->indices.size() / 2]; // odd in the mid_value, even take the floor | |
137 | - cur->split_value = pts[mid_value].dim[current_axis]; // get the parent node | |
138 | - left->parent = cur; // set the parent of the next search nodes to current node | |
139 | - right->parent = cur; | |
140 | - left->level = cur->level + 1; // level + 1 | |
141 | - right->level = cur->level + 1; | |
142 | - left->parent_idx = cur->idx; // set its parent node's index | |
143 | - right->parent_idx = cur->idx; | |
144 | - for (size_t i = 0; i < cur->indices.size(); i++) { // split into left and right half-space one by one | |
145 | - size_t idx = cur->indices[i]; | |
146 | - if (pts[idx].dim[current_axis] < cur->split_value) | |
147 | - left->indices.push_back(idx); | |
148 | - else | |
149 | - right->indices.push_back(idx); | |
150 | - } | |
151 | - } | |
152 | - | |
153 | - void create(T *h_reference_points, size_t reference_count, size_t max_levels) { | |
154 | - std::vector < typename kdtree::point<T, D> > reference_points(reference_count); // restore the reference points in particular way | |
155 | - for (size_t j = 0; j < reference_count; j++) | |
156 | - for (size_t i = 0; i < D; i++) | |
157 | - reference_points[j].dim[i] = h_reference_points[j * D + i]; | |
158 | - cpu_create(reference_points, max_levels); | |
159 | - cpu_tmp_points = *tmp_points; | |
160 | - } | |
161 | - | |
162 | - int get_num_nodes() const { // get the total number of nodes | |
163 | - return n_id; | |
164 | - } | |
165 | - | |
166 | - kdtree::kdnode<T>* get_root() const { // get the root node of tree | |
167 | - return root; | |
168 | - } | |
169 | - | |
170 | - T cpu_distance(const kdtree::point<T, D> &a, const kdtree::point<T, D> &b) { | |
171 | - T distance = 0; | |
172 | - | |
173 | - for (size_t i = 0; i < D; i++) { | |
174 | - T d = a.dim[i] - b.dim[i]; | |
175 | - distance += d*d; | |
176 | - } | |
177 | - return distance; | |
178 | - } | |
179 | - | |
180 | - void cpu_search_at_node(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, size_t *index, T *distance, kdtree::kdnode<T> **node) { | |
181 | - T best_distance = FLT_MAX; // initialize the best distance to max of floating point | |
182 | - size_t best_index = 0; | |
183 | - std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points; | |
184 | - while (true) { | |
185 | - size_t split_axis = cur->level % D; | |
186 | - if (cur->left == NULL) { // risky but acceptable, same goes for right because left and right are in same pace | |
187 | - *node = cur; // pointer points to a pointer | |
188 | - for (size_t i = 0; i < cur->indices.size(); i++) { | |
189 | - size_t idx = cur->indices[i]; | |
190 | - T d = cpu_distance(query, pts[idx]); // compute distances | |
191 | - /// if we want to compute k nearest neighbor, we can input the last resul | |
192 | - /// (last_best_dist < dist < best_dist) to select the next point until reaching to k | |
193 | - if (d < best_distance) { | |
194 | - best_distance = d; | |
195 | - best_index = idx; // record the nearest neighbor index | |
196 | - } | |
197 | - } | |
198 | - break; // find the target point then break the loop | |
199 | - } | |
200 | - else if (query.dim[split_axis] < cur->split_value) { // if it has son node, visit the next node on either left side or right side | |
201 | - cur = cur->left; | |
202 | - } | |
203 | - else { | |
204 | - cur = cur->right; | |
205 | - } | |
206 | - } | |
207 | - *index = best_index; | |
208 | - *distance = best_distance; | |
209 | - } | |
210 | - | |
211 | - void cpu_search_at_node_range(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, T range, size_t *index, T *distance) { | |
212 | - T best_distance = FLT_MAX; // initialize the best distance to max of floating point | |
213 | - size_t best_index = 0; | |
214 | - std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points; | |
215 | - std::vector < typename kdtree::kdnode<T>*> next_node; | |
216 | - next_node.push_back(cur); | |
217 | - while (next_node.size()) { | |
218 | - std::vector<typename kdtree::kdnode<T>*> next_search; | |
219 | - while (next_node.size()) { | |
220 | - cur = next_node.back(); | |
221 | - next_node.pop_back(); | |
222 | - size_t split_axis = cur->level % D; | |
223 | - if (cur->left == NULL) { | |
224 | - for (size_t i = 0; i < cur->indices.size(); i++) { | |
225 | - size_t idx = cur->indices[i]; | |
226 | - T d = cpu_distance(query, pts[idx]); | |
227 | - if (d < best_distance) { | |
228 | - best_distance = d; | |
229 | - best_index = idx; | |
230 | - } | |
231 | - } | |
232 | - } | |
233 | - else { | |
234 | - T d = query.dim[split_axis] - cur->split_value; // computer distance along specific axis or dimension | |
235 | - /// there are three possibilities: on either left or right, and on both left and right | |
236 | - if (fabs(d) > range) { // absolute value of floating point to see if distance will be larger that best_dist | |
237 | - if (d < 0) | |
238 | - next_search.push_back(cur->left); // every left[split_axis] is less and equal to cur->split_value, so it is possible to find the nearest point in this region | |
239 | - else | |
240 | - next_search.push_back(cur->right); | |
241 | - } | |
242 | - else { // it is possible that nereast neighbor will appear on both left and right | |
243 | - next_search.push_back(cur->left); | |
244 | - next_search.push_back(cur->right); | |
245 | - } | |
246 | - } | |
247 | - } | |
248 | - next_node = next_search; // pop out at least one time | |
249 | - } | |
250 | - *index = best_index; | |
251 | - *distance = best_distance; | |
252 | - } | |
253 | - | |
254 | - void cpu_search(T *h_query_points, size_t query_count, size_t *h_indices, T *h_distances) { | |
255 | - /// first convert the input query point into specific type | |
256 | - kdtree::point<T, D> query; | |
257 | - for (size_t j = 0; j < query_count; j++) { | |
258 | - for (size_t i = 0; i < D; i++) | |
259 | - query.dim[i] = h_query_points[j * D + i]; | |
260 | - /// find the nearest node, this will be the upper bound for the next time searching | |
261 | - kdtree::kdnode<T> *best_node = NULL; | |
262 | - T best_distance = FLT_MAX; | |
263 | - size_t best_index = 0; | |
264 | - T radius = 0; // radius for range | |
265 | - cpu_search_at_node(root, query, &best_index, &best_distance, &best_node); // simple search to rougly determine a result for next search step | |
266 | - radius = sqrt(best_distance); // It is possible that nearest will appear in another region | |
267 | - /// find other possibilities | |
268 | - kdtree::kdnode<T> *cur = best_node; | |
269 | - while (cur->parent != NULL) { // every node that you pass will be possible to be the best node | |
270 | - /// go up | |
271 | - kdtree::kdnode<T> *parent = cur->parent; // travel back to every node that we pass through | |
272 | - size_t split_axis = (parent->level) % D; | |
273 | - /// search other nodes | |
274 | - size_t tmp_index; | |
275 | - T tmp_distance = FLT_MAX; | |
276 | - if (fabs(parent->split_value - query.dim[split_axis]) <= radius) { | |
277 | - /// search opposite node | |
278 | - if (parent->left != cur) | |
279 | - cpu_search_at_node_range(parent->left, query, radius, &tmp_index, &tmp_distance); // to see whether it is its mother node's left son node | |
280 | - else | |
281 | - cpu_search_at_node_range(parent->right, query, radius, &tmp_index, &tmp_distance); | |
282 | - } | |
283 | - if (tmp_distance < best_distance) { | |
284 | - best_distance = tmp_distance; | |
285 | - best_index = tmp_index; | |
286 | - } | |
287 | - cur = parent; | |
288 | - } | |
289 | - h_indices[j] = best_index; | |
290 | - h_distances[j] = best_distance; | |
291 | - } | |
292 | - } | |
293 | - }; //end class kdtree | |
294 | - | |
295 | - template <typename T, int D> | |
296 | - cpu_kdtree<T, D>* cpu_kdtree<T, D>::cur_tree_ptr = NULL; // definition of cur_tree_ptr pointer points to the current class | |
50 | + } // end of namespace cpu_kdtree | |
297 | 51 | |
298 | 52 | template <typename T> |
299 | 53 | struct cuda_kdnode { |
... | ... | @@ -305,7 +59,7 @@ namespace stim { |
305 | 59 | }; |
306 | 60 | |
307 | 61 | template <typename T, int D> |
308 | - __device__ T gpu_distance(kdtree::point<T, D> &a, kdtree::point<T, D> &b) { | |
62 | + __device__ T gpu_distance(cpu_kdtree::point<T, D> &a, cpu_kdtree::point<T, D> &b) { | |
309 | 63 | T distance = 0; |
310 | 64 | |
311 | 65 | for (size_t i = 0; i < D; i++) { |
... | ... | @@ -316,7 +70,7 @@ namespace stim { |
316 | 70 | } |
317 | 71 | |
318 | 72 | template <typename T, int D> |
319 | - __device__ void search_at_node(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, int cur, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, int *d_node) { | |
73 | + __device__ void search_at_node(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, int cur, cpu_kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, int *d_node) { | |
320 | 74 | T best_distance = FLT_MAX; |
321 | 75 | size_t best_index = 0; |
322 | 76 | |
... | ... | @@ -346,7 +100,7 @@ namespace stim { |
346 | 100 | } |
347 | 101 | |
348 | 102 | template <typename T, int D> |
349 | - __device__ void search_at_node_range(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, int cur, T range, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) { | |
103 | + __device__ void search_at_node_range(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> &d_query_point, int cur, T range, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) { | |
350 | 104 | T best_distance = FLT_MAX; |
351 | 105 | size_t best_index = 0; |
352 | 106 | |
... | ... | @@ -405,7 +159,7 @@ namespace stim { |
405 | 159 | } |
406 | 160 | |
407 | 161 | template <typename T, int D> |
408 | - __device__ void search(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) { | |
162 | + __device__ void search(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) { | |
409 | 163 | int best_node = 0; |
410 | 164 | T best_distance = FLT_MAX; |
411 | 165 | size_t best_index = 0; |
... | ... | @@ -438,7 +192,7 @@ namespace stim { |
438 | 192 | } |
439 | 193 | |
440 | 194 | template <typename T, int D> |
441 | - __global__ void search_batch(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> *d_query_points, size_t d_query_count, size_t *d_indices, T *d_distances, int *next_nodes, int *next_search_nodes, int *Judge) { | |
195 | + __global__ void search_batch(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> *d_query_points, size_t d_query_count, size_t *d_indices, T *d_distances, int *next_nodes, int *next_search_nodes, int *Judge) { | |
442 | 196 | size_t idx = blockIdx.x * blockDim.x + threadIdx.x; |
443 | 197 | if (idx >= d_query_count) return; // avoid segfault |
444 | 198 | |
... | ... | @@ -446,11 +200,11 @@ namespace stim { |
446 | 200 | } |
447 | 201 | |
448 | 202 | template <typename T, int D> |
449 | - void search_stream(cuda_kdnode<T> *d_nodes, size_t *d_index, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> *query_stream_points, size_t stream_count, size_t *indices, T *distances) { | |
203 | + void search_stream(cuda_kdnode<T> *d_nodes, size_t *d_index, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> *query_stream_points, size_t stream_count, size_t *indices, T *distances) { | |
450 | 204 | unsigned int threads = (unsigned int)(stream_count > 1024 ? 1024 : stream_count); |
451 | 205 | unsigned int blocks = (unsigned int)(stream_count / threads + (stream_count % threads ? 1 : 0)); |
452 | 206 | |
453 | - kdtree::point<T, D> *d_query_points; | |
207 | + cpu_kdtree::point<T, D> *d_query_points; | |
454 | 208 | size_t *d_indices; |
455 | 209 | T *d_distances; |
456 | 210 | |
... | ... | @@ -480,26 +234,121 @@ namespace stim { |
480 | 234 | HANDLE_ERROR(cudaFree(d_distances)); |
481 | 235 | } |
482 | 236 | |
483 | - template <typename T, int D = 3> | |
484 | - class cuda_kdtree { | |
237 | + template <typename T, int D = 3> // set dimension of data to default 3 | |
238 | + class kdtree { | |
485 | 239 | protected: |
486 | - cuda_kdnode<T> *d_nodes; | |
487 | - size_t *d_index; | |
488 | - kdtree::point<T, D>* d_reference_points; | |
489 | - size_t npts; | |
490 | - int num_nodes; | |
240 | + int current_axis; // current judging axis | |
241 | + int n_id; // store the total number of nodes | |
242 | + std::vector < typename cpu_kdtree::point<T, D> > *tmp_points; // transfer or temperary points | |
243 | + std::vector < typename cpu_kdtree::point<T, D> > cpu_tmp_points; // for cpu searching | |
244 | + cpu_kdtree::cpu_kdnode<T> *root; // root node | |
245 | + static kdtree<T, D> *cur_tree_ptr; | |
246 | + #ifdef __CUDACC__ | |
247 | + cuda_kdnode<T> *d_nodes; | |
248 | + size_t *d_index; | |
249 | + cpu_kdtree::point<T, D>* d_reference_points; | |
250 | + size_t npts; | |
251 | + int num_nodes; | |
252 | + #endif | |
491 | 253 | public: |
492 | - ~cuda_kdtree() { | |
254 | + kdtree() { // constructor for creating a cpu_kdtree | |
255 | + cur_tree_ptr = this; // create a class pointer points to the current class value | |
256 | + n_id = 0; // set total number of points to default 0 | |
257 | + } | |
258 | + | |
259 | + ~kdtree() { // destructor of cpu_kdtree | |
260 | + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_nodes; | |
261 | + next_nodes.push_back(root); | |
262 | + while (next_nodes.size()) { | |
263 | + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_search_nodes; | |
264 | + while (next_nodes.size()) { | |
265 | + cpu_kdtree::cpu_kdnode<T> *cur = next_nodes.back(); | |
266 | + next_nodes.pop_back(); | |
267 | + if (cur->left) | |
268 | + next_search_nodes.push_back(cur->left); | |
269 | + if (cur->right) | |
270 | + next_search_nodes.push_back(cur->right); | |
271 | + delete cur; | |
272 | + } | |
273 | + next_nodes = next_search_nodes; | |
274 | + } | |
275 | + root = NULL; | |
276 | + #ifdef __CUDACC__ | |
493 | 277 | HANDLE_ERROR(cudaFree(d_nodes)); |
494 | 278 | HANDLE_ERROR(cudaFree(d_index)); |
495 | 279 | HANDLE_ERROR(cudaFree(d_reference_points)); |
280 | + #endif | |
496 | 281 | } |
497 | - | |
498 | - /// Create a KD-tree given a pointer to an array of reference points and the number of reference points | |
499 | - /// @param h_reference_points is a host array containing the reference points in (x0, y0, z0, ...., ) order | |
500 | - /// @param reference_count is the number of reference point in the array | |
501 | - /// @param max_levels is the deepest number of tree levels allowed | |
502 | - void create(T *h_reference_points, size_t reference_count, size_t max_levels = 3) { | |
282 | + | |
283 | + void cpu_create(std::vector < typename cpu_kdtree::point<T, D> > &reference_points, size_t max_levels) { | |
284 | + tmp_points = &reference_points; | |
285 | + root = new cpu_kdtree::cpu_kdnode<T>(); // initializing the root node | |
286 | + root->idx = n_id++; // the index of root is 0 | |
287 | + root->level = 0; // tree level begins at 0 | |
288 | + root->indices.resize(reference_points.size()); // get the number of points | |
289 | + for (size_t i = 0; i < reference_points.size(); i++) { | |
290 | + root->indices[i] = i; // set indices of input points | |
291 | + } | |
292 | + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_nodes; // next nodes | |
293 | + next_nodes.push_back(root); // push back the root node | |
294 | + while (next_nodes.size()) { | |
295 | + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_search_nodes; // next search nodes | |
296 | + while (next_nodes.size()) { // two same WHILE is because we need to make a new vector to store nodes for search | |
297 | + cpu_kdtree::cpu_kdnode<T> *current_node = next_nodes.back(); // handle node one by one (right first) | |
298 | + next_nodes.pop_back(); // pop out current node in order to store next round of nodes | |
299 | + if (current_node->level < max_levels) { | |
300 | + if (current_node->indices.size() > 1) { // split if the nonleaf node contains more than one point | |
301 | + cpu_kdtree::cpu_kdnode<T> *left = new cpu_kdtree::cpu_kdnode<T>(); | |
302 | + cpu_kdtree::cpu_kdnode<T> *right = new cpu_kdtree::cpu_kdnode<T>(); | |
303 | + left->idx = n_id++; // set the index of current node's left node | |
304 | + right->idx = n_id++; | |
305 | + split(current_node, left, right); // split left and right and determine a node | |
306 | + std::vector <size_t> temp; // empty vecters of int | |
307 | + //temp.resize(current_node->indices.size()); | |
308 | + current_node->indices.swap(temp); // clean up current node's indices | |
309 | + current_node->left = left; | |
310 | + current_node->right = right; | |
311 | + current_node->left_idx = left->idx; | |
312 | + current_node->right_idx = right->idx; | |
313 | + if (right->indices.size()) | |
314 | + next_search_nodes.push_back(right); // left pop out first | |
315 | + if (left->indices.size()) | |
316 | + next_search_nodes.push_back(left); | |
317 | + } | |
318 | + } | |
319 | + } | |
320 | + next_nodes = next_search_nodes; // go deeper within the tree | |
321 | + } | |
322 | + } | |
323 | + | |
324 | + static bool sort_points(const size_t a, const size_t b) { // create functor for std::sort | |
325 | + std::vector < typename cpu_kdtree::point<T, D> > &pts = *cur_tree_ptr->tmp_points; // put cur_tree_ptr to current input points' pointer | |
326 | + return pts[a].dim[cur_tree_ptr->current_axis] < pts[b].dim[cur_tree_ptr->current_axis]; | |
327 | + } | |
328 | + | |
329 | + void split(cpu_kdtree::cpu_kdnode<T> *cur, cpu_kdtree::cpu_kdnode<T> *left, cpu_kdtree::cpu_kdnode<T> *right) { | |
330 | + std::vector < typename cpu_kdtree::point<T, D> > &pts = *tmp_points; | |
331 | + current_axis = cur->level % D; // indicate the judicative dimension or axis | |
332 | + std::sort(cur->indices.begin(), cur->indices.end(), sort_points); // using SortPoints as comparison function to sort the data | |
333 | + size_t mid_value = cur->indices[cur->indices.size() / 2]; // odd in the mid_value, even take the floor | |
334 | + cur->split_value = pts[mid_value].dim[current_axis]; // get the parent node | |
335 | + left->parent = cur; // set the parent of the next search nodes to current node | |
336 | + right->parent = cur; | |
337 | + left->level = cur->level + 1; // level + 1 | |
338 | + right->level = cur->level + 1; | |
339 | + left->parent_idx = cur->idx; // set its parent node's index | |
340 | + right->parent_idx = cur->idx; | |
341 | + for (size_t i = 0; i < cur->indices.size(); i++) { // split into left and right half-space one by one | |
342 | + size_t idx = cur->indices[i]; | |
343 | + if (pts[idx].dim[current_axis] < cur->split_value) | |
344 | + left->indices.push_back(idx); | |
345 | + else | |
346 | + right->indices.push_back(idx); | |
347 | + } | |
348 | + } | |
349 | + | |
350 | + void create(T *h_reference_points, size_t reference_count, size_t max_levels) { | |
351 | + #ifdef __CUDACC__ | |
503 | 352 | if (max_levels > 10) { |
504 | 353 | std::cout<<"The max_tree_levels should be smaller!"<<std::endl; |
505 | 354 | exit(1); |
... | ... | @@ -507,29 +356,28 @@ namespace stim { |
507 | 356 | //bb.init(&h_reference_points[0]); |
508 | 357 | //aaboundingboxing<T, D>(bb, h_reference_points, reference_count); |
509 | 358 | |
510 | - std::vector < typename kdtree::point<T, D>> reference_points(reference_count); // restore the reference points in particular way | |
359 | + std::vector < typename cpu_kdtree::point<T, D>> reference_points(reference_count); // restore the reference points in particular way | |
511 | 360 | for (size_t j = 0; j < reference_count; j++) |
512 | 361 | for (size_t i = 0; i < D; i++) |
513 | - reference_points[j].dim[i] = h_reference_points[j * D + i]; | |
514 | - cpu_kdtree<T, D> tree; // creating a tree on cpu | |
515 | - tree.cpu_create(reference_points, max_levels); // building a tree on cpu | |
516 | - kdtree::kdnode<T> *d_root = tree.get_root(); | |
517 | - num_nodes = tree.get_num_nodes(); | |
362 | + reference_points[j].dim[i] = h_reference_points[j * D + i]; // creating a tree on cpu | |
363 | + (*this).cpu_create(reference_points, max_levels); // building a tree on cpu | |
364 | + cpu_kdtree::cpu_kdnode<T> *d_root = (*this).get_root(); | |
365 | + num_nodes = (*this).get_num_nodes(); | |
518 | 366 | npts = reference_count; // also equals to reference_count |
519 | 367 | |
520 | 368 | HANDLE_ERROR(cudaMalloc((void**)&d_nodes, sizeof(cuda_kdnode<T>) * num_nodes)); // copy data from host to device |
521 | 369 | HANDLE_ERROR(cudaMalloc((void**)&d_index, sizeof(size_t) * npts)); |
522 | - HANDLE_ERROR(cudaMalloc((void**)&d_reference_points, sizeof(kdtree::point<T, D>) * npts)); | |
370 | + HANDLE_ERROR(cudaMalloc((void**)&d_reference_points, sizeof(cpu_kdtree::point<T, D>) * npts)); | |
523 | 371 | |
524 | 372 | std::vector < cuda_kdnode<T> > tmp_nodes(num_nodes); |
525 | 373 | std::vector <size_t> indices(npts); |
526 | - std::vector <kdtree::kdnode<T>*> next_nodes; | |
374 | + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_nodes; | |
527 | 375 | size_t cur_pos = 0; |
528 | 376 | next_nodes.push_back(d_root); |
529 | 377 | while (next_nodes.size()) { |
530 | - std::vector <typename kdtree::kdnode<T>*> next_search_nodes; | |
378 | + std::vector <typename cpu_kdtree::cpu_kdnode<T>*> next_search_nodes; | |
531 | 379 | while (next_nodes.size()) { |
532 | - kdtree::kdnode<T> *cur = next_nodes.back(); | |
380 | + cpu_kdtree::cpu_kdnode<T> *cur = next_nodes.back(); | |
533 | 381 | next_nodes.pop_back(); |
534 | 382 | int id = cur->idx; // the nodes at same level are independent |
535 | 383 | tmp_nodes[id].level = cur->level; |
... | ... | @@ -559,16 +407,154 @@ namespace stim { |
559 | 407 | } |
560 | 408 | HANDLE_ERROR(cudaMemcpy(d_nodes, &tmp_nodes[0], sizeof(cuda_kdnode<T>) * tmp_nodes.size(), cudaMemcpyHostToDevice)); |
561 | 409 | HANDLE_ERROR(cudaMemcpy(d_index, &indices[0], sizeof(size_t) * indices.size(), cudaMemcpyHostToDevice)); |
562 | - HANDLE_ERROR(cudaMemcpy(d_reference_points, &reference_points[0], sizeof(kdtree::point<T, D>) * reference_count, cudaMemcpyHostToDevice)); | |
410 | + HANDLE_ERROR(cudaMemcpy(d_reference_points, &reference_points[0], sizeof(cpu_kdtree::point<T, D>) * reference_count, cudaMemcpyHostToDevice)); | |
411 | + | |
412 | + #else | |
413 | + std::vector < typename cpu_kdtree::point<T, D> > reference_points(reference_count); // restore the reference points in particular way | |
414 | + for (size_t j = 0; j < reference_count; j++) | |
415 | + for (size_t i = 0; i < D; i++) | |
416 | + reference_points[j].dim[i] = h_reference_points[j * D + i]; | |
417 | + cpu_create(reference_points, max_levels); | |
418 | + cpu_tmp_points = *tmp_points; | |
419 | + | |
420 | + #endif | |
421 | + } | |
422 | + | |
423 | + int get_num_nodes() const { // get the total number of nodes | |
424 | + return n_id; | |
425 | + } | |
426 | + | |
427 | + cpu_kdtree::cpu_kdnode<T>* get_root() const { // get the root node of tree | |
428 | + return root; | |
429 | + } | |
430 | + | |
431 | + T cpu_distance(const cpu_kdtree::point<T, D> &a, const cpu_kdtree::point<T, D> &b) { | |
432 | + T distance = 0; | |
433 | + | |
434 | + for (size_t i = 0; i < D; i++) { | |
435 | + T d = a.dim[i] - b.dim[i]; | |
436 | + distance += d*d; | |
437 | + } | |
438 | + return distance; | |
439 | + } | |
440 | + | |
441 | + void cpu_search_at_node(cpu_kdtree::cpu_kdnode<T> *cur, const cpu_kdtree::point<T, D> &query, size_t *index, T *distance, cpu_kdtree::cpu_kdnode<T> **node) { | |
442 | + T best_distance = FLT_MAX; // initialize the best distance to max of floating point | |
443 | + size_t best_index = 0; | |
444 | + std::vector < typename cpu_kdtree::point<T, D> > pts = cpu_tmp_points; | |
445 | + while (true) { | |
446 | + size_t split_axis = cur->level % D; | |
447 | + if (cur->left == NULL) { // risky but acceptable, same goes for right because left and right are in same pace | |
448 | + *node = cur; // pointer points to a pointer | |
449 | + for (size_t i = 0; i < cur->indices.size(); i++) { | |
450 | + size_t idx = cur->indices[i]; | |
451 | + T d = cpu_distance(query, pts[idx]); // compute distances | |
452 | + /// if we want to compute k nearest neighbor, we can input the last resul | |
453 | + /// (last_best_dist < dist < best_dist) to select the next point until reaching to k | |
454 | + if (d < best_distance) { | |
455 | + best_distance = d; | |
456 | + best_index = idx; // record the nearest neighbor index | |
457 | + } | |
458 | + } | |
459 | + break; // find the target point then break the loop | |
460 | + } | |
461 | + else if (query.dim[split_axis] < cur->split_value) { // if it has son node, visit the next node on either left side or right side | |
462 | + cur = cur->left; | |
463 | + } | |
464 | + else { | |
465 | + cur = cur->right; | |
466 | + } | |
467 | + } | |
468 | + *index = best_index; | |
469 | + *distance = best_distance; | |
470 | + } | |
471 | + | |
472 | + void cpu_search_at_node_range(cpu_kdtree::cpu_kdnode<T> *cur, const cpu_kdtree::point<T, D> &query, T range, size_t *index, T *distance) { | |
473 | + T best_distance = FLT_MAX; // initialize the best distance to max of floating point | |
474 | + size_t best_index = 0; | |
475 | + std::vector < typename cpu_kdtree::point<T, D> > pts = cpu_tmp_points; | |
476 | + std::vector < typename cpu_kdtree::cpu_kdnode<T>*> next_node; | |
477 | + next_node.push_back(cur); | |
478 | + while (next_node.size()) { | |
479 | + std::vector<typename cpu_kdtree::cpu_kdnode<T>*> next_search; | |
480 | + while (next_node.size()) { | |
481 | + cur = next_node.back(); | |
482 | + next_node.pop_back(); | |
483 | + size_t split_axis = cur->level % D; | |
484 | + if (cur->left == NULL) { | |
485 | + for (size_t i = 0; i < cur->indices.size(); i++) { | |
486 | + size_t idx = cur->indices[i]; | |
487 | + T d = cpu_distance(query, pts[idx]); | |
488 | + if (d < best_distance) { | |
489 | + best_distance = d; | |
490 | + best_index = idx; | |
491 | + } | |
492 | + } | |
493 | + } | |
494 | + else { | |
495 | + T d = query.dim[split_axis] - cur->split_value; // compute distance along specific axis or dimension | |
496 | + /// there are three possibilities: on either left or right, and on both left and right | |
497 | + if (fabs(d) > range) { // absolute value of floating point to see if distance will be larger than best_dist | |
498 | + if (d < 0) | |
499 | + next_search.push_back(cur->left); // every left[split_axis] is less than or equal to cur->split_value, so it is possible to find the nearest point in this region | |
500 | + else | |
501 | + next_search.push_back(cur->right); | |
502 | + } | |
503 | + else { // it is possible that the nearest neighbor will appear on both left and right | |
504 | + next_search.push_back(cur->left); | |
505 | + next_search.push_back(cur->right); | |
506 | + } | |
507 | + } | |
508 | + } | |
509 | + next_node = next_search; // pop out at least one time | |
510 | + } | |
511 | + *index = best_index; | |
512 | + *distance = best_distance; | |
513 | + } | |
514 | + | |
515 | + void cpu_search(T *h_query_points, size_t query_count, size_t *h_indices, T *h_distances) { | |
516 | + /// first convert the input query point into specific type | |
517 | + cpu_kdtree::point<T, D> query; | |
518 | + for (size_t j = 0; j < query_count; j++) { | |
519 | + for (size_t i = 0; i < D; i++) | |
520 | + query.dim[i] = h_query_points[j * D + i]; | |
521 | + /// find the nearest node, this will be the upper bound for the next time searching | |
522 | + cpu_kdtree::cpu_kdnode<T> *best_node = NULL; | |
523 | + T best_distance = FLT_MAX; | |
524 | + size_t best_index = 0; | |
525 | + T radius = 0; // radius for range | |
526 | + cpu_search_at_node(root, query, &best_index, &best_distance, &best_node); // simple search to roughly determine a result for next search step | |
527 | + radius = sqrt(best_distance); // It is possible that nearest will appear in another region | |
528 | + /// find other possibilities | |
529 | + cpu_kdtree::cpu_kdnode<T> *cur = best_node; | |
530 | + while (cur->parent != NULL) { // every node that you pass will be possible to be the best node | |
531 | + /// go up | |
532 | + cpu_kdtree::cpu_kdnode<T> *parent = cur->parent; // travel back to every node that we pass through | |
533 | + size_t split_axis = (parent->level) % D; | |
534 | + /// search other nodes | |
535 | + size_t tmp_index; | |
536 | + T tmp_distance = FLT_MAX; | |
537 | + if (fabs(parent->split_value - query.dim[split_axis]) <= radius) { | |
538 | + /// search opposite node | |
539 | + if (parent->left != cur) | |
540 | + cpu_search_at_node_range(parent->left, query, radius, &tmp_index, &tmp_distance); // to see whether it is its mother node's left son node | |
541 | + else | |
542 | + cpu_search_at_node_range(parent->right, query, radius, &tmp_index, &tmp_distance); | |
543 | + } | |
544 | + if (tmp_distance < best_distance) { | |
545 | + best_distance = tmp_distance; | |
546 | + best_index = tmp_index; | |
547 | + } | |
548 | + cur = parent; | |
549 | + } | |
550 | + h_indices[j] = best_index; | |
551 | + h_distances[j] = best_distance; | |
552 | + } | |
563 | 553 | } |
564 | 554 | |
565 | - /// Search the KD tree for nearest neighbors to a set of specified query points | |
566 | - /// @param h_query_points an array of query points in (x0, y0, z0, ...) order | |
567 | - /// @param query_count is the number of query points | |
568 | - /// @param indices are the indices to the nearest reference point for each query points | |
569 | - /// @param distances is an array containing the distance between each query point and the nearest reference point | |
570 | 555 | void search(T *h_query_points, size_t query_count, size_t *indices, T *distances) { |
571 | - std::vector < typename kdtree::point<T, D> > query_points(query_count); | |
556 | + #ifdef __CUDACC__ | |
557 | + std::vector < typename cpu_kdtree::point<T, D> > query_points(query_count); | |
572 | 558 | for (size_t j = 0; j < query_count; j++) |
573 | 559 | for (size_t i = 0; i < D; i++) |
574 | 560 | query_points[j].dim[i] = h_query_points[j * D + i]; |
... | ... | @@ -595,7 +581,7 @@ namespace stim { |
595 | 581 | unsigned int threads = (unsigned int)(query_count > 1024 ? 1024 : query_count); |
596 | 582 | unsigned int blocks = (unsigned int)(query_count / threads + (query_count % threads ? 1 : 0)); |
597 | 583 | |
598 | - kdtree::point<T, D> *d_query_points; // create a pointer pointing to query points on gpu | |
584 | + cpu_kdtree::point<T, D> *d_query_points; // create a pointer pointing to query points on gpu | |
599 | 585 | size_t *d_indices; |
600 | 586 | T *d_distances; |
601 | 587 | |
... | ... | @@ -624,64 +610,18 @@ namespace stim { |
624 | 610 | HANDLE_ERROR(cudaFree(d_indices)); |
625 | 611 | HANDLE_ERROR(cudaFree(d_distances)); |
626 | 612 | } |
627 | - } | |
628 | - | |
629 | - /// Return the number of points in the KD tree | |
630 | - size_t num_points() { | |
631 | - return npts; | |
632 | - } | |
633 | 613 | |
634 | - stim::aabbn<T, D> getbox() { | |
635 | - size_t N = npts; | |
636 | - //std::vector < typename kdtree::point<T, D> > cpu_ref(npts); //allocate space on the CPU for the reference points | |
637 | - T* cpu_ref = (T*)malloc(N * D * sizeof(T)); //allocate space on the CPU for the reference points | |
638 | - HANDLE_ERROR(cudaMemcpy(cpu_ref, d_reference_points, N * D * sizeof(T), cudaMemcpyDeviceToHost)); //copy from GPU to CPU | |
614 | + #else | |
615 | + cpu_search(h_query_points, query_count, indices, distances); | |
639 | 616 | |
640 | - stim::aabbn<T, D> bb(cpu_ref); | |
617 | + #endif | |
641 | 618 | |
642 | - for (size_t i = 1; i < N; i++) { //for each reference point | |
643 | - //std::cout << "( " << cpu_ref[i * D + 0] << ", " << cpu_ref[i * D + 1] << ", " << cpu_ref[i * D + 2] << ")" << std::endl; | |
644 | - bb.insert(&cpu_ref[i * D]); | |
645 | - } | |
646 | - return bb; | |
647 | 619 | } |
648 | 620 | |
649 | - //generate an implicit distance field for the KD-tree | |
650 | - void dist_field3(T* dist, size_t* dims, stim::aabbn<T, 3> bb) { | |
651 | - size_t N = 1; //number of query points that make up the distance field | |
652 | - for (size_t d = 0; d < 3; d++) N *= dims[d]; //calculate the total number of query points | |
653 | - | |
654 | - //calculate the grid spatial parameters | |
655 | - T dx = 0; | |
656 | - if (dims[0] > 1) dx = bb.length(0) / dims[0]; | |
657 | - T dy = 0; | |
658 | - if (dims[1] > 1) dy = bb.length(1) / dims[1]; | |
659 | - T dz = 0; | |
660 | - if (dims[2] > 1) dz = bb.length(2) / dims[2]; | |
661 | - | |
662 | - T* Q = (T*)malloc(N * 3 * sizeof(T)); //allocate space for the query points | |
663 | - size_t i; | |
664 | - for (size_t z = 0; z < dims[2]; z++) { //for each query point (which is a point in the grid) | |
665 | - for (size_t y = 0; y < dims[1]; y++) { | |
666 | - for (size_t x = 0; x < dims[0]; x++) { | |
667 | - i = z * dims[1] * dims[0] + y * dims[0] + x; | |
668 | - Q[i * 3 + 0] = bb.low[0] + x * dx + dx / 2; | |
669 | - Q[i * 3 + 1] = bb.low[1] + y * dy + dy / 2; | |
670 | - Q[i * 3 + 2] = bb.low[2] + z * dz + dz / 2; | |
671 | - //std::cout << i<<" "<<Q[i * 3 + 0] << " " << Q[i * 3 + 1] << " " << Q[i * 3 + 2] << std::endl; | |
672 | - } | |
673 | - } | |
674 | - } | |
675 | - size_t* temp = (size_t*)malloc(N * sizeof(size_t)); //allocate space to store the indices (unused) | |
676 | - search(Q, N, temp, dist); | |
677 | - } | |
621 | + }; //end class kdtree | |
678 | 622 | |
679 | - //generate an implicit distance field for the KD-tree | |
680 | - void dist_field3(T* dist, size_t* dims) { | |
681 | - stim::aabbn<T, D> bb = getbox(); //get a bounding box around the tree | |
682 | - dist_field3(dist, dims, bb); | |
683 | - } | |
623 | + template <typename T, int D> | |
624 | + kdtree<T, D>* kdtree<T, D>::cur_tree_ptr = NULL; // definition of cur_tree_ptr pointer points to the current class | |
684 | 625 | |
685 | - }; | |
686 | 626 | } //end namespace stim |
687 | 627 | #endif |
688 | 628 | \ No newline at end of file | ... | ... |
stim/visualization/gl_network.h
... | ... | @@ -44,6 +44,23 @@ public: |
44 | 44 | } |
45 | 45 | |
46 | 46 | /// Render the network centerline as a series of line strips. |
47 | + /// glCenterline0 is for only one input | |
48 | + void glCenterline0(){ | |
49 | + if (!glIsList(dlist)) { //if dlist isn't a display list, create it | |
50 | + dlist = glGenLists(1); //generate a display list | |
51 | + glNewList(dlist, GL_COMPILE); //start a new display list | |
52 | + for (unsigned e = 0; e < E.size(); e++) { //for each edge in the network | |
53 | + glBegin(GL_LINE_STRIP); | |
54 | + for (unsigned p = 0; p < E[e].size(); p++) { //for each point on that edge | |
55 | + glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]); //set the vertex position based on the current point | |
56 | + glTexCoord1f(0); //set white color | |
57 | + } | |
58 | + glEnd(); | |
59 | + } | |
60 | + glEndList(); //end the display list | |
61 | + } | |
62 | + glCallList(dlist); // render the display list | |
63 | + } | |
47 | 64 | |
48 | 65 | /// @param m specifies the magnitude value used as the vertex weight (radius, error, etc.) |
49 | 66 | void glCenterline(unsigned m = 0){ | ... | ... |