Commit 32ba91f2e44b2f9c23650a36360051388cec1182

Authored by David Mayerich
2 parents 1ee79b84 fdfdeda0

Merge remote-tracking branch 'refs/remotes/origin/JACK'

Conflicts:
	stim/biomodels/network.h
	stim/math/filters/conv2.h
	stim/visualization/cylinder.h
	stim/visualization/gl_network.h
stim/biomodels/centerline.h
@@ -138,7 +138,7 @@ public:
138 //if the index is not an end point 138 //if the index is not an end point
139 else{ 139 else{
140 140
141 - unsigned int N1 = idx + 1; //calculate the size of both fibers 141 + unsigned int N1 = idx; //calculate the size of both fibers
142 unsigned int N2 = N - idx; 142 unsigned int N2 = N - idx;
143 143
144 fl.resize(2); //set the array size to 2 144 fl.resize(2); //set the array size to 2
@@ -147,7 +147,7 @@ public:
147 fl[1] = stim::centerline<T>(N2); 147 fl[1] = stim::centerline<T>(N2);
148 148
149 //copy both halves of the fiber 149 //copy both halves of the fiber
150 - unsigned int i, d; 150 + unsigned int i;
151 151
152 //first half 152 //first half
153 for(i = 0; i < N1; i++) //for each centerline point 153 for(i = 0; i < N1; i++) //for each centerline point
stim/biomodels/network.h
@@ -15,6 +15,39 @@
15 #include <stim/cuda/cudatools/timer.h> 15 #include <stim/cuda/cudatools/timer.h>
16 16
17 17
  18 +#ifdef __CUDACC__
  19 +//device gaussian function
  20 +__device__ float gaussianFunction(float x, float std){ return expf(-x/(2*std*std));}
  21 +
  22 +//compute metric in parallel
  23 +template <typename T>
  24 +__global__ void d_metric(T* M, size_t n, T* D, float sigma){
  25 + size_t x = blockDim.x * blockIdx.x + threadIdx.x;
  26 + if(x >= n) return;
  27 + M[x] = 1.0f - gaussianFunction(D[x], sigma);
  28 +}
  29 +
  30 +//find the corresponding edge index from array index
  31 +__global__ void d_findedge(size_t* I, size_t n, unsigned* R, size_t* E, size_t ne){
  32 + size_t x = blockDim.x * blockIdx.x + threadIdx.x;
  33 + if(x >= n) return;
  34 + unsigned i = 0;
  35 + size_t N = 0;
  36 + for(unsigned e = 0; e < ne; e++){
  37 + N += E[e];
  38 + if(I[x] < N){
  39 + R[x] = i;
  40 + break;
  41 + }
  42 + i++;
  43 + }
  44 +}
  45 +#endif
  46 +
  47 +// hard-coded factors; const gives them internal linkage so including this header in several translation units is safe
  48 +const int threshold_fac = 10; // a split must leave at least size/threshold_fac points on each side of an edge
  49 +const float metric_fac = 0.6f; // metric threshold for "unmappable" edges; might be related to the distance field
  50 +
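The device helpers above implement the comparison metric: for each point the kd-tree supplies the (squared) distance d to the nearest point of the other network, d_metric turns it into M = 1 - exp(-d/(2*sigma^2)), and d_findedge maps a flat point index back to the edge that owns it. The following standalone sketch is not part of the commit; it only mirrors d_metric with local names so the behavior can be checked in isolation.

```cpp
// Standalone sketch mirroring d_metric(); builds with nvcc on its own.
#include <cstdio>
#include <cuda_runtime.h>

__device__ float gaussian(float x, float std) { return expf(-x / (2 * std * std)); }

__global__ void metric_kernel(float* M, size_t n, const float* D, float sigma) {
    size_t x = blockDim.x * blockIdx.x + threadIdx.x;
    if (x >= n) return;
    M[x] = 1.0f - gaussian(D[x], sigma);            // small distance -> metric near 0 (good match)
}

int main() {
    const size_t n = 4;
    float h_D[n] = { 0.0f, 25.0f, 625.0f, 10000.0f }; // squared nearest-neighbor distances
    float h_M[n];
    float *d_D, *d_M;
    cudaMalloc(&d_D, n * sizeof(float));
    cudaMalloc(&d_M, n * sizeof(float));
    cudaMemcpy(d_D, h_D, n * sizeof(float), cudaMemcpyHostToDevice);
    size_t threads = 256;
    size_t blocks = (n + threads - 1) / threads;    // round up so every element gets a thread
    metric_kernel<<<blocks, threads>>>(d_M, n, d_D, 25.0f);
    cudaMemcpy(h_M, d_M, n * sizeof(float), cudaMemcpyDeviceToHost);
    for (size_t i = 0; i < n; i++) printf("d = %g -> metric = %g\n", h_D[i], h_M[i]);
    cudaFree(d_D); cudaFree(d_M);
    return 0;
}
```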
18 namespace stim{ 51 namespace stim{
19 /** This is the a class that interfaces with gl_spider in order to store the currently 52 /** This is the a class that interfaces with gl_spider in order to store the currently
20 * segmented network. The following data is stored and can be extracted: 53 * segmented network. The following data is stored and can be extracted:
@@ -22,7 +55,6 @@ namespace stim{
22 * 2)Network connectivity (a graph of nodes and edges), reconstructed using ANN library. 55 * 2)Network connectivity (a graph of nodes and edges), reconstructed using ANN library.
23 */ 56 */
24 57
25 -  
26 template<typename T> 58 template<typename T>
27 class network{ 59 class network{
28 60
@@ -60,6 +92,19 @@ class network{
60 return ss.str(); 92 return ss.str();
61 } 93 }
62 94
  95 + std::vector<edge> split(unsigned int idx){
  96 +
  97 + std::vector< stim::cylinder<T> > C;
  98 + C.resize(2);
  99 + C = (*this).cylinder<T>::split(idx);
  100 + std::vector<edge> E(C.size());
  101 +
  102 + for(unsigned e = 0; e < E.size(); e++){
  103 + E[e] = C[e];
  104 + }
  105 + return E;
  106 + }
  107 +
63 }; 108 };
64 109
65 ///Node class that stores the physical position of the node as well as the edges it is connected to (edges that connect to it), As well as any additional data necessary. 110 ///Node class that stores the physical position of the node as well as the edges it is connected to (edges that connect to it), As well as any additional data necessary.
@@ -376,7 +421,8 @@ public:
376 return n; //return the resampled network 421 return n; //return the resampled network
377 } 422 }
378 423
379 - 424 + // host gaussian function
  425 + __host__ float gaussianFunction(float x, float std = 25){ return exp(-x/(2*std*std));} // std default value is 25
380 426
381 /// Calculate the total number of points on all edges. 427 /// Calculate the total number of points on all edges.
382 unsigned total_points(){ 428 unsigned total_points(){
@@ -402,9 +448,6 @@ public:
402 } 448 }
403 } 449 }
404 450
405 - // gaussian function  
406 - float gaussianFunction(float x, float std=25){ return exp(-x/(2*std*std));} // by default std = 25  
407 -  
408 // convert vec3 to array 451 // convert vec3 to array
409 void stim2array(float *a, stim::vec3<T> b){ 452 void stim2array(float *a, stim::vec3<T> b){
410 a[0] = b[0]; 453 a[0] = b[0];
@@ -412,6 +455,16 @@ public:
412 a[2] = b[2]; 455 a[2] = b[2];
413 } 456 }
414 457
  458 + // convert vec3 to array in bunch
  459 + void edge2array(T* a, edge b){
  460 + size_t n = b.size();
  461 + for(size_t i = 0; i < n; i++){
  462 + a[i * 3 + 0] = b[i][0];
  463 + a[i * 3 + 1] = b[i][1];
  464 + a[i * 3 + 2] = b[i][2];
  465 + }
  466 + }
  467 +
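The new edge2array() packs every point of an edge into the interleaved x0,y0,z0,x1,y1,z1,... layout that the kd-tree expects for both its reference and query arrays. A minimal, self-contained illustration of that layout (the edge here is a plain n-by-3 array, not the stim class):

```cpp
#include <cstdio>
#include <vector>

// Mirrors edge2array(): flatten an edge's points into x0,y0,z0,x1,y1,z1,...
// Here an "edge" is just a raw n-by-3 array rather than the stim class.
void edge_to_array(float* a, const float (*pts)[3], size_t n) {
    for (size_t i = 0; i < n; i++)
        for (size_t d = 0; d < 3; d++)
            a[i * 3 + d] = pts[i][d];
}

int main() {
    const float pts[3][3] = { {0, 0, 0}, {1, 0, 0}, {2, 1, 0} };  // three centerline points
    std::vector<float> flat(3 * 3);
    edge_to_array(flat.data(), pts, 3);
    for (float v : flat) printf("%g ", v);   // prints: 0 0 0 1 0 0 2 1 0
    printf("\n");
    return 0;
}
```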
415 /// Calculate the average magnitude across the entire network. 468 /// Calculate the average magnitude across the entire network.
416 /// @param m is the magnitude value to use. The default is 0 (usually radius). 469 /// @param m is the magnitude value to use. The default is 0 (usually radius).
417 T average(unsigned m = 0){ 470 T average(unsigned m = 0){
@@ -419,7 +472,7 @@ public:
419 T M, L; //allocate space for the total magnitude and length 472 T M, L; //allocate space for the total magnitude and length
420 M = L = 0; //initialize both the initial magnitude and length to zero 473 M = L = 0; //initialize both the initial magnitude and length to zero
421 for(unsigned e = 0; e < E.size(); e++){ //for each edge in the network 474 for(unsigned e = 0; e < E.size(); e++){ //for each edge in the network
422 - M += E[e].integrate(m); //get the integrated magnitude 475 + M += E[e].integrate(); //get the integrated magnitude
423 L += E[e].length(); //get the edge length 476 L += E[e].length(); //get the edge length
424 } 477 }
425 478
@@ -427,7 +480,6 @@ public:
427 } 480 }
428 481
429 /// This function compares two networks and returns the percentage of the current network that is missing from A. 482 /// This function compares two networks and returns the percentage of the current network that is missing from A.
430 -  
431 /// @param A is the network to compare to - the field is generated for A 483 /// @param A is the network to compare to - the field is generated for A
432 /// @param sigma is the user-defined tolerance value - smaller values provide a stricter comparison 484 /// @param sigma is the user-defined tolerance value - smaller values provide a stricter comparison
433 stim::network<T> compare(stim::network<T> A, float sigma, int device = -1){ 485 stim::network<T> compare(stim::network<T> A, float sigma, int device = -1){
@@ -437,7 +489,7 @@ public:
437 489
438 T *c; // centerline (array of double pointers) - points on kdtree must be double 490 T *c; // centerline (array of double pointers) - points on kdtree must be double
439 size_t n_data = A.total_points(); // set the number of points 491 size_t n_data = A.total_points(); // set the number of points
440 - c = (T*) malloc(sizeof(T) * n_data * 3); //allocate an array to store all points in the data set 492 + c = (T*) malloc(sizeof(T) * n_data * 3); // allocate an array to store all points in the data set
441 493
442 unsigned t = 0; 494 unsigned t = 0;
443 for(unsigned e = 0; e < A.E.size(); e++){ //for each edge in the network 495 for(unsigned e = 0; e < A.E.size(); e++){ //for each edge in the network
@@ -451,62 +503,362 @@ public:
451 } 503 }
452 504
453 //generate a KD-tree for network A 505 //generate a KD-tree for network A
454 - //float metric = 0.0; // initialize metric to be returned after comparing the network  
455 size_t MaxTreeLevels = 3; // max tree level 506 size_t MaxTreeLevels = 3; // max tree level
456 507
457 #ifdef __CUDACC__ 508 #ifdef __CUDACC__
458 cudaSetDevice(device); 509 cudaSetDevice(device);
459 - stim::cuda_kdtree<T, 3> kdt; // initialize a pointer to a kd tree  
460 -  
461 - //compare each point in the current network to the field produced by A 510 + stim::kdtree<T, 3> kdt; // initialize a pointer to a kd tree
  511 +
462 kdt.create(c, n_data, MaxTreeLevels); // build a KD tree 512 kdt.create(c, n_data, MaxTreeLevels); // build a KD tree
463 - T *dists = new T[1]; // near neighbor distances  
464 - size_t *nnIdx = new size_t[1]; // near neighbor indices // allocate near neigh indices  
465 513
466 - stim::vec3<T> p0, p1;  
467 - T m1;  
468 - //float M = 0; //stores the total metric value  
469 - //float L = 0; //stores the total network length  
470 - T* queryPt = new T[3];  
471 for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A 514 for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A
472 R.E[e].add_mag(0); //add a new magnitude for the metric 515 R.E[e].add_mag(0); //add a new magnitude for the metric
473 size_t errormag_id = R.E[e].nmags() - 1; //get the id for the new magnitude 516 size_t errormag_id = R.E[e].nmags() - 1; //get the id for the new magnitude
  517 +
  518 + size_t n = R.E[e].size(); // the number of points in current edge
  519 + T* queryPt = new T[3 * n];
  520 + T* m1 = new T[n];
  521 + T* dists = new T[n];
  522 + size_t* nnIdx = new size_t[n];
474 523
475 - for(unsigned p = 0; p < R.E[e].size(); p++){ //for each point in the edge 524 + T* d_dists;
  525 + T* d_m1;
  526 + cudaMalloc((void**)&d_dists, n * sizeof(T));
  527 + cudaMalloc((void**)&d_m1, n * sizeof(T));
476 528
477 - p1 = R.E[e][p]; //get the next point in the edge  
478 - stim2array(queryPt, p1);  
479 - kdt.search(queryPt, 1, nnIdx, dists); //find the distance between A and the current network 529 + edge2array(queryPt, R.E[e]);
  530 + kdt.search(queryPt, n, nnIdx, dists);
  531 +
  532 + cudaMemcpy(d_dists, dists, n * sizeof(T), cudaMemcpyHostToDevice); // copy dists from host to device
  533 +
  534 + // configuration parameters
  535 + size_t threads = (1024>n)?n:1024;
  536 + size_t blocks = n/threads + ((n%threads)?1:0); // round up so every point gets a thread
  537 +
  538 + d_metric<<<blocks, threads>>>(d_m1, n, d_dists, sigma); //calculate the metric value based on the distance
  539 +
  540 + cudaMemcpy(m1, d_m1, n * sizeof(T), cudaMemcpyDeviceToHost);
  541 +
  542 + for(unsigned p = 0; p < n; p++){
  543 + R.E[e].set_mag(errormag_id, p, m1[p]);
  544 + }
  545 +
  546 + //d_set_mag<<<blocks, threads>>>(R.E[e].M, errormag_id, n, m1);
  547 + }
  548 +
  549 +#else
  550 + stim::kdtree<T, 3> kdt;
  551 + kdt.create(c, n_data, MaxTreeLevels);
  552 +
  553 + for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A
  554 + R.E[e].add_mag(0); //add a new magnitude for the metric
  555 + size_t errormag_id = R.E[e].nmags() - 1;
  556 +
  557 + size_t n = R.E[e].size(); // the number of points in current edge
  558 + T* queryPt = new T[3 * n]; // flattened query points for this edge
  559 + T* m1 = new T[n];
  560 + T* dists = new T[n];
  561 + size_t* nnIdx = new size_t[n];
  562 +
  563 + edge2array(queryPt, R.E[e]);
  564 +
  565 + kdt.cpu_search(queryPt, n, nnIdx, dists); //find the distance between A and the current network
  566 +
  567 + for(unsigned p = 0; p < R.E[e].size(); p++){
  568 + m1[p] = 1.0f - gaussianFunction((T)dists[p], sigma); //calculate the metric value based on the distance
  569 + R.E[e].set_mag(errormag_id, p, m1[p]); //set the error for the second point in the segment
  570 + }
  571 + }
  572 +#endif
  573 + return R; //return the resulting network
  574 + }
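A hedged usage sketch for the reworked compare(): it now flattens each edge with edge2array(), issues one batched kd-tree query per edge, and (under nvcc) evaluates the metric on the GPU with d_metric. The loader calls are hypothetical placeholders; only compare() and the per-edge magnitude it appends are taken from the code above.

```cpp
// Sketch only: load_obj() is a placeholder, not shown in this commit.
#include <stim/biomodels/network.h>

int main() {
    stim::network<float> GT;       // ground-truth network
    stim::network<float> T;        // test (segmented) network
    // GT.load_obj("gt.obj");      // hypothetical loaders
    // T.load_obj("test.obj");

    float sigma = 25.0f;           // tolerance: larger sigma forgives larger distances
    stim::network<float> R = T.compare(GT, sigma, 0);   // device 0 is used when built with nvcc

    // Each edge of R now carries an extra magnitude holding 1 - exp(-d/(2*sigma^2))
    // per point, where d is the distance to the nearest point of GT.
    return 0;
}
```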
  575 +
  576 + /// This function compares two networks and splits the current one according to the nearest neighbor of each point in each edge
  577 + /// @param A is the network to split
  578 + /// @param B is the corresponding mapping network
  579 + /// @param sigma is the user-defined tolerance value - smaller values provide a stricter comparison
  580 + /// @param device is the CUDA device to use
  581 + void split(stim::network<T> A, stim::network<T> B, float sigma, int device){
  582 +
  583 + T *c;
  584 + size_t n_data = B.total_points();
  585 + c = (T*) malloc(sizeof(T) * n_data * 3);
  586 +
  587 + size_t NB = B.E.size(); // the number of edges in B
  588 + unsigned t = 0;
  589 + for(unsigned e = 0; e < NB; e++){ // for every edge in B
  590 + for(unsigned p = 0; p < B.E[e].size(); p++){ // for every point in B.E[e]
  591 + for(unsigned d = 0; d < 3; d++){
  592 +
  593 + c[t * 3 + d] = B.E[e][p][d]; // convert to array
  594 + }
  595 + t++;
  596 + }
  597 + }
  598 + size_t MaxTreeLevels = 3; // max tree level
  599 +
  600 +#ifdef __CUDACC__
  601 + cudaSetDevice(device);
  602 + stim::kdtree<T, 3> kdt; // initialize a pointer to a kd tree
  603 +
  604 + //compare each point in the current network to the field produced by A
  605 + kdt.create(c, n_data, MaxTreeLevels); // build a KD tree
  606 +
  607 + std::vector<std::vector<unsigned>> relation; // the relationship between GT and T corresponding to NN
  608 + relation.resize(A.E.size());
  609 +
  610 + for(unsigned e = 0; e < A.E.size(); e++){ //for each edge in A
  611 + A.E[e].add_mag(0); //add a new magnitude for the metric
  612 + size_t errormag_id = A.E[e].nmags() - 1;
  613 +
  614 + size_t n = A.E[e].size(); // the number of points in the current edge
  615 +
  616 + T* queryPt = new T[3 * n]; // set of all the points in current edge
  617 + T* m1 = new T[n]; // array of metrics for every point in current edge
  618 + T* dists = new T[n]; // store the distances for every point in current edge
  619 + size_t* nnIdx = new size_t[n]; // store the indices for every point in current edge
  620 +
  621 + // define pointers in device
  622 + T* d_dists;
  623 + T* d_m1;
  624 + size_t* d_nnIdx;
  625 +
  626 + // allocate memory for defined pointers
  627 + cudaMalloc((void**)&d_dists, n * sizeof(T));
  628 + cudaMalloc((void**)&d_m1, n * sizeof(T));
  629 + cudaMalloc((void**)&d_nnIdx, n * sizeof(size_t));
  630 +
  631 + edge2array(queryPt, A.E[e]); // convert edge to array
  632 + kdt.search(queryPt, n, nnIdx, dists); // search the tree to find the NN for every point in current edge
  633 +
  634 + cudaMemcpy(d_dists, dists, n * sizeof(T), cudaMemcpyHostToDevice); // copy dists from host to device
  635 + cudaMemcpy(d_nnIdx, nnIdx, n * sizeof(size_t), cudaMemcpyHostToDevice); // copy Idx from host to device
  636 +
  637 + // configuration parameters
  638 + size_t threads = (1024>n)?n:1024; // use at most 1024 threads per block
  639 + size_t blocks = n/threads + ((n%threads)?1:0); // round up so every point gets a thread
  640 +
  641 + d_metric<<<blocks, threads>>>(d_m1, n, d_dists, sigma); // calculate the metrics in parallel
  642 +
  643 + cudaMemcpy(m1, d_m1, n * sizeof(T), cudaMemcpyDeviceToHost);
  644 +
  645 + for(unsigned p = 0; p < n; p++){
  646 + A.E[e].set_mag(errormag_id, p, m1[p]); // set the error(radius) value to every point in current edge
  647 + }
  648 +
  649 + relation[e].resize(n); // resize every edge relation size
  650 +
  651 + unsigned* d_relation;
  652 + cudaMalloc((void**)&d_relation, n * sizeof(unsigned)); // allocate memory
  653 +
  654 + std::vector<size_t> edge_point_num(NB); // %th element is the number of points that %th edge has
  655 + for(unsigned ee = 0; ee < NB; ee++)
  656 + edge_point_num[ee] = B.E[ee].size();
  657 +
  658 + size_t* d_edge_point_num;
  659 + cudaMalloc((void**)&d_edge_point_num, NB * sizeof(size_t));
  660 + cudaMemcpy(d_edge_point_num, &edge_point_num[0], NB * sizeof(size_t), cudaMemcpyHostToDevice);
  661 +
  662 + d_findedge<<<blocks, threads>>>(d_nnIdx, n, d_relation, d_edge_point_num, NB); // find the edge corresponding to the array index in parallel
  663 +
  664 + cudaMemcpy(&relation[e][0], d_relation, n * sizeof(unsigned), cudaMemcpyDeviceToHost); //copy relationship from device to host
  665 + }
  666 +#else
  667 + stim::kdtree<T, 3> kdt;
  668 + kdt.create(c, n_data, MaxTreeLevels);
  669 +
  670 + std::vector<std::vector<unsigned>> relation; // the mapping relationship between two networks
  671 + relation.resize(A.E.size());
  672 + for(unsigned i = 0; i < A.E.size(); i++)
  673 + relation[i].resize(A.E[i].size());
  674 +
  675 + std::vector<size_t> edge_point_num(NB); //%th element is the number of points that %th edge has
  676 + for(unsigned ee = 0; ee < NB; ee++)
  677 + edge_point_num[ee] = B.E[ee].size();
  678 +
  679 + for(unsigned e = 0; e < A.E.size(); e++){ //for each edge in A
  680 + A.E[e].add_mag(0); //add a new magnitude for the metric
  681 + size_t errormag_id = A.E[e].nmags() - 1;
  682 +
  683 + size_t n = A.E[e].size(); // the number of points in the current edge
  684 +
  685 + T* queryPt = new T[3 * n];
  686 + T* m1 = new T[n];
  687 + T* dists = new T[n]; //store the distances
  688 + size_t* nnIdx = new size_t[n]; //store the indices
  689 +
  690 + edge2array(queryPt, A.E[e]);
  691 + kdt.search(queryPt, n, nnIdx, dists);
  692 +
  693 + for(unsigned p = 0; p < A.E[e].size(); p++){
  694 + m1[p] = 1.0f - gaussianFunction((T)dists[p], sigma); //calculate the metric value based on the distance
  695 + A.E[e].set_mag(errormag_id, p, m1[p]); //set the error for the second point in the segment
  696 +
  697 + unsigned id = 0; //mapping edge's idx
  698 + size_t num = 0; //total number of points before #th edge
  699 + for(unsigned i = 0; i < NB; i++){
  700 + num += B.E[i].size();
  701 + if(nnIdx[p] < num){ //find the edge it belongs to
  702 + relation[e][p] = id;
  703 + break;
  704 + }
  705 + id++; //current edge won't be the one, move to next edge
  706 + }
  707 + }
  708 + }
  709 +#endif
  710 + E = A.E;
  711 + V = A.V;
  712 +
  713 + unsigned int id = 0; // split value
  714 + for(unsigned e = 0; e < E.size(); e++){ // for every edge
  715 + for(unsigned p = 0; p < E[e].size() - 1; p++){ // for every point in each edge
  716 + if(relation[e][p] != relation[e][p + 1]){ // find the nearest edge changing point
  717 + id = p + 1; // candidate split index: the first point whose nearest edge differs from its predecessor's
  718 + if(id < E[e].size()/threshold_fac || (E[e].size() - id) < E[e].size()/threshold_fac) // tolerance is tentatively 1/threshold_fac of the edge length--the proper threshold still needs to be derived
  719 + id = E[e].size() - 1; // reject splits too close to either end of the edge
  720 + else
  721 + break;
  722 + }
  723 + if(p == E[e].size() - 2) // if there is no splitting index, set the id to the last point index of current edge
  724 + id = p + 1;
  725 + }
  726 + unsigned errormag_id = E[e].nmags() - 1;
  727 + T G = 0; // accumulate the metric to test whether this edge has a counterpart in B
  728 + for(unsigned i = 0; i < E[e].size(); i++)
  729 + G += E[e].m(i, errormag_id); // won't split special edges
  730 + if(G / E[e].size() > metric_fac) // should be based on the color map
  731 + id = E[e].size() - 1; // set split idx to outgoing direction vertex
  732 +
  733 + std::vector<edge> tmpe;
  734 + tmpe.resize(2);
  735 + tmpe = E[e].split(id);
  736 + vertex tmpv = stim::vec3<T>(-1, -1, 0); // store the split point as vertex
  737 + if(tmpe.size() == 2){
  738 + relation.resize(relation.size() + 1);
  739 + for(unsigned d = id; d < E[e].size(); d++)
  740 + relation[relation.size() - 1].push_back(relation[e][d]);
  741 + tmpe[0].v[0] = E[e].v[0]; // beginning vertex of first half edge -> original beginning vertex
  742 + tmpe[1].v[1] = E[e].v[1]; // ending vertex of second half edge -> original ending vertex
  743 + tmpv = E[e][id];
  744 + V.push_back(tmpv);
  745 + tmpe[0].v[1] = (unsigned)V.size() - 1; // ending vertex of first half edge -> new vertex
  746 + tmpe[1].v[0] = (unsigned)V.size() - 1; // beginning vertex of second half edge -> new vertex
  747 + edge tmp(E[e]);
  748 + E[e] = tmpe[0]; // replace original edge by first half edge
  749 + E.push_back(tmpe[1]); // push second half edge to the last
  750 + V[V.size() - 1].e[1].push_back(e); // push first half edge to the incoming of new vertex
  751 + V[V.size() - 1].e[0].push_back((unsigned)E.size() - 1); // push second half edge to the outgoing of new vertex
  752 + for(unsigned i = 0; i < V[tmp.v[1]].e[1].size(); i++) // find the incoming edge of original ending vertex
  753 + if(V[tmp.v[1]].e[1][i] == e)
  754 + V[tmp.v[1]].e[1][i] = (unsigned)V.size() - 1;
  755 + }
  756 + }
  757 + }
  758 +
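Both d_findedge above and the CPU fallback in this #else branch recover, from a flat kd-tree point index, the edge that owns it by walking the cumulative per-edge point counts. A standalone sketch of that lookup in plain C++ (local names only, not the stim types):

```cpp
#include <cstdio>
#include <vector>

// Given the number of points in each edge, return the edge that owns flat index idx.
// Mirrors the cumulative-count walk used by d_findedge and the CPU branch above.
unsigned edge_of_point(size_t idx, const std::vector<size_t>& edge_point_num) {
    size_t total = 0;
    for (unsigned e = 0; e < edge_point_num.size(); e++) {
        total += edge_point_num[e];
        if (idx < total) return e;   // idx falls inside edge e
    }
    return (unsigned)-1;             // idx is out of range
}

int main() {
    std::vector<size_t> counts = { 4, 2, 5 };   // edge 0 owns points 0-3, edge 1 owns 4-5, edge 2 owns 6-10
    size_t queries[] = { 0, 4, 6, 10 };
    for (size_t idx : queries)
        printf("point %zu -> edge %u\n", idx, edge_of_point(idx, counts));
    return 0;
}
```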
  759 + /// This function compares two split networks and yields a mapping between them based on nearest neighbors
  760 + /// @param B is the network that the current network is going to map to
  761 + /// @param C is the mapping: C[e1] = _e1 means edge e1 in the current network maps to edge _e1 in B
  762 + /// @param device is the CUDA device to use
  763 + void mapping(stim::network<T> B, std::vector<unsigned> &C, int device){
  764 + stim::network<T> A; //generate a network storing the result of the comparison
  765 + A = (*this);
  766 +
  767 + size_t n = A.E.size(); // the number of edges in A
  768 + size_t NB = B.E.size(); // the number of edges in B
  769 +
  770 + C.resize(A.E.size());
  771 +
  772 + T *c; // centerline (array of double pointers) - points on kdtree must be double
  773 + size_t n_data = B.total_points(); // set the number of points
  774 + c = (T*) malloc(sizeof(T) * n_data * 3);
  775 +
  776 + unsigned t = 0;
  777 + for(unsigned e = 0; e < NB; e++){ // for each edge in the network
  778 + for(unsigned p = 0; p < B.E[e].size(); p++){ // for each point in the edge
  779 + for(unsigned d = 0; d < 3; d++){ // for each coordinate
  780 +
  781 + c[t * 3 + d] = B.E[e][p][d];
  782 + }
  783 + t++;
  784 + }
  785 + }
480 786
481 - m1 = 1.0f - gaussianFunction((T)dists[0], sigma); //calculate the metric value based on the distance  
482 - R.E[e].set_mag(errormag_id, p, m1); //set the error for the second point in the segment 787 + //generate a KD-tree for network A
  788 + //float metric = 0.0; // initialize metric to be returned after comparing the network
  789 + size_t MaxTreeLevels = 3; // max tree level
  790 +
  791 +#ifdef __CUDACC__
  792 + cudaSetDevice(device);
  793 + stim::kdtree<T, 3> kdt; // initialize a pointer to a kd tree
  794 +
  795 + kdt.create(c, n_data, MaxTreeLevels); // build a KD tree
483 796
  797 + for(unsigned e = 0; e < n; e++){ //for each edge in A
  798 + size_t errormag_id = A.E[e].nmags() - 1; //get the id for the new magnitude
  799 +
  800 + // pre-screen to discard edges that cannot map to any edge in B
  801 + T M = 0;
  802 + for(unsigned p = 0; p < A.E[e].size(); p++)
  803 + M += A.E[e].m(p, errormag_id);
  804 + M = M / A.E[e].size();
  805 + if(M > metric_fac)
  806 + C[e] = (unsigned)-1; // mark unmappable edges with the maximum unsigned value
  807 + else{
  808 + T* queryPt = new T[3];
  809 + T* dists = new T[1];
  810 + size_t* nnIdx = new size_t[1];
  811 +
  812 + stim2array(queryPt, A.E[e][A.E[e].size()/2]);
  813 + kdt.search(queryPt, 1, nnIdx, dists);
  814 +
  815 + unsigned id = 0; //mapping edge's idx
  816 + size_t num = 0; //total number of points before #th edge
  817 + for(unsigned i = 0; i < NB; i++){
  818 + num += B.E[i].size();
  819 + if(nnIdx[0] < num){
  820 + C[e] = id;
  821 + break;
  822 + }
  823 + id++;
  824 + }
484 } 825 }
485 } 826 }
486 #else 827 #else
487 - stim::cpu_kdtree<T, 3> kdt; 828 + stim::kdtree<T, 3> kdt;
488 kdt.create(c, n_data, MaxTreeLevels); 829 kdt.create(c, n_data, MaxTreeLevels);
489 T *dists = new T[1]; // near neighbor distances 830 T *dists = new T[1]; // near neighbor distances
490 size_t *nnIdx = new size_t[1]; // near neighbor indices // allocate near neigh indices 831 size_t *nnIdx = new size_t[1]; // near neighbor indices // allocate near neigh indices
491 832
492 stim::vec3<T> p0, p1; 833 stim::vec3<T> p0, p1;
493 - T m1;  
494 T* queryPt = new T[3]; 834 T* queryPt = new T[3];
495 - for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A  
496 - R.E[e].add_mag(0); //add a new magnitude for the metric  
497 835
498 - for(unsigned p = 0; p < R.E[e].size(); p++){ //for each point in the edge  
499 -  
500 - p1 = R.E[e][p]; //get the next point in the edge 836 + for(unsigned e = 0; e < A.E.size(); e++){ // for each edge in A
  837 + T M = 0; // the sum of metrics of the current edge
  838 + unsigned errormag_id = A.E[e].nmags() - 1;
  839 + for(unsigned p = 0; p < A.E[e].size(); p++)
  840 + M += A.E[e].m(p, errormag_id);
  841 + M = M / A.E[e].size();
  842 + if(M > metric_fac) // if the average metric exceeds metric_fac, assume this edge has no corresponding edge in B
  843 + C[e] = (unsigned)-1;
  844 + else{ // if it should have corresponding edge in B, then...
  845 + p1 = A.E[e][A.E[e].size()/2]; // query with the midpoint of the edge
501 stim2array(queryPt, p1); 846 stim2array(queryPt, p1);
502 - kdt.cpu_search(queryPt, 1, nnIdx, dists); //find the distance between A and the current network  
503 -  
504 - m1 = 1.0f - gaussianFunction((T)dists[0], sigma); //calculate the metric value based on the distance  
505 - R.E[e].set_mag(m1, p, 1); //set the error for the second point in the segment 847 + kdt.cpu_search(queryPt, 1, nnIdx, dists); // search the tree
  848 +
  849 + unsigned id = 0; //mapping edge's idx
  850 + size_t num = 0; //total number of points before #th edge
  851 + for(unsigned i = 0; i < NB; i++){
  852 + num += B.E[i].size();
  853 + if(nnIdx[0] < num){
  854 + C[e] = id;
  855 + break;
  856 + }
  857 + id++;
  858 + }
506 } 859 }
507 } 860 }
508 #endif 861 #endif
509 - return R; //return the resulting network  
510 } 862 }
511 863
512 /// Returns the number of magnitude values stored in each edge. This should be uniform across the network. 864 /// Returns the number of magnitude values stored in each edge. This should be uniform across the network.
@@ -616,4 +968,4 @@ public:
616 } 968 }
617 }; //end stim::network class 969 }; //end stim::network class
618 }; //end stim namespace 970 }; //end stim namespace
619 -#endif 971 -#endif
  972 +#endif
620 \ No newline at end of file 973 \ No newline at end of file
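Taken together, the new members support a split-and-map workflow between a test network and a ground truth: split() cuts every edge of one network wherever its nearest edge in the other network changes, and mapping() then records, for each resulting edge, the index of its nearest counterpart (or (unsigned)-1 when its average metric exceeds metric_fac). A hedged sketch of how these calls might be chained; the loaders are hypothetical and the member signatures follow the diff above.

```cpp
// Sketch only: load_obj() is a placeholder, not part of this commit.
#include <cstdio>
#include <vector>
#include <stim/biomodels/network.h>

int main() {
    stim::network<float> GT, T;            // ground-truth and test networks
    // GT.load_obj("gt.obj");              // hypothetical loaders
    // T.load_obj("test.obj");

    float sigma = 25.0f;                   // comparison tolerance

    stim::network<float> Ts, GTs;
    Ts.split(T, GT, sigma, 0);             // Ts = T, cut wherever the nearest GT edge changes
    GTs.split(GT, T, sigma, 0);            // GTs = GT, cut against T the same way

    std::vector<unsigned> C;               // C[e] = index of the GTs edge nearest to Ts edge e,
    Ts.mapping(GTs, C, 0);                 //        or (unsigned)-1 if the edge has no counterpart

    printf("%u edges mapped\n", (unsigned)C.size());
    return 0;
}
```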
stim/math/filters/conv2.h 0 → 100644
  1 +#ifndef STIM_CUDA_CONV2_H
  2 +#define STIM_CUDA_CONV2_H
  3 +//#define __CUDACC__
  4 +
  5 +#ifdef __CUDACC__
  6 +#include <stim/cuda/cudatools.h>
  7 +#include <stim/cuda/sharedmem.cuh>
  8 +#endif
  9 +
  10 +namespace stim {
  11 +#ifdef __CUDACC__
  12 + //Kernel function that performs the 2D convolution.
  13 + template<typename T, typename K>
  14 + __global__ void kernel_conv2(T* out, T* in, K* kernel, size_t sx, size_t sy, size_t kx, size_t ky) {
  15 + extern __shared__ T s[]; //declare a shared memory array
  16 + size_t xi = blockIdx.x * blockDim.x + threadIdx.x; //threads correspond to indices into the output image
  17 + size_t yi = blockIdx.y * blockDim.y + threadIdx.y;
  18 + size_t tid = threadIdx.y * blockDim.x + threadIdx.x;
  19 + size_t nt = blockDim.x * blockDim.y;
  20 +
  21 + size_t cx = blockIdx.x * blockDim.x; //find the upper left corner of the input region
  22 + size_t cy = blockIdx.y * blockDim.y;
  23 +
  24 + size_t X = sx - kx + 1; //calculate the size of the output image
  25 + size_t Y = sy - ky + 1;
  26 +
  27 + if (cx >= X || cy >= Y) return; //return if the entire block is outside the image
  28 + size_t smx = min(blockDim.x + kx - 1, sx - cx); //size of the shared copy of the input image
  29 + size_t smy = min(blockDim.y + ky - 1, sy - cy); // min function is used to deal with boundary blocks
  30 + stim::cuda::threadedMemcpy2D<T>(s, smx, smy, in, cx, cy, sx, sy, tid, nt); //copy the input region to shared memory
  31 + __syncthreads();
  32 +
  33 + if (xi >= X || yi >= Y) return; //returns if the thread is outside of the output image
  34 +
  35 + //loop through the kernel
  36 + size_t kxi, kyi;
  37 + K v = 0;
  38 + for (kyi = 0; kyi < ky; kyi++) {
  39 + for (kxi = 0; kxi < kx; kxi++) {
  40 + v += s[(threadIdx.y + kyi) * smx + threadIdx.x + kxi] * kernel[kyi * kx + kxi];
  41 + //v += in[(yi + kyi) * sx + xi + kxi] * kernel[kyi * kx + kxi];
  42 + }
  43 + }
  44 + out[yi * X + xi] = (T)v; //write the result to global memory
  45 +
  46 + }
  47 +
  48 + //Performs a convolution of a 2D image using the GPU. All pointers are assumed to be to memory on the current device.
  49 + //@param out is a pointer to the output image
  50 + //@param in is a pointer to the input image
  51 + //@param sx is the size of the input image along X
  52 + //@param sy is the size of the input image along Y
  53 + //@param kx is the size of the kernel along X
  54 + //@param ky is the size of the kernel along Y
  55 + template<typename T, typename K>
  56 + void gpu_conv2(T* out, T* in, K* kernel, size_t sx, size_t sy, size_t kx, size_t ky) {
  57 + cudaDeviceProp p;
  58 + HANDLE_ERROR(cudaGetDeviceProperties(&p, 0));
  59 + size_t tmax = p.maxThreadsPerBlock;
  60 + dim3 nt(sqrt(tmax), sqrt(tmax)); //calculate the block dimensions
  61 + size_t X = sx - kx + 1; //calculate the size of the output image
  62 + size_t Y = sy - ky + 1;
  63 + dim3 nb(X / nt.x + 1, Y / nt.y + 1); //calculate the grid dimensions
  64 + size_t sm = (nt.x + kx - 1) * (nt.y + ky - 1) * sizeof(T); //shared memory bytes required to store block data
  65 + if (sm > p.sharedMemPerBlock) {
  66 + std::cout << "Error in stim::gpu_conv2() - insufficient shared memory for this kernel." << std::endl;
  67 + exit(1);
  68 + }
  69 + kernel_conv2 <<<nb, nt, sm>>> (out, in, kernel, sx, sy, kx, ky); //launch the kernel
  70 + }
  71 +#endif
  72 + //Performs a convolution of a 2D image. Only valid pixels based on the kernel are returned.
  73 + // As a result, the output image will be smaller than the input image by (kx-1, ky-1)
  74 + //@param out is a pointer to the output image
  75 + //@param in is a pointer to the input image
  76 + //@param sx is the size of the input image along X
  77 + //@param sy is the size of the input image along Y
  78 + //@param kx is the size of the kernel along X
  79 + //@param ky is the size of the kernel along Y
  80 + template<typename T, typename K>
  81 + void cpu_conv2(T* out, T* in, K* kernel, size_t sx, size_t sy, size_t kx, size_t ky) {
  82 + size_t X = sx - kx + 1; //x size of the output image
  83 + size_t Y = sy - ky + 1; //y size of the output image
  84 +
  85 +#ifdef __CUDACC__
  86 + //allocate memory and copy everything to the GPU
  87 + T* gpu_in;
  88 + HANDLE_ERROR(cudaMalloc(&gpu_in, sx * sy * sizeof(T)));
  89 + HANDLE_ERROR(cudaMemcpy(gpu_in, in, sx * sy * sizeof(T), cudaMemcpyHostToDevice));
  90 + K* gpu_kernel;
  91 + HANDLE_ERROR(cudaMalloc(&gpu_kernel, kx * ky * sizeof(K)));
  92 + HANDLE_ERROR(cudaMemcpy(gpu_kernel, kernel, kx * ky * sizeof(K), cudaMemcpyHostToDevice));
  93 + T* gpu_out;
  94 + HANDLE_ERROR(cudaMalloc(&gpu_out, X * Y * sizeof(T)));
  95 + gpu_conv2(gpu_out, gpu_in, gpu_kernel, sx, sy, kx, ky); //execute the GPU kernel
  96 + HANDLE_ERROR(cudaMemcpy(out, gpu_out, X * Y * sizeof(T), cudaMemcpyDeviceToHost)); //copy the result to the host
  97 + HANDLE_ERROR(cudaFree(gpu_in));
  98 + HANDLE_ERROR(cudaFree(gpu_kernel));
  99 + HANDLE_ERROR(cudaFree(gpu_out));
  100 +#else
  101 + K v; //register stores the integral of the current pixel value
  102 + size_t yi, xi, kyi, kxi, yi_kyi_sx;
  103 + for (yi = 0; yi < Y; yi++) { //for each pixel in the output image
  104 + for (xi = 0; xi < X; xi++) {
  105 + v = 0;
  106 + for (kyi = 0; kyi < ky; kyi++) { //for each pixel in the kernel
  107 + yi_kyi_sx = (yi + kyi) * sx;
  108 + for (kxi = 0; kxi < kx; kxi++) {
  109 + v += in[yi_kyi_sx + xi + kxi] * kernel[kyi * kx + kxi];
  110 + }
  111 + }
  112 + out[yi * X + xi] = v; //save the result to the output array
  113 + }
  114 + }
  115 +
  116 +#endif
  117 + }
  118 +
  119 +
  120 +}
  121 +
  122 +
  123 +#endif
0 \ No newline at end of file 124 \ No newline at end of file
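A short usage sketch for the new valid-mode 2D convolution in conv2.h. Compiled without nvcc it exercises the CPU path of cpu_conv2(); with nvcc the same call stages the data on the GPU. The output is (sx-kx+1) by (sy-ky+1), here 3 by 2 for a 5 by 4 image and a 3 by 3 box kernel. The include path follows this diff; everything else is illustrative.

```cpp
#include <cstdio>
#include <stim/math/filters/conv2.h>

int main() {
    const size_t sx = 5, sy = 4;          // input image: 5 columns, 4 rows (row-major, sx is the stride)
    const size_t kx = 3, ky = 3;          // 3x3 kernel
    const size_t X = sx - kx + 1;         // valid output width  = 3
    const size_t Y = sy - ky + 1;         // valid output height = 2

    float in[sx * sy], out[X * Y], k[kx * ky];
    for (size_t i = 0; i < sx * sy; i++) in[i] = (float)i;      // simple ramp image
    for (size_t i = 0; i < kx * ky; i++) k[i] = 1.0f / 9.0f;    // box (mean) filter

    stim::cpu_conv2(out, in, k, sx, sy, kx, ky);

    for (size_t y = 0; y < Y; y++) {
        for (size_t x = 0; x < X; x++) printf("%6.2f ", out[y * X + x]);
        printf("\n");
    }
    return 0;
}
```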
stim/structures/kdtree.cuh
1 -// right now the size of CUDA STACK is set to 1000, increase it if you mean to make deeper tree 1 +// right now the size of CUDA STACK is set to 50, increase it if you mean to make deeper tree
2 // data should be stored in row-major 2 // data should be stored in row-major
3 // x1,x2,x3,x4,x5...... 3 // x1,x2,x3,x4,x5......
4 // y1,y2,y3,y4,y5...... 4 // y1,y2,y3,y4,y5......
@@ -22,16 +22,16 @@
22 #include <stim/visualization/aabbn.h> 22 #include <stim/visualization/aabbn.h>
23 23
24 namespace stim { 24 namespace stim {
25 - namespace kdtree { 25 + namespace cpu_kdtree {
26 template<typename T, int D> // typename refers to float or double while D refers to dimension of points 26 template<typename T, int D> // typename refers to float or double while D refers to dimension of points
27 struct point { 27 struct point {
28 T dim[D]; // create a structure to store every one input point 28 T dim[D]; // create a structure to store every one input point
29 }; 29 };
30 30
31 template<typename T> 31 template<typename T>
32 - class kdnode { 32 + class cpu_kdnode {
33 public: 33 public:
34 - kdnode() { // constructor for initializing a kdnode 34 + cpu_kdnode() { // constructor for initializing a kdnode
35 parent = NULL; // set every node's parent, left and right kdnode pointers to NULL 35 parent = NULL; // set every node's parent, left and right kdnode pointers to NULL
36 left = NULL; 36 left = NULL;
37 right = NULL; 37 right = NULL;
@@ -42,258 +42,12 @@ namespace stim {
42 } 42 }
43 int idx; // index of current node 43 int idx; // index of current node
44 int parent_idx, left_idx, right_idx; // index of parent, left and right nodes 44 int parent_idx, left_idx, right_idx; // index of parent, left and right nodes
45 - kdnode *parent, *left, *right; // parent, left and right kdnodes 45 + cpu_kdnode *parent, *left, *right; // parent, left and right kdnodes
46 T split_value; // splitting value of current node 46 T split_value; // splitting value of current node
47 std::vector <size_t> indices; // it indicates the points' indices that current node has 47 std::vector <size_t> indices; // it indicates the points' indices that current node has
48 size_t level; // tree level of current node 48 size_t level; // tree level of current node
49 }; 49 };
50 - } // end of namespace kdtree  
51 -  
52 - template <typename T, int D = 3> // set dimension of data to default 3  
53 - class cpu_kdtree {  
54 - protected:  
55 - int current_axis; // current judging axis  
56 - int n_id; // store the total number of nodes  
57 - std::vector < typename kdtree::point<T, D> > *tmp_points; // transfer or temperary points  
58 - std::vector < typename kdtree::point<T, D> > cpu_tmp_points; // for cpu searching  
59 - kdtree::kdnode<T> *root; // root node  
60 - static cpu_kdtree<T, D> *cur_tree_ptr;  
61 - public:  
62 - cpu_kdtree() { // constructor for creating a cpu_kdtree  
63 - cur_tree_ptr = this; // create a class pointer points to the current class value  
64 - n_id = 0; // set total number of points to default 0  
65 - }  
66 -  
67 - ~cpu_kdtree() { // destructor of cpu_kdtree  
68 - std::vector <kdtree::kdnode<T>*> next_nodes;  
69 - next_nodes.push_back(root);  
70 - while (next_nodes.size()) {  
71 - std::vector <kdtree::kdnode<T>*> next_search_nodes;  
72 - while (next_nodes.size()) {  
73 - kdtree::kdnode<T> *cur = next_nodes.back();  
74 - next_nodes.pop_back();  
75 - if (cur->left)  
76 - next_search_nodes.push_back(cur->left);  
77 - if (cur->right)  
78 - next_search_nodes.push_back(cur->right);  
79 - delete cur;  
80 - }  
81 - next_nodes = next_search_nodes;  
82 - }  
83 - root = NULL;  
84 - }  
85 -  
86 - void cpu_create(std::vector < typename kdtree::point<T, D> > &reference_points, size_t max_levels) {  
87 - tmp_points = &reference_points;  
88 - root = new kdtree::kdnode<T>(); // initializing the root node  
89 - root->idx = n_id++; // the index of root is 0  
90 - root->level = 0; // tree level begins at 0  
91 - root->indices.resize(reference_points.size()); // get the number of points  
92 - for (size_t i = 0; i < reference_points.size(); i++) {  
93 - root->indices[i] = i; // set indices of input points  
94 - }  
95 - std::vector <kdtree::kdnode<T>*> next_nodes; // next nodes  
96 - next_nodes.push_back(root); // push back the root node  
97 - while (next_nodes.size()) {  
98 - std::vector <kdtree::kdnode<T>*> next_search_nodes; // next search nodes  
99 - while (next_nodes.size()) { // two same WHILE is because we need to make a new vector to store nodes for search  
100 - kdtree::kdnode<T> *current_node = next_nodes.back(); // handle node one by one (right first)  
101 - next_nodes.pop_back(); // pop out current node in order to store next round of nodes  
102 - if (current_node->level < max_levels) {  
103 - if (current_node->indices.size() > 1) { // split if the nonleaf node contains more than one point  
104 - kdtree::kdnode<T> *left = new kdtree::kdnode<T>();  
105 - kdtree::kdnode<T> *right = new kdtree::kdnode<T>();  
106 - left->idx = n_id++; // set the index of current node's left node  
107 - right->idx = n_id++;  
108 - split(current_node, left, right); // split left and right and determine a node  
109 - std::vector <size_t> temp; // empty vecters of int  
110 - //temp.resize(current_node->indices.size());  
111 - current_node->indices.swap(temp); // clean up current node's indices  
112 - current_node->left = left;  
113 - current_node->right = right;  
114 - current_node->left_idx = left->idx;  
115 - current_node->right_idx = right->idx;  
116 - if (right->indices.size())  
117 - next_search_nodes.push_back(right); // left pop out first  
118 - if (left->indices.size())  
119 - next_search_nodes.push_back(left);  
120 - }  
121 - }  
122 - }  
123 - next_nodes = next_search_nodes; // go deeper within the tree  
124 - }  
125 - }  
126 -  
127 - static bool sort_points(const size_t a, const size_t b) { // create functor for std::sort  
128 - std::vector < typename kdtree::point<T, D> > &pts = *cur_tree_ptr->tmp_points; // put cur_tree_ptr to current input points' pointer  
129 - return pts[a].dim[cur_tree_ptr->current_axis] < pts[b].dim[cur_tree_ptr->current_axis];  
130 - }  
131 -  
132 - void split(kdtree::kdnode<T> *cur, kdtree::kdnode<T> *left, kdtree::kdnode<T> *right) {  
133 - std::vector < typename kdtree::point<T, D> > &pts = *tmp_points;  
134 - current_axis = cur->level % D; // indicate the judicative dimension or axis  
135 - std::sort(cur->indices.begin(), cur->indices.end(), sort_points); // using SortPoints as comparison function to sort the data  
136 - size_t mid_value = cur->indices[cur->indices.size() / 2]; // odd in the mid_value, even take the floor  
137 - cur->split_value = pts[mid_value].dim[current_axis]; // get the parent node  
138 - left->parent = cur; // set the parent of the next search nodes to current node  
139 - right->parent = cur;  
140 - left->level = cur->level + 1; // level + 1  
141 - right->level = cur->level + 1;  
142 - left->parent_idx = cur->idx; // set its parent node's index  
143 - right->parent_idx = cur->idx;  
144 - for (size_t i = 0; i < cur->indices.size(); i++) { // split into left and right half-space one by one  
145 - size_t idx = cur->indices[i];  
146 - if (pts[idx].dim[current_axis] < cur->split_value)  
147 - left->indices.push_back(idx);  
148 - else  
149 - right->indices.push_back(idx);  
150 - }  
151 - }  
152 -  
153 - void create(T *h_reference_points, size_t reference_count, size_t max_levels) {  
154 - std::vector < typename kdtree::point<T, D> > reference_points(reference_count); // restore the reference points in particular way  
155 - for (size_t j = 0; j < reference_count; j++)  
156 - for (size_t i = 0; i < D; i++)  
157 - reference_points[j].dim[i] = h_reference_points[j * D + i];  
158 - cpu_create(reference_points, max_levels);  
159 - cpu_tmp_points = *tmp_points;  
160 - }  
161 -  
162 - int get_num_nodes() const { // get the total number of nodes  
163 - return n_id;  
164 - }  
165 -  
166 - kdtree::kdnode<T>* get_root() const { // get the root node of tree  
167 - return root;  
168 - }  
169 -  
170 - T cpu_distance(const kdtree::point<T, D> &a, const kdtree::point<T, D> &b) {  
171 - T distance = 0;  
172 -  
173 - for (size_t i = 0; i < D; i++) {  
174 - T d = a.dim[i] - b.dim[i];  
175 - distance += d*d;  
176 - }  
177 - return distance;  
178 - }  
179 -  
180 - void cpu_search_at_node(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, size_t *index, T *distance, kdtree::kdnode<T> **node) {  
181 - T best_distance = FLT_MAX; // initialize the best distance to max of floating point  
182 - size_t best_index = 0;  
183 - std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points;  
184 - while (true) {  
185 - size_t split_axis = cur->level % D;  
186 - if (cur->left == NULL) { // risky but acceptable, same goes for right because left and right are in same pace  
187 - *node = cur; // pointer points to a pointer  
188 - for (size_t i = 0; i < cur->indices.size(); i++) {  
189 - size_t idx = cur->indices[i];  
190 - T d = cpu_distance(query, pts[idx]); // compute distances  
191 - /// if we want to compute k nearest neighbor, we can input the last resul  
192 - /// (last_best_dist < dist < best_dist) to select the next point until reaching to k  
193 - if (d < best_distance) {  
194 - best_distance = d;  
195 - best_index = idx; // record the nearest neighbor index  
196 - }  
197 - }  
198 - break; // find the target point then break the loop  
199 - }  
200 - else if (query.dim[split_axis] < cur->split_value) { // if it has son node, visit the next node on either left side or right side  
201 - cur = cur->left;  
202 - }  
203 - else {  
204 - cur = cur->right;  
205 - }  
206 - }  
207 - *index = best_index;  
208 - *distance = best_distance;  
209 - }  
210 -  
211 - void cpu_search_at_node_range(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, T range, size_t *index, T *distance) {  
212 - T best_distance = FLT_MAX; // initialize the best distance to max of floating point  
213 - size_t best_index = 0;  
214 - std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points;  
215 - std::vector < typename kdtree::kdnode<T>*> next_node;  
216 - next_node.push_back(cur);  
217 - while (next_node.size()) {  
218 - std::vector<typename kdtree::kdnode<T>*> next_search;  
219 - while (next_node.size()) {  
220 - cur = next_node.back();  
221 - next_node.pop_back();  
222 - size_t split_axis = cur->level % D;  
223 - if (cur->left == NULL) {  
224 - for (size_t i = 0; i < cur->indices.size(); i++) {  
225 - size_t idx = cur->indices[i];  
226 - T d = cpu_distance(query, pts[idx]);  
227 - if (d < best_distance) {  
228 - best_distance = d;  
229 - best_index = idx;  
230 - }  
231 - }  
232 - }  
233 - else {  
234 - T d = query.dim[split_axis] - cur->split_value; // computer distance along specific axis or dimension  
235 - /// there are three possibilities: on either left or right, and on both left and right  
236 - if (fabs(d) > range) { // absolute value of floating point to see if distance will be larger that best_dist  
237 - if (d < 0)  
238 - next_search.push_back(cur->left); // every left[split_axis] is less and equal to cur->split_value, so it is possible to find the nearest point in this region  
239 - else  
240 - next_search.push_back(cur->right);  
241 - }  
242 - else { // it is possible that nereast neighbor will appear on both left and right  
243 - next_search.push_back(cur->left);  
244 - next_search.push_back(cur->right);  
245 - }  
246 - }  
247 - }  
248 - next_node = next_search; // pop out at least one time  
249 - }  
250 - *index = best_index;  
251 - *distance = best_distance;  
252 - }  
253 -  
254 - void cpu_search(T *h_query_points, size_t query_count, size_t *h_indices, T *h_distances) {  
255 - /// first convert the input query point into specific type  
256 - kdtree::point<T, D> query;  
257 - for (size_t j = 0; j < query_count; j++) {  
258 - for (size_t i = 0; i < D; i++)  
259 - query.dim[i] = h_query_points[j * D + i];  
260 - /// find the nearest node, this will be the upper bound for the next time searching  
261 - kdtree::kdnode<T> *best_node = NULL;  
262 - T best_distance = FLT_MAX;  
263 - size_t best_index = 0;  
264 - T radius = 0; // radius for range  
265 - cpu_search_at_node(root, query, &best_index, &best_distance, &best_node); // simple search to rougly determine a result for next search step  
266 - radius = sqrt(best_distance); // It is possible that nearest will appear in another region  
267 - /// find other possibilities  
268 - kdtree::kdnode<T> *cur = best_node;  
269 - while (cur->parent != NULL) { // every node that you pass will be possible to be the best node  
270 - /// go up  
271 - kdtree::kdnode<T> *parent = cur->parent; // travel back to every node that we pass through  
272 - size_t split_axis = (parent->level) % D;  
273 - /// search other nodes  
274 - size_t tmp_index;  
275 - T tmp_distance = FLT_MAX;  
276 - if (fabs(parent->split_value - query.dim[split_axis]) <= radius) {  
277 - /// search opposite node  
278 - if (parent->left != cur)  
279 - cpu_search_at_node_range(parent->left, query, radius, &tmp_index, &tmp_distance); // to see whether it is its mother node's left son node  
280 - else  
281 - cpu_search_at_node_range(parent->right, query, radius, &tmp_index, &tmp_distance);  
282 - }  
283 - if (tmp_distance < best_distance) {  
284 - best_distance = tmp_distance;  
285 - best_index = tmp_index;  
286 - }  
287 - cur = parent;  
288 - }  
289 - h_indices[j] = best_index;  
290 - h_distances[j] = best_distance;  
291 - }  
292 - }  
293 - }; //end class kdtree  
294 -  
295 - template <typename T, int D>  
296 - cpu_kdtree<T, D>* cpu_kdtree<T, D>::cur_tree_ptr = NULL; // definition of cur_tree_ptr pointer points to the current class 50 + } // end of namespace cpu_kdtree
297 51
298 template <typename T> 52 template <typename T>
299 struct cuda_kdnode { 53 struct cuda_kdnode {
@@ -305,7 +59,7 @@ namespace stim {
305 }; 59 };
306 60
307 template <typename T, int D> 61 template <typename T, int D>
308 - __device__ T gpu_distance(kdtree::point<T, D> &a, kdtree::point<T, D> &b) { 62 + __device__ T gpu_distance(cpu_kdtree::point<T, D> &a, cpu_kdtree::point<T, D> &b) {
309 T distance = 0; 63 T distance = 0;
310 64
311 for (size_t i = 0; i < D; i++) { 65 for (size_t i = 0; i < D; i++) {
@@ -316,7 +70,7 @@ namespace stim {
316 } 70 }
317 71
318 template <typename T, int D> 72 template <typename T, int D>
319 - __device__ void search_at_node(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, int cur, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, int *d_node) { 73 + __device__ void search_at_node(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, int cur, cpu_kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, int *d_node) {
320 T best_distance = FLT_MAX; 74 T best_distance = FLT_MAX;
321 size_t best_index = 0; 75 size_t best_index = 0;
322 76
@@ -346,7 +100,7 @@ namespace stim {
346 } 100 }
347 101
348 template <typename T, int D> 102 template <typename T, int D>
349 - __device__ void search_at_node_range(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, int cur, T range, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) { 103 + __device__ void search_at_node_range(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> &d_query_point, int cur, T range, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
350 T best_distance = FLT_MAX; 104 T best_distance = FLT_MAX;
351 size_t best_index = 0; 105 size_t best_index = 0;
352 106
@@ -405,7 +159,7 @@ namespace stim {
405 } 159 }
406 160
407 template <typename T, int D> 161 template <typename T, int D>
408 - __device__ void search(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) { 162 + __device__ void search(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
409 int best_node = 0; 163 int best_node = 0;
410 T best_distance = FLT_MAX; 164 T best_distance = FLT_MAX;
411 size_t best_index = 0; 165 size_t best_index = 0;
@@ -438,7 +192,7 @@ namespace stim {
438 } 192 }
439 193
440 template <typename T, int D> 194 template <typename T, int D>
441 - __global__ void search_batch(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> *d_query_points, size_t d_query_count, size_t *d_indices, T *d_distances, int *next_nodes, int *next_search_nodes, int *Judge) { 195 + __global__ void search_batch(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> *d_query_points, size_t d_query_count, size_t *d_indices, T *d_distances, int *next_nodes, int *next_search_nodes, int *Judge) {
442 size_t idx = blockIdx.x * blockDim.x + threadIdx.x; 196 size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
443 if (idx >= d_query_count) return; // avoid segfault 197 if (idx >= d_query_count) return; // avoid segfault
444 198
@@ -446,11 +200,11 @@ namespace stim {
446 } 200 }
447 201
448 template <typename T, int D> 202 template <typename T, int D>
449 - void search_stream(cuda_kdnode<T> *d_nodes, size_t *d_index, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> *query_stream_points, size_t stream_count, size_t *indices, T *distances) { 203 + void search_stream(cuda_kdnode<T> *d_nodes, size_t *d_index, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> *query_stream_points, size_t stream_count, size_t *indices, T *distances) {
450 unsigned int threads = (unsigned int)(stream_count > 1024 ? 1024 : stream_count); 204 unsigned int threads = (unsigned int)(stream_count > 1024 ? 1024 : stream_count);
451 unsigned int blocks = (unsigned int)(stream_count / threads + (stream_count % threads ? 1 : 0)); 205 unsigned int blocks = (unsigned int)(stream_count / threads + (stream_count % threads ? 1 : 0));
452 206
453 - kdtree::point<T, D> *d_query_points; 207 + cpu_kdtree::point<T, D> *d_query_points;
454 size_t *d_indices; 208 size_t *d_indices;
455 T *d_distances; 209 T *d_distances;
456 210
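The remaining hunks fold the old cpu_kdtree class into the renamed stim::kdtree, so the same object builds the tree on the host and, when compiled with nvcc, also holds the device copies used by the GPU search. A hedged usage sketch of the unified interface; create(), search() and cpu_search() follow the calls made from network.h above, everything else is an assumption.

```cpp
// Sketch only: exercises the unified stim::kdtree as it is used from network.h.
#include <cstdio>
#include <stim/structures/kdtree.cuh>

int main() {
    float ref[] = { 0, 0, 0,   1, 0, 0,   0, 2, 0 };   // three reference points, interleaved (x, y, z)
    stim::kdtree<float, 3> kdt;
    kdt.create(ref, 3, 3);                             // build with MaxTreeLevels = 3

    float query[] = { 0.9f, 0.1f, 0.0f };              // one query point
    size_t idx[1];
    float dist[1];
#ifdef __CUDACC__
    kdt.search(query, 1, idx, dist);                   // GPU nearest-neighbor search
#else
    kdt.cpu_search(query, 1, idx, dist);               // CPU fallback
#endif
    printf("nearest index: %zu\n", idx[0]);            // expected: 1, the point (1, 0, 0)
    return 0;
}
```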
@@ -480,26 +234,121 @@ namespace stim {
480 HANDLE_ERROR(cudaFree(d_distances)); 234 HANDLE_ERROR(cudaFree(d_distances));
481 } 235 }
482 236
483 - template <typename T, int D = 3>  
484 - class cuda_kdtree { 237 + template <typename T, int D = 3> // set dimension of data to default 3
  238 + class kdtree {
485 protected: 239 protected:
486 - cuda_kdnode<T> *d_nodes;  
487 - size_t *d_index;  
488 - kdtree::point<T, D>* d_reference_points;  
489 - size_t npts;  
490 - int num_nodes; 240 + int current_axis; // current judging axis
  241 + int n_id; // store the total number of nodes
  242 + std::vector < typename cpu_kdtree::point<T, D> > *tmp_points; // temporary pointer to the input points
  243 + std::vector < typename cpu_kdtree::point<T, D> > cpu_tmp_points; // for cpu searching
  244 + cpu_kdtree::cpu_kdnode<T> *root; // root node
  245 + static kdtree<T, D> *cur_tree_ptr;
  246 + #ifdef __CUDACC__
  247 + cuda_kdnode<T> *d_nodes;
  248 + size_t *d_index;
  249 + cpu_kdtree::point<T, D>* d_reference_points;
  250 + size_t npts;
  251 + int num_nodes;
  252 + #endif
491 public: 253 public:
492 - ~cuda_kdtree() { 254 + kdtree() { // constructor for creating a cpu_kdtree
  255 + cur_tree_ptr = this; // create a class pointer points to the current class value
  256 + n_id = 0; // set total number of points to default 0
  257 + }
  258 +
  259 + ~kdtree() { // destructor of cpu_kdtree
  260 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_nodes;
  261 + next_nodes.push_back(root);
  262 + while (next_nodes.size()) {
  263 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_search_nodes;
  264 + while (next_nodes.size()) {
  265 + cpu_kdtree::cpu_kdnode<T> *cur = next_nodes.back();
  266 + next_nodes.pop_back();
  267 + if (cur->left)
  268 + next_search_nodes.push_back(cur->left);
  269 + if (cur->right)
  270 + next_search_nodes.push_back(cur->right);
  271 + delete cur;
  272 + }
  273 + next_nodes = next_search_nodes;
  274 + }
  275 + root = NULL;
  276 + #ifdef __CUDACC__
493 HANDLE_ERROR(cudaFree(d_nodes)); 277 HANDLE_ERROR(cudaFree(d_nodes));
494 HANDLE_ERROR(cudaFree(d_index)); 278 HANDLE_ERROR(cudaFree(d_index));
495 HANDLE_ERROR(cudaFree(d_reference_points)); 279 HANDLE_ERROR(cudaFree(d_reference_points));
  280 + #endif
496 } 281 }
497 -  
498 - /// Create a KD-tree given a pointer to an array of reference points and the number of reference points  
499 - /// @param h_reference_points is a host array containing the reference points in (x0, y0, z0, ...., ) order  
500 - /// @param reference_count is the number of reference point in the array  
501 - /// @param max_levels is the deepest number of tree levels allowed  
502 - void create(T *h_reference_points, size_t reference_count, size_t max_levels = 3) { 282 +
  283 + void cpu_create(std::vector < typename cpu_kdtree::point<T, D> > &reference_points, size_t max_levels) {
  284 + tmp_points = &reference_points;
  285 + root = new cpu_kdtree::cpu_kdnode<T>(); // initializing the root node
  286 + root->idx = n_id++; // the index of root is 0
  287 + root->level = 0; // tree level begins at 0
  288 + root->indices.resize(reference_points.size()); // get the number of points
  289 + for (size_t i = 0; i < reference_points.size(); i++) {
  290 + root->indices[i] = i; // set indices of input points
  291 + }
  292 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_nodes; // next nodes
  293 + next_nodes.push_back(root); // push back the root node
  294 + while (next_nodes.size()) {
  295 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_search_nodes; // next search nodes
  296 + while (next_nodes.size()) { // two same WHILE is because we need to make a new vector to store nodes for search
  297 + cpu_kdtree::cpu_kdnode<T> *current_node = next_nodes.back(); // handle node one by one (right first)
  298 + next_nodes.pop_back(); // pop out current node in order to store next round of nodes
  299 + if (current_node->level < max_levels) {
  300 + if (current_node->indices.size() > 1) { // split if the nonleaf node contains more than one point
  301 + cpu_kdtree::cpu_kdnode<T> *left = new cpu_kdtree::cpu_kdnode<T>();
  302 + cpu_kdtree::cpu_kdnode<T> *right = new cpu_kdtree::cpu_kdnode<T>();
  303 + left->idx = n_id++; // set the index of current node's left node
  304 + right->idx = n_id++;
  305 + split(current_node, left, right); // split left and right and determine a node
  307 + std::vector <size_t> temp; // empty vector (swapped in to release the node's indices)
  307 + //temp.resize(current_node->indices.size());
  308 + current_node->indices.swap(temp); // clean up current node's indices
  309 + current_node->left = left;
  310 + current_node->right = right;
  311 + current_node->left_idx = left->idx;
  312 + current_node->right_idx = right->idx;
  313 + if (right->indices.size())
  314 + next_search_nodes.push_back(right); // left pop out first
  315 + if (left->indices.size())
  316 + next_search_nodes.push_back(left);
  317 + }
  318 + }
  319 + }
  320 + next_nodes = next_search_nodes; // go deeper within the tree
  321 + }
  322 + }
  323 +
  324 + static bool sort_points(const size_t a, const size_t b) { // create functor for std::sort
  325 + std::vector < typename cpu_kdtree::point<T, D> > &pts = *cur_tree_ptr->tmp_points; // put cur_tree_ptr to current input points' pointer
  326 + return pts[a].dim[cur_tree_ptr->current_axis] < pts[b].dim[cur_tree_ptr->current_axis];
  327 + }
  328 +
  329 + void split(cpu_kdtree::cpu_kdnode<T> *cur, cpu_kdtree::cpu_kdnode<T> *left, cpu_kdtree::cpu_kdnode<T> *right) {
  330 + std::vector < typename cpu_kdtree::point<T, D> > &pts = *tmp_points;
  331 + current_axis = cur->level % D; // choose the splitting axis from the tree level
  332 + std::sort(cur->indices.begin(), cur->indices.end(), sort_points); // sort the point indices along the current axis using sort_points
  333 + size_t mid_value = cur->indices[cur->indices.size() / 2]; // index of the median point along the current axis
  334 + cur->split_value = pts[mid_value].dim[current_axis]; // the median coordinate becomes the split value
  335 + left->parent = cur; // set the parent of the next search nodes to current node
  336 + right->parent = cur;
  337 + left->level = cur->level + 1; // level + 1
  338 + right->level = cur->level + 1;
  339 + left->parent_idx = cur->idx; // set its parent node's index
  340 + right->parent_idx = cur->idx;
  341 + for (size_t i = 0; i < cur->indices.size(); i++) { // split into left and right half-space one by one
  342 + size_t idx = cur->indices[i];
  343 + if (pts[idx].dim[current_axis] < cur->split_value)
  344 + left->indices.push_back(idx);
  345 + else
  346 + right->indices.push_back(idx);
  347 + }
  348 + }
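
For readers skimming the hunk, the split above is a plain median split along one axis. Below is a minimal, self-contained sketch of the same rule; pt2 and median_split are illustrative stand-ins under the assumption of 2-D float points, not stim types.

#include <algorithm>
#include <cstddef>
#include <vector>

struct pt2 { float dim[2]; };                              // stand-in for cpu_kdtree::point<T, D> with D = 2

// Partition the point indices around the median coordinate along `axis`,
// mirroring what split() does for a single node.
void median_split(std::vector<size_t> idx, const std::vector<pt2> &pts, int axis,
                  float &split_value, std::vector<size_t> &left, std::vector<size_t> &right) {
    std::sort(idx.begin(), idx.end(),
              [&](size_t a, size_t b) { return pts[a].dim[axis] < pts[b].dim[axis]; });
    split_value = pts[idx[idx.size() / 2]].dim[axis];      // median coordinate becomes the split value
    for (size_t i : idx)                                   // points below the split go left, the rest go right
        (pts[i].dim[axis] < split_value ? left : right).push_back(i);
}
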
  349 +
  350 + void create(T *h_reference_points, size_t reference_count, size_t max_levels) {
  351 + #ifdef __CUDACC__
503 if (max_levels > 10) { 352 if (max_levels > 10) {
504 std::cout<<"The max_tree_levels should be smaller!"<<std::endl; 353 std::cout<<"The max_tree_levels should be smaller!"<<std::endl;
505 exit(1); 354 exit(1);
@@ -507,29 +356,28 @@ namespace stim { @@ -507,29 +356,28 @@ namespace stim {
507 //bb.init(&h_reference_points[0]); 356 //bb.init(&h_reference_points[0]);
508 //aaboundingboxing<T, D>(bb, h_reference_points, reference_count); 357 //aaboundingboxing<T, D>(bb, h_reference_points, reference_count);
509 358
510 - std::vector < typename kdtree::point<T, D>> reference_points(reference_count); // restore the reference points in particular way 359 + std::vector < typename cpu_kdtree::point<T, D>> reference_points(reference_count); // pack the reference points into point<T, D> structures
511 for (size_t j = 0; j < reference_count; j++) 360 for (size_t j = 0; j < reference_count; j++)
512 for (size_t i = 0; i < D; i++) 361 for (size_t i = 0; i < D; i++)
513 - reference_points[j].dim[i] = h_reference_points[j * D + i];  
514 - cpu_kdtree<T, D> tree; // creating a tree on cpu  
515 - tree.cpu_create(reference_points, max_levels); // building a tree on cpu  
516 - kdtree::kdnode<T> *d_root = tree.get_root();  
517 - num_nodes = tree.get_num_nodes(); 362 + reference_points[j].dim[i] = h_reference_points[j * D + i];
  363 + (*this).cpu_create(reference_points, max_levels); // building a tree on cpu
  364 + cpu_kdtree::cpu_kdnode<T> *d_root = (*this).get_root();
  365 + num_nodes = (*this).get_num_nodes();
518 npts = reference_count; // also equals to reference_count 366 npts = reference_count; // also equals to reference_count
519 367
520 HANDLE_ERROR(cudaMalloc((void**)&d_nodes, sizeof(cuda_kdnode<T>) * num_nodes)); // copy data from host to device 368 HANDLE_ERROR(cudaMalloc((void**)&d_nodes, sizeof(cuda_kdnode<T>) * num_nodes)); // copy data from host to device
521 HANDLE_ERROR(cudaMalloc((void**)&d_index, sizeof(size_t) * npts)); 369 HANDLE_ERROR(cudaMalloc((void**)&d_index, sizeof(size_t) * npts));
522 - HANDLE_ERROR(cudaMalloc((void**)&d_reference_points, sizeof(kdtree::point<T, D>) * npts)); 370 + HANDLE_ERROR(cudaMalloc((void**)&d_reference_points, sizeof(cpu_kdtree::point<T, D>) * npts));
523 371
524 std::vector < cuda_kdnode<T> > tmp_nodes(num_nodes); 372 std::vector < cuda_kdnode<T> > tmp_nodes(num_nodes);
525 std::vector <size_t> indices(npts); 373 std::vector <size_t> indices(npts);
526 - std::vector <kdtree::kdnode<T>*> next_nodes; 374 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_nodes;
527 size_t cur_pos = 0; 375 size_t cur_pos = 0;
528 next_nodes.push_back(d_root); 376 next_nodes.push_back(d_root);
529 while (next_nodes.size()) { 377 while (next_nodes.size()) {
530 - std::vector <typename kdtree::kdnode<T>*> next_search_nodes; 378 + std::vector <typename cpu_kdtree::cpu_kdnode<T>*> next_search_nodes;
531 while (next_nodes.size()) { 379 while (next_nodes.size()) {
532 - kdtree::kdnode<T> *cur = next_nodes.back(); 380 + cpu_kdtree::cpu_kdnode<T> *cur = next_nodes.back();
533 next_nodes.pop_back(); 381 next_nodes.pop_back();
534 int id = cur->idx; // the nodes at same level are independent 382 int id = cur->idx; // the nodes at same level are independent
535 tmp_nodes[id].level = cur->level; 383 tmp_nodes[id].level = cur->level;
@@ -559,16 +407,154 @@ namespace stim { @@ -559,16 +407,154 @@ namespace stim {
559 } 407 }
560 HANDLE_ERROR(cudaMemcpy(d_nodes, &tmp_nodes[0], sizeof(cuda_kdnode<T>) * tmp_nodes.size(), cudaMemcpyHostToDevice)); 408 HANDLE_ERROR(cudaMemcpy(d_nodes, &tmp_nodes[0], sizeof(cuda_kdnode<T>) * tmp_nodes.size(), cudaMemcpyHostToDevice));
561 HANDLE_ERROR(cudaMemcpy(d_index, &indices[0], sizeof(size_t) * indices.size(), cudaMemcpyHostToDevice)); 409 HANDLE_ERROR(cudaMemcpy(d_index, &indices[0], sizeof(size_t) * indices.size(), cudaMemcpyHostToDevice));
562 - HANDLE_ERROR(cudaMemcpy(d_reference_points, &reference_points[0], sizeof(kdtree::point<T, D>) * reference_count, cudaMemcpyHostToDevice)); 410 + HANDLE_ERROR(cudaMemcpy(d_reference_points, &reference_points[0], sizeof(cpu_kdtree::point<T, D>) * reference_count, cudaMemcpyHostToDevice));
  411 +
  412 + #else
  413 + std::vector < typename cpu_kdtree::point<T, D> > reference_points(reference_count); // pack the reference points into point<T, D> structures
  414 + for (size_t j = 0; j < reference_count; j++)
  415 + for (size_t i = 0; i < D; i++)
  416 + reference_points[j].dim[i] = h_reference_points[j * D + i];
  417 + cpu_create(reference_points, max_levels);
  418 + cpu_tmp_points = *tmp_points; // keep a host-side copy of the points for the CPU search path
  419 +
  420 + #endif
  421 + }
  422 +
  423 + int get_num_nodes() const { // get the total number of nodes
  424 + return n_id;
  425 + }
  426 +
  427 + cpu_kdtree::cpu_kdnode<T>* get_root() const { // get the root node of tree
  428 + return root;
  429 + }
  430 +
  431 + T cpu_distance(const cpu_kdtree::point<T, D> &a, const cpu_kdtree::point<T, D> &b) { // squared Euclidean distance between a and b (no sqrt)
  432 + T distance = 0;
  433 +
  434 + for (size_t i = 0; i < D; i++) {
  435 + T d = a.dim[i] - b.dim[i];
  436 + distance += d*d;
  437 + }
  438 + return distance;
  439 + }
  440 +
  441 + void cpu_search_at_node(cpu_kdtree::cpu_kdnode<T> *cur, const cpu_kdtree::point<T, D> &query, size_t *index, T *distance, cpu_kdtree::cpu_kdnode<T> **node) {
  442 + T best_distance = FLT_MAX; // initialize the best distance to max of floating point
  443 + size_t best_index = 0;
  444 + std::vector < typename cpu_kdtree::point<T, D> > &pts = cpu_tmp_points; // bind a reference so the point set is not copied on every query
  445 + while (true) {
  446 + size_t split_axis = cur->level % D;
  447 + if (cur->left == NULL) { // leaf node (left and right are always NULL together)
  448 + *node = cur; // return the leaf node that was reached
  449 + for (size_t i = 0; i < cur->indices.size(); i++) {
  450 + size_t idx = cur->indices[i];
  451 + T d = cpu_distance(query, pts[idx]); // compute distances
  452 + /// to compute the k nearest neighbors, the previous result could be fed back in and points
  453 + /// with last_best_dist < dist < best_dist accepted until k neighbors have been collected
  454 + if (d < best_distance) {
  455 + best_distance = d;
  456 + best_index = idx; // record the nearest neighbor index
  457 + }
  458 + }
  459 + break; // reached a leaf, so stop descending
  460 + }
  461 + else if (query.dim[split_axis] < cur->split_value) { // otherwise descend into the child on the query's side of the split
  462 + cur = cur->left;
  463 + }
  464 + else {
  465 + cur = cur->right;
  466 + }
  467 + }
  468 + *index = best_index;
  469 + *distance = best_distance;
  470 + }
  471 +
  472 + void cpu_search_at_node_range(cpu_kdtree::cpu_kdnode<T> *cur, const cpu_kdtree::point<T, D> &query, T range, size_t *index, T *distance) {
  473 + T best_distance = FLT_MAX; // initialize the best distance to max of floating point
  474 + size_t best_index = 0;
  475 + std::vector < typename cpu_kdtree::point<T, D> > &pts = cpu_tmp_points; // bind a reference so the point set is not copied on every query
  476 + std::vector < typename cpu_kdtree::cpu_kdnode<T>*> next_node;
  477 + next_node.push_back(cur);
  478 + while (next_node.size()) {
  479 + std::vector<typename cpu_kdtree::cpu_kdnode<T>*> next_search;
  480 + while (next_node.size()) {
  481 + cur = next_node.back();
  482 + next_node.pop_back();
  483 + size_t split_axis = cur->level % D;
  484 + if (cur->left == NULL) {
  485 + for (size_t i = 0; i < cur->indices.size(); i++) {
  486 + size_t idx = cur->indices[i];
  487 + T d = cpu_distance(query, pts[idx]);
  488 + if (d < best_distance) {
  489 + best_distance = d;
  490 + best_index = idx;
  491 + }
  492 + }
  493 + }
  494 + else {
  495 + T d = query.dim[split_axis] - cur->split_value; // signed distance from the query to the splitting plane
  496 + /// three cases: search only the left child, only the right child, or both
  497 + if (fabs(d) > range) { // the query is farther than the search range from the splitting plane, so only one side can contain a match
  498 + if (d < 0)
  499 + next_search.push_back(cur->left); // all left-side coordinates are <= split_value, so the query's side is the left subtree
  500 + else
  501 + next_search.push_back(cur->right);
  502 + }
  503 + else { // the nearest neighbor could lie on either side, so search both
  504 + next_search.push_back(cur->left);
  505 + next_search.push_back(cur->right);
  506 + }
  507 + }
  508 + }
  509 + next_node = next_search; // move on to the next level of candidate nodes
  510 + }
  511 + *index = best_index;
  512 + *distance = best_distance;
  513 + }
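
The pruning test above relies on the fact that every point in the far child differs from the query by at least |d| along the split axis, so its full distance is also at least |d|. A tiny numeric illustration (the values are invented for this sketch):

#include <cassert>
#include <cmath>

int main() {
    float query_x = 2.0f, split_value = 5.0f, range = 1.5f;
    float d = query_x - split_value;   // signed distance from the query to the splitting plane
    assert(std::fabs(d) > range);      // 3.0 > 1.5, so the right subtree (x >= 5.0) cannot hold a point within range
    return 0;
}
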
  514 +
  515 + void cpu_search(T *h_query_points, size_t query_count, size_t *h_indices, T *h_distances) {
  516 + /// first convert the input query point into specific type
  517 + cpu_kdtree::point<T, D> query;
  518 + for (size_t j = 0; j < query_count; j++) {
  519 + for (size_t i = 0; i < D; i++)
  520 + query.dim[i] = h_query_points[j * D + i];
  521 + /// find the nearest node, this will be the upper bound for the next time searching
  522 + cpu_kdtree::cpu_kdnode<T> *best_node = NULL;
  523 + T best_distance = FLT_MAX;
  524 + size_t best_index = 0;
  525 + T radius = 0; // radius for range
  526 + cpu_search_at_node(root, query, &best_index, &best_distance, &best_node); // greedy descent gives an initial candidate that bounds the later search
  527 + radius = sqrt(best_distance); // best_distance is squared, so take the square root; a closer point may still lie in another region
  528 + /// find other possibilities
  529 + cpu_kdtree::cpu_kdnode<T> *cur = best_node;
  530 + while (cur->parent != NULL) { // walk back up the tree; any subtree we bypassed may still hold a closer point
  531 + /// go up
  532 + cpu_kdtree::cpu_kdnode<T> *parent = cur->parent; // travel back to every node that we pass through
  533 + size_t split_axis = (parent->level) % D;
  534 + /// search other nodes
  535 + size_t tmp_index;
  536 + T tmp_distance = FLT_MAX;
  537 + if (fabs(parent->split_value - query.dim[split_axis]) <= radius) {
  538 + /// search opposite node
  539 + if (parent->left != cur)
  540 + cpu_search_at_node_range(parent->left, query, radius, &tmp_index, &tmp_distance); // search the sibling subtree on the other side of the split
  541 + else
  542 + cpu_search_at_node_range(parent->right, query, radius, &tmp_index, &tmp_distance);
  543 + }
  544 + if (tmp_distance < best_distance) {
  545 + best_distance = tmp_distance;
  546 + best_index = tmp_index;
  547 + }
  548 + cur = parent;
  549 + }
  550 + h_indices[j] = best_index;
  551 + h_distances[j] = best_distance;
  552 + }
563 } 553 }
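
For context, a rough usage sketch of the public create()/search() entry points, assuming a host-only build (no __CUDACC__, so search() falls through to cpu_search()), three-dimensional float points, and that the kd-tree header is already included; kd_demo and the level count of 5 are illustrative only.

#include <cstddef>
#include <vector>

void kd_demo(std::vector<float> &ref_xyz,      // packed reference points (x0, y0, z0, x1, ...)
             std::vector<float> &qry_xyz) {    // packed query points in the same layout
    stim::kdtree<float, 3> tree;
    tree.create(ref_xyz.data(), ref_xyz.size() / 3, 5);    // build the tree with (for example) 5 levels

    size_t nq = qry_xyz.size() / 3;
    std::vector<size_t> nearest(nq);           // index of the nearest reference point per query
    std::vector<float> dist(nq);               // distance reported by the search (squared on the CPU path)
    tree.search(qry_xyz.data(), nq, nearest.data(), dist.data());
}
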
564 554
565 - /// Search the KD tree for nearest neighbors to a set of specified query points  
566 - /// @param h_query_points an array of query points in (x0, y0, z0, ...) order  
567 - /// @param query_count is the number of query points  
568 - /// @param indices are the indices to the nearest reference point for each query points  
569 - /// @param distances is an array containing the distance between each query point and the nearest reference point  
570 void search(T *h_query_points, size_t query_count, size_t *indices, T *distances) { 555 void search(T *h_query_points, size_t query_count, size_t *indices, T *distances) {
571 - std::vector < typename kdtree::point<T, D> > query_points(query_count); 556 + #ifdef __CUDACC__
  557 + std::vector < typename cpu_kdtree::point<T, D> > query_points(query_count);
572 for (size_t j = 0; j < query_count; j++) 558 for (size_t j = 0; j < query_count; j++)
573 for (size_t i = 0; i < D; i++) 559 for (size_t i = 0; i < D; i++)
574 query_points[j].dim[i] = h_query_points[j * D + i]; 560 query_points[j].dim[i] = h_query_points[j * D + i];
@@ -595,7 +581,7 @@ namespace stim { @@ -595,7 +581,7 @@ namespace stim {
595 unsigned int threads = (unsigned int)(query_count > 1024 ? 1024 : query_count); 581 unsigned int threads = (unsigned int)(query_count > 1024 ? 1024 : query_count);
596 unsigned int blocks = (unsigned int)(query_count / threads + (query_count % threads ? 1 : 0)); 582 unsigned int blocks = (unsigned int)(query_count / threads + (query_count % threads ? 1 : 0));
597 583
598 - kdtree::point<T, D> *d_query_points; // create a pointer pointing to query points on gpu 584 + cpu_kdtree::point<T, D> *d_query_points; // create a pointer pointing to query points on gpu
599 size_t *d_indices; 585 size_t *d_indices;
600 T *d_distances; 586 T *d_distances;
601 587
@@ -624,64 +610,18 @@ namespace stim { @@ -624,64 +610,18 @@ namespace stim {
624 HANDLE_ERROR(cudaFree(d_indices)); 610 HANDLE_ERROR(cudaFree(d_indices));
625 HANDLE_ERROR(cudaFree(d_distances)); 611 HANDLE_ERROR(cudaFree(d_distances));
626 } 612 }
627 - }  
628 -  
629 - /// Return the number of points in the KD tree  
630 - size_t num_points() {  
631 - return npts;  
632 - }  
633 613
634 - stim::aabbn<T, D> getbox() {  
635 - size_t N = npts;  
636 - //std::vector < typename kdtree::point<T, D> > cpu_ref(npts); //allocate space on the CPU for the reference points  
637 - T* cpu_ref = (T*)malloc(N * D * sizeof(T)); //allocate space on the CPU for the reference points  
638 - HANDLE_ERROR(cudaMemcpy(cpu_ref, d_reference_points, N * D * sizeof(T), cudaMemcpyDeviceToHost)); //copy from GPU to CPU 614 + #else
  615 + cpu_search(h_query_points, query_count, indices, distances);
639 616
640 - stim::aabbn<T, D> bb(cpu_ref); 617 + #endif
641 618
642 - for (size_t i = 1; i < N; i++) { //for each reference point  
643 - //std::cout << "( " << cpu_ref[i * D + 0] << ", " << cpu_ref[i * D + 1] << ", " << cpu_ref[i * D + 2] << ")" << std::endl;  
644 - bb.insert(&cpu_ref[i * D]);  
645 - }  
646 - return bb;  
647 } 619 }
648 620
649 - //generate an implicit distance field for the KD-tree  
650 - void dist_field3(T* dist, size_t* dims, stim::aabbn<T, 3> bb) {  
651 - size_t N = 1; //number of query points that make up the distance field  
652 - for (size_t d = 0; d < 3; d++) N *= dims[d]; //calculate the total number of query points  
653 -  
654 - //calculate the grid spatial parameters  
655 - T dx = 0;  
656 - if (dims[0] > 1) dx = bb.length(0) / dims[0];  
657 - T dy = 0;  
658 - if (dims[1] > 1) dy = bb.length(1) / dims[1];  
659 - T dz = 0;  
660 - if (dims[2] > 1) dz = bb.length(2) / dims[2];  
661 -  
662 - T* Q = (T*)malloc(N * 3 * sizeof(T)); //allocate space for the query points  
663 - size_t i;  
664 - for (size_t z = 0; z < dims[2]; z++) { //for each query point (which is a point in the grid)  
665 - for (size_t y = 0; y < dims[1]; y++) {  
666 - for (size_t x = 0; x < dims[0]; x++) {  
667 - i = z * dims[1] * dims[0] + y * dims[0] + x;  
668 - Q[i * 3 + 0] = bb.low[0] + x * dx + dx / 2;  
669 - Q[i * 3 + 1] = bb.low[1] + y * dy + dy / 2;  
670 - Q[i * 3 + 2] = bb.low[2] + z * dz + dz / 2;  
671 - //std::cout << i<<" "<<Q[i * 3 + 0] << " " << Q[i * 3 + 1] << " " << Q[i * 3 + 2] << std::endl;  
672 - }  
673 - }  
674 - }  
675 - size_t* temp = (size_t*)malloc(N * sizeof(size_t)); //allocate space to store the indices (unused)  
676 - search(Q, N, temp, dist);  
677 - } 621 + }; //end class kdtree
678 622
679 - //generate an implicit distance field for the KD-tree  
680 - void dist_field3(T* dist, size_t* dims) {  
681 - stim::aabbn<T, D> bb = getbox(); //get a bounding box around the tree  
682 - dist_field3(dist, dims, bb);  
683 - } 623 + template <typename T, int D>
  624 + kdtree<T, D>* kdtree<T, D>::cur_tree_ptr = NULL; // out-of-class definition of the static pointer to the tree currently being built or searched
684 625
685 - };  
686 } //end namespace stim 626 } //end namespace stim
687 #endif 627 #endif
688 \ No newline at end of file 628 \ No newline at end of file
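
Side note on the design: sort_points has to be static to be passed to std::sort, which is why the class carries the static cur_tree_ptr defined above. One possible alternative, a sketch only and not what this commit does, is a capturing lambda as a drop-in replacement for the std::sort call inside split(), which removes the shared static state:

std::sort(cur->indices.begin(), cur->indices.end(),
          [this](size_t a, size_t b) {
              // member access through the captured this, so no cur_tree_ptr is needed
              return (*tmp_points)[a].dim[current_axis] < (*tmp_points)[b].dim[current_axis];
          });
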
stim/visualization/cylinder.h
@@ -668,4 +668,4 @@ public: @@ -668,4 +668,4 @@ public:
668 }; 668 };
669 669
670 } 670 }
671 -#endif 671 -#endif
  672 +#endif
672 \ No newline at end of file 673 \ No newline at end of file
stim/visualization/gl_network.h
@@ -44,25 +44,94 @@ public: @@ -44,25 +44,94 @@ public:
44 } 44 }
45 45
46 /// Render the network centerline as a series of line strips. 46 /// Render the network centerline as a series of line strips.
  47 + /// glCenterline0 renders the centerline when only a single network is loaded, using a constant (white) texture coordinate
  48 + void glCenterline0(){
  49 + if (!glIsList(dlist)) { //if dlist isn't a display list, create it
  50 + dlist = glGenLists(1); //generate a display list
  51 + glNewList(dlist, GL_COMPILE); //start a new display list
  52 + for (unsigned e = 0; e < E.size(); e++) { //for each edge in the network
  53 + glBegin(GL_LINE_STRIP);
  54 + for (unsigned p = 0; p < E[e].size(); p++) { //for each point on that edge
  55 + glTexCoord1f(0); //set a constant (white) texture coordinate before emitting the vertex
  56 + glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]); //set the vertex position based on the current point
  57 + }
  58 + glEnd();
  59 + }
  60 + glEndList(); //end the display list
  61 + }
  62 + glCallList(dlist); // render the display list
  63 + }
47 64
48 /// @param m specifies the magnitude value used as the vertex weight (radius, error, etc.) 65 /// @param m specifies the magnitude value used as the vertex weight (radius, error, etc.)
49 void glCenterline(unsigned m = 0){ 66 void glCenterline(unsigned m = 0){
50 67
51 if(!glIsList(dlist)){ //if dlist isn't a display list, create it 68 if(!glIsList(dlist)){ //if dlist isn't a display list, create it
52 - dlist = glGenLists(1); //generate a display list 69 + dlist = glGenLists(3); //generate display list names
53 glNewList(dlist, GL_COMPILE); //start a new display list 70 glNewList(dlist, GL_COMPILE); //start a new display list
54 for(unsigned e = 0; e < E.size(); e++){ //for each edge in the network 71 for(unsigned e = 0; e < E.size(); e++){ //for each edge in the network
  72 + unsigned errormag_id = E[e].nmags() - 1; //index of the last magnitude channel (used as the error value)
55 glBegin(GL_LINE_STRIP); 73 glBegin(GL_LINE_STRIP);
56 for(unsigned p = 0; p < E[e].size(); p++){ //for each point on that edge 74 for(unsigned p = 0; p < E[e].size(); p++){ //for each point on that edge
57 glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]); //set the vertex position based on the current point 75 glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]); //set the vertex position based on the current point
58 - glTexCoord1f(E[e].m(p, m)); //set the texture coordinate based on the specified magnitude index 76 + glTexCoord1f(E[e].m(p, errormag_id)); //set the texture coordinate from the last (error) magnitude
59 } 77 }
60 glEnd(); 78 glEnd();
61 } 79 }
62 glEndList(); //end the display list 80 glEndList(); //end the display list
63 } 81 }
64 glCallList(dlist); // render the display list 82 glCallList(dlist); // render the display list
  83 + }
65 84
  85 + void glRandColorCenterlineGT(GLuint &dlist1, std::vector<unsigned> map, std::vector<T> colormap){ //render the ground-truth network: mapped edges (map[e] != -1) use their own colormap entry, unmapped edges are white
  86 + if(!glIsList(dlist1)){
  87 + glNewList(dlist1, GL_COMPILE);
  88 + for(unsigned e = 0; e < E.size(); e++){
  89 + if(map[e] != unsigned(-1)){
  90 + glColor3f(colormap[e * 3 + 0], colormap[e * 3 + 1], colormap[e * 3 + 2]);
  91 + glBegin(GL_LINE_STRIP);
  92 + for(unsigned p = 0; p < E[e].size(); p++){
  93 + glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]);
  94 + }
  95 + glEnd();
  96 + }
  97 + else{
  98 + glColor3f(1.0, 1.0, 1.0);
  99 + glBegin(GL_LINE_STRIP);
  100 + for(unsigned p = 0; p < E[e].size(); p++){
  101 + glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]);
  102 + }
  103 + glEnd();
  104 + }
  105 + }
  106 + glEndList();
  107 + }
  108 + glCallList(dlist1);
  109 + }
  110 +
  111 + void glRandColorCenterlineT(GLuint &dlist2, std::vector<unsigned> map, std::vector<T> colormap){ //render the test network: matched edges take the color of their ground-truth counterpart (map[e]), unmatched edges are white
  112 + if(!glIsList(dlist2)){
  113 + glNewList(dlist2, GL_COMPILE);
  114 + for(unsigned e = 0; e < E.size(); e++){
  115 + if(map[e] != unsigned(-1)){
  116 + glColor3f(colormap[map[e] * 3 + 0], colormap[map[e] * 3 + 1], colormap[map[e] * 3 + 2]);
  117 + glBegin(GL_LINE_STRIP);
  118 + for(unsigned p = 0; p < E[e].size(); p++){
  119 + glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]);
  120 + }
  121 + glEnd();
  122 + }
  123 + else{
  124 + glColor3f(1.0, 1.0, 1.0);
  125 + glBegin(GL_LINE_STRIP);
  126 + for(unsigned p = 0; p < E[e].size(); p++){
  127 + glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]);
  128 + }
  129 + glEnd();
  130 + }
  131 + }
  132 + glEndList();
  133 + }
  134 + glCallList(dlist2);
66 } 135 }
67 136
68 }; //end stim::gl_network class 137 }; //end stim::gl_network class
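
A rough sketch of how these render calls might sit in a GLUT display callback, assuming a GLUT/OpenGL context, that gl_network.h is included, and that the edge mapping and colormap come from a network comparison done elsewhere (none of this is part of this hunk):

#include <GL/glut.h>
#include <vector>

// hypothetical globals, assumed to be filled in elsewhere
stim::gl_network<float> net;                   // the network to render
GLuint dlistGT = 0, dlistT = 0;                // display-list names; would need glGenLists() before the commented calls below
std::vector<unsigned> map;                     // test-edge -> ground-truth-edge mapping
std::vector<float> colormap;                   // 3 floats (RGB) per ground-truth edge

void display() {
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    net.glCenterline0();                                    // plain white centerlines when only one network is loaded
    //net.glRandColorCenterlineGT(dlistGT, map, colormap);  // ground-truth edges in their own colors
    //net.glRandColorCenterlineT(dlistT, map, colormap);    // test edges in the colors of their matched GT edges
    glutSwapBuffers();
}
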