Commit 32ba91f2e44b2f9c23650a36360051388cec1182

Authored by David Mayerich
2 parents 1ee79b84 fdfdeda0

Merge remote-tracking branch 'refs/remotes/origin/JACK'

Conflicts:
	stim/biomodels/network.h
	stim/math/filters/conv2.h
	stim/visualization/cylinder.h
	stim/visualization/gl_network.h
stim/biomodels/centerline.h
... ... @@ -138,7 +138,7 @@ public:
138 138 //if the index is not an end point
139 139 else{
140 140  
141   - unsigned int N1 = idx + 1; //calculate the size of both fibers
  141 + unsigned int N1 = idx; //calculate the size of both fibers
142 142 unsigned int N2 = N - idx;
143 143  
144 144 fl.resize(2); //set the array size to 2
... ... @@ -147,7 +147,7 @@ public:
147 147 fl[1] = stim::centerline<T>(N2);
148 148  
149 149 //copy both halves of the fiber
150   - unsigned int i, d;
  150 + unsigned int i;
151 151  
152 152 //first half
153 153 for(i = 0; i < N1; i++) //for each centerline point
... ...
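Arithmetic note on the centerline.h change above: with the new code, a centerline of N points split at index idx yields halves of N1 = idx and N2 = N - idx points (for example, N = 10 and idx = 4 gives 4 + 6 = 10 points), whereas the previous N1 = idx + 1 made the two halves share the split point (5 + 6 = 11 points in total).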
stim/biomodels/network.h
... ... @@ -15,6 +15,39 @@
15 15 #include <stim/cuda/cudatools/timer.h>
16 16  
17 17  
  18 +#ifdef __CUDACC__
  19 +//device gaussian function
  20 +__device__ float gaussianFunction(float x, float std){ return expf(-x/(2*std*std));}
  21 +
  22 +//compute metric in parallel
  23 +template <typename T>
  24 +__global__ void d_metric(T* M, size_t n, T* D, float sigma){
  25 + size_t x = blockDim.x * blockIdx.x + threadIdx.x;
  26 + if(x >= n) return;
  27 + M[x] = 1.0f - gaussianFunction(D[x], sigma);
  28 +}
  29 +
  30 +//find the corresponding edge index from array index
  31 +__global__ void d_findedge(size_t* I, size_t n, unsigned* R, size_t* E, size_t ne){
  32 + size_t x = blockDim.x * blockIdx.x + threadIdx.x;
  33 + if(x >= n) return;
  34 + unsigned i = 0;
  35 + size_t N = 0;
  36 + for(unsigned e = 0; e < ne; e++){
  37 + N += E[e];
  38 + if(I[x] < N){
  39 + R[x] = i;
  40 + break;
  41 + }
  42 + i++;
  43 + }
  44 +}
  45 +#endif
  46 +
  47 +//hard-coded factors used by the split and mapping functions below
  48 +int threshold_fac = 10;	// a split must leave at least 1/threshold_fac of an edge's points on each side
  49 +float metric_fac = 0.6f;	// edges whose average metric exceeds this are treated as having no match in the other network
  50 +
18 51 namespace stim{
19 52 /** This is the a class that interfaces with gl_spider in order to store the currently
20 53 * segmented network. The following data is stored and can be extracted:
... ... @@ -22,7 +55,6 @@ namespace stim{
22 55 * 2)Network connectivity (a graph of nodes and edges), reconstructed using ANN library.
23 56 */
24 57  
25   -
26 58 template<typename T>
27 59 class network{
28 60  
... ... @@ -60,6 +92,19 @@ class network{
60 92 return ss.str();
61 93 }
62 94  
  95 + std::vector<edge> split(unsigned int idx){
  96 +
  97 + std::vector< stim::cylinder<T> > C;
  98 + C.resize(2);
  99 + C = (*this).cylinder<T>::split(idx);
  100 + std::vector<edge> E(C.size());
  101 +
  102 + for(unsigned e = 0; e < E.size(); e++){
  103 + E[e] = C[e];
  104 + }
  105 + return E;
  106 + }
  107 +
63 108 };
64 109  
65 110 ///Node class that stores the physical position of the node as well as the edges it is connected to (edges that connect to it), As well as any additional data necessary.
... ... @@ -376,7 +421,8 @@ public:
376 421 return n; //return the resampled network
377 422 }
378 423  
379   -
  424 + // host gaussian function
  425 + __host__ float gaussianFunction(float x, float std = 25){ return exp(-x/(2*std*std));} // std default value is 25
380 426  
381 427 /// Calculate the total number of points on all edges.
382 428 unsigned total_points(){
... ... @@ -402,9 +448,6 @@ public:
402 448 }
403 449 }
404 450  
405   - // gaussian function
406   - float gaussianFunction(float x, float std=25){ return exp(-x/(2*std*std));} // by default std = 25
407   -
408 451 // convert vec3 to array
409 452 void stim2array(float *a, stim::vec3<T> b){
410 453 a[0] = b[0];
... ... @@ -412,6 +455,16 @@ public:
412 455 a[2] = b[2];
413 456 }
414 457  
  458 + // convert all vec3 points of an edge to a flat array
  459 + void edge2array(T* a, edge b){
  460 + size_t n = b.size();
  461 + for(size_t i = 0; i < n; i++){
  462 + a[i * 3 + 0] = b[i][0];
  463 + a[i * 3 + 1] = b[i][1];
  464 + a[i * 3 + 2] = b[i][2];
  465 + }
  466 + }
  467 +
415 468 /// Calculate the average magnitude across the entire network.
416 469 /// @param m is the magnitude value to use. The default is 0 (usually radius).
417 470 T average(unsigned m = 0){
... ... @@ -419,7 +472,7 @@ public:
419 472 T M, L; //allocate space for the total magnitude and length
420 473 M = L = 0; //initialize both the initial magnitude and length to zero
421 474 for(unsigned e = 0; e < E.size(); e++){ //for each edge in the network
422   - M += E[e].integrate(m); //get the integrated magnitude
  475 + M += E[e].integrate(); //get the integrated magnitude
423 476 L += E[e].length(); //get the edge length
424 477 }
425 478  
... ... @@ -427,7 +480,6 @@ public:
427 480 }
428 481  
429 482 /// This function compares two networks and returns the percentage of the current network that is missing from A.
430   -
431 483 /// @param A is the network to compare to - the field is generated for A
432 484 /// @param sigma is the user-defined tolerance value - smaller values provide a stricter comparison
433 485 stim::network<T> compare(stim::network<T> A, float sigma, int device = -1){
... ... @@ -437,7 +489,7 @@ public:
437 489  
438 490 T *c; // centerline (array of double pointers) - points on kdtree must be double
439 491 size_t n_data = A.total_points(); // set the number of points
440   - c = (T*) malloc(sizeof(T) * n_data * 3); //allocate an array to store all points in the data set
  492 + c = (T*) malloc(sizeof(T) * n_data * 3); // allocate an array to store all points in the data set
441 493  
442 494 unsigned t = 0;
443 495 for(unsigned e = 0; e < A.E.size(); e++){ //for each edge in the network
... ... @@ -451,62 +503,362 @@ public:
451 503 }
452 504  
453 505 //generate a KD-tree for network A
454   - //float metric = 0.0; // initialize metric to be returned after comparing the network
455 506 size_t MaxTreeLevels = 3; // max tree level
456 507  
457 508 #ifdef __CUDACC__
458 509 cudaSetDevice(device);
459   - stim::cuda_kdtree<T, 3> kdt; // initialize a pointer to a kd tree
460   -
461   - //compare each point in the current network to the field produced by A
  510 + stim::kdtree<T, 3> kdt; // initialize a kd tree
  511 +
462 512 kdt.create(c, n_data, MaxTreeLevels); // build a KD tree
463   - T *dists = new T[1]; // near neighbor distances
464   - size_t *nnIdx = new size_t[1]; // near neighbor indices // allocate near neigh indices
465 513  
466   - stim::vec3<T> p0, p1;
467   - T m1;
468   - //float M = 0; //stores the total metric value
469   - //float L = 0; //stores the total network length
470   - T* queryPt = new T[3];
471 514 for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A
472 515 R.E[e].add_mag(0); //add a new magnitude for the metric
473 516 size_t errormag_id = R.E[e].nmags() - 1; //get the id for the new magnitude
  517 +
  518 + size_t n = R.E[e].size(); // the number of points in current edge
  519 + T* queryPt = new T[3 * n];
  520 + T* m1 = new T[n];
  521 + T* dists = new T[n];
  522 + size_t* nnIdx = new size_t[n];
474 523  
475   - for(unsigned p = 0; p < R.E[e].size(); p++){ //for each point in the edge
  524 + T* d_dists;
  525 + T* d_m1;
  526 + cudaMalloc((void**)&d_dists, n * sizeof(T));
  527 + cudaMalloc((void**)&d_m1, n * sizeof(T));
476 528  
477   - p1 = R.E[e][p]; //get the next point in the edge
478   - stim2array(queryPt, p1);
479   - kdt.search(queryPt, 1, nnIdx, dists); //find the distance between A and the current network
  529 + edge2array(queryPt, R.E[e]);
  530 + kdt.search(queryPt, n, nnIdx, dists);
  531 +
  532 + cudaMemcpy(d_dists, dists, n * sizeof(T), cudaMemcpyHostToDevice); // copy dists from host to device
  533 +
  534 + // configuration parameters
  535 + size_t threads = (1024>n)?n:1024;			// use at most 1024 threads per block
  536 + size_t blocks = n/threads + ((n%threads)?1:0);	// round up the block count
  537 +
  538 + d_metric<<<blocks, threads>>>(d_m1, n, d_dists, sigma); //calculate the metric value based on the distance
  539 +
  540 + cudaMemcpy(m1, d_m1, n * sizeof(T), cudaMemcpyDeviceToHost);
  541 +
  542 + for(unsigned p = 0; p < n; p++){
  543 + R.E[e].set_mag(errormag_id, p, m1[p]);
  544 + }
  545 +
  546 + //d_set_mag<<<blocks, threads>>>(R.E[e].M, errormag_id, n, m1);
  547 + }
  548 +
  549 +#else
  550 + stim::kdtree<T, 3> kdt;
  551 + kdt.create(c, n_data, MaxTreeLevels);
  552 +
  553 + for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A
  554 + R.E[e].add_mag(0); //add a new magnitude for the metric
  555 + size_t errormag_id = R.E[e].nmags() - 1;
  556 +
  557 + size_t n = R.E[e].size(); // the number of points in current edge
  558 + T* queryPt = new T[3 * n];
  559 + T* m1 = new T[n];
  560 + T* dists = new T[n];
  561 + size_t* nnIdx = new size_t[n];
  562 +
  563 + edge2array(queryPt, R.E[e]);
  564 +
  565 + kdt.cpu_search(queryPt, n, nnIdx, dists); //find the distance between A and the current network
  566 +
  567 + for(unsigned p = 0; p < R.E[e].size(); p++){
  568 + m1[p] = 1.0f - gaussianFunction((T)dists[p], sigma); //calculate the metric value based on the distance
  569 + R.E[e].set_mag(errormag_id, p, m1[p]); //set the error for the second point in the segment
  570 + }
  571 + }
  572 +#endif
  573 + return R; //return the resulting network
  574 + }
  575 +
  576 + /// This function compares two networks and splits the current one according to the nearest neighbor of each point in each edge
  577 + /// @param A is the network to split
  578 + /// @param B is the corresponding mapping network
  579 + /// @param sigma is the user-defined tolerance value - smaller values provide a stricter comparison
  580 + /// @param device is the device that the user wants to use
  581 + void split(stim::network<T> A, stim::network<T> B, float sigma, int device){
  582 +
  583 + T *c;
  584 + size_t n_data = B.total_points();
  585 + c = (T*) malloc(sizeof(T) * n_data * 3);
  586 +
  587 + size_t NB = B.E.size(); // the number of edges in B
  588 + unsigned t = 0;
  589 + for(unsigned e = 0; e < NB; e++){ // for every edge in B
  590 + for(unsigned p = 0; p < B.E[e].size(); p++){ // for every point in B.E[e]
  591 + for(unsigned d = 0; d < 3; d++){
  592 +
  593 + c[t * 3 + d] = B.E[e][p][d]; // convert to array
  594 + }
  595 + t++;
  596 + }
  597 + }
  598 + size_t MaxTreeLevels = 3; // max tree level
  599 +
  600 +#ifdef __CUDACC__
  601 + cudaSetDevice(device);
  602 + stim::kdtree<T, 3> kdt; // initialize a kd tree
  603 +
  604 + //compare each point in the current network to the field produced by A
  605 + kdt.create(c, n_data, MaxTreeLevels); // build a KD tree
  606 +
  607 + std::vector<std::vector<unsigned>> relation; // relation[e][p] stores the index of the B edge nearest to point p of A edge e
  608 + relation.resize(A.E.size());
  609 +
  610 + for(unsigned e = 0; e < A.E.size(); e++){ //for each edge in A
  611 + A.E[e].add_mag(0); //add a new magnitude for the metric
  612 + size_t errormag_id = A.E[e].nmags() - 1;
  613 +
  614 + size_t n = A.E[e].size(); // the number of points in the current edge
  615 +
  616 + T* queryPt = new T[3 * n]; // set of all the points in current edge
  617 + T* m1 = new T[n]; // array of metrics for every point in current edge
  618 + T* dists = new T[n]; // store the distances for every point in current edge
  619 + size_t* nnIdx = new size_t[n]; // store the indices for every point in current edge
  620 +
  621 + // define pointers in device
  622 + T* d_dists;
  623 + T* d_m1;
  624 + size_t* d_nnIdx;
  625 +
  626 + // allocate memory for defined pointers
  627 + cudaMalloc((void**)&d_dists, n * sizeof(T));
  628 + cudaMalloc((void**)&d_m1, n * sizeof(T));
  629 + cudaMalloc((void**)&d_nnIdx, n * sizeof(size_t));
  630 +
  631 + edge2array(queryPt, A.E[e]); // convert edge to array
  632 + kdt.search(queryPt, n, nnIdx, dists); // search the tree to find the NN for every point in current edge
  633 +
  634 + cudaMemcpy(d_dists, dists, n * sizeof(T), cudaMemcpyHostToDevice); // copy dists from host to device
  635 + cudaMemcpy(d_nnIdx, nnIdx, n * sizeof(size_t), cudaMemcpyHostToDevice); // copy Idx from host to device
  636 +
  637 + // configuration parameters
  638 + size_t threads = (1024>n)?n:1024;			// cap the block size at 1024 threads
  639 + size_t blocks = n/threads + ((n%threads)?1:0);	// round up the block count
  640 +
  641 + d_metric<<<blocks, threads>>>(d_m1, n, d_dists, sigma); // calculate the metrics in parallel
  642 +
  643 + cudaMemcpy(m1, d_m1, n * sizeof(T), cudaMemcpyDeviceToHost);
  644 +
  645 + for(unsigned p = 0; p < n; p++){
  646 + A.E[e].set_mag(errormag_id, p, m1[p]); // set the error(radius) value to every point in current edge
  647 + }
  648 +
  649 + relation[e].resize(n); // resize every edge relation size
  650 +
  651 + unsigned* d_relation;
  652 + cudaMalloc((void**)&d_relation, n * sizeof(unsigned)); // allocate memory
  653 +
  654 + std::vector<size_t> edge_point_num(NB); // the i-th element is the number of points in the i-th edge of B
  655 + for(unsigned ee = 0; ee < NB; ee++)
  656 + edge_point_num[ee] = B.E[ee].size();
  657 +
  658 + size_t* d_edge_point_num;
  659 + cudaMalloc((void**)&d_edge_point_num, NB * sizeof(size_t));
  660 + cudaMemcpy(d_edge_point_num, &edge_point_num[0], NB * sizeof(size_t), cudaMemcpyHostToDevice);
  661 +
  662 + d_findedge<<<blocks, threads>>>(d_nnIdx, n, d_relation, d_edge_point_num, NB); // find the edge corresponding to the array index in parallel
  663 +
  664 + cudaMemcpy(&relation[e][0], d_relation, n * sizeof(unsigned), cudaMemcpyDeviceToHost); //copy relationship from device to host
  665 + }
  666 +#else
  667 + stim::kdtree<T, 3> kdt;
  668 + kdt.create(c, n_data, MaxTreeLevels);
  669 +
  670 + std::vector<std::vector<unsigned>> relation; // the mapping relationship between two networks
  671 + relation.resize(A.E.size());
  672 + for(unsigned i = 0; i < A.E.size(); i++)
  673 + relation[i].resize(A.E[i].size());
  674 +
  675 + std::vector<size_t> edge_point_num(NB); // the i-th element is the number of points in the i-th edge of B
  676 + for(unsigned ee = 0; ee < NB; ee++)
  677 + edge_point_num[ee] = B.E[ee].size();
  678 +
  679 + for(unsigned e = 0; e < A.E.size(); e++){ //for each edge in A
  680 + A.E[e].add_mag(0); //add a new magnitude for the metric
  681 + size_t errormag_id = A.E[e].nmags() - 1;
  682 +
  683 + size_t n = A.E[e].size(); // the number of points in the current edge
  684 +
  685 + T* queryPt = new T[3 * n];
  686 + T* m1 = new T[n];
  687 + T* dists = new T[n]; //store the distances
  688 + size_t* nnIdx = new size_t[n]; //store the indices
  689 +
  690 + edge2array(queryPt, A.E[e]);
  691 + kdt.search(queryPt, n, nnIdx, dists);
  692 +
  693 + for(unsigned p = 0; p < A.E[e].size(); p++){
  694 + m1[p] = 1.0f - gaussianFunction((T)dists[p], sigma); //calculate the metric value based on the distance
  695 + A.E[e].set_mag(errormag_id, p, m1[p]); //set the error for the second point in the segment
  696 +
  697 + unsigned id = 0; //mapping edge's idx
  698 + size_t num = 0; //total number of points before #th edge
  699 + for(unsigned i = 0; i < NB; i++){
  700 + num += B.E[i].size();
  701 + if(nnIdx[p] < num){ //find the edge it belongs to
  702 + relation[e][p] = id;
  703 + break;
  704 + }
  705 + id++; //current edge won't be the one, move to next edge
  706 + }
  707 + }
  708 + }
  709 +#endif
  710 + E = A.E;
  711 + V = A.V;
  712 +
  713 + unsigned int id = 0; // split value
  714 + for(unsigned e = 0; e < E.size(); e++){ // for every edge
  715 + for(unsigned p = 0; p < E[e].size() - 1; p++){ // for every point in each edge
  716 + if(relation[e][p] != relation[e][p + 1]){ // find the point where the nearest edge changes
  717 + id = p + 1; // candidate split index: the first point mapped to a different edge
  718 + if(id < E[e].size()/threshold_fac || (E[e].size() - id) < E[e].size()/threshold_fac) // tolerance is 1/threshold_fac of the edge, tentatively; the proper threshold still needs to be worked out
  719 + id = E[e].size() - 1; // reject splits too close to either end of the edge
  720 + else
  721 + break;
  722 + }
  723 + if(p == E[e].size() - 2) // if there is no splitting index, set the id to the last point index of current edge
  724 + id = p + 1;
  725 + }
  726 + unsigned errormag_id = E[e].nmags() - 1;
  727 + T G = 0; // accumulate the metric to test whether this edge has a valid nearest neighbor
  728 + for(unsigned i = 0; i < E[e].size(); i++)
  729 + G += E[e].m(i, errormag_id); // edges with no good match won't be split
  730 + if(G / E[e].size() > metric_fac) // the threshold should be based on the color map
  731 + id = E[e].size() - 1; // set the split index to the last point so the edge is left unsplit
  732 +
  733 + std::vector<edge> tmpe;
  734 + tmpe.resize(2);
  735 + tmpe = E[e].split(id);
  736 + vertex tmpv = stim::vec3<T>(-1, -1, 0); // store the split point as vertex
  737 + if(tmpe.size() == 2){
  738 + relation.resize(relation.size() + 1);
  739 + for(unsigned d = id; d < E[e].size(); d++)
  740 + relation[relation.size() - 1].push_back(relation[e][d]);
  741 + tmpe[0].v[0] = E[e].v[0]; // beginning vertex of first half edge -> original beginning vertex
  742 + tmpe[1].v[1] = E[e].v[1]; // ending vertex of second half edge -> original ending vertex
  743 + tmpv = E[e][id];
  744 + V.push_back(tmpv);
  745 + tmpe[0].v[1] = (unsigned)V.size() - 1; // ending vertex of first half edge -> new vertex
  746 + tmpe[1].v[0] = (unsigned)V.size() - 1; // beginning vertex of second half edge -> new vertex
  747 + edge tmp(E[e]);
  748 + E[e] = tmpe[0]; // replace original edge by first half edge
  749 + E.push_back(tmpe[1]); // push second half edge to the last
  750 + V[V.size() - 1].e[1].push_back(e); // push first half edge to the incoming of new vertex
  751 + V[V.size() - 1].e[0].push_back((unsigned)E.size() - 1); // push second half edge to the outgoing of new vertex
  752 + for(unsigned i = 0; i < V[tmp.v[1]].e[1].size(); i++) // find the incoming edge of original ending vertex
  753 + if(V[tmp.v[1]].e[1][i] == e)
  754 + V[tmp.v[1]].e[1][i] = (unsigned)V.size() - 1;
  755 + }
  756 + }
  757 + }
  758 +
  759 + /// This function compares two split networks and yields a mapping relationship between them according to the nearest neighbors
  760 + /// @param B is the network that the current network is going to map to
  761 + /// @param C is the mapping relationship: C[e1] = _e1 means edge e1 in the current network maps to edge _e1 in B
  762 + /// @param device is the device that the user wants to use
  763 + void mapping(stim::network<T> B, std::vector<unsigned> &C, int device){
  764 + stim::network<T> A; //generate a network storing the result of the comparison
  765 + A = (*this);
  766 +
  767 + size_t n = A.E.size(); // the number of edges in A
  768 + size_t NB = B.E.size(); // the number of edges in B
  769 +
  770 + C.resize(A.E.size());
  771 +
  772 + T *c; // centerline (array of double pointers) - points on kdtree must be double
  773 + size_t n_data = B.total_points(); // set the number of points
  774 + c = (T*) malloc(sizeof(T) * n_data * 3);
  775 +
  776 + unsigned t = 0;
  777 + for(unsigned e = 0; e < NB; e++){ // for each edge in the network
  778 + for(unsigned p = 0; p < B.E[e].size(); p++){ // for each point in the edge
  779 + for(unsigned d = 0; d < 3; d++){ // for each coordinate
  780 +
  781 + c[t * 3 + d] = B.E[e][p][d];
  782 + }
  783 + t++;
  784 + }
  785 + }
480 786  
481   - m1 = 1.0f - gaussianFunction((T)dists[0], sigma); //calculate the metric value based on the distance
482   - R.E[e].set_mag(errormag_id, p, m1); //set the error for the second point in the segment
  787 + //generate a KD-tree for network A
  788 + //float metric = 0.0; // initialize metric to be returned after comparing the network
  789 + size_t MaxTreeLevels = 3; // max tree level
  790 +
  791 +#ifdef __CUDACC__
  792 + cudaSetDevice(device);
  793 + stim::kdtree<T, 3> kdt; // initialize a kd tree
  794 +
  795 + kdt.create(c, n_data, MaxTreeLevels); // build a KD tree
483 796  
  797 + for(unsigned e = 0; e < n; e++){ //for each edge in A
  798 + size_t errormag_id = A.E[e].nmags() - 1; //get the id for the new magnitude
  799 +
  800 + //pre-screen to discard edges that cannot map to any edge in B
  801 + T M = 0;
  802 + for(unsigned p = 0; p < A.E[e].size(); p++)
  803 + M += A.E[e].m(p, errormag_id);
  804 + M = M / A.E[e].size();
  805 + if(M > metric_fac)
  806 + C[e] = (unsigned)-1; //mark unmappable edges with the maximum unsigned value
  807 + else{
  808 + T* queryPt = new T[3];
  809 + T* dists = new T[1];
  810 + size_t* nnIdx = new size_t[1];
  811 +
  812 + stim2array(queryPt, A.E[e][A.E[e].size()/2]);
  813 + kdt.search(queryPt, 1, nnIdx, dists);
  814 +
  815 + unsigned id = 0; //mapping edge's idx
  816 + size_t num = 0; //total number of points before #th edge
  817 + for(unsigned i = 0; i < NB; i++){
  818 + num += B.E[i].size();
  819 + if(nnIdx[0] < num){
  820 + C[e] = id;
  821 + break;
  822 + }
  823 + id++;
  824 + }
484 825 }
485 826 }
486 827 #else
487   - stim::cpu_kdtree<T, 3> kdt;
  828 + stim::kdtree<T, 3> kdt;
488 829 kdt.create(c, n_data, MaxTreeLevels);
489 830 T *dists = new T[1]; // near neighbor distances
490 831 size_t *nnIdx = new size_t[1]; // near neighbor indices // allocate near neigh indices
491 832  
492 833 stim::vec3<T> p0, p1;
493   - T m1;
494 834 T* queryPt = new T[3];
495   - for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A
496   - R.E[e].add_mag(0); //add a new magnitude for the metric
497 835  
498   - for(unsigned p = 0; p < R.E[e].size(); p++){ //for each point in the edge
499   -
500   - p1 = R.E[e][p]; //get the next point in the edge
  836 + for(unsigned e = 0; e < A.E.size(); e++){ // for each edge in A
  837 + T M = 0; // the sum of metrics of the current edge (averaged below)
  838 + unsigned errormag_id = A.E[e].nmags() - 1;
  839 + for(unsigned p = 0; p < A.E[e].size(); p++)
  840 + M += A.E[e].m(p, errormag_id);
  841 + M = M / A.E[e].size();
  842 + if(M > metric_fac) // if the average metric is larger than metric_fac, we assume it doesn't have a corresponding edge in B
  843 + C[e] = (unsigned)-1;
  844 + else{ // otherwise find the corresponding edge in B
  845 + p1 = A.E[e][A.E[e].size()/2];
501 846 stim2array(queryPt, p1);
502   - kdt.cpu_search(queryPt, 1, nnIdx, dists); //find the distance between A and the current network
503   -
504   - m1 = 1.0f - gaussianFunction((T)dists[0], sigma); //calculate the metric value based on the distance
505   - R.E[e].set_mag(m1, p, 1); //set the error for the second point in the segment
  847 + kdt.cpu_search(queryPt, 1, nnIdx, dists); // search the tree
  848 +
  849 + unsigned id = 0; //mapping edge's idx
  850 + size_t num = 0; //total number of points before #th edge
  851 + for(unsigned i = 0; i < NB; i++){
  852 + num += B.E[i].size();
  853 + if(nnIdx[0] < num){
  854 + C[e] = id;
  855 + break;
  856 + }
  857 + id++;
  858 + }
506 859 }
507 860 }
508 861 #endif
509   - return R; //return the resulting network
510 862 }
511 863  
512 864 /// Returns the number of magnitude values stored in each edge. This should be uniform across the network.
... ... @@ -616,4 +968,4 @@ public:
616 968 }
617 969 }; //end stim::network class
618 970 }; //end stim namespace
619 971 -#endif
  972 +#endif
620 973 \ No newline at end of file
... ...
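For reference, the per-point error value written into the new magnitude channel by compare() and split() above is

    M(p) = 1 - exp( -d(p) / (2 * sigma^2) )

where sigma is the user-supplied tolerance and d(p) is the value returned by the kd-tree search for point p (the squared Euclidean distance to the nearest point of the other network, judging by cpu_distance/gpu_distance in kdtree.cuh). M(p) is near 0 where the two networks coincide and approaches 1 as the distance grows, which is also why metric_fac is used as an "unmatched" threshold in split() and mapping().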
stim/math/filters/conv2.h 0 → 100644
  1 +#ifndef STIM_CUDA_CONV2_H
  2 +#define STIM_CUDA_CONV2_H
  3 +//#define __CUDACC__
  4 +
  5 +#ifdef __CUDACC__
  6 +#include <stim/cuda/cudatools.h>
  7 +#include <stim/cuda/sharedmem.cuh>
  8 +#endif
  9 +
  10 +namespace stim {
  11 +#ifdef __CUDACC__
  12 + //Kernel function that performs the 2D convolution.
  13 + template<typename T, typename K>
  14 + __global__ void kernel_conv2(T* out, T* in, K* kernel, size_t sx, size_t sy, size_t kx, size_t ky) {
  15 + extern __shared__ T s[]; //declare a shared memory array
  16 + size_t xi = blockIdx.x * blockDim.x + threadIdx.x; //threads correspond to indices into the output image
  17 + size_t yi = blockIdx.y * blockDim.y + threadIdx.y;
  18 + size_t tid = threadIdx.y * blockDim.x + threadIdx.x;
  19 + size_t nt = blockDim.x * blockDim.y;
  20 +
  21 + size_t cx = blockIdx.x * blockDim.x; //find the upper left corner of the input region
  22 + size_t cy = blockIdx.y * blockDim.y;
  23 +
  24 + size_t X = sx - kx + 1; //calculate the size of the output image
  25 + size_t Y = sy - ky + 1;
  26 +
  27 + if (cx >= X || cy >= Y) return; //return if the entire block is outside the image
  28 + size_t smx = min(blockDim.x + kx - 1, sx - cx); //size of the shared copy of the input image
  29 + size_t smy = min(blockDim.y + ky - 1, sy - cy); // min function is used to deal with boundary blocks
  30 + stim::cuda::threadedMemcpy2D<T>(s, smx, smy, in, cx, cy, sx, sy, tid, nt); //copy the input region to shared memory
  31 + __syncthreads();
  32 +
  33 + if (xi >= X || yi >= Y) return; //returns if the thread is outside of the output image
  34 +
  35 + //loop through the kernel
  36 + size_t kxi, kyi;
  37 + K v = 0;
  38 + for (kyi = 0; kyi < ky; kyi++) {
  39 + for (kxi = 0; kxi < kx; kxi++) {
  40 + v += s[(threadIdx.y + kyi) * smx + threadIdx.x + kxi] * kernel[kyi * kx + kxi];
  41 + //v += in[(yi + kyi) * sx + xi + kxi] * kernel[kyi * kx + kxi];
  42 + }
  43 + }
  44 + out[yi * X + xi] = (T)v; //write the result to global memory
  45 +
  46 + }
  47 +
  48 + //Performs a convolution of a 2D image using the GPU. All pointers are assumed to be to memory on the current device.
  49 + //@param out is a pointer to the output image
  50 + //@param in is a pointer to the input image
  51 + //@param sx is the size of the input image along X
  52 + //@param sy is the size of the input image along Y
  53 + //@param kx is the size of the kernel along X
  54 + //@param ky is the size of the kernel along Y
  55 + template<typename T, typename K>
  56 + void gpu_conv2(T* out, T* in, K* kernel, size_t sx, size_t sy, size_t kx, size_t ky) {
  57 + cudaDeviceProp p;
  58 + HANDLE_ERROR(cudaGetDeviceProperties(&p, 0));
  59 + size_t tmax = p.maxThreadsPerBlock;
  60 + dim3 nt(sqrt(tmax), sqrt(tmax)); //calculate the block dimensions
  61 + size_t X = sx - kx + 1; //calculate the size of the output image
  62 + size_t Y = sy - ky + 1;
  63 + dim3 nb(X / nt.x + 1, Y / nt.y + 1); //calculate the grid dimensions
  64 + size_t sm = (nt.x + kx - 1) * (nt.y + ky - 1) * sizeof(T); //shared memory bytes required to store block data
  65 + if (sm > p.sharedMemPerBlock) {
  66 + std::cout << "Error in stim::gpu_conv2() - insufficient shared memory for this kernel." << std::endl;
  67 + exit(1);
  68 + }
  69 + kernel_conv2 <<<nb, nt, sm>>> (out, in, kernel, sx, sy, kx, ky); //launch the kernel
  70 + }
  71 +#endif
  72 + //Performs a convolution of a 2D image. Only valid pixels based on the kernel are returned.
  73 + // As a result, the output image will be smaller than the input image by (kx-1, ky-1)
  74 + //@param out is a pointer to the output image
  75 + //@param in is a pointer to the input image
  76 + //@param sx is the size of the input image along X
  77 + //@param sy is the size of the input image along Y
  78 + //@param kx is the size of the kernel along X
  79 + //@param ky is the size of the kernel along Y
  80 + template<typename T, typename K>
  81 + void cpu_conv2(T* out, T* in, K* kernel, size_t sx, size_t sy, size_t kx, size_t ky) {
  82 + size_t X = sx - kx + 1; //x size of the output image
  83 + size_t Y = sy - ky + 1; //y size of the output image
  84 +
  85 +#ifdef __CUDACC__
  86 + //allocate memory and copy everything to the GPU
  87 + T* gpu_in;
  88 + HANDLE_ERROR(cudaMalloc(&gpu_in, sx * sy * sizeof(T)));
  89 + HANDLE_ERROR(cudaMemcpy(gpu_in, in, sx * sy * sizeof(T), cudaMemcpyHostToDevice));
  90 + K* gpu_kernel;
  91 + HANDLE_ERROR(cudaMalloc(&gpu_kernel, kx * ky * sizeof(K)));
  92 + HANDLE_ERROR(cudaMemcpy(gpu_kernel, kernel, kx * ky * sizeof(K), cudaMemcpyHostToDevice));
  93 + T* gpu_out;
  94 + HANDLE_ERROR(cudaMalloc(&gpu_out, X * Y * sizeof(T)));
  95 + gpu_conv2(gpu_out, gpu_in, gpu_kernel, sx, sy, kx, ky); //execute the GPU kernel
  96 + HANDLE_ERROR(cudaMemcpy(out, gpu_out, X * Y * sizeof(T), cudaMemcpyDeviceToHost)); //copy the result to the host
  97 + HANDLE_ERROR(cudaFree(gpu_in));
  98 + HANDLE_ERROR(cudaFree(gpu_kernel));
  99 + HANDLE_ERROR(cudaFree(gpu_out));
  100 +#else
  101 + K v; //register stores the integral of the current pixel value
  102 + size_t yi, xi, kyi, kxi, yi_kyi_sx;
  103 + for (yi = 0; yi < Y; yi++) { //for each pixel in the output image
  104 + for (xi = 0; xi < X; xi++) {
  105 + v = 0;
  106 + for (kyi = 0; kyi < ky; kyi++) { //for each pixel in the kernel
  107 + yi_kyi_sx = (yi + kyi) * sx;
  108 + for (kxi = 0; kxi < kx; kxi++) {
  109 + v += in[yi_kyi_sx + xi + kxi] * kernel[kyi * kx + kxi];
  110 + }
  111 + }
  112 + out[yi * X + xi] = v; //save the result to the output array
  113 + }
  114 + }
  115 +
  116 +#endif
  117 + }
  118 +
  119 +
  120 +}
  121 +
  122 +
  123 +#endif
0 124 \ No newline at end of file
... ...
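A minimal usage sketch for the new conv2.h header (the image sizes and contents here are hypothetical; when the translation unit is compiled with nvcc, cpu_conv2 dispatches to gpu_conv2 internally, otherwise it runs the plain CPU loop):

#include <cstddef>
#include <vector>
#include <stim/math/filters/conv2.h>

int main() {
	const size_t sx = 8, sy = 8;                   // input image size (hypothetical)
	const size_t kx = 3, ky = 3;                   // kernel size (hypothetical)
	const size_t X = sx - kx + 1, Y = sy - ky + 1; // only "valid" pixels are produced

	std::vector<float> in(sx * sy, 1.0f);          // constant test image
	std::vector<float> kernel(kx * ky, 1.0f / 9);  // 3x3 box filter
	std::vector<float> out(X * Y);                 // output is (sx-kx+1) x (sy-ky+1)

	stim::cpu_conv2(out.data(), in.data(), kernel.data(), sx, sy, kx, ky);
	return 0;                                      // every output value should be ~1.0
}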
stim/structures/kdtree.cuh
1   -// right now the size of CUDA STACK is set to 1000, increase it if you mean to make deeper tree
  1 +// right now the size of CUDA STACK is set to 50, increase it if you intend to build a deeper tree
2 2 // data should be stored in row-major
3 3 // x1,x2,x3,x4,x5......
4 4 // y1,y2,y3,y4,y5......
... ... @@ -22,16 +22,16 @@
22 22 #include <stim/visualization/aabbn.h>
23 23  
24 24 namespace stim {
25   - namespace kdtree {
  25 + namespace cpu_kdtree {
26 26 template<typename T, int D> // typename refers to float or double while D refers to dimension of points
27 27 struct point {
28 28 T dim[D]; // create a structure to store every one input point
29 29 };
30 30  
31 31 template<typename T>
32   - class kdnode {
  32 + class cpu_kdnode {
33 33 public:
34   - kdnode() { // constructor for initializing a kdnode
  34 + cpu_kdnode() { // constructor for initializing a kdnode
35 35 parent = NULL; // set every node's parent, left and right kdnode pointers to NULL
36 36 left = NULL;
37 37 right = NULL;
... ... @@ -42,258 +42,12 @@ namespace stim {
42 42 }
43 43 int idx; // index of current node
44 44 int parent_idx, left_idx, right_idx; // index of parent, left and right nodes
45   - kdnode *parent, *left, *right; // parent, left and right kdnodes
  45 + cpu_kdnode *parent, *left, *right; // parent, left and right kdnodes
46 46 T split_value; // splitting value of current node
47 47 std::vector <size_t> indices; // it indicates the points' indices that current node has
48 48 size_t level; // tree level of current node
49 49 };
50   - } // end of namespace kdtree
51   -
52   - template <typename T, int D = 3> // set dimension of data to default 3
53   - class cpu_kdtree {
54   - protected:
55   - int current_axis; // current judging axis
56   - int n_id; // store the total number of nodes
57   - std::vector < typename kdtree::point<T, D> > *tmp_points; // transfer or temperary points
58   - std::vector < typename kdtree::point<T, D> > cpu_tmp_points; // for cpu searching
59   - kdtree::kdnode<T> *root; // root node
60   - static cpu_kdtree<T, D> *cur_tree_ptr;
61   - public:
62   - cpu_kdtree() { // constructor for creating a cpu_kdtree
63   - cur_tree_ptr = this; // create a class pointer points to the current class value
64   - n_id = 0; // set total number of points to default 0
65   - }
66   -
67   - ~cpu_kdtree() { // destructor of cpu_kdtree
68   - std::vector <kdtree::kdnode<T>*> next_nodes;
69   - next_nodes.push_back(root);
70   - while (next_nodes.size()) {
71   - std::vector <kdtree::kdnode<T>*> next_search_nodes;
72   - while (next_nodes.size()) {
73   - kdtree::kdnode<T> *cur = next_nodes.back();
74   - next_nodes.pop_back();
75   - if (cur->left)
76   - next_search_nodes.push_back(cur->left);
77   - if (cur->right)
78   - next_search_nodes.push_back(cur->right);
79   - delete cur;
80   - }
81   - next_nodes = next_search_nodes;
82   - }
83   - root = NULL;
84   - }
85   -
86   - void cpu_create(std::vector < typename kdtree::point<T, D> > &reference_points, size_t max_levels) {
87   - tmp_points = &reference_points;
88   - root = new kdtree::kdnode<T>(); // initializing the root node
89   - root->idx = n_id++; // the index of root is 0
90   - root->level = 0; // tree level begins at 0
91   - root->indices.resize(reference_points.size()); // get the number of points
92   - for (size_t i = 0; i < reference_points.size(); i++) {
93   - root->indices[i] = i; // set indices of input points
94   - }
95   - std::vector <kdtree::kdnode<T>*> next_nodes; // next nodes
96   - next_nodes.push_back(root); // push back the root node
97   - while (next_nodes.size()) {
98   - std::vector <kdtree::kdnode<T>*> next_search_nodes; // next search nodes
99   - while (next_nodes.size()) { // two same WHILE is because we need to make a new vector to store nodes for search
100   - kdtree::kdnode<T> *current_node = next_nodes.back(); // handle node one by one (right first)
101   - next_nodes.pop_back(); // pop out current node in order to store next round of nodes
102   - if (current_node->level < max_levels) {
103   - if (current_node->indices.size() > 1) { // split if the nonleaf node contains more than one point
104   - kdtree::kdnode<T> *left = new kdtree::kdnode<T>();
105   - kdtree::kdnode<T> *right = new kdtree::kdnode<T>();
106   - left->idx = n_id++; // set the index of current node's left node
107   - right->idx = n_id++;
108   - split(current_node, left, right); // split left and right and determine a node
109   - std::vector <size_t> temp; // empty vecters of int
110   - //temp.resize(current_node->indices.size());
111   - current_node->indices.swap(temp); // clean up current node's indices
112   - current_node->left = left;
113   - current_node->right = right;
114   - current_node->left_idx = left->idx;
115   - current_node->right_idx = right->idx;
116   - if (right->indices.size())
117   - next_search_nodes.push_back(right); // left pop out first
118   - if (left->indices.size())
119   - next_search_nodes.push_back(left);
120   - }
121   - }
122   - }
123   - next_nodes = next_search_nodes; // go deeper within the tree
124   - }
125   - }
126   -
127   - static bool sort_points(const size_t a, const size_t b) { // create functor for std::sort
128   - std::vector < typename kdtree::point<T, D> > &pts = *cur_tree_ptr->tmp_points; // put cur_tree_ptr to current input points' pointer
129   - return pts[a].dim[cur_tree_ptr->current_axis] < pts[b].dim[cur_tree_ptr->current_axis];
130   - }
131   -
132   - void split(kdtree::kdnode<T> *cur, kdtree::kdnode<T> *left, kdtree::kdnode<T> *right) {
133   - std::vector < typename kdtree::point<T, D> > &pts = *tmp_points;
134   - current_axis = cur->level % D; // indicate the judicative dimension or axis
135   - std::sort(cur->indices.begin(), cur->indices.end(), sort_points); // using SortPoints as comparison function to sort the data
136   - size_t mid_value = cur->indices[cur->indices.size() / 2]; // odd in the mid_value, even take the floor
137   - cur->split_value = pts[mid_value].dim[current_axis]; // get the parent node
138   - left->parent = cur; // set the parent of the next search nodes to current node
139   - right->parent = cur;
140   - left->level = cur->level + 1; // level + 1
141   - right->level = cur->level + 1;
142   - left->parent_idx = cur->idx; // set its parent node's index
143   - right->parent_idx = cur->idx;
144   - for (size_t i = 0; i < cur->indices.size(); i++) { // split into left and right half-space one by one
145   - size_t idx = cur->indices[i];
146   - if (pts[idx].dim[current_axis] < cur->split_value)
147   - left->indices.push_back(idx);
148   - else
149   - right->indices.push_back(idx);
150   - }
151   - }
152   -
153   - void create(T *h_reference_points, size_t reference_count, size_t max_levels) {
154   - std::vector < typename kdtree::point<T, D> > reference_points(reference_count); // restore the reference points in particular way
155   - for (size_t j = 0; j < reference_count; j++)
156   - for (size_t i = 0; i < D; i++)
157   - reference_points[j].dim[i] = h_reference_points[j * D + i];
158   - cpu_create(reference_points, max_levels);
159   - cpu_tmp_points = *tmp_points;
160   - }
161   -
162   - int get_num_nodes() const { // get the total number of nodes
163   - return n_id;
164   - }
165   -
166   - kdtree::kdnode<T>* get_root() const { // get the root node of tree
167   - return root;
168   - }
169   -
170   - T cpu_distance(const kdtree::point<T, D> &a, const kdtree::point<T, D> &b) {
171   - T distance = 0;
172   -
173   - for (size_t i = 0; i < D; i++) {
174   - T d = a.dim[i] - b.dim[i];
175   - distance += d*d;
176   - }
177   - return distance;
178   - }
179   -
180   - void cpu_search_at_node(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, size_t *index, T *distance, kdtree::kdnode<T> **node) {
181   - T best_distance = FLT_MAX; // initialize the best distance to max of floating point
182   - size_t best_index = 0;
183   - std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points;
184   - while (true) {
185   - size_t split_axis = cur->level % D;
186   - if (cur->left == NULL) { // risky but acceptable, same goes for right because left and right are in same pace
187   - *node = cur; // pointer points to a pointer
188   - for (size_t i = 0; i < cur->indices.size(); i++) {
189   - size_t idx = cur->indices[i];
190   - T d = cpu_distance(query, pts[idx]); // compute distances
191   - /// if we want to compute k nearest neighbor, we can input the last resul
192   - /// (last_best_dist < dist < best_dist) to select the next point until reaching to k
193   - if (d < best_distance) {
194   - best_distance = d;
195   - best_index = idx; // record the nearest neighbor index
196   - }
197   - }
198   - break; // find the target point then break the loop
199   - }
200   - else if (query.dim[split_axis] < cur->split_value) { // if it has son node, visit the next node on either left side or right side
201   - cur = cur->left;
202   - }
203   - else {
204   - cur = cur->right;
205   - }
206   - }
207   - *index = best_index;
208   - *distance = best_distance;
209   - }
210   -
211   - void cpu_search_at_node_range(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, T range, size_t *index, T *distance) {
212   - T best_distance = FLT_MAX; // initialize the best distance to max of floating point
213   - size_t best_index = 0;
214   - std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points;
215   - std::vector < typename kdtree::kdnode<T>*> next_node;
216   - next_node.push_back(cur);
217   - while (next_node.size()) {
218   - std::vector<typename kdtree::kdnode<T>*> next_search;
219   - while (next_node.size()) {
220   - cur = next_node.back();
221   - next_node.pop_back();
222   - size_t split_axis = cur->level % D;
223   - if (cur->left == NULL) {
224   - for (size_t i = 0; i < cur->indices.size(); i++) {
225   - size_t idx = cur->indices[i];
226   - T d = cpu_distance(query, pts[idx]);
227   - if (d < best_distance) {
228   - best_distance = d;
229   - best_index = idx;
230   - }
231   - }
232   - }
233   - else {
234   - T d = query.dim[split_axis] - cur->split_value; // computer distance along specific axis or dimension
235   - /// there are three possibilities: on either left or right, and on both left and right
236   - if (fabs(d) > range) { // absolute value of floating point to see if distance will be larger that best_dist
237   - if (d < 0)
238   - next_search.push_back(cur->left); // every left[split_axis] is less and equal to cur->split_value, so it is possible to find the nearest point in this region
239   - else
240   - next_search.push_back(cur->right);
241   - }
242   - else { // it is possible that nereast neighbor will appear on both left and right
243   - next_search.push_back(cur->left);
244   - next_search.push_back(cur->right);
245   - }
246   - }
247   - }
248   - next_node = next_search; // pop out at least one time
249   - }
250   - *index = best_index;
251   - *distance = best_distance;
252   - }
253   -
254   - void cpu_search(T *h_query_points, size_t query_count, size_t *h_indices, T *h_distances) {
255   - /// first convert the input query point into specific type
256   - kdtree::point<T, D> query;
257   - for (size_t j = 0; j < query_count; j++) {
258   - for (size_t i = 0; i < D; i++)
259   - query.dim[i] = h_query_points[j * D + i];
260   - /// find the nearest node, this will be the upper bound for the next time searching
261   - kdtree::kdnode<T> *best_node = NULL;
262   - T best_distance = FLT_MAX;
263   - size_t best_index = 0;
264   - T radius = 0; // radius for range
265   - cpu_search_at_node(root, query, &best_index, &best_distance, &best_node); // simple search to rougly determine a result for next search step
266   - radius = sqrt(best_distance); // It is possible that nearest will appear in another region
267   - /// find other possibilities
268   - kdtree::kdnode<T> *cur = best_node;
269   - while (cur->parent != NULL) { // every node that you pass will be possible to be the best node
270   - /// go up
271   - kdtree::kdnode<T> *parent = cur->parent; // travel back to every node that we pass through
272   - size_t split_axis = (parent->level) % D;
273   - /// search other nodes
274   - size_t tmp_index;
275   - T tmp_distance = FLT_MAX;
276   - if (fabs(parent->split_value - query.dim[split_axis]) <= radius) {
277   - /// search opposite node
278   - if (parent->left != cur)
279   - cpu_search_at_node_range(parent->left, query, radius, &tmp_index, &tmp_distance); // to see whether it is its mother node's left son node
280   - else
281   - cpu_search_at_node_range(parent->right, query, radius, &tmp_index, &tmp_distance);
282   - }
283   - if (tmp_distance < best_distance) {
284   - best_distance = tmp_distance;
285   - best_index = tmp_index;
286   - }
287   - cur = parent;
288   - }
289   - h_indices[j] = best_index;
290   - h_distances[j] = best_distance;
291   - }
292   - }
293   - }; //end class kdtree
294   -
295   - template <typename T, int D>
296   - cpu_kdtree<T, D>* cpu_kdtree<T, D>::cur_tree_ptr = NULL; // definition of cur_tree_ptr pointer points to the current class
  50 + } // end of namespace cpu_kdtree
297 51  
298 52 template <typename T>
299 53 struct cuda_kdnode {
... ... @@ -305,7 +59,7 @@ namespace stim {
305 59 };
306 60  
307 61 template <typename T, int D>
308   - __device__ T gpu_distance(kdtree::point<T, D> &a, kdtree::point<T, D> &b) {
  62 + __device__ T gpu_distance(cpu_kdtree::point<T, D> &a, cpu_kdtree::point<T, D> &b) {
309 63 T distance = 0;
310 64  
311 65 for (size_t i = 0; i < D; i++) {
... ... @@ -316,7 +70,7 @@ namespace stim {
316 70 }
317 71  
318 72 template <typename T, int D>
319   - __device__ void search_at_node(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, int cur, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, int *d_node) {
  73 + __device__ void search_at_node(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, int cur, cpu_kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, int *d_node) {
320 74 T best_distance = FLT_MAX;
321 75 size_t best_index = 0;
322 76  
... ... @@ -346,7 +100,7 @@ namespace stim {
346 100 }
347 101  
348 102 template <typename T, int D>
349   - __device__ void search_at_node_range(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, int cur, T range, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
  103 + __device__ void search_at_node_range(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> &d_query_point, int cur, T range, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
350 104 T best_distance = FLT_MAX;
351 105 size_t best_index = 0;
352 106  
... ... @@ -405,7 +159,7 @@ namespace stim {
405 159 }
406 160  
407 161 template <typename T, int D>
408   - __device__ void search(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
  162 + __device__ void search(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
409 163 int best_node = 0;
410 164 T best_distance = FLT_MAX;
411 165 size_t best_index = 0;
... ... @@ -438,7 +192,7 @@ namespace stim {
438 192 }
439 193  
440 194 template <typename T, int D>
441   - __global__ void search_batch(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> *d_query_points, size_t d_query_count, size_t *d_indices, T *d_distances, int *next_nodes, int *next_search_nodes, int *Judge) {
  195 + __global__ void search_batch(cuda_kdnode<T> *nodes, size_t *indices, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> *d_query_points, size_t d_query_count, size_t *d_indices, T *d_distances, int *next_nodes, int *next_search_nodes, int *Judge) {
442 196 size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
443 197 if (idx >= d_query_count) return; // avoid segfault
444 198  
... ... @@ -446,11 +200,11 @@ namespace stim {
446 200 }
447 201  
448 202 template <typename T, int D>
449   - void search_stream(cuda_kdnode<T> *d_nodes, size_t *d_index, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> *query_stream_points, size_t stream_count, size_t *indices, T *distances) {
  203 + void search_stream(cuda_kdnode<T> *d_nodes, size_t *d_index, cpu_kdtree::point<T, D> *d_reference_points, cpu_kdtree::point<T, D> *query_stream_points, size_t stream_count, size_t *indices, T *distances) {
450 204 unsigned int threads = (unsigned int)(stream_count > 1024 ? 1024 : stream_count);
451 205 unsigned int blocks = (unsigned int)(stream_count / threads + (stream_count % threads ? 1 : 0));
452 206  
453   - kdtree::point<T, D> *d_query_points;
  207 + cpu_kdtree::point<T, D> *d_query_points;
454 208 size_t *d_indices;
455 209 T *d_distances;
456 210  
... ... @@ -480,26 +234,121 @@ namespace stim {
480 234 HANDLE_ERROR(cudaFree(d_distances));
481 235 }
482 236  
483   - template <typename T, int D = 3>
484   - class cuda_kdtree {
  237 + template <typename T, int D = 3> // set dimension of data to default 3
  238 + class kdtree {
485 239 protected:
486   - cuda_kdnode<T> *d_nodes;
487   - size_t *d_index;
488   - kdtree::point<T, D>* d_reference_points;
489   - size_t npts;
490   - int num_nodes;
  240 + int current_axis; // current judging axis
  241 + int n_id; // store the total number of nodes
  242 + std::vector < typename cpu_kdtree::point<T, D> > *tmp_points; // transfer or temporary points
  243 + std::vector < typename cpu_kdtree::point<T, D> > cpu_tmp_points; // for cpu searching
  244 + cpu_kdtree::cpu_kdnode<T> *root; // root node
  245 + static kdtree<T, D> *cur_tree_ptr;
  246 + #ifdef __CUDACC__
  247 + cuda_kdnode<T> *d_nodes;
  248 + size_t *d_index;
  249 + cpu_kdtree::point<T, D>* d_reference_points;
  250 + size_t npts;
  251 + int num_nodes;
  252 + #endif
491 253 public:
492   - ~cuda_kdtree() {
  254 + kdtree() { // constructor for creating a kdtree
  255 + cur_tree_ptr = this; // create a class pointer points to the current class value
  256 + n_id = 0; // set total number of points to default 0
  257 + }
  258 +
  259 + ~kdtree() { // destructor of kdtree
  260 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_nodes;
  261 + next_nodes.push_back(root);
  262 + while (next_nodes.size()) {
  263 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_search_nodes;
  264 + while (next_nodes.size()) {
  265 + cpu_kdtree::cpu_kdnode<T> *cur = next_nodes.back();
  266 + next_nodes.pop_back();
  267 + if (cur->left)
  268 + next_search_nodes.push_back(cur->left);
  269 + if (cur->right)
  270 + next_search_nodes.push_back(cur->right);
  271 + delete cur;
  272 + }
  273 + next_nodes = next_search_nodes;
  274 + }
  275 + root = NULL;
  276 + #ifdef __CUDACC__
493 277 HANDLE_ERROR(cudaFree(d_nodes));
494 278 HANDLE_ERROR(cudaFree(d_index));
495 279 HANDLE_ERROR(cudaFree(d_reference_points));
  280 + #endif
496 281 }
497   -
498   - /// Create a KD-tree given a pointer to an array of reference points and the number of reference points
499   - /// @param h_reference_points is a host array containing the reference points in (x0, y0, z0, ...., ) order
500   - /// @param reference_count is the number of reference point in the array
501   - /// @param max_levels is the deepest number of tree levels allowed
502   - void create(T *h_reference_points, size_t reference_count, size_t max_levels = 3) {
  282 +
  283 + void cpu_create(std::vector < typename cpu_kdtree::point<T, D> > &reference_points, size_t max_levels) {
  284 + tmp_points = &reference_points;
  285 + root = new cpu_kdtree::cpu_kdnode<T>(); // initializing the root node
  286 + root->idx = n_id++; // the index of root is 0
  287 + root->level = 0; // tree level begins at 0
  288 + root->indices.resize(reference_points.size()); // get the number of points
  289 + for (size_t i = 0; i < reference_points.size(); i++) {
  290 + root->indices[i] = i; // set indices of input points
  291 + }
  292 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_nodes; // next nodes
  293 + next_nodes.push_back(root); // push back the root node
  294 + while (next_nodes.size()) {
  295 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_search_nodes; // next search nodes
  296 + while (next_nodes.size()) { // two nested while loops so that a new vector can store the next level of nodes to search
  297 + cpu_kdtree::cpu_kdnode<T> *current_node = next_nodes.back(); // handle node one by one (right first)
  298 + next_nodes.pop_back(); // pop out current node in order to store next round of nodes
  299 + if (current_node->level < max_levels) {
  300 + if (current_node->indices.size() > 1) { // split if the nonleaf node contains more than one point
  301 + cpu_kdtree::cpu_kdnode<T> *left = new cpu_kdtree::cpu_kdnode<T>();
  302 + cpu_kdtree::cpu_kdnode<T> *right = new cpu_kdtree::cpu_kdnode<T>();
  303 + left->idx = n_id++; // set the index of current node's left node
  304 + right->idx = n_id++;
  305 + split(current_node, left, right); // split left and right and determine a node
  307 + std::vector <size_t> temp; // empty vector of indices
  307 + //temp.resize(current_node->indices.size());
  308 + current_node->indices.swap(temp); // clean up current node's indices
  309 + current_node->left = left;
  310 + current_node->right = right;
  311 + current_node->left_idx = left->idx;
  312 + current_node->right_idx = right->idx;
  313 + if (right->indices.size())
  314 + next_search_nodes.push_back(right); // left pop out first
  315 + if (left->indices.size())
  316 + next_search_nodes.push_back(left);
  317 + }
  318 + }
  319 + }
  320 + next_nodes = next_search_nodes; // go deeper within the tree
  321 + }
  322 + }
  323 +
  324 + static bool sort_points(const size_t a, const size_t b) { // create functor for std::sort
  325 + std::vector < typename cpu_kdtree::point<T, D> > &pts = *cur_tree_ptr->tmp_points; // use cur_tree_ptr to access the current tree's input points
  326 + return pts[a].dim[cur_tree_ptr->current_axis] < pts[b].dim[cur_tree_ptr->current_axis];
  327 + }
  328 +
  329 + void split(cpu_kdtree::cpu_kdnode<T> *cur, cpu_kdtree::cpu_kdnode<T> *left, cpu_kdtree::cpu_kdnode<T> *right) {
  330 + std::vector < typename cpu_kdtree::point<T, D> > &pts = *tmp_points;
  331 + current_axis = cur->level % D; // select the splitting axis (dimension) for this level
  332 + std::sort(cur->indices.begin(), cur->indices.end(), sort_points); // use sort_points as the comparison function to sort the data
  333 + size_t mid_value = cur->indices[cur->indices.size() / 2]; // take the median index (size / 2)
  334 + cur->split_value = pts[mid_value].dim[current_axis]; // store the splitting value for the current node
  335 + left->parent = cur; // set the parent of the next search nodes to current node
  336 + right->parent = cur;
  337 + left->level = cur->level + 1; // level + 1
  338 + right->level = cur->level + 1;
  339 + left->parent_idx = cur->idx; // set its parent node's index
  340 + right->parent_idx = cur->idx;
  341 + for (size_t i = 0; i < cur->indices.size(); i++) { // split into left and right half-space one by one
  342 + size_t idx = cur->indices[i];
  343 + if (pts[idx].dim[current_axis] < cur->split_value)
  344 + left->indices.push_back(idx);
  345 + else
  346 + right->indices.push_back(idx);
  347 + }
  348 + }
  349 +
  350 + void create(T *h_reference_points, size_t reference_count, size_t max_levels) {
  351 + #ifdef __CUDACC__
503 352 if (max_levels > 10) {
504 353 std::cout<<"The max_tree_levels should be smaller!"<<std::endl;
505 354 exit(1);
... ... @@ -507,29 +356,28 @@ namespace stim {
507 356 //bb.init(&h_reference_points[0]);
508 357 //aaboundingboxing<T, D>(bb, h_reference_points, reference_count);
509 358  
510   - std::vector < typename kdtree::point<T, D>> reference_points(reference_count); // restore the reference points in particular way
  359 + std::vector < typename cpu_kdtree::point<T, D>> reference_points(reference_count); // repack the reference points into point structs
511 360 for (size_t j = 0; j < reference_count; j++)
512 361 for (size_t i = 0; i < D; i++)
513   - reference_points[j].dim[i] = h_reference_points[j * D + i];
514   - cpu_kdtree<T, D> tree; // creating a tree on cpu
515   - tree.cpu_create(reference_points, max_levels); // building a tree on cpu
516   - kdtree::kdnode<T> *d_root = tree.get_root();
517   - num_nodes = tree.get_num_nodes();
  362 + reference_points[j].dim[i] = h_reference_points[j * D + i];
  363 + (*this).cpu_create(reference_points, max_levels); // building a tree on cpu
  364 + cpu_kdtree::cpu_kdnode<T> *d_root = (*this).get_root();
  365 + num_nodes = (*this).get_num_nodes();
518 366 npts = reference_count; // also equals to reference_count
519 367  
520 368 HANDLE_ERROR(cudaMalloc((void**)&d_nodes, sizeof(cuda_kdnode<T>) * num_nodes)); // copy data from host to device
521 369 HANDLE_ERROR(cudaMalloc((void**)&d_index, sizeof(size_t) * npts));
522   - HANDLE_ERROR(cudaMalloc((void**)&d_reference_points, sizeof(kdtree::point<T, D>) * npts));
  370 + HANDLE_ERROR(cudaMalloc((void**)&d_reference_points, sizeof(cpu_kdtree::point<T, D>) * npts));
523 371  
524 372 std::vector < cuda_kdnode<T> > tmp_nodes(num_nodes);
525 373 std::vector <size_t> indices(npts);
526   - std::vector <kdtree::kdnode<T>*> next_nodes;
  374 + std::vector <cpu_kdtree::cpu_kdnode<T>*> next_nodes;
527 375 size_t cur_pos = 0;
528 376 next_nodes.push_back(d_root);
529 377 while (next_nodes.size()) {
530   - std::vector <typename kdtree::kdnode<T>*> next_search_nodes;
  378 + std::vector <typename cpu_kdtree::cpu_kdnode<T>*> next_search_nodes;
531 379 while (next_nodes.size()) {
532   - kdtree::kdnode<T> *cur = next_nodes.back();
  380 + cpu_kdtree::cpu_kdnode<T> *cur = next_nodes.back();
533 381 next_nodes.pop_back();
534 382 int id = cur->idx; // the nodes at same level are independent
535 383 tmp_nodes[id].level = cur->level;
... ... @@ -559,16 +407,154 @@ namespace stim {
559 407 }
560 408 HANDLE_ERROR(cudaMemcpy(d_nodes, &tmp_nodes[0], sizeof(cuda_kdnode<T>) * tmp_nodes.size(), cudaMemcpyHostToDevice));
561 409 HANDLE_ERROR(cudaMemcpy(d_index, &indices[0], sizeof(size_t) * indices.size(), cudaMemcpyHostToDevice));
562   - HANDLE_ERROR(cudaMemcpy(d_reference_points, &reference_points[0], sizeof(kdtree::point<T, D>) * reference_count, cudaMemcpyHostToDevice));
  410 + HANDLE_ERROR(cudaMemcpy(d_reference_points, &reference_points[0], sizeof(cpu_kdtree::point<T, D>) * reference_count, cudaMemcpyHostToDevice));
  411 +
  412 + #else
  413 + std::vector < typename cpu_kdtree::point<T, D> > reference_points(reference_count); // repack the reference points into point structs
  414 + for (size_t j = 0; j < reference_count; j++)
  415 + for (size_t i = 0; i < D; i++)
  416 + reference_points[j].dim[i] = h_reference_points[j * D + i];
  417 + cpu_create(reference_points, max_levels);
  418 + cpu_tmp_points = *tmp_points;
  419 +
  420 + #endif
  421 + }
  422 +
  423 + int get_num_nodes() const { // get the total number of nodes
  424 + return n_id;
  425 + }
  426 +
  427 + cpu_kdtree::cpu_kdnode<T>* get_root() const { // get the root node of tree
  428 + return root;
  429 + }
  430 +
  431 + T cpu_distance(const cpu_kdtree::point<T, D> &a, const cpu_kdtree::point<T, D> &b) {
  432 + T distance = 0;
  433 +
  434 + for (size_t i = 0; i < D; i++) {
  435 + T d = a.dim[i] - b.dim[i];
  436 + distance += d*d;
  437 + }
  438 + return distance;
  439 + }
  440 +
  441 + void cpu_search_at_node(cpu_kdtree::cpu_kdnode<T> *cur, const cpu_kdtree::point<T, D> &query, size_t *index, T *distance, cpu_kdtree::cpu_kdnode<T> **node) {
  442 + T best_distance = FLT_MAX; // initialize the best distance to max of floating point
  443 + size_t best_index = 0;
  444 + std::vector < typename cpu_kdtree::point<T, D> > pts = cpu_tmp_points;
  445 + while (true) {
  446 + size_t split_axis = cur->level % D;
  447 + if (cur->left == NULL) { // leaf check: left and right children are always created together, so testing left is sufficient
  448 + *node = cur; // return the leaf node through the output pointer
  449 + for (size_t i = 0; i < cur->indices.size(); i++) {
  450 + size_t idx = cur->indices[i];
  451 + T d = cpu_distance(query, pts[idx]); // compute distances
  452 + /// to compute the k nearest neighbors, the previous result can be fed back in
  453 + /// (last_best_dist < dist < best_dist) to select the next point until k are found
  454 + if (d < best_distance) {
  455 + best_distance = d;
  456 + best_index = idx; // record the nearest neighbor index
  457 + }
  458 + }
  459 + break; // find the target point then break the loop
  460 + }
  461 + else if (query.dim[split_axis] < cur->split_value) { // otherwise descend into the left or right child depending on the split value
  462 + cur = cur->left;
  463 + }
  464 + else {
  465 + cur = cur->right;
  466 + }
  467 + }
  468 + *index = best_index;
  469 + *distance = best_distance;
  470 + }
  471 +
  472 + void cpu_search_at_node_range(cpu_kdtree::cpu_kdnode<T> *cur, const cpu_kdtree::point<T, D> &query, T range, size_t *index, T *distance) {
  473 + T best_distance = FLT_MAX; // initialize the best distance to max of floating point
  474 + size_t best_index = 0;
  475 + std::vector < typename cpu_kdtree::point<T, D> > pts = cpu_tmp_points;
  476 + std::vector < typename cpu_kdtree::cpu_kdnode<T>*> next_node;
  477 + next_node.push_back(cur);
  478 + while (next_node.size()) {
  479 + std::vector<typename cpu_kdtree::cpu_kdnode<T>*> next_search;
  480 + while (next_node.size()) {
  481 + cur = next_node.back();
  482 + next_node.pop_back();
  483 + size_t split_axis = cur->level % D;
  484 + if (cur->left == NULL) {
  485 + for (size_t i = 0; i < cur->indices.size(); i++) {
  486 + size_t idx = cur->indices[i];
  487 + T d = cpu_distance(query, pts[idx]);
  488 + if (d < best_distance) {
  489 + best_distance = d;
  490 + best_index = idx;
  491 + }
  492 + }
  493 + }
  494 + else {
  495 + T d = query.dim[split_axis] - cur->split_value; // compute the signed distance to the splitting plane along the split axis
  496 + /// three possibilities: search only the left subtree, only the right subtree, or both
  497 + if (fabs(d) > range) { // if the distance to the splitting plane exceeds the search range, only one side can contain a closer point
  498 + if (d < 0)
  499 + next_search.push_back(cur->left); // every point in the left subtree has dim[split_axis] <= cur->split_value, so the nearest point may lie in this region
  500 + else
  501 + next_search.push_back(cur->right);
  502 + }
  503 + else { // the nearest neighbor could lie on either side, so search both subtrees
  504 + next_search.push_back(cur->left);
  505 + next_search.push_back(cur->right);
  506 + }
  507 + }
  508 + }
  509 + next_node = next_search; // advance to the next set of nodes to visit
  510 + }
  511 + *index = best_index;
  512 + *distance = best_distance;
  513 + }
  514 +
  515 + void cpu_search(T *h_query_points, size_t query_count, size_t *h_indices, T *h_distances) {
  516 + /// first convert each input query point into the kd-tree point type
  517 + cpu_kdtree::point<T, D> query;
  518 + for (size_t j = 0; j < query_count; j++) {
  519 + for (size_t i = 0; i < D; i++)
  520 + query.dim[i] = h_query_points[j * D + i];
  521 + /// find the nearest leaf node; its best distance is an upper bound for the remaining search
  522 + cpu_kdtree::cpu_kdnode<T> *best_node = NULL;
  523 + T best_distance = FLT_MAX;
  524 + size_t best_index = 0;
  525 + T radius = 0; // search radius for the range queries below
  526 + cpu_search_at_node(root, query, &best_index, &best_distance, &best_node); // simple descent to roughly determine a candidate for the next search step
  527 + radius = sqrt(best_distance); // the true nearest neighbor may still lie in another region
  528 + /// find other possibilities
  529 + cpu_kdtree::cpu_kdnode<T> *cur = best_node;
  530 + while (cur->parent != NULL) { // every node on the path back to the root may contain a closer point
  531 + /// go up
  532 + cpu_kdtree::cpu_kdnode<T> *parent = cur->parent; // walk back up through the nodes passed on the way down
  533 + size_t split_axis = (parent->level) % D;
  534 + /// search other nodes
  535 + size_t tmp_index;
  536 + T tmp_distance = FLT_MAX;
  537 + if (fabs(parent->split_value - query.dim[split_axis]) <= radius) {
  538 + /// search opposite node
  539 + if (parent->left != cur)
  540 + cpu_search_at_node_range(parent->left, query, radius, &tmp_index, &tmp_distance); // cur is the right child, so search the left sibling subtree
  541 + else
  542 + cpu_search_at_node_range(parent->right, query, radius, &tmp_index, &tmp_distance);
  543 + }
  544 + if (tmp_distance < best_distance) {
  545 + best_distance = tmp_distance;
  546 + best_index = tmp_index;
  547 + }
  548 + cur = parent;
  549 + }
  550 + h_indices[j] = best_index;
  551 + h_distances[j] = best_distance;
  552 + }
563 553 }
564 554  
565   - /// Search the KD tree for nearest neighbors to a set of specified query points
566   - /// @param h_query_points an array of query points in (x0, y0, z0, ...) order
567   - /// @param query_count is the number of query points
568   - /// @param indices are the indices to the nearest reference point for each query points
569   - /// @param distances is an array containing the distance between each query point and the nearest reference point
570 555 void search(T *h_query_points, size_t query_count, size_t *indices, T *distances) {
571   - std::vector < typename kdtree::point<T, D> > query_points(query_count);
  556 + #ifdef __CUDACC__
  557 + std::vector < typename cpu_kdtree::point<T, D> > query_points(query_count);
572 558 for (size_t j = 0; j < query_count; j++)
573 559 for (size_t i = 0; i < D; i++)
574 560 query_points[j].dim[i] = h_query_points[j * D + i];
... ... @@ -595,7 +581,7 @@ namespace stim {
595 581 unsigned int threads = (unsigned int)(query_count > 1024 ? 1024 : query_count);
596 582 unsigned int blocks = (unsigned int)(query_count / threads + (query_count % threads ? 1 : 0));
597 583  
598   - kdtree::point<T, D> *d_query_points; // create a pointer pointing to query points on gpu
  584 + cpu_kdtree::point<T, D> *d_query_points; // create a pointer pointing to query points on gpu
599 585 size_t *d_indices;
600 586 T *d_distances;
601 587  
... ... @@ -624,64 +610,18 @@ namespace stim {
624 610 HANDLE_ERROR(cudaFree(d_indices));
625 611 HANDLE_ERROR(cudaFree(d_distances));
626 612 }
627   - }
628   -
629   - /// Return the number of points in the KD tree
630   - size_t num_points() {
631   - return npts;
632   - }
633 613  
634   - stim::aabbn<T, D> getbox() {
635   - size_t N = npts;
636   - //std::vector < typename kdtree::point<T, D> > cpu_ref(npts); //allocate space on the CPU for the reference points
637   - T* cpu_ref = (T*)malloc(N * D * sizeof(T)); //allocate space on the CPU for the reference points
638   - HANDLE_ERROR(cudaMemcpy(cpu_ref, d_reference_points, N * D * sizeof(T), cudaMemcpyDeviceToHost)); //copy from GPU to CPU
  614 + #else
  615 + cpu_search(h_query_points, query_count, indices, distances);
639 616  
640   - stim::aabbn<T, D> bb(cpu_ref);
  617 + #endif
641 618  
642   - for (size_t i = 1; i < N; i++) { //for each reference point
643   - //std::cout << "( " << cpu_ref[i * D + 0] << ", " << cpu_ref[i * D + 1] << ", " << cpu_ref[i * D + 2] << ")" << std::endl;
644   - bb.insert(&cpu_ref[i * D]);
645   - }
646   - return bb;
647 619 }
648 620  
649   - //generate an implicit distance field for the KD-tree
650   - void dist_field3(T* dist, size_t* dims, stim::aabbn<T, 3> bb) {
651   - size_t N = 1; //number of query points that make up the distance field
652   - for (size_t d = 0; d < 3; d++) N *= dims[d]; //calculate the total number of query points
653   -
654   - //calculate the grid spatial parameters
655   - T dx = 0;
656   - if (dims[0] > 1) dx = bb.length(0) / dims[0];
657   - T dy = 0;
658   - if (dims[1] > 1) dy = bb.length(1) / dims[1];
659   - T dz = 0;
660   - if (dims[2] > 1) dz = bb.length(2) / dims[2];
661   -
662   - T* Q = (T*)malloc(N * 3 * sizeof(T)); //allocate space for the query points
663   - size_t i;
664   - for (size_t z = 0; z < dims[2]; z++) { //for each query point (which is a point in the grid)
665   - for (size_t y = 0; y < dims[1]; y++) {
666   - for (size_t x = 0; x < dims[0]; x++) {
667   - i = z * dims[1] * dims[0] + y * dims[0] + x;
668   - Q[i * 3 + 0] = bb.low[0] + x * dx + dx / 2;
669   - Q[i * 3 + 1] = bb.low[1] + y * dy + dy / 2;
670   - Q[i * 3 + 2] = bb.low[2] + z * dz + dz / 2;
671   - //std::cout << i<<" "<<Q[i * 3 + 0] << " " << Q[i * 3 + 1] << " " << Q[i * 3 + 2] << std::endl;
672   - }
673   - }
674   - }
675   - size_t* temp = (size_t*)malloc(N * sizeof(size_t)); //allocate space to store the indices (unused)
676   - search(Q, N, temp, dist);
677   - }
  621 + }; //end class kdtree
678 622  
679   - //generate an implicit distance field for the KD-tree
680   - void dist_field3(T* dist, size_t* dims) {
681   - stim::aabbn<T, D> bb = getbox(); //get a bounding box around the tree
682   - dist_field3(dist, dims, bb);
683   - }
  623 + template <typename T, int D>
  624 + kdtree<T, D>* kdtree<T, D>::cur_tree_ptr = NULL; // define the static cur_tree_ptr, which points to the current tree instance
684 625  
685   - };
686 626 } //end namespace stim
687 627 #endif
688 628 \ No newline at end of file
... ...
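The restructured kd-tree above builds the tree on the host in both paths; when compiled with nvcc (__CUDACC__ defined) the nodes are copied to the GPU and search() launches the CUDA kernels, otherwise search() falls through to the new cpu_search(). A minimal usage sketch follows; the search() signature comes from the diff above, while the create() signature, the header path, and the sample data are assumptions for illustration only.

    #include <vector>
    #include <cstddef>
    #include <stim/structures/kdtree.cuh>                    // header path assumed

    int main(){
        std::vector<float> ref = { 0,0,0,  1,0,0,  0,2,0 };  // three reference points, (x,y,z) interleaved
        std::vector<float> qry = { 0.9f, 0.1f, 0.0f };       // one query point

        stim::kdtree<float, 3> tree;
        tree.create(ref.data(), ref.size() / 3, 3);          // flat coordinate array, point count, max levels (signature assumed)

        size_t idx;                                          // index of the nearest reference point
        float dist;                                          // reported distance (squared in the CPU path, per cpu_distance)
        tree.search(qry.data(), 1, &idx, &dist);             // uses the CUDA kernels or cpu_search() depending on __CUDACC__
        return 0;
    }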
stim/visualization/cylinder.h
... ... @@ -668,4 +668,4 @@ public:
668 668 };
669 669  
670 670 }
671 671 -#endif
  672 +#endif
672 673 \ No newline at end of file
... ...
stim/visualization/gl_network.h
... ... @@ -44,25 +44,94 @@ public:
44 44 }
45 45  
46 46 /// Render the network centerline as a series of line strips.
  47 + /// glCenterline0 renders a single network's centerline without magnitude data (constant texture coordinate)
  48 + void glCenterline0(){
  49 + if (!glIsList(dlist)) { //if dlist isn't a display list, create it
  50 + dlist = glGenLists(1); //generate a display list
  51 + glNewList(dlist, GL_COMPILE); //start a new display list
  52 + for (unsigned e = 0; e < E.size(); e++) { //for each edge in the network
  53 + glBegin(GL_LINE_STRIP);
  54 + for (unsigned p = 0; p < E[e].size(); p++) { //for each point on that edge
  55 + glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]); //set the vertex position based on the current point
  56 + glTexCoord1f(0); //constant texture coordinate (maps to white)
  57 + }
  58 + glEnd();
  59 + }
  60 + glEndList(); //end the display list
  61 + }
  62 + glCallList(dlist); // render the display list
  63 + }
47 64  
48 65 /// @param m specifies the magnitude value used as the vertex weight (radius, error, etc.)
49 66 void glCenterline(unsigned m = 0){
50 67  
51 68 if(!glIsList(dlist)){ //if dlist isn't a display list, create it
52   - dlist = glGenLists(1); //generate a display list
  69 + dlist = glGenLists(3); //generate three consecutive display list names (only the first is used here)
53 70 glNewList(dlist, GL_COMPILE); //start a new display list
54 71 for(unsigned e = 0; e < E.size(); e++){ //for each edge in the network
  72 + unsigned errormag_id = E[e].nmags() - 1;
55 73 glBegin(GL_LINE_STRIP);
56 74 for(unsigned p = 0; p < E[e].size(); p++){ //for each point on that edge
57 75 glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]); //set the vertex position based on the current point
58   - glTexCoord1f(E[e].m(p, m)); //set the texture coordinate based on the specified magnitude index
  76 + glTexCoord1f(E[e].m(p, errormag_id)); //set the texture coordinate based on the last (error) magnitude channel
59 77 }
60 78 glEnd();
61 79 }
62 80 glEndList(); //end the display list
63 81 }
64 82 glCallList(dlist); // render the display list
  83 + }
65 84  
  85 + void glRandColorCenterlineGT(GLuint &dlist1, std::vector<unsigned> map, std::vector<T> colormap){
  86 + if(!glIsList(dlist1)){
  87 + glNewList(dlist1, GL_COMPILE);
  88 + for(unsigned e = 0; e < E.size(); e++){
  89 + if(map[e] != unsigned(-1)){
  90 + glColor3f(colormap[e * 3 + 0], colormap[e * 3 + 1], colormap[e * 3 + 2]);
  91 + glBegin(GL_LINE_STRIP);
  92 + for(unsigned p = 0; p < E[e].size(); p++){
  93 + glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]);
  94 + }
  95 + glEnd();
  96 + }
  97 + else{
  98 + glColor3f(1.0, 1.0, 1.0);
  99 + glBegin(GL_LINE_STRIP);
  100 + for(unsigned p = 0; p < E[e].size(); p++){
  101 + glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]);
  102 + }
  103 + glEnd();
  104 + }
  105 + }
  106 + glEndList();
  107 + }
  108 + glCallList(dlist1);
  109 + }
  110 +
  111 + void glRandColorCenterlineT(GLuint &dlist2, std::vector<unsigned> map, std::vector<T> colormap){
  112 + if(!glIsList(dlist2)){
  113 + glNewList(dlist2, GL_COMPILE);
  114 + for(unsigned e = 0; e < E.size(); e++){
  115 + if(map[e] != unsigned(-1)){
  116 + glColor3f(colormap[map[e] * 3 + 0], colormap[map[e] * 3 + 1], colormap[map[e] * 3 + 2]);
  117 + glBegin(GL_LINE_STRIP);
  118 + for(unsigned p = 0; p < E[e].size(); p++){
  119 + glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]);
  120 + }
  121 + glEnd();
  122 + }
  123 + else{
  124 + glColor3f(1.0, 1.0, 1.0);
  125 + glBegin(GL_LINE_STRIP);
  126 + for(unsigned p = 0; p < E[e].size(); p++){
  127 + glVertex3f(E[e][p][0], E[e][p][1], E[e][p][2]);
  128 + }
  129 + glEnd();
  130 + }
  131 + }
  132 + glEndList();
  133 + }
  134 + glCallList(dlist2);
66 135 }
67 136  
68 137 }; //end stim::gl_network class
... ...
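A minimal GLUT-style sketch of driving the new rendering entry points follows; it is illustrative only. The instance net, the list name gt_list, and the vectors gt_map and gt_colors are hypothetical, the colormap layout (three floats per edge) is inferred from the colormap[e * 3 + 0..2] indexing above, and the caller generates the display-list name itself because glRandColorCenterlineGT() compiles into the name it is given but never calls glGenLists().

    #include <GL/glut.h>
    #include <vector>
    #include <stim/visualization/gl_network.h>

    stim::gl_network<float> net;            // hypothetical: network loaded/populated elsewhere
    GLuint gt_list = 0;                     // display-list name for the ground-truth rendering
    std::vector<unsigned> gt_map;           // hypothetical per-edge mapping; unsigned(-1) marks an unmatched edge
    std::vector<float> gt_colors;           // hypothetical colormap: three floats (R, G, B) per edge

    void display(){
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

        net.glCenterline0();                                       // plain centerline, constant texture coordinate

        if(gt_list == 0) gt_list = glGenLists(1);                  // generate the list name once; the method only compiles into it
        net.glRandColorCenterlineGT(gt_list, gt_map, gt_colors);   // per-edge colored rendering

        glutSwapBuffers();
    }

    int main(int argc, char* argv[]){
        glutInit(&argc, argv);
        glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB | GLUT_DEPTH);
        glutCreateWindow("network centerlines");
        glutDisplayFunc(display);
        glutMainLoop();
        return 0;
    }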