diff --git a/stim/biomodels/network.h b/stim/biomodels/network.h
index 84fb949..c517203 100644
--- a/stim/biomodels/network.h
+++ b/stim/biomodels/network.h
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include
 
 namespace stim{
 
@@ -415,19 +416,14 @@ public:
    /// @param A is the network to compare to - the field is generated for A
    /// @param sigma is the user-defined tolerance value - smaller values provide a stricter comparison
-   stim::network compare(stim::network A, float sigma){
+   stim::network compare(stim::network A, float sigma, int device){
 
        stim::network R;                        //generate a network storing the result of the comparison
        R = (*this);                            //initialize the result with the current network
 
-       //generate a KD-tree for network A
-       //float metric = 0.0;                   // initialize metric to be returned after comparing the networks
-       stim::cuda_kdtree kdt;                  // initialize a pointer to a kd tree
-       size_t MaxTreeLevels = 3;               // max tree level
-
-       float *c;                               // centerline (array of double pointers) - points on kdtree must be double
-       unsigned int n_data = A.total_points(); // set the number of points
-       c = (float*) malloc(sizeof(float) * n_data * 3);
+       T *c;                                   // centerline (array of double pointers) - points on kdtree must be double
+       size_t n_data = A.total_points();       // set the number of points
+       c = (T*) malloc(sizeof(T) * n_data * 3);
 
        unsigned t = 0;
        for(unsigned e = 0; e < A.E.size(); e++){           //for each edge in the network
@@ -440,16 +436,24 @@ public:
            }
        }
 
+       //generate a KD-tree for network A
+       //float metric = 0.0;                   // initialize metric to be returned after comparing the network
+       size_t MaxTreeLevels = 3;               // max tree level
+
+#ifdef __CUDACC__
+       cudaSetDevice(device);
+       stim::cuda_kdtree kdt;                  // initialize a pointer to a kd tree
+
        //compare each point in the current network to the field produced by A
-       kdt.CreateKDTree(c, n_data, 3, MaxTreeLevels);      // build a KD tree
-       float *dists = new float[1];            // near neighbor distances
+       kdt.create(c, n_data, MaxTreeLevels);               // build a KD tree
+       T *dists = new T[1];                    // near neighbor distances
        size_t *nnIdx = new size_t[1];          // near neighbor indices // allocate near neigh indices
 
        stim::vec3 p0, p1;
-       float m1;
+       T m1;
        //float M = 0;                          //stores the total metric value
        //float L = 0;                          //stores the total network length
-       float* queryPt = new float[3];
+       T* queryPt = new T[3];
        for(unsigned e = 0; e < R.E.size(); e++){           //for each edge in A
            R.E[e].add_mag(0);                              //add a new magnitude for the metric
 
@@ -457,14 +461,36 @@ public:
                p1 = R.E[e][p];                             //get the next point in the edge
                stim2array(queryPt, p1);
-               kdt.Search(queryPt, 1, 3, dists, nnIdx);    //find the distance between A and the current network
+               kdt.search(queryPt, 1, nnIdx, dists);       //find the distance between A and the current network
 
-               m1 = 1.0f - gaussianFunction((float)dists[0], sigma);  //calculate the metric value based on the distance
+               m1 = 1.0f - gaussianFunction((T)dists[0], sigma);      //calculate the metric value based on the distance
                R.E[e].set_mag(m1, p, 1);                   //set the error for the second point in the segment
            }
        }
+#else
+       stim::cpu_kdtree kdt;
+       kdt.create(c, n_data, MaxTreeLevels);
+       T *dists = new T[1];                    // near neighbor distances
+       size_t *nnIdx = new size_t[1];          // near neighbor indices // allocate near neigh indices
+
+       stim::vec3 p0, p1;
+       T m1;
+       T* queryPt = new T[3];
+       for(unsigned e = 0; e < R.E.size(); e++){           //for each edge in A
+           R.E[e].add_mag(0);                              //add a new magnitude for the metric
+
+           for(unsigned p = 0; p < R.E[e].size(); p++){    //for each point in the edge
+               p1 = R.E[e][p];                             //get the next point in the edge
+               stim2array(queryPt, p1);
+               kdt.cpu_search(queryPt, 1, nnIdx, dists);   //find the distance between A and the current network
+
+               m1 = 1.0f - gaussianFunction((T)dists[0], sigma);      //calculate the metric value based on the distance
+               R.E[e].set_mag(m1, p, 1);                   //set the error for the second point in the segment
+           }
+       }
+#endif
 
        return R;                               //return the resulting network
    }
diff --git a/stim/structures/kdtree.cuh b/stim/structures/kdtree.cuh
index f85dc23..c6b350e 100644
--- a/stim/structures/kdtree.cuh
+++ b/stim/structures/kdtree.cuh
@@ -1,4 +1,4 @@
-// right now the size of CUDA STACK is set to 50, increase it if you mean to make deeper tree
+// right now the size of CUDA STACK is set to 1000, increase it if you mean to make deeper tree
 // data should be stored in row-major
 // x1,x2,x3,x4,x5......
 // y1,y2,y3,y4,y5......
@@ -13,6 +13,7 @@
 #include
 #include "cuda_runtime.h"
 #include
+#include
 #include
 #include
 #include
@@ -50,14 +51,14 @@ namespace stim {
    class cpu_kdtree {
    protected:
        int current_axis;                                           // current judging axis
-       int cmps;                                                   // count how many time of comparisons (just for cpu-kdtree)
        int n_id;                                                   // store the total number of nodes
-       std::vector < typename kdtree::point<T, D> > *tmp_points;   // transfer or temp points
+       std::vector < typename kdtree::point<T, D> > *tmp_points;   // transfer or temporary points
+       std::vector < typename kdtree::point<T, D> > cpu_tmp_points;    // for cpu searching
        kdtree::kdnode<T> *root;                                    // root node
        static cpu_kdtree *cur_tree_ptr;
    public:
        cpu_kdtree() {                                              // constructor for creating a cpu_kdtree
-           cur_tree_ptr = this;                                    // create a class pointer points to the current class value
+           cur_tree_ptr = this;                                    // create a class pointer points to the current class value
            n_id = 0;                                               // set total number of points to default 0
        }
        ~cpu_kdtree() {                                             // destructor of cpu_kdtree
@@ -78,8 +79,8 @@ namespace stim {
            }
            root = NULL;
        }
-       void Create(std::vector < typename kdtree::point<T, D> > &reference_points, size_t max_levels) {
-           tmp_points = &reference_points;
+       void cpu_create(std::vector < typename kdtree::point<T, D> > &reference_points, size_t max_levels) {
+           tmp_points = &reference_points;
            root = new kdtree::kdnode<T>();                         // initializing the root node
            root->idx = n_id++;                                     // the index of root is 0
            root->level = 0;                                        // tree level begins at 0
@@ -100,7 +101,7 @@ namespace stim {
                kdtree::kdnode<T> *right = new kdtree::kdnode<T>();
                left->idx = n_id++;                                 // set the index of current node's left node
                right->idx = n_id++;
-               Split(current_node, left, right);                   // split left and right and determine a node
+               split(current_node, left, right);                   // split left and right and determine a node
                std::vector <size_t> temp;                          // empty vecters of int
                //temp.resize(current_node->indices.size());
                current_node->indices.swap(temp);                   // clean up current node's indices
@@ -118,14 +119,14 @@ namespace stim {
                next_nodes = next_search_nodes;                     // go deeper within the tree
            }
        }
-       static bool SortPoints(const size_t a, const size_t b) {    // create functor for std::sort
+       static bool sort_points(const size_t a, const size_t b) {   // create functor for std::sort
            std::vector < typename kdtree::point<T, D> > &pts = *cur_tree_ptr->tmp_points;  // put cur_tree_ptr to current input points' pointer
            return pts[a].dim[cur_tree_ptr->current_axis] < pts[b].dim[cur_tree_ptr->current_axis];
        }
-       void Split(kdtree::kdnode<T> *cur, kdtree::kdnode<T> *left, kdtree::kdnode<T> *right) {
+       void split(kdtree::kdnode<T> *cur, kdtree::kdnode<T> *left, kdtree::kdnode<T> *right) {
            std::vector < typename kdtree::point<T, D> > &pts = *tmp_points;
            current_axis = cur->level % D;                          // indicate the judicative dimension or axis
-           std::sort(cur->indices.begin(), cur->indices.end(), SortPoints);       // using SortPoints as comparison function to sort the data
+           std::sort(cur->indices.begin(), cur->indices.end(), sort_points);      // using sort_points as comparison function to sort the data
            size_t mid_value = cur->indices[cur->indices.size() / 2];              // odd in the mid_value, even take the floor
            cur->split_value = pts[mid_value].dim[current_axis];                   // get the parent node
            left->parent = cur;                                                    // set the parent of the next search nodes to current node
@@ -142,48 +143,176 @@ namespace stim {
                    right->indices.push_back(idx);
            }
        }
-       int GetNumNodes() const {                                   // get the total number of nodes
+       void create(T *h_reference_points, size_t reference_count, size_t max_levels) {
+           std::vector < typename kdtree::point<T, D> > reference_points(reference_count);     // restore the reference points in particular way
+           for (size_t j = 0; j < reference_count; j++)
+               for (size_t i = 0; i < D; i++)
+                   reference_points[j].dim[i] = h_reference_points[j * D + i];
+           cpu_create(reference_points, max_levels);
+           cpu_tmp_points = *tmp_points;
+       }
+       int get_num_nodes() const {                                 // get the total number of nodes
            return n_id;
        }
-       kdtree::kdnode<T>* GetRoot() const {                        // get the root node of tree
+       kdtree::kdnode<T>* get_root() const {                       // get the root node of tree
            return root;
        }
+       T cpu_distance(const kdtree::point<T, D> &a, const kdtree::point<T, D> &b) {
+           T distance = 0;
+
+           for (size_t i = 0; i < D; i++) {
+               T d = a.dim[i] - b.dim[i];
+               distance += d*d;
+           }
+           return distance;
+       }
+       void cpu_search_at_node(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, size_t *index, T *distance, kdtree::kdnode<T> **node) {
+           T best_distance = FLT_MAX;                              // initialize the best distance to max of floating point
+           size_t best_index = 0;
+           std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points;
+           while (true) {
+               size_t split_axis = cur->level % D;
+               if (cur->left == NULL) {                            // risky but acceptable, same goes for right because left and right are in the same place
+                   *node = cur;                                    // pointer points to a pointer
+                   for (size_t i = 0; i < cur->indices.size(); i++) {
+                       size_t idx = cur->indices[i];
+                       T d = cpu_distance(query, pts[idx]);        // compute distances
+                       /// if we want to compute k nearest neighbors, we can input the last result
+                       /// (last_best_dist < dist < best_dist) to select the next point until reaching k
+                       if (d < best_distance) {
+                           best_distance = d;
+                           best_index = idx;                       // record the nearest neighbor index
+                       }
+                   }
+                   break;                                          // find the target point then break the loop
+               }
+               else if (query.dim[split_axis] < cur->split_value) {    // if it has a child node, visit the next node on either the left or the right side
+                   cur = cur->left;
+               }
+               else {
+                   cur = cur->right;
+               }
+           }
+           *index = best_index;
+           *distance = best_distance;
+       }
+       void cpu_search_at_node_range(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, T range, size_t *index, T *distance) {
+           T best_distance = FLT_MAX;                              // initialize the best distance to max of floating point
+           size_t best_index = 0;
+           std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points;
+           std::vector < typename kdtree::kdnode<T>* > next_node;
+           next_node.push_back(cur);
+           while (next_node.size()) {
+               std::vector < typename kdtree::kdnode<T>* > next_search;
+               while (next_node.size()) {
+                   cur = next_node.back();
+                   next_node.pop_back();
+                   size_t split_axis = cur->level % D;
+                   if (cur->left == NULL) {
+                       for (size_t i = 0; i < cur->indices.size(); i++) {
+                           size_t idx = cur->indices[i];
+                           T d = cpu_distance(query, pts[idx]);
+                           if (d < best_distance) {
+                               best_distance = d;
+                               best_index = idx;
+                           }
+                       }
+                   }
+                   else {
+                       T d = query.dim[split_axis] - cur->split_value;     // compute distance along a specific axis or dimension
+                       /// there are three possibilities: on either left or right, and on both left and right
+                       if (fabs(d) > range) {                              // absolute value of floating point to see if distance will be larger than best_dist
+                           if (d < 0)
+                               next_search.push_back(cur->left);           // every left[split_axis] is less than or equal to cur->split_value, so it is possible to find the nearest point in this region
+                           else
+                               next_search.push_back(cur->right);
+                       }
+                       else {                                              // it is possible that the nearest neighbor will appear on both left and right
+                           next_search.push_back(cur->left);
+                           next_search.push_back(cur->right);
+                       }
+                   }
+               }
+               next_node = next_search;                                    // pop out at least one time
+           }
+           *index = best_index;
+           *distance = best_distance;
+       }
+       void cpu_search(T *h_query_points, size_t query_count, size_t *h_indices, T *h_distances) {
+           /// first convert the input query point into specific type
+           kdtree::point<T, D> query;
+           for (size_t j = 0; j < query_count; j++) {
+               for (size_t i = 0; i < D; i++)
+                   query.dim[i] = h_query_points[j * D + i];
+               /// find the nearest node, this will be the upper bound for the next time searching
+               kdtree::kdnode<T> *best_node = NULL;
+               T best_distance = FLT_MAX;
+               size_t best_index = 0;
+               T radius = 0;                                               // radius for range
+               cpu_search_at_node(root, query, &best_index, &best_distance, &best_node);  // simple search to roughly determine a result for the next search step
+               radius = sqrt(best_distance);                               // it is possible that the nearest neighbor will appear in another region
+               /// find other possibilities
+               kdtree::kdnode<T> *cur = best_node;
+               while (cur->parent != NULL) {                               // every node that you pass will be possible to be the best node
+                   /// go up
+                   kdtree::kdnode<T> *parent = cur->parent;                // travel back to every node that we pass through
+                   size_t split_axis = (parent->level) % D;
+                   /// search other nodes
+                   size_t tmp_index;
+                   T tmp_distance = FLT_MAX;
+                   if (fabs(parent->split_value - query.dim[split_axis]) <= radius) {
+                       /// search opposite node
+                       if (parent->left != cur)
+                           cpu_search_at_node_range(parent->left, query, radius, &tmp_index, &tmp_distance);   // check whether cur is its parent node's left child
+                       else
+                           cpu_search_at_node_range(parent->right, query, radius, &tmp_index, &tmp_distance);
+                   }
+                   if (tmp_distance < best_distance) {
+                       best_distance = tmp_distance;
+                       best_index = tmp_index;
+                   }
+                   cur = parent;
+               }
+               h_indices[j] = best_index;
+               h_distances[j] = best_distance;
+           }
+       }
    };              //end class kdtree
 
    template <typename T, int D>
-   cpu_kdtree<T, D>* cpu_kdtree<T, D>::cur_tree_ptr = NULL;        // definition of cur_tree_ptr pointer points to the current class
+   cpu_kdtree<T, D>* cpu_kdtree<T, D>::cur_tree_ptr = NULL;        // definition of cur_tree_ptr pointer points to the current class
 
    template <typename T>
    struct cuda_kdnode {
        int parent, left, right;
        T split_value;
-       size_t num_index;               // number of indices it has
-       int index;                      // the beginning index
+       size_t num_index;               // number of indices it has
+       int index;                      // the beginning index
        size_t level;
    };
    template <typename T, int D>
-   __device__ T Distance(kdtree::point<T, D> &a, kdtree::point<T, D> &b) {
-       T dist = 0;
+   __device__ T gpu_distance(kdtree::point<T, D> &a, kdtree::point<T, D> &b) {
+       T distance = 0;
 
        for (size_t i = 0; i < D; i++) {
            T d = a.dim[i] - b.dim[i];
-           dist += d*d;
+           distance += d*d;
        }
-       return dist;
+       return distance;
    }
    template <typename T, int D>
-   __device__ void SearchAtNode(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, int cur, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, int *d_node) {
+   __device__ void search_at_node(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, int cur, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, int *d_node) {
        T best_distance = FLT_MAX;
        size_t best_index = 0;
 
-       while (true) {                                              // break until reach the bottom
+       while (true) {                                              // break until reach the bottom
            int split_axis = nodes[cur].level % D;
-           if (nodes[cur].left == -1) {                            // check whether it has left node or not
+           if (nodes[cur].left == -1) {                            // check whether it has left node or not
                *d_node = cur;
                for (int i = 0; i < nodes[cur].num_index; i++) {
                    size_t idx = indices[nodes[cur].index + i];
-                   T dist = Distance(d_query_point, d_reference_points[idx]);
+                   T dist = gpu_distance(d_query_point, d_reference_points[idx]);
                    if (dist < best_distance) {
                        best_distance = dist;
                        best_index = idx;
@@ -191,7 +320,7 @@ namespace stim {
                }
                break;
            }
-           else if (d_query_point.dim[split_axis] < nodes[cur].split_value) {     // jump into specific son node
+           else if (d_query_point.dim[split_axis] < nodes[cur].split_value) {     // jump into specific son node
                cur = nodes[cur].left;
            }
            else {
@@ -202,25 +331,25 @@ namespace stim {
        *d_index = best_index;
    }
    template <typename T, int D>
-   __device__ void SearchAtNodeRange(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, int cur, T range, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
+   __device__ void search_at_node_range(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, int cur, T range, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
        T best_distance = FLT_MAX;
        size_t best_index = 0;
 
-       int next_nodes_pos = 0;                                     // initialize pop out order index
-       next_nodes[id * 50 + next_nodes_pos] = cur;                 // find data that belongs to the very specific thread
+       int next_nodes_pos = 0;                                     // initialize pop out order index
+       next_nodes[id * 50 + next_nodes_pos] = cur;                 // find data that belongs to the very specific thread
        next_nodes_pos++;
 
        while (next_nodes_pos) {
-           int next_search_nodes_pos = 0;                          // record push back order index
+           int next_search_nodes_pos = 0;                          // record push back order index
            while (next_nodes_pos) {
-               cur = next_nodes[id * 50 + next_nodes_pos - 1];     // pop out the last pushed one and keep popping out
+               cur = next_nodes[id * 50 + next_nodes_pos - 1];     // pop out the last pushed one and keep popping out
                next_nodes_pos--;
                int split_axis = nodes[cur].level % D;
                if (nodes[cur].left == -1) {
                    for (int i = 0; i < nodes[cur].num_index; i++) {
-                       int idx = indices[nodes[cur].index + i];    // all indices are stored in one array, pick up from every node's beginning index
-                       T d = Distance(d_query_point, d_reference_points[idx]);
+                       int idx = indices[nodes[cur].index + i];    // all indices are stored in one array, pick up from every node's beginning index
+                       T d = gpu_distance(d_query_point, d_reference_points[idx]);
                        if (d < best_distance) {
                            best_distance = d;
                            best_index = idx;
@@ -260,13 +389,13 @@ namespace stim {
        *d_index = best_index;
    }
    template <typename T, int D>
-   __device__ void Search(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
+   __device__ void search(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
        int best_node = 0;
        T best_distance = FLT_MAX;
        size_t best_index = 0;
        T radius = 0;
 
-       SearchAtNode(nodes, indices, d_reference_points, 0, d_query_point, &best_index, &best_distance, &best_node);
+       search_at_node(nodes, indices, d_reference_points, 0, d_query_point, &best_index, &best_distance, &best_node);
 
        radius = sqrt(best_distance);                               // get range
        int cur = best_node;
@@ -278,9 +407,9 @@ namespace stim {
            size_t tmp_idx;
            if (fabs(nodes[parent].split_value - d_query_point.dim[split_axis]) <= radius) {
                if (nodes[parent].left != cur)
-                   SearchAtNodeRange(nodes, indices, d_reference_points, d_query_point, nodes[parent].left, radius, &tmp_idx, &tmp_dist, id, next_nodes, next_search_nodes, Judge);
+                   search_at_node_range(nodes, indices, d_reference_points, d_query_point, nodes[parent].left, radius, &tmp_idx, &tmp_dist, id, next_nodes, next_search_nodes, Judge);
                else
-                   SearchAtNodeRange(nodes, indices, d_reference_points, d_query_point, nodes[parent].right, radius, &tmp_idx, &tmp_dist, id, next_nodes, next_search_nodes, Judge);
+                   search_at_node_range(nodes, indices, d_reference_points, d_query_point, nodes[parent].right, radius, &tmp_idx, &tmp_dist, id, next_nodes, next_search_nodes, Judge);
            }
            if (tmp_dist < best_distance) {
                best_distance = tmp_dist;
@@ -292,11 +421,11 @@ namespace stim {
        *d_index = best_index;
    }
    template <typename T, int D>
-   __global__ void SearchBatch(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> *d_query_points, size_t d_query_count, size_t *d_indices, T *d_distances, int *next_nodes, int *next_search_nodes, int *Judge) {
+   __global__ void search_batch(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> *d_query_points, size_t d_query_count, size_t *d_indices, T *d_distances, int *next_nodes, int *next_search_nodes, int *Judge) {
        size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
        if (idx >= d_query_count) return;                           // avoid segfault
 
-       Search(nodes, indices, d_reference_points, d_query_points[idx], &d_indices[idx], &d_distances[idx], idx, next_nodes, next_search_nodes, Judge);     // every query points are independent
+       search(nodes, indices, d_reference_points, d_query_points[idx], &d_indices[idx], &d_distances[idx], idx, next_nodes, next_search_nodes, Judge);     // every query point is independent
    }
 
    template <typename T, int D = 3>
@@ -312,20 +441,20 @@
            HANDLE_ERROR(cudaFree(d_index));
            HANDLE_ERROR(cudaFree(d_reference_points));
        }
-       void CreateKDTree(T *h_reference_points, size_t reference_count, size_t dim_count, size_t max_levels) {
+       void create(T *h_reference_points, size_t reference_count, size_t max_levels) {
            if (max_levels > 10) {
                std::cout<<"The max_tree_levels should be smaller!"<<std::endl;
                exit(1);
-           }
-           std::vector < typename kdtree::point<T, D> > reference_points(reference_count);     // restore the reference points in particular way
+           }
+           std::vector < typename kdtree::point<T, D> > reference_points(reference_count);     // restore the reference points in particular way
            for (size_t j = 0; j < reference_count; j++)
-               for (size_t i = 0; i < dim_count; i++)
-                   reference_points[j].dim[i] = h_reference_points[j * dim_count + i];
+               for (size_t i = 0; i < D; i++)
+                   reference_points[j].dim[i] = h_reference_points[j * D + i];
            cpu_kdtree<T, D> tree;                                  // creating a tree on cpu
-           tree.Create(reference_points, max_levels);              // building a tree on cpu
-           kdtree::kdnode<T> *d_root = tree.GetRoot();
-           int num_nodes = tree.GetNumNodes();
-           d_reference_count = reference_points.size();            // also equals to reference_count
+           tree.cpu_create(reference_points, max_levels);          // building a tree on cpu
+           kdtree::kdnode<T> *d_root = tree.get_root();
+           int num_nodes = tree.get_num_nodes();
+           d_reference_count = reference_count;                    // also equals to reference_count
 
            HANDLE_ERROR(cudaMalloc((void**)&d_nodes, sizeof(cuda_kdnode<T>) * num_nodes));     // copy data from host to device
            HANDLE_ERROR(cudaMalloc((void**)&d_index, sizeof(size_t) * d_reference_count));
@@ -371,11 +500,11 @@ namespace stim {
            HANDLE_ERROR(cudaMemcpy(d_index, &indices[0], sizeof(size_t) * indices.size(), cudaMemcpyHostToDevice));
            HANDLE_ERROR(cudaMemcpy(d_reference_points, &reference_points[0], sizeof(kdtree::point<T, D>) * reference_points.size(), cudaMemcpyHostToDevice));
        }
-       void Search(T *h_query_points, size_t query_count, size_t dim_count, T *dists, size_t *indices) {
+       void search(T *h_query_points, size_t query_count, size_t *indices, T *distances) {
            std::vector < typename kdtree::point<T, D> > query_points(query_count);
            for (size_t j = 0; j < query_count; j++)
-               for (size_t i = 0; i < dim_count; i++)
-                   query_points[j].dim[i] = h_query_points[j * dim_count + i];
+               for (size_t i = 0; i < D; i++)
+                   query_points[j].dim[i] = h_query_points[j * D + i];
 
            unsigned int threads = (unsigned int)(query_points.size() > 1024 ? 1024 : query_points.size());
            unsigned int blocks = (unsigned int)(query_points.size() / threads + (query_points.size() % threads ? 1 : 0));
@@ -392,15 +521,15 @@ namespace stim {
            HANDLE_ERROR(cudaMalloc((void**)&d_query_points, sizeof(T) * query_points.size() * D));
            HANDLE_ERROR(cudaMalloc((void**)&d_indices, sizeof(size_t) * query_points.size()));
            HANDLE_ERROR(cudaMalloc((void**)&d_distances, sizeof(T) * query_points.size()));
-           HANDLE_ERROR(cudaMalloc((void**)&next_nodes, threads * blocks * 50 * sizeof(int)));     // STACK size right now is 50, you can change it if you mean to
+           HANDLE_ERROR(cudaMalloc((void**)&next_nodes, threads * blocks * 50 * sizeof(int)));     // STACK size right now is 50, you can change it if you mean to
            HANDLE_ERROR(cudaMalloc((void**)&next_search_nodes, threads * blocks * 50 * sizeof(int)));
 
            HANDLE_ERROR(cudaMemcpy(d_query_points, &query_points[0], sizeof(T) * query_points.size() * D, cudaMemcpyHostToDevice));
 
-           SearchBatch<<<blocks, threads>>> (d_nodes, d_index, d_reference_points, d_query_points, query_points.size(), d_indices, d_distances, next_nodes, next_search_nodes, Judge);
+           search_batch<<<blocks, threads>>> (d_nodes, d_index, d_reference_points, d_query_points, query_points.size(), d_indices, d_distances, next_nodes, next_search_nodes, Judge);
 
            if (Judge == NULL) {                                    // do the following work if the thread works safely
                HANDLE_ERROR(cudaMemcpy(indices, d_indices, sizeof(size_t) * query_points.size(), cudaMemcpyDeviceToHost));
-               HANDLE_ERROR(cudaMemcpy(dists, d_distances, sizeof(T) * query_points.size(), cudaMemcpyDeviceToHost));
+               HANDLE_ERROR(cudaMemcpy(distances, d_distances, sizeof(T) * query_points.size(), cudaMemcpyDeviceToHost));
            }
 
            HANDLE_ERROR(cudaFree(next_nodes));
--
libgit2 0.21.4
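For reference, a minimal host-side sketch of how the renamed interface (create / search / cpu_search) is meant to be called after this patch. The <float, 3> template arguments and the resolved include path are assumptions rather than something shown in the diff, and note that both the CPU and GPU search paths return squared distances to the nearest reference point.

    #include <cstdio>
    #include <stim/structures/kdtree.cuh>        // include path assumed; this is the file touched by the patch

    int main() {
        float ref[9]   = { 0.0f, 0.0f, 0.0f,     // three reference points, row-major (x, y, z), as the header expects
                           1.0f, 0.0f, 0.0f,
                           0.0f, 2.0f, 0.0f };
        float query[3] = { 0.9f, 0.1f, 0.0f };   // one query point

        size_t nnIdx[1];                         // index of the nearest reference point
        float dists[1];                          // squared distance to that point

        stim::cpu_kdtree<float, 3> kdt;          // assumed template arguments: value type and dimension
        kdt.create(ref, 3, 3);                   // build the tree: 3 reference points, max tree level 3
        kdt.cpu_search(query, 1, nnIdx, dists);  // 1 query point; stim::cuda_kdtree::create/search take the same arguments

        printf("nearest index = %zu, squared distance = %f\n", nnIdx[0], dists[0]);
        return 0;
    }

This mirrors how network<T>::compare() drives the tree above: build once from a flat centerline array, then query one point at a time.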