diff --git a/stim/biomodels/network.h b/stim/biomodels/network.h
index 8741920..823a46a 100644
--- a/stim/biomodels/network.h
+++ b/stim/biomodels/network.h
@@ -11,7 +11,7 @@
 #include <stim/math/vec3.h>
 #include <stim/visualization/obj.h>
 #include <stim/visualization/cylinder.h>
-#include <ANN/ANN.h>
+#include <stim/structures/kdtree.cuh>
 #include <boost/tuple/tuple.hpp>
 
 
@@ -57,7 +57,7 @@ class network{
 		/// Output the edge information as a string
 		std::string str(){
 			std::stringstream ss;
-			ss<<"("<<cylinder<T>::size()<<")\tl = "<<this.length()<<"\t"<<v[0]<<"----"<<v[1];
+			ss<<"("<<cylinder<T>::size()<<")\tl = "<<this->length()<<"\t"<<v[0]<<"----"<<v[1];
 			return ss.str();
 		}
 
@@ -390,8 +390,8 @@ public:
 	// gaussian function
 	float gaussianFunction(float x, float std=25){ return exp(-x/(2*std*std));} // by default std = 25
 
-    // stim 3d vector to annpoint of 3 dimensions
-	void stim2ann(ANNpoint &a, stim::vec3<T> b){
+    // convert vec3 to array
+	void stim2array(float *a, stim::vec3<T> b){
 		a[0] = b[0];
 		a[1] = b[1];
 		a[2] = b[2];
@@ -422,44 +422,43 @@ public:
 
 		//generate a KD-tree for network A
 		float metric = 0.0;                               				// initialize metric to be returned after comparing the networks
-		ANNkd_tree* kdt;                                 				// initialize a pointer to a kd tree
-		double **c;						                 	// centerline (array of double pointers) - points on kdtree must be double
+		stim::cuda_kdtree<T, 3> kdt;								// initialize a pointer to a kd tree
+		size_t MaxTreeLevels = 3;									// max tree level
+
+		float *c;						                 	// centerline (array of double pointers) - points on kdtree must be double
 		unsigned int n_data = A.total_points();          				// set the number of points
-		c = (double**) malloc(sizeof(double*) * n_data); 				// allocate the array pointer
-		for(unsigned int i = 0; i < n_data; i++)		 			// allocate space for each point of 3 dimensions
-			c[i] = (double*) malloc(sizeof(double) * 3);
+		c = (float*) malloc(sizeof(float) * n_data * 3); 				
 
 		unsigned t = 0;
 		for(unsigned e = 0; e < A.E.size(); e++){					//for each edge in the network
 			for(unsigned p = 0; p < A.E[e].size(); p++){				//for each point in the edge
 				for(unsigned d = 0; d < 3; d++){				//for each coordinate
 
-					c[t][d] = A.E[e][p][d];
+					c[t * 3 + d] = A.E[e][p][d];
 				}
 				t++;
 			}
 		}
 
 		//compare each point in the current network to the field produced by A
-		ANNpointArray pts = (ANNpointArray)c;           				// create an array of data points of type double
-		kdt = new ANNkd_tree(pts, n_data, 3);						// build a KD tree using the annpointarray
-		double eps = 0; // error bound
-		ANNdistArray dists = new ANNdist[1];     					// near neighbor distances
-		ANNidxArray nnIdx = new ANNidx[1];						// near neighbor indices // allocate near neigh indices
+		kdt.CreateKDTree(c, n_data, 3, MaxTreeLevels);				// build a KD tree
+		float *dists = new float[1];								// near neighbor distances
+		size_t *nnIdx = new size_t[1];							// near neighbor indices // allocate near neigh indices
 
 		stim::vec3<T> p0, p1;
 		float m1;
 		float M = 0;									//stores the total metric value
 		float L = 0;									//stores the total network length
-		ANNpoint queryPt = annAllocPt(3);
+		float* queryPt = new float[3];
 		for(unsigned e = 0; e < R.E.size(); e++){					//for each edge in A
 			R.E[e].add_mag(0);							//add a new magnitude for the metric
 
 			for(unsigned p = 0; p < R.E[e].size(); p++){				//for each point in the edge
 
 				p1 = R.E[e][p];							//get the next point in the edge
-				stim2ann(queryPt, p1);
-				kdt->annkSearch( queryPt, 1, nnIdx, dists, eps);		//find the distance between A and the current network
+				stim2array(queryPt, p1);
+				kdt.Search(queryPt, 1, 3, dists, nnIdx);			//find the distance between A and the current network
+
 				m1 = 1.0f - gaussianFunction((float)dists[0], sigma);		//calculate the metric value based on the distance
 				R.E[e].set_mag(m1, p, 1);					//set the error for the second point in the segment
 
diff --git a/stim/biomodels/network_dep.h b/stim/biomodels/network_dep.h
index f604361..d9fca0e 100644
--- a/stim/biomodels/network_dep.h
+++ b/stim/biomodels/network_dep.h
@@ -4,7 +4,7 @@
 #include <stim/math/vector.h>
 #include <stim/visualization/obj.h>
 #include <list>
-#include <ANN/ANN.h>
+//#include <ANN/ANN.h>
 
 namespace stim{
 
diff --git a/stim/structures/kdtree.cuh b/stim/structures/kdtree.cuh
index 8e80163..15f458a 100644
--- a/stim/structures/kdtree.cuh
+++ b/stim/structures/kdtree.cuh
@@ -1,4 +1,4 @@
-// right now the size of CUDA STACK is set to 1000, increase it if you mean to make deeper tree
+// right now the size of CUDA STACK is set to 50, increase it if you mean to make deeper tree
 // data should be stored in row-major
 // x1,x2,x3,x4,x5......
 // y1,y2,y3,y4,y5......
@@ -207,13 +207,13 @@ namespace stim {
 		size_t best_index = 0;
 
 		int next_nodes_pos = 0;															// initialize pop out order index
-		next_nodes[id * 1000 + next_nodes_pos] = cur;									// find data that belongs to the very specific thread
+		next_nodes[id * 50 + next_nodes_pos] = cur;									// find data that belongs to the very specific thread
 		next_nodes_pos++;
 
 		while (next_nodes_pos) {
 			int next_search_nodes_pos = 0;												// record push back order index
 			while (next_nodes_pos) {
-				cur = next_nodes[id * 1000 + next_nodes_pos - 1];						// pop out the last push in one and keep poping out
+				cur = next_nodes[id * 50 + next_nodes_pos - 1];						// pop out the last push in one and keep poping out
 				next_nodes_pos--;
 				int split_axis = nodes[cur].level % D;
 
@@ -232,20 +232,20 @@ namespace stim {
 
 					if (fabs(d) > range) {
 						if (d < 0) {
-							next_search_nodes[id * 1000 + next_search_nodes_pos] = nodes[cur].left;
+							next_search_nodes[id * 50 + next_search_nodes_pos] = nodes[cur].left;
 							next_search_nodes_pos++;
 						}
 						else {
-							next_search_nodes[id * 1000 + next_search_nodes_pos] = nodes[cur].right;
+							next_search_nodes[id * 50 + next_search_nodes_pos] = nodes[cur].right;
 							next_search_nodes_pos++;
 						}
 					}
 					else {
-						next_search_nodes[id * 1000 + next_search_nodes_pos] = nodes[cur].right;
+						next_search_nodes[id * 50 + next_search_nodes_pos] = nodes[cur].right;
 						next_search_nodes_pos++;
-						next_search_nodes[id * 1000 + next_search_nodes_pos] = nodes[cur].left;
+						next_search_nodes[id * 50 + next_search_nodes_pos] = nodes[cur].left;
 						next_search_nodes_pos++;
-						if (next_search_nodes_pos > 1000) {
+						if (next_search_nodes_pos > 50) {
 							printf("Thread conflict might be caused by thread %d, so please try smaller input max_tree_levels\n", id);
 							(*Judge)++;
 						}
@@ -253,7 +253,7 @@ namespace stim {
 				}
 			}
 			for (int i = 0; i < next_search_nodes_pos; i++)
-				next_nodes[id * 1000 + i] = next_search_nodes[id * 1000 + i];
+				next_nodes[id * 50 + i] = next_search_nodes[id * 50 + i];
 			next_nodes_pos = next_search_nodes_pos;										
 		}
 		*d_distance = best_distance;
@@ -392,8 +392,8 @@ namespace stim {
 			HANDLE_ERROR(cudaMalloc((void**)&d_query_points, sizeof(T) * query_points.size() * D));
 			HANDLE_ERROR(cudaMalloc((void**)&d_indices, sizeof(size_t) * query_points.size()));
 			HANDLE_ERROR(cudaMalloc((void**)&d_distances, sizeof(T) * query_points.size()));
-			HANDLE_ERROR(cudaMalloc((void**)&next_nodes, threads * blocks * 1000 * sizeof(int)));																// STACK size right now is 1000, you can change it if you mean to
-			HANDLE_ERROR(cudaMalloc((void**)&next_search_nodes, threads * blocks * 1000 * sizeof(int)));	
+			HANDLE_ERROR(cudaMalloc((void**)&next_nodes, threads * blocks * 50 * sizeof(int)));																// STACK size right now is 50, you can change it if you mean to
+			HANDLE_ERROR(cudaMalloc((void**)&next_search_nodes, threads * blocks * 50 * sizeof(int)));	
 			HANDLE_ERROR(cudaMemcpy(d_query_points, &query_points[0], sizeof(T) * query_points.size() * D, cudaMemcpyHostToDevice));
 
 			SearchBatch<<<threads, blocks>>> (d_nodes, d_index, d_reference_points, d_query_points, query_points.size(), d_indices, d_distances, next_nodes, next_search_nodes, Judge);
--
libgit2 0.21.4