Laila Saadatifard · Jiaming Guo · David Mayerich · David Mayerich · David Mayerich · Jiaming Guo
Showing 7 changed files Show diff stats
python/structen.py
stim/biomodels/network.h
stim/cuda/cudatools/callable.h
stim/iVote/ivote2/local_max.cuh
stim/math/vec3.h
stim/visualization/colormap.h
stim/visualization/gl_network.h
@@ -226,7 +226,7 @@ def st2amira(filename, T):
     A[..., 2] = T[..., 3]
  
     #roll the tensor axis so that it is the leading component
-    A = numpy.rollaxis(A, A.ndim - 1)
+    #A = numpy.rollaxis(A, A.ndim - 1)
     A.tofile(filename)
     print("\n", A.shape)
  
@@ -14,22 +14,23 @@
 #include <stim/visualization/cylinder.h>
 #include <stim/structures/kdtree.cuh>
 #include <stim/cuda/cudatools/timer.h>
+#include <stim/cuda/cudatools/callable.h>
  
+//********************help function********************
+// gaussian_function
+CUDA_CALLABLE float gaussianFunction(float x, float std = 25) { return exp(-x / (2 * std*std)); }  // std default sigma value is 25
  
+// compute metric in parallel
 #ifdef __CUDACC__
-//device gaussian function
-__device__ float gaussianFunction(float x, float std){ return expf(-x/(2*std*std));}
-
-//compute metric in parallel
 template <typename T>
-__global__ void d_metric(T* M, size_t n, T* D, float sigma){
+__global__ void find_metric_parallel(T* M, size_t n, T* D, float sigma){
 	size_t x = blockDim.x * blockIdx.x + threadIdx.x;
 	if(x >= n) return;
 	M[x] = 1.0f - gaussianFunction(D[x], sigma);
 }
  
 //find the corresponding edge index from array index
-__global__ void d_findedge(size_t* I, size_t n, unsigned* R, size_t* E, size_t ne){
+__global__ void find_edge_index_parallel(size_t* I, size_t n, unsigned* R, size_t* E, size_t ne){
 	size_t x = blockDim.x * blockIdx.x + threadIdx.x;
 	if(x >= n) return;
 	unsigned i = 0;
@@ -46,13 +47,13 @@ __global__ void d_findedge(size_t* I, size_t n, unsigned* R, size_t* E, size_t n
 #endif
  
 //hard-coded factor
-int threshold_fac = 0;
+int threshold_fac;
  
 namespace stim{
 /** This is the a class that interfaces with gl_spider in order to store the currently
  *   segmented network. The following data is stored and can be extracted:
  *   1)Network geometry and centerline.
- *   2)Network connectivity (a graph of nodes and edges), reconstructed using ANN library.
+ *   2)Network connectivity (a graph of nodes and edges), reconstructed using kdtree.
 */
  
 template<typename T>
@@ -136,15 +137,12 @@ class network{
  
 				return ss.str();
 			}
-
-
 	};
  
 protected:
  
 	std::vector<edge> E;       //list of edges
 	std::vector<vertex> V;	   //list of vertices.
-	//std::vector<int> NT;	   //stores a list of neuronal type for each point in the centerline (will set value only when centerline is built from swc file)
  
 public:
  
@@ -171,12 +169,12 @@ public:
 	}
  
 	///Returns the radius at specific point in the edge
-	T get_R(unsigned e, unsigned i) {
+	T get_r(unsigned e, unsigned i) {
 		return E[e].r(i);
 	}
  
 	///Returns the average radius of specific edge
-	T get_average_R(unsigned e) {
+	T get_average_r(unsigned e) {
 		T result = 0.0;
 		unsigned n = E[e].size();
 		for (unsigned p = 0; p < n; p++)
@@ -186,7 +184,7 @@ public:
 	}
  
 	///Returns the length of current edge
-	T get_L(unsigned e) {
+	T get_l(unsigned e) {
 		return E[e].length();
 	}
  
@@ -219,12 +217,13 @@ public:
 	}
  
 	///Set radius
-	void set_R(unsigned e, std::vector<T> radius) {
+	void set_r(unsigned e, std::vector<T> radius) {
 		E[e].cylinder<T>::copy_r(radius);
 	}
  
-	void set_R(unsigned e, unsigned i, T radius) {
-		E[e].cylinder<T>::set_r(i, radius);
+	void set_r(unsigned e, T radius) {
+		for (size_t i = 0; i < E[e].size(); i++)
+			E[e].cylinder<T>::set_r(i, radius);
 	}
 	//scale the network by some constant value
 	//	I don't think these work??????
@@ -580,9 +579,6 @@ public:
 		return n;							              	//return the resampled network
 	}
  
-	// host gaussian function
-	__host__ float gaussianFunction(float x, float std = 25){ return exp(-x/(2*std*std));}  // std default value is 25
-
 	/// Calculate the total number of points on all edges.
 	unsigned total_points(){
 		unsigned n = 0;
@@ -624,6 +620,19 @@ public:
 		}
 	}
  
+	// get list of metric
+	std::vector<T> metric() {
+		std::vector<T> result;
+		T m;
+		for (size_t e = 0; e < E.size(); e++) {
+			for (size_t p = 0; p < E[e].size(); p++) {
+				m = E[e].r(p);
+				result.push_back(m);
+			}
+		}
+		return result;
+	}
+
 	/// Calculate the average magnitude across the entire network.
 	/// @param m is the magnitude value to use. The default is 0 (usually radius).
 	T average(unsigned m = 0){
@@ -694,7 +703,7 @@ public:
 			size_t threads = (1024>n)?n:1024;
 			size_t blocks = n/threads + (n%threads)?1:0;
  
-			d_metric<<<blocks, threads>>>(d_m1, n, d_dists, sigma);					//calculate the metric value based on the distance
+			find_metric_parallel<<<blocks, threads>>>(d_m1, n, d_dists, sigma);					//calculate the metric value based on the distance
  
 			cudaMemcpy(m1, d_m1, n * sizeof(T), cudaMemcpyDeviceToHost);
  
@@ -717,9 +726,9 @@ public:
 			T* dists = new T[n];
 			size* nnIdx = new size_t[n];
  
-			edge2array(queryPt, R.E[e]);
+			edge2array(query, R.E[e]);
  
-			kdt.cpu_search(queryPt, n, nnIdx, dists);			//find the distance between A and the current network
+			kdt.cpu_search(query, n, nnIdx, dists);			//find the distance between A and the current network
  
 			for(unsigned p = 0; p < R.E[e].size(); p++){
 				m1[p] = 1.0f - gaussianFunction((T)dists[p], sigma);	//calculate the metric value based on the distance
@@ -795,7 +804,7 @@ public:
 			size_t threads = (1024>n)?n:1024;													// test to see whether the number of point in current edge is more than 1024
 			size_t blocks = n/threads + (n%threads)?1:0;
  
-			d_metric<<<blocks, threads>>>(d_m1, n, d_dists, sigma);								// calculate the metrics in parallel
+			find_metric_parallel<<<blocks, threads>>>(d_m1, n, d_dists, sigma);								// calculate the metrics in parallel
  
 			cudaMemcpy(m1, d_m1, n * sizeof(T), cudaMemcpyDeviceToHost);
  
@@ -816,7 +825,7 @@ public:
 			cudaMalloc((void**)&d_edge_point_num, NB * sizeof(size_t));
 			cudaMemcpy(d_edge_point_num, &edge_point_num[0], NB * sizeof(size_t), cudaMemcpyHostToDevice);
  
-			d_findedge<<<blocks, threads>>>(d_nnIdx, n, d_relation, d_edge_point_num, NB);			// find the edge corresponding to the array index in parallel
+			find_edge_index_parallel<<<blocks, threads>>>(d_nnIdx, n, d_relation, d_edge_point_num, NB);			// find the edge corresponding to the array index in parallel
  
 			cudaMemcpy(&relation[e][0], d_relation, n * sizeof(unsigned), cudaMemcpyDeviceToHost);	//copy relationship from device to host
 		}
@@ -7,4 +7,10 @@
 #define CUDA_CALLABLE
 #endif
  
+#ifdef __CUDACC__
+#define CUDA_UNCALLABLE __host__ inline
+#else
+#define CUDA_UNCALLABLE
+#endif
+
 #endif
@@ -10,30 +10,22 @@ namespace stim{
  
 		// this kernel calculates the local maximum for finding the cell centers
 		template<typename T>
-		__global__ void cuda_local_max(T* gpuCenters, T* gpuVote, int conn, size_t x, size_t y){
+		__global__ void cuda_local_max(T* gpuCenters, T* gpuVote, int conn, int x, int y){
  
 			// calculate the 2D coordinates for this current thread.
-			size_t xi = blockIdx.x * blockDim.x + threadIdx.x;
-			size_t yi = blockIdx.y * blockDim.y + threadIdx.y;
+			int xi = blockIdx.x * blockDim.x + threadIdx.x;
+			int yi = blockIdx.y * blockDim.y + threadIdx.y;
  
 			if(xi >= x || yi >= y)
 				return;
  
 			// convert 2D coordinates to 1D
-			size_t i = yi * x + xi;
+			int i = yi * x + xi;
  
 			gpuCenters[i] = 0;		//initialize the value at this location to zero
  
 			T val = gpuVote[i];
  
-			//compare to the threshold
-			//if(val < final_t) return;
-			
-			//define an array to store indices with same vote value
-			/*int * IdxEq;
-			IdxEq = new int  [2*conn];
-			int n = 0;*/
-			
 			for(int xl = xi - conn; xl < xi + conn; xl++){
 				for(int yl = yi - conn; yl < yi + conn; yl++){
 					if(xl >= 0 && xl < x && yl >= 0 && yl < y){
@@ -42,8 +34,7 @@ namespace stim{
 							return;
 							}
 						if (gpuVote[il] == val){
-							/*IdxEq[n] = il;
-							n = n+1;*/
+							
 							 if( il > i){
 								 return;
 							}
@@ -51,12 +42,7 @@ namespace stim{
 					}							
 				}
 			}
-			/*if (n!=0){
-				if(IdxEq[n/2] !=i){
-					return;
-				}
-			}	*/	
-			//gpuCenters[i] = 1;
+			
 			gpuCenters[i] = gpuVote[i];
 		}
  
@@ -5,6 +5,8 @@
 #include <stim/cuda/cudatools/callable.h>
 #include <cmath>
  
+#include <sstream>
+
  
 namespace stim{
  
@@ -243,9 +245,9 @@ public:
 			return false;	
 	}
  
-#ifndef __NVCC__
+//#ifndef __CUDACC__
 	/// Outputs the vector as a string
-	std::string str() const{
+std::string str() const{
 		std::stringstream ss;
  
 		const size_t N = 3;
@@ -261,7 +263,7 @@ public:
  
 		return ss.str();
 	}
-#endif
+//#endif
  
 	size_t size(){ return 3; }
  
@@ -166,14 +166,12 @@ static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T 
         gridX = 65535;
     }
     dim3 dimGrid(gridX, gridY);
-	//int gridDim = (nVals + blockDim - 1)/blockDim;
 	if(cm == cmGrayscale)
 		applyGrayscale<<<dimGrid, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal);
 	else if(cm == cmBrewer)
 	{
 		initBrewer();
 		applyBrewer<<<dimGrid, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal);
-		//HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3));
 		destroyBrewer();
 	}
  
@@ -190,13 +188,9 @@ static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T 
     unsigned char* gpuDest;
     HANDLE_ERROR(cudaMalloc( (void**)&gpuDest, sizeof(unsigned char) * nVals * 3 ));
  
-	//HANDLE_ERROR(cudaMemset(gpuSource, 0, sizeof(T) * nVals));
-
     //create the image on the gpu
     gpu2gpu(gpuSource, gpuDest, nVals, minVal, maxVal, cm);
-
-	//HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3));
-
+	
     //copy the image from the GPU to the CPU
     HANDLE_ERROR(cudaMemcpy(cpuDest, gpuDest, sizeof(unsigned char) * nVals * 3, cudaMemcpyDeviceToHost));
  
@@ -158,17 +158,18 @@ public:
 	///render the network cylinder as a series of tubes(when only one network loaded) 
 	void glCylinder0() {
  
+		float r1, r2;
 		if (!glIsList(dlist)) {										// if dlist isn't a display list, create it
 			dlist = glGenLists(1);									// generate a display list
 			glNewList(dlist, GL_COMPILE);							// start a new display list
-			T radius = 0.0;
 			for (unsigned e = 0; e < E.size(); e++) {				// for each edge in the network
-				radius = get_average_R(e) / get_average_R(0);	// calculate the  radius
 				for (unsigned p = 1; p < E[e].size(); p++) {		// for each point on that edge
 					stim::circle<T> C1 = E[e].circ(p - 1);
 					stim::circle<T> C2 = E[e].circ(p);
-					C1.set_R(2 * radius);								// re-scale the circle to the same
-					C2.set_R(2 * radius);
+					r1 = E[e].r(p - 1);
+					r2 = E[e].r(p);
+					C1.set_R(2 * r1);								// re-scale the circle to the same
+					C2.set_R(2 * r2);
 					std::vector< stim::vec3<T> > Cp1 = C1.glpoints(20);// get 20 points on the circle plane
 					std::vector< stim::vec3<T> > Cp2 = C2.glpoints(20);
 					glBegin(GL_QUAD_STRIP);
@@ -180,8 +181,17 @@ public:
 				}													// set the texture coordinate based on the specified magnitude index
 			}
 			for (unsigned n = 0; n < V.size(); n++) {
-				size_t num = V[n].e[0].size();					// store the number of outgoing edge of that vertex
-				renderBall(V[n][0], V[n][1], V[n][2], 2 * radius, 20);
+				for (unsigned i = 0; i < E.size(); i++) {
+					if (E[i].v[0] == n) {
+						r1 = E[i].r(0);
+						break;
+					}
+					else if (E[i].v[1] == n) {
+						r1 = E[i].r(E[i].size() - 1);
+						break;
+					}
+				}
+				renderBall(V[n][0], V[n][1], V[n][2], 2 * r1, 20);
 			}
 			glEndList();						// end the display list
 		}