Merged only the necessary parts of Branch detection into master

Pavel Govyadinov
1 parent bf731970
Showing 12 changed files with 1295 additions and 128 deletions Show diff stats
stim/cuda/arraymath/array_cart2polar.cuh
stim/cuda/branch_detection.cuh
stim/cuda/branch_detection2.cuh
stim/cuda/cuda_texture.cuh
stim/cuda/sharedmem.cuh
stim/cuda/spider_cost.cuh
stim/cuda/templates/conv2sep.cuh
stim/cuda/templates/gaussian_blur.cuh
stim/cuda/testKernel.cuh
stim/gl/gl_spider.h
stim/visualization/glObj.h
stim/visualization/obj.h
@@ -4,7 +4,7 @@
 namespace stim{
 	namespace cuda{
 		template<typename T>
-		__global__ void cuda_cart2polar(T* a, int x, int y){
+		__global__ void cuda_cart2polar(T* a, int x, int y, float rotation){
  
  
 			// calculate the 2D coordinates for this current thread.
@@ -20,14 +20,14 @@ namespace stim{
 				float yl = a[i * 2 + 1];
 				float theta = atan2( yl, xl ) ;
 				float r = sqrt(xl * xl + yl * yl);
-				a[i * 2 + 0] = theta;
+				a[i * 2 + 0] = theta + rotation;
 				a[i * 2 + 1] = r;
  
 		}
  
  
 		template<typename T>
-		void gpu_cart2polar(T* gpuGrad, unsigned int x, unsigned int y){
+		void gpu_cart2polar(T* gpuGrad, unsigned int x, unsigned int y, float rotation = 0){
  
 			unsigned int max_threads = stim::maxThreadsPerBlock();
 			dim3 threads(max_threads, 1);
@@ -35,7 +35,7 @@ namespace stim{
  
  
 			//call the kernel to do the multiplication
-			cuda_cart2polar <<< blocks, threads >>>(gpuGrad, x, y);
+			cuda_cart2polar <<< blocks, threads >>>(gpuGrad, x, y, rotation);
  
 		}
  
@@ -68,4 +68,4 @@ namespace stim{
 	}
 }
  
-#endif
 \ No newline at end of file
+#endif
+#include <iostream>
+#include <fstream>
+#include <cuda_runtime.h>
+#include <stim/math/vector.h>
+//#include <math.h>
+#include <stim/visualization/colormap.h>
+#include <stim/cuda/cuda_texture.cuh>
+#include <stim/cuda/templates/gradient.cuh>
+#include <stim/cuda/templates/gaussian_blur.cuh>
+#include <stim/cuda/arraymath.cuh>
+#include <stim/cuda/ivote.cuh>
+#include <stim/cuda/testKernel.cuh>
+typedef unsigned int uint;
+typedef unsigned int uchar;
+
+stim::cuda::cuda_texture t;	
+float*		gpuTable;
+float*		gpuGrad;
+float*		gpuVote;	
+float*		gpuI;
+float*		gpuCenters;
+
+/// Fills cpuTable with a (2*rmax+1) x (2*rmax+1) lookup table of atan2 values.
+/// The entry for (row, col) is stored at row * width + col and holds
+/// atan2(rmax - row, rmax - col), i.e. the angle of the offset that points
+/// from that cell to the centre cell of the window.
+/// @param cpuTable	host buffer with room for (2*rmax+1)*(2*rmax+1) floats.
+/// @param rmax		half-width of the window, in cells.
+void atan_2d(float* cpuTable, unsigned int rmax)
+{
+	const int center = rmax;		//centre cell, identical along both axes
+	const int width  = 2*rmax + 1;		//the window is square: width == height
+
+	//walk the table row by row, in storage order
+	for (int row = 0; row < width; row++){
+		float* line = cpuTable + row * width;
+		const float dy = center - row;
+
+		for (int col = 0; col < width; col++){
+			const float dx = center - col;
+			line[col] = atan2(dy, dx);
+		}
+	}
+}
+
+/// Allocates the file-scope device buffers used by find_branch.
+/// @param bytes_table	size in bytes of the atan2 lookup table (gpuTable).
+/// @param bytes_ds	size in bytes of gpuI, gpuVote and gpuCenters; gpuGrad is
+///			allocated with twice this size.
+void initCuda(unsigned int bytes_table, unsigned int bytes_ds)
+{
+	const unsigned int bytes_grad = bytes_ds*2;
+
+	HANDLE_ERROR( cudaMalloc((void**) &gpuTable,   bytes_table) );
+	HANDLE_ERROR( cudaMalloc((void**) &gpuI,       bytes_ds)    );
+	HANDLE_ERROR( cudaMalloc((void**) &gpuGrad,    bytes_grad)  );
+	HANDLE_ERROR( cudaMalloc((void**) &gpuVote,    bytes_ds)    );
+	HANDLE_ERROR( cudaMalloc((void**) &gpuCenters, bytes_ds)    );
+}
+
+/// Releases every device buffer that initCuda allocated.
+/// The buffers are freed in the order table, gradient, vote, centers, image.
+void cleanCuda()
+{
+	HANDLE_ERROR( cudaFree(gpuTable)   );
+	HANDLE_ERROR( cudaFree(gpuGrad)    );
+	HANDLE_ERROR( cudaFree(gpuVote)    );
+	HANDLE_ERROR( cudaFree(gpuCenters) );
+	HANDLE_ERROR( cudaFree(gpuI)       );
+}
+
+/// Runs the blur / gradient / iterative-voting pipeline on a GL texture and
+/// returns the positions of the detected local maxima.
+/// @param texbufferID	GL texture handed to cuda_texture::MapCudaTexture.
+/// @param texType	GL texture target of texbufferID.
+/// @param x		width of the image in pixels.
+/// @param y		height of the image in pixels.
+/// @return one vec per detected centre, holding (column/x, row/y, 0). Pixels in
+///	    columns 0..4 and in the last four columns are ignored.
+/// NOTE(review): the gpu2image debug dumps below still pass 16 and 8*27 as the
+/// image size -- confirm before calling this with any other x / y.
+std::vector< stim::vec<float> >
+find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
+{
+	std::vector<stim::vec<float> >  output;
+
+	unsigned int 	pixels 		= x * y;
+	if(pixels == 0)				//empty image: nothing to allocate or search
+		return output;
+
+	float 		phi	 	= 15.1*M_PI/180;	//15.1 degrees, in radians
+	int		iter		= 5;
+	float 		dphi		= phi/iter;		//phi shrinks by this much per iteration
+	float 		rmax 		= 10;
+	float		sigma		= 4;
+	unsigned int	table_side	= 2*(unsigned int)rmax + 1;
+	unsigned int 	bytes_table	= sizeof(float) * table_side * table_side;
+	unsigned int	bytes_ds	= sizeof(float) * pixels;
+	unsigned int	conn		= 5;
+	float		final_t		= 200.0;
+
+	//host-side staging buffers; std::vector releases them on every exit path
+	std::vector<float> cpuTable(table_side * table_side);
+	std::vector<float> cpuCenters(pixels);
+
+	std::stringstream name;
+
+	initCuda(bytes_table, bytes_ds); 
+
+	//build the atan2 lookup table on the host and upload it
+	atan_2d(&cpuTable[0], rmax);
+	HANDLE_ERROR(
+		cudaMemcpy(gpuTable, &cpuTable[0], bytes_table, cudaMemcpyHostToDevice)
+		);
+
+	test(texbufferID, texType);		//debug helper from testKernel.cuh
+
+	t.MapCudaTexture(texbufferID, texType);
+	HANDLE_ERROR(cudaDeviceSynchronize());
+	stim::cuda::tex_gaussian_blur2<float>(
+		gpuI, sigma, x, y, t.getTexture(), t.getArray()
+		);
+	HANDLE_ERROR(cudaDeviceSynchronize());
+	stim::gpu2image<float>(gpuI, "Blur.jpg", 16, 8*27, 0, 255);	
+
+	stim::cuda::gpu_gradient_2d<float>(
+		gpuGrad, gpuI, x, y
+		);
+	HANDLE_ERROR(cudaDeviceSynchronize());
+//	stim::gpu2image<float>(gpuGrad, "Grad.jpg", 16, 8*27, 0, 1);	
+	
+	//convert the gradient to (theta, r), with theta offset by pi
+	stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y, M_PI);
+	HANDLE_ERROR(cudaDeviceSynchronize());
+//	stim::gpu2image<float>(gpuGrad, "Cart_2_polar.jpg", 16, 8*27, 0, 1);	
+
+	for (int i = 0; i < iter; i++)
+	{
+		stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
+		name << "Vote" << i << ".bmp";
+		stim::gpu2image<float>(gpuVote, name.str(), 16, 8*27, 0, 255);
+		name.str("");
+		name.clear();	
+		HANDLE_ERROR(cudaDeviceSynchronize());
+		stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
+		HANDLE_ERROR(cudaDeviceSynchronize());
+		phi = phi - dphi;
+	}
+	
+	HANDLE_ERROR(cudaDeviceSynchronize());
+	stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, final_t, conn, x, y);
+	HANDLE_ERROR(
+		cudaMemcpy(&cpuCenters[0], gpuCenters, bytes_ds, cudaMemcpyDeviceToHost)
+		);
+	stim::gpu2image<float>(gpuCenters, "Centers.jpg", 16, 8*27, 0, 255);	
+
+	//collect every pixel flagged as a centre, skipping the left/right borders
+	for(unsigned int i = 0; i < pixels; i++)
+	{
+		unsigned int ix = (i % x);
+		unsigned int iy = (i / x);
+		//"ix + 4 < x" is the wrap-free form of "ix < x - 4"
+		if((cpuCenters[i] == 1) && (ix > 4) && (ix + 4 < x))
+		{
+			std::cout << ix << " : " << iy << std::endl; 
+
+			float x_v = (float) ix;
+			float y_v = (float) iy;
+			output.push_back(stim::vec<float>((x_v/(float)x),
+							  (y_v/(float)y), 0.0));	
+
+			//print the same normalised coordinates that were stored
+			std::cout << x_v/(float)x << " : " << y_v/(float)y << std::endl; 
+		}
+	}
+
+
+	t.UnmapCudaTexture();
+	cleanCuda();
+	return output;
+}
+#include <stim/cuda/templates/gaussian_blur.cuh>
+#include <stim/cuda/templates/gradient.cuh>
+#include <stim/cuda/arraymath.cuh>
+#include <stim/cuda/ivote.cuh>
+
+
+
+
+
+
+
+
+
+
+/// Builds a square atan2 lookup table of side 2*rmax+1 in cpuTable.
+/// Cell (xt, yt) is written to index yt * side + xt and receives
+/// atan2(rmax - yt, rmax - xt).
+/// @param cpuTable	host buffer holding at least (2*rmax+1)*(2*rmax+1) floats.
+/// @param rmax		half-width of the window, in cells.
+void atan_2(float* cpuTable, unsigned int rmax){
+
+	const int side = 2*rmax +1;	//same extent along x and y
+	const int mid  = rmax;		//index of the centre cell on either axis
+	const int cells = side * side;
+
+	//every cell is independent, so a single pass over the flat index suffices
+	for (int id = 0; id < cells; id++){
+		const int xt = id % side;
+		const int yt = id / side;
+
+		//offset from this cell to the centre of the window
+		const float xd = mid - xt;
+		const float yd = mid - yt;
+
+		cpuTable[id] = atan2(yd, xd);
+	}
+
+}
+// Earlier draft of the branch-detection pipeline (down-sample, blur, gradient,
+// cart2polar, iterative voting, local maxima).
+// NOTE(review): as shown here this function is not compilable: the cpuTable
+// initialiser below is cut off after "(float", none of rmax, resize, gpuI0,
+// sigma, phi, dphi, iter, conn, t, cpuResize, cpuBlur, cpuGradient,
+// cpuCart2Polar, cpuVote1..cpuVote5 and cpuCenters is declared in the visible
+// text, no value is returned, and none of the cudaMalloc'd buffers is freed.
+std::vector<stim::vec<float> > 
+find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
+{
+
+	// NOTE(review): truncated statement -- the right-hand side is missing.
+	float* cpuTable		= (float
+
+	unsigned int pixels = x * y;
+	unsigned int bytes = sizeof(float) * pixels;
+
+	//calculate the number of bytes in the atan2 table
+
+	unsigned int bytes_table = (2*rmax+1) * (2*rmax+1) * sizeof(float);
+
+
+
+	//allocate space on the GPU for the atan2 table
+
+	float* gpuTable;
+
+	cudaMalloc(&gpuTable, bytes_table);
+
+
+
+	cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice);
+
+	// size of the down-sampled image: x and y divided by sigma_ds, rounded up
+	unsigned int sigma_ds = 1/resize;
+	unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
+	unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
+	unsigned int bytes_ds = sizeof(float) * x_ds * y_ds;
+	
+
+	// allocate space on the GPU for the down-sampled image
+	float* gpuI;
+	cudaMalloc(&gpuI, bytes_ds);
+
+	
+	// allocate space on the GPU for the gradient (twice the image size)
+	float* gpuGrad;
+	cudaMalloc(&gpuGrad, bytes_ds*2);
+
+	// allocate space on the GPU for the vote image
+	float* gpuVote;
+	cudaMalloc(&gpuVote, bytes_ds);
+
+	// allocate space on the GPU for the detected cell centers
+
+	float* gpuCenters;
+
+	cudaMalloc(&gpuCenters, bytes_ds);		
+
+
+	stim::cuda::gpu_down_sample<float>(gpuI, gpuI0, resize, x , y);
+	cudaMemcpy(cpuResize, gpuI, bytes_ds, cudaMemcpyDeviceToHost);
+
+	// from here on x and y refer to the down-sampled image
+x = x_ds;
+	y = y_ds;
+	t = t * resize;
+	//sigma = sigma * resize;
+
+	cudaDeviceSynchronize();
+	stim::cuda::gpu_gaussian_blur2<float>(gpuI,sigma, x, y);
+	cudaDeviceSynchronize();
+	cudaMemcpy(cpuBlur, gpuI, bytes_ds, cudaMemcpyDeviceToHost);
+	cudaDeviceSynchronize();
+	
+	stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y);
+	cudaDeviceSynchronize();
+	cudaMemcpy(cpuGradient, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost);
+
+	stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);
+	cudaDeviceSynchronize();
+	cudaMemcpy(cpuCart2Polar, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost);
+	
+
+	//multiply the gradient by a constant and calculate the absolute value (to save an image)	
+
+	stim::cuda::cpu_multiply<float>(cpuCart2Polar, 40, x * y * 2);
+
+	cudaDeviceSynchronize();
+
+	stim::cuda::cpu_abs<float>(cpuCart2Polar, x * y * 2);
+
+	cudaDeviceSynchronize();
+
+		
+	// iterative voting: vote, update the directions, then shrink phi by dphi
+	for (int i =0; i<iter; i++){
+		
+		stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
+		cudaDeviceSynchronize();
+		stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
+		cudaDeviceSynchronize();
+		// keep a host copy of the vote image of each of the first five iterations;
+		// any later iteration overwrites cpuVote5
+		switch (i){
+		case 0 : cudaMemcpy(cpuVote1, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
+			break;
+		case 1 : cudaMemcpy(cpuVote2, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
+			break;
+		case 2 : cudaMemcpy(cpuVote3, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
+			break;
+		case 3 : cudaMemcpy(cpuVote4, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
+			break;
+		case 4 : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
+			break;
+		default : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
+			break;
+		}
+		phi = phi - dphi;
+	}
+	
+	// find the local maxima of the final vote image and copy them to the host
+	stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, t, conn, x, y);
+	cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost);
+	
+}
+#ifndef STIM_CUDA_TEXTURE_H
+#define STIM_CUDA_TEXTURE_H
+
+#include <assert.h>
+#include <stim/cuda/cudatools/error.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <cublas_v2.h>
+#include <stdio.h>
+#include <GL/glew.h>
+#include <GL/glut.h>
+#include <sstream>
+#include <stim/visualization/colormap.h>
+#include <stim/cuda/cudatools/devices.h>
+#include <stim/cuda/cudatools/threads.h>
+#include <stim/math/vector.h>
+
+///A container for the texture based methods used by the spider class.
+namespace stim
+{
+	namespace cuda
+	{
+		///Wraps an OpenGL texture as a CUDA texture object using the CUDA/OpenGL
+		///graphics-interop API. Every MapCudaTexture call must be paired with one
+		///UnmapCudaTexture call.
+		class cuda_texture
+		{
+		public:
+			cudaArray* srcArray;			//array behind the mapped GL texture
+			cudaGraphicsResource_t resource;	//handle filled in by cudaGraphicsGLRegisterImage
+			struct cudaResourceDesc resDesc;	//resource description given to cudaCreateTextureObject
+			struct cudaTextureDesc texDesc;		//sampling parameters given to cudaCreateTextureObject
+			cudaTextureObject_t tObj;		//texture object created by MapCudaTexture
+
+
+			///basic constructor that creates the texture with default parameters:
+			///wrap addressing, point filtering, element-type reads and
+			///unnormalized coordinates.
+			cuda_texture()
+			: srcArray(NULL), resource(NULL), tObj(0)
+			{
+				//start from fully zeroed descriptors and handles
+				memset(&resDesc, 0, sizeof(resDesc));
+				memset(&texDesc, 0, sizeof(texDesc));
+				texDesc.addressMode[0] 		= cudaAddressModeWrap;
+				texDesc.addressMode[1] 		= cudaAddressModeWrap;
+				texDesc.filterMode		= cudaFilterModePoint;
+				texDesc.readMode		= cudaReadModeElementType;
+				texDesc.normalizedCoords	= 0;
+			}
+
+//-------------------------------------------------------------------------//
+//-------------------------------CUDA_MAPPING------------------------------//
+//-------------------------------------------------------------------------//
+//Methods for creating the cuda texture.
+		///@param GLuint tex	 	 -- GLtexture (must be contained in a frame buffer object)
+		///				    that holds the data that will be handed to cuda.
+		///@param GLenum target		 -- either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
+		///				    may work with other gl texture types but untested.
+		///Registers and maps the gl texture with cuda, fetches the cuda array behind it,
+		///and creates a cuda texture object that reads from that array.
+		void
+		MapCudaTexture(GLuint tex, GLenum target)
+		{
+			HANDLE_ERROR(
+				cudaGraphicsGLRegisterImage(
+					&resource,
+					tex,
+					target,
+//					cudaGraphicsMapFlagsReadOnly
+					cudaGraphicsRegisterFlagsNone	
+				)
+			);
+
+			HANDLE_ERROR(
+				cudaGraphicsMapResources(1, &resource)
+			);
+			
+			//mip level 0, array index 0
+			HANDLE_ERROR(
+				cudaGraphicsSubResourceGetMappedArray(&srcArray, resource, 0, 0)
+			);
+			
+			memset(&resDesc, 0, sizeof(resDesc));
+			resDesc.resType = cudaResourceTypeArray;
+			resDesc.res.array.array = srcArray;
+			HANDLE_ERROR(
+				cudaCreateTextureObject(&tObj, &resDesc, &texDesc, NULL)
+			);
+		}
+
+		///Destroys the cuda texture object, then unmaps and unregisters the gl
+		///texture so that OpenGL can use it again.
+		void
+		UnmapCudaTexture()
+		{
+			//tear down in the reverse order of MapCudaTexture: the texture object
+			//refers to srcArray, so it goes first, while the array is still mapped
+			HANDLE_ERROR(
+				cudaDestroyTextureObject(tObj)
+			);
+			HANDLE_ERROR(
+				cudaGraphicsUnmapResources(1, &resource)
+			);
+			HANDLE_ERROR(
+				cudaGraphicsUnregisterResource(resource)
+			);	
+
+			//do not leave stale handles behind
+			tObj = 0;
+			srcArray = NULL;
+			resource = NULL;
+		}
+
+//-------------------------------------------------------------------------//
+//------------------------------GET/SET METHODS----------------------------//
+//-------------------------------------------------------------------------//
+
+		///Returns the texture object created by the last MapCudaTexture call.
+		cudaTextureObject_t
+		getTexture()
+		{
+			return tObj;
+		}
+
+		///Returns the cuda array obtained by the last MapCudaTexture call.
+		cudaArray*
+		getArray()
+		{
+			return srcArray;
+		}
+	};
+}
+}
+
+
+#endif
@@ -34,9 +34,38 @@ namespace stim{
 				}
 			}
 		}
+
+		///Copies an X x Y window of a 2D texture object into dest, with the
+		///threads of the calling block sharing the work.
+		///@param dest		destination buffer, written at dest[sy * X + sx].
+		///@param src		texture object to read from, fetched as type D.
+		///@param x, y		texture coordinates of the window's first texel.
+		///@param X, Y		size of the window.
+		///@param threadIdx	index of the calling thread within its block.
+		///@param blockDim	dimensions of the calling block.
+		template<typename T, typename D>
+		__device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,
+										 unsigned int x, unsigned int y, unsigned int X, unsigned int Y,
+										 dim3 threadIdx, dim3 blockDim){
+
+			//each thread starts at its own offset and advances by the block size,
+			//so the block as a whole touches every element of the window once
+			for(unsigned int sx = threadIdx.x; sx < X; sx += blockDim.x){
+				for(unsigned int sy = threadIdx.y; sy < Y; sy += blockDim.y){
+
+					//matching position inside the texture
+					const unsigned int tx = x + sx;
+					const unsigned int ty = y + sy;
+
+					dest[sy * X + sx] = tex2D<D>(src, tx, ty);
+				}
+			}
+		}
  
 	}
 }
  
  
-#endif
 \ No newline at end of file
+#endif
+#ifndef STIM_SPIDER_COST_H
+#define STIM_SPIDER_COST_H
+
+#include <assert.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <stdio.h>
+#include <stim/visualization/colormap.h>
+#include <sstream>
+#include <stim/math/vector.h>
+#include <stim/cuda/cudatools/devices.h>
+#include <stim/cuda/cudatools/threads.h>
+#include <stim/cuda/cuda_texture.cuh>
+namespace stim{
+	namespace cuda
+	{
+	
+	stim::cuda::cuda_texture t;  //texture object.
+	float* result;
+	float* print;
+	
+	///Initialization function, allocates the device buffer that receives the
+	///per-template results.
+	///@param DIM_Y			--number of floats to allocate for result.
+	void initArray(int DIM_Y)
+	{
+//			cudaMalloc( (void**) &print, DIM_Y*16*sizeof(float));     ///temporary
+			//report allocation failures instead of silently continuing
+			HANDLE_ERROR(
+				cudaMalloc( (void**) &result, DIM_Y*sizeof(float))
+			);
+	}
+
+	///Deinit function that frees the device buffer allocated by initArray.
+	void cleanUP()
+	{
+			HANDLE_ERROR(
+				cudaFree(result)
+			);
+			result = NULL;		//a repeated cleanUP() then frees nothing
+//			cudaFree(print);         ///temporary
+	}  
+
+	///Evaluates the uniform template at one pixel.
+	///The band edges use integer division of a width of 16, so the template is
+	///1 for x < 2, for 5 < x < 10 and for x > 13, and 0 everywhere else.
+	///@param int x			--location of a pixel.
+	__device__
+	float Template(int x)
+	{
+		const int width = 16;
+
+		const bool leftBand   = x < width/6;
+		const bool rightBand  = x > width*5/6;
+		const bool middleBand = (x > width*2/6) && (x < width*4/6);
+
+		return (leftBand || rightBand || middleBand) ? 1.0 : 0.0;
+	}
+		
+	///Find the difference of the given set of samples and the template
+	///using cuda acceleration. Each block sums |sample/255 - Template(x)| over
+	///its threads and writes the sum to result[blockIdx.y].
+	///Launch requirements: blockDim must be (16, 8) to match the shared array,
+	///and the halving reduction relies on both dimensions being powers of two.
+	///@param cudaTextureObject_t texIn	--texture holding the samples, read as
+	///					  unsigned char.
+	///@param float* result			--a pointer to the memory that stores the result,
+	///					  one float per block row.
+	__global__
+	void get_diff (cudaTextureObject_t texIn, float *result)
+	{       
+		__shared__ float shared[16][8];
+		int x   = threadIdx.x + blockIdx.x * blockDim.x;
+		int y   = threadIdx.y + blockIdx.y * blockDim.y;
+		unsigned int x_t = threadIdx.x;
+		unsigned int y_t = threadIdx.y;
+		int g_idx = blockIdx.y;
+
+		//single-precision literal keeps the arithmetic in float
+		float valIn             = tex2D<unsigned char>(texIn, x, y)/255.0f;
+		float valTemp           = Template(x);
+
+		shared[x_t][y_t]        = fabsf(valIn-valTemp);
+
+		//all differences must be stored before any thread starts adding
+		__syncthreads();
+
+		//fold along x: afterwards shared[0][y_t] holds the sum of row y_t
+		for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1)
+		{
+			if (x_t < step)
+			{
+				shared[x_t][y_t] += shared[x_t + step][y_t];
+			}
+			__syncthreads();	//reached by every thread, outside the branch
+		}
+
+		//fold along y: only column 0 carries the row sums
+		for(unsigned int step = blockDim.y/2; step >= 1; step >>= 1)
+		{
+			if(x_t == 0 && y_t < step)
+			{
+				shared[x_t][y_t] += shared[x_t][y_t + step];
+			}
+			__syncthreads();
+		}
+
+		if(x_t == 0 && y_t == 0)
+			result[g_idx] = shared[0][0];
+	}
+
+
+	///External access-point to the cuda function
+	///@param GLuint texbufferID 	--GLtexture (must be contained in a framebuffer object)
+	///				  that holds the data that will be handed to cuda.
+	///@param GLenum texType	--either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
+	///				  may work with other gl texture types, but untested.
+	///@param DIM_Y, the number of samples in the template.
+	///@return vec holding (index of the template with the lowest cost, that cost
+	///	   truncated to int); (0, 0) when DIM_Y is not positive or the host
+	///	   buffer cannot be allocated.
+	extern "C"
+	stim::vec<int> get_cost(GLint texbufferID, GLenum texType, int DIM_Y)
+	{
+		stim::vec<int> ret(0, 0);
+
+		//a non-positive count would give an invalid grid and an empty host
+		//buffer that is still read below
+		if(DIM_Y <= 0)
+			return ret;
+
+		//initialize the return arrays.
+		float* output;	
+		output = (float* ) malloc(DIM_Y*sizeof(float));
+		if(output == NULL)
+			return ret;
+
+		//Bind the Texture in GL and allow access to cuda.
+		t.MapCudaTexture(texbufferID, texType);
+		initArray(DIM_Y);
+
+		//cuda launch variables: one 16 x 8 block per template.
+		dim3 numBlocks(1, DIM_Y);
+		dim3 threadsPerBlock(16, 8);
+
+		get_diff <<< numBlocks, threadsPerBlock >>> (t.getTexture(), result);
+
+		//a bad launch configuration is only visible through cudaGetLastError
+		HANDLE_ERROR(
+			cudaGetLastError()
+			);
+		HANDLE_ERROR(
+			cudaMemcpy(output, result, DIM_Y*sizeof(float), cudaMemcpyDeviceToHost)
+			);
+
+		//variables for finding the min.
+		float mini = 10000000000000000.0;
+		int idx = 0;
+
+		for( int i = 0; i<DIM_Y; i++){
+			if(output[i] < mini){
+				mini = output[i];
+				idx = i;                                                                   
+			}
+		}
+
+//		stringstream name;      //for debugging
+//		name << "Test.bmp";
+//		stim::gpu2image<float>(print, name.str(),16,218,0,1);
+	  
+		t.UnmapCudaTexture();
+		cleanUP();
+		ret[0] = idx; ret[1] = (int) output[idx];
+		std::cout << output[idx] << std::endl;
+		
+		free(output);
+		return ret;
+	}
+
+	}
+}
+
+
+#endif
@@ -30,7 +30,7 @@ namespace stim{
 			int byi = blockIdx.y;
  
 			//copy the portion of the image necessary for this block to shared memory
-			stim::cuda::sharedMemcpy_tex2D(s, in, bxi - kr, byi, 2 * kr + blockDim.x, 1, threadIdx, blockDim);
+			stim::cuda::sharedMemcpy_tex2D<float, unsigned char>(s, in, bxi - kr, byi, 2 * kr + blockDim.x, 1, threadIdx, blockDim);
  
 			//calculate the thread index
 			int ti = threadIdx.x;
@@ -88,7 +88,7 @@ namespace stim{
 			int byi = blockIdx.y * blockDim.y;
  
 			//copy the portion of the image necessary for this block to shared memory
-			stim::cuda::sharedMemcpy_tex2D(s, in, bxi, byi - kr, 1, 2 * kr + blockDim.y, threadIdx, blockDim);
+			stim::cuda::sharedMemcpy_tex2D<float, unsigned char>(s, in, bxi, byi - kr, 1, 2 * kr + blockDim.y, threadIdx, blockDim);
  
 			//calculate the thread index
 			int ti = threadIdx.y;
@@ -257,4 +257,4 @@ namespace stim{
 	};
 };
  
-#endif
 \ No newline at end of file
+#endif
@@ -7,7 +7,6 @@
 #include <stim/cuda/sharedmem.cuh>
 #include <stim/cuda/templates/conv2sep.cuh>		//GPU-based separable convolution algorithm
  
-#define pi	3.14159
  
 namespace stim{
 	namespace cuda{
@@ -37,6 +36,7 @@ namespace stim{
  
 			//copy the kernel to the GPU
 			T* gpuKernel0;
+			HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth*sizeof(T)));
 			HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
  
 			//perform the gaussian blur as a separable convolution
@@ -58,7 +58,6 @@ namespace stim{
  
 			//copy the kernel to the GPU
 			T* gpuKernel0;
-			HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T)));
 			HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
  
 			//perform the gaussian blur as a separable convolution
@@ -87,4 +86,4 @@ namespace stim{
 	};
 };
  
-#endif
 \ No newline at end of file
+#endif
+#include <assert.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <stdio.h>
+#include <stim/visualization/colormap.h>
+#include <sstream>
+#include <stim/math/vector.h>
+#include <stim/cuda/cudatools/devices.h>
+#include <stim/cuda/cudatools/threads.h>
+#include <stim/cuda/cuda_texture.cuh>
+	stim::cuda::cuda_texture tx;  //texture object.
+	float* print;
+	
+	///Initialization function, allocates the device buffer "print" that holds
+	///216*16 floats.
+	void initArray()
+	{
+			//report allocation failures instead of silently continuing
+			HANDLE_ERROR(
+				cudaMalloc( (void**) &print, 216*16*sizeof(float))     ///temporary
+			);
+	}
+
+	///Deinit function that frees the device buffer allocated by initArray.
+	void cleanUP()
+	{
+			HANDLE_ERROR(
+				cudaFree(print)         ///temporary
+			);
+			print = NULL;		//a repeated cleanUP() then frees nothing
+	}  
+		
+	///Copies the texture into a float buffer, scaled to [0, 1], for debugging.
+	///Texel (x, y) is written to print[y*16 + x]; threads outside the 16 x 216
+	///image do nothing, so any grid that covers the image is valid.
+	///@param cudaTextureObject_t texIn	--texture to read, fetched as unsigned char.
+	///@param float* print			--device buffer of at least 216*16 floats.
+	__global__
+	void get_diff (cudaTextureObject_t texIn, float *print)
+	{       
+		int x   = threadIdx.x + blockIdx.x * blockDim.x;
+		int y   = threadIdx.y + blockIdx.y * blockDim.y;
+
+		//the buffer allocated by initArray holds exactly 216*16 floats
+		if(x >= 16 || y >= 216)
+			return;
+
+		int idx = y*16+x;
+
+		//single-precision literal keeps the arithmetic in float
+		float valIn             = tex2D<unsigned char>(texIn, x, y)/255.0f;
+
+		print[idx]              = fabsf(valIn);             ///temporary
+
+	}
+
+
+	///Debug helper: maps the GL texture into cuda, copies it into a float buffer
+	///and saves that buffer as "FromTex.bmp".
+	///@param GLuint texbufferID 	--GLtexture (must be contained in a framebuffer object)
+	///				  that holds the data that will be handed to cuda.
+	///@param GLenum texType	--either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
+	///				  may work with other gl texture types, but untested.
+	extern "C"
+	void test(GLint texbufferID, GLenum texType)
+	{
+
+		//Bind the Texture in GL and allow access to cuda.
+		tx.MapCudaTexture(texbufferID, texType);
+
+		//allocate the device buffer that receives the copy.
+		initArray();
+		
+		//2 x 2 blocks of 8 x 108 threads cover the 16 x 216 image exactly
+		dim3 numBlocks(2, 2);
+		dim3 threadsPerBlock(8, 108);
+
+		get_diff <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), print);
+
+		//launch errors show up in cudaGetLastError, execution errors at the sync
+		HANDLE_ERROR(
+			cudaGetLastError()
+		);
+		HANDLE_ERROR(
+			cudaDeviceSynchronize()
+		);
+
+		std::stringstream name;      //for debugging
+		name << "FromTex.bmp";
+		stim::gpu2image<float>(print, name.str(),16,216,0,1);
+	  
+		tx.UnmapCudaTexture();
+		cleanUP();
+	}
+
@@ -13,50 +13,79 @@
 #include "stim/math/vector.h"
 #include "stim/math/rect.h"
 #include "stim/math/matrix.h"
-#include "stim/cuda/cost.h"
+#include "stim/cuda/spider_cost.cuh"
 #include <stim/cuda/cudatools/glbind.h>
-#include <stim/visualization/obj.h>
+#include <stim/cuda/arraymath.cuh>
+#include <stim/cuda/cudatools.h>
+#include <stim/cuda/ivote.cuh>
+#include <stim/visualization/glObj.h>
 #include <vector>
+#include <stim/cuda/branch_detection.cuh>
+
+//#include <stim/cuda/testKernel.cuh>
  
 #include <iostream>
 #include <fstream>
+#ifdef TESTING
+	#include <iostream>
+	#include <cstdio>
+	#include <ctime>
+#endif
  
  
-
-/* Technically since gl_spider inherits from gl_texture, we could
-	call the init with a path to an image stack, and upload
-	the images while creating the spider (calling init) */
 namespace stim
 {
  
 template<typename T>
-class gl_spider
+class gl_spider : public virtual gl_texture<T>
 {
 	//doen't use gl_texture really, just needs the GLuint id.
 	//doesn't even need the texture iD really.
 	private:
+
+		//
 		stim::vec<float> p;  	//vector designating the position of the spider.
 		stim::vec<float> d;	//vector designating the orientation of the spider
 						//always a unit vector.
 		stim::vec<float> m;	//magnitude of the spider vector.
 						//mag[0] = length.
 						//mag[1] = width.
-		std::vector<stim::vec<float> > dV;
-		std::vector<stim::vec<float> > pV;
-		std::vector<stim::vec<float> > mV;
-		//currentTransform
-		stim::matrix<float, 4> cT;
+		std::vector<stim::vec<float> > dV;	//A list of all the direction vectors.
+		std::vector<stim::vec<float> > pV;	//A list of all the position vectors.
+		std::vector<stim::vec<float> > mV;	//A list of all the size vectors.
+
+		stim::matrix<float, 4> cT;		//current Transformation matrix
+							//From tissue space to texture space.
 		GLuint texID;
-		stim::vec<float> S;
-		stim::vec<float> R;
-		cudaGraphicsResource_t resource;
-
-		GLuint dList;
-		GLuint fboID;
-		GLuint texbufferID;
-		int numSamples;
-		float stepsize = 3.0;
+		stim::vec<float> S;			//Size of a voxel in the volume.
+		stim::vec<float> R;			//Dimensions of the volume.
+
+
+		//GL and Cuda variables
+		GLuint dList;				//displaylist ID
+		GLuint fboID;				//framebuffer ID
+		GLuint texbufferID;			//texbuffer ID, only necessary for 
+							//cuda aspect of the calculation.
+		GLuint bfboId;
+		GLuint btexbufferID;
+
+		int numSamples;				//The number of templates in the buffer.
+		float stepsize = 6.0;			//Step size.
 		int current_cost;
+		
+		
+		//Tracing variables.
+		std::stack< stim::vec<float> > seeds;	//Variables for tracing
+		std::stack< stim::vec<float> > seedsvecs;
+		std::vector< stim::vec<float> > cL;	//Line currently being traced.
+		stim::glObj<float> sk;
+		stim::vec<float> rev;			//reverse vector;
+		stim::camera camSel;
+		stim::vec<float> ps;
+		stim::vec<float> ups;
+		stim::vec<float> ds;
+		
+
  
 		/// Method for finding the best scale for the spider.
 		/// changes the x, y, z size of the spider to minimize the cost
@@ -73,7 +102,6 @@ class gl_spider
 			dV[best][2]*S[2]*R[2],
 			0);
 			next = (cT*next).norm();
-			//next = (cT*next);
 			setPosition(	p[0]+next[0]*m[0]/stepsize,
 					p[1]+next[1]*m[0]/stepsize,
 					p[2]+next[2]*m[0]/stepsize);
@@ -117,11 +145,46 @@ class gl_spider
 		void
 		branchDetection()
 		{
-			Bind();
 			setMatrix();
 			glCallList(dList+3);
-			
-		//	int best = getCost();
+		//	test(btexbufferID, GL_TEXTURE_2D, 8*27);
+			//run the branch detector on the 16 x 216 branch texture; every result
+			//holds normalised texture coordinates in components 0 and 1
+			std::vector< stim::vec<float> > result = find_branch(
+					btexbufferID, GL_TEXTURE_2D, 16, 216);
+			if(!result.empty())
+			{
+				//NOTE(review): the loop starts at 1, so result[0] is never turned
+				//into a seed -- confirm that this is intended.
+				for(size_t i = 1; i < result.size(); i++)
+				{
+					std::cout << result[i] << std::endl;
+//					std::cout <<"["<< result[i][0]*16.0 << ", "
+//					<< result[i][1]*16.0 << ", " << 
+//					   result[i][2]*216.0 <<"]"  <<  std::endl;
+					
+					//component 1 becomes an angle around a cylinder of radius
+					//0.5, component 0 the position along its axis
+					const double angle = 2*M_PI*(result[i][1]);
+					stim::vec<float> cylp(
+						0.5 * cos(angle),
+						0.5 * sin(angle),
+						result[i][0]-0.5,
+						1.0);
+					stim::vec<float> cylv(
+						0.5 * cos(angle)*S[0]*R[0],
+						0.5 * sin(angle)*S[1]*R[1],
+						(result[i][0]-0.5)*S[2]*R[2],
+						0.0);
+					std::cout << cylp << std::endl;
+					cylp = cT*cylp;
+					cylv = cT*cylv;
+					
+					std::cout << cylp[0]*S[0]*R[0] << ", "
+					<< cylp[1]*S[1]*R[1] << ", " << 
+					   cylp[2]*S[2]*R[2] << std::endl;
+					setSeed(cylp[0]*S[0]*R[0],
+						cylp[1]*S[1]*R[1],
+						cylp[2]*S[2]*R[2]);
+					//norm() hands back the normalised vector (it is used that way
+					//when the spider position is updated), so keep the result
+					cylv = cylv.norm();
+					setSeedVec(cylv[0],
+						   cylv[1],
+						   cylv[2]);
+				}
+			}    
  
 		}
  
@@ -365,6 +428,30 @@ class gl_spider
 		///@param height sets the height of the buffer.
 		///Function for setting up the 2D buffer that stores the samples.
 		void
+		GenerateFBO(unsigned int width, unsigned int height, GLuint &textureID, GLuint &framebufferID)
+		{
+			//Creates a framebuffer object and a single-channel 2D texture of the
+			//given size and returns their ids through textureID / framebufferID.
+			//The texture is not attached to the framebuffer in this function.
+			glGenFramebuffers(1, &framebufferID);
+			glBindFramebuffer(GL_FRAMEBUFFER, framebufferID);
+			int numChannels = 1;
+			//value-initialised so that the texture starts out as all zeros
+			//instead of whatever happened to be in the freshly allocated memory
+			unsigned char* texels = new unsigned char[width * height * numChannels]();
+			glGenTextures(1, &textureID);
+			glBindTexture(GL_TEXTURE_2D, textureID);
+			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
+			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
+			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+			glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE,
+				 width, height, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, texels);   
+			delete[] texels;
+			//leave no framebuffer or 2D texture bound
+			glBindFramebuffer(GL_FRAMEBUFFER, 0); 
+			glBindTexture(GL_TEXTURE_2D, 0);
+				CHECK_OPENGL_ERROR
+		}
+
+		///@param width sets the width of the buffer.
+		///@param height sets the height of the buffer.
+		///Function for setting up the 2D buffer that stores the samples.
+		void
 		GenerateFBO(unsigned int width, unsigned int height)
 		{
 			glGenFramebuffers(1, &fboID);
@@ -382,6 +469,7 @@ class gl_spider
 			delete[] texels;
 			glBindFramebuffer(GL_FRAMEBUFFER, 0); 
 			glBindTexture(GL_TEXTURE_2D, 0);
+				CHECK_OPENGL_ERROR
 		}
  
  
@@ -409,12 +497,99 @@ class gl_spider
  
 			glGetFloatv(GL_TEXTURE_MATRIX, curTrans);
 			cT.set(curTrans);
-//			printTransform();
+		//	printTransform();
  
 			CHECK_OPENGL_ERROR
 			glMatrixMode(GL_MODELVIEW);
 		}
+
+		///Method for controlling the buffer and texture binding in order to properly
+		///do the render to texture. Uses the spider's own framebuffer (fboID),
+		///sample texture (texbufferID) and sample count (numSamples).
+		void
+		Bind()
+		{
+			//the parameterised overload issues exactly the same GL calls, so
+			//delegate instead of keeping a second copy of the sequence
+			Bind(texbufferID, fboID, numSamples);
+		}
+		
+		///Method for controlling the buffer and texture binding in order to properly
+		///do the render to texture.
+		///@param GLuint textureID	-- 2D texture that is attached as the colour target.
+		///@param GLuint framebufferID	-- framebuffer object that is bound for drawing.
+		///@param int nSamples		-- scales the height of the viewport and of the
+		///				   orthographic projection (nSamples * 8).
+		void
+		Bind(GLuint &textureID, GLuint &framebufferID, int nSamples)
+		{
+			float len = 8.0;
+			glBindFramebuffer(GL_FRAMEBUFFER, framebufferID);//set up GL buffer
+			CHECK_OPENGL_ERROR
+
+			//attach the 2D texture as colour attachment 0 of the framebuffer
+			glFramebufferTexture2D(
+				GL_FRAMEBUFFER,
+				GL_COLOR_ATTACHMENT0,
+				GL_TEXTURE_2D,
+				textureID,
+				0);
+			CHECK_OPENGL_ERROR
+
+			glBindFramebuffer(GL_FRAMEBUFFER, framebufferID);
+			CHECK_OPENGL_ERROR
+
+			//draw into colour attachment 0
+			GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
+			glDrawBuffers(1, DrawBuffers);
+			CHECK_OPENGL_ERROR
+
+			glBindTexture(GL_TEXTURE_2D, textureID);
+			CHECK_OPENGL_ERROR
+
+			//clear the target to white and reset both matrix stacks
+			glClearColor(1,1,1,1);
+			glClear(GL_COLOR_BUFFER_BIT);
+			glMatrixMode(GL_PROJECTION);
+			glLoadIdentity();
+			glMatrixMode(GL_MODELVIEW);
+			glLoadIdentity();
+			//viewport and orthographic volume are both 16 wide and nSamples*8 high
+			//NOTE(review): gluOrtho2D is issued while GL_MODELVIEW is the current
+			//matrix mode -- confirm that this is intended.
+			glViewport(0,0,2.0*len, nSamples*len);
+			gluOrtho2D(0.0,2.0*len,0.0,nSamples*len);
+			//enable 3D texturing and bind the texture with id texID
+			glEnable(GL_TEXTURE_3D);
+			glBindTexture(GL_TEXTURE_3D, texID);
+
+			CHECK_OPENGL_ERROR
+		}
  
+		///Method for Unbinding all of the texture resources: unbinds the 3D and 2D
+		///textures, unbinds the framebuffer and disables 3D texturing, checking for
+		///GL errors after every step.
+		void
+		Unbind()
+		{
+			//Finalize GL_buffer
+			glBindTexture(GL_TEXTURE_3D, 0);                      
+			CHECK_OPENGL_ERROR
+			glBindTexture(GL_TEXTURE_2D, 0);                      
+			CHECK_OPENGL_ERROR
+			//framebuffer 0 is bound from here on
+			glBindFramebuffer(GL_FRAMEBUFFER, 0);
+			CHECK_OPENGL_ERROR
+			glDisable(GL_TEXTURE_3D);
+			CHECK_OPENGL_ERROR
+		}
  
  
  
@@ -422,40 +597,22 @@ class gl_spider
 //--------------------------------CUDA METHODS------------------------------//
 //--------------------------------------------------------------------------//
  
-		/// Method for registering the texture with Cuda for shared
-		///	access.
-		void
-		createResource()
-		{
-			HANDLE_ERROR(
-				cudaGraphicsGLRegisterImage(
-					 &resource,
-				 	texbufferID,
-				 	GL_TEXTURE_2D,
-				 	//CU_GRAPHICS_REGISTER_FLAGS_NONE)
-					cudaGraphicsMapFlagsReadOnly)
-			);
-		} 
-		
-		///Method for freeing the texture from Cuda for gl access.
-		void
-		destroyResource()
-		{
-			HANDLE_ERROR(
-				cudaGraphicsUnregisterResource(resource)
-			);		
-		}
  
 		///Entry-point into the cuda code for calculating the cost
 		///	of a given samples array (in texture form) 
 		int
 		getCost()
 		{
-			createResource();
-			stim::vec<int> cost = 	get_cost(resource, numSamples);
-			destroyResource();
-//			if (cost[1] >= 80)
-//				exit(0);
+			#ifdef TESTING
+				start = std::clock();
+			#endif
+			stim::vec<int> cost = 
+				stim::cuda::get_cost(texbufferID, GL_TEXTURE_2D, numSamples);
+			#ifdef TESTING
+				duration_cuda = duration_cuda +
+					 (std::clock() - start) / (double) CLOCKS_PER_SEC;
+				num_cuda = num_cuda + 1.0;
+			#endif
 			current_cost = cost[1];
 			return cost[0];
 		}
@@ -464,6 +621,15 @@ class gl_spider
 		stim::rect<float> hor;
 		stim::rect<float> ver;	
  
+		//Testing and Timing variables.
+		#ifdef TESTING
+			std::clock_t start;
+			double duration_sampling = 0.0;
+			double duration_cuda = 0.0;
+			double num_sampling = 0.0;
+			double num_cuda = 0.0;
+		#endif
+
 //--------------------------------------------------------------------------//
 //-----------------------------CONSTRUCTORS---------------------------------//
 //--------------------------------------------------------------------------//
@@ -508,6 +674,8 @@ class gl_spider
 			Unbind();
 			glDeleteTextures(1, &texbufferID);
 			glDeleteBuffers(1, &fboID);
+			glDeleteTextures(1, &btexbufferID);
+			glDeleteBuffers(1, &bfboId);
 		}
  
 		///@param GLuint id texture that is going to be sampled.
@@ -519,6 +687,7 @@ class gl_spider
 		{
 			texID = id;
 			GenerateFBO(16, numSamples*8);
+			GenerateFBO(16, 216, btexbufferID, bfboId); 
 			setDims(0.6, 0.6, 1.0);
 			setSize(512.0, 512.0, 426.0);
 			setMatrix();
@@ -528,6 +697,9 @@ class gl_spider
 			genDirectionVectors(5*M_PI/4);
 			genPositionVectors();
 			genMagnitudeVectors();
+			Unbind();
+			///temporarily changed to 216
+			Bind(btexbufferID, bfboId, 27);
 			DrawCylinder();
 			Unbind();
 		}
@@ -612,7 +784,6 @@ class gl_spider
 		{
 			m[0] = mag;
 			m[1] = mag;
-	//		m[2] = mag;
 		}
  
  
@@ -655,56 +826,104 @@ class gl_spider
 			}
 			return out;
 		}
-		
-		///Function to get back the framebuffer Object attached to the spider.
-		///For external access.
-		GLuint
-		getFB()
+
+		///@param pos, the position of the seed to be added.
+		///Adds a seed to the seed list.
+		///Assumes that the coordinates passes are in tissue space.
+		void
+		setSeed(stim::vec<float> pos)
 		{
-			return fboID;
+			seeds.push(pos);
 		}
  
-		///Method for controling the buffer and texture binding in order to properly
-		///do the render to texture.
 		void
-		Bind()
+		setSeedVec(stim::vec<float> pos)
 		{
-			float len = 8.0;
-			glBindFramebuffer(GL_FRAMEBUFFER, fboID);//set up GL buffer		
-			glFramebufferTexture2D(
-				GL_FRAMEBUFFER,
-				GL_COLOR_ATTACHMENT0,
-				GL_TEXTURE_2D,
-				texbufferID,
-				0);
-			glBindFramebuffer(GL_FRAMEBUFFER, fboID);
-			GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
-			glDrawBuffers(1, DrawBuffers);
-			glBindTexture(GL_TEXTURE_2D, texbufferID);
-			glClearColor(1,1,1,1);
-			glClear(GL_COLOR_BUFFER_BIT);
-			glMatrixMode(GL_PROJECTION);
-			glLoadIdentity();
-			glMatrixMode(GL_MODELVIEW);
-			glLoadIdentity();
-			glViewport(0,0,2.0*len, numSamples*len);
-			gluOrtho2D(0.0,2.0*len,0.0,numSamples*len);
-			glEnable(GL_TEXTURE_3D);
-			glBindTexture(GL_TEXTURE_3D, texID);
+			seedsvecs.push(pos);
+		}
  
-			CHECK_OPENGL_ERROR
+		///Pushes a new seed position onto the seed stack.
+		///@param x, y, z: coordinates of the seed.
+		///Assumes that the coordinates passed are in tissue space.
+		void
+		setSeed(float x, float y, float z)
+		{
+			stim::vec<float> seedPos(x, y, z);
+			seeds.push(seedPos);
+		}
+
+		///Pushes a vector onto seedsvecs; trace() reads the top of that stack
+		///as the starting direction of the seed it is about to trace.
+		///@param x, y, z: components of the vector.
+		void
+		setSeedVec(float x, float y, float z)
+		{
+			stim::vec<float> seedDir(x, y, z);
+			seedsvecs.push(seedDir);
+		}
+
+		///Returns a copy of the seed on top of the seed stack without
+		///removing it. The stack must not be empty when this is called.
+		stim::vec<float> 
+		getLastSeed()
+		{
+			return seeds.top();
+		}
+
+		std::stack<stim::vec<float> >
+		getSeeds()	//hands back the seed stack by value, i.e. a copy
+		{
+			return seeds;
+		}
+
+		///Reports whether the seed stack has no seeds left.
+		bool
+		Empty()
+		{
+			const bool noSeedsLeft = seeds.empty();
+			return noSeedsLeft;
+		}
+		///Reads seeds from a text file, one seed per line.
+		///A line holds six numbers "x y z u v w": (x, y, z) is pushed as the
+		///seed position and (u, v, w) as the vector that trace() later uses
+		///as the starting direction. Lines that do not begin with six numbers
+		///(headers, blank lines) are skipped.
+		///@param file path of the seed file.
+		///Assumes that the coordinates passed are in tissue space.
+		///Prints "failed" to std::cerr when the file cannot be opened.
+		void
+		setSeeds(std::string file)
+		{
+			std::ifstream myfile(file.c_str());
+			if(!myfile.is_open())
+			{
+				std::cerr<<"failed" << std::endl;
+				return;
+			}
+
+			std::string line;
+			while(std::getline(myfile, line))
+			{
+				float x, y, z, u, v, w;
+				//parse the line that was just read; it is only used when all
+				//six values converted, so no stale or partial seed is pushed
+				if(sscanf(line.c_str(), "%f %f %f %f %f %f",
+					&x, &y, &z, &u, &v, &w) != 6)
+					continue;
+				seeds.push(stim::vec<float>(x, y, z));
+				seedsvecs.push(stim::vec<float>(u, v, w));
+			}
+			myfile.close();
 		}
  
-		///Method for Unbinding all of the texture resources
 		void
-		Unbind()
+		saveNetwork(std::string name)
 		{
-			//Finalize GL_buffer
-			glBindTexture(GL_TEXTURE_3D, 0);                      
-			glDisable(GL_TEXTURE_3D);
-			glBindFramebuffer(GL_FRAMEBUFFER,0);
-			glBindTexture(GL_TEXTURE_2D, 0);
+			sk.save(name);
 		}
+
+		stim::glObj<float>
+		getNetwork()	//returns the network object sk by value
+		{
+			return sk;	//sk is the glObj that trace() and traceLine() add lines and vertices to
+		}
+		
+		///Returns the id stored in bfboId, the framebuffer that Step() binds before calling branchDetection().
+		///For external access.
+		GLuint
+		getFB()
+		{
+			return bfboId;
+		}
+
 //--------------------------------------------------------------------------//
 //-----------------------------TEMPORARY METHODS----------------------------//
 //--------------------------------------------------------------------------//
@@ -726,12 +945,28 @@ class gl_spider
 		int
 		Step()
 		{
-		//	Bind();
+			Bind();
+			CHECK_OPENGL_ERROR
+			#ifdef TESTING
+				start = std::clock();
+			#endif
 			findOptimalDirection();
 			findOptimalPosition();
 			findOptimalScale();
+			Unbind();
+			CHECK_OPENGL_ERROR
+
+			Bind(btexbufferID, bfboId, 27);
+			CHECK_OPENGL_ERROR
 			branchDetection();
-		//	Unbind();
+			CHECK_OPENGL_ERROR
+			Unbind();
+			CHECK_OPENGL_ERROR
+			#ifdef TESTING
+				duration_sampling = duration_sampling +
+					 (std::clock() - start) / (double) CLOCKS_PER_SEC;
+				num_sampling = num_sampling + 1.0; 
+			#endif
 			return current_cost;
 		}
  
@@ -760,36 +995,217 @@ class gl_spider
 		void
 		DrawCylinder()
 		{	 
-			 Bind();
 			 glNewList(dList+3, GL_COMPILE);
 			 float z0 = -0.5; float z1 = 0.5; float r0 = 0.5;
 			 float x,y;
-			 float xold = 0.5; float yold = 0.5;
+			 float xold = 0.5; float yold = 0.0;
 			 float step = 360.0/numSamples;
-		 	 int j = 0;
 			 glEnable(GL_TEXTURE_3D);
 			 glBindTexture(GL_TEXTURE_3D, texID);
 			 glBegin(GL_QUAD_STRIP);
+			 int j = 0;
 			 	for(float i = step; i <= 360.0; i += step)
 			 	{
 					 x=r0*cos(i*2.0*M_PI/360.0);
 					 y=r0*sin(i*2.0*M_PI/360.0);
 					 glTexCoord3f(x,y,z0); 
-					 glVertex2f(0.0, j*0.1+0.1);
+					 glVertex2f(0.0, j*0.2+0.2);
 					 glTexCoord3f(x,y,z1); 
-					 glVertex2f(16.0, j*0.1+0.1);
+					 glVertex2f(16.0, j*0.2+0.2);
 					 glTexCoord3f(xold,yold,z1); 
-					 glVertex2f(16.0, j*0.1); 
+					 glVertex2f(16.0, j*0.2); 
 					 glTexCoord3f(xold,yold,z0); 
-					 glVertex2f(0.0, j*0.1);
+					 glVertex2f(0.0, j*0.2);
 					 xold=x;
 					 yold=y;
 					 j++;
 				}
 		      	 glEnd();  
 			 glEndList();
-			 Unbind();
 		}
+		
+
+		///Traces every seed on the seed stack to completion, first along its
+		///stored direction and then along the reverse direction. A seed and
+		///its direction are popped once both passes are done.
+		///@param min_cost cost threshold handed to traceLine(); a pass stops
+		///	as soon as a step costs more than this.
+		void
+		trace(int min_cost)
+		{
+			Bind();
+			//seeds and seedsvecs are filled through separate setters, so stop
+			//when either runs out instead of calling top() on an empty stack.
+			while(!seeds.empty() && !seedsvecs.empty())
+			{
+				//default reverse direction; traceLine() replaces it after its
+				//first accepted step. Resetting it for every seed keeps the
+				//direction of a previous seed from leaking into this one.
+				rev = stim::vec<float>(0.0,0.0,1.0);
+
+				//clear the currently traced line and start a new one.
+				cL.clear();
+				sk.Begin(stim::OBJ_LINE);
+				stim::vec<float> curSeed = seeds.top();
+				stim::vec<float> curSeedVec = seedsvecs.top();
+
+				//first pass: follow the direction stored with the seed
+				setPosition(curSeed);
+				setDirection(curSeedVec);
+				setMagnitude(16.0);
+				cL.push_back(curSeed);
+				sk.createFromSelf(GL_SELECT);
+				traceLine(min_cost);
+
+				//second pass: flip what was collected so far and restart
+				//from the seed along the reverse direction
+				sk.rev();
+				std::reverse(cL.begin(), cL.end());
+				setPosition(curSeed);
+				setDirection(rev);
+				setMagnitude(16.0);
+				sk.createFromSelf(GL_SELECT);
+				traceLine(min_cost);
+
+				sk.End();
+				seeds.pop();
+				seedsvecs.pop();
+			}
+			Unbind();
+		}
+
+		///Steps the spider from its current state in one direction until a
+		///stop condition is met, adding each accepted step to the line that
+		///is currently being built in sk.
+		///A pass stops when the step cost exceeds min_cost, the position
+		///leaves the box [0, S*R] on any axis, the magnitude leaves [1, 75],
+		///or selectObject() reports a hit on something already traced.
+		///@param min_cost largest step cost that is still accepted.
+		void
+		traceLine(int min_cost)
+		{
+			stim::vec<float> pos;
+			bool started = false;
+			stim::vec<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]);
+			while(true)
+			{
+				//stop once a step costs more than the caller allows
+				if(Step() > min_cost)
+					break;
+
+				//Have we found an edge? Every component of the same vector
+				//(pos) is tested against the same box.
+				pos = getPosition();
+				if(pos[0] > size[0] || pos[1] > size[1] || pos[2] > size[2]
+				 || pos[0] < 0 || pos[1] < 0 || pos[2] < 0)
+					break;
+
+				//If this is the first step in the trace, save the direction
+				//(to be used later to trace the fiber in the opposite direction)
+				if(!started){
+					rev = -getDirection();
+					started = true;
+				}
+
+				//Has the template size gotten unreasonable?
+				m = getMagnitude();
+				if(m[0] > 75 || m[0] < 1)
+					break;
+
+				//Have we hit something previously traced?
+				if(selectObject(pos, getDirection(), m[0]))
+					break;
+
+				//NOTE(review): the vertex is stored from member p, not from
+				//pos - confirm which coordinate space sk expects.
+				sk.TexCoord(m[0]);
+				sk.Vertex(p[0], p[1], p[2]);
+			}
+		}
+
+
+		///Renders the traced network in OpenGL selection mode, as seen from
+		///loc looking along dir, and reports whether anything was hit.
+		///@param loc position the selection camera is placed at.
+		///@param dir direction used to build the look-at point.
+		///@param mag magnitude; the view box extends mag/3 to each side and
+		///	(mag/3)/4 in depth.
+		///@return the result of processHits() on the recorded hits.
+		bool
+		selectObject(stim::vec<float> loc, stim::vec<float> dir, float mag) 
+		{
+			float s = 3.0;
+			const float reach = mag/s;
+
+			//switch to selection mode with a buffer for the hit records
+			GLuint hitRecords[2048];
+			glSelectBuffer(2048, hitRecords);
+			glDisable(GL_CULL_FACE);
+			(void) glRenderMode(GL_SELECT);
+
+			//start the name stack with a single entry
+			glInitNames();
+			glPushName(1);
+			CHECK_OPENGL_ERROR
+
+			//place the selection camera: what would the vessel see in front of it
+			camSel.setPosition(loc);
+			camSel.setFocalDistance(reach);
+			camSel.LookAt((loc[0]+dir[0]*mag/s),
+				 (loc[1]+dir[1]*mag/s),
+				 (loc[2]+dir[2]*mag/s));
+			ps  = camSel.getPosition();
+			ups = camSel.getUp();
+			ds  = camSel.getLookAt();
+
+			//save both matrix stacks and install the selection view
+			glMatrixMode(GL_PROJECTION);
+			glPushMatrix();
+			glLoadIdentity();
+			glOrtho(-reach, reach, -reach, reach, 0.0, reach/4.0);
+			glMatrixMode(GL_MODELVIEW);
+			glPushMatrix();
+			glLoadIdentity();
+			CHECK_OPENGL_ERROR
+			gluLookAt(ps[0], ps[1], ps[2],
+				 ds[0], ds[1], ds[2],
+				 ups[0], ups[1], ups[2]);
+
+			//draw the stored network, then the line in progress under its own name
+			sk.Render();
+			CHECK_OPENGL_ERROR
+			glLoadName((int) sk.numL());
+			sk.RenderLine(cL);
+			glFlush();
+
+			//restore both matrix stacks
+			glMatrixMode(GL_PROJECTION);
+			glPopMatrix();
+			glMatrixMode(GL_MODELVIEW);
+			CHECK_OPENGL_ERROR
+			glPopMatrix();
+
+			//leaving selection mode yields the number of hit records
+			glEnable(GL_CULL_FACE);
+			const GLint hits = glRenderMode(GL_RENDER);
+			return processHits(hits, hitRecords);
+		}
+
+		//Given the number of hit records (hits) and the memory holding them
+		//(buffer), prints every record and returns whether a hit took place.
+		//Each record is read as: name count, minimum depth, maximum depth,
+		//followed by that many names.
+		//@param hits number of hit records, as returned by glRenderMode().
+		//@param buffer selection buffer holding the records.
+		//@return false when hits is 0, true otherwise.
+		bool
+		processHits(GLint hits, GLuint buffer[])
+		{
+			printf("hits = %d\n", hits);	//hits is signed, so %d rather than %u
+			GLuint *ptr = buffer;
+			for (GLint i = 0; i < hits; i++) { /*  for each hit  */
+				GLuint names = *ptr;
+				printf (" number of names for hit = %u\n", names);
+				ptr += 3;	//step over the name count and the min/max depth values
+				printf ("   the name is ");
+				for (GLuint j = 0; j < names; j++) {     /*  for each name */
+					printf ("%u ", *ptr); ptr++;
+				}
+				printf ("\n");
+			}
+			return hits != 0;
+		}
+
+
 };
 }
 #endif
@@ -29,6 +29,8 @@ private:
 	void
 	init()
 	{
+		if(glIsList(dList))
+			glDeleteLists(dList, 1);
 		dList = glGenLists(1);
 		glListBase(dList);
  
@@ -40,15 +42,23 @@ private:
 	}
  
 	void
-	Create()
+	Create(GLenum mode)
 	{
+//		GLuint selectBuf[2048];
+//		GLint hits;
+//		glSelectBuffer(2048, selectBuf);
+		
 		int len = (int) stim::obj<T>::numL();
 		std::vector< stim::vec<float> > line;
 		glNewList(dList, GL_COMPILE);
 	//	glColor3f(0.0, 1.0, 0.0);
-		glLineWidth(2.5);
+		glLineWidth(3.5);
 		for(int i = 0; i < len; i++){
 			line = stim::obj<T>::getL_V(i);
+			if(mode == GL_SELECT)
+			{
+				glLoadName(i);
+			}
 			glColor3ub(rand()%255, rand()%255, rand()%255);
 			glBegin(GL_LINE_STRIP);
 			for(int j = 0; j < line.size(); j++){
@@ -71,21 +81,21 @@ public:
 	}
  
 	void
-	createFromSelf()
+	createFromSelf(GLenum mode = GL_RENDER)
 	{
 	//	glPopMatrix();
 		init();
-		Create();
+		Create(mode);
 	//	glPushMatrix();
 	}
  
 	void
-	createFromFile(std::string filename)
+	createFromFile(std::string filename, GLenum mode = GL_RENDER)
 	{
 		stim::obj<T>::load(filename);
 		glPushMatrix();		//Safety Operation to avoid changing the current matrix.
 		init();
-		Create();
+		Create(mode);
 		glPopMatrix();
 		CHECK_OPENGL_ERROR
 	}
@@ -651,14 +651,14 @@ public:
 		l.resize(nP);
  
 		//copy the points from the point list to the stim vector
-		unsigned int pi;
+		unsigned int pie;
 		for(unsigned int p = 0; p < nP; p++){
  
 			//get the index of the geometry point
-			pi = L[i][p][0] - 1;
+			pie = L[i][p][0] - 1;
  
 			//get the coordinates of the current point
-			stim::vec<T> newP = V[pi];
+			stim::vec<T> newP = V[pie];
  
 			//copy the point into the vector
 			l[p] = newP;
@@ -705,14 +705,14 @@ public:
 		l.resize(nP);
  
 		//copy the points from the point list to the stim vector
-		unsigned int pi;
+		unsigned int pie;
 		for(unsigned int p = 0; p < nP; p++){
  
 			//get the index of the geometry point
-			pi = L[i][p][1] - 1;
+			pie = L[i][p][1] - 1;
  
 			//get the coordinates of the current point
-			stim::vec<T> newP = VT[pi];
+			stim::vec<T> newP = VT[pie];
  
 			//copy the point into the vector
 			l[p] = newP;