Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib

Jiaming Guo
2 parents 5068402b 56737905
Showing 34 changed files with 1034 additions and 339 deletions Show diff stats
matlab/cls_ConfusionMatrix.m
matlab/cls_MeanClassFeatures.m
matlab/cls_PlotConfusionMatrix.m
matlab/stim_images2matrix.m
python/structen.py
stim/cuda/ivote.cuh
stim/cuda/ivote/down_sample.cuh
stim/cuda/ivote/re_sample.cuh
stim/cuda/ivote_atomic_bb.cuh
stim/envi/agilent_binary.h
stim/envi/envi.h
stim/envi/hsi.h
stim/iVote/ivote2.cuh
stim/iVote/ivote2/iter_vote2.cuh
stim/cuda/ivote/local_max.cuh → stim/iVote/ivote2/local_max.cuh
stim/cuda/ivote/update_dir.cuh → stim/iVote/ivote2/update_dir.cuh
stim/cuda/ivote/update_dir_bb.cuh → stim/iVote/ivote2/update_dir_bb.cuh
stim/cuda/ivote/update_dir_shared.cuh → stim/iVote/ivote2/update_dir_shared.cuh
stim/cuda/ivote/update_dir_threshold_global.cuh → stim/iVote/ivote2/update_dir_threshold_global.cuh
stim/cuda/ivote/vote.cuh → stim/iVote/ivote2/vote.cuh
+function M = cls_ConfusionMatrix(GT, T)
+%Builds the confusion matrix M for ground-truth labels GT and predicted labels T.
+%   M(i, j) is the number of samples whose ground-truth label is the i-th
+%   unique value of GT and whose predicted label is the j-th unique value of GT.
+%   The class list comes from GT only, so predictions equal to a label that
+%   never occurs in GT are not counted anywhere.
+
+labels = unique(GT);                    %class labels, taken from the ground truth
+num_classes = length(labels);
+M = zeros(num_classes);                 %rows index the true class, columns the predicted class
+
+for row = 1:num_classes
+    is_truth = (GT == labels(row));     %samples that truly belong to this class
+    for col = 1:num_classes
+        M(row, col) = nnz(is_truth & (T == labels(col)));
+    end
+end
 \ No newline at end of file
+function S = cls_MeanClassFeatures(F, T)
+%Calculates the mean set of features for each class given the feature matrix F and targets T
+%   F holds one sample per row (it is indexed as F(T == class, :)) and T holds
+%   one class label per sample.
+%   S has one column per class, ordered like unique(T), and one row per feature.
+
+C = unique(T);                          %get the class IDs
+nc = length(C);
+
+S = zeros(nc, size(F, 2));              %allocate space for the mean feature vectors
+for c = 1:nc                            %for each class
+    %average down the rows explicitly: without the dimension argument mean()
+    %reduces a class with a single sample (a 1 x D row) to one scalar
+    S(c, :) = mean(F(T == C(c), :), 1);
+end
+
+S = S';                                 %return the means as columns
 \ No newline at end of file
+function cls_PlotConfusionMatrix(M)
+%Plots the confusion matrix M as two stacked bar charts in the current figure.
+%   Top panel:    every entry divided by the total of its column (plotted transposed).
+%   Bottom panel: every entry divided by the total of its row.
+
+%top panel: column-normalized matrix
+col_totals = sum(M, 1);
+by_column = M ./ repmat(col_totals, size(M, 1), 1);
+subplot(2, 1, 1);
+bar(by_column');
+
+%bottom panel: row-normalized matrix
+row_totals = sum(M, 2);
+by_row = M ./ repmat(row_totals, 1, size(M, 2));
+subplot(2, 1, 2);
+bar(by_row);
 \ No newline at end of file
+function S = stim_images2matrix(filemask)
+%This function loads a set of images as a 3D matrix. Color images are
+%converted to grayscale when loaded, so the resulting matrix is always 3D
+%with size X x Y x Z, where:
+%   X is the size of the images along the first dimension (size(I, 1))
+%   Y is the size of the images along the second dimension (size(I, 2))
+%   Z is the number of images
+%
+%   All images are assumed to be the same size (though they do not have to
+%   be the same file format or number of bits per pixel). The result is uint8.
+
+    files = dir(filemask);
+    if isempty(files)
+        error('stim_images2matrix:noFiles', 'No files match the mask "%s"', filemask);
+    end
+
+    %use the first image to figure out the slice size
+    I = imread(fullfile(files(1).folder, files(1).name));
+    X = size(I, 1);
+    Y = size(I, 2);
+    Z = length(files);
+
+    S = zeros(X, Y, Z, 'uint8');
+
+    h = waitbar(0, ['Loading ' num2str(Z) ' images...']);
+    for i = 1:Z
+        I = imread(fullfile(files(i).folder, files(i).name));  %load the i-th file (not the first one again)
+        if size(I, 3) == 3                                      %rgb2gray only accepts color input
+            I = rgb2gray(I);
+        end
+        S(:, :, i) = I;
+        waitbar(i/Z, h);
+    end
+    close(h);
+end
+    
+
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Mar 12 21:54:40 2017
+
+@author: david
+"""
+
+import numpy
+import scipy.ndimage
+import progressbar
+import glob
+
+def st2(I, s=1, dtype=numpy.float32):   
+    
+    #calculates the 2D structure tensor for an image using a gaussian window with standard deviation s
+    
+    #calculate the gradient
+    dI = numpy.gradient(I)
+    
+    #calculate the dimensions of the tensor field
+    field_dims = [dI[0].shape[0], dI[0].shape[1], 3]
+    
+    #allocate space for the tensor field
+    Tg = numpy.zeros(field_dims, dtype=dtype)
+    
+    #calculate the gradient components of the tensor
+    ti = 0
+    for i in range(2):
+        for j in range(i + 1):
+            Tg[:, :, ti] = dI[j] * dI[i]
+            ti = ti + 1
+    
+    #blur the tensor field
+    T = numpy.zeros(field_dims, dtype=dtype)
+    
+    for i in range(3):
+        T[:, :, i] = scipy.ndimage.filters.gaussian_filter(Tg[:, :, i], [s, s])
+
+    
+    return T
+
+def st3(I, s=1):
+    #calculate the structure tensor field for the 3D input image I given the window size s in 3D
+    #check the format for the window size
+    if type(s) is not list:
+        s = [s] * 3
+    elif len(s) == 1:
+        s = s * 3
+    elif len(s) == 2:
+        s.insert(1, s[0])
+        
+    print("\nCalculating gradient...")
+    dI = numpy.gradient(I)
+    #calculate the dimensions of the tensor field
+    field_dims = [dI[0].shape[0], dI[0].shape[1], dI[0].shape[2], 6]
+    
+    #allocate space for the tensor field
+    Tg = numpy.zeros(field_dims, dtype=numpy.float32)
+    
+    #calculate the gradient components of the tensor
+    ti = 0
+    print("Calculating tensor components...")
+    bar = progressbar.ProgressBar()
+    bar.max_value = 6
+    for i in range(3):
+        for j in range(i + 1):
+            Tg[:, :, :, ti] = dI[j] * dI[i]
+            ti = ti + 1
+            bar.update(ti)
+    
+    #blur the tensor field
+    T = numpy.zeros(field_dims, dtype=numpy.float32)
+    
+    print("\nConvolving tensor field...")
+    bar = progressbar.ProgressBar()
+    bar.max_value = 6
+    for i in range(6):
+        T[:, :, :, i] = scipy.ndimage.filters.gaussian_filter(Tg[:, :, :, i], s)
+        bar.update(i+1)
+        
+    return T
+
+def st(I, s=1):
+    if I.ndim == 3:
+        return st3(I, s)
+    elif I.ndim == 2:
+        return st2(I, s)
+    else:
+        print("Image must be 2D or 3D")
+    return
+        
+   
+    
+def sym2mat(T):
+    #Calculate the full symmetric matrix from a list of lower triangular elements.
+    #The lower triangular components in the input field T are assumed to be the
+    #   final dimension of the input matrix.
+    
+    #       | 1  2  4  7  |
+    #       | 0  3  5  8  |
+    #       | 0  0  6  9  |
+    #       | 0  0  0  10 |
+   
+    in_s = T.shape
+    
+    #get the number of tensor elements
+    n = in_s[T.ndim - 1]
+    
+    #calculate the dimension of the symmetric matrix
+    d = int(0.5 * (numpy.sqrt(8. * n + 1.) - 1.))
+    
+    #calculate the dimensions of the output field
+    out_s = list(in_s)[:-1] + [d] + [d]
+
+    #allocate space for the output field
+    R = numpy.zeros(out_s)
+    
+    ni = 0
+    for i in range(d):
+        for j in range(i + 1):
+            R[..., i, j] = T[..., ni]
+            if i != j:
+                R[..., j, i] = T[..., ni]
+            ni = ni + 1
+    
+    return R   
+
+def st2vec(S, vector='largest'):
+    """Extract one eigenvector per pixel from a structure tensor slice.
+
+    S is indexed as a 2D field of packed tensor components (see sym2mat).
+    vector selects the eigenvector by eigenvalue rank: 'smallest', 'largest',
+    or any other value for the eigenvalue at sorted position 1.
+
+    Returns an array of shape (S.shape[0], S.shape[1], 3); only the first d
+    components are written, where d is the tensor dimension.
+    """
+      
+    #convert the field to a full rank-2 tensor
+    T = sym2mat(S);
+    del(S)
+    
+    #calculate the eigenvectors and eigenvalues
+    #NOTE(review): the matrices are symmetric, so numpy.linalg.eigh may be the
+    #   better fit here - confirm before changing, ordering and signs can differ
+    l, v = numpy.linalg.eig(T)
+    
+    #get the dimension of the tensor field
+    d = T.shape[2]
+    
+    #allocate space for the vector field
+    V = numpy.zeros([T.shape[0], T.shape[1], 3])
+    
+    #indices that sort the eigenvalues of every pixel in ascending order
+    idx = l.argsort()
+    
+    #for each eigenvector column di, copy it into the pixels where column di
+    #holds the requested eigenvalue rank
+    for di in range(d):
+        if vector == 'smallest':
+            b = idx[:, :, 0] == di
+        elif vector == 'largest':
+            b = idx[:, :, d-1] == di
+        else:
+            b = idx[:, :, 1] == di
+        V[b, 0:d] = v[b, :, di]
+    
+    return V
+
+def loadstack(filemask):
+    #Load an image stack as a 3D grayscale array
+    
+    #get a list of all files matching the given mask
+    files = [file for file in glob.glob(filemask)]
+    
+    #calculate the size of the output stack
+    I = scipy.misc.imread(files[0])
+    X = I.shape[0]
+    Y = I.shape[1]
+    Z = len(files)
+    
+    #allocate space for the image stack
+    M = numpy.zeros([X, Y, Z]).astype('float32')
+    
+    #create a progress bar
+    bar = progressbar.ProgressBar()
+    bar.max_value = Z
+    
+    #for each file
+    for z in range(Z):
+        #load the file and save it to the image stack
+        M[:, :, z] = scipy.misc.imread(files[z], flatten="True").astype('float32')
+        bar.update(z+1)
+    return M
+
+def anisotropy(S):
+
+    Sf = sym2mat(S)
+    
+    #calculate the eigenvectors and eigenvalues
+    l, v = numpy.linalg.eig(Sf)
+    
+    #store the sorted eigenvalues
+    ls = numpy.sort(l)
+    l0 = ls[:, :, 0]
+    l1 = ls[:, :, 1]
+    l2 = ls[:, :, 2]
+    
+    #calculate the linear anisotropy
+    Cl = (l2 - l1)/(l2 + l1 + l0)
+    
+    #calculate the planar anisotropy
+    Cp = 2 * (l1 - l0) / (l2 + l1 + l0)
+    
+    #calculate the spherical anisotropy
+    Cs = 3 * l0 / (l2 + l1 + l0)
+    
+    #calculate the fractional anisotropy
+    l_hat = (l0 + l1 + l2)/3
+    fa_num = (l2 - l_hat) ** 2 + (l1 - l_hat) ** 2 + (l0 - l_hat) ** 2;
+    fa_den = l0 ** 2 + l1 ** 2 + l2 ** 2
+    FA = numpy.sqrt(3./2.) * numpy.sqrt(fa_num) / numpy.sqrt(fa_den)
+    
+    return FA, Cl, Cp, Cs
+
+def st2amira(filename, T):
+    #generates a tensor field that can be imported into Amira
+    
+    #   0    dx dx   ----> 0
+    #   1    dx dy   ----> 1
+    #   2    dy dy   ----> 3
+    #   3    dx dz   ----> 2
+    #   4    dy dz   ----> 4
+    #   5    dz dz   ----> 5
+    
+    #swap the 2nd and 3rd tensor components
+    A = numpy.copy(T)
+    A[..., 3] = T[..., 2]
+    A[..., 2] = T[..., 3]
+    
+    #roll the tensor axis so that it is the leading component
+    #A = numpy.rollaxis(A, A.ndim - 1)
+    A.tofile(filename)
+    print("\n", A.shape)
+
+def resample3(T, s=2):
+    #resample a tensor field by an integer factor s
+    #This function first convolves the field with a box filter and then
+    #   re-samples to create a smaller field
+    
+    #check the format for the window size
+    if type(s) is not list:
+        s = [s] * 3
+    elif len(s) == 1:
+        s = s * 3
+    elif len(s) == 2:
+        s.insert(1, s[0])
+    s = numpy.array(s)
+    
+    bar = progressbar.ProgressBar()
+    bar.max_value = T.shape[3]
+    
+    #blur with a uniform box filter of size r
+    for t in range(T.shape[3]):
+        T[..., t] = scipy.ndimage.filters.uniform_filter(T[..., t], 2 * s)
+        bar.update(t+1)
+        
+    #resample at a rate of r
+    R = T[::s[0], ::s[1], ::s[2], :]
+    return R
+
+def color3(prefix, T, vector='largest', aniso=True):
+    #Saves a stack of color images corresponding to the eigenvector and optionally scaled by anisotropy
+    
+    bar = progressbar.ProgressBar()
+    bar.max_value = T.shape[2]
+    
+    #for each z-axis slice
+    for z in range(T.shape[2]):
+        S = T[:, :, z, :]                           #get the slice
+        V = st2vec(S, vector='smallest')   #calculate the vector
+        C = numpy.absolute(V)                       #calculate the absolute value
+        
+        if aniso == True:                              #if the image is scaled by anisotropy
+            FA, Cl, Cp, Cs = anisotropy(S)          #calculate the anisotropy of the slice
+            if vector == 'largest':
+                A = Cl
+            elif vector == 'smallest':
+                A = Cp
+        else:                                       #otherwise just scale by 1
+            A = numpy.ones(T.shape[0], T.shape[1])
+        image = C * numpy.expand_dims(A, 3)
+        
+        filename = prefix + str(z).zfill(4) + ".bmp"
+        scipy.misc.imsave(filename, image)
+        bar.update(z + 1)
 \ No newline at end of file
-#ifndef STIM_CUDA_IVOTE_H
-#define STIM_CUDA_IVOTE_H
-
-#include <stim/cuda/ivote/down_sample.cuh>
-#include <stim/cuda/ivote/local_max.cuh>
-#include <stim/cuda/ivote/update_dir.cuh>
-#include <stim/cuda/ivote/vote.cuh>
-
-namespace stim{
-	namespace cuda{
-	
-	}
-}
-
-
-
-#endif
 \ No newline at end of file
-#ifndef STIM_CUDA_DOWN_SAMPLE_H
-#define STIM_CUDA_DOWN_SAMPLE_H
-
-#include <iostream>
-#include <cuda.h>
-#include <stim/cuda/cudatools.h>
-#include <stim/cuda/templates/gaussian_blur.cuh>
-
-namespace stim{
-	namespace cuda{
-
-		template<typename T>
-		__global__ void down_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
-
-			unsigned int sigma_ds = 1/resize;
-			unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
-			unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
-			
-			
-			// calculate the 2D coordinates for this current thread.
-			int xi = blockIdx.x * blockDim.x + threadIdx.x;
-			int yi = blockIdx.y;
-			// convert 2D coordinates to 1D
-			int i = yi * x_ds + xi;
-			
-			if(xi< x_ds && yi< y_ds){
-
-				int x_org = xi * sigma_ds ;
-				int y_org = yi * sigma_ds ;
-				int i_org = y_org * x + x_org;
-				gpuI[i] = gpuI0[i_org];
-			}
-
-		}
-
-
-		/// Applies a Gaussian blur to a 2D image stored on the GPU
-		template<typename T>
-		void gpu_down_sample(T* gpuI, T* gpuI0, T resize, size_t x, size_t y){
-
-			
-			unsigned int sigma_ds = (unsigned int)(1.0f/resize);
-			size_t x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
-			size_t y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
-			
-			//get the number of pixels in the image
-//			unsigned int pixels_ds = x_ds * y_ds;
-			
-			unsigned int max_threads = stim::maxThreadsPerBlock();
-			dim3 threads(max_threads, 1);
-			dim3 blocks(x_ds/threads.x + (x_ds %threads.x == 0 ? 0:1) , y_ds);
-			
-			stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y);
-			
-			//resample the image
-			down_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y);
-
-		}
-
-		/// Applies a Gaussian blur to a 2D image stored on the CPU
-		template<typename T>
-		void cpu_down_sample(T* re_img, T* image, T resize, unsigned int x, unsigned int y){
-
-			//get the number of pixels in the image
-			unsigned int pixels = x * y;
-			unsigned int bytes = sizeof(T) * pixels;
-			
-			unsigned int sigma_ds = 1/resize;
-			unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
-			unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
-			unsigned int bytes_ds = sizeof(T) * x_ds * y_ds;
-			
-
-
-			//allocate space on the GPU for the original image
-			T* gpuI0;
-			cudaMalloc(&gpuI0, bytes);
-			
-			
-			//copy the image data to the GPU
-			cudaMemcpy(gpuI0, image, bytes, cudaMemcpyHostToDevice);
-
-			//allocate space on the GPU for the down sampled image
-			T* gpuI;
-			cudaMalloc(&gpuI, bytes_ds);
-
-			//run the GPU-based version of the algorithm
-			gpu_down_sample<T>(gpuI, gpuI0, resize, x, y);
-
-			//copy the image data to the GPU
-			cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice);
-
-			cudaFree(gpuI0);
-			cudeFree(gpuI);
-		}
-	
-	}
-}
-
-#endif
-#ifndef STIM_CUDA_RE_SAMPLE_H
-#define STIM_CUDA_RE_SAMPLE_H
-
-#include <iostream>
-#include <cuda.h>
-#include <stim/cuda/cudatools.h>
-#include <stim/cuda/templates/gaussian_blur.cuh>
-
-namespace stim{
-	namespace cuda{
-
-		template<typename T>
-		__global__ void cuda_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
-
-			unsigned int sigma_ds = 1/resize;
-			unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
-			unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
-			
-			
-			// calculate the 2D coordinates for this current thread.
-			int xi = blockIdx.x * blockDim.x + threadIdx.x;
-			int yi = blockIdx.y;
-			// convert 2D coordinates to 1D
-			int i = yi * x + xi;
-			
-			if(xi< x && yi< y){
-				if(xi%sigma_ds==0){
-					if(yi%sigma_ds==0){
-						gpuI[i] = gpuI0[(yi/sigma_ds)*x_ds + xi/sigma_ds];
-					}
-				}
-				else gpuI[i] = 0;
-
-				//int x_org = xi * sigma_ds ;
-				//int y_org = yi * sigma_ds ;
-				//int i_org = y_org * x + x_org;
-				//gpuI[i] = gpuI0[i_org];
-			}
-
-		}
-
-
-		/// Applies a Gaussian blur to a 2D image stored on the GPU
-		template<typename T>
-		void gpu_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
-
-			
-			//unsigned int sigma_ds = 1/resize;
-			//unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
-			//unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
-			
-			//get the number of pixels in the image
-			//unsigned int pixels_ds = x_ds * y_ds;
-			
-			unsigned int max_threads = stim::maxThreadsPerBlock();
-			dim3 threads(max_threads, 1);
-			dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y);
-			
-			//stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y);
-			
-			//resample the image
-			cuda_re_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y);
-
-		}
-
-		/// Applies a Gaussian blur to a 2D image stored on the CPU
-		template<typename T>
-		void cpu_re_sample(T* out, T* in, T resize, unsigned int x, unsigned int y){
-
-			//get the number of pixels in the image
-			unsigned int pixels = x*y;
-			unsigned int bytes = sizeof(T) * pixels;
-			
-			unsigned int sigma_ds = 1/resize;
-			unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
-			unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
-			unsigned int bytes_ds = sizeof(T) * x_ds * y_ds;
-			
-
-
-			//allocate space on the GPU for the original image
-			T* gpuI0;
-			cudaMalloc(&gpuI0, bytes_ds);
-			
-			
-			//copy the image data to the GPU
-			cudaMemcpy(gpuI0, in, bytes_ds, cudaMemcpyHostToDevice);
-
-			//allocate space on the GPU for the down sampled image
-			T* gpuI;
-			cudaMalloc(&gpuI, bytes);
-
-			//run the GPU-based version of the algorithm
-			gpu_re_sample<T>(gpuI, gpuI0, resize, x, y);
-
-			//copy the image data to the GPU
-			cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice);
-
-			cudaFree(gpuI0);
-			cudeFree(gpuI);
-		}
-	
-	}
-}
-
-#endif
 \ No newline at end of file
-#ifndef STIM_CUDA_IVOTE_ATOMIC_BB_H
-#define STIM_CUDA_IVOTE_ATOMIC_BB_H
-
-extern bool DEBUG;
-#include <stim/cuda/ivote/down_sample.cuh>
-#include <stim/cuda/ivote/local_max.cuh>
-#include <stim/cuda/ivote/update_dir_bb.cuh>
-#include <stim/cuda/ivote/vote_atomic_bb.cuh>
-
-namespace stim{
-	namespace cuda{
-	
-	}
-}
-
-
-
-#endif
 \ No newline at end of file
@@ -243,13 +243,13 @@ public:
 		if (device >= 0) {														//if a CUDA device is specified
 			int dev_count;
 			HANDLE_ERROR(cudaGetDeviceCount(&dev_count));						//get the number of CUDA devices
-			std::cout << "Number of CUDA devices: " << dev_count << std::endl;		//output the number of CUDA devices
+			//std::cout << "Number of CUDA devices: " << dev_count << std::endl;		//output the number of CUDA devices
 			cudaDeviceProp prop;
-			std::cout << "CUDA devices----" << std::endl;
+			//std::cout << "CUDA devices----" << std::endl;
 			for (int d = 0; d < dev_count; d++) {									//for each CUDA device
 				cudaGetDeviceProperties(&prop, d);									//get the property of the first device
 																					//float cc = prop.major + prop.minor / 10.0f;						//calculate the compute capability
-				std::cout << d << ":  [" << prop.major << "." << prop.minor << "]      " << prop.name << std::endl;	//display the device information
+				//std::cout << d << ":  [" << prop.major << "." << prop.minor << "]      " << prop.name << std::endl;	//display the device information
 																													//if(cc > best_device_cc){
 																													//	best_device_cc = cc;										//if this is better than the previous device, use it
 																													//	best_device_id = d;
@@ -258,7 +258,7 @@ public:
 			if (dev_count > 0 && dev_count > device) {							//if the first device is not an emulator
 				cudaGetDeviceProperties(&prop, device);						//get the property of the requested CUDA device
 				if (prop.major != 9999) {
-					std::cout << "Using device " << device << std::endl;
+					//std::cout << "Using device " << device << std::endl;
 					HANDLE_ERROR(cudaSetDevice(device));
 				}
 			}
@@ -1368,6 +1368,39 @@ public:
 		return false;
 	}
+	/// Finds the two band indices that bound a given wavelength by forwarding the request
+	/// to the underlying file object.
+	/// The file pointer is cast according to the header's interleave (BSQ, BIL or BIP) and
+	/// data type (float32 or float64). Any other data type prints an error and terminates
+	/// the program; an interleave other than these three leaves low and high untouched.
+	/// @param wavelength is the wavelength to look up
+	/// @param low receives the lower bounding band index
+	/// @param high receives the upper bounding band index
+	void band_bounds(double wavelength, size_t& low, size_t& high) {
+		if (header.interleave == envi_header::BSQ) {		//if the infile is bsq file
+			if (header.data_type == envi_header::float32)
+				((bsq<float>*)file)->band_bounds(wavelength, low, high);
+			else if (header.data_type == envi_header::float64)
+				((bsq<double>*)file)->band_bounds(wavelength, low, high);
+			else {
+				std::cout << "ERROR: unidentified data type" << std::endl;
+				exit(1);
+			}
+		}
+		else if (header.interleave == envi_header::BIL) {	//if the infile is bil file
+			if (header.data_type == envi_header::float32)
+				((bil<float>*)file)->band_bounds(wavelength, low, high);
+			else if (header.data_type == envi_header::float64)
+				((bil<double>*)file)->band_bounds(wavelength, low, high);
+			else {
+				std::cout << "ERROR: unidentified data type" << std::endl;
+				exit(1);
+			}
+		}
+		else if (header.interleave == envi_header::BIP) {	//if the infile is bip file
+			if (header.data_type == envi_header::float32)
+				((bip<float>*)file)->band_bounds(wavelength, low, high);
+			else if (header.data_type == envi_header::float64)
+				((bip<double>*)file)->band_bounds(wavelength, low, high);
+			else {
+				std::cout << "ERROR: unidentified data type" << std::endl;
+				exit(1);
+			}
+		}
+	}
+
 	// Retrieve a spectrum at the specified 1D location
 	/// @param ptr is a pointer to pre-allocated memory of size B*sizeof(T)
@@ -62,31 +62,6 @@ protected:
 		return (T)((1.0 - alpha) * low_v + alpha * high_v);							//interpolate
 	}
-	/// Gets the two band indices surrounding a given wavelength
-	void band_bounds(double wavelength, size_t& low, size_t& high){
-		size_t B = Z();
-		for(high = 0; high < B; high++){
-			if(w[high] > wavelength) break;
-		}
-		low = 0;
-		if(high > 0)
-			low = high-1;
-	}
-
-	/// Get the list of band numbers that bound a list of wavelengths
-	void band_bounds(std::vector<double> wavelengths,
-					 std::vector<unsigned long long>& low_bands,
-					 std::vector<unsigned long long>& high_bands){
-
-		unsigned long long W = w.size();									//get the number of wavelengths in the list
-		low_bands.resize(W);												//pre-allocate space for the band lists
-		high_bands.resize(W);
-
-		for(unsigned long long wl = 0; wl < W; wl++){						//for each wavelength
-			band_bounds(wavelengths[wl], low_bands[wl], high_bands[wl]);	//find the low and high bands
-		}
-	}
-
 	/// Returns the interpolated in the given spectrum based on the given wavelength
 	/// @param s is the spectrum in main memory of length Z()
@@ -139,6 +114,31 @@ protected:
 	}
 public:
+
+	/// Gets the two band indices surrounding a given wavelength
+	/// @param wavelength is the wavelength to look up in the list w
+	/// @param high receives the index of the first entry of w that is greater than
+	///        wavelength, or Z() if there is no such entry
+	/// @param low receives high - 1, or 0 when high is 0
+	void band_bounds(double wavelength, size_t& low, size_t& high) {
+		const size_t band_count = Z();
+		high = 0;
+		while (high < band_count && !(w[high] > wavelength))	//skip every entry that is not above the wavelength
+			high++;
+		low = (high > 0) ? high - 1 : 0;
+	}
+
+	/// Get the list of band numbers that bound a list of wavelengths
+	/// @param wavelengths is the list of wavelengths to look up
+	/// @param low_bands is resized to wavelengths.size() and receives the lower band of each wavelength
+	/// @param high_bands is resized to wavelengths.size() and receives the upper band of each wavelength
+	void band_bounds(std::vector<double> wavelengths,
+		std::vector<unsigned long long>& low_bands,
+		std::vector<unsigned long long>& high_bands) {
+
+		size_t W = wavelengths.size();										//get the number of wavelengths in the requested list (not in the file's band list w)
+		low_bands.resize(W);												//pre-allocate space for the band lists
+		high_bands.resize(W);
+
+		for (size_t wl = 0; wl < W; wl++) {									//for each wavelength
+			size_t low, high;												//size_t temporaries: the scalar overload takes size_t&, which may not be the same type as unsigned long long
+			band_bounds(wavelengths[wl], low, high);						//find the low and high bands
+			low_bands[wl] = low;
+			high_bands[wl] = high;
+		}
+	}
 			/// Get a mask that has all pixels with inf or NaN values masked out (false)
 	void mask_finite(unsigned char* out_mask, unsigned char* mask, bool PROGRESS = false){
 		size_t XY = X() * Y();
+#ifndef STIM_IVOTE2_CUH
+#define STIM_IVOTE2_CUH
+
+#include <iostream>
+#include <fstream>
+#include <stim/cuda/cudatools/error.h>
+#include <stim/cuda/templates/gradient.cuh>
+#include <stim/cuda/arraymath.cuh>
+#include <stim/iVote/ivote2/iter_vote2.cuh>
+#include <stim/iVote/ivote2/local_max.cuh>
+#include <stim/math/constants.h>
+#include <stim/math/vector.h>
+#include <stim/visualization/colormap.h>
+
+
+namespace stim {
+	/// Precomputes a square lookup table of atan2 values.
+	/// The table has (2 * rmax + 1) rows and columns and is stored row by row. The entry at
+	/// (column xt, row yt) holds atan2(rmax - yt, rmax - xt), i.e. the angle of the offset
+	/// from that entry to the center of the window.
+	/// @param cpuTable is pre-allocated memory for (2 * rmax + 1)^2 values
+	/// @param rmax is the radius of the window
+	template<typename T>
+	void atan_2(T* cpuTable, unsigned int rmax) {
+		const int side = 2 * rmax + 1;					//width and height of the window
+		const int center = rmax;						//coordinate of the window center along both axes
+		for (int row = 0; row < side; row++) {			//for each row of the table
+			const int dy = center - row;				//vertical offset to the center
+			T* out = cpuTable + row * side;				//first entry of this row
+			for (int col = 0; col < side; col++)		//for each entry in the row
+				out[col] = atan2((T)dy, (T)(center - col));
+		}
+	}
+
+	/// CUDA kernel that inverts a 2D image in place: every value v becomes 255 - v.
+	/// Each thread handles the pixel given by its 2D grid position; threads that fall
+	/// outside the x-by-y image return without touching memory.
+	/// NOTE(review): the constant 255 presumably assumes intensities in [0, 255] - confirm.
+	/// @param gpuI is the image, stored row by row (index = yi * x + xi)
+	/// @param x and y are the image dimensions
+	template<typename T>
+	__global__ void cuda_invert(T* gpuI, size_t x, size_t y) {
+		// calculate the 2D coordinates for this current thread.
+		size_t xi = blockIdx.x * blockDim.x + threadIdx.x;
+		size_t yi = blockIdx.y * blockDim.y + threadIdx.y;
+
+		if (xi >= x || yi >= y) return;			// skip threads outside the image
+		size_t i = yi * x + xi;					// convert 2D coordinates to 1D
+		gpuI[i] = 255 - gpuI[i];				//invert the pixel intensity
+	}
+
+
+
+	/// Calculates a threshold for a list of values using an exhaustive Otsu-style search.
+	/// th_num candidate thresholds, evenly spaced from the minimum value towards the maximum,
+	/// are tested. For each one the values are split into a "below" (<= threshold) and an
+	/// "over" cluster, and the first candidate that maximizes
+	///     n_below * n_over * (mean_below - mean_over)^2
+	/// is returned.
+	/// @param pts is the list of values
+	/// @param pixels is the number of values in pts
+	/// @param th_num is the number of candidate thresholds to test
+	/// @return the selected threshold; 0 if the list is empty, and the minimum value if all
+	///         values are equal or th_num is 0
+	template<typename T>
+	T th_otsu(T* pts, size_t pixels, unsigned int th_num = 20) {
+		if (!pts || pixels == 0) return (T)0;			//nothing to threshold
+
+		T Imax = pts[0];								//initialize the maximum value to the first one
+		T Imin = pts[0];								//initialize the minimum value to the first one
+		for (size_t n = 1; n < pixels; n++) {			//for every value
+			if (pts[n] > Imax) Imax = pts[n];			//track the highest value
+			if (pts[n] < Imin) Imin = pts[n];			//track the lowest value
+		}
+		if (th_num == 0 || !(Imax > Imin)) return Imin;	//no candidates to test, or only one distinct value
+
+		T th_step = ((Imax - Imin) / th_num);			//spacing between candidate thresholds
+		unsigned int best_idx = 0;						//index of the best candidate found so far
+		double best_var = -1.0;							//its between-cluster variance (any real variance is >= 0)
+		for (unsigned int t0 = 0; t0 < th_num; t0++) {
+			T th = t0 * th_step + Imin;
+			size_t n_b = 0, n_o = 0;					//number of elements below and over the threshold
+			double sum_b = 0.0, sum_o = 0.0;			//sum of the values in each cluster
+			for (size_t idx = 0; idx < pixels; idx++) {
+				if (pts[idx] <= th) {
+					sum_b += pts[idx];
+					n_b += 1;
+				}
+				else {
+					sum_o += pts[idx];
+					n_o += 1;
+				}
+			}
+
+			double var_b = 0.0;							//an empty cluster contributes no separation (and must not divide by zero)
+			if (n_b > 0 && n_o > 0) {
+				double diff = sum_b / n_b - sum_o / n_o;	//difference of the cluster means
+				var_b = (double)n_b * (double)n_o * diff * diff;	//computed in double: the product of the counts overflows 32-bit integers for large images
+			}
+
+			if (var_b > best_var) {						//strict comparison keeps the first maximum
+				best_var = var_b;
+				best_idx = t0;
+			}
+		}
+
+		T threshold = Imin + (T)(best_idx * th_step);
+		return threshold;
+	}
+
+	/// Performs the 2D iterative voting algorithm on an image stored on the GPU.
+	/// The detected points are written to a text file and an image file, and gpuI is
+	/// overwritten with the binary result (1 at detected points, 0 elsewhere).
+	/// NOTE(review): the internal buffers are declared float while their sizes use sizeof(T),
+	/// so this presumably expects T = float - confirm before instantiating with another type.
+	/// @param gpuI is the input image on the GPU (x * y values); it receives the result
+	/// @param rmax is the radius of the voting window
+	/// @param x and y are the image dimensions
+	/// @param invert inverts the image (255 - value) before voting
+	/// @param t is the threshold applied to the vote maxima; 0 selects it with th_otsu
+	/// @param outname_img is the name of the output image file
+	/// @param outname_txt is the name of the output text file
+	/// @param iter is the number of voting iterations
+	/// @param phi is the initial voting angle; it is reduced by phi / iter after each iteration
+	/// @param conn is passed to gpu_local_max
+	/// @param debug is passed to gpu_vote and gpu_update_dir
+	template<typename T>
+	void gpu_ivote2(T* gpuI, unsigned int rmax, size_t x, size_t y, bool invert = false, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt",
+					int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) {
+
+		size_t pixels = x * y;				//compute the size of input image
+		//
+		if (invert) {						//if inversion is required call the kernel to invert the image
+			unsigned int max_threads = stim::maxThreadsPerBlock();
+			dim3 threads((unsigned int)sqrt(max_threads), (unsigned int)sqrt(max_threads));
+			dim3 blocks((unsigned int)x / threads.x + 1, (unsigned int)y / threads.y + 1);
+			cuda_invert << <blocks, threads >> > (gpuI, x, y);
+		}
+		//
+		size_t table_bytes = (size_t)(pow(2 * rmax + 1, 2) * sizeof(T));				// create the atan2 table
+		T* cpuTable = (T*)malloc(table_bytes);											//assign memory on the cpu for atan2 table
+		atan_2<T>(cpuTable, rmax);														//call the function to precompute the atan2 table
+		T* gpuTable;  HANDLE_ERROR(cudaMalloc(&gpuTable, table_bytes));
+		HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, table_bytes, cudaMemcpyHostToDevice));	//copy atan2 table to the gpu
+		free(cpuTable);																	//the host copy of the table is no longer needed
+
+		size_t bytes = pixels* sizeof(T);													//calculate the bytes of the input
+		float dphi = phi / iter;															//change in phi for each iteration
+
+		float* gpuGrad; HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes * 2));									//allocate space to store the 2D gradient
+		float* gpuVote; HANDLE_ERROR(cudaMalloc(&gpuVote, bytes));										//allocate space to store the vote image
+
+		stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y);			//calculate the 2D gradient
+		stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);					//convert cartesian coordinate of gradient to the polar
+
+		for (int i = 0; i < iter; i++) {														//for each iteration
+			cudaMemset(gpuVote, 0, bytes);													//reset the vote image to 0
+			stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y, debug);		//perform voting
+			stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y, debug);	//update the voter directions
+			phi = phi - dphi;																//decrement phi
+		}
+		stim::cuda::gpu_local_max<float>(gpuI, gpuVote, conn, x, y);				//calculate the local maxima
+
+		HANDLE_ERROR(cudaFree(gpuTable));											//release the temporary GPU buffers
+		HANDLE_ERROR(cudaFree(gpuGrad));
+		HANDLE_ERROR(cudaFree(gpuVote));
+
+		T* pts = (T*)malloc(bytes);													//allocate memory on the cpu to store the output of iterative voting
+		HANDLE_ERROR(cudaMemcpy(pts, gpuI, bytes, cudaMemcpyDeviceToHost));			//copy the output from gpu to the cpu memory
+		
+		T threshold;
+		if (t == 0) threshold = stim::th_otsu<T>(pts, pixels);	//if threshold value is not set call the function to compute the threshold
+		else threshold = t;
+		
+		std::ofstream output;		//save the thresholded detected seeds in a text file
+		output.open(outname_txt);
+		output << "X" << " " << "Y" << " " << "threshold" << "\n";
+		size_t ind;
+		for (size_t ix = 0; ix < x; ix++) {
+			for (size_t iy = 0; iy < y; iy++) {
+				ind = iy * x + ix;
+				if (pts[ind] > threshold) {					//keep the points above the threshold
+					output << ix << " " << iy << " " << pts[ind] << "\n";
+					pts[ind] = 1;
+				}
+				else pts[ind] = 0;
+			}
+		}
+		output.close();
+
+		HANDLE_ERROR(cudaMemcpy(gpuI, pts, bytes, cudaMemcpyHostToDevice));		//copy the points to the gpu
+		stim::cpu2image(pts, outname_img, x, y); //output the image
+		free(pts);																//release the host copy of the result
+		
+	}
+
+
+	/// Performs the 2D iterative voting algorithm on an image stored in main memory.
+	/// The image is copied to the GPU, processed by gpu_ivote2, and the result is copied
+	/// back into cpuI. All parameters are forwarded to gpu_ivote2 unchanged.
+	/// @param cpuI is the input image (x * y values); it is overwritten with the result
+	template<typename T>
+	void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert = false, T t = 0, std::string outname_img = "out.bmp", std::string outname_txt = "out.txt",
+					int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) {
+		size_t bytes = x*y * sizeof(T);
+		T* gpuI;						//allocate space on the gpu to save the input image
+		HANDLE_ERROR(cudaMalloc(&gpuI, bytes));
+		HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice));		//copy the image to the gpu
+		stim::gpu_ivote2<T>(gpuI, rmax, x, y, invert, t, outname_img, outname_txt, iter, phi, conn, debug);				//call the gpu version of the ivote
+		HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost));		//copy the output to the cpu
+		HANDLE_ERROR(cudaFree(gpuI));												//release the GPU copy of the image
+	}
+}
+#endif
 \ No newline at end of file
+#ifndef STIM_CUDA_ITER_VOTE2_H
+#define STIM_CUDA_ITER_VOTE2_H
+
+//extern bool DEBUG;
+
+#include "update_dir_bb.cuh"
+#include "vote_atomic_bb.cuh"
+
+namespace stim{
+	namespace cuda{
+	
+	}
+}
+
+
+
+#endif
 \ No newline at end of file
@@ -97,7 +97,7 @@ namespace stim{
 		}
 		template<typename T>
-		void gpu_update_dir(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y){
+		void gpu_update_dir(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y, bool DEBUG = false){
 			//calculate the number of bytes in the array
 			size_t bytes = x * y * sizeof(T);
@@ -87,7 +87,7 @@ namespace stim{
 		/// @param x and y are the spatial dimensions of the gradient image
 		/// @param gradmag defines whether or not the gradient magnitude is taken into account during the vote
 		template<typename T>
-		void gpu_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y, bool gradmag = true){
+		void gpu_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, size_t x, size_t y, bool DEBUG = false, bool gradmag = true){
 			unsigned int max_threads = stim::maxThreadsPerBlock();
 			dim3 threads( (unsigned int)sqrt(max_threads), (unsigned int)sqrt(max_threads) );
 			dim3 blocks((unsigned int)x/threads.x + 1, (unsigned int)y/threads.y + 1);
@@ -96,7 +96,7 @@ namespace stim{
 			if (DEBUG) std::cout<<"Shared Memory required: "<<shared_mem_req<<std::endl;
 			size_t shared_mem = stim::sharedMemPerBlock();
 			if(shared_mem_req > shared_mem){
-				std::cout<<"Error: insufficient shared memory for this implementation of cuda_update_dir()."<<std::endl;
+				std::cout<<"Error: insufficient shared memory for this implementation of cuda_vote()."<<std::endl;
 				exit(1);
 			}
@@ -53,6 +53,10 @@ class image{
 	void allocate(){		//(re)allocates the pixel buffer img for the resolution stored in R
 		unalloc();			//presumably releases any buffer that is already held (defined elsewhere - confirm)
 		img = (T*) malloc( sizeof(T) * R[0] * R[1] * R[2] );	//allocate memory for R[0] * R[1] * R[2] elements of type T
+		if (img == NULL) {		//malloc returned no buffer: report the failure and terminate the program
+			std::cout << "stim::image ERROR - failed to allocate memory for image" << std::endl;
+			exit(1);
+		}
 	}
 	void allocate(size_t x, size_t y, size_t c){	//allocate memory based on the resolution
@@ -228,6 +232,14 @@ public:
 		}
 	}
 #endif
+	/// Copy N data points from source to dest, casting each element from S to D while doing so
+	/// @param source points to N readable elements of type S
+	/// @param dest points to storage for N elements of type D
+	/// @param N is the number of elements to copy
+	template<typename S, typename D>
+	void type_copy(S* source, D* dest, size_t N) {
+		if (typeid(S) == typeid(D)) {					//if both types are the same
+			memcpy(dest, source, N * sizeof(S));		//just use a memcpy
+			return;										//done - without this the loop below copied everything a second time
+		}
+		for (size_t n = 0; n < N; n++)					//otherwise, iterate through each element
+			dest[n] = (D)source[n];							//copy and cast
+	}
 	/// Load an image from a file
 	void load(std::string filename){
 #ifdef USING_OPENCV
@@ -236,13 +248,15 @@ public:
 			std::cout<<"ERROR stim::image::load() - unable to find image "<<filename<<std::endl;
 			exit(1);
 		}
+		int cv_type = cvImage.type();
 		int cols = cvImage.cols;
 		int rows = cvImage.rows;
 		int channels = cvImage.channels();
 		allocate(cols, rows, channels);			//allocate space for the image
+		size_t img_bytes = bytes();
 		unsigned char* cv_ptr = (unsigned char*)cvImage.data;
-		if(C() == 1)														//if this is a single-color image, just copy the data
-			memcpy(img, cv_ptr, bytes());
+		if (C() == 1)														//if this is a single-color image, just copy the data
+			type_copy<unsigned char, T>(cv_ptr, img, size());
 		if(C() == 3)														//if this is a 3-color image, OpenCV uses BGR interleaving
 			from_opencv(cv_ptr, X(), Y());
 #else
@@ -33,32 +33,58 @@ namespace stim{
 		}
 	}
+	//class encapsulates a mat4 file, and can be used to write multiple matrices to a single mat4 file
+	class mat4file {
+		std::ofstream matfile;								//binary output stream shared by every matrix written to this file
+
+	public:
+		/// Constructor opens a mat4 file for writing
+		/// @param filename is the file to open; use is_open() to find out whether opening succeeded
+		mat4file(std::string filename) {
+			matfile.open(filename, std::ios::binary);
+		}
+
+		/// Returns true if the underlying file stream is open
+		bool is_open() {
+			return matfile.is_open();
+		}
+
+		/// Closes the underlying file stream
+		void close() {
+			matfile.close();
+		}
+
+		/// Appends one matrix (five 4-byte header fields, the null-terminated name, then the raw data) to the file
+		/// @param data points to sx * sy elements, each mat4Format_size(format) bytes wide
+		/// @param varname is the variable name stored with the matrix
+		/// @param sx is written as the row count and sy as the column count
+		/// @param format selects the element type encoded in the header
+		/// @return true if the file is open and every write succeeded
+		bool writemat(char* data, std::string varname, size_t sx, size_t sy, mat4Format format) {
+			//data format: https://maxwell.ict.griffith.edu.au/spl/matlab-page/matfile_format.pdf (page 32)
+			if (!matfile.is_open()) return false;			//nothing can be written to a closed file
+
+			int MOPT = 0;									//initialize the MOPT type value to zero
+			int m = 0;										//little endian
+			int o = 0;										//reserved, always 0
+			int p = format;
+			int t = 0;
+			MOPT = m * 1000 + o * 100 + p * 10 + t;			//calculate the type value
+			int mrows = (int)sx;
+			int ncols = (int)sy;
+			int imagf = 0;									//assume real (for now)
+			varname.push_back('\0');									//add a null to the string
+			int namlen = (int)varname.size();						//calculate the name size
+
+			size_t bytes = sx * sy * mat4Format_size(format);
+			matfile.write((char*)&MOPT, 4);
+			matfile.write((char*)&mrows, 4);
+			matfile.write((char*)&ncols, 4);
+			matfile.write((char*)&imagf, 4);
+			matfile.write((char*)&namlen, 4);
+			matfile.write((char*)&varname[0], namlen);
+			matfile.write((char*)data, bytes);				//write the matrix data
+			return matfile.good();							//report failed writes too, not just whether the file is open
+		}
+	};
+
 	static void save_mat4(char* data, std::string filename, std::string varname, size_t sx, size_t sy, mat4Format format){		//writes one matrix to filename via mat4file; writes nothing if the file cannot be opened
-		//save the matrix file here (use the mat4 function above)
-		//data format: https://maxwell.ict.griffith.edu.au/spl/matlab-page/matfile_format.pdf (page 32)
-		
-		int MOPT = 0;									//initialize the MOPT type value to zero
-		int m = 0;										//little endian
-		int o = 0;										//reserved, always 0
-		int p = format;
-		int t = 0;
-		MOPT = m * 1000 + o * 100 + p * 10 + t;			//calculate the type value
-		int mrows = (int)sx;
-		int ncols = (int)sy;
-		int imagf = 0;									//assume real (for now)
-		varname.push_back('\0');									//add a null to the string
-		int namlen = (int)varname.size();						//calculate the name size
-
-		size_t bytes = sx * sy * mat4Format_size(format);
-		std::ofstream outfile(filename, std::ios::binary);
-		outfile.write((char*)&MOPT, 4);
-		outfile.write((char*)&mrows, 4);
-		outfile.write((char*)&ncols, 4);
-		outfile.write((char*)&imagf, 4);
-		outfile.write((char*)&namlen, 4);
-		outfile.write((char*)&varname[0], namlen);
-		outfile.write((char*)data, bytes);				//write the matrix data
-		outfile.close();
+		mat4file outfile(filename);									//create a mat4 file object
+		if (outfile.is_open()) {									//if the file is open
+			outfile.writemat(data, varname, sx, sy, format);		//write the matrix (the returned status is not checked)
+			outfile.close();										//close the file
+		}		
 	}
 template <class T>
@@ -409,8 +435,29 @@ public:
 		}
 	}
-	// saves the matrix as a Level-4 MATLAB file
-	void mat4(std::string filename, std::string name = std::string("unknown"), mat4Format format = mat4_float) {
+	/// Writes the matrix contents to a file as raw binary (rows() * cols() values of type T, no header)
+	/// @param filename is the file to write; nothing is written if it cannot be opened
+	void raw(std::string filename) {
+		std::ofstream out(filename, std::ios::binary);		//qualified like the other streams in this file, so no using-directive is required
+		if (out) {
+			out.write((char*)data(), rows() * cols() * sizeof(T));
+			out.close();
+		}
+	}
+
+	void mat4(stim::mat4file& file, std::string name = std::string("unknown"), mat4Format format = mat4_float) {
+		//make sure the matrix name is valid (only numbers and letters, with a letter at the beginning
+		for (size_t c = 0; c < name.size(); c++) {
+			if (name[c] < 48 ||												//if the character isn't a number or letter, replace it with '_'
+				(name[c] > 57 && name[c] < 65) ||
+				(name[c] > 90 && name[c] < 97) ||
+				(name[c] > 122)) {
+				name[c] = '_';
+			}
+		}
+		if (name[0] < 65 ||
+			(name[0] > 91 && name[0] < 97) ||
+			name[0] > 122) {
+			name = std::string("m") + name;
+		}
 		if (format == mat4_float) {
 			if (sizeof(T) == 4) format = mat4_float32;
 			else if (sizeof(T) == 8) format = mat4_float64;
@@ -419,7 +466,40 @@ public:
 				exit(1);
 			}
 		}
-		stim::save_mat4((char*)M, filename, name, rows(), cols(), format);
+		//the name is now valid
+
+		//if the size of the array is more than 100,000,000 elements, the matrix isn't supported
+		if (rows() * cols() > 100000000) {											//break the matrix up into multiple parts
+			//mat4file out(filename);													//create a mat4 object to write the matrix
+			if (file.is_open()) {
+				if (rows() < 100000000) {												//if the size of the row is less than 100,000,000, split the matrix up by columns
+					size_t ncols = 100000000 / rows();									//calculate the number of columns that can fit in one matrix
+					size_t nmat = (size_t)std::ceil((double)cols() / (double)ncols);			//calculate the number of matrices required
+					for (size_t m = 0; m < nmat; m++) {									//for each matrix
+						std::stringstream ss;
+						ss << name << "_part_" << m + 1;
+						if (m == nmat - 1)
+							file.writemat((char*)(data() + m * ncols * rows()), ss.str(), rows(), cols() - m * ncols, format);
+						else
+							file.writemat((char*)(data() + m * ncols * rows()), ss.str(), rows(), ncols, format);
+					}
+				}
+			}
+		}
+		//call the mat4 subroutine
+		else
+			//stim::save_mat4((char*)M, filename, name, rows(), cols(), format);
+			file.writemat((char*)data(), name, rows(), cols(), format);
+	}
+
+	/// Saves the matrix as a Level-4 MATLAB file
+	/// @param filename is the file to create; nothing is written if it cannot be opened
+	/// @param name and format are passed on to the mat4file overload of mat4()
+	void mat4(std::string filename, std::string name = std::string("unknown"), mat4Format format = mat4_float) {
+		stim::mat4file out(filename);				//open the destination file
+		if (!out.is_open()) return;					//bail out quietly when the file is not open
+
+		mat4(out, name, format);					//write this matrix through the mat4file overload
+		out.close();
 	}
 };
@@ -243,7 +243,7 @@ public:
 			return false;	
 	}
-//#ifndef __NVCC__
+#ifndef __NVCC__
 	/// Outputs the vector as a string
 	std::string str() const{
 		std::stringstream ss;
@@ -261,7 +261,7 @@ public:
 		return ss.str();
 	}
-//#endif
+#endif
 	size_t size(){ return 3; }
@@ -89,6 +89,11 @@ protected:
 				absolute.push_back(relative[i]);
 			}
 		}
+		else {
+			if (relative[0] == ".")
+				relative = std::vector<std::string>(relative.begin() + 1, relative.end());
+			absolute = relative;
+		}
 	}
 	/// Parses a directory string into a drive (NULL if not Windows) and list of hierarchical directories
@@ -6,14 +6,14 @@
 namespace stim{
-	template<typename T>
-	using aabb3 = aabbn<T, 3>;
-/*/// Structure for a 3D axis aligned bounding box
+	//template<typename T>
+	//using aabb3 = aabbn<T, 3>;
+/// Structure for a 3D axis aligned bounding box
 template<typename T>
 struct aabb3 : public aabbn<T, 3>{
-	aabb3() : aabbn() {}
-	aabb3(T x0, T y0, T z0, T x1, T y1, T z1){
+	CUDA_CALLABLE aabb3() : aabbn() {}
+	CUDA_CALLABLE aabb3(T x0, T y0, T z0, T x1, T y1, T z1){
 		low[0] = x0;
 		low[1] = y0;
 		low[2] = z0;
@@ -22,11 +22,39 @@ struct aabb3 : public aabbn&lt;T, 3&gt;{
 		high[2] = x2;
 	}
-	aabb3 aabbn<T, 3>() {
+	/// Creates a box of size 0 located at the point (x, y, z)
+	CUDA_CALLABLE aabb3(T x, T y, T z) {
+		//low and high belong to the dependent base aabbn<T, 3>, so they are reached through this-> (unqualified names are not looked up in dependent bases)
+		this->low[0] = this->high[0] = x;
+		this->low[1] = this->high[1] = y;
+		this->low[2] = this->high[2] = z;
+	}
+
+	/// Packs (x, y, z) into an array and forwards it to aabbn<T, 3>::insert
+	CUDA_CALLABLE void insert(T x, T y, T z) {
+		T pt[3] = { x, y, z };						//array form expected by the base class
+		aabbn<T, 3>::insert(pt);
+	}
+	/// Packs (x, y, z) into an array and forwards it to aabbn<T, 3>::trim_low
+	CUDA_CALLABLE void trim_low(T x, T y, T z) {
+		T pt[3] = { x, y, z };						//array form expected by the base class
+		aabbn<T, 3>::trim_low(pt);
 	}
-};*/
+	/// Packs (x, y, z) into an array and forwards it to aabbn<T, 3>::trim_high
+	CUDA_CALLABLE void trim_high(T x, T y, T z) {
+		T pt[3] = { x, y, z };						//array form expected by the base class
+		aabbn<T, 3>::trim_high(pt);
+	}
+
+
+
+};
 }
@@ -25,26 +25,58 @@ struct aabbn{
 		init(i);
 	}
+	/// For even inputs to the constructor, the input could be one point or a set of pairs of points
 	CUDA_CALLABLE aabbn(T x0, T x1) {
-		low[0] = x0;
-		high[0] = x1;
+		if (D == 1) {				//1D box: x0 is the low bound and x1 is the high bound
+			low[0] = x0;
+			high[0] = x1;
+		}
+		else if (D == 2) {			//2D box: zero-size box at the point (x0, x1)
+			low[0] = high[0] = x0;
+			low[1] = high[1] = x1;
+		}							//for any other D this constructor body assigns nothing
 	}
+	/// In the case of 3 inputs, this must be a 3D bounding box, so initialize to a box of size 0 at (x, y, z)
+	/*CUDA_CALLABLE aabbn(T x, T y, T z) {
+		low[0] = high[0] = x;
+		low[1] = high[1] = y;
+		low[2] = high[2] = z;
+	}*/
+
 	CUDA_CALLABLE aabbn(T x0, T y0, T x1, T y1) {
-		low[0] = x0;
-		high[0] = x1;
-		low[1] = y0;
-		high[1] = y1;
+		if (D == 2) {				//2D box: (x0, y0) is the low corner and (x1, y1) is the high corner
+			low[0] = x0;
+			high[0] = x1;
+			low[1] = y0;
+			high[1] = y1;
+		}
+		else if(D == 4){			//4D box: zero-size box at the point (x0, y0, x1, y1)
+			low[0] = high[0] = x0;
+			low[1] = high[1] = y0;
+			low[2] = high[2] = x1;
+			low[3] = high[3] = y1;
+		}							//for any other D this constructor body assigns nothing
 	}
-	CUDA_CALLABLE aabbn(T x0, T y0, T z0, T x1, T y1, T z1) {
-		low[0] = x0;
-		high[0] = x1;
-		low[1] = y0;
-		high[1] = y1;
-		low[2] = z0;
-		high[2] = z1;
-	}
+	/*CUDA_CALLABLE aabbn(T x0, T y0, T z0, T x1, T y1, T z1) {
+		if (D == 3) {
+			low[0] = x0;
+			high[0] = x1;
+			low[1] = y0;
+			high[1] = y1;
+			low[2] = z0;
+			high[2] = z1;
+		}
+		else if (D == 6) {
+			low[0] = high[0] = x0;
+			low[1] = high[1] = y0;
+			low[2] = high[2] = z0;
+			low[3] = high[3] = x1;
+			low[4] = high[4] = y1;
+			low[5] = high[5] = z1;
+		}
+	}*/
 	//insert a point into the bounding box, growing the box appropriately
@@ -7,6 +7,7 @@
 #include <stdlib.h>
 #include <stim/parser/parser.h>
 #include <stim/math/vector.h>
+#include <stim/visualization/obj/obj_material.h>
 #include <algorithm>
 #include <time.h>
@@ -29,7 +30,7 @@ namespace stim{
  *  geometry class - contains a list of triplets used to define a geometric structure, such as a face or line
  */
-enum obj_type { OBJ_NONE, OBJ_LINE, OBJ_FACE, OBJ_POINTS };
+enum obj_type { OBJ_NONE, OBJ_LINE, OBJ_FACE, OBJ_POINTS, OBJ_TRIANGLE_STRIP };	//OBJ_TRIANGLE_STRIP geometry is expanded into individual faces by End()
 template <typename T>
 class obj{
@@ -93,13 +94,13 @@ protected:
 	};	//end vertex
 	//triplet used to specify geometric vertices consisting of a position vertex, texture vertex, and normal
-	struct triplet : public std::vector<unsigned int>{
+	struct triplet : public std::vector<size_t>{
 		//default constructor, empty triplet
 		triplet(){}
 		//create a triplet given a parameter list (OBJ indices start at 1, so 0 can be used to indicate no value)
-		triplet(unsigned int v, unsigned int vt = 0, unsigned int vn = 0){
+		triplet(size_t v, size_t vt = 0, size_t vn = 0){
 			push_back(v);
 			if(vn != 0){
 				push_back(vt);
@@ -140,12 +141,12 @@ protected:
 			if(size() == 3){
 				if(at(1) == 0)
-					ss<<"\\\\"<<at(2);
+					ss<<"//"<<at(2);
 				else
-					ss<<'\\'<<at(1)<<'\\'<<at(2);
+					ss<<'/'<<at(1)<<'/'<<at(2);
 			}
 			else if(size() == 2)
-				ss<<"\\"<<at(1);
+				ss<<"/"<<at(1);
 			return ss.str();
 		}
@@ -223,10 +224,16 @@ protected:
 	std::vector<geometry> P;	//list of points structures
 	std::vector<geometry> F;	//list of faces
+	//material lists
+	std::vector< obj_material<T> > M;	//list of material descriptors
+	std::vector<size_t> Mf;			//face index where each material begins
+
 	//information for the current geometric object
 	geometry current_geo;
 	vertex current_vt;
 	vertex current_vn;
+	obj_material<T> current_material;					//stores the current material
+	bool new_material;								//flags if a material property has been changed since the last material was pushed
 		//flags for the current geometric object
 		obj_type current_type;
@@ -258,9 +265,9 @@ protected:
 	//create a triple and add it to the current geometry
 	void update_v(vertex vv){
-		unsigned int v;
-		unsigned int vt = 0;
-		unsigned int vn = 0;
+		size_t v;
+		size_t vt = 0;
+		size_t vn = 0;
 		//if the current geometry is using a texture coordinate, add the current texture coordinate to the geometry
 		if(geo_flag_vt){
@@ -303,6 +310,8 @@ protected:
 		geo_flag_vn = false;
 		vert_flag_vt = false;
 		vert_flag_vn = false;
+
+		new_material = false;						//initialize a new material to false (start with no material)
 	}
 	//gets the type of token representing the entry in the OBJ file
@@ -346,13 +355,107 @@ public:
 	void Vertex(T x, T y, T z){ update_v(vertex(x, y, z));}
 	void Vertex(T x, T y, T z, T w){ update_v(vertex(x, y, z, w));}
+	///Material functions
+	/// Sets the ambient color of the current material and flags the material as changed
+	void matKa(T r, T g, T b) {
+		const T rgb[3] = { r, g, b };
+		for (int c = 0; c < 3; c++)					//copy the three color components
+			current_material.ka[c] = rgb[c];
+		new_material = true;
+	}
+	/// Sets the ambient texture of the current material (an empty string by default) and flags the material as changed
+	void matKa(std::string tex = std::string()) {
+		current_material.tex_ka.assign(tex);
+		new_material = true;
+	}
+	/// Sets the diffuse color of the current material and flags the material as changed
+	void matKd(T r, T g, T b) {
+		const T rgb[3] = { r, g, b };
+		for (int c = 0; c < 3; c++)					//copy the three color components
+			current_material.kd[c] = rgb[c];
+		new_material = true;
+	}
+	/// Sets the diffuse texture of the current material (an empty string by default) and flags the material as changed
+	void matKd(std::string tex = std::string()) {
+		current_material.tex_kd.assign(tex);
+		new_material = true;
+	}
+	/// Sets the specular color of the current material and flags the material as changed
+	void matKs(T r, T g, T b) {
+		const T rgb[3] = { r, g, b };
+		for (int c = 0; c < 3; c++)					//copy the three color components
+			current_material.ks[c] = rgb[c];
+		new_material = true;
+	}
+	/// Sets the specular texture of the current material (an empty string by default) and flags the material as changed
+	void matKs(std::string tex = std::string()) {
+		current_material.tex_ks.assign(tex);
+		new_material = true;
+	}
+	/// Sets the specular exponent of the current material and flags the material as changed
+	void matNs(T n) {
+		current_material.ns = n;
+		new_material = true;
+	}
+	/// Sets the specular-exponent texture map of the current material (an empty string by default) and flags the material as changed
+	void matNs(std::string tex = std::string()) {
+		current_material.tex_ns.assign(tex);
+		new_material = true;
+	}
+	/// Sets the illumination model of the current material and flags the material as changed
+	void matIllum(int i) {
+		current_material.illum = i;
+		new_material = true;
+	}
+	/// Sets the alpha texture map of the current material (an empty string by default) and flags the material as changed
+	void matD(std::string tex = std::string()) {
+		current_material.tex_alpha.assign(tex);
+		new_material = true;
+	}
+	/// Sets the bump map of the current material (an empty string by default) and flags the material as changed
+	void matBump(std::string tex = std::string()) {
+		current_material.tex_bump.assign(tex);
+		new_material = true;
+	}
+	/// Sets the displacement map of the current material (an empty string by default) and flags the material as changed
+	void matDisp(std::string tex = std::string()) {
+		current_material.tex_disp.assign(tex);
+		new_material = true;
+	}
+	/// Sets the stencil decal of the current material (an empty string by default) and flags the material as changed
+	void matDecal(std::string tex = std::string()) {
+		current_material.tex_decal.assign(tex);
+		new_material = true;
+	}
+
 	///This function starts drawing of a primitive object, such as a line, face, or point set
 	/// @param t is the type of object to be drawn: OBJ_POINTS, OBJ_LINE, OBJ_FACE
+	/// If a material property was changed since the last material was pushed, that material is recorded first,
+	/// starting at the current face index
 	void Begin(obj_type t){
+		if (new_material) {							//if a new material has been specified
+			if (current_material.name == "") {		//if a name wasn't given, create a new one
+				std::stringstream ss;				//create a name for it
+				ss << "material" << M.size();		//base it on the material number
+				current_material.name = ss.str();
+			}
+			Mf.push_back(F.size());					//start the material at the current face index
+			M.push_back(current_material);			//push the current material
+			current_material.name = "";			//reset the name of the current material
+			new_material = false;					//the change has been recorded; without this every later Begin() pushed a duplicate material
+		}
 		current_type = t;
 	}
+	/// Generates a list of faces from a list of points, assuming the input list forms a triangle strip
+	/// @param s is the ordered list of strip vertices
+	/// @return one three-vertex geometry per triangle; empty if s holds fewer than 3 vertices
+	std::vector<geometry> genTriangleStrip(geometry s) {
+		std::vector<geometry> faces;
+		if (s.size() < 3) return faces;						//not enough points to form a triangle
+
+		faces.resize(s.size() - 2);							//a strip of n points yields n - 2 triangles
+		for (size_t tri = 0; tri < faces.size(); tri++) {
+			const bool swapped = (tri % 2) != 0;			//odd triangles list their first two vertices in swapped order
+			faces[tri].push_back(s[swapped ? tri + 1 : tri]);
+			faces[tri].push_back(s[swapped ? tri : tri + 1]);
+			faces[tri].push_back(s[tri + 2]);
+		}
+		return faces;
+	}
+
+
 	/// This function terminates drawing of a primitive object, such as a line, face, or point set
 	void End(){
 		//copy the current object to the appropriate list
@@ -374,6 +477,12 @@ public:
 			case OBJ_FACE:
 				F.push_back(current_geo);
+				break;
+
+			case OBJ_TRIANGLE_STRIP:
+				std::vector<geometry> tstrip = genTriangleStrip(current_geo);		//generate a list of faces from the current geometry
+				F.insert(F.end(), tstrip.begin(), tstrip.end());					//insert all of the triangles into the face list
+				break;
 			}
 		}
 		//clear everything
@@ -438,10 +547,15 @@ public:
 			}
 		}
-		//output all of the lines
+		//output all of the faces
 		if(F.size()){
 			ss<<std::endl<<"#face structures"<<std::endl;
+			size_t mi = 0;											//start the current material index at 0
 			for(i = 0; i < F.size(); i++){
+				if (mi < M.size() && Mf[mi] == i) {
+					ss << "usemtl " << M[mi].name << std::endl;
+					mi++;
+				}
 				ss<<"f "<<F[i].str()<<std::endl;
 			}
 		}
@@ -449,6 +563,14 @@ public:
 		return ss.str();	//return the constructed string
 	}
+	///Output the material file as a string
+	/// @return the str() output of every stored material, each followed by a line break
+	std::string matstr() {
+		std::stringstream ss;
+		for (size_t i = 0; i < M.size(); i++) {
+			ss << M[i].str() << std::endl;
+		}
+		return ss.str();			//the original had no return statement, which is undefined behavior for a non-void function
+	}
+
 	obj(){			//default constructor: all setup is delegated to init()
 		init();		//private function that initializes everything
 	}
@@ -462,19 +584,42 @@ public:
 	/// @param filename is the name of the file to be saved
 	bool save(std::string filename){
-		std::ofstream outfile(filename.c_str());
+		//strip ".obj" only when it is the actual suffix, so a ".obj" elsewhere in the path no longer truncates the name
+		const std::string obj_ext = ".obj";
+		if (filename.size() >= obj_ext.size() &&
+			filename.compare(filename.size() - obj_ext.size(), obj_ext.size(), obj_ext) == 0)
+			filename = filename.substr(0, filename.size() - obj_ext.size());
+		std::string obj_filename = filename + ".obj";
+		std::string mtl_filename = filename + ".mtl";
+
+		//the material file is written next to the OBJ file, so mtllib refers to it without any directory component
+		std::string mtl_libname = mtl_filename;
+		size_t dir_end = mtl_libname.find_last_of("/\\");
+		if (dir_end != std::string::npos)
+			mtl_libname = mtl_libname.substr(dir_end + 1);
-		if(!outfile){
-			std::cout<<"STIM::OBJ error opening file for writing"<<std::endl;
+
+		std::ofstream outfile(obj_filename.c_str());
+		if (!outfile) {
+			std::cout << "STIM::OBJ error opening file for writing" << std::endl;
 			return false;
 		}
+		if (M.size())															//if there are any materials, there will be a corresponding material file
+			outfile << "mtllib " << mtl_libname << std::endl;					//output the material library name
+
 		//output the OBJ data to the file
 		outfile<<str();
 		//close the file
 		outfile.close();
+		if (M.size()) {															//if materials are used
+			
+			outfile.open(mtl_filename.c_str());										//open the material file
+			if (!outfile) {														//report a material file that cannot be written instead of silently dropping it
+				std::cout << "STIM::OBJ error opening file for writing" << std::endl;
+				return false;
+			}
+			for (size_t i = 0; i < M.size(); i++) {								//for each material
+				outfile << M[i].str() << std::endl;								//output the material name and properties
+			}
+			outfile.close();
+		}
+
 		return true;
 	}
+#ifndef OBJ_MATERIAL_H
+#define OBJ_MATERIAL_H
+
+#include <sstream>
+#include <cstring>
+
+namespace stim {
+
+/// Describes one material: a name, its colors, specular exponent, illumination model, and optional texture maps
+template<typename T>
+struct obj_material {
+	std::string name;			//material name
+	T ka[3];				//ambient color
+	T kd[3];				//diffuse color
+	T ks[3];				//specular color
+	T ns;					//specular exponent
+
+	int illum;					//illumination model
+
+	std::string tex_ka;			//ambient texture
+	std::string tex_kd;			//diffuse texture
+	std::string tex_ks;			//specular texture
+	std::string tex_ns;			//texture map for the specular exponent
+	std::string tex_alpha;		//texture map for the alpha component
+	std::string tex_bump;		//bump map
+	std::string tex_disp;		//displacement map
+	std::string tex_decal;		//stencil decal
+
+	/// Constructor: all colors start at zero, ns at 10, illum at 2, and every texture name empty
+	obj_material() {
+		for (int c = 0; c < 3; c++)				//assign zero per element rather than memset, so T need not be all-zero-bytes for 0
+			ka[c] = kd[c] = ks[c] = T(0);
+		ns = 10;
+		illum = 2;
+
+	}
+
+	/// Formats the material as text: newmtl, Ka, Kd, Ks, Ns and illum lines, then one line per non-empty texture name
+	/// Const-qualified so it can also be called on const materials
+	std::string str() const {
+		std::stringstream ss;
+		ss << "newmtl " << name << std::endl;
+		ss << "Ka " << ka[0] << " " << ka[1] << " " << ka[2] << std::endl;
+		ss << "Kd " << kd[0] << " " << kd[1] << " " << kd[2] << std::endl;
+		ss << "Ks " << ks[0] << " " << ks[1] << " " << ks[2] << std::endl;
+		ss << "Ns " << ns << std::endl;
+		ss << "illum " << illum << std::endl;
+		if (!tex_ka.empty()) ss << "map_Ka " << tex_ka << std::endl;
+		if (!tex_kd.empty()) ss << "map_Kd " << tex_kd << std::endl;
+		if (!tex_ks.empty()) ss << "map_Ks " << tex_ks << std::endl;
+		if (!tex_ns.empty()) ss << "map_Ns " << tex_ns << std::endl;
+		if (!tex_alpha.empty()) ss << "map_d " << tex_alpha << std::endl;
+		if (!tex_bump.empty()) ss << "bump " << tex_bump << std::endl;
+		if (!tex_disp.empty()) ss << "disp " << tex_disp << std::endl;
+		if (!tex_decal.empty()) ss << "decal " << tex_decal << std::endl;
+		return ss.str();
+	}
+};
+
+}				//end namespace stim
+
+#endif
 \ No newline at end of file