envi.py: added function to load balanced data for training

sberisha
1 parent db331d8a
Showing 3 changed files with 104 additions and 25 deletions Show diff stats
matlab/stimBrewerMap.m
python/classify.py
python/envi.py
-function result = stimBrewerColormap(R)
+function result = stimBrewerMap(R)
  
 %returns a Brewer colormap with the specified resolution R
  
-ctrlPts = zeros(11, 3);
-
-ctrlPts(1, :) = [0.192157, 0.211765, 0.584314];
-ctrlPts(2, :) = [0.270588, 0.458824, 0.705882];
-ctrlPts(3, :) = [0.454902, 0.678431, 0.819608];
-ctrlPts(4, :) = [0.670588, 0.85098, 0.913725];
-ctrlPts(5, :) = [0.878431, 0.952941, 0.972549];
-ctrlPts(6, :) = [1, 1, 0.74902];
-ctrlPts(7, :) = [0.996078, 0.878431, 0.564706];
-ctrlPts(8, :) = [0.992157, 0.682353, 0.380392];
-ctrlPts(9, :) = [0.956863, 0.427451, 0.262745];
-ctrlPts(10, :) = [0.843137, 0.188235, 0.152941];
-ctrlPts(11, :) = [0.647059, 0, 0.14902];
-
-X = 1:11;
-
-r = 1:11/R:11;
+ctrlPts = zeros(12, 3);
+ctrlPts(1, :) = [0, 0, 0];
+ctrlPts(2, :) = [0.192157, 0.211765, 0.584314];
+ctrlPts(3, :) = [0.270588, 0.458824, 0.705882];
+ctrlPts(4, :) = [0.454902, 0.678431, 0.819608];
+ctrlPts(5, :) = [0.670588, 0.85098, 0.913725];
+ctrlPts(6, :) = [0.878431, 0.952941, 0.972549];
+ctrlPts(7, :) = [1, 1, 0.74902];
+ctrlPts(8, :) = [0.996078, 0.878431, 0.564706];
+ctrlPts(9, :) = [0.992157, 0.682353, 0.380392];
+ctrlPts(10, :) = [0.956863, 0.427451, 0.262745];
+ctrlPts(11, :) = [0.843137, 0.188235, 0.152941];
+ctrlPts(12, :) = [0.647059, 0, 0.14902];
+
+X = 1:12;
+
+r = 1:12/R:12;
  
 R = interp1(X, ctrlPts(:, 1), r);
 G = interp1(X, ctrlPts(:, 2), r);
@@ -14,7 +14,7 @@ import scipy.misc
 import envi
 import hyperspectral
 import random
-import progressbar
+import pyprind
 import matplotlib.pyplot as plt
  
 #generate N qualitative colors and return the value for color c
@@ -66,11 +66,13 @@ def filenames2class(masks):
         return
  
     classimages = []
-    bar = progressbar.ProgressBar(max_value=num_masks)
+    #bar = progressbar.ProgressBar(max_value=num_masks)
+    bar = pyprind.ProgBar(num_masks)
     for m in range(0, num_masks):
         img = scipy.misc.imread(masks[m], flatten=True).astype(numpy.bool)
         classimages.append(img)
-        bar.update(m+1)
+        #bar.update(m+1)
+        bar.update()
  
     result = numpy.stack(classimages)
     sum_images = numpy.sum(result.astype(numpy.uint32), 0)
@@ -169,7 +171,8 @@ def envi_batch_predict(E, C, batch=10000):
     i = 0
     Tv = []
     plt.ion()
-    bar = progressbar.ProgressBar(max_value=numpy.count_nonzero(E.mask))
+    #bar = progressbar.ProgressBar(max_value=numpy.count_nonzero(E.mask))
+    bar = pyprind.ProgBar(numpy.count_nonzero(E.mask))
     while not Fv == []:
         Fv = numpy.nan_to_num(Fv)                                                     #remove infinite values        
         if i == 0:
@@ -184,4 +187,5 @@ def envi_batch_predict(E, C, batch=10000):
         plt.pause(0.05)
         Fv = E.loadbatch(batch)   
         i = i + 1
-        bar.update(len(Tv))
+        #bar.update(len(Tv))
+        bar.update()
 \ No newline at end of file
@@ -9,8 +9,10 @@ import os
 import numpy
 import scipy
 import matplotlib.pyplot as plt
-import progressbar
+#import pyprind
 import sys
+from math import floor
+import progressbar
  
 class envi_header:
     def __init__(self, filename = ""):
@@ -272,30 +274,36 @@ class envi:
             flatmask = numpy.reshape(mask, (X * Y))
             i = numpy.flatnonzero(flatmask)
             bar = progressbar.ProgressBar(max_value = P)
+            #bar = pyprind.ProgBar(P)
             for p in range(0, P):
                 self.file.seek(i[p] * B * type_bytes)
                 self.file.readinto(spectrum)
                 M[:, p] = spectrum
                 bar.update(p+1)
+                #bar.update()
         elif self.header.interleave == "bsq":
             band = numpy.zeros(mask.shape, dtype=self.header.data_type)
             i = numpy.nonzero(mask)
             bar = progressbar.ProgressBar(max_value=B)
+            #bar = pyprind.ProgBar(P)
             for b in range(0, B):
                 self.file.seek(b * X * Y * type_bytes)
                 self.file.readinto(band)
                 M[b, :] = band[i]
                 bar.update(b+1)
+                #bar.update()
         elif self.header.interleave == "bil":
             plane = numpy.zeros((B, X), dtype=self.header.data_type)
             p = 0
             bar = progressbar.ProgressBar(max_value=Y)
+            #bar = pyprind.ProgBar(P)
             for l in range(0, Y):
                 i = numpy.flatnonzero(mask[l, :])
                 self.file.readinto(plane)
                 M[:, p:p+i.shape[0]] = plane[:, i]
                 p = p + i.shape[0]
                 bar.update(l+1)
+                #bar.update()
         self.file.seek(prev_pos)
         return M
  
@@ -334,6 +342,73 @@ class envi:
             T.append(t)
  
         return numpy.nan_to_num(numpy.concatenate(F, 1).transpose()), numpy.concatenate(T)
+
+
+    #create a set of feature/target pairs for classification with balanced data
+    #input: envi file object (self), stack of class masks C x Y x X, optional maximum number of samples per class
+    #output: feature matrix (pixels x features), target array (one 1-based class label per pixel)
+    #example: F, T = E.loadtrain_balance(classimages, num_samples=1000)
+    #   NaN and Inf values in the features are replaced with finite numbers (numpy.nan_to_num)
+    def loadtrain_balance(self, classimages, num_samples=None):
+
+        # get number of classes
+        C = classimages.shape[0]
+
+        F = []
+        T = []
+
+        # get number of samples per class
+        samples_per_class = numpy.zeros(C, dtype=numpy.int32)
+        for c in range(0, C):
+            if num_samples is None:
+                samples_per_class[c] = numpy.count_nonzero(classimages[c, :, :])
+            else:
+                # if user has specified a max number of samples per class
+                if num_samples > numpy.count_nonzero(classimages[c, :, :]):
+                    samples_per_class[c] = numpy.count_nonzero(classimages[c, :, :])
+                else:
+                    samples_per_class[c] = num_samples
+
+        for c in range(0, C):
+            print("\nLoading class " + str(c+1) + "...")
+            # row, col index of valid pixels
+            temp = classimages[c,:]
+            flat_temp = numpy.reshape(temp, temp.shape[0]*temp.shape[1])
+
+            idx = numpy.flatnonzero(temp)  # indices of nonzero values
+            if num_samples:
+                # use specific number of samples for training
+                numpy.random.shuffle(idx)
+                idx = idx[0:samples_per_class[c]]
+
+            # increase number of samples by copying them over multiple times
+            max_samples = numpy.amax(samples_per_class)
+            # num of times to copy for even division
+            copy_times = int(floor(max_samples / samples_per_class[c]))
+            rem = max_samples % samples_per_class[c]  # remaining samples
+
+            for i in range(0, copy_times):
+                numpy.random.shuffle(idx)
+                shuffle_temp = numpy.zeros(flat_temp.shape, dtype=bool)
+                shuffle_temp[idx] = flat_temp[idx]
+                f = self.loadmask(numpy.reshape(shuffle_temp, (temp.shape[0], temp.shape[1])))  # load the feature matrix for class c
+                t = numpy.ones((f.shape[1])) * (c+1)  # generate a target array
+                F.append(f)
+                T.append(t)
+
+            # copy the remaning samples so the total matches the max number of samples chosen by user
+            if rem > 0:
+                numpy.random.shuffle(idx)
+                idx = idx[0:rem]
+                shuffle_temp = numpy.zeros(flat_temp.shape, dtype=bool)
+                shuffle_temp[idx] = flat_temp[idx]
+                f = self.loadmask(numpy.reshape(shuffle_temp, (temp.shape[0], temp.shape[1])))  # load the feature matrix for class c
+                t = numpy.ones((f.shape[1])) * (c+1)  # generate a target array
+                F.append(f)
+                T.append(t)
+
+        return numpy.nan_to_num(numpy.concatenate(F, 1).transpose()), numpy.concatenate(T)
+
  
     #read a batch of data based on the mask
     def loadbatch(self, npixels):
@@ -390,4 +465,4 @@ def save_envi(A, fname):
     #save the raw data
     file = open(fname, "wb")
     file.write(bytearray(A))
-    file.close()
 \ No newline at end of file
+    file.close()