From 35534291ba7760d96ef0239a0303a75f0481e2f8 Mon Sep 17 00:00:00 2001 From: sberisha Date: Wed, 18 Apr 2018 16:47:25 -0500 Subject: [PATCH] envi.py: added function to load balanced data for training --- matlab/stimBrewerMap.m | 36 ++++++++++++++++++------------------ python/classify.py | 14 +++++++++----- python/envi.py | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 104 insertions(+), 25 deletions(-) diff --git a/matlab/stimBrewerMap.m b/matlab/stimBrewerMap.m index 0a2c521..8b83879 100644 --- a/matlab/stimBrewerMap.m +++ b/matlab/stimBrewerMap.m @@ -1,24 +1,24 @@ -function result = stimBrewerColormap(R) +function result = stimBrewerMap(R) %returns a Brewer colormap with the specified resolution R -ctrlPts = zeros(11, 3); - -ctrlPts(1, :) = [0.192157, 0.211765, 0.584314]; -ctrlPts(2, :) = [0.270588, 0.458824, 0.705882]; -ctrlPts(3, :) = [0.454902, 0.678431, 0.819608]; -ctrlPts(4, :) = [0.670588, 0.85098, 0.913725]; -ctrlPts(5, :) = [0.878431, 0.952941, 0.972549]; -ctrlPts(6, :) = [1, 1, 0.74902]; -ctrlPts(7, :) = [0.996078, 0.878431, 0.564706]; -ctrlPts(8, :) = [0.992157, 0.682353, 0.380392]; -ctrlPts(9, :) = [0.956863, 0.427451, 0.262745]; -ctrlPts(10, :) = [0.843137, 0.188235, 0.152941]; -ctrlPts(11, :) = [0.647059, 0, 0.14902]; - -X = 1:11; - -r = 1:11/R:11; +ctrlPts = zeros(12, 3); +ctrlPts(1, :) = [0, 0, 0]; +ctrlPts(2, :) = [0.192157, 0.211765, 0.584314]; +ctrlPts(3, :) = [0.270588, 0.458824, 0.705882]; +ctrlPts(4, :) = [0.454902, 0.678431, 0.819608]; +ctrlPts(5, :) = [0.670588, 0.85098, 0.913725]; +ctrlPts(6, :) = [0.878431, 0.952941, 0.972549]; +ctrlPts(7, :) = [1, 1, 0.74902]; +ctrlPts(8, :) = [0.996078, 0.878431, 0.564706]; +ctrlPts(9, :) = [0.992157, 0.682353, 0.380392]; +ctrlPts(10, :) = [0.956863, 0.427451, 0.262745]; +ctrlPts(11, :) = [0.843137, 0.188235, 0.152941]; +ctrlPts(12, :) = [0.647059, 0, 0.14902]; + +X = 1:12; + +r = 1:12/R:12; R = interp1(X, ctrlPts(:, 1), r); G = interp1(X, ctrlPts(:, 
2), r); diff --git a/python/classify.py b/python/classify.py index ffcbea9..deef0d0 100644 --- a/python/classify.py +++ b/python/classify.py @@ -14,7 +14,7 @@ import scipy.misc import envi import hyperspectral import random -import progressbar +import pyprind import matplotlib.pyplot as plt #generate N qualitative colors and return the value for color c @@ -66,11 +66,13 @@ def filenames2class(masks): return classimages = [] - bar = progressbar.ProgressBar(max_value=num_masks) + #bar = progressbar.ProgressBar(max_value=num_masks) + bar = pyprind.ProgBar(num_masks) for m in range(0, num_masks): img = scipy.misc.imread(masks[m], flatten=True).astype(numpy.bool) classimages.append(img) - bar.update(m+1) + #bar.update(m+1) + bar.update() result = numpy.stack(classimages) sum_images = numpy.sum(result.astype(numpy.uint32), 0) @@ -169,7 +171,8 @@ def envi_batch_predict(E, C, batch=10000): i = 0 Tv = [] plt.ion() - bar = progressbar.ProgressBar(max_value=numpy.count_nonzero(E.mask)) + #bar = progressbar.ProgressBar(max_value=numpy.count_nonzero(E.mask)) + bar = pyprind.ProgBar(numpy.count_nonzero(E.mask)) while not Fv == []: Fv = numpy.nan_to_num(Fv) #remove infinite values if i == 0: @@ -184,4 +187,5 @@ def envi_batch_predict(E, C, batch=10000): plt.pause(0.05) Fv = E.loadbatch(batch) i = i + 1 - bar.update(len(Tv)) + #bar.update(len(Tv)) + bar.update() \ No newline at end of file diff --git a/python/envi.py b/python/envi.py index b781839..b1b332d 100644 --- a/python/envi.py +++ b/python/envi.py @@ -9,8 +9,10 @@ import os import numpy import scipy import matplotlib.pyplot as plt -import progressbar +#import pyprind import sys +from math import floor +import progressbar class envi_header: def __init__(self, filename = ""): @@ -272,30 +274,36 @@ class envi: flatmask = numpy.reshape(mask, (X * Y)) i = numpy.flatnonzero(flatmask) bar = progressbar.ProgressBar(max_value = P) + #bar = pyprind.ProgBar(P) for p in range(0, P): self.file.seek(i[p] * B * type_bytes) 
def loadtrain_balance(self, classimages, num_samples=None):
    """Create class-balanced feature/target pairs for classifier training.

    Every non-empty class is oversampled (its pixels are repeated) until it
    contributes as many samples as the largest class, so the returned
    training set contains the same number of samples for each class.

    Parameters:
        classimages: stack of class masks with shape C x Y x X; non-zero
            entries mark the pixels that belong to class c (labelled c+1).
        num_samples: optional maximum number of distinct pixels drawn at
            random from each class. None (the default) uses every labelled
            pixel of every class.

    Returns:
        (features, targets): features is a (samples x bands) matrix with
        NaN/Inf values replaced by numpy.nan_to_num; targets is a 1-D
        float array holding the 1-based class label of each sample.

    Raises:
        ValueError: if num_samples is given but smaller than 1, or if no
            class contains any labelled pixel.
    """
    if num_samples is not None and num_samples < 1:
        raise ValueError("num_samples must be a positive integer or None")

    C = classimages.shape[0]

    # number of distinct pixels that will be drawn from each class
    samples_per_class = []
    for c in range(0, C):
        available = int(numpy.count_nonzero(classimages[c]))
        if num_samples is None:
            samples_per_class.append(available)
        else:
            samples_per_class.append(min(int(num_samples), available))

    # every class is grown to the size of the largest one
    max_samples = max(samples_per_class) if samples_per_class else 0
    if max_samples == 0:
        raise ValueError("classimages does not contain any labelled pixels")

    F = []
    T = []
    for c in range(0, C):
        n = samples_per_class[c]
        if n == 0:
            # nothing to replicate; dividing by n below would fail
            print("\nSkipping class " + str(c+1) + " (no labelled pixels)...")
            continue

        print("\nLoading class " + str(c+1) + "...")
        temp = classimages[c]
        idx = numpy.flatnonzero(temp)   # flat indices of the class pixels
        if num_samples is not None:
            # keep a random subset of the requested size
            numpy.random.shuffle(idx)
            idx = idx[0:n]

        # whole copies needed to reach max_samples, plus the leftover count
        copy_times, rem = divmod(max_samples, n)

        # The mask is the same for every whole copy, so read the pixels
        # from the file once and reuse the matrix instead of re-reading it.
        mask = numpy.zeros(temp.size, dtype=bool)
        mask[idx] = True
        f = self.loadmask(numpy.reshape(mask, temp.shape))
        t = numpy.full(f.shape[1], c + 1, dtype=numpy.float64)
        F.extend([f] * copy_times)
        T.extend([t] * copy_times)

        # top up with a random subset so the class reaches max_samples
        if rem > 0:
            numpy.random.shuffle(idx)
            mask = numpy.zeros(temp.size, dtype=bool)
            mask[idx[0:rem]] = True
            f = self.loadmask(numpy.reshape(mask, temp.shape))
            t = numpy.full(f.shape[1], c + 1, dtype=numpy.float64)
            F.append(f)
            T.append(t)

    return numpy.nan_to_num(numpy.concatenate(F, 1).transpose()), numpy.concatenate(T)