Commit 9c5854ecbb1a2f8e30a3468588a7063bb51c5a52

Authored by Pavel Govyadinov
2 parents 42b3c74f c6251f8b

Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib

stim/envi/binary.h
... ... @@ -8,7 +8,7 @@
8 8 #include <fstream>
9 9 #include <sys/stat.h>
10 10 #include <cstring>
11   -
  11 +#include <chrono>
12 12 #ifdef _WIN32
13 13 #include <Windows.h>
14 14 #else
... ... @@ -17,6 +17,186 @@
17 17  
18 18 namespace stim{
19 19  
  20 +/// This class calculates the optimal setting for independent parameter b (batch size) for
  21 +/// minimizing the dependent parameter bps (bytes per second)
  22 +class stream_optimizer{
  23 +protected:
  24 + size_t Bps[2]; //bytes per second for the previous batch
  25 + size_t interval_B; //number of bytes processed this interval
  26 + size_t interval_ms; //number of milliseconds spent in the current interval
  27 + size_t n[2]; //current batch size (in bytes)
  28 + size_t h; //spacing used for finite difference calculations
  29 + size_t dn; //delta value (in bytes) for setting the batch size (minimum change in batch parameter)
  30 + size_t maxn; //maximum value for the batch size
  31 +
  32 + double alpha; //alpha value controls the factor of the gradient that is used to calculate the next point (speed of convergence)
  33 +
  34 + bool sample_step; //calculating the derivative (this class alternates between calculating dBps and B)
  35 + bool forward_diff; //evaluate the derivative using forward differences
  36 +
  37 + size_t window_ms; //size of the interval (in milliseconds) integrated to get a reliable bps value
  38 +
  39 + // This function rounds x to the nearest value within dB
  40 + size_t round_limit(double n0){
  41 + if(n0 < 0) return dn; //if n0 is less than zero, return the lowest possible n
  42 +
  43 + size_t new_n = (size_t)(n0 + 0.5); //now n0 must be positive, so round it to the nearest integer
  44 + if(new_n > maxn) new_n = maxn; //limit the returned size of x to within the specified bounds
  45 +
  46 + size_t lowest = new_n / dn;
  47 + size_t highest = lowest + dn;
  48 + size_t diff[2] = {new_n - lowest, highest - new_n}; //calculate the two differences
  49 + if(diff[0] < diff[1])
  50 + return lowest;
  51 + return highest;
  52 + }
  53 +
  54 +public:
  55 +
  56 + //constructor initializes a stream optimizer
  57 + stream_optimizer(size_t min_batch_size, size_t max_batch_size, double a = 0.003, size_t probe_step = 5, size_t window = 2000){
  58 + //Bps = 0; //initialize to zero bytes per second processed
  59 + Bps[0] = Bps[1] = 0; //initialize the bits per second to 0
  60 + interval_B = 0; //zero bytes have been processed at initialization
  61 + interval_ms = 0; //no time has been spent on the batch so far
  62 + dn = min_batch_size; //set the minimum batch size as the minimum change in batch size
  63 + maxn = max_batch_size; //set the maximum batch size
  64 + n[0] = max_batch_size; //set B
  65 + h = (max_batch_size / min_batch_size) / probe_step * dn;
  66 + std::cout<<"h = "<<h<<std::endl;
  67 + if(h < dn) h = dn;
  68 + alpha = a;
  69 + //n[0] = round_limit( (max_batch_size - min_batch_size)/2 );
  70 + window_ms = window; //minimum integration interval (for getting a reliable bps measure)
  71 + sample_step = true; //the first step is to calculate the derivative
  72 + forward_diff = true; //start with the forward difference (since we start at the maximum batch size)
  73 + }
  74 +
  75 + size_t update(size_t bytes_processed, size_t ms_spent, size_t& data_rate, bool VERBOSE = false){
  76 + interval_B += bytes_processed; //increment the number of bytes processed
  77 + interval_ms += ms_spent; //increment the number of milliseconds spent processing
  78 + data_rate = interval_B / interval_ms;
  79 +
  80 + //if we have sufficient information to evaluate the optimization function at this point
  81 + if(interval_ms < window_ms){ //if insufficient time has passed to get a reliable Bps measurement
  82 + return n[0];
  83 + }
  84 + else{ //if we have collected enough information for a reliable Bps estimate
  85 +
  86 + if(Bps[0] == 0){ //if n[0] hasn't been evaluated yet, this is the first step
  87 + Bps[0] = data_rate; //set the initial Bps value
  88 + n[1] = n[0] - h; //set the position of the next sample point
  89 + if(VERBOSE)
  90 + std::cout<<"Bps value at n = "<<n[0]<<" is "<<Bps[0]<<" Bps, probing n = "<<n[1]<<std::endl;
  91 + return n[1]; //return the probe point
  92 + }
  93 + else{
  94 + Bps[1] = data_rate; //set the Bps for the current point (n[1])
  95 +
  96 + double Bps_p; //allocate a variable for the derivative
  97 + //calculate the derivative
  98 + if(n[0] < n[1]){ //if the current point is less than the previous one (probably the most common)
  99 + Bps_p = ((double)Bps[1] - (double)Bps[0]) / (double)h; //calculate the derivative using the forward finite difference
  100 + }
  101 + else{
  102 + Bps_p = ((double)Bps[0] - (double)Bps[1]) / (double)h; //calculate the derivative using the backward finite difference
  103 + }
  104 + if(VERBOSE)
  105 + std::cout<<" probed n = "<<n[1]<<" with "<<Bps[1]<<" Bps, gradient = "<<Bps_p<<" Bps"<<std::endl;
  106 +
  107 + double new_n_precise = n[0] + alpha * Bps_p; //calculate the next point (snap to closest integer)
  108 + size_t new_n_nearest = round_limit(new_n_precise); //calculate the next point (given batch parameters)
  109 +
  110 + if(new_n_nearest == n[0]){ //if the newest point is the same as the original point
  111 + Bps[0] = Bps[1]; //update the Bps
  112 + //if(n[0] == dn) n[1] = n[0] + h; //if we're on the left edge, probe forward
  113 + //else n[1] = n[0] - h; //otherwise probe backwards
  114 + if(VERBOSE)
  115 + std::cout<<" staying at n = "<<n[0]<<" for now"<<std::endl;
  116 + //return n[1]; //return the probe point
  117 +
  118 + Bps[0] = 0; //reset the Bps for the current point
  119 + return n[0]; //return the current point for a re-calculation
  120 + }
  121 + else{ //if the newest point is different from the original point
  122 + n[0] = new_n_nearest; //move to the new point
  123 + Bps[0] = 0; //set the Bps to zero (point hasn't been tested)
  124 + if(VERBOSE)
  125 + std::cout<<" moving to n = "<<n[0]<<std::endl;
  126 + return n[0]; //return the new point
  127 + }
  128 + }
  129 + }
  130 + }
  131 +
  132 + /*// this function updates the optimizer, given the number of bytes processed in an interval and time spent processing
  133 + size_t update(size_t bytes_processed, size_t ms_spent){
  134 + interval_B += bytes_processed; //increment the number of bytes processed
  135 + interval_ms += ms_spent; //increment the number of milliseconds spent processing
  136 +
  137 + //if we have sufficient information to evaluate the optimization function at this point
  138 + if(interval_ms >= window_ms){ //if sufficient time has passed to get a reliable Bps measurement
  139 + size_t new_Bps = interval_B / interval_ms; //calculate the current Bps
  140 +
  141 + if(sample_step){ //if this is a sample step, collect the information for Bps = f(n0)
  142 + Bps = new_Bps; //set the Bps to the evaluated value
  143 + n[1] = n[0] - dn; //reduce the batch size by one delta to take a second sample
  144 + if(n[1] == 0){ //if the resulting batch size is zero
  145 + n[1] = 2*dn; //we're at the left edge: set the new sample point to 2*dn
  146 + }
  147 +
  148 + interval_B = interval_ms = 0; //start a new interval at the new sample point
  149 + sample_step = false; //next step will calculate the new batch size via optimization
  150 + return n[1]; //return the new batch size
  151 + }
  152 + else{ //if we have sufficient information to evaluate the derivative and optimize
  153 + double f = (double)new_Bps; //we have evaluated the function at this location
  154 + double fprime;
  155 + if(n[1] < n[0] ){ //if the new point is less than the previous point (usually the case)
  156 + fprime = (double)(Bps - new_Bps) / (double)dn; //calculate the forward difference
  157 + }
  158 + else{ //if the new point is larger (only happens at the minimum limit)
  159 + fprime = (double)(new_Bps - Bps) / (double)dn; //calculate the backward difference
  160 + }
  161 + size_t bestn = n[1] - (size_t)(f / fprime); //calculate the best value for B using Newton's method
  162 + n[0] = round_limit( (size_t)bestn ); //set the new dependent point
  163 + sample_step = true; //the next step will be a sample step
  164 + }
  165 +
  166 + }
  167 + if(sample_step) return n[0];
  168 + return n[1]; //insufficient information, keep the same batch size
  169 + }*/
  170 +
  171 + /*size_t update(size_t bytes_processed, size_t ms_spent){
  172 + interval_B += bytes_processed; //increment the number of bytes processed
  173 + interval_ms += ms_spent; //increment the number of milliseconds spent processing
  174 +
  175 + //if( Bps[0] == 0 ){ //if the left boundary hasn't been processed
  176 +
  177 +
  178 + //if we have sufficient information to evaluate the optimization function at this point
  179 + if(interval_ms >= window_ms){
  180 + size_t new_Bps = interval_B / interval_ms; //calculate the current Bps
  181 +
  182 + if(Bps[0] == 0) //if the left interval Bps hasn't been calculated
  183 + Bps[0] = interval_B / interval_ms; //that is the interval being processed
  184 + else
  185 + Bps[1] = interval_B / interval_ms; //otherwise the right interval is being processed
  186 +
  187 + if(Bps[0] != 0 && Bps[1] != 0){ //if both intervals have been processed
  188 +
  189 +
  190 + }
  191 + }*/
  192 +
  193 + /*size_t update(size_t bytes_processed, size_t ms_spent, size_t& data_rate, bool VERBOSE){
  194 + size_t time = update(bytes_processed, ms_spent, VERBOSE);
  195 + data_rate = Bps[0];
  196 + return time;
  197 + }*/
  198 +};
  199 +
20 200 /** This class manages the streaming of large multidimensional binary files.
21 201 * Generally these are hyperspectral files with 2 spatial and 1 spectral dimension. However, this class supports
22 202 * other dimensions via the template parameter D.
... ... @@ -36,6 +216,7 @@ protected:
36 216 unsigned char* mask; //pointer to a character array: 0 = background, 1 = foreground (or valid data)
37 217  
38 218 double progress; //stores the progress on the current operation (accessible using a thread)
  219 + size_t data_rate; //data rate (currently in Bps)
39 220  
40 221 size_t buffer_size; //available memory for processing large files
41 222  
... ... @@ -45,8 +226,9 @@ protected:
45 226 header = 0; //initialize the header size to zero
46 227 mask = NULL;
47 228  
48   - progress = 0;
49   - set_buffer(); //set the maximum buffer size to the default
  229 + progress = 0; //initialize the progress for any algorithm to zero
  230 + data_rate = 0; //initialize the data rate to zero
  231 + set_buffer_frac(); //set the maximum buffer size to the default
50 232 }
51 233  
52 234 /// Private helper function that returns the size of the file on disk using system functions.
... ... @@ -127,8 +309,12 @@ public:
127 309 progress = 0;
128 310 }
129 311  
  312 + size_t get_data_rate(){
  313 + return data_rate;
  314 + }
  315 +
130 316 //specify the maximum fraction of available memory that this class will use for buffering
131   - void set_buffer(double mem_frac = 0.5){ //default to 50%
  317 + void set_buffer_frac(double mem_frac = 0.5){ //default to 50%
132 318 #ifdef _WIN32
133 319 MEMORYSTATUSEX statex;
134 320 statex.dwLength = sizeof (statex);
... ... @@ -141,6 +327,10 @@ public:
141 327 #endif
142 328 }
143 329  
  330 + void set_buffer_raw(size_t bytes){
  331 + buffer_size = bytes;
  332 + }
  333 +
144 334 /// Open a binary file for streaming.
145 335  
146 336 /// @param filename is the name of the binary file
... ... @@ -404,8 +594,8 @@ public:
404 594 }
405 595  
406 596 /// Reads a block specified by an (x, y, z) position and size using the largest possible contiguous reads
407   - bool read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){
408   -
  597 + size_t read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){
  598 + auto t0 = std::chrono::high_resolution_clock::now();
409 599 size_t size_bytes = sx * sy * sz * sizeof(T); //size of the block to read in bytes
410 600  
411 601 size_t start = z * R[0] * R[1] + y * R[0] + x; //calculate the start postion
... ... @@ -415,10 +605,8 @@ public:
415 605  
416 606 if(sx == R[0] && sy == R[1]){ //if sx and sy result in a contiguous volume along z
417 607 file.read((char*)dest, size_bytes); //read the block in one pass
418   - return true;
419 608 }
420   -
421   - if(sx == R[0]){ //if sx is contiguous, read each z-axis slice can be read in one pass
  609 + else if(sx == R[0]){ //if sx is contiguous, read each z-axis slice can be read in one pass
422 610 size_t jump_bytes = (R[1] - sy) * R[0] * sizeof(T); //jump between each slice
423 611 size_t slice_bytes = sx * sy * sizeof(T); //size of the slice to be read
424 612 for(size_t zi = 0; zi < sz; zi++){ //for each z-axis slice
... ... @@ -426,29 +614,31 @@ public:
426 614 dest += sx * sy; //move the destination pointer to the next slice
427 615 file.seekg(jump_bytes, std::ios::cur); //skip to the next slice in the file
428 616 }
429   - return true;
430 617 }
431   -
432   - //in this case, x is not contiguous so the volume must be read line-by-line
433   - size_t jump_x_bytes = (R[0] - sx) * sizeof(T); //number of bytes skipped in the x direction
434   - size_t jump_y_bytes = (R[1] - sy) * R[0] * sizeof(T) + jump_x_bytes; //number of bytes skipped between slices
435   - size_t line_bytes = sx * sizeof(T); //size of the line to be read
436   - size_t zi, yi;
437   - for(zi = 0; zi < sz; zi++){ //for each slice
438   - file.read((char*)dest, line_bytes); //read the first line
439   - for(yi = 1; yi < sy; yi++){ //read each additional line
440   - dest += sx; //move the pointer in the destination block to the next line
441   - file.seekg(jump_x_bytes, std::ios::cur); //skip to the next line in the file
442   - file.read((char*)dest, line_bytes); //read the line to the destination block
  618 + else{
  619 + //in this case, x is not contiguous so the volume must be read line-by-line
  620 + size_t jump_x_bytes = (R[0] - sx) * sizeof(T); //number of bytes skipped in the x direction
  621 + size_t jump_y_bytes = (R[1] - sy) * R[0] * sizeof(T) + jump_x_bytes; //number of bytes skipped between slices
  622 + size_t line_bytes = sx * sizeof(T); //size of the line to be read
  623 + size_t zi, yi;
  624 + for(zi = 0; zi < sz; zi++){ //for each slice
  625 + file.read((char*)dest, line_bytes); //read the first line
  626 + for(yi = 1; yi < sy; yi++){ //read each additional line
  627 + dest += sx; //move the pointer in the destination block to the next line
  628 + file.seekg(jump_x_bytes, std::ios::cur); //skip to the next line in the file
  629 + file.read((char*)dest, line_bytes); //read the line to the destination block
  630 + }
  631 + file.seekg(jump_y_bytes, std::ios::cur); //skip to the beginning of the next slice
443 632 }
444   - file.seekg(jump_y_bytes, std::ios::cur); //skip to the beginning of the next slice
445 633 }
446   - return false;
  634 + auto t1 = std::chrono::high_resolution_clock::now();
  635 + return std::chrono::duration_cast<std::chrono::milliseconds>(t1-t0).count();
447 636 }
448 637  
449 638 // permutes a block of data from the current interleave to the interleave specified (re-arranged dimensions to the order specified by [d0, d1, d2])
450 639  
451   - void permute(T* dest, T* src, size_t sx, size_t sy, size_t sz, size_t d0, size_t d1, size_t d2){
  640 + size_t permute(T* dest, T* src, size_t sx, size_t sy, size_t sz, size_t d0, size_t d1, size_t d2){
  641 + auto t0 = std::chrono::high_resolution_clock::now();
452 642 size_t d[3] = {d0, d1, d2};
453 643 size_t s[3] = {sx, sy, sz};
454 644 size_t p[3];// = {x, y, z};
... ... @@ -467,7 +657,6 @@ public:
467 657 p[1] = y;
468 658 src_idx = z * sx * sy + y * sx;
469 659 dest_idx = p[d[2]] * s[d[0]] * s[d[1]] + p[d[1]] * s[d[0]];
470   - //std::cout<<z<<", "<<y<<" ------- "<<p[d[2]]<<" * "<<s[d[0]]<<" * "<<s[d[1]]<<" + "<<p[d[1]]<<" * "<<s[d[0]]<<std::endl;
471 660 memcpy(dest + dest_idx, src + src_idx, x_bytes);
472 661 }
473 662 }
... ... @@ -491,6 +680,8 @@ public:
491 680 }
492 681 }
493 682 }
  683 + auto t1 = std::chrono::high_resolution_clock::now();
  684 + return std::chrono::duration_cast<std::chrono::milliseconds>(t1-t0).count();
494 685 }
495 686  
496 687 };
... ...
stim/envi/bsq.h
... ... @@ -10,7 +10,7 @@
10 10 #include <deque>
11 11 #include <chrono>
12 12 #include <future>
13   -
  13 +#include <algorithm>
14 14  
15 15  
16 16 namespace stim{
... ... @@ -377,24 +377,40 @@ public:
377 377  
378 378 }
379 379  
380   - void readlines(T* dest, size_t start, size_t n){
381   - hsi<T>::read(dest, 0, start, 0, X(), n, Z());
  380 + size_t readlines(T* dest, size_t start, size_t n){
  381 + return hsi<T>::read(dest, 0, start, 0, X(), n, Z());
  382 + }
  383 +
  384 + size_t writeblock(std::ofstream* f, T* src, size_t n){
  385 + auto t0 = std::chrono::high_resolution_clock::now();
  386 + f->write((char*)src, n);
  387 + auto t1 = std::chrono::high_resolution_clock::now();
  388 + return std::chrono::duration_cast<std::chrono::milliseconds>(t1-t0).count();
382 389 }
383 390  
384 391 /// Convert this BSQ file to a BIL
385   - bool bil(std::string outname, bool PROGRESS = false){
  392 + bool bil(std::string outname, bool PROGRESS = false, bool VERBOSE = false, bool OPTIMIZATION = true){
386 393  
387 394 const size_t buffers = 4; //number of buffers required for this algorithm
  395 +
388 396 size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch
389 397  
390 398 size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case)
391 399 size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints
  400 +
  401 + //if(VERBOSE){
  402 + std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl;
  403 + std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl;
  404 + //}
  405 +
392 406 if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error
393 407 std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl;
394 408 exit(1);
395 409 }
396 410 size_t max_batch_bytes = max_slices_per_batch * slice_bytes; //calculate the amount of memory that will be allocated for all four buffers
397 411  
  412 + stream_optimizer O(1, max_slices_per_batch);
  413 +
398 414 T* src[2]; //source double-buffer for asynchronous batching
399 415 src[0] = (T*) malloc(max_batch_bytes);
400 416 src[1] = (T*) malloc(max_batch_bytes);
... ... @@ -403,54 +419,70 @@ public:
403 419 dst[1] = (T*) malloc(max_batch_bytes);
404 420  
405 421 size_t N[2]; //number of slices stored in buffers 0 and 1
406   - N[0] = N[1] = min(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set)
  422 + N[0] = N[1] = std::min<size_t>(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set)
407 423  
408 424 std::ofstream target(outname.c_str(), std::ios::binary); //open an output file for writing
409 425 //initialize with buffer 0 (used for double buffering)
410 426 size_t y_load = 0;
411 427 size_t y_proc = 0;
412   - std::future<void> rthread;
  428 + std::future<size_t> rthread;
413 429 std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing
414 430  
415   - readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer
416   - y_load += N[0]; //increment the loaded slice counter
417   - int b = 1;
418   -
419   - std::chrono::high_resolution_clock::time_point t_start; //high-resolution timers
420   - std::chrono::high_resolution_clock::time_point t_end;
  431 + std::chrono::high_resolution_clock::time_point t_start, pt_start; //high-resolution timers
  432 + std::chrono::high_resolution_clock::time_point t_end, pt_end;
421 433 size_t t_batch; //number of milliseconds to process a batch
422   - size_t t_total = 0;
  434 + size_t t_total = 0; //total time for operation
  435 + size_t pt_total = 0; //total time spent processing data
  436 + size_t rt_total = 0; //total time spent reading data
  437 + size_t wt_total = 0;
  438 + size_t dr = 0;
  439 +
  440 + rt_total += readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer
  441 + y_load += N[0]; //increment the loaded slice counter
  442 + int b = 1; //initialize the double buffer to 0
423 443 while(y_proc < Y()){ //while there are still slices to be processed
424 444 t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch
425 445 if(y_load < Y()){ //if there are still slices to be loaded, load them
  446 + //if(y_proc > 0){
  447 +
  448 +
  449 + //}
426 450 if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size
427 451 rthread = std::async(std::launch::async, &stim::bsq<T>::readlines, this, src[b], y_load, N[b]);
428   -
  452 + //rt_total += rthread.get();
429 453 y_load += N[b]; //increment the number of loaded slices
430 454 }
431 455  
432 456 b = !b; //swap the double-buffer
433   -
434   - binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 0, 2, 1); //permute the batch to a BIL file
435   - target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file
  457 + pt_total += binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 0, 2, 1); //permute the batch to a BIL file
  458 + wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file
436 459 y_proc += N[b]; //increment the counter of processed pixels
437 460 if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels
  461 + if(y_load < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations
438 462 t_end = std::chrono::high_resolution_clock::now();
439 463 t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count();
440 464 t_total += t_batch;
441   - rthread.wait();
  465 + if(OPTIMIZATION)
  466 + N[b] = O.update(N[!b] * slice_bytes, t_batch, binary<T>::data_rate, VERBOSE); //set the batch size based on optimization
  467 + //binary<T>::data_rate = dr;
  468 + //std::cout<<"New N = "<<N[!b]<<" selected with "<<(double)data_rate / 1000000<<" MB/s"<<std::endl;
442 469 }
443   -
444   - std::cout<<"Total time to execute: "<<t_total<<" ms"<<std::endl;
  470 +
445 471 free(src[0]); //free buffer resources
446 472 free(src[1]);
447 473 free(dst[0]);
448 474 free(dst[1]);
  475 + if(VERBOSE){
  476 + std::cout<<"total time to execute bsq::bil(): "<<t_total<<" ms"<<std::endl;
  477 + std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl;
  478 + std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl;
  479 + std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl;
  480 + }
449 481 return true; //return true
450 482 }
451 483  
452 484 /// Convert this BSQ file to a BIP
453   - bool bip(std::string outname, bool PROGRESS = false){
  485 + bool bip(std::string outname, bool PROGRESS = false, bool VERBOSE = false){
454 486  
455 487 const size_t buffers = 4; //number of buffers required for this algorithm
456 488 size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch
... ... @@ -471,13 +503,13 @@ public:
471 503 dst[1] = (T*) malloc(max_batch_bytes);
472 504  
473 505 size_t N[2]; //number of slices stored in buffers 0 and 1
474   - N[0] = N[1] = min(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set)
  506 + N[0] = N[1] = std::min<size_t>(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set)
475 507  
476 508 std::ofstream target(outname.c_str(), std::ios::binary); //open an output file for writing
477 509 //initialize with buffer 0 (used for double buffering)
478 510 size_t y_load = 0;
479 511 size_t y_proc = 0;
480   - std::future<void> rthread;
  512 + std::future<size_t> rthread;
481 513 std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing
482 514  
483 515 readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer
... ... @@ -488,6 +520,8 @@ public:
488 520 std::chrono::high_resolution_clock::time_point t_end;
489 521 size_t t_batch; //number of milliseconds to process a batch
490 522 size_t t_total = 0;
  523 + size_t pt_total = 0;
  524 + size_t rt_total = 0;
491 525 while(y_proc < Y()){ //while there are still slices to be processed
492 526 t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch
493 527 if(y_load < Y()){ //if there are still slices to be loaded, load them
... ... @@ -499,17 +533,21 @@ public:
499 533  
500 534 b = !b; //swap the double-buffer
501 535  
502   - binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 2, 0, 1); //permute the batch to a BIP file
  536 + pt_total += binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 2, 0, 1); //permute the batch to a BIP file
503 537 target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file
504 538 y_proc += N[b]; //increment the counter of processed pixels
505 539 if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels
506 540 t_end = std::chrono::high_resolution_clock::now();
507 541 t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count();
508 542 t_total += t_batch;
509   - rthread.wait();
  543 + if(y_load < Y()) rt_total += rthread.get(); //if a batch was threaded to load, make sure it finishes
510 544 }
511 545  
512   - std::cout<<"Total time to execute: "<<t_total<<" ms"<<std::endl;
  546 + if(VERBOSE){
  547 + std::cout<<"total time to execute bsq::bil(): "<<t_total<<" ms"<<std::endl;
  548 + std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl;
  549 + std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl;
  550 + }
513 551 free(src[0]); //free buffer resources
514 552 free(src[1]);
515 553 free(dst[0]);
... ...
stim/envi/envi.h
... ... @@ -79,30 +79,64 @@ public:
79 79 return alloc_array(header.samples * header.lines);
80 80 }
81 81  
82   - void set_buffer(double memfrac = 0.5){
  82 + void set_buffer_frac(double memfrac = 0.5){
83 83 if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
84 84 if(header.data_type ==envi_header::float32)
85   - ((bsq<float>*)file)->set_buffer(memfrac);
  85 + ((bsq<float>*)file)->set_buffer_frac(memfrac);
86 86 else if(header.data_type == envi_header::float64)
87   - ((bsq<double>*)file)->set_buffer(memfrac);
  87 + ((bsq<double>*)file)->set_buffer_frac(memfrac);
88 88 else
89 89 std::cout<<"ERROR: unidentified data type"<<std::endl;
90 90 }
91 91  
92 92 else if(header.interleave == envi_header::BIL){ //if the infile is bil file
93 93 if(header.data_type ==envi_header::float32)
94   - ((bil<float>*)file)->set_buffer(memfrac);
  94 + ((bil<float>*)file)->set_buffer_frac(memfrac);
95 95 else if(header.data_type == envi_header::float64)
96   - ((bil<double>*)file)->set_buffer(memfrac);
  96 + ((bil<double>*)file)->set_buffer_frac(memfrac);
97 97 else
98 98 std::cout<<"ERROR: unidentified data type"<<std::endl;
99 99 }
100 100  
101 101 else if(header.interleave == envi_header::BIP){ //if the infile is bip file
102 102 if(header.data_type ==envi_header::float32)
103   - ((bip<float>*)file)->set_buffer(memfrac);
  103 + ((bip<float>*)file)->set_buffer_frac(memfrac);
104 104 else if(header.data_type == envi_header::float64)
105   - ((bip<double>*)file)->set_buffer(memfrac);
  105 + ((bip<double>*)file)->set_buffer_frac(memfrac);
  106 + else
  107 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  108 + }
  109 +
  110 + else{
  111 + std::cout<<"ERROR: unidentified file type"<<std::endl;
  112 + exit(1);
  113 + }
  114 + }
  115 +
  116 + void set_buffer_raw(size_t bytes){
  117 + if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
  118 + if(header.data_type ==envi_header::float32)
  119 + ((bsq<float>*)file)->set_buffer_raw(bytes);
  120 + else if(header.data_type == envi_header::float64)
  121 + ((bsq<double>*)file)->set_buffer_raw(bytes);
  122 + else
  123 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  124 + }
  125 +
  126 + else if(header.interleave == envi_header::BIL){ //if the infile is bil file
  127 + if(header.data_type ==envi_header::float32)
  128 + ((bil<float>*)file)->set_buffer_raw(bytes);
  129 + else if(header.data_type == envi_header::float64)
  130 + ((bil<double>*)file)->set_buffer_raw(bytes);
  131 + else
  132 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  133 + }
  134 +
  135 + else if(header.interleave == envi_header::BIP){ //if the infile is bip file
  136 + if(header.data_type ==envi_header::float32)
  137 + ((bip<float>*)file)->set_buffer_raw(bytes);
  138 + else if(header.data_type == envi_header::float64)
  139 + ((bip<double>*)file)->set_buffer_raw(bytes);
106 140 else
107 141 std::cout<<"ERROR: unidentified data type"<<std::endl;
108 142 }
... ... @@ -121,6 +155,16 @@ public:
121 155 exit(1);
122 156 }
123 157  
  158 + size_t X(){ return header.samples; }
  159 + size_t Y(){ return header.lines; }
  160 + size_t Z(){ return header.bands; }
  161 + size_t B(){ return Z(); }
  162 +
  163 + /// Return the size of the data set in bytes
  164 + size_t bytes(){
  165 + return X() * Y() * Z() * type_size();
  166 + }
  167 +
124 168 /// Returns the progress of the current processing operation as a percentage
125 169 void reset_progress(){
126 170  
... ... @@ -193,6 +237,42 @@ public:
193 237 return 0;
194 238 }
195 239  
  240 + /// Returns the progress of the current processing operation as a percentage
  241 + size_t data_rate(){
  242 +
  243 + if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
  244 + if(header.data_type ==envi_header::float32)
  245 + return ((bsq<float>*)file)->get_data_rate();
  246 + else if(header.data_type == envi_header::float64)
  247 + return ((bsq<double>*)file)->get_data_rate();
  248 + else
  249 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  250 + }
  251 +
  252 + else if(header.interleave == envi_header::BIL){ //if the infile is bil file
  253 + if(header.data_type ==envi_header::float32)
  254 + return ((bil<float>*)file)->get_data_rate();
  255 + else if(header.data_type == envi_header::float64)
  256 + return ((bil<double>*)file)->get_data_rate();
  257 + else
  258 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  259 + }
  260 +
  261 + else if(header.interleave == envi_header::BIP){ //if the infile is bip file
  262 + if(header.data_type ==envi_header::float32)
  263 + return ((bip<float>*)file)->get_data_rate();
  264 + else if(header.data_type == envi_header::float64)
  265 + return ((bip<double>*)file)->get_data_rate();
  266 + else
  267 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  268 + }
  269 +
  270 + else{
  271 + std::cout<<"ERROR: unidentified file type"<<std::endl;
  272 + }
  273 + return 0;
  274 + }
  275 +
196 276 /// Allocate memory for a new ENVI file based on the current interleave format (BIP, BIL, BSQ) and data type.
197 277 void allocate(){
198 278  
... ... @@ -509,7 +589,7 @@ public:
509 589  
510 590 /// @param outfile is the file name for the converted output
511 591 /// @param interleave is the interleave format for the destination file
512   - bool convert(std::string outfile, stim::envi_header::interleaveType interleave, bool PROGRESS = false){
  592 + bool convert(std::string outfile, stim::envi_header::interleaveType interleave, bool PROGRESS = false, bool VERBOSE = false, bool OPTIMIZATION = true){
513 593  
514 594 if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
515 595  
... ... @@ -519,10 +599,10 @@ public:
519 599 exit(1);
520 600 }
521 601 else if(interleave == envi_header::BIL) //convert BSQ -> BIL
522   - ((bsq<float>*)file)->bil(outfile, PROGRESS);
  602 + ((bsq<float>*)file)->bil(outfile, PROGRESS, VERBOSE, OPTIMIZATION);
523 603 else if(interleave == envi_header::BIP){ //ERROR
524 604 //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;
525   - ((bsq<float>*)file)->bip(outfile, PROGRESS);
  605 + ((bsq<float>*)file)->bip(outfile, PROGRESS, VERBOSE);
526 606 //exit(1);
527 607 }
528 608 }
... ... @@ -533,10 +613,10 @@ public:
533 613 exit(1);
534 614 }
535 615 else if(interleave == envi_header::BIL) //convert BSQ -> BIL
536   - ((bsq<double>*)file)->bil(outfile, PROGRESS);
  616 + ((bsq<double>*)file)->bil(outfile, PROGRESS, OPTIMIZATION);
537 617 else if(interleave == envi_header::BIP){ //ERROR
538 618 //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;
539   - ((bsq<float>*)file)->bip(outfile, PROGRESS);
  619 + ((bsq<float>*)file)->bip(outfile, PROGRESS, OPTIMIZATION);
540 620 //exit(1);
541 621 }
542 622 }
... ...
stim/envi/hsi.h
... ... @@ -202,7 +202,7 @@ public:
202 202 }
203 203 }
204 204  
205   - void read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){
  205 + size_t read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){
206 206 size_t d[3]; //position in the binary coordinate system
207 207 size_t sd[3]; //size in the binary coordinate system
208 208  
... ... @@ -214,10 +214,7 @@ public:
214 214 sd[O[1]] = sy;
215 215 sd[O[2]] = sz;
216 216  
217   - if(!binary<T>::read(dest, d[0], d[1], d[2], sd[0], sd[1], sd[2])){
218   - std::cout<<"error reading block in stim::hsi: ("<<d[0]<<", "<<d[1]<<", "<<d[2]<<") - ["<<sd[0]<<", "<<sd[1]<<", "<<sd[2]<<"]"<<std::endl;
219   - exit(1);
220   - }
  217 + return binary<T>::read(dest, d[0], d[1], d[2], sd[0], sd[1], sd[2]);
221 218 }
222 219  
223 220 };
... ...