Commit 9c5854ecbb1a2f8e30a3468588a7063bb51c5a52

Authored by Pavel Govyadinov
2 parents 42b3c74f c6251f8b

Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib

stim/envi/binary.h
@@ -8,7 +8,7 @@ @@ -8,7 +8,7 @@
8 #include <fstream> 8 #include <fstream>
9 #include <sys/stat.h> 9 #include <sys/stat.h>
10 #include <cstring> 10 #include <cstring>
11 - 11 +#include <chrono>
12 #ifdef _WIN32 12 #ifdef _WIN32
13 #include <Windows.h> 13 #include <Windows.h>
14 #else 14 #else
@@ -17,6 +17,186 @@ @@ -17,6 +17,186 @@
17 17
18 namespace stim{ 18 namespace stim{
19 19
  20 +/// This class calculates the optimal setting for independent parameter b (batch size) for
  21 +/// minimizing the dependent parameter bps (bytes per second)
  22 +class stream_optimizer{
  23 +protected:
  24 + size_t Bps[2]; //bytes per second for the previous batch
  25 + size_t interval_B; //number of bytes processed this interval
  26 + size_t interval_ms; //number of milliseconds spent in the current interval
  27 + size_t n[2]; //current batch size (in bytes)
  28 + size_t h; //spacing used for finite difference calculations
  29 + size_t dn; //delta value (in bytes) for setting the batch size (minimum change in batch parameter)
  30 + size_t maxn; //maximum value for the batch size
  31 +
  32 + double alpha; //alpha value controls the factor of the gradient that is used to calculate the next point (speed of convergence)
  33 +
  34 + bool sample_step; //calculating the derivative (this class alternates between calculating dBps and B)
  35 + bool forward_diff; //evaluate the derivative using forward differences
  36 +
  37 + size_t window_ms; //size of the interval (in milliseconds) integrated to get a reliable bps value
  38 +
  39 + // This function rounds x to the nearest value within dB
  40 + size_t round_limit(double n0){
  41 + if(n0 < 0) return dn; //if n0 is less than zero, return the lowest possible n
  42 +
  43 + size_t new_n = (size_t)(n0 + 0.5); //now n0 must be positive, so round it to the nearest integer
  44 + if(new_n > maxn) new_n = maxn; //limit the returned size of x to within the specified bounds
  45 +
  46 + size_t lowest = new_n / dn;
  47 + size_t highest = lowest + dn;
  48 + size_t diff[2] = {new_n - lowest, highest - new_n}; //calculate the two differences
  49 + if(diff[0] < diff[1])
  50 + return lowest;
  51 + return highest;
  52 + }
  53 +
  54 +public:
  55 +
  56 + //constructor initializes a stream optimizer
  57 + stream_optimizer(size_t min_batch_size, size_t max_batch_size, double a = 0.003, size_t probe_step = 5, size_t window = 2000){
  58 + //Bps = 0; //initialize to zero bytes per second processed
  59 + Bps[0] = Bps[1] = 0; //initialize the bits per second to 0
  60 + interval_B = 0; //zero bytes have been processed at initialization
  61 + interval_ms = 0; //no time has been spent on the batch so far
  62 + dn = min_batch_size; //set the minimum batch size as the minimum change in batch size
  63 + maxn = max_batch_size; //set the maximum batch size
  64 + n[0] = max_batch_size; //set B
  65 + h = (max_batch_size / min_batch_size) / probe_step * dn;
  66 + std::cout<<"h = "<<h<<std::endl;
  67 + if(h < dn) h = dn;
  68 + alpha = a;
  69 + //n[0] = round_limit( (max_batch_size - min_batch_size)/2 );
  70 + window_ms = window; //minimum integration interval (for getting a reliable bps measure)
  71 + sample_step = true; //the first step is to calculate the derivative
  72 + forward_diff = true; //start with the forward difference (since we start at the maximum batch size)
  73 + }
  74 +
  75 + size_t update(size_t bytes_processed, size_t ms_spent, size_t& data_rate, bool VERBOSE = false){
  76 + interval_B += bytes_processed; //increment the number of bytes processed
  77 + interval_ms += ms_spent; //increment the number of milliseconds spent processing
  78 + data_rate = interval_B / interval_ms;
  79 +
  80 + //if we have sufficient information to evaluate the optimization function at this point
  81 + if(interval_ms < window_ms){ //if insufficient time has passed to get a reliable Bps measurement
  82 + return n[0];
  83 + }
  84 + else{ //if we have collected enough information for a reliable Bps estimate
  85 +
  86 + if(Bps[0] == 0){ //if n[0] hasn't been evaluated yet, this is the first step
  87 + Bps[0] = data_rate; //set the initial Bps value
  88 + n[1] = n[0] - h; //set the position of the next sample point
  89 + if(VERBOSE)
  90 + std::cout<<"Bps value at n = "<<n[0]<<" is "<<Bps[0]<<" Bps, probing n = "<<n[1]<<std::endl;
  91 + return n[1]; //return the probe point
  92 + }
  93 + else{
  94 + Bps[1] = data_rate; //set the Bps for the current point (n[1])
  95 +
  96 + double Bps_p; //allocate a variable for the derivative
  97 + //calculate the derivative
  98 + if(n[0] < n[1]){ //if the current point is less than the previous one (probably the most common)
  99 + Bps_p = ((double)Bps[1] - (double)Bps[0]) / (double)h; //calculate the derivative using the forward finite difference
  100 + }
  101 + else{
  102 + Bps_p = ((double)Bps[0] - (double)Bps[1]) / (double)h; //calculate the derivative using the backward finite difference
  103 + }
  104 + if(VERBOSE)
  105 + std::cout<<" probed n = "<<n[1]<<" with "<<Bps[1]<<" Bps, gradient = "<<Bps_p<<" Bps"<<std::endl;
  106 +
  107 + double new_n_precise = n[0] + alpha * Bps_p; //calculate the next point (snap to closest integer)
  108 + size_t new_n_nearest = round_limit(new_n_precise); //calculate the next point (given batch parameters)
  109 +
  110 + if(new_n_nearest == n[0]){ //if the newest point is the same as the original point
  111 + Bps[0] = Bps[1]; //update the Bps
  112 + //if(n[0] == dn) n[1] = n[0] + h; //if we're on the left edge, probe forward
  113 + //else n[1] = n[0] - h; //otherwise probe backwards
  114 + if(VERBOSE)
  115 + std::cout<<" staying at n = "<<n[0]<<" for now"<<std::endl;
  116 + //return n[1]; //return the probe point
  117 +
  118 + Bps[0] = 0; //reset the Bps for the current point
  119 + return n[0]; //return the current point for a re-calculation
  120 + }
  121 + else{ //if the newest point is different from the original point
  122 + n[0] = new_n_nearest; //move to the new point
  123 + Bps[0] = 0; //set the Bps to zero (point hasn't been tested)
  124 + if(VERBOSE)
  125 + std::cout<<" moving to n = "<<n[0]<<std::endl;
  126 + return n[0]; //return the new point
  127 + }
  128 + }
  129 + }
  130 + }
  131 +
  132 + /*// this function updates the optimizer, given the number of bytes processed in an interval and time spent processing
  133 + size_t update(size_t bytes_processed, size_t ms_spent){
  134 + interval_B += bytes_processed; //increment the number of bytes processed
  135 + interval_ms += ms_spent; //increment the number of milliseconds spent processing
  136 +
  137 + //if we have sufficient information to evaluate the optimization function at this point
  138 + if(interval_ms >= window_ms){ //if sufficient time has passed to get a reliable Bps measurement
  139 + size_t new_Bps = interval_B / interval_ms; //calculate the current Bps
  140 +
  141 + if(sample_step){ //if this is a sample step, collect the information for Bps = f(n0)
  142 + Bps = new_Bps; //set the Bps to the evaluated value
  143 + n[1] = n[0] - dn; //reduce the batch size by one delta to take a second sample
  144 + if(n[1] == 0){ //if the resulting batch size is zero
  145 + n[1] = 2*dn; //we're at the left edge: set the new sample point to 2*dn
  146 + }
  147 +
  148 + interval_B = interval_ms = 0; //start a new interval at the new sample point
  149 + sample_step = false; //next step will calculate the new batch size via optimization
  150 + return n[1]; //return the new batch size
  151 + }
  152 + else{ //if we have sufficient information to evaluate the derivative and optimize
  153 + double f = (double)new_Bps; //we have evaluated the function at this location
  154 + double fprime;
  155 + if(n[1] < n[0] ){ //if the new point is less than the previous point (usually the case)
  156 + fprime = (double)(Bps - new_Bps) / (double)dn; //calculate the forward difference
  157 + }
  158 + else{ //if the new point is larger (only happens at the minimum limit)
  159 + fprime = (double)(new_Bps - Bps) / (double)dn; //calculate the backward difference
  160 + }
  161 + size_t bestn = n[1] - (size_t)(f / fprime); //calculate the best value for B using Newton's method
  162 + n[0] = round_limit( (size_t)bestn ); //set the new dependent point
  163 + sample_step = true; //the next step will be a sample step
  164 + }
  165 +
  166 + }
  167 + if(sample_step) return n[0];
  168 + return n[1]; //insufficient information, keep the same batch size
  169 + }*/
  170 +
  171 + /*size_t update(size_t bytes_processed, size_t ms_spent){
  172 + interval_B += bytes_processed; //increment the number of bytes processed
  173 + interval_ms += ms_spent; //increment the number of milliseconds spent processing
  174 +
  175 + //if( Bps[0] == 0 ){ //if the left boundary hasn't been processed
  176 +
  177 +
  178 + //if we have sufficient information to evaluate the optimization function at this point
  179 + if(interval_ms >= window_ms){
  180 + size_t new_Bps = interval_B / interval_ms; //calculate the current Bps
  181 +
  182 + if(Bps[0] == 0) //if the left interval Bps hasn't been calculated
  183 + Bps[0] = interval_B / interval_ms; //that is the interval being processed
  184 + else
  185 + Bps[1] = interval_B / interval_ms; //otherwise the right interval is being processed
  186 +
  187 + if(Bps[0] != 0 && Bps[1] != 0){ //if both intervals have been processed
  188 +
  189 +
  190 + }
  191 + }*/
  192 +
  193 + /*size_t update(size_t bytes_processed, size_t ms_spent, size_t& data_rate, bool VERBOSE){
  194 + size_t time = update(bytes_processed, ms_spent, VERBOSE);
  195 + data_rate = Bps[0];
  196 + return time;
  197 + }*/
  198 +};
  199 +
20 /** This class manages the streaming of large multidimensional binary files. 200 /** This class manages the streaming of large multidimensional binary files.
21 * Generally these are hyperspectral files with 2 spatial and 1 spectral dimension. However, this class supports 201 * Generally these are hyperspectral files with 2 spatial and 1 spectral dimension. However, this class supports
22 * other dimensions via the template parameter D. 202 * other dimensions via the template parameter D.
@@ -36,6 +216,7 @@ protected: @@ -36,6 +216,7 @@ protected:
36 unsigned char* mask; //pointer to a character array: 0 = background, 1 = foreground (or valid data) 216 unsigned char* mask; //pointer to a character array: 0 = background, 1 = foreground (or valid data)
37 217
38 double progress; //stores the progress on the current operation (accessible using a thread) 218 double progress; //stores the progress on the current operation (accessible using a thread)
  219 + size_t data_rate; //data rate (currently in Bps)
39 220
40 size_t buffer_size; //available memory for processing large files 221 size_t buffer_size; //available memory for processing large files
41 222
@@ -45,8 +226,9 @@ protected: @@ -45,8 +226,9 @@ protected:
45 header = 0; //initialize the header size to zero 226 header = 0; //initialize the header size to zero
46 mask = NULL; 227 mask = NULL;
47 228
48 - progress = 0;  
49 - set_buffer(); //set the maximum buffer size to the default 229 + progress = 0; //initialize the progress for any algorithm to zero
  230 + data_rate = 0; //initialize the data rate to zero
  231 + set_buffer_frac(); //set the maximum buffer size to the default
50 } 232 }
51 233
52 /// Private helper function that returns the size of the file on disk using system functions. 234 /// Private helper function that returns the size of the file on disk using system functions.
@@ -127,8 +309,12 @@ public: @@ -127,8 +309,12 @@ public:
127 progress = 0; 309 progress = 0;
128 } 310 }
129 311
  312 + size_t get_data_rate(){
  313 + return data_rate;
  314 + }
  315 +
130 //specify the maximum fraction of available memory that this class will use for buffering 316 //specify the maximum fraction of available memory that this class will use for buffering
131 - void set_buffer(double mem_frac = 0.5){ //default to 50% 317 + void set_buffer_frac(double mem_frac = 0.5){ //default to 50%
132 #ifdef _WIN32 318 #ifdef _WIN32
133 MEMORYSTATUSEX statex; 319 MEMORYSTATUSEX statex;
134 statex.dwLength = sizeof (statex); 320 statex.dwLength = sizeof (statex);
@@ -141,6 +327,10 @@ public: @@ -141,6 +327,10 @@ public:
141 #endif 327 #endif
142 } 328 }
143 329
  330 + void set_buffer_raw(size_t bytes){
  331 + buffer_size = bytes;
  332 + }
  333 +
144 /// Open a binary file for streaming. 334 /// Open a binary file for streaming.
145 335
146 /// @param filename is the name of the binary file 336 /// @param filename is the name of the binary file
@@ -404,8 +594,8 @@ public: @@ -404,8 +594,8 @@ public:
404 } 594 }
405 595
406 /// Reads a block specified by an (x, y, z) position and size using the largest possible contiguous reads 596 /// Reads a block specified by an (x, y, z) position and size using the largest possible contiguous reads
407 - bool read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){  
408 - 597 + size_t read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){
  598 + auto t0 = std::chrono::high_resolution_clock::now();
409 size_t size_bytes = sx * sy * sz * sizeof(T); //size of the block to read in bytes 599 size_t size_bytes = sx * sy * sz * sizeof(T); //size of the block to read in bytes
410 600
411 size_t start = z * R[0] * R[1] + y * R[0] + x; //calculate the start postion 601 size_t start = z * R[0] * R[1] + y * R[0] + x; //calculate the start postion
@@ -415,10 +605,8 @@ public: @@ -415,10 +605,8 @@ public:
415 605
416 if(sx == R[0] && sy == R[1]){ //if sx and sy result in a contiguous volume along z 606 if(sx == R[0] && sy == R[1]){ //if sx and sy result in a contiguous volume along z
417 file.read((char*)dest, size_bytes); //read the block in one pass 607 file.read((char*)dest, size_bytes); //read the block in one pass
418 - return true;  
419 } 608 }
420 -  
421 - if(sx == R[0]){ //if sx is contiguous, read each z-axis slice can be read in one pass 609 + else if(sx == R[0]){ //if sx is contiguous, read each z-axis slice can be read in one pass
422 size_t jump_bytes = (R[1] - sy) * R[0] * sizeof(T); //jump between each slice 610 size_t jump_bytes = (R[1] - sy) * R[0] * sizeof(T); //jump between each slice
423 size_t slice_bytes = sx * sy * sizeof(T); //size of the slice to be read 611 size_t slice_bytes = sx * sy * sizeof(T); //size of the slice to be read
424 for(size_t zi = 0; zi < sz; zi++){ //for each z-axis slice 612 for(size_t zi = 0; zi < sz; zi++){ //for each z-axis slice
@@ -426,29 +614,31 @@ public: @@ -426,29 +614,31 @@ public:
426 dest += sx * sy; //move the destination pointer to the next slice 614 dest += sx * sy; //move the destination pointer to the next slice
427 file.seekg(jump_bytes, std::ios::cur); //skip to the next slice in the file 615 file.seekg(jump_bytes, std::ios::cur); //skip to the next slice in the file
428 } 616 }
429 - return true;  
430 } 617 }
431 -  
432 - //in this case, x is not contiguous so the volume must be read line-by-line  
433 - size_t jump_x_bytes = (R[0] - sx) * sizeof(T); //number of bytes skipped in the x direction  
434 - size_t jump_y_bytes = (R[1] - sy) * R[0] * sizeof(T) + jump_x_bytes; //number of bytes skipped between slices  
435 - size_t line_bytes = sx * sizeof(T); //size of the line to be read  
436 - size_t zi, yi;  
437 - for(zi = 0; zi < sz; zi++){ //for each slice  
438 - file.read((char*)dest, line_bytes); //read the first line  
439 - for(yi = 1; yi < sy; yi++){ //read each additional line  
440 - dest += sx; //move the pointer in the destination block to the next line  
441 - file.seekg(jump_x_bytes, std::ios::cur); //skip to the next line in the file  
442 - file.read((char*)dest, line_bytes); //read the line to the destination block 618 + else{
  619 + //in this case, x is not contiguous so the volume must be read line-by-line
  620 + size_t jump_x_bytes = (R[0] - sx) * sizeof(T); //number of bytes skipped in the x direction
  621 + size_t jump_y_bytes = (R[1] - sy) * R[0] * sizeof(T) + jump_x_bytes; //number of bytes skipped between slices
  622 + size_t line_bytes = sx * sizeof(T); //size of the line to be read
  623 + size_t zi, yi;
  624 + for(zi = 0; zi < sz; zi++){ //for each slice
  625 + file.read((char*)dest, line_bytes); //read the first line
  626 + for(yi = 1; yi < sy; yi++){ //read each additional line
  627 + dest += sx; //move the pointer in the destination block to the next line
  628 + file.seekg(jump_x_bytes, std::ios::cur); //skip to the next line in the file
  629 + file.read((char*)dest, line_bytes); //read the line to the destination block
  630 + }
  631 + file.seekg(jump_y_bytes, std::ios::cur); //skip to the beginning of the next slice
443 } 632 }
444 - file.seekg(jump_y_bytes, std::ios::cur); //skip to the beginning of the next slice  
445 } 633 }
446 - return false; 634 + auto t1 = std::chrono::high_resolution_clock::now();
  635 + return std::chrono::duration_cast<std::chrono::milliseconds>(t1-t0).count();
447 } 636 }
448 637
449 // permutes a block of data from the current interleave to the interleave specified (re-arranged dimensions to the order specified by [d0, d1, d2]) 638 // permutes a block of data from the current interleave to the interleave specified (re-arranged dimensions to the order specified by [d0, d1, d2])
450 639
451 - void permute(T* dest, T* src, size_t sx, size_t sy, size_t sz, size_t d0, size_t d1, size_t d2){ 640 + size_t permute(T* dest, T* src, size_t sx, size_t sy, size_t sz, size_t d0, size_t d1, size_t d2){
  641 + auto t0 = std::chrono::high_resolution_clock::now();
452 size_t d[3] = {d0, d1, d2}; 642 size_t d[3] = {d0, d1, d2};
453 size_t s[3] = {sx, sy, sz}; 643 size_t s[3] = {sx, sy, sz};
454 size_t p[3];// = {x, y, z}; 644 size_t p[3];// = {x, y, z};
@@ -467,7 +657,6 @@ public: @@ -467,7 +657,6 @@ public:
467 p[1] = y; 657 p[1] = y;
468 src_idx = z * sx * sy + y * sx; 658 src_idx = z * sx * sy + y * sx;
469 dest_idx = p[d[2]] * s[d[0]] * s[d[1]] + p[d[1]] * s[d[0]]; 659 dest_idx = p[d[2]] * s[d[0]] * s[d[1]] + p[d[1]] * s[d[0]];
470 - //std::cout<<z<<", "<<y<<" ------- "<<p[d[2]]<<" * "<<s[d[0]]<<" * "<<s[d[1]]<<" + "<<p[d[1]]<<" * "<<s[d[0]]<<std::endl;  
471 memcpy(dest + dest_idx, src + src_idx, x_bytes); 660 memcpy(dest + dest_idx, src + src_idx, x_bytes);
472 } 661 }
473 } 662 }
@@ -491,6 +680,8 @@ public: @@ -491,6 +680,8 @@ public:
491 } 680 }
492 } 681 }
493 } 682 }
  683 + auto t1 = std::chrono::high_resolution_clock::now();
  684 + return std::chrono::duration_cast<std::chrono::milliseconds>(t1-t0).count();
494 } 685 }
495 686
496 }; 687 };
@@ -10,7 +10,7 @@ @@ -10,7 +10,7 @@
10 #include <deque> 10 #include <deque>
11 #include <chrono> 11 #include <chrono>
12 #include <future> 12 #include <future>
13 - 13 +#include <algorithm>
14 14
15 15
16 namespace stim{ 16 namespace stim{
@@ -377,24 +377,40 @@ public: @@ -377,24 +377,40 @@ public:
377 377
378 } 378 }
379 379
380 - void readlines(T* dest, size_t start, size_t n){  
381 - hsi<T>::read(dest, 0, start, 0, X(), n, Z()); 380 + size_t readlines(T* dest, size_t start, size_t n){
  381 + return hsi<T>::read(dest, 0, start, 0, X(), n, Z());
  382 + }
  383 +
  384 + size_t writeblock(std::ofstream* f, T* src, size_t n){
  385 + auto t0 = std::chrono::high_resolution_clock::now();
  386 + f->write((char*)src, n);
  387 + auto t1 = std::chrono::high_resolution_clock::now();
  388 + return std::chrono::duration_cast<std::chrono::milliseconds>(t1-t0).count();
382 } 389 }
383 390
384 /// Convert this BSQ file to a BIL 391 /// Convert this BSQ file to a BIL
385 - bool bil(std::string outname, bool PROGRESS = false){ 392 + bool bil(std::string outname, bool PROGRESS = false, bool VERBOSE = false, bool OPTIMIZATION = true){
386 393
387 const size_t buffers = 4; //number of buffers required for this algorithm 394 const size_t buffers = 4; //number of buffers required for this algorithm
  395 +
388 size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch 396 size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch
389 397
390 size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case) 398 size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case)
391 size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints 399 size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints
  400 +
  401 + //if(VERBOSE){
  402 + std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl;
  403 + std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl;
  404 + //}
  405 +
392 if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error 406 if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error
393 std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl; 407 std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl;
394 exit(1); 408 exit(1);
395 } 409 }
396 size_t max_batch_bytes = max_slices_per_batch * slice_bytes; //calculate the amount of memory that will be allocated for all four buffers 410 size_t max_batch_bytes = max_slices_per_batch * slice_bytes; //calculate the amount of memory that will be allocated for all four buffers
397 411
  412 + stream_optimizer O(1, max_slices_per_batch);
  413 +
398 T* src[2]; //source double-buffer for asynchronous batching 414 T* src[2]; //source double-buffer for asynchronous batching
399 src[0] = (T*) malloc(max_batch_bytes); 415 src[0] = (T*) malloc(max_batch_bytes);
400 src[1] = (T*) malloc(max_batch_bytes); 416 src[1] = (T*) malloc(max_batch_bytes);
@@ -403,54 +419,70 @@ public: @@ -403,54 +419,70 @@ public:
403 dst[1] = (T*) malloc(max_batch_bytes); 419 dst[1] = (T*) malloc(max_batch_bytes);
404 420
405 size_t N[2]; //number of slices stored in buffers 0 and 1 421 size_t N[2]; //number of slices stored in buffers 0 and 1
406 - N[0] = N[1] = min(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set) 422 + N[0] = N[1] = std::min<size_t>(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set)
407 423
408 std::ofstream target(outname.c_str(), std::ios::binary); //open an output file for writing 424 std::ofstream target(outname.c_str(), std::ios::binary); //open an output file for writing
409 //initialize with buffer 0 (used for double buffering) 425 //initialize with buffer 0 (used for double buffering)
410 size_t y_load = 0; 426 size_t y_load = 0;
411 size_t y_proc = 0; 427 size_t y_proc = 0;
412 - std::future<void> rthread; 428 + std::future<size_t> rthread;
413 std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing 429 std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing
414 430
415 - readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer  
416 - y_load += N[0]; //increment the loaded slice counter  
417 - int b = 1;  
418 -  
419 - std::chrono::high_resolution_clock::time_point t_start; //high-resolution timers  
420 - std::chrono::high_resolution_clock::time_point t_end; 431 + std::chrono::high_resolution_clock::time_point t_start, pt_start; //high-resolution timers
  432 + std::chrono::high_resolution_clock::time_point t_end, pt_end;
421 size_t t_batch; //number of milliseconds to process a batch 433 size_t t_batch; //number of milliseconds to process a batch
422 - size_t t_total = 0; 434 + size_t t_total = 0; //total time for operation
  435 + size_t pt_total = 0; //total time spent processing data
  436 + size_t rt_total = 0; //total time spent reading data
  437 + size_t wt_total = 0;
  438 + size_t dr = 0;
  439 +
  440 + rt_total += readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer
  441 + y_load += N[0]; //increment the loaded slice counter
  442 + int b = 1; //initialize the double buffer to 0
423 while(y_proc < Y()){ //while there are still slices to be processed 443 while(y_proc < Y()){ //while there are still slices to be processed
424 t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch 444 t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch
425 if(y_load < Y()){ //if there are still slices to be loaded, load them 445 if(y_load < Y()){ //if there are still slices to be loaded, load them
  446 + //if(y_proc > 0){
  447 +
  448 +
  449 + //}
426 if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size 450 if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size
427 rthread = std::async(std::launch::async, &stim::bsq<T>::readlines, this, src[b], y_load, N[b]); 451 rthread = std::async(std::launch::async, &stim::bsq<T>::readlines, this, src[b], y_load, N[b]);
428 - 452 + //rt_total += rthread.get();
429 y_load += N[b]; //increment the number of loaded slices 453 y_load += N[b]; //increment the number of loaded slices
430 } 454 }
431 455
432 b = !b; //swap the double-buffer 456 b = !b; //swap the double-buffer
433 -  
434 - binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 0, 2, 1); //permute the batch to a BIL file  
435 - target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file 457 + pt_total += binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 0, 2, 1); //permute the batch to a BIL file
  458 + wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file
436 y_proc += N[b]; //increment the counter of processed pixels 459 y_proc += N[b]; //increment the counter of processed pixels
437 if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels 460 if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels
  461 + if(y_load < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations
438 t_end = std::chrono::high_resolution_clock::now(); 462 t_end = std::chrono::high_resolution_clock::now();
439 t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count(); 463 t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count();
440 t_total += t_batch; 464 t_total += t_batch;
441 - rthread.wait(); 465 + if(OPTIMIZATION)
  466 + N[b] = O.update(N[!b] * slice_bytes, t_batch, binary<T>::data_rate, VERBOSE); //set the batch size based on optimization
  467 + //binary<T>::data_rate = dr;
  468 + //std::cout<<"New N = "<<N[!b]<<" selected with "<<(double)data_rate / 1000000<<" MB/s"<<std::endl;
442 } 469 }
443 -  
444 - std::cout<<"Total time to execute: "<<t_total<<" ms"<<std::endl; 470 +
445 free(src[0]); //free buffer resources 471 free(src[0]); //free buffer resources
446 free(src[1]); 472 free(src[1]);
447 free(dst[0]); 473 free(dst[0]);
448 free(dst[1]); 474 free(dst[1]);
  475 + if(VERBOSE){
  476 + std::cout<<"total time to execute bsq::bil(): "<<t_total<<" ms"<<std::endl;
  477 + std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl;
  478 + std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl;
  479 + std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl;
  480 + }
449 return true; //return true 481 return true; //return true
450 } 482 }
451 483
452 /// Convert this BSQ file to a BIP 484 /// Convert this BSQ file to a BIP
453 - bool bip(std::string outname, bool PROGRESS = false){ 485 + bool bip(std::string outname, bool PROGRESS = false, bool VERBOSE = false){
454 486
455 const size_t buffers = 4; //number of buffers required for this algorithm 487 const size_t buffers = 4; //number of buffers required for this algorithm
456 size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch 488 size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch
@@ -471,13 +503,13 @@ public: @@ -471,13 +503,13 @@ public:
471 dst[1] = (T*) malloc(max_batch_bytes); 503 dst[1] = (T*) malloc(max_batch_bytes);
472 504
473 size_t N[2]; //number of slices stored in buffers 0 and 1 505 size_t N[2]; //number of slices stored in buffers 0 and 1
474 - N[0] = N[1] = min(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set) 506 + N[0] = N[1] = std::min<size_t>(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set)
475 507
476 std::ofstream target(outname.c_str(), std::ios::binary); //open an output file for writing 508 std::ofstream target(outname.c_str(), std::ios::binary); //open an output file for writing
477 //initialize with buffer 0 (used for double buffering) 509 //initialize with buffer 0 (used for double buffering)
478 size_t y_load = 0; 510 size_t y_load = 0;
479 size_t y_proc = 0; 511 size_t y_proc = 0;
480 - std::future<void> rthread; 512 + std::future<size_t> rthread;
481 std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing 513 std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing
482 514
483 readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer 515 readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer
@@ -488,6 +520,8 @@ public: @@ -488,6 +520,8 @@ public:
488 std::chrono::high_resolution_clock::time_point t_end; 520 std::chrono::high_resolution_clock::time_point t_end;
489 size_t t_batch; //number of milliseconds to process a batch 521 size_t t_batch; //number of milliseconds to process a batch
490 size_t t_total = 0; 522 size_t t_total = 0;
  523 + size_t pt_total = 0;
  524 + size_t rt_total = 0;
491 while(y_proc < Y()){ //while there are still slices to be processed 525 while(y_proc < Y()){ //while there are still slices to be processed
492 t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch 526 t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch
493 if(y_load < Y()){ //if there are still slices to be loaded, load them 527 if(y_load < Y()){ //if there are still slices to be loaded, load them
@@ -499,17 +533,21 @@ public: @@ -499,17 +533,21 @@ public:
499 533
500 b = !b; //swap the double-buffer 534 b = !b; //swap the double-buffer
501 535
502 - binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 2, 0, 1); //permute the batch to a BIP file 536 + pt_total += binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 2, 0, 1); //permute the batch to a BIP file
503 target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file 537 target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file
504 y_proc += N[b]; //increment the counter of processed pixels 538 y_proc += N[b]; //increment the counter of processed pixels
505 if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels 539 if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels
506 t_end = std::chrono::high_resolution_clock::now(); 540 t_end = std::chrono::high_resolution_clock::now();
507 t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count(); 541 t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count();
508 t_total += t_batch; 542 t_total += t_batch;
509 - rthread.wait(); 543 + if(y_load < Y()) rt_total += rthread.get(); //if a batch was threaded to load, make sure it finishes
510 } 544 }
511 545
512 - std::cout<<"Total time to execute: "<<t_total<<" ms"<<std::endl; 546 + if(VERBOSE){
  547 + std::cout<<"total time to execute bsq::bil(): "<<t_total<<" ms"<<std::endl;
  548 + std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl;
  549 + std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl;
  550 + }
513 free(src[0]); //free buffer resources 551 free(src[0]); //free buffer resources
514 free(src[1]); 552 free(src[1]);
515 free(dst[0]); 553 free(dst[0]);
@@ -79,30 +79,64 @@ public: @@ -79,30 +79,64 @@ public:
79 return alloc_array(header.samples * header.lines); 79 return alloc_array(header.samples * header.lines);
80 } 80 }
81 81
82 - void set_buffer(double memfrac = 0.5){ 82 + void set_buffer_frac(double memfrac = 0.5){
83 if(header.interleave == envi_header::BSQ){ //if the infile is bsq file 83 if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
84 if(header.data_type ==envi_header::float32) 84 if(header.data_type ==envi_header::float32)
85 - ((bsq<float>*)file)->set_buffer(memfrac); 85 + ((bsq<float>*)file)->set_buffer_frac(memfrac);
86 else if(header.data_type == envi_header::float64) 86 else if(header.data_type == envi_header::float64)
87 - ((bsq<double>*)file)->set_buffer(memfrac); 87 + ((bsq<double>*)file)->set_buffer_frac(memfrac);
88 else 88 else
89 std::cout<<"ERROR: unidentified data type"<<std::endl; 89 std::cout<<"ERROR: unidentified data type"<<std::endl;
90 } 90 }
91 91
92 else if(header.interleave == envi_header::BIL){ //if the infile is bil file 92 else if(header.interleave == envi_header::BIL){ //if the infile is bil file
93 if(header.data_type ==envi_header::float32) 93 if(header.data_type ==envi_header::float32)
94 - ((bil<float>*)file)->set_buffer(memfrac); 94 + ((bil<float>*)file)->set_buffer_frac(memfrac);
95 else if(header.data_type == envi_header::float64) 95 else if(header.data_type == envi_header::float64)
96 - ((bil<double>*)file)->set_buffer(memfrac); 96 + ((bil<double>*)file)->set_buffer_frac(memfrac);
97 else 97 else
98 std::cout<<"ERROR: unidentified data type"<<std::endl; 98 std::cout<<"ERROR: unidentified data type"<<std::endl;
99 } 99 }
100 100
101 else if(header.interleave == envi_header::BIP){ //if the infile is bip file 101 else if(header.interleave == envi_header::BIP){ //if the infile is bip file
102 if(header.data_type ==envi_header::float32) 102 if(header.data_type ==envi_header::float32)
103 - ((bip<float>*)file)->set_buffer(memfrac); 103 + ((bip<float>*)file)->set_buffer_frac(memfrac);
104 else if(header.data_type == envi_header::float64) 104 else if(header.data_type == envi_header::float64)
105 - ((bip<double>*)file)->set_buffer(memfrac); 105 + ((bip<double>*)file)->set_buffer_frac(memfrac);
  106 + else
  107 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  108 + }
  109 +
  110 + else{
  111 + std::cout<<"ERROR: unidentified file type"<<std::endl;
  112 + exit(1);
  113 + }
  114 + }
  115 +
  116 + void set_buffer_raw(size_t bytes){
  117 + if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
  118 + if(header.data_type ==envi_header::float32)
  119 + ((bsq<float>*)file)->set_buffer_raw(bytes);
  120 + else if(header.data_type == envi_header::float64)
  121 + ((bsq<double>*)file)->set_buffer_raw(bytes);
  122 + else
  123 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  124 + }
  125 +
  126 + else if(header.interleave == envi_header::BIL){ //if the infile is bil file
  127 + if(header.data_type ==envi_header::float32)
  128 + ((bil<float>*)file)->set_buffer_raw(bytes);
  129 + else if(header.data_type == envi_header::float64)
  130 + ((bil<double>*)file)->set_buffer_raw(bytes);
  131 + else
  132 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  133 + }
  134 +
  135 + else if(header.interleave == envi_header::BIP){ //if the infile is bip file
  136 + if(header.data_type ==envi_header::float32)
  137 + ((bip<float>*)file)->set_buffer_raw(bytes);
  138 + else if(header.data_type == envi_header::float64)
  139 + ((bip<double>*)file)->set_buffer_raw(bytes);
106 else 140 else
107 std::cout<<"ERROR: unidentified data type"<<std::endl; 141 std::cout<<"ERROR: unidentified data type"<<std::endl;
108 } 142 }
@@ -121,6 +155,16 @@ public: @@ -121,6 +155,16 @@ public:
121 exit(1); 155 exit(1);
122 } 156 }
123 157
  158 + size_t X(){ return header.samples; }
  159 + size_t Y(){ return header.lines; }
  160 + size_t Z(){ return header.bands; }
  161 + size_t B(){ return Z(); }
  162 +
  163 + /// Return the size of the data set in bytes
  164 + size_t bytes(){
  165 + return X() * Y() * Z() * type_size();
  166 + }
  167 +
124 /// Returns the progress of the current processing operation as a percentage 168 /// Returns the progress of the current processing operation as a percentage
125 void reset_progress(){ 169 void reset_progress(){
126 170
@@ -193,6 +237,42 @@ public: @@ -193,6 +237,42 @@ public:
193 return 0; 237 return 0;
194 } 238 }
195 239
  240 + /// Returns the progress of the current processing operation as a percentage
  241 + size_t data_rate(){
  242 +
  243 + if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
  244 + if(header.data_type ==envi_header::float32)
  245 + return ((bsq<float>*)file)->get_data_rate();
  246 + else if(header.data_type == envi_header::float64)
  247 + return ((bsq<double>*)file)->get_data_rate();
  248 + else
  249 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  250 + }
  251 +
  252 + else if(header.interleave == envi_header::BIL){ //if the infile is bil file
  253 + if(header.data_type ==envi_header::float32)
  254 + return ((bil<float>*)file)->get_data_rate();
  255 + else if(header.data_type == envi_header::float64)
  256 + return ((bil<double>*)file)->get_data_rate();
  257 + else
  258 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  259 + }
  260 +
  261 + else if(header.interleave == envi_header::BIP){ //if the infile is bip file
  262 + if(header.data_type ==envi_header::float32)
  263 + return ((bip<float>*)file)->get_data_rate();
  264 + else if(header.data_type == envi_header::float64)
  265 + return ((bip<double>*)file)->get_data_rate();
  266 + else
  267 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  268 + }
  269 +
  270 + else{
  271 + std::cout<<"ERROR: unidentified file type"<<std::endl;
  272 + }
  273 + return 0;
  274 + }
  275 +
196 /// Allocate memory for a new ENVI file based on the current interleave format (BIP, BIL, BSQ) and data type. 276 /// Allocate memory for a new ENVI file based on the current interleave format (BIP, BIL, BSQ) and data type.
197 void allocate(){ 277 void allocate(){
198 278
@@ -509,7 +589,7 @@ public: @@ -509,7 +589,7 @@ public:
509 589
510 /// @param outfile is the file name for the converted output 590 /// @param outfile is the file name for the converted output
511 /// @param interleave is the interleave format for the destination file 591 /// @param interleave is the interleave format for the destination file
512 - bool convert(std::string outfile, stim::envi_header::interleaveType interleave, bool PROGRESS = false){ 592 + bool convert(std::string outfile, stim::envi_header::interleaveType interleave, bool PROGRESS = false, bool VERBOSE = false, bool OPTIMIZATION = true){
513 593
514 if(header.interleave == envi_header::BSQ){ //if the infile is bsq file 594 if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
515 595
@@ -519,10 +599,10 @@ public: @@ -519,10 +599,10 @@ public:
519 exit(1); 599 exit(1);
520 } 600 }
521 else if(interleave == envi_header::BIL) //convert BSQ -> BIL 601 else if(interleave == envi_header::BIL) //convert BSQ -> BIL
522 - ((bsq<float>*)file)->bil(outfile, PROGRESS); 602 + ((bsq<float>*)file)->bil(outfile, PROGRESS, VERBOSE, OPTIMIZATION);
523 else if(interleave == envi_header::BIP){ //ERROR 603 else if(interleave == envi_header::BIP){ //ERROR
524 //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl; 604 //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;
525 - ((bsq<float>*)file)->bip(outfile, PROGRESS); 605 + ((bsq<float>*)file)->bip(outfile, PROGRESS, VERBOSE);
526 //exit(1); 606 //exit(1);
527 } 607 }
528 } 608 }
@@ -533,10 +613,10 @@ public: @@ -533,10 +613,10 @@ public:
533 exit(1); 613 exit(1);
534 } 614 }
535 else if(interleave == envi_header::BIL) //convert BSQ -> BIL 615 else if(interleave == envi_header::BIL) //convert BSQ -> BIL
536 - ((bsq<double>*)file)->bil(outfile, PROGRESS); 616 + ((bsq<double>*)file)->bil(outfile, PROGRESS, OPTIMIZATION);
537 else if(interleave == envi_header::BIP){ //ERROR 617 else if(interleave == envi_header::BIP){ //ERROR
538 //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl; 618 //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;
539 - ((bsq<float>*)file)->bip(outfile, PROGRESS); 619 + ((bsq<float>*)file)->bip(outfile, PROGRESS, OPTIMIZATION);
540 //exit(1); 620 //exit(1);
541 } 621 }
542 } 622 }
@@ -202,7 +202,7 @@ public: @@ -202,7 +202,7 @@ public:
202 } 202 }
203 } 203 }
204 204
205 - void read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){ 205 + size_t read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){
206 size_t d[3]; //position in the binary coordinate system 206 size_t d[3]; //position in the binary coordinate system
207 size_t sd[3]; //size in the binary coordinate system 207 size_t sd[3]; //size in the binary coordinate system
208 208
@@ -214,10 +214,7 @@ public: @@ -214,10 +214,7 @@ public:
214 sd[O[1]] = sy; 214 sd[O[1]] = sy;
215 sd[O[2]] = sz; 215 sd[O[2]] = sz;
216 216
217 - if(!binary<T>::read(dest, d[0], d[1], d[2], sd[0], sd[1], sd[2])){  
218 - std::cout<<"error reading block in stim::hsi: ("<<d[0]<<", "<<d[1]<<", "<<d[2]<<") - ["<<sd[0]<<", "<<sd[1]<<", "<<sd[2]<<"]"<<std::endl;  
219 - exit(1);  
220 - } 217 + return binary<T>::read(dest, d[0], d[1], d[2], sd[0], sd[1], sd[2]);
221 } 218 }
222 219
223 }; 220 };