Commit c6251f8b0b348def72fa86d6c194200de27aeb1d

Authored by David Mayerich
1 parent 8933935f

allows user to deactivate optimization for profiling

Showing 3 changed files with 130 additions and 35 deletions   Show diff stats
stim/envi/binary.h
... ... @@ -54,7 +54,7 @@ protected:
54 54 public:
55 55  
56 56 //constructor initializes a stream optimizer
57   - stream_optimizer(size_t min_batch_size, size_t max_batch_size, double a = 0.001, double probe_step = 5, size_t window = 2000){
  57 + stream_optimizer(size_t min_batch_size, size_t max_batch_size, double a = 0.003, size_t probe_step = 5, size_t window = 2000){
58 58 //Bps = 0; //initialize to zero bytes per second processed
59 59 Bps[0] = Bps[1] = 0; //initialize the bytes per second to 0
60 60 interval_B = 0; //zero bytes have been processed at initialization
... ... @@ -72,25 +72,26 @@ public:
72 72 forward_diff = true; //start with the forward difference (since we start at the maximum batch size)
73 73 }
74 74  
75   - size_t update(size_t bytes_processed, size_t ms_spent){
  75 + size_t update(size_t bytes_processed, size_t ms_spent, size_t& data_rate, bool VERBOSE = false){
76 76 interval_B += bytes_processed; //increment the number of bytes processed
77 77 interval_ms += ms_spent; //increment the number of milliseconds spent processing
  78 + data_rate = interval_B / interval_ms;
78 79  
79 80 //if we have sufficient information to evaluate the optimization function at this point
80 81 if(interval_ms < window_ms){ //if insufficient time has passed to get a reliable Bps measurement
81 82 return n[0];
82 83 }
83 84 else{ //if we have collected enough information for a reliable Bps estimate
84   - size_t new_Bps = interval_B / interval_ms; //calculate the current Bps
85   -
  85 +
86 86 if(Bps[0] == 0){ //if n[0] hasn't been evaluated yet, this is the first step
87   - Bps[0] = new_Bps; //set the initial Bps value
  87 + Bps[0] = data_rate; //set the initial Bps value
88 88 n[1] = n[0] - h; //set the position of the next sample point
89   - std::cout<<"Bps value at n = "<<n[0]<<" is "<<Bps[0]<<" Bps, probing n = "<<n[1]<<std::endl;
  89 + if(VERBOSE)
  90 + std::cout<<"Bps value at n = "<<n[0]<<" is "<<Bps[0]<<" Bps, probing n = "<<n[1]<<std::endl;
90 91 return n[1]; //return the probe point
91 92 }
92 93 else{
93   - Bps[1] = new_Bps; //set the Bps for the current point (n[1])
  94 + Bps[1] = data_rate; //set the Bps for the current point (n[1])
94 95  
95 96 double Bps_p; //allocate a variable for the derivative
96 97 //calculate the derivative
... ... @@ -100,8 +101,8 @@ public:
100 101 else{
101 102 Bps_p = ((double)Bps[0] - (double)Bps[1]) / (double)h; //calculate the derivative using the backward finite difference
102 103 }
103   -
104   - std::cout<<" probed n = "<<n[1]<<" with "<<Bps[1]<<" Bps, gradient = "<<Bps_p<<" Bps"<<std::endl;
  104 + if(VERBOSE)
  105 + std::cout<<" probed n = "<<n[1]<<" with "<<Bps[1]<<" Bps, gradient = "<<Bps_p<<" Bps"<<std::endl;
105 106  
106 107 double new_n_precise = n[0] + alpha * Bps_p; //calculate the next point (snap to closest integer)
107 108 size_t new_n_nearest = round_limit(new_n_precise); //calculate the next point (given batch parameters)
... ... @@ -110,7 +111,8 @@ public:
110 111 Bps[0] = Bps[1]; //update the Bps
111 112 //if(n[0] == dn) n[1] = n[0] + h; //if we're on the left edge, probe forward
112 113 //else n[1] = n[0] - h; //otherwise probe backwards
113   - std::cout<<" staying at n = "<<n[0]<<" for now"<<std::endl;
  114 + if(VERBOSE)
  115 + std::cout<<" staying at n = "<<n[0]<<" for now"<<std::endl;
114 116 //return n[1]; //return the probe point
115 117  
116 118 Bps[0] = 0; //reset the Bps for the current point
... ... @@ -119,7 +121,8 @@ public:
119 121 else{ //if the newest point is different from the original point
120 122 n[0] = new_n_nearest; //move to the new point
121 123 Bps[0] = 0; //set the Bps to zero (point hasn't been tested)
122   - std::cout<<" moving to n = "<<n[0]<<std::endl;
  124 + if(VERBOSE)
  125 + std::cout<<" moving to n = "<<n[0]<<std::endl;
123 126 return n[0]; //return the new point
124 127 }
125 128 }
... ... @@ -187,11 +190,11 @@ public:
187 190 }
188 191 }*/
189 192  
190   - size_t update(size_t bytes_processed, size_t ms_spent, size_t& data_rate){
191   - size_t time = update(bytes_processed, ms_spent);
  193 + /*size_t update(size_t bytes_processed, size_t ms_spent, size_t& data_rate, bool VERBOSE){
  194 + size_t time = update(bytes_processed, ms_spent, VERBOSE);
192 195 data_rate = Bps[0];
193 196 return time;
194   - }
  197 + }*/
195 198 };
196 199  
197 200 /** This class manages the streaming of large multidimensional binary files.
... ... @@ -213,6 +216,7 @@ protected:
213 216 unsigned char* mask; //pointer to a character array: 0 = background, 1 = foreground (or valid data)
214 217  
215 218 double progress; //stores the progress on the current operation (accessible using a thread)
  219 + size_t data_rate; //data rate (labelled Bps; stream_optimizer::update computes it as bytes / milliseconds)
216 220  
217 221 size_t buffer_size; //available memory for processing large files
218 222  
... ... @@ -222,8 +226,9 @@ protected:
222 226 header = 0; //initialize the header size to zero
223 227 mask = NULL;
224 228  
225   - progress = 0;
226   - set_buffer(); //set the maximum buffer size to the default
  229 + progress = 0; //initialize the progress for any algorithm to zero
  230 + data_rate = 0; //initialize the data rate to zero
  231 + set_buffer_frac(); //set the maximum buffer size to the default
227 232 }
228 233  
229 234 /// Private helper function that returns the size of the file on disk using system functions.
... ... @@ -304,8 +309,12 @@ public:
304 309 progress = 0;
305 310 }
306 311  
  312 + size_t get_data_rate(){
  313 + return data_rate;
  314 + }
  315 +
307 316 //specify the maximum fraction of available memory that this class will use for buffering
308   - void set_buffer(double mem_frac = 0.5){ //default to 50%
  317 + void set_buffer_frac(double mem_frac = 0.5){ //default to 50%
309 318 #ifdef _WIN32
310 319 MEMORYSTATUSEX statex;
311 320 statex.dwLength = sizeof (statex);
... ... @@ -318,6 +327,10 @@ public:
318 327 #endif
319 328 }
320 329  
  330 + void set_buffer_raw(size_t bytes){
  331 + buffer_size = bytes;
  332 + }
  333 +
321 334 /// Open a binary file for streaming.
322 335  
323 336 /// @param filename is the name of the binary file
... ...
stim/envi/bsq.h
... ... @@ -389,7 +389,7 @@ public:
389 389 }
390 390  
391 391 /// Convert this BSQ file to a BIL
392   - bool bil(std::string outname, bool PROGRESS = false, bool VERBOSE = false){
  392 + bool bil(std::string outname, bool PROGRESS = false, bool VERBOSE = false, bool OPTIMIZATION = true){
393 393  
394 394 const size_t buffers = 4; //number of buffers required for this algorithm
395 395  
... ... @@ -398,10 +398,10 @@ public:
398 398 size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case)
399 399 size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints
400 400  
401   - if(VERBOSE){
  401 + //if(VERBOSE){
402 402 std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl;
403   - std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<")"<<std::endl;
404   - }
  403 + std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl;
  404 + //}
405 405  
406 406 if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error
407 407 std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl;
... ... @@ -435,7 +435,7 @@ public:
435 435 size_t pt_total = 0; //total time spent processing data
436 436 size_t rt_total = 0; //total time spent reading data
437 437 size_t wt_total = 0;
438   - size_t data_rate;
  438 + size_t dr = 0;
439 439  
440 440 rt_total += readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer
441 441 y_load += N[0]; //increment the loaded slice counter
... ... @@ -458,11 +458,13 @@ public:
458 458 wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file
459 459 y_proc += N[b]; //increment the counter of processed pixels
460 460 if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels
  461 + if(y_load < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations
461 462 t_end = std::chrono::high_resolution_clock::now();
462 463 t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count();
463 464 t_total += t_batch;
464   - if(y_load < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations
465   - N[b] = O.update(N[!b] * slice_bytes, t_batch, data_rate); //set the batch size based on optimization
  465 + if(OPTIMIZATION)
  466 + N[b] = O.update(N[!b] * slice_bytes, t_batch, binary<T>::data_rate, VERBOSE); //set the batch size based on optimization
  467 + //binary<T>::data_rate = dr;
466 468 //std::cout<<"New N = "<<N[!b]<<" selected with "<<(double)data_rate / 1000000<<" MB/s"<<std::endl;
467 469 }
468 470  
... ...
stim/envi/envi.h
... ... @@ -79,30 +79,64 @@ public:
79 79 return alloc_array(header.samples * header.lines);
80 80 }
81 81  
82   - void set_buffer(double memfrac = 0.5){
  82 + void set_buffer_frac(double memfrac = 0.5){
83 83 if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
84 84 if(header.data_type ==envi_header::float32)
85   - ((bsq<float>*)file)->set_buffer(memfrac);
  85 + ((bsq<float>*)file)->set_buffer_frac(memfrac);
86 86 else if(header.data_type == envi_header::float64)
87   - ((bsq<double>*)file)->set_buffer(memfrac);
  87 + ((bsq<double>*)file)->set_buffer_frac(memfrac);
88 88 else
89 89 std::cout<<"ERROR: unidentified data type"<<std::endl;
90 90 }
91 91  
92 92 else if(header.interleave == envi_header::BIL){ //if the infile is bil file
93 93 if(header.data_type ==envi_header::float32)
94   - ((bil<float>*)file)->set_buffer(memfrac);
  94 + ((bil<float>*)file)->set_buffer_frac(memfrac);
95 95 else if(header.data_type == envi_header::float64)
96   - ((bil<double>*)file)->set_buffer(memfrac);
  96 + ((bil<double>*)file)->set_buffer_frac(memfrac);
97 97 else
98 98 std::cout<<"ERROR: unidentified data type"<<std::endl;
99 99 }
100 100  
101 101 else if(header.interleave == envi_header::BIP){ //if the infile is bip file
102 102 if(header.data_type ==envi_header::float32)
103   - ((bip<float>*)file)->set_buffer(memfrac);
  103 + ((bip<float>*)file)->set_buffer_frac(memfrac);
104 104 else if(header.data_type == envi_header::float64)
105   - ((bip<double>*)file)->set_buffer(memfrac);
  105 + ((bip<double>*)file)->set_buffer_frac(memfrac);
  106 + else
  107 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  108 + }
  109 +
  110 + else{
  111 + std::cout<<"ERROR: unidentified file type"<<std::endl;
  112 + exit(1);
  113 + }
  114 + }
  115 +
  116 + void set_buffer_raw(size_t bytes){
  117 + if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
  118 + if(header.data_type ==envi_header::float32)
  119 + ((bsq<float>*)file)->set_buffer_raw(bytes);
  120 + else if(header.data_type == envi_header::float64)
  121 + ((bsq<double>*)file)->set_buffer_raw(bytes);
  122 + else
  123 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  124 + }
  125 +
  126 + else if(header.interleave == envi_header::BIL){ //if the infile is bil file
  127 + if(header.data_type ==envi_header::float32)
  128 + ((bil<float>*)file)->set_buffer_raw(bytes);
  129 + else if(header.data_type == envi_header::float64)
  130 + ((bil<double>*)file)->set_buffer_raw(bytes);
  131 + else
  132 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  133 + }
  134 +
  135 + else if(header.interleave == envi_header::BIP){ //if the infile is bip file
  136 + if(header.data_type ==envi_header::float32)
  137 + ((bip<float>*)file)->set_buffer_raw(bytes);
  138 + else if(header.data_type == envi_header::float64)
  139 + ((bip<double>*)file)->set_buffer_raw(bytes);
106 140 else
107 141 std::cout<<"ERROR: unidentified data type"<<std::endl;
108 142 }
... ... @@ -121,6 +155,16 @@ public:
121 155 exit(1);
122 156 }
123 157  
  158 + size_t X(){ return header.samples; }
  159 + size_t Y(){ return header.lines; }
  160 + size_t Z(){ return header.bands; }
  161 + size_t B(){ return Z(); }
  162 +
  163 + /// Return the size of the data set in bytes
  164 + size_t bytes(){
  165 + return X() * Y() * Z() * type_size();
  166 + }
  167 +
124 168 /// Resets the progress of the current processing operation
125 169 void reset_progress(){
126 170  
... ... @@ -193,6 +237,42 @@ public:
193 237 return 0;
194 238 }
195 239  
  240 + /// Returns the data rate reported by the underlying file object (0 if the interleave or data type is unidentified)
  241 + size_t data_rate(){
  242 +
  243 + if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
  244 + if(header.data_type ==envi_header::float32)
  245 + return ((bsq<float>*)file)->get_data_rate();
  246 + else if(header.data_type == envi_header::float64)
  247 + return ((bsq<double>*)file)->get_data_rate();
  248 + else
  249 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  250 + }
  251 +
  252 + else if(header.interleave == envi_header::BIL){ //if the infile is bil file
  253 + if(header.data_type ==envi_header::float32)
  254 + return ((bil<float>*)file)->get_data_rate();
  255 + else if(header.data_type == envi_header::float64)
  256 + return ((bil<double>*)file)->get_data_rate();
  257 + else
  258 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  259 + }
  260 +
  261 + else if(header.interleave == envi_header::BIP){ //if the infile is bip file
  262 + if(header.data_type ==envi_header::float32)
  263 + return ((bip<float>*)file)->get_data_rate();
  264 + else if(header.data_type == envi_header::float64)
  265 + return ((bip<double>*)file)->get_data_rate();
  266 + else
  267 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  268 + }
  269 +
  270 + else{
  271 + std::cout<<"ERROR: unidentified file type"<<std::endl;
  272 + }
  273 + return 0;
  274 + }
  275 +
196 276 /// Allocate memory for a new ENVI file based on the current interleave format (BIP, BIL, BSQ) and data type.
197 277 void allocate(){
198 278  
... ... @@ -509,7 +589,7 @@ public:
509 589  
510 590 /// @param outfile is the file name for the converted output
511 591 /// @param interleave is the interleave format for the destination file
512   - bool convert(std::string outfile, stim::envi_header::interleaveType interleave, bool PROGRESS = false, bool VERBOSE = false){
  592 + bool convert(std::string outfile, stim::envi_header::interleaveType interleave, bool PROGRESS = false, bool VERBOSE = false, bool OPTIMIZATION = true){
513 593  
514 594 if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
515 595  
... ... @@ -519,7 +599,7 @@ public:
519 599 exit(1);
520 600 }
521 601 else if(interleave == envi_header::BIL) //convert BSQ -> BIL
522   - ((bsq<float>*)file)->bil(outfile, PROGRESS, VERBOSE);
  602 + ((bsq<float>*)file)->bil(outfile, PROGRESS, VERBOSE, OPTIMIZATION);
523 603 else if(interleave == envi_header::BIP){ //ERROR
524 604 //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;
525 605 ((bsq<float>*)file)->bip(outfile, PROGRESS, VERBOSE);
... ... @@ -533,10 +613,10 @@ public:
533 613 exit(1);
534 614 }
535 615 else if(interleave == envi_header::BIL) //convert BSQ -> BIL
536   - ((bsq<double>*)file)->bil(outfile, PROGRESS);
  616 + ((bsq<double>*)file)->bil(outfile, PROGRESS, OPTIMIZATION);
537 617 else if(interleave == envi_header::BIP){ //ERROR
538 618 //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;
539   - ((bsq<float>*)file)->bip(outfile, PROGRESS);
  619 + ((bsq<float>*)file)->bip(outfile, PROGRESS, OPTIMIZATION);
540 620 //exit(1);
541 621 }
542 622 }
... ...