Commit c6251f8b0b348def72fa86d6c194200de27aeb1d
1 parent
8933935f
allows user to deactivate optimization for profiling
Showing
3 changed files
with
130 additions
and
35 deletions
Show diff stats
stim/envi/binary.h
... | ... | @@ -54,7 +54,7 @@ protected: |
54 | 54 | public: |
55 | 55 | |
56 | 56 | //constructor initializes a stream optimizer |
57 | - stream_optimizer(size_t min_batch_size, size_t max_batch_size, double a = 0.001, double probe_step = 5, size_t window = 2000){ | |
57 | + stream_optimizer(size_t min_batch_size, size_t max_batch_size, double a = 0.003, size_t probe_step = 5, size_t window = 2000){ | |
58 | 58 | //Bps = 0; //initialize to zero bytes per second processed |
59 | 59 | Bps[0] = Bps[1] = 0; //initialize the bytes per second to 0 |
60 | 60 | interval_B = 0; //zero bytes have been processed at initialization |
... | ... | @@ -72,25 +72,26 @@ public: |
72 | 72 | forward_diff = true; //start with the forward difference (since we start at the maximum batch size) |
73 | 73 | } |
74 | 74 | |
75 | - size_t update(size_t bytes_processed, size_t ms_spent){ | |
75 | + size_t update(size_t bytes_processed, size_t ms_spent, size_t& data_rate, bool VERBOSE = false){ | |
76 | 76 | interval_B += bytes_processed; //increment the number of bytes processed |
77 | 77 | interval_ms += ms_spent; //increment the number of milliseconds spent processing |
78 | + data_rate = interval_B / interval_ms; | |
78 | 79 | |
79 | 80 | //if we have sufficient information to evaluate the optimization function at this point |
80 | 81 | if(interval_ms < window_ms){ //if insufficient time has passed to get a reliable Bps measurement |
81 | 82 | return n[0]; |
82 | 83 | } |
83 | 84 | else{ //if we have collected enough information for a reliable Bps estimate |
84 | - size_t new_Bps = interval_B / interval_ms; //calculate the current Bps | |
85 | - | |
85 | + | |
86 | 86 | if(Bps[0] == 0){ //if n[0] hasn't been evaluated yet, this is the first step |
87 | - Bps[0] = new_Bps; //set the initial Bps value | |
87 | + Bps[0] = data_rate; //set the initial Bps value | |
88 | 88 | n[1] = n[0] - h; //set the position of the next sample point |
89 | - std::cout<<"Bps value at n = "<<n[0]<<" is "<<Bps[0]<<" Bps, probing n = "<<n[1]<<std::endl; | |
89 | + if(VERBOSE) | |
90 | + std::cout<<"Bps value at n = "<<n[0]<<" is "<<Bps[0]<<" Bps, probing n = "<<n[1]<<std::endl; | |
90 | 91 | return n[1]; //return the probe point |
91 | 92 | } |
92 | 93 | else{ |
93 | - Bps[1] = new_Bps; //set the Bps for the current point (n[1]) | |
94 | + Bps[1] = data_rate; //set the Bps for the current point (n[1]) | |
94 | 95 | |
95 | 96 | double Bps_p; //allocate a variable for the derivative |
96 | 97 | //calculate the derivative |
... | ... | @@ -100,8 +101,8 @@ public: |
100 | 101 | else{ |
101 | 102 | Bps_p = ((double)Bps[0] - (double)Bps[1]) / (double)h; //calculate the derivative using the backward finite difference |
102 | 103 | } |
103 | - | |
104 | - std::cout<<" probed n = "<<n[1]<<" with "<<Bps[1]<<" Bps, gradient = "<<Bps_p<<" Bps"<<std::endl; | |
104 | + if(VERBOSE) | |
105 | + std::cout<<" probed n = "<<n[1]<<" with "<<Bps[1]<<" Bps, gradient = "<<Bps_p<<" Bps"<<std::endl; | |
105 | 106 | |
106 | 107 | double new_n_precise = n[0] + alpha * Bps_p; //calculate the next point (snap to closest integer) |
107 | 108 | size_t new_n_nearest = round_limit(new_n_precise); //calculate the next point (given batch parameters) |
... | ... | @@ -110,7 +111,8 @@ public: |
110 | 111 | Bps[0] = Bps[1]; //update the Bps |
111 | 112 | //if(n[0] == dn) n[1] = n[0] + h; //if we're on the left edge, probe forward |
112 | 113 | //else n[1] = n[0] - h; //otherwise probe backwards |
113 | - std::cout<<" staying at n = "<<n[0]<<" for now"<<std::endl; | |
114 | + if(VERBOSE) | |
115 | + std::cout<<" staying at n = "<<n[0]<<" for now"<<std::endl; | |
114 | 116 | //return n[1]; //return the probe point |
115 | 117 | |
116 | 118 | Bps[0] = 0; //reset the Bps for the current point |
... | ... | @@ -119,7 +121,8 @@ public: |
119 | 121 | else{ //if the newest point is different from the original point |
120 | 122 | n[0] = new_n_nearest; //move to the new point |
121 | 123 | Bps[0] = 0; //set the Bps to zero (point hasn't been tested) |
122 | - std::cout<<" moving to n = "<<n[0]<<std::endl; | |
124 | + if(VERBOSE) | |
125 | + std::cout<<" moving to n = "<<n[0]<<std::endl; | |
123 | 126 | return n[0]; //return the new point |
124 | 127 | } |
125 | 128 | } |
... | ... | @@ -187,11 +190,11 @@ public: |
187 | 190 | } |
188 | 191 | }*/ |
189 | 192 | |
190 | - size_t update(size_t bytes_processed, size_t ms_spent, size_t& data_rate){ | |
191 | - size_t time = update(bytes_processed, ms_spent); | |
193 | + /*size_t update(size_t bytes_processed, size_t ms_spent, size_t& data_rate, bool VERBOSE){ | |
194 | + size_t time = update(bytes_processed, ms_spent, VERBOSE); | |
192 | 195 | data_rate = Bps[0]; |
193 | 196 | return time; |
194 | - } | |
197 | + }*/ | |
195 | 198 | }; |
196 | 199 | |
197 | 200 | /** This class manages the streaming of large multidimensional binary files. |
... | ... | @@ -213,6 +216,7 @@ protected: |
213 | 216 | unsigned char* mask; //pointer to a character array: 0 = background, 1 = foreground (or valid data) |
214 | 217 | |
215 | 218 | double progress; //stores the progress on the current operation (accessible using a thread) |
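219 | + size_t data_rate; //data rate (NOTE(review): stream_optimizer::update computes this as bytes / milliseconds, i.e. bytes per ms rather than Bps - confirm units) | |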
216 | 220 | |
217 | 221 | size_t buffer_size; //available memory for processing large files |
218 | 222 | |
... | ... | @@ -222,8 +226,9 @@ protected: |
222 | 226 | header = 0; //initialize the header size to zero |
223 | 227 | mask = NULL; |
224 | 228 | |
225 | - progress = 0; | |
226 | - set_buffer(); //set the maximum buffer size to the default | |
229 | + progress = 0; //initialize the progress for any algorithm to zero | |
230 | + data_rate = 0; //initialize the data rate to zero | |
231 | + set_buffer_frac(); //set the maximum buffer size to the default | |
227 | 232 | } |
228 | 233 | |
229 | 234 | /// Private helper function that returns the size of the file on disk using system functions. |
... | ... | @@ -304,8 +309,12 @@ public: |
304 | 309 | progress = 0; |
305 | 310 | } |
306 | 311 | |
312 | + size_t get_data_rate(){ | |
313 | + return data_rate; | |
314 | + } | |
315 | + | |
307 | 316 | //specify the maximum fraction of available memory that this class will use for buffering |
308 | - void set_buffer(double mem_frac = 0.5){ //default to 50% | |
317 | + void set_buffer_frac(double mem_frac = 0.5){ //default to 50% | |
309 | 318 | #ifdef _WIN32 |
310 | 319 | MEMORYSTATUSEX statex; |
311 | 320 | statex.dwLength = sizeof (statex); |
... | ... | @@ -318,6 +327,10 @@ public: |
318 | 327 | #endif |
319 | 328 | } |
320 | 329 | |
330 | + void set_buffer_raw(size_t bytes){ | |
331 | + buffer_size = bytes; | |
332 | + } | |
333 | + | |
321 | 334 | /// Open a binary file for streaming. |
322 | 335 | |
323 | 336 | /// @param filename is the name of the binary file | ... | ... |
stim/envi/bsq.h
... | ... | @@ -389,7 +389,7 @@ public: |
389 | 389 | } |
390 | 390 | |
391 | 391 | /// Convert this BSQ file to a BIL |
392 | - bool bil(std::string outname, bool PROGRESS = false, bool VERBOSE = false){ | |
392 | + bool bil(std::string outname, bool PROGRESS = false, bool VERBOSE = false, bool OPTIMIZATION = true){ | |
393 | 393 | |
394 | 394 | const size_t buffers = 4; //number of buffers required for this algorithm |
395 | 395 | |
... | ... | @@ -398,10 +398,10 @@ public: |
398 | 398 | size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case) |
399 | 399 | size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints |
400 | 400 | |
401 | - if(VERBOSE){ | |
401 | + //if(VERBOSE){ | |
402 | 402 | std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl; |
403 | - std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<")"<<std::endl; | |
404 | - } | |
403 | + std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl; | |
404 | + //} | |
405 | 405 | |
406 | 406 | if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error |
407 | 407 | std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl; |
... | ... | @@ -435,7 +435,7 @@ public: |
435 | 435 | size_t pt_total = 0; //total time spent processing data |
436 | 436 | size_t rt_total = 0; //total time spent reading data |
437 | 437 | size_t wt_total = 0; |
438 | - size_t data_rate; | |
438 | + size_t dr = 0; | |
439 | 439 | |
440 | 440 | rt_total += readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer |
441 | 441 | y_load += N[0]; //increment the loaded slice counter |
... | ... | @@ -458,11 +458,13 @@ public: |
458 | 458 | wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file |
459 | 459 | y_proc += N[b]; //increment the counter of processed pixels |
460 | 460 | if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels |
461 | + if(y_load < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations | |
461 | 462 | t_end = std::chrono::high_resolution_clock::now(); |
462 | 463 | t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count(); |
463 | 464 | t_total += t_batch; |
464 | - if(y_load < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations | |
465 | - N[b] = O.update(N[!b] * slice_bytes, t_batch, data_rate); //set the batch size based on optimization | |
465 | + if(OPTIMIZATION) | |
466 | + N[b] = O.update(N[!b] * slice_bytes, t_batch, binary<T>::data_rate, VERBOSE); //set the batch size based on optimization | |
467 | + //binary<T>::data_rate = dr; | |
466 | 468 | //std::cout<<"New N = "<<N[!b]<<" selected with "<<(double)data_rate / 1000000<<" MB/s"<<std::endl; |
467 | 469 | } |
468 | 470 | ... | ... |
stim/envi/envi.h
... | ... | @@ -79,30 +79,64 @@ public: |
79 | 79 | return alloc_array(header.samples * header.lines); |
80 | 80 | } |
81 | 81 | |
82 | - void set_buffer(double memfrac = 0.5){ | |
82 | + void set_buffer_frac(double memfrac = 0.5){ | |
83 | 83 | if(header.interleave == envi_header::BSQ){ //if the infile is bsq file |
84 | 84 | if(header.data_type ==envi_header::float32) |
85 | - ((bsq<float>*)file)->set_buffer(memfrac); | |
85 | + ((bsq<float>*)file)->set_buffer_frac(memfrac); | |
86 | 86 | else if(header.data_type == envi_header::float64) |
87 | - ((bsq<double>*)file)->set_buffer(memfrac); | |
87 | + ((bsq<double>*)file)->set_buffer_frac(memfrac); | |
88 | 88 | else |
89 | 89 | std::cout<<"ERROR: unidentified data type"<<std::endl; |
90 | 90 | } |
91 | 91 | |
92 | 92 | else if(header.interleave == envi_header::BIL){ //if the infile is bil file |
93 | 93 | if(header.data_type ==envi_header::float32) |
94 | - ((bil<float>*)file)->set_buffer(memfrac); | |
94 | + ((bil<float>*)file)->set_buffer_frac(memfrac); | |
95 | 95 | else if(header.data_type == envi_header::float64) |
96 | - ((bil<double>*)file)->set_buffer(memfrac); | |
96 | + ((bil<double>*)file)->set_buffer_frac(memfrac); | |
97 | 97 | else |
98 | 98 | std::cout<<"ERROR: unidentified data type"<<std::endl; |
99 | 99 | } |
100 | 100 | |
101 | 101 | else if(header.interleave == envi_header::BIP){ //if the infile is bip file |
102 | 102 | if(header.data_type ==envi_header::float32) |
103 | - ((bip<float>*)file)->set_buffer(memfrac); | |
103 | + ((bip<float>*)file)->set_buffer_frac(memfrac); | |
104 | 104 | else if(header.data_type == envi_header::float64) |
105 | - ((bip<double>*)file)->set_buffer(memfrac); | |
105 | + ((bip<double>*)file)->set_buffer_frac(memfrac); | |
106 | + else | |
107 | + std::cout<<"ERROR: unidentified data type"<<std::endl; | |
108 | + } | |
109 | + | |
110 | + else{ | |
111 | + std::cout<<"ERROR: unidentified file type"<<std::endl; | |
112 | + exit(1); | |
113 | + } | |
114 | + } | |
115 | + | |
116 | + void set_buffer_raw(size_t bytes){ | |
117 | + if(header.interleave == envi_header::BSQ){ //if the infile is bsq file | |
118 | + if(header.data_type ==envi_header::float32) | |
119 | + ((bsq<float>*)file)->set_buffer_raw(bytes); | |
120 | + else if(header.data_type == envi_header::float64) | |
121 | + ((bsq<double>*)file)->set_buffer_raw(bytes); | |
122 | + else | |
123 | + std::cout<<"ERROR: unidentified data type"<<std::endl; | |
124 | + } | |
125 | + | |
126 | + else if(header.interleave == envi_header::BIL){ //if the infile is bil file | |
127 | + if(header.data_type ==envi_header::float32) | |
128 | + ((bil<float>*)file)->set_buffer_raw(bytes); | |
129 | + else if(header.data_type == envi_header::float64) | |
130 | + ((bil<double>*)file)->set_buffer_raw(bytes); | |
131 | + else | |
132 | + std::cout<<"ERROR: unidentified data type"<<std::endl; | |
133 | + } | |
134 | + | |
135 | + else if(header.interleave == envi_header::BIP){ //if the infile is bip file | |
136 | + if(header.data_type ==envi_header::float32) | |
137 | + ((bip<float>*)file)->set_buffer_raw(bytes); | |
138 | + else if(header.data_type == envi_header::float64) | |
139 | + ((bip<double>*)file)->set_buffer_raw(bytes); | |
106 | 140 | else |
107 | 141 | std::cout<<"ERROR: unidentified data type"<<std::endl; |
108 | 142 | } |
... | ... | @@ -121,6 +155,16 @@ public: |
121 | 155 | exit(1); |
122 | 156 | } |
123 | 157 | |
158 | + size_t X(){ return header.samples; } | |
159 | + size_t Y(){ return header.lines; } | |
160 | + size_t Z(){ return header.bands; } | |
161 | + size_t B(){ return Z(); } | |
162 | + | |
163 | + /// Return the size of the data set in bytes | |
164 | + size_t bytes(){ | |
165 | + return X() * Y() * Z() * type_size(); | |
166 | + } | |
167 | + | |
124 | 168 | /// Returns the progress of the current processing operation as a percentage |
125 | 169 | void reset_progress(){ |
126 | 170 | |
... | ... | @@ -193,6 +237,42 @@ public: |
193 | 237 | return 0; |
194 | 238 | } |
195 | 239 | |
240 | + /// Returns the data rate reported by the underlying file object (get_data_rate()); returns 0 if the interleave or data type is unidentified | |
241 | + size_t data_rate(){ | |
242 | + | |
243 | + if(header.interleave == envi_header::BSQ){ //if the infile is bsq file | |
244 | + if(header.data_type ==envi_header::float32) | |
245 | + return ((bsq<float>*)file)->get_data_rate(); | |
246 | + else if(header.data_type == envi_header::float64) | |
247 | + return ((bsq<double>*)file)->get_data_rate(); | |
248 | + else | |
249 | + std::cout<<"ERROR: unidentified data type"<<std::endl; | |
250 | + } | |
251 | + | |
252 | + else if(header.interleave == envi_header::BIL){ //if the infile is bil file | |
253 | + if(header.data_type ==envi_header::float32) | |
254 | + return ((bil<float>*)file)->get_data_rate(); | |
255 | + else if(header.data_type == envi_header::float64) | |
256 | + return ((bil<double>*)file)->get_data_rate(); | |
257 | + else | |
258 | + std::cout<<"ERROR: unidentified data type"<<std::endl; | |
259 | + } | |
260 | + | |
261 | + else if(header.interleave == envi_header::BIP){ //if the infile is bip file | |
262 | + if(header.data_type ==envi_header::float32) | |
263 | + return ((bip<float>*)file)->get_data_rate(); | |
264 | + else if(header.data_type == envi_header::float64) | |
265 | + return ((bip<double>*)file)->get_data_rate(); | |
266 | + else | |
267 | + std::cout<<"ERROR: unidentified data type"<<std::endl; | |
268 | + } | |
269 | + | |
270 | + else{ | |
271 | + std::cout<<"ERROR: unidentified file type"<<std::endl; | |
272 | + } | |
273 | + return 0; | |
274 | + } | |
275 | + | |
196 | 276 | /// Allocate memory for a new ENVI file based on the current interleave format (BIP, BIL, BSQ) and data type. |
197 | 277 | void allocate(){ |
198 | 278 | |
... | ... | @@ -509,7 +589,7 @@ public: |
509 | 589 | |
510 | 590 | /// @param outfile is the file name for the converted output |
511 | 591 | /// @param interleave is the interleave format for the destination file |
512 | - bool convert(std::string outfile, stim::envi_header::interleaveType interleave, bool PROGRESS = false, bool VERBOSE = false){ | |
592 | + bool convert(std::string outfile, stim::envi_header::interleaveType interleave, bool PROGRESS = false, bool VERBOSE = false, bool OPTIMIZATION = true){ | |
513 | 593 | |
514 | 594 | if(header.interleave == envi_header::BSQ){ //if the infile is bsq file |
515 | 595 | |
... | ... | @@ -519,7 +599,7 @@ public: |
519 | 599 | exit(1); |
520 | 600 | } |
521 | 601 | else if(interleave == envi_header::BIL) //convert BSQ -> BIL |
522 | - ((bsq<float>*)file)->bil(outfile, PROGRESS, VERBOSE); | |
602 | + ((bsq<float>*)file)->bil(outfile, PROGRESS, VERBOSE, OPTIMIZATION); | |
523 | 603 | else if(interleave == envi_header::BIP){ //ERROR |
524 | 604 | //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl; |
525 | 605 | ((bsq<float>*)file)->bip(outfile, PROGRESS, VERBOSE); |
... | ... | @@ -533,10 +613,10 @@ public: |
533 | 613 | exit(1); |
534 | 614 | } |
535 | 615 | else if(interleave == envi_header::BIL) //convert BSQ -> BIL |
536 | - ((bsq<double>*)file)->bil(outfile, PROGRESS); | |
616 | + ((bsq<double>*)file)->bil(outfile, PROGRESS, OPTIMIZATION); | |
537 | 617 | else if(interleave == envi_header::BIP){ //ERROR |
538 | 618 | //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl; |
539 | - ((bsq<float>*)file)->bip(outfile, PROGRESS); | |
619 | + ((bsq<float>*)file)->bip(outfile, PROGRESS, OPTIMIZATION); | |
540 | 620 | //exit(1); |
541 | 621 | } |
542 | 622 | } | ... | ... |