Commit 9c5854ecbb1a2f8e30a3468588a7063bb51c5a52
Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib
Showing 4 changed files with 375 additions and 69 deletions
stim/envi/binary.h
... | ... | @@ -8,7 +8,7 @@ |
8 | 8 | #include <fstream> |
9 | 9 | #include <sys/stat.h> |
10 | 10 | #include <cstring> |
11 | - | |
11 | +#include <chrono> | |
12 | 12 | #ifdef _WIN32 |
13 | 13 | #include <Windows.h> |
14 | 14 | #else |
... | ... | @@ -17,6 +17,186 @@ |
17 | 17 | |
18 | 18 | namespace stim{ |
19 | 19 | |
20 | +/// This class calculates the optimal setting of the independent parameter n (batch size) that | |
21 | +/// maximizes the dependent parameter Bps (bytes per second) | |
22 | +class stream_optimizer{ | |
23 | +protected: | |
24 | +	size_t Bps[2];				//bytes per second measured at the current point (0) and at the probe point (1) | |
25 | + size_t interval_B; //number of bytes processed this interval | |
26 | + size_t interval_ms; //number of milliseconds spent in the current interval | |
27 | + size_t n[2]; //current batch size (in bytes) | |
28 | + size_t h; //spacing used for finite difference calculations | |
29 | + size_t dn; //delta value (in bytes) for setting the batch size (minimum change in batch parameter) | |
30 | + size_t maxn; //maximum value for the batch size | |
31 | + | |
32 | + double alpha; //alpha value controls the factor of the gradient that is used to calculate the next point (speed of convergence) | |
33 | + | |
34 | + bool sample_step; //calculating the derivative (this class alternates between calculating dBps and B) | |
35 | + bool forward_diff; //evaluate the derivative using forward differences | |
36 | + | |
37 | + size_t window_ms; //size of the interval (in milliseconds) integrated to get a reliable bps value | |
38 | + | |
39 | +	// This function rounds n0 to the nearest multiple of dn, clamped to the range [dn, maxn] | |
40 | + size_t round_limit(double n0){ | |
41 | + if(n0 < 0) return dn; //if n0 is less than zero, return the lowest possible n | |
42 | + | |
43 | + size_t new_n = (size_t)(n0 + 0.5); //now n0 must be positive, so round it to the nearest integer | |
44 | + if(new_n > maxn) new_n = maxn; //limit the returned size of x to within the specified bounds | |
45 | + | |
46 | +		size_t lowest = (new_n / dn) * dn;		//calculate the largest multiple of dn that does not exceed new_n | |
47 | +		size_t highest = lowest + dn;			//calculate the next multiple of dn | |
48 | + size_t diff[2] = {new_n - lowest, highest - new_n}; //calculate the two differences | |
49 | + if(diff[0] < diff[1]) | |
50 | + return lowest; | |
51 | + return highest; | |
52 | + } | |
53 | + | |
54 | +public: | |
55 | + | |
56 | + //constructor initializes a stream optimizer | |
57 | + stream_optimizer(size_t min_batch_size, size_t max_batch_size, double a = 0.003, size_t probe_step = 5, size_t window = 2000){ | |
58 | + //Bps = 0; //initialize to zero bytes per second processed | |
59 | +		Bps[0] = Bps[1] = 0;				//initialize the bytes-per-second estimates to 0 | |
60 | + interval_B = 0; //zero bytes have been processed at initialization | |
61 | + interval_ms = 0; //no time has been spent on the batch so far | |
62 | + dn = min_batch_size; //set the minimum batch size as the minimum change in batch size | |
63 | + maxn = max_batch_size; //set the maximum batch size | |
64 | +		n[0] = max_batch_size;				//start at the maximum batch size | |
65 | + h = (max_batch_size / min_batch_size) / probe_step * dn; | |
66 | + std::cout<<"h = "<<h<<std::endl; | |
67 | + if(h < dn) h = dn; | |
68 | + alpha = a; | |
69 | + //n[0] = round_limit( (max_batch_size - min_batch_size)/2 ); | |
70 | + window_ms = window; //minimum integration interval (for getting a reliable bps measure) | |
71 | + sample_step = true; //the first step is to calculate the derivative | |
72 | + forward_diff = true; //start with the forward difference (since we start at the maximum batch size) | |
73 | + } | |
74 | + | |
75 | + size_t update(size_t bytes_processed, size_t ms_spent, size_t& data_rate, bool VERBOSE = false){ | |
76 | + interval_B += bytes_processed; //increment the number of bytes processed | |
77 | + interval_ms += ms_spent; //increment the number of milliseconds spent processing | |
78 | +		data_rate = interval_ms ? interval_B / interval_ms : 0;	//update the measured data rate (guard against a zero-length interval) | |
79 | + | |
80 | + //if we have sufficient information to evaluate the optimization function at this point | |
81 | + if(interval_ms < window_ms){ //if insufficient time has passed to get a reliable Bps measurement | |
82 | + return n[0]; | |
83 | + } | |
84 | + else{ //if we have collected enough information for a reliable Bps estimate | |
85 | + | |
86 | + if(Bps[0] == 0){ //if n[0] hasn't been evaluated yet, this is the first step | |
87 | + Bps[0] = data_rate; //set the initial Bps value | |
88 | + n[1] = n[0] - h; //set the position of the next sample point | |
89 | + if(VERBOSE) | |
90 | + std::cout<<"Bps value at n = "<<n[0]<<" is "<<Bps[0]<<" Bps, probing n = "<<n[1]<<std::endl; | |
91 | + return n[1]; //return the probe point | |
92 | + } | |
93 | + else{ | |
94 | + Bps[1] = data_rate; //set the Bps for the current point (n[1]) | |
95 | + | |
96 | + double Bps_p; //allocate a variable for the derivative | |
97 | + //calculate the derivative | |
98 | +				if(n[0] < n[1]){			//if the probe point lies to the right of the current point | |
99 | + Bps_p = ((double)Bps[1] - (double)Bps[0]) / (double)h; //calculate the derivative using the forward finite difference | |
100 | + } | |
101 | + else{ | |
102 | + Bps_p = ((double)Bps[0] - (double)Bps[1]) / (double)h; //calculate the derivative using the backward finite difference | |
103 | + } | |
104 | + if(VERBOSE) | |
105 | + std::cout<<" probed n = "<<n[1]<<" with "<<Bps[1]<<" Bps, gradient = "<<Bps_p<<" Bps"<<std::endl; | |
106 | + | |
107 | +				double new_n_precise = n[0] + alpha * Bps_p;	//take a gradient ascent step toward a higher Bps | |
108 | + size_t new_n_nearest = round_limit(new_n_precise); //calculate the next point (given batch parameters) | |
109 | + | |
110 | + if(new_n_nearest == n[0]){ //if the newest point is the same as the original point | |
111 | + Bps[0] = Bps[1]; //update the Bps | |
112 | + //if(n[0] == dn) n[1] = n[0] + h; //if we're on the left edge, probe forward | |
113 | + //else n[1] = n[0] - h; //otherwise probe backwards | |
114 | + if(VERBOSE) | |
115 | + std::cout<<" staying at n = "<<n[0]<<" for now"<<std::endl; | |
116 | + //return n[1]; //return the probe point | |
117 | + | |
118 | + Bps[0] = 0; //reset the Bps for the current point | |
119 | + return n[0]; //return the current point for a re-calculation | |
120 | + } | |
121 | + else{ //if the newest point is different from the original point | |
122 | + n[0] = new_n_nearest; //move to the new point | |
123 | + Bps[0] = 0; //set the Bps to zero (point hasn't been tested) | |
124 | + if(VERBOSE) | |
125 | + std::cout<<" moving to n = "<<n[0]<<std::endl; | |
126 | + return n[0]; //return the new point | |
127 | + } | |
128 | + } | |
129 | + } | |
130 | + } | |
131 | + | |
132 | + /*// this function updates the optimizer, given the number of bytes processed in an interval and time spent processing | |
133 | + size_t update(size_t bytes_processed, size_t ms_spent){ | |
134 | + interval_B += bytes_processed; //increment the number of bytes processed | |
135 | + interval_ms += ms_spent; //increment the number of milliseconds spent processing | |
136 | + | |
137 | + //if we have sufficient information to evaluate the optimization function at this point | |
138 | + if(interval_ms >= window_ms){ //if sufficient time has passed to get a reliable Bps measurement | |
139 | + size_t new_Bps = interval_B / interval_ms; //calculate the current Bps | |
140 | + | |
141 | + if(sample_step){ //if this is a sample step, collect the information for Bps = f(n0) | |
142 | + Bps = new_Bps; //set the Bps to the evaluated value | |
143 | + n[1] = n[0] - dn; //reduce the batch size by one delta to take a second sample | |
144 | + if(n[1] == 0){ //if the resulting batch size is zero | |
145 | + n[1] = 2*dn; //we're at the left edge: set the new sample point to 2*dn | |
146 | + } | |
147 | + | |
148 | + interval_B = interval_ms = 0; //start a new interval at the new sample point | |
149 | + sample_step = false; //next step will calculate the new batch size via optimization | |
150 | + return n[1]; //return the new batch size | |
151 | + } | |
152 | + else{ //if we have sufficient information to evaluate the derivative and optimize | |
153 | + double f = (double)new_Bps; //we have evaluated the function at this location | |
154 | + double fprime; | |
155 | + if(n[1] < n[0] ){ //if the new point is less than the previous point (usually the case) | |
156 | + fprime = (double)(Bps - new_Bps) / (double)dn; //calculate the forward difference | |
157 | + } | |
158 | + else{ //if the new point is larger (only happens at the minimum limit) | |
159 | + fprime = (double)(new_Bps - Bps) / (double)dn; //calculate the backward difference | |
160 | + } | |
161 | + size_t bestn = n[1] - (size_t)(f / fprime); //calculate the best value for B using Newton's method | |
162 | + n[0] = round_limit( (size_t)bestn ); //set the new dependent point | |
163 | + sample_step = true; //the next step will be a sample step | |
164 | + } | |
165 | + | |
166 | + } | |
167 | + if(sample_step) return n[0]; | |
168 | + return n[1]; //insufficient information, keep the same batch size | |
169 | + }*/ | |
170 | + | |
171 | + /*size_t update(size_t bytes_processed, size_t ms_spent){ | |
172 | + interval_B += bytes_processed; //increment the number of bytes processed | |
173 | + interval_ms += ms_spent; //increment the number of milliseconds spent processing | |
174 | + | |
175 | + //if( Bps[0] == 0 ){ //if the left boundary hasn't been processed | |
176 | + | |
177 | + | |
178 | + //if we have sufficient information to evaluate the optimization function at this point | |
179 | + if(interval_ms >= window_ms){ | |
180 | + size_t new_Bps = interval_B / interval_ms; //calculate the current Bps | |
181 | + | |
182 | + if(Bps[0] == 0) //if the left interval Bps hasn't been calculated | |
183 | + Bps[0] = interval_B / interval_ms; //that is the interval being processed | |
184 | + else | |
185 | + Bps[1] = interval_B / interval_ms; //otherwise the right interval is being processed | |
186 | + | |
187 | + if(Bps[0] != 0 && Bps[1] != 0){ //if both intervals have been processed | |
188 | + | |
189 | + | |
190 | + } | |
191 | + }*/ | |
192 | + | |
193 | + /*size_t update(size_t bytes_processed, size_t ms_spent, size_t& data_rate, bool VERBOSE){ | |
194 | + size_t time = update(bytes_processed, ms_spent, VERBOSE); | |
195 | + data_rate = Bps[0]; | |
196 | + return time; | |
197 | + }*/ | |
198 | +}; | |
199 | + | |
20 | 200 | /** This class manages the streaming of large multidimensional binary files. |
21 | 201 | * Generally these are hyperspectral files with 2 spatial and 1 spectral dimension. However, this class supports |
22 | 202 | * other dimensions via the template parameter D. |
... | ... | @@ -36,6 +216,7 @@ protected: |
36 | 216 | unsigned char* mask; //pointer to a character array: 0 = background, 1 = foreground (or valid data) |
37 | 217 | |
38 | 218 | double progress; //stores the progress on the current operation (accessible using a thread) |
219 | + size_t data_rate; //data rate (currently in Bps) | |
39 | 220 | |
40 | 221 | size_t buffer_size; //available memory for processing large files |
41 | 222 | |
... | ... | @@ -45,8 +226,9 @@ protected: |
45 | 226 | header = 0; //initialize the header size to zero |
46 | 227 | mask = NULL; |
47 | 228 | |
48 | - progress = 0; | |
49 | - set_buffer(); //set the maximum buffer size to the default | |
229 | + progress = 0; //initialize the progress for any algorithm to zero | |
230 | + data_rate = 0; //initialize the data rate to zero | |
231 | + set_buffer_frac(); //set the maximum buffer size to the default | |
50 | 232 | } |
51 | 233 | |
52 | 234 | /// Private helper function that returns the size of the file on disk using system functions. |
... | ... | @@ -127,8 +309,12 @@ public: |
127 | 309 | progress = 0; |
128 | 310 | } |
129 | 311 | |
312 | + size_t get_data_rate(){ | |
313 | + return data_rate; | |
314 | + } | |
315 | + | |
130 | 316 | //specify the maximum fraction of available memory that this class will use for buffering |
131 | - void set_buffer(double mem_frac = 0.5){ //default to 50% | |
317 | + void set_buffer_frac(double mem_frac = 0.5){ //default to 50% | |
132 | 318 | #ifdef _WIN32 |
133 | 319 | MEMORYSTATUSEX statex; |
134 | 320 | statex.dwLength = sizeof (statex); |
... | ... | @@ -141,6 +327,10 @@ public: |
141 | 327 | #endif |
142 | 328 | } |
143 | 329 | |
330 | + void set_buffer_raw(size_t bytes){ | |
331 | + buffer_size = bytes; | |
332 | + } | |
333 | + | |
144 | 334 | /// Open a binary file for streaming. |
145 | 335 | |
146 | 336 | /// @param filename is the name of the binary file |
... | ... | @@ -404,8 +594,8 @@ public: |
404 | 594 | } |
405 | 595 | |
406 | 596 | /// Reads a block specified by an (x, y, z) position and size using the largest possible contiguous reads |
407 | - bool read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){ | |
408 | - | |
597 | + size_t read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){ | |
598 | + auto t0 = std::chrono::high_resolution_clock::now(); | |
409 | 599 | size_t size_bytes = sx * sy * sz * sizeof(T); //size of the block to read in bytes |
410 | 600 | |
411 | 601 | size_t start = z * R[0] * R[1] + y * R[0] + x; //calculate the start postion |
... | ... | @@ -415,10 +605,8 @@ public: |
415 | 605 | |
416 | 606 | if(sx == R[0] && sy == R[1]){ //if sx and sy result in a contiguous volume along z |
417 | 607 | file.read((char*)dest, size_bytes); //read the block in one pass |
418 | - return true; | |
419 | 608 | } |
420 | - | |
421 | - if(sx == R[0]){ //if sx is contiguous, read each z-axis slice can be read in one pass | |
609 | +		else if(sx == R[0]){				//if sx is contiguous, each z-axis slice can be read in one pass | |
422 | 610 | size_t jump_bytes = (R[1] - sy) * R[0] * sizeof(T); //jump between each slice |
423 | 611 | size_t slice_bytes = sx * sy * sizeof(T); //size of the slice to be read |
424 | 612 | for(size_t zi = 0; zi < sz; zi++){ //for each z-axis slice |
... | ... | @@ -426,29 +614,31 @@ public: |
426 | 614 | dest += sx * sy; //move the destination pointer to the next slice |
427 | 615 | file.seekg(jump_bytes, std::ios::cur); //skip to the next slice in the file |
428 | 616 | } |
429 | - return true; | |
430 | 617 | } |
431 | - | |
432 | - //in this case, x is not contiguous so the volume must be read line-by-line | |
433 | - size_t jump_x_bytes = (R[0] - sx) * sizeof(T); //number of bytes skipped in the x direction | |
434 | - size_t jump_y_bytes = (R[1] - sy) * R[0] * sizeof(T) + jump_x_bytes; //number of bytes skipped between slices | |
435 | - size_t line_bytes = sx * sizeof(T); //size of the line to be read | |
436 | - size_t zi, yi; | |
437 | - for(zi = 0; zi < sz; zi++){ //for each slice | |
438 | - file.read((char*)dest, line_bytes); //read the first line | |
439 | - for(yi = 1; yi < sy; yi++){ //read each additional line | |
440 | - dest += sx; //move the pointer in the destination block to the next line | |
441 | - file.seekg(jump_x_bytes, std::ios::cur); //skip to the next line in the file | |
442 | - file.read((char*)dest, line_bytes); //read the line to the destination block | |
618 | + else{ | |
619 | + //in this case, x is not contiguous so the volume must be read line-by-line | |
620 | + size_t jump_x_bytes = (R[0] - sx) * sizeof(T); //number of bytes skipped in the x direction | |
621 | + size_t jump_y_bytes = (R[1] - sy) * R[0] * sizeof(T) + jump_x_bytes; //number of bytes skipped between slices | |
622 | + size_t line_bytes = sx * sizeof(T); //size of the line to be read | |
623 | + size_t zi, yi; | |
624 | + for(zi = 0; zi < sz; zi++){ //for each slice | |
625 | + file.read((char*)dest, line_bytes); //read the first line | |
626 | + for(yi = 1; yi < sy; yi++){ //read each additional line | |
627 | + dest += sx; //move the pointer in the destination block to the next line | |
628 | + file.seekg(jump_x_bytes, std::ios::cur); //skip to the next line in the file | |
629 | + file.read((char*)dest, line_bytes); //read the line to the destination block | |
630 | + } | |
631 | + file.seekg(jump_y_bytes, std::ios::cur); //skip to the beginning of the next slice | |
443 | 632 | } |
444 | - file.seekg(jump_y_bytes, std::ios::cur); //skip to the beginning of the next slice | |
445 | 633 | } |
446 | - return false; | |
634 | + auto t1 = std::chrono::high_resolution_clock::now(); | |
635 | + return std::chrono::duration_cast<std::chrono::milliseconds>(t1-t0).count(); | |
447 | 636 | } |
448 | 637 | |
449 | 638 | // permutes a block of data from the current interleave to the interleave specified (re-arranged dimensions to the order specified by [d0, d1, d2]) |
450 | 639 | |
451 | - void permute(T* dest, T* src, size_t sx, size_t sy, size_t sz, size_t d0, size_t d1, size_t d2){ | |
640 | + size_t permute(T* dest, T* src, size_t sx, size_t sy, size_t sz, size_t d0, size_t d1, size_t d2){ | |
641 | + auto t0 = std::chrono::high_resolution_clock::now(); | |
452 | 642 | size_t d[3] = {d0, d1, d2}; |
453 | 643 | size_t s[3] = {sx, sy, sz}; |
454 | 644 | size_t p[3];// = {x, y, z}; |
... | ... | @@ -467,7 +657,6 @@ public: |
467 | 657 | p[1] = y; |
468 | 658 | src_idx = z * sx * sy + y * sx; |
469 | 659 | dest_idx = p[d[2]] * s[d[0]] * s[d[1]] + p[d[1]] * s[d[0]]; |
470 | - //std::cout<<z<<", "<<y<<" ------- "<<p[d[2]]<<" * "<<s[d[0]]<<" * "<<s[d[1]]<<" + "<<p[d[1]]<<" * "<<s[d[0]]<<std::endl; | |
471 | 660 | memcpy(dest + dest_idx, src + src_idx, x_bytes); |
472 | 661 | } |
473 | 662 | } |
... | ... | @@ -491,6 +680,8 @@ public: |
491 | 680 | } |
492 | 681 | } |
493 | 682 | } |
683 | + auto t1 = std::chrono::high_resolution_clock::now(); | |
684 | + return std::chrono::duration_cast<std::chrono::milliseconds>(t1-t0).count(); | |
494 | 685 | } |
495 | 686 | |
496 | 687 | }; | ... | ... |
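
Editor's note: the stream_optimizer added above performs a simple gradient-ascent search on throughput. It integrates bytes and milliseconds over a window (window_ms), probes a second batch size h away from the current one, estimates a finite-difference gradient of Bps, and steps the batch size by alpha times that gradient, snapping to a multiple of dn. The sketch below is not part of the commit; it only shows how a caller is expected to drive the class. process_batch(), the slice size, and the limits are hypothetical stand-ins, while the constructor and update() signatures come from the class above.

	#include <iostream>
	#include "stim/envi/binary.h"                           //provides stim::stream_optimizer

	//hypothetical stand-in for one read/permute/write cycle; returns the milliseconds it took
	size_t process_batch(size_t /*bytes*/){ return 100; }

	int main(){
		size_t slice_bytes = 1024 * 1024;                   //assume 1 MB per Y-slice
		size_t max_slices = 64;                             //assume 64 slices fit in the buffer
		size_t total_bytes = 512 * slice_bytes;             //synthetic data set size

		stim::stream_optimizer opt(1, max_slices);          //batch size is measured in slices (dn = 1)
		size_t n = max_slices;                              //the optimizer starts at the maximum batch size
		size_t done = 0, rate = 0;
		while(done < total_bytes){
			size_t batch_bytes = n * slice_bytes;
			size_t ms = process_batch(batch_bytes);         //time one batch of work
			done += batch_bytes;
			n = opt.update(batch_bytes, ms, rate, true);    //report the batch; get the next batch size
		}
		std::cout<<"final batch size: "<<n<<" slices, measured rate: "<<rate<<std::endl;
		return 0;
	}
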
stim/envi/bsq.h
... | ... | @@ -10,7 +10,7 @@ |
10 | 10 | #include <deque> |
11 | 11 | #include <chrono> |
12 | 12 | #include <future> |
13 | - | |
13 | +#include <algorithm> | |
14 | 14 | |
15 | 15 | |
16 | 16 | namespace stim{ |
... | ... | @@ -377,24 +377,40 @@ public: |
377 | 377 | |
378 | 378 | } |
379 | 379 | |
380 | - void readlines(T* dest, size_t start, size_t n){ | |
381 | - hsi<T>::read(dest, 0, start, 0, X(), n, Z()); | |
380 | + size_t readlines(T* dest, size_t start, size_t n){ | |
381 | + return hsi<T>::read(dest, 0, start, 0, X(), n, Z()); | |
382 | + } | |
383 | + | |
384 | + size_t writeblock(std::ofstream* f, T* src, size_t n){ | |
385 | + auto t0 = std::chrono::high_resolution_clock::now(); | |
386 | + f->write((char*)src, n); | |
387 | + auto t1 = std::chrono::high_resolution_clock::now(); | |
388 | + return std::chrono::duration_cast<std::chrono::milliseconds>(t1-t0).count(); | |
382 | 389 | } |
383 | 390 | |
384 | 391 | /// Convert this BSQ file to a BIL |
385 | - bool bil(std::string outname, bool PROGRESS = false){ | |
392 | + bool bil(std::string outname, bool PROGRESS = false, bool VERBOSE = false, bool OPTIMIZATION = true){ | |
386 | 393 | |
387 | 394 | const size_t buffers = 4; //number of buffers required for this algorithm |
395 | + | |
388 | 396 | size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch |
389 | 397 | |
390 | 398 | size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case) |
391 | 399 | size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints |
400 | + | |
401 | +		if(VERBOSE){ | |
402 | +			std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl; | |
403 | +			std::cout<<"   this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl; | |
404 | +		} | |
405 | + | |
392 | 406 | if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error |
393 | 407 | std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl; |
394 | 408 | exit(1); |
395 | 409 | } |
396 | 410 | size_t max_batch_bytes = max_slices_per_batch * slice_bytes; //calculate the amount of memory that will be allocated for all four buffers |
397 | 411 | |
412 | + stream_optimizer O(1, max_slices_per_batch); | |
413 | + | |
398 | 414 | T* src[2]; //source double-buffer for asynchronous batching |
399 | 415 | src[0] = (T*) malloc(max_batch_bytes); |
400 | 416 | src[1] = (T*) malloc(max_batch_bytes); |
... | ... | @@ -403,54 +419,70 @@ public: |
403 | 419 | dst[1] = (T*) malloc(max_batch_bytes); |
404 | 420 | |
405 | 421 | size_t N[2]; //number of slices stored in buffers 0 and 1 |
406 | - N[0] = N[1] = min(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set) | |
422 | + N[0] = N[1] = std::min<size_t>(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set) | |
407 | 423 | |
408 | 424 | std::ofstream target(outname.c_str(), std::ios::binary); //open an output file for writing |
409 | 425 | //initialize with buffer 0 (used for double buffering) |
410 | 426 | size_t y_load = 0; |
411 | 427 | size_t y_proc = 0; |
412 | - std::future<void> rthread; | |
428 | + std::future<size_t> rthread; | |
413 | 429 | std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing |
414 | 430 | |
415 | - readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer | |
416 | - y_load += N[0]; //increment the loaded slice counter | |
417 | - int b = 1; | |
418 | - | |
419 | - std::chrono::high_resolution_clock::time_point t_start; //high-resolution timers | |
420 | - std::chrono::high_resolution_clock::time_point t_end; | |
431 | + std::chrono::high_resolution_clock::time_point t_start, pt_start; //high-resolution timers | |
432 | + std::chrono::high_resolution_clock::time_point t_end, pt_end; | |
421 | 433 | size_t t_batch; //number of milliseconds to process a batch |
422 | - size_t t_total = 0; | |
434 | + size_t t_total = 0; //total time for operation | |
435 | + size_t pt_total = 0; //total time spent processing data | |
436 | + size_t rt_total = 0; //total time spent reading data | |
437 | + size_t wt_total = 0; | |
438 | + size_t dr = 0; | |
439 | + | |
440 | + rt_total += readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer | |
441 | + y_load += N[0]; //increment the loaded slice counter | |
442 | +		int b = 1;					//start at buffer 1 so that the first swap processes buffer 0 | |
423 | 443 | while(y_proc < Y()){ //while there are still slices to be processed |
424 | 444 | t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch |
425 | 445 | if(y_load < Y()){ //if there are still slices to be loaded, load them |
426 | 450 | if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size |
427 | 451 | rthread = std::async(std::launch::async, &stim::bsq<T>::readlines, this, src[b], y_load, N[b]); |
428 | - | |
452 | + //rt_total += rthread.get(); | |
429 | 453 | y_load += N[b]; //increment the number of loaded slices |
430 | 454 | } |
431 | 455 | |
432 | 456 | b = !b; //swap the double-buffer |
433 | - | |
434 | - binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 0, 2, 1); //permute the batch to a BIL file | |
435 | - target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file | |
457 | + pt_total += binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 0, 2, 1); //permute the batch to a BIL file | |
458 | + wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file | |
436 | 459 | y_proc += N[b]; //increment the counter of processed pixels |
437 | 460 | if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels |
461 | +			if(rthread.valid()) rt_total += rthread.get();	//if a read was queued, wait for it to finish before its buffer is reused | |
438 | 462 | t_end = std::chrono::high_resolution_clock::now(); |
439 | 463 | t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count(); |
440 | 464 | t_total += t_batch; |
441 | - rthread.wait(); | |
465 | + if(OPTIMIZATION) | |
466 | + N[b] = O.update(N[!b] * slice_bytes, t_batch, binary<T>::data_rate, VERBOSE); //set the batch size based on optimization | |
467 | + //binary<T>::data_rate = dr; | |
468 | + //std::cout<<"New N = "<<N[!b]<<" selected with "<<(double)data_rate / 1000000<<" MB/s"<<std::endl; | |
442 | 469 | } |
443 | - | |
444 | - std::cout<<"Total time to execute: "<<t_total<<" ms"<<std::endl; | |
470 | + | |
445 | 471 | free(src[0]); //free buffer resources |
446 | 472 | free(src[1]); |
447 | 473 | free(dst[0]); |
448 | 474 | free(dst[1]); |
475 | + if(VERBOSE){ | |
476 | + std::cout<<"total time to execute bsq::bil(): "<<t_total<<" ms"<<std::endl; | |
477 | + std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl; | |
478 | + std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl; | |
479 | + std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl; | |
480 | + } | |
449 | 481 | return true; //return true |
450 | 482 | } |
451 | 483 | |
452 | 484 | /// Convert this BSQ file to a BIP |
453 | - bool bip(std::string outname, bool PROGRESS = false){ | |
485 | + bool bip(std::string outname, bool PROGRESS = false, bool VERBOSE = false){ | |
454 | 486 | |
455 | 487 | const size_t buffers = 4; //number of buffers required for this algorithm |
456 | 488 | size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch |
... | ... | @@ -471,13 +503,13 @@ public: |
471 | 503 | dst[1] = (T*) malloc(max_batch_bytes); |
472 | 504 | |
473 | 505 | size_t N[2]; //number of slices stored in buffers 0 and 1 |
474 | - N[0] = N[1] = min(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set) | |
506 | + N[0] = N[1] = std::min<size_t>(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set) | |
475 | 507 | |
476 | 508 | std::ofstream target(outname.c_str(), std::ios::binary); //open an output file for writing |
477 | 509 | //initialize with buffer 0 (used for double buffering) |
478 | 510 | size_t y_load = 0; |
479 | 511 | size_t y_proc = 0; |
480 | - std::future<void> rthread; | |
512 | + std::future<size_t> rthread; | |
481 | 513 | std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing |
482 | 514 | |
483 | 515 | readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer |
... | ... | @@ -488,6 +520,8 @@ public: |
488 | 520 | std::chrono::high_resolution_clock::time_point t_end; |
489 | 521 | size_t t_batch; //number of milliseconds to process a batch |
490 | 522 | size_t t_total = 0; |
523 | + size_t pt_total = 0; | |
524 | + size_t rt_total = 0; | |
491 | 525 | while(y_proc < Y()){ //while there are still slices to be processed |
492 | 526 | t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch |
493 | 527 | if(y_load < Y()){ //if there are still slices to be loaded, load them |
... | ... | @@ -499,17 +533,21 @@ public: |
499 | 533 | |
500 | 534 | b = !b; //swap the double-buffer |
501 | 535 | |
502 | - binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 2, 0, 1); //permute the batch to a BIP file | |
536 | + pt_total += binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 2, 0, 1); //permute the batch to a BIP file | |
503 | 537 | target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file |
504 | 538 | y_proc += N[b]; //increment the counter of processed pixels |
505 | 539 | if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels |
506 | 540 | t_end = std::chrono::high_resolution_clock::now(); |
507 | 541 | t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count(); |
508 | 542 | t_total += t_batch; |
509 | - rthread.wait(); | |
543 | +			if(rthread.valid()) rt_total += rthread.get();	//if a read was queued, wait for it to finish before its buffer is reused | |
510 | 544 | } |
511 | 545 | |
512 | - std::cout<<"Total time to execute: "<<t_total<<" ms"<<std::endl; | |
546 | + if(VERBOSE){ | |
547 | +			std::cout<<"total time to execute bsq::bip(): "<<t_total<<" ms"<<std::endl; | |
548 | + std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl; | |
549 | + std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl; | |
550 | + } | |
513 | 551 | free(src[0]); //free buffer resources |
514 | 552 | free(src[1]); |
515 | 553 | free(dst[0]); | ... | ... |
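
Editor's note: bsq::bil() and bsq::bip() overlap I/O with computation using a double buffer: each iteration queues an asynchronous read into one buffer while the batch loaded on the previous iteration is permuted and written, then collects the read future before that buffer is reused. The stand-alone sketch below is not part of the commit and uses hypothetical load_batch()/process_batch() stand-ins; only the control flow mirrors the converters above.

	#include <algorithm>
	#include <cstddef>
	#include <future>
	#include <vector>

	//hypothetical stand-in for bsq::readlines(); returns a fake read time in ms
	size_t load_batch(float* dst, size_t start, size_t count){
		for(size_t i = 0; i < count; i++) dst[i] = (float)(start + i);
		return count;
	}

	//hypothetical stand-in for permute() followed by a write; returns a fake processing time in ms
	size_t process_batch(const float* src, size_t count){
		float checksum = 0;
		for(size_t i = 0; i < count; i++) checksum += src[i];
		(void)checksum;                                     //the result is irrelevant to the sketch
		return count;
	}

	void convert(size_t total_slices, size_t max_batch){
		std::vector<float> buf[2];                          //double buffer (one value per slice here)
		buf[0].resize(max_batch); buf[1].resize(max_batch);
		size_t N[2];                                        //slices held in each buffer
		N[0] = N[1] = std::min(total_slices, max_batch);
		size_t loaded = 0, processed = 0;
		int b = 1;

		load_batch(buf[0].data(), 0, N[0]);                 //prime buffer 0 synchronously
		loaded += N[0];
		std::future<size_t> reader;
		while(processed < total_slices){
			if(loaded < total_slices){                      //queue the next read asynchronously
				if(loaded + N[b] > total_slices) N[b] = total_slices - loaded;
				reader = std::async(std::launch::async, load_batch, buf[b].data(), loaded, N[b]);
				loaded += N[b];
			}
			b = !b;                                         //swap buffers
			process_batch(buf[b].data(), N[b]);             //work on the batch loaded last iteration
			processed += N[b];
			if(reader.valid()) reader.get();                //wait for the queued read before its buffer is reused
		}
	}

	int main(){ convert(100, 8); return 0; }
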
stim/envi/envi.h
... | ... | @@ -79,30 +79,64 @@ public: |
79 | 79 | return alloc_array(header.samples * header.lines); |
80 | 80 | } |
81 | 81 | |
82 | - void set_buffer(double memfrac = 0.5){ | |
82 | + void set_buffer_frac(double memfrac = 0.5){ | |
83 | 83 | if(header.interleave == envi_header::BSQ){ //if the infile is bsq file |
84 | 84 | if(header.data_type ==envi_header::float32) |
85 | - ((bsq<float>*)file)->set_buffer(memfrac); | |
85 | + ((bsq<float>*)file)->set_buffer_frac(memfrac); | |
86 | 86 | else if(header.data_type == envi_header::float64) |
87 | - ((bsq<double>*)file)->set_buffer(memfrac); | |
87 | + ((bsq<double>*)file)->set_buffer_frac(memfrac); | |
88 | 88 | else |
89 | 89 | std::cout<<"ERROR: unidentified data type"<<std::endl; |
90 | 90 | } |
91 | 91 | |
92 | 92 | else if(header.interleave == envi_header::BIL){ //if the infile is bil file |
93 | 93 | if(header.data_type ==envi_header::float32) |
94 | - ((bil<float>*)file)->set_buffer(memfrac); | |
94 | + ((bil<float>*)file)->set_buffer_frac(memfrac); | |
95 | 95 | else if(header.data_type == envi_header::float64) |
96 | - ((bil<double>*)file)->set_buffer(memfrac); | |
96 | + ((bil<double>*)file)->set_buffer_frac(memfrac); | |
97 | 97 | else |
98 | 98 | std::cout<<"ERROR: unidentified data type"<<std::endl; |
99 | 99 | } |
100 | 100 | |
101 | 101 | else if(header.interleave == envi_header::BIP){ //if the infile is bip file |
102 | 102 | if(header.data_type ==envi_header::float32) |
103 | - ((bip<float>*)file)->set_buffer(memfrac); | |
103 | + ((bip<float>*)file)->set_buffer_frac(memfrac); | |
104 | 104 | else if(header.data_type == envi_header::float64) |
105 | - ((bip<double>*)file)->set_buffer(memfrac); | |
105 | + ((bip<double>*)file)->set_buffer_frac(memfrac); | |
106 | + else | |
107 | + std::cout<<"ERROR: unidentified data type"<<std::endl; | |
108 | + } | |
109 | + | |
110 | + else{ | |
111 | + std::cout<<"ERROR: unidentified file type"<<std::endl; | |
112 | + exit(1); | |
113 | + } | |
114 | + } | |
115 | + | |
116 | + void set_buffer_raw(size_t bytes){ | |
117 | + if(header.interleave == envi_header::BSQ){ //if the infile is bsq file | |
118 | + if(header.data_type ==envi_header::float32) | |
119 | + ((bsq<float>*)file)->set_buffer_raw(bytes); | |
120 | + else if(header.data_type == envi_header::float64) | |
121 | + ((bsq<double>*)file)->set_buffer_raw(bytes); | |
122 | + else | |
123 | + std::cout<<"ERROR: unidentified data type"<<std::endl; | |
124 | + } | |
125 | + | |
126 | + else if(header.interleave == envi_header::BIL){ //if the infile is bil file | |
127 | + if(header.data_type ==envi_header::float32) | |
128 | + ((bil<float>*)file)->set_buffer_raw(bytes); | |
129 | + else if(header.data_type == envi_header::float64) | |
130 | + ((bil<double>*)file)->set_buffer_raw(bytes); | |
131 | + else | |
132 | + std::cout<<"ERROR: unidentified data type"<<std::endl; | |
133 | + } | |
134 | + | |
135 | + else if(header.interleave == envi_header::BIP){ //if the infile is bip file | |
136 | + if(header.data_type ==envi_header::float32) | |
137 | + ((bip<float>*)file)->set_buffer_raw(bytes); | |
138 | + else if(header.data_type == envi_header::float64) | |
139 | + ((bip<double>*)file)->set_buffer_raw(bytes); | |
106 | 140 | else |
107 | 141 | std::cout<<"ERROR: unidentified data type"<<std::endl; |
108 | 142 | } |
... | ... | @@ -121,6 +155,16 @@ public: |
121 | 155 | exit(1); |
122 | 156 | } |
123 | 157 | |
158 | + size_t X(){ return header.samples; } | |
159 | + size_t Y(){ return header.lines; } | |
160 | + size_t Z(){ return header.bands; } | |
161 | + size_t B(){ return Z(); } | |
162 | + | |
163 | + /// Return the size of the data set in bytes | |
164 | + size_t bytes(){ | |
165 | + return X() * Y() * Z() * type_size(); | |
166 | + } | |
167 | + | |
124 | 168 | /// Returns the progress of the current processing operation as a percentage |
125 | 169 | void reset_progress(){ |
126 | 170 | |
... | ... | @@ -193,6 +237,42 @@ public: |
193 | 237 | return 0; |
194 | 238 | } |
195 | 239 | |
240 | +	/// Returns the data rate measured during the most recent streaming operation | |
241 | + size_t data_rate(){ | |
242 | + | |
243 | + if(header.interleave == envi_header::BSQ){ //if the infile is bsq file | |
244 | + if(header.data_type ==envi_header::float32) | |
245 | + return ((bsq<float>*)file)->get_data_rate(); | |
246 | + else if(header.data_type == envi_header::float64) | |
247 | + return ((bsq<double>*)file)->get_data_rate(); | |
248 | + else | |
249 | + std::cout<<"ERROR: unidentified data type"<<std::endl; | |
250 | + } | |
251 | + | |
252 | + else if(header.interleave == envi_header::BIL){ //if the infile is bil file | |
253 | + if(header.data_type ==envi_header::float32) | |
254 | + return ((bil<float>*)file)->get_data_rate(); | |
255 | + else if(header.data_type == envi_header::float64) | |
256 | + return ((bil<double>*)file)->get_data_rate(); | |
257 | + else | |
258 | + std::cout<<"ERROR: unidentified data type"<<std::endl; | |
259 | + } | |
260 | + | |
261 | + else if(header.interleave == envi_header::BIP){ //if the infile is bip file | |
262 | + if(header.data_type ==envi_header::float32) | |
263 | + return ((bip<float>*)file)->get_data_rate(); | |
264 | + else if(header.data_type == envi_header::float64) | |
265 | + return ((bip<double>*)file)->get_data_rate(); | |
266 | + else | |
267 | + std::cout<<"ERROR: unidentified data type"<<std::endl; | |
268 | + } | |
269 | + | |
270 | + else{ | |
271 | + std::cout<<"ERROR: unidentified file type"<<std::endl; | |
272 | + } | |
273 | + return 0; | |
274 | + } | |
275 | + | |
196 | 276 | /// Allocate memory for a new ENVI file based on the current interleave format (BIP, BIL, BSQ) and data type. |
197 | 277 | void allocate(){ |
198 | 278 | |
... | ... | @@ -509,7 +589,7 @@ public: |
509 | 589 | |
510 | 590 | /// @param outfile is the file name for the converted output |
511 | 591 | /// @param interleave is the interleave format for the destination file |
512 | - bool convert(std::string outfile, stim::envi_header::interleaveType interleave, bool PROGRESS = false){ | |
592 | + bool convert(std::string outfile, stim::envi_header::interleaveType interleave, bool PROGRESS = false, bool VERBOSE = false, bool OPTIMIZATION = true){ | |
513 | 593 | |
514 | 594 | if(header.interleave == envi_header::BSQ){ //if the infile is bsq file |
515 | 595 | |
... | ... | @@ -519,10 +599,10 @@ public: |
519 | 599 | exit(1); |
520 | 600 | } |
521 | 601 | else if(interleave == envi_header::BIL) //convert BSQ -> BIL |
522 | - ((bsq<float>*)file)->bil(outfile, PROGRESS); | |
602 | + ((bsq<float>*)file)->bil(outfile, PROGRESS, VERBOSE, OPTIMIZATION); | |
523 | 603 | else if(interleave == envi_header::BIP){ //ERROR |
524 | 604 | //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl; |
525 | - ((bsq<float>*)file)->bip(outfile, PROGRESS); | |
605 | + ((bsq<float>*)file)->bip(outfile, PROGRESS, VERBOSE); | |
526 | 606 | //exit(1); |
527 | 607 | } |
528 | 608 | } |
... | ... | @@ -533,10 +613,10 @@ public: |
533 | 613 | exit(1); |
534 | 614 | } |
535 | 615 | else if(interleave == envi_header::BIL) //convert BSQ -> BIL |
536 | - ((bsq<double>*)file)->bil(outfile, PROGRESS); | |
616 | +			((bsq<double>*)file)->bil(outfile, PROGRESS, VERBOSE, OPTIMIZATION); | |
537 | 617 | else if(interleave == envi_header::BIP){ //ERROR |
538 | 618 | //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl; |
539 | - ((bsq<float>*)file)->bip(outfile, PROGRESS); | |
619 | +			((bsq<double>*)file)->bip(outfile, PROGRESS, VERBOSE); | |
540 | 620 | //exit(1); |
541 | 621 | } |
542 | 622 | } | ... | ... |
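
Editor's note: the new convert() flags, set_buffer_frac()/set_buffer_raw(), bytes(), and data_rate() give callers control over memory use and visibility into throughput. The sketch below is not part of the commit; E is assumed to be a stim::envi object that has already been opened elsewhere (opening is outside this diff), and the memory fraction and output name are placeholders.

	#include <iostream>
	#include <string>
	#include "stim/envi/envi.h"

	//E is assumed to be an already-opened stim::envi object
	void convert_to_bil(stim::envi& E, std::string outfile){
		E.set_buffer_frac(0.5);                             //use at most half of the available memory
		//E.set_buffer_raw(512 * 1024 * 1024);              //or cap the buffer at an absolute byte count
		std::cout<<"input size: "<<E.bytes()<<" bytes"<<std::endl;

		bool PROGRESS = true, VERBOSE = true, OPTIMIZATION = true;
		E.convert(outfile, stim::envi_header::BIL, PROGRESS, VERBOSE, OPTIMIZATION);

		std::cout<<"data rate reported by the optimizer: "<<E.data_rate()<<std::endl;
	}
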
stim/envi/hsi.h
... | ... | @@ -202,7 +202,7 @@ public: |
202 | 202 | } |
203 | 203 | } |
204 | 204 | |
205 | - void read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){ | |
205 | + size_t read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){ | |
206 | 206 | size_t d[3]; //position in the binary coordinate system |
207 | 207 | size_t sd[3]; //size in the binary coordinate system |
208 | 208 | |
... | ... | @@ -214,10 +214,7 @@ public: |
214 | 214 | sd[O[1]] = sy; |
215 | 215 | sd[O[2]] = sz; |
216 | 216 | |
217 | - if(!binary<T>::read(dest, d[0], d[1], d[2], sd[0], sd[1], sd[2])){ | |
218 | - std::cout<<"error reading block in stim::hsi: ("<<d[0]<<", "<<d[1]<<", "<<d[2]<<") - ["<<sd[0]<<", "<<sd[1]<<", "<<sd[2]<<"]"<<std::endl; | |
219 | - exit(1); | |
220 | - } | |
217 | + return binary<T>::read(dest, d[0], d[1], d[2], sd[0], sd[1], sd[2]); | |
221 | 218 | } |
222 | 219 | |
223 | 220 | }; | ... | ... |