Commit 846b3f1829c2a5f5f92fd4cf3ed328c01842c691
1 parent
8b899c24
fixed asynchronous reading/writing bugs
Showing
3 changed files
with
71 additions
and
37 deletions
Show diff stats
stim/envi/binary.h
... | ... | @@ -404,7 +404,7 @@ public: |
404 | 404 | } |
405 | 405 | |
406 | 406 | /// Reads a block specified by an (x, y, z) position and size using the largest possible contiguous reads |
407 | - bool read_block(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){ | |
407 | + bool read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){ | |
408 | 408 | |
409 | 409 | size_t size_bytes = sx * sy * sz * sizeof(T); //size of the block to read in bytes |
410 | 410 | |
... | ... | @@ -443,6 +443,7 @@ public: |
443 | 443 | } |
444 | 444 | file.seekg(jump_y_bytes, std::ios::cur); //skip to the beginning of the next slice |
445 | 445 | } |
446 | + return false; | |
446 | 447 | } |
447 | 448 | |
448 | 449 | }; | ... | ... |
stim/envi/bsq.h
... | ... | @@ -9,6 +9,7 @@ |
9 | 9 | #include <vector> |
10 | 10 | #include <deque> |
11 | 11 | #include <chrono> |
12 | +#include <future> | |
12 | 13 | |
13 | 14 | |
14 | 15 | |
... | ... | @@ -376,6 +377,10 @@ public: |
376 | 377 | |
377 | 378 | } |
378 | 379 | |
380 | + void readlines(T* dest, size_t start, size_t n){ | |
381 | + hsi<T>::read(dest, 0, start, 0, X(), n, Z()); | |
382 | + } | |
383 | + | |
379 | 384 | bool bil(std::string outname, bool PROGRESS = false){ |
380 | 385 | |
381 | 386 | size_t in_time, out_time, calc_time; //initialize the timing variables |
... | ... | @@ -385,66 +390,94 @@ public: |
385 | 390 | size_t XB = X() * Z(); //number of elements in an output slice |
386 | 391 | size_t XYbytes = XY * sizeof(T); //number of bytes in an input slice |
387 | 392 | size_t XBbytes = XB * sizeof(T); //number of bytes in an output slice |
388 | - size_t batch_slices = binary<T>::buffer_size / (2*XBbytes); //calculate the number of slices that can fit in memory | |
389 | - if(Y() < batch_slices) batch_slices = Y(); //if the entire data set will fit in memory, do it | |
390 | - size_t batchN = XB * batch_slices; //number of elements in a batch | |
393 | + size_t batch_slices[2]; | |
394 | + batch_slices[0] = binary<T>::buffer_size / (4*XBbytes); //calculate the number of slices that can fit in memory | |
395 | + batch_slices[1] = batch_slices[0]; | |
396 | + if(batch_slices == 0){ | |
397 | + std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl; | |
398 | + exit(1); | |
399 | + } | |
400 | + if(Y() < batch_slices[0]) batch_slices[0] = Y(); //if the entire data set will fit in memory, do it | |
401 | + size_t batchN = XB * batch_slices[0]; //number of elements in a batch | |
391 | 402 | size_t batch_bytes = batchN * sizeof(T); //calculate the number of bytes in a batch |
392 | 403 | |
393 | - T* ptrIn = (T*) malloc(batch_bytes); //allocate a large buffer storing the read data | |
394 | - T* ptrOut = (T*) malloc(batch_bytes); //allocate space for storing an output buffer | |
404 | + //T* ptrIn = (T*) malloc(batch_bytes); //allocate a large buffer storing the read data | |
405 | + //T* ptrOut = (T*) malloc(batch_bytes); //allocate space for storing an output buffer | |
406 | + T* ptrIn[2]; //input double-buffer for asynchronous batching | |
407 | + ptrIn[0] = (T*) malloc(batch_bytes); | |
408 | + ptrIn[1] = (T*) malloc(batch_bytes); | |
409 | + T* ptrOut[2]; //output double-buffer for asynchronous batching | |
410 | + ptrOut[0] = (T*) malloc(batch_bytes); | |
411 | + ptrOut[1] = (T*) malloc(batch_bytes); | |
395 | 412 | |
396 | - size_t jump = (Y() - batch_slices) * X() * sizeof(T); //jump between reads in the input file | |
413 | + size_t jump = (Y() - batch_slices[0]) * X() * sizeof(T); //jump between reads in the input file | |
397 | 414 | |
398 | 415 | std::ofstream target(outname.c_str(), std::ios::binary); |
399 | 416 | std::string headername = outname + ".hdr"; |
400 | 417 | |
401 | - size_t batches = (size_t)ceil((double)(Y()) / (double)batch_slices); //calculate the number of batches | |
418 | + size_t batches = (size_t)ceil((double)(Y()) / (double)batch_slices[0]); //calculate the number of batches | |
402 | 419 | T* ptrDst; |
403 | 420 | T* ptrSrc; |
404 | 421 | size_t y = 0; //initialize the current y-slice position |
422 | + int i = 0; | |
423 | + std::future<void> rthread; | |
424 | + | |
425 | + readlines(ptrIn[0], 0, batch_slices[0]); | |
426 | + y += batch_slices[i]; | |
427 | + | |
428 | + std::future<std::ostream&> wthread; | |
429 | + | |
430 | + std::chrono::high_resolution_clock::time_point t_start; //high-resolution timers | |
431 | + std::chrono::high_resolution_clock::time_point t_end; | |
432 | + size_t t_batch; //number of milliseconds to process a batch | |
433 | + size_t t_total = 0; | |
434 | + | |
405 | 435 | for(size_t c = 0; c < batches; c++){ |
406 | - if(c == (batches - 1)){ | |
407 | - batch_slices = Y() - (batches - 1) * batch_slices; //if this is the last batch, calculate the remaining # of bands | |
408 | - jump = (Y() - batch_slices) * X() * sizeof(T); | |
409 | - batchN = XB * batch_slices; | |
410 | - batch_bytes = batchN * sizeof(T); | |
436 | + t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch | |
437 | + if(c == (batches - 2)){ | |
438 | + batch_slices[!i] = Y() - (batches - 1) * batch_slices[!i]; //if this is the last batch, calculate the remaining # of bands | |
411 | 439 | } |
440 | + jump = (Y() - batch_slices[i]) * X() * sizeof(T); | |
441 | + batchN = XB * batch_slices[i]; | |
442 | + batch_bytes = batchN * sizeof(T); | |
412 | 443 | |
413 | - auto in_begin = std::chrono::high_resolution_clock::now(); | |
414 | - hsi<T>::read_block(ptrIn, 0, y, 0, X(), batch_slices, Z()); //read the input block | |
415 | - y += batch_slices; | |
416 | - auto in_end = std::chrono::high_resolution_clock::now(); | |
417 | - in_time += std::chrono::duration_cast<std::chrono::milliseconds>(in_end-in_begin).count(); | |
418 | - | |
419 | - auto calc_begin = std::chrono::high_resolution_clock::now(); | |
420 | - | |
444 | + rthread = std::async(&stim::bsq<T>::readlines, this, ptrIn[!i], y, batch_slices[!i]); //start reading the next batch | |
445 | + y += batch_slices[i]; | |
446 | + | |
421 | 447 | for(size_t b = 0; b < Z(); b++){ //for each line, store an XB slice in ptrDest |
422 | - ptrSrc = ptrIn + (b * X() * batch_slices); | |
423 | - ptrDst = ptrOut + (b * X()); //initialize ptrDst to the start of the XB output slice | |
448 | + ptrSrc = ptrIn[i] + (b * X() * batch_slices[i]); | |
449 | + ptrDst = ptrOut[i] + (b * X()); //initialize ptrDst to the start of the XB output slice | |
424 | 450 | |
425 | - for(size_t y = 0; y < batch_slices; y++){ //for each band in the current line | |
451 | + for(size_t y = 0; y < batch_slices[i]; y++){ //for each band in the current line | |
426 | 452 | memcpy(ptrDst, ptrSrc, X() * sizeof(T)); //copy the band line from the source to the destination |
427 | 453 | ptrSrc += X(); //increment the pointer within the current buffer array (batch) |
428 | 454 | ptrDst += X() * Z(); //increment the pointer within the XB slice (to be output) |
429 | 455 | } |
430 | 456 | } |
431 | - auto calc_end = std::chrono::high_resolution_clock::now(); | |
432 | - calc_time += std::chrono::duration_cast<std::chrono::milliseconds>(calc_end-calc_begin).count(); | |
457 | + | |
458 | + wthread = std::async( &std::fstream::write, &target, (char*)ptrOut[i], batch_bytes); | |
433 | 459 | |
434 | - auto out_begin = std::chrono::high_resolution_clock::now(); | |
435 | - target.write((char*)ptrOut, batch_bytes); //write the batch to disk | |
436 | - auto out_end = std::chrono::high_resolution_clock::now(); | |
437 | - out_time += std::chrono::duration_cast<std::chrono::milliseconds>(out_end-out_begin).count(); | |
438 | 460 | if(PROGRESS) progress = (double)( c + 1 ) / (batches) * 100; |
461 | + i = !i; | |
462 | + | |
463 | + rthread.wait(); | |
464 | + wthread.wait(); | |
465 | + t_end = std::chrono::high_resolution_clock::now(); | |
466 | + t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count(); | |
467 | + t_total += t_batch; | |
439 | 468 | } |
469 | + | |
470 | + std::cout<<"Total time to execute: "<<t_total<<" ms"<<std::endl; | |
440 | 471 | |
441 | - free(ptrIn); | |
442 | - free(ptrOut); | |
472 | + free(ptrIn[0]); | |
473 | + free(ptrIn[1]); | |
474 | + free(ptrOut[0]); | |
475 | + free(ptrOut[1]); | |
443 | 476 | target.close(); |
444 | 477 | |
445 | - std::cout<<"BSQ->BIL reads: "<<(double)in_time / (1000 * 60)<<" min"<<std::endl; | |
446 | - std::cout<<"BSQ->BIL calculations: "<<(double)calc_time / (1000 * 60)<<" min"<<std::endl; | |
447 | - std::cout<<"BSQ->BIL writes: "<<(double)out_time / (1000 * 60)<<" min"<<std::endl; | |
478 | + //std::cout<<"BSQ->BIL reads: "<<(double)in_time / (1000 * 60)<<" min"<<std::endl; | |
479 | + //std::cout<<"BSQ->BIL calculations: "<<(double)calc_time / (1000 * 60)<<" min"<<std::endl; | |
480 | + //std::cout<<"BSQ->BIL writes: "<<(double)out_time / (1000 * 60)<<" min"<<std::endl; | |
448 | 481 | |
449 | 482 | return true; |
450 | 483 | } | ... | ... |
stim/envi/hsi.h
... | ... | @@ -202,7 +202,7 @@ public: |
202 | 202 | } |
203 | 203 | } |
204 | 204 | |
205 | - void read_block(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){ | |
205 | + void read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){ | |
206 | 206 | size_t d[3]; //position in the binary coordinate system |
207 | 207 | size_t sd[3]; //size in the binary coordinate system |
208 | 208 | |
... | ... | @@ -214,7 +214,7 @@ public: |
214 | 214 | sd[O[1]] = sy; |
215 | 215 | sd[O[2]] = sz; |
216 | 216 | |
217 | - if(!binary<T>::read_block(dest, d[0], d[1], d[2], sd[0], sd[1], sd[2])){ | |
217 | + if(!binary<T>::read(dest, d[0], d[1], d[2], sd[0], sd[1], sd[2])){ | |
218 | 218 | std::cout<<"error reading block in stim::hsi: ("<<d[0]<<", "<<d[1]<<", "<<d[2]<<") - ["<<sd[0]<<", "<<sd[1]<<", "<<sd[2]<<"]"<<std::endl; |
219 | 219 | exit(1); |
220 | 220 | } | ... | ... |