Commit 474c351a8d9ad9c032ae6e5717c0d5fa054ed93d
1 parent c6251f8b
output timing data after optimization
Showing 2 changed files with 44 additions and 29 deletions
stim/envi/bsq.h
... | ... | @@ -398,10 +398,8 @@ public: |
398 | 398 | size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case) |
399 | 399 | size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints |
400 | 400 | |
401 | - //if(VERBOSE){ | |
402 | - std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl; | |
403 | - std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl; | |
404 | - //} | |
401 | + std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl; | |
402 | + std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl; | |
405 | 403 | |
406 | 404 | if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error |
407 | 405 | std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl; |
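
The hunk above removes the commented-out remains of the VERBOSE guard around the memory diagnostics in bil(), which are printed unconditionally before the batch loop starts. A minimal standalone sketch of the same batch-size arithmetic follows; the X, Z, buffer_size, and element-type values are made-up placeholders, not values taken from the library.

#include <cstddef>
#include <cstdio>

int main() {
    const size_t X = 640, Z = 1024;           // assumed samples per line and bands
    const size_t buffer_size = 1ull << 30;    // assumed 1 GB streaming buffer
    const size_t buffers = 4;                 // src[2] + dst[2] double buffers
    const size_t elem = sizeof(float);        // assumes T = float, as in envi.h

    size_t mem_per_batch = buffer_size / buffers;              // memory per buffer
    size_t slice_bytes = X * Z * elem;                         // one Y-slice
    size_t max_slices_per_batch = mem_per_batch / slice_bytes; // slices per batch

    printf("maximum memory available for processing: %.2f MB\n",
           (double)buffer_size / 1000000.0);
    printf("  batch size: %zu Y-axis slices (%zu x %zu) = %.2f MB\n",
           max_slices_per_batch, X, Z,
           (double)(slice_bytes * max_slices_per_batch) / 1000000.0);
    return 0;
}

With four buffers in play (two source and two destination for the double-buffered pipeline), each batch gets a quarter of the streaming buffer, and the batch size is however many X-by-Z Y-slices fit in that quarter.
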
... | ... | @@ -458,7 +456,7 @@ public: |
458 | 456 | wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file |
459 | 457 | y_proc += N[b]; //increment the counter of processed pixels |
460 | 458 | if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels |
461 | - if(y_load < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations | |
459 | + if(y_proc < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations | |
462 | 460 | t_end = std::chrono::high_resolution_clock::now(); |
463 | 461 | t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count(); |
464 | 462 | t_total += t_batch; |
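
The one-line change in this hunk joins the pending read whenever unprocessed slices remain (y_proc < Y()) instead of only when more slices remain to load (y_load < Y()). With the old test, the read launched for the final batch was never explicitly collected: y_load had already reached Y(), so the next iteration could consume that buffer before the read finished, and its time never reached rt_total. Below is a toy double-buffered loop illustrating the corrected pattern; read_chunk and the sizes are stand-ins, not library code.

#include <cstddef>
#include <cstdio>
#include <future>
#include <vector>

// Fake "disk read": fill the buffer and return a stand-in for elapsed time.
static size_t read_chunk(std::vector<int>& dst, size_t start, size_t n) {
    for (size_t i = 0; i < n; ++i) dst[i] = (int)(start + i);
    return n;
}

int main() {
    const size_t total = 10, batch = 3;                    // toy workload
    std::vector<int> src[2] = { std::vector<int>(batch), std::vector<int>(batch) };
    size_t N[2] = { batch, batch };
    size_t loaded = 0, processed = 0, rt_total = 0;
    std::future<size_t> rthread;

    rt_total += read_chunk(src[0], 0, N[0]);               // synchronous first read
    loaded += N[0];
    int b = 1;

    while (processed < total) {
        if (loaded < total) {                                  // queue the next read
            if (loaded + N[b] > total) N[b] = total - loaded;  // clamp the last batch
            rthread = std::async(std::launch::async, read_chunk,
                                 std::ref(src[b]), loaded, N[b]);
            loaded += N[b];
        }
        b = !b;                                            // swap the double buffer
        processed += N[b];                                 // "process" the loaded batch
        if (processed < total)                             // the next pass will consume the
            rt_total += rthread.get();                     // buffer being read, so join first
    }
    printf("processed %zu slices, read-time total %zu\n", processed, rt_total);
    return 0;
}

On the pass that launches the final read, loaded already equals total, so only the processed < total test guarantees that read is joined before its buffer is consumed on the last pass.
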
... | ... | @@ -472,29 +470,36 @@ public: |
472 | 470 | free(src[1]); |
473 | 471 | free(dst[0]); |
474 | 472 | free(dst[1]); |
475 | - if(VERBOSE){ | |
473 | + //if(VERBOSE){ | |
476 | 474 | std::cout<<"total time to execute bsq::bil(): "<<t_total<<" ms"<<std::endl; |
477 | 475 | std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl; |
478 | 476 | std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl; |
479 | 477 | std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl; |
480 | - } | |
478 | + //} | |
481 | 479 | return true; //return true |
482 | 480 | } |
483 | 481 | |
484 | 482 | /// Convert this BSQ file to a BIP |
485 | - bool bip(std::string outname, bool PROGRESS = false, bool VERBOSE = false){ | |
483 | + bool bip(std::string outname, bool PROGRESS = false, bool VERBOSE = false, bool OPTIMIZATION = true){ | |
486 | 484 | |
487 | 485 | const size_t buffers = 4; //number of buffers required for this algorithm |
486 | + | |
488 | 487 | size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch |
489 | 488 | |
490 | 489 | size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case) |
491 | 490 | size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints |
491 | + | |
492 | + std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl; | |
493 | + std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl; | |
494 | + | |
492 | 495 | if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error |
493 | 496 | std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl; |
494 | 497 | exit(1); |
495 | 498 | } |
496 | 499 | size_t max_batch_bytes = max_slices_per_batch * slice_bytes; //calculate the amount of memory that will be allocated for all four buffers |
497 | 500 | |
501 | + stream_optimizer O(1, max_slices_per_batch); | |
502 | + | |
498 | 503 | T* src[2]; //source double-buffer for asynchronous batching |
499 | 504 | src[0] = (T*) malloc(max_batch_bytes); |
500 | 505 | src[1] = (T*) malloc(max_batch_bytes); |
... | ... | @@ -512,46 +517,56 @@ public: |
512 | 517 | std::future<size_t> rthread; |
513 | 518 | std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing |
514 | 519 | |
515 | - readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer | |
516 | - y_load += N[0]; //increment the loaded slice counter | |
517 | - int b = 1; | |
518 | - | |
519 | - std::chrono::high_resolution_clock::time_point t_start; //high-resolution timers | |
520 | - std::chrono::high_resolution_clock::time_point t_end; | |
520 | + std::chrono::high_resolution_clock::time_point t_start, pt_start; //high-resolution timers | |
521 | + std::chrono::high_resolution_clock::time_point t_end, pt_end; | |
521 | 522 | size_t t_batch; //number of milliseconds to process a batch |
522 | - size_t t_total = 0; | |
523 | - size_t pt_total = 0; | |
524 | - size_t rt_total = 0; | |
523 | + size_t t_total = 0; //total time for operation | |
524 | + size_t pt_total = 0; //total time spent processing data | |
525 | + size_t rt_total = 0; //total time spent reading data | |
526 | + size_t wt_total = 0; | |
527 | + size_t dr = 0; | |
528 | + | |
529 | + rt_total += readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer | |
530 | + y_load += N[0]; //increment the loaded slice counter | |
531 | + int b = 1; //initialize the double buffer to 0 | |
525 | 532 | while(y_proc < Y()){ //while there are still slices to be processed |
526 | 533 | t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch |
527 | 534 | if(y_load < Y()){ //if there are still slices to be loaded, load them |
535 | + //if(y_proc > 0){ | |
536 | + | |
537 | + | |
538 | + //} | |
528 | 539 | if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size |
529 | 540 | rthread = std::async(std::launch::async, &stim::bsq<T>::readlines, this, src[b], y_load, N[b]); |
530 | - | |
541 | + //rt_total += rthread.get(); | |
531 | 542 | y_load += N[b]; //increment the number of loaded slices |
532 | 543 | } |
533 | 544 | |
534 | 545 | b = !b; //swap the double-buffer |
535 | - | |
536 | - pt_total += binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 2, 0, 1); //permute the batch to a BIP file | |
537 | - target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file | |
546 | + pt_total += binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 0, 2, 1); //permute the batch to a BIL file | |
547 | + wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file | |
538 | 548 | y_proc += N[b]; //increment the counter of processed pixels |
539 | 549 | if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels |
550 | + if(y_proc < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations | |
540 | 551 | t_end = std::chrono::high_resolution_clock::now(); |
541 | 552 | t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count(); |
542 | 553 | t_total += t_batch; |
543 | - if(y_load < Y()) rt_total += rthread.get(); //if a batch was threaded to load, make sure it finishes | |
544 | - } | |
545 | - | |
546 | - if(VERBOSE){ | |
547 | - std::cout<<"total time to execute bsq::bil(): "<<t_total<<" ms"<<std::endl; | |
548 | - std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl; | |
549 | - std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl; | |
554 | + if(OPTIMIZATION) | |
555 | + N[b] = O.update(N[!b] * slice_bytes, t_batch, binary<T>::data_rate, VERBOSE); //set the batch size based on optimization | |
556 | + //binary<T>::data_rate = dr; | |
557 | + //std::cout<<"New N = "<<N[!b]<<" selected with "<<(double)data_rate / 1000000<<" MB/s"<<std::endl; | |
550 | 558 | } |
559 | + | |
551 | 560 | free(src[0]); //free buffer resources |
552 | 561 | free(src[1]); |
553 | 562 | free(dst[0]); |
554 | 563 | free(dst[1]); |
564 | + //if(VERBOSE){ | |
565 | + std::cout<<"total time to execute bsq::bip(): "<<t_total<<" ms"<<std::endl; | |
566 | + std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl; | |
567 | + std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl; | |
568 | + std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl; | |
569 | + //} | |
555 | 570 | return true; //return true |
556 | 571 | } |
557 | 572 | ... | ... |
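
bip() now mirrors bil(): readlines() and writeblock() appear to return elapsed milliseconds that accumulate into rt_total and wt_total, the pending read is joined with the same y_proc < Y() test, and when OPTIMIZATION is set a stream_optimizer can retune the batch size after every iteration via O.update(N[!b] * slice_bytes, t_batch, binary<T>::data_rate, VERBOSE). The optimizer itself is not part of this diff, so the class below is only a hypothetical sketch with the same call shape; it hill-climbs on the measured data rate and is not the library's stream_optimizer.

#include <cstddef>
#include <iostream>

class stream_optimizer_sketch {
    size_t n_min, n_max;   // allowed range of slices per batch
    size_t n_cur;          // slice count to hand back for the next batch
    size_t step;           // current probe distance
    int dir;               // +1 grow the batch, -1 shrink it
public:
    stream_optimizer_sketch(size_t min_slices, size_t max_slices)
        : n_min(min_slices), n_max(max_slices), n_cur(max_slices),
          step((max_slices - min_slices) / 4 ? (max_slices - min_slices) / 4 : 1),
          dir(-1) {}

    // bytes: data moved this batch; ms: batch time; best_rate: running best
    // throughput (assumed to be passed by reference, mirroring how
    // binary<T>::data_rate is handed to update() above); verbose: log the choice.
    size_t update(size_t bytes, size_t ms, size_t& best_rate, bool verbose) {
        size_t rate = ms ? bytes * 1000 / ms : bytes;    // bytes per second
        if (rate >= best_rate) {
            best_rate = rate;                            // keep probing this direction
        } else {
            dir = -dir;                                  // throughput fell: reverse
            step = step > 1 ? step / 2 : 1;              // and refine the probe
        }
        if (dir > 0 && n_cur + step <= n_max)      n_cur += step;
        else if (dir < 0 && n_cur >= n_min + step) n_cur -= step;
        if (verbose)
            std::cout << "next batch: " << n_cur << " slices at "
                      << (double)rate / 1000000.0 << " MB/s" << std::endl;
        return n_cur;
    }
};

Whether the real update() mutates the rate argument, and which direction it searches first, are assumptions here; the constructor call stream_optimizer O(1, max_slices_per_batch) in the hunk is read, in this sketch, as a minimum and maximum slice count.
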
stim/envi/envi.h
... | ... | @@ -602,7 +602,7 @@ public: |
602 | 602 | ((bsq<float>*)file)->bil(outfile, PROGRESS, VERBOSE, OPTIMIZATION); |
603 | 603 | else if(interleave == envi_header::BIP){ //ERROR |
604 | 604 | //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl; |
605 | - ((bsq<float>*)file)->bip(outfile, PROGRESS, VERBOSE); | |
605 | + ((bsq<float>*)file)->bip(outfile, PROGRESS, VERBOSE, OPTIMIZATION); | |
606 | 606 | //exit(1); |
607 | 607 | } |
608 | 608 | }
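
On the envi.h side, the BSQ-to-BIP dispatch now forwards the OPTIMIZATION flag so bip() takes the same four arguments as bil(). Both conversion paths also print per-batch and total timing, which matches the commit title; below is a minimal standalone sketch of that std::chrono pattern, with a sleep standing in for the read/permute/write work.

#include <chrono>
#include <cstddef>
#include <cstdio>
#include <thread>

int main() {
    using hrc = std::chrono::high_resolution_clock;
    size_t t_total = 0;                                    // total run time in ms
    for (int batch = 0; batch < 3; ++batch) {
        hrc::time_point t_start = hrc::now();              // start the batch timer
        std::this_thread::sleep_for(std::chrono::milliseconds(10));  // fake work
        hrc::time_point t_end = hrc::now();
        size_t t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(
                             t_end - t_start).count();     // per-batch milliseconds
        t_total += t_batch;
    }
    printf("total time to execute: %zu ms\n", t_total);
    return 0;
}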