Commit 474c351a8d9ad9c032ae6e5717c0d5fa054ed93d
1 parent
c6251f8b
output timing data after optimization
Showing
2 changed files
with
44 additions
and
29 deletions
Show diff stats
stim/envi/bsq.h
@@ -398,10 +398,8 @@ public: | @@ -398,10 +398,8 @@ public: | ||
398 | size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case) | 398 | size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case) |
399 | size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints | 399 | size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints |
400 | 400 | ||
401 | - //if(VERBOSE){ | ||
402 | - std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl; | ||
403 | - std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl; | ||
404 | - //} | 401 | + std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl; |
402 | + std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl; | ||
405 | 403 | ||
406 | if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error | 404 | if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error |
407 | std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl; | 405 | std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl; |
@@ -458,7 +456,7 @@ public: | @@ -458,7 +456,7 @@ public: | ||
458 | wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file | 456 | wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file |
459 | y_proc += N[b]; //increment the counter of processed pixels | 457 | y_proc += N[b]; //increment the counter of processed pixels |
460 | if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels | 458 | if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels |
461 | - if(y_load < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations | 459 | + if(y_proc < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations |
462 | t_end = std::chrono::high_resolution_clock::now(); | 460 | t_end = std::chrono::high_resolution_clock::now(); |
463 | t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count(); | 461 | t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count(); |
464 | t_total += t_batch; | 462 | t_total += t_batch; |
@@ -472,29 +470,36 @@ public: | @@ -472,29 +470,36 @@ public: | ||
472 | free(src[1]); | 470 | free(src[1]); |
473 | free(dst[0]); | 471 | free(dst[0]); |
474 | free(dst[1]); | 472 | free(dst[1]); |
475 | - if(VERBOSE){ | 473 | + //if(VERBOSE){ |
476 | std::cout<<"total time to execute bsq::bil(): "<<t_total<<" ms"<<std::endl; | 474 | std::cout<<"total time to execute bsq::bil(): "<<t_total<<" ms"<<std::endl; |
477 | std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl; | 475 | std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl; |
478 | std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl; | 476 | std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl; |
479 | std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl; | 477 | std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl; |
480 | - } | 478 | + //} |
481 | return true; //return true | 479 | return true; //return true |
482 | } | 480 | } |
483 | 481 | ||
484 | /// Convert this BSQ file to a BIP | 482 | /// Convert this BSQ file to a BIP |
485 | - bool bip(std::string outname, bool PROGRESS = false, bool VERBOSE = false){ | 483 | + bool bip(std::string outname, bool PROGRESS = false, bool VERBOSE = false, bool OPTIMIZATION = true){ |
486 | 484 | ||
487 | const size_t buffers = 4; //number of buffers required for this algorithm | 485 | const size_t buffers = 4; //number of buffers required for this algorithm |
486 | + | ||
488 | size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch | 487 | size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch |
489 | 488 | ||
490 | size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case) | 489 | size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case) |
491 | size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints | 490 | size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints |
491 | + | ||
492 | + std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl; | ||
493 | + std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl; | ||
494 | + | ||
492 | if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error | 495 | if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error |
493 | std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl; | 496 | std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl; |
494 | exit(1); | 497 | exit(1); |
495 | } | 498 | } |
496 | size_t max_batch_bytes = max_slices_per_batch * slice_bytes; //calculate the amount of memory that will be allocated for all four buffers | 499 | size_t max_batch_bytes = max_slices_per_batch * slice_bytes; //calculate the amount of memory that will be allocated for all four buffers |
497 | 500 | ||
501 | + stream_optimizer O(1, max_slices_per_batch); | ||
502 | + | ||
498 | T* src[2]; //source double-buffer for asynchronous batching | 503 | T* src[2]; //source double-buffer for asynchronous batching |
499 | src[0] = (T*) malloc(max_batch_bytes); | 504 | src[0] = (T*) malloc(max_batch_bytes); |
500 | src[1] = (T*) malloc(max_batch_bytes); | 505 | src[1] = (T*) malloc(max_batch_bytes); |
@@ -512,46 +517,56 @@ public: | @@ -512,46 +517,56 @@ public: | ||
512 | std::future<size_t> rthread; | 517 | std::future<size_t> rthread; |
513 | std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing | 518 | std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing |
514 | 519 | ||
515 | - readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer | ||
516 | - y_load += N[0]; //increment the loaded slice counter | ||
517 | - int b = 1; | ||
518 | - | ||
519 | - std::chrono::high_resolution_clock::time_point t_start; //high-resolution timers | ||
520 | - std::chrono::high_resolution_clock::time_point t_end; | 520 | + std::chrono::high_resolution_clock::time_point t_start, pt_start; //high-resolution timers |
521 | + std::chrono::high_resolution_clock::time_point t_end, pt_end; | ||
521 | size_t t_batch; //number of milliseconds to process a batch | 522 | size_t t_batch; //number of milliseconds to process a batch |
522 | - size_t t_total = 0; | ||
523 | - size_t pt_total = 0; | ||
524 | - size_t rt_total = 0; | 523 | + size_t t_total = 0; //total time for operation |
524 | + size_t pt_total = 0; //total time spent processing data | ||
525 | + size_t rt_total = 0; //total time spent reading data | ||
526 | + size_t wt_total = 0; | ||
527 | + size_t dr = 0; | ||
528 | + | ||
529 | + rt_total += readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer | ||
530 | + y_load += N[0]; //increment the loaded slice counter | ||
531 | + int b = 1; //start the double-buffer index at 1: src[0] already holds the first batch, so the first asynchronous load goes to src[1] | ||
525 | while(y_proc < Y()){ //while there are still slices to be processed | 532 | while(y_proc < Y()){ //while there are still slices to be processed |
526 | t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch | 533 | t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch |
527 | if(y_load < Y()){ //if there are still slices to be loaded, load them | 534 | if(y_load < Y()){ //if there are still slices to be loaded, load them |
535 | + //if(y_proc > 0){ | ||
536 | + | ||
537 | + | ||
538 | + //} | ||
528 | if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size | 539 | if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size |
529 | rthread = std::async(std::launch::async, &stim::bsq<T>::readlines, this, src[b], y_load, N[b]); | 540 | rthread = std::async(std::launch::async, &stim::bsq<T>::readlines, this, src[b], y_load, N[b]); |
530 | - | 541 | + //rt_total += rthread.get(); |
531 | y_load += N[b]; //increment the number of loaded slices | 542 | y_load += N[b]; //increment the number of loaded slices |
532 | } | 543 | } |
533 | 544 | ||
534 | b = !b; //swap the double-buffer | 545 | b = !b; //swap the double-buffer |
535 | - | ||
536 | - pt_total += binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 2, 0, 1); //permute the batch to a BIP file | ||
537 | - target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file | 546 | + pt_total += binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 0, 2, 1); //permute the batch to a BIL file |
547 | + wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file | ||
538 | y_proc += N[b]; //increment the counter of processed pixels | 548 | y_proc += N[b]; //increment the counter of processed pixels |
539 | if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels | 549 | if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels |
550 | + if(y_proc < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations | ||
540 | t_end = std::chrono::high_resolution_clock::now(); | 551 | t_end = std::chrono::high_resolution_clock::now(); |
541 | t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count(); | 552 | t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count(); |
542 | t_total += t_batch; | 553 | t_total += t_batch; |
543 | - if(y_load < Y()) rt_total += rthread.get(); //if a batch was threaded to load, make sure it finishes | ||
544 | - } | ||
545 | - | ||
546 | - if(VERBOSE){ | ||
547 | - std::cout<<"total time to execute bsq::bil(): "<<t_total<<" ms"<<std::endl; | ||
548 | - std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl; | ||
549 | - std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl; | 554 | + if(OPTIMIZATION) |
555 | + N[b] = O.update(N[!b] * slice_bytes, t_batch, binary<T>::data_rate, VERBOSE); //set the batch size based on optimization | ||
556 | + //binary<T>::data_rate = dr; | ||
557 | + //std::cout<<"New N = "<<N[!b]<<" selected with "<<(double)data_rate / 1000000<<" MB/s"<<std::endl; | ||
550 | } | 558 | } |
559 | + | ||
551 | free(src[0]); //free buffer resources | 560 | free(src[0]); //free buffer resources |
552 | free(src[1]); | 561 | free(src[1]); |
553 | free(dst[0]); | 562 | free(dst[0]); |
554 | free(dst[1]); | 563 | free(dst[1]); |
564 | + //if(VERBOSE){ | ||
565 | + std::cout<<"total time to execute bsq::bip(): "<<t_total<<" ms"<<std::endl; | ||
566 | + std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl; | ||
567 | + std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl; | ||
568 | + std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl; | ||
569 | + //} | ||
555 | return true; //return true | 570 | return true; //return true |
556 | } | 571 | } |
557 | 572 |
stim/envi/envi.h
@@ -602,7 +602,7 @@ public: | @@ -602,7 +602,7 @@ public: | ||
602 | ((bsq<float>*)file)->bil(outfile, PROGRESS, VERBOSE, OPTIMIZATION); | 602 | ((bsq<float>*)file)->bil(outfile, PROGRESS, VERBOSE, OPTIMIZATION); |
603 | else if(interleave == envi_header::BIP){ //ERROR | 603 | else if(interleave == envi_header::BIP){ //ERROR |
604 | //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl; | 604 | //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl; |
605 | - ((bsq<float>*)file)->bip(outfile, PROGRESS, VERBOSE); | 605 | + ((bsq<float>*)file)->bip(outfile, PROGRESS, VERBOSE, OPTIMIZATION); |
606 | //exit(1); | 606 | //exit(1); |
607 | } | 607 | } |
608 | } | 608 | } |