Commit 474c351a8d9ad9c032ae6e5717c0d5fa054ed93d

Authored by David Mayerich
1 parent c6251f8b

output timing data after optimization

Showing 2 changed files with 44 additions and 29 deletions
@@ -398,10 +398,8 @@ public: @@ -398,10 +398,8 @@ public:
398 size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case) 398 size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case)
399 size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints 399 size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints
400 400
401 - //if(VERBOSE){  
402 - std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl;  
403 - std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl;  
404 - //} 401 + std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl;
  402 + std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl;
405 403
406 if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error 404 if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error
407 std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl; 405 std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl;
@@ -458,7 +456,7 @@ public: @@ -458,7 +456,7 @@ public:
458 wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file 456 wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file
459 y_proc += N[b]; //increment the counter of processed pixels 457 y_proc += N[b]; //increment the counter of processed pixels
460 if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels 458 if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels
461 - if(y_load < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations 459 + if(y_proc < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations
462 t_end = std::chrono::high_resolution_clock::now(); 460 t_end = std::chrono::high_resolution_clock::now();
463 t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count(); 461 t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count();
464 t_total += t_batch; 462 t_total += t_batch;
@@ -472,29 +470,36 @@ public: @@ -472,29 +470,36 @@ public:
472 free(src[1]); 470 free(src[1]);
473 free(dst[0]); 471 free(dst[0]);
474 free(dst[1]); 472 free(dst[1]);
475 - if(VERBOSE){ 473 + //if(VERBOSE){
476 std::cout<<"total time to execute bsq::bil(): "<<t_total<<" ms"<<std::endl; 474 std::cout<<"total time to execute bsq::bil(): "<<t_total<<" ms"<<std::endl;
477 std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl; 475 std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl;
478 std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl; 476 std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl;
479 std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl; 477 std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl;
480 - } 478 + //}
481 return true; //return true 479 return true; //return true
482 } 480 }
483 481
484 /// Convert this BSQ file to a BIP 482 /// Convert this BSQ file to a BIP
485 - bool bip(std::string outname, bool PROGRESS = false, bool VERBOSE = false){ 483 + bool bip(std::string outname, bool PROGRESS = false, bool VERBOSE = false, bool OPTIMIZATION = true){
486 484
487 const size_t buffers = 4; //number of buffers required for this algorithm 485 const size_t buffers = 4; //number of buffers required for this algorithm
  486 +
488 size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch 487 size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch
489 488
490 size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case) 489 size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case)
491 size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints 490 size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints
  491 +
  492 + std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl;
  493 + std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl;
  494 +
492 if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error 495 if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error
493 std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl; 496 std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl;
494 exit(1); 497 exit(1);
495 } 498 }
496 size_t max_batch_bytes = max_slices_per_batch * slice_bytes; //calculate the amount of memory that will be allocated for all four buffers 499 size_t max_batch_bytes = max_slices_per_batch * slice_bytes; //calculate the amount of memory that will be allocated for all four buffers
497 500
  501 + stream_optimizer O(1, max_slices_per_batch);
  502 +
498 T* src[2]; //source double-buffer for asynchronous batching 503 T* src[2]; //source double-buffer for asynchronous batching
499 src[0] = (T*) malloc(max_batch_bytes); 504 src[0] = (T*) malloc(max_batch_bytes);
500 src[1] = (T*) malloc(max_batch_bytes); 505 src[1] = (T*) malloc(max_batch_bytes);
@@ -512,46 +517,56 @@ public: @@ -512,46 +517,56 @@ public:
512 std::future<size_t> rthread; 517 std::future<size_t> rthread;
513 std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing 518 std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing
514 519
515 - readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer  
516 - y_load += N[0]; //increment the loaded slice counter  
517 - int b = 1;  
518 -  
519 - std::chrono::high_resolution_clock::time_point t_start; //high-resolution timers  
520 - std::chrono::high_resolution_clock::time_point t_end; 520 + std::chrono::high_resolution_clock::time_point t_start, pt_start; //high-resolution timers
  521 + std::chrono::high_resolution_clock::time_point t_end, pt_end;
521 size_t t_batch; //number of milliseconds to process a batch 522 size_t t_batch; //number of milliseconds to process a batch
522 - size_t t_total = 0;  
523 - size_t pt_total = 0;  
524 - size_t rt_total = 0; 523 + size_t t_total = 0; //total time for operation
  524 + size_t pt_total = 0; //total time spent processing data
  525 + size_t rt_total = 0; //total time spent reading data
  526 + size_t wt_total = 0;
  527 + size_t dr = 0;
  528 +
  529 + rt_total += readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer
  530 + y_load += N[0]; //increment the loaded slice counter
  531 + int b = 1; //initialize the double buffer to 0
525 while(y_proc < Y()){ //while there are still slices to be processed 532 while(y_proc < Y()){ //while there are still slices to be processed
526 t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch 533 t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch
527 if(y_load < Y()){ //if there are still slices to be loaded, load them 534 if(y_load < Y()){ //if there are still slices to be loaded, load them
  535 + //if(y_proc > 0){
  536 +
  537 +
  538 + //}
528 if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size 539 if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size
529 rthread = std::async(std::launch::async, &stim::bsq<T>::readlines, this, src[b], y_load, N[b]); 540 rthread = std::async(std::launch::async, &stim::bsq<T>::readlines, this, src[b], y_load, N[b]);
530 - 541 + //rt_total += rthread.get();
531 y_load += N[b]; //increment the number of loaded slices 542 y_load += N[b]; //increment the number of loaded slices
532 } 543 }
533 544
534 b = !b; //swap the double-buffer 545 b = !b; //swap the double-buffer
535 -  
536 - pt_total += binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 2, 0, 1); //permute the batch to a BIP file  
537 - target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file 546 + pt_total += binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 0, 2, 1); //permute the batch to a BIL file
  547 + wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file
538 y_proc += N[b]; //increment the counter of processed pixels 548 y_proc += N[b]; //increment the counter of processed pixels
539 if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels 549 if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels
  550 + if(y_proc < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations
540 t_end = std::chrono::high_resolution_clock::now(); 551 t_end = std::chrono::high_resolution_clock::now();
541 t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count(); 552 t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count();
542 t_total += t_batch; 553 t_total += t_batch;
543 - if(y_load < Y()) rt_total += rthread.get(); //if a batch was threaded to load, make sure it finishes  
544 - }  
545 -  
546 - if(VERBOSE){  
547 - std::cout<<"total time to execute bsq::bil(): "<<t_total<<" ms"<<std::endl;  
548 - std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl;  
549 - std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl; 554 + if(OPTIMIZATION)
  555 + N[b] = O.update(N[!b] * slice_bytes, t_batch, binary<T>::data_rate, VERBOSE); //set the batch size based on optimization
  556 + //binary<T>::data_rate = dr;
  557 + //std::cout<<"New N = "<<N[!b]<<" selected with "<<(double)data_rate / 1000000<<" MB/s"<<std::endl;
550 } 558 }
  559 +
551 free(src[0]); //free buffer resources 560 free(src[0]); //free buffer resources
552 free(src[1]); 561 free(src[1]);
553 free(dst[0]); 562 free(dst[0]);
554 free(dst[1]); 563 free(dst[1]);
  564 + //if(VERBOSE){
  565 + std::cout<<"total time to execute bsq::bip(): "<<t_total<<" ms"<<std::endl;
  566 + std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl;
  567 + std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl;
  568 + std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl;
  569 + //}
555 return true; //return true 570 return true; //return true
556 } 571 }
557 572
@@ -602,7 +602,7 @@ public: @@ -602,7 +602,7 @@ public:
602 ((bsq<float>*)file)->bil(outfile, PROGRESS, VERBOSE, OPTIMIZATION); 602 ((bsq<float>*)file)->bil(outfile, PROGRESS, VERBOSE, OPTIMIZATION);
603 else if(interleave == envi_header::BIP){ //ERROR 603 else if(interleave == envi_header::BIP){ //ERROR
604 //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl; 604 //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;
605 - ((bsq<float>*)file)->bip(outfile, PROGRESS, VERBOSE); 605 + ((bsq<float>*)file)->bip(outfile, PROGRESS, VERBOSE, OPTIMIZATION);
606 //exit(1); 606 //exit(1);
607 } 607 }
608 } 608 }