Commit 474c351a8d9ad9c032ae6e5717c0d5fa054ed93d

Authored by David Mayerich
1 parent c6251f8b

output timing data after optimization

Showing 2 changed files with 44 additions and 29 deletions   Show diff stats
stim/envi/bsq.h
... ... @@ -398,10 +398,8 @@ public:
398 398 size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case)
399 399 size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints
400 400  
401   - //if(VERBOSE){
402   - std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl;
403   - std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl;
404   - //}
  401 + std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl;
  402 + std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl;
405 403  
406 404 if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error
407 405 std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl;
... ... @@ -458,7 +456,7 @@ public:
458 456 wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file
459 457 y_proc += N[b]; //increment the counter of processed pixels
460 458 if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels
461   - if(y_load < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations
  459 + if(y_proc < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations
462 460 t_end = std::chrono::high_resolution_clock::now();
463 461 t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count();
464 462 t_total += t_batch;
... ... @@ -472,29 +470,36 @@ public:
472 470 free(src[1]);
473 471 free(dst[0]);
474 472 free(dst[1]);
475   - if(VERBOSE){
  473 + //if(VERBOSE){
476 474 std::cout<<"total time to execute bsq::bil(): "<<t_total<<" ms"<<std::endl;
477 475 std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl;
478 476 std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl;
479 477 std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl;
480   - }
  478 + //}
481 479 return true; //return true
482 480 }
483 481  
484 482 /// Convert this BSQ file to a BIP
485   - bool bip(std::string outname, bool PROGRESS = false, bool VERBOSE = false){
  483 + bool bip(std::string outname, bool PROGRESS = false, bool VERBOSE = false, bool OPTIMIZATION = true){
486 484  
487 485 const size_t buffers = 4; //number of buffers required for this algorithm
  486 +
488 487 size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch
489 488  
490 489 size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case)
491 490 size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints
  491 +
  492 + std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl;
  493 + std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices ("<<X()<<" x "<<Z()<<") = "<<X() * Z() * sizeof(T) * max_slices_per_batch/1000000<<" MB"<<std::endl;
  494 +
492 495 if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error
493 496 std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl;
494 497 exit(1);
495 498 }
496 499 size_t max_batch_bytes = max_slices_per_batch * slice_bytes; //calculate the amount of memory that will be allocated for all four buffers
497 500  
  501 + stream_optimizer O(1, max_slices_per_batch);
  502 +
498 503 T* src[2]; //source double-buffer for asynchronous batching
499 504 src[0] = (T*) malloc(max_batch_bytes);
500 505 src[1] = (T*) malloc(max_batch_bytes);
... ... @@ -512,46 +517,56 @@ public:
512 517 std::future<size_t> rthread;
513 518 std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing
514 519  
515   - readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer
516   - y_load += N[0]; //increment the loaded slice counter
517   - int b = 1;
518   -
519   - std::chrono::high_resolution_clock::time_point t_start; //high-resolution timers
520   - std::chrono::high_resolution_clock::time_point t_end;
  520 + std::chrono::high_resolution_clock::time_point t_start, pt_start; //high-resolution timers
  521 + std::chrono::high_resolution_clock::time_point t_end, pt_end;
521 522 size_t t_batch; //number of milliseconds to process a batch
522   - size_t t_total = 0;
523   - size_t pt_total = 0;
524   - size_t rt_total = 0;
  523 + size_t t_total = 0; //total time for operation
  524 + size_t pt_total = 0; //total time spent processing data
  525 + size_t rt_total = 0; //total time spent reading data
  526 + size_t wt_total = 0;
  527 + size_t dr = 0;
  528 +
  529 + rt_total += readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer
  530 + y_load += N[0]; //increment the loaded slice counter
  531 + int b = 1; //initialize the double buffer to 0
525 532 while(y_proc < Y()){ //while there are still slices to be processed
526 533 t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch
527 534 if(y_load < Y()){ //if there are still slices to be loaded, load them
  535 + //if(y_proc > 0){
  536 +
  537 +
  538 + //}
528 539 if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size
529 540 rthread = std::async(std::launch::async, &stim::bsq<T>::readlines, this, src[b], y_load, N[b]);
530   -
  541 + //rt_total += rthread.get();
531 542 y_load += N[b]; //increment the number of loaded slices
532 543 }
533 544  
534 545 b = !b; //swap the double-buffer
535   -
536   - pt_total += binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 2, 0, 1); //permute the batch to a BIP file
537   - target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file
  546 + pt_total += binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 0, 2, 1); //permute the batch to a BIL file
  547 + wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file
538 548 y_proc += N[b]; //increment the counter of processed pixels
539 549 if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels
  550 + if(y_proc < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations
540 551 t_end = std::chrono::high_resolution_clock::now();
541 552 t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count();
542 553 t_total += t_batch;
543   - if(y_load < Y()) rt_total += rthread.get(); //if a batch was threaded to load, make sure it finishes
544   - }
545   -
546   - if(VERBOSE){
547   - std::cout<<"total time to execute bsq::bil(): "<<t_total<<" ms"<<std::endl;
548   - std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl;
549   - std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl;
  554 + if(OPTIMIZATION)
  555 + N[b] = O.update(N[!b] * slice_bytes, t_batch, binary<T>::data_rate, VERBOSE); //set the batch size based on optimization
  556 + //binary<T>::data_rate = dr;
  557 + //std::cout<<"New N = "<<N[!b]<<" selected with "<<(double)data_rate / 1000000<<" MB/s"<<std::endl;
550 558 }
  559 +
551 560 free(src[0]); //free buffer resources
552 561 free(src[1]);
553 562 free(dst[0]);
554 563 free(dst[1]);
  564 + //if(VERBOSE){
  565 + std::cout<<"total time to execute bsq::bip(): "<<t_total<<" ms"<<std::endl;
  566 + std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl;
  567 + std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl;
  568 + std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl;
  569 + //}
555 570 return true; //return true
556 571 }
557 572  
... ...
stim/envi/envi.h
... ... @@ -602,7 +602,7 @@ public:
602 602 ((bsq<float>*)file)->bil(outfile, PROGRESS, VERBOSE, OPTIMIZATION);
603 603 else if(interleave == envi_header::BIP){ //ERROR
604 604 //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;
605   - ((bsq<float>*)file)->bip(outfile, PROGRESS, VERBOSE);
  605 + ((bsq<float>*)file)->bip(outfile, PROGRESS, VERBOSE, OPTIMIZATION);
606 606 //exit(1);
607 607 }
608 608 }
... ...