Commit 008ae9901b1fa4b72a720dee4737956db42cacd0

Authored by David Mayerich
1 parent 3c25631c

implemented Windows code for automatically estimating buffer sizes

Showing 3 changed files with 74 additions and 49 deletions
stim/envi/binary.h
@@ -8,6 +8,10 @@ @@ -8,6 +8,10 @@
8 #include <fstream> 8 #include <fstream>
9 #include <sys/stat.h> 9 #include <sys/stat.h>
10 10
  11 +#ifdef _WIN32
  12 +#include <Windows.h>
  13 +#endif
  14 +
11 namespace stim{ 15 namespace stim{
12 16
13 /** This class manages the streaming of large multidimensional binary files. 17 /** This class manages the streaming of large multidimensional binary files.
@@ -30,16 +34,16 @@ protected: @@ -30,16 +34,16 @@ protected:
30 34
31 double progress; //stores the progress on the current operation (accessible using a thread) 35 double progress; //stores the progress on the current operation (accessible using a thread)
32 36
33 - static const size_t bufferSize = 1000000000;  
34 - 37 + size_t buffer_size; //available memory for processing large files
35 38
36 /// Private initialization function used to set default parameters in the data structure. 39 /// Private initialization function used to set default parameters in the data structure.
37 void init(){ 40 void init(){
38 memset(R, 0, sizeof(unsigned long long) * D); //initialize the resolution to zero 41 memset(R, 0, sizeof(unsigned long long) * D); //initialize the resolution to zero
39 - header = 0; //initialize the header size to zero 42 + header = 0; //initialize the header size to zero
40 mask = NULL; 43 mask = NULL;
41 44
42 progress = 0; 45 progress = 0;
  46 + set_buffer(); //set the maximum buffer size to the default
43 } 47 }
44 48
45 /// Private helper function that returns the size of the file on disk using system functions. 49 /// Private helper function that returns the size of the file on disk using system functions.
@@ -107,6 +111,11 @@ protected: @@ -107,6 +111,11 @@ protected:
107 111
108 public: 112 public:
109 113
  114 + //default constructor
  115 + binary(){
  116 + init();
  117 + }
  118 +
110 double get_progress(){ 119 double get_progress(){
111 return progress; 120 return progress;
112 } 121 }
@@ -115,6 +124,16 @@ public: @@ -115,6 +124,16 @@ public:
115 progress = 0; 124 progress = 0;
116 } 125 }
117 126
  127 + //specify the maximum fraction of available memory that this class will use for buffering
  128 + void set_buffer(double mem_frac = 0.5){ //default to 50%
  129 +#ifdef _WIN32
  130 + MEMORYSTATUSEX statex;
  131 + statex.dwLength = sizeof (statex);
  132 + GlobalMemoryStatusEx (&statex);
  133 + buffer_size = (size_t)(statex.ullAvailPhys * mem_frac);
  134 +#endif
  135 + }
  136 +
118 /// Open a binary file for streaming. 137 /// Open a binary file for streaming.
119 138
120 /// @param filename is the name of the binary file 139 /// @param filename is the name of the binary file
@@ -378,80 +378,52 @@ public: @@ -378,80 +378,52 @@ public:
378 378
379 /// Convert the current BSQ file to a BIL file with the specified file name. 379 /// Convert the current BSQ file to a BIL file with the specified file name.
380 bool bil(std::string outname, bool PROGRESS = false){ 380 bool bil(std::string outname, bool PROGRESS = false){
381 - size_t XY = X() * Y(); //number of elements in a band  
382 - size_t XYbytes = XY * sizeof(T); //number of bytes in a band  
383 - size_t batchB = binary<T>::bufferSize / (XYbytes); //calculate the number of slices that can fit in memory  
384 - size_t batchXB = X() * batchB; //number of elements in a batch 381 + size_t XY = X() * Y(); //number of elements in a band
  382 + size_t XYbytes = XY * sizeof(T); //number of bytes in a band
  383 + size_t batchB = binary<T>::buffer_size / (XYbytes); //calculate the number of slices that can fit in memory
  384 + if(Z() < batchB) batchB = Z(); //if the entire data set will fit in memory, do it
  385 + size_t batchXB = X() * batchB; //number of elements in a batch
  386 + T* ptrBatch = (T*) malloc(batchB * XYbytes); //allocate a large buffer storing the read data
  387 + T* ptrSlice = (T*) malloc(sizeof(T) * batchB * X()); //allocate space for storing an output buffer
385 388
386 - T* ptrBatch = (T*) malloc(batchB * XYbytes); //allocate a large buffer storing the read data  
387 - T* ptrSlice = (T*) malloc(sizeof(T) * batchB * X()); //allocate space for storing an output buffer  
388 -  
389 - size_t jump = (Z() - batchB) * X() * sizeof(T); //jump between writes in the output file 389 + size_t jump = (Z() - batchB) * X() * sizeof(T); //jump between writes in the output file
390 390
391 std::ofstream target(outname.c_str(), std::ios::binary); 391 std::ofstream target(outname.c_str(), std::ios::binary);
392 std::string headername = outname + ".hdr"; 392 std::string headername = outname + ".hdr";
393 393
394 - size_t batches = ceil((double)(Z()) / (double)batchB); //calculate the number of batches 394 + size_t batches = ceil((double)(Z()) / (double)batchB); //calculate the number of batches
395 T* ptrDst; 395 T* ptrDst;
396 T* ptrSrc; 396 T* ptrSrc;
397 for(size_t c = 0; c < batches; c++){ 397 for(size_t c = 0; c < batches; c++){
398 target.seekp(c * batchB * sizeof(T) * X(), std::ios::beg); //seek to the start of the current batch in the output file 398 target.seekp(c * batchB * sizeof(T) * X(), std::ios::beg); //seek to the start of the current batch in the output file
399 - file.read((char*)ptrBatch, sizeof(T) * X() * Y() * batchB); //read a batch 399 + file.read((char*)ptrBatch, sizeof(T) * X() * Y() * batchB); //read a batch
  400 +
  401 + auto pbegin = std::chrono::high_resolution_clock::now();
400 if(c == (batches - 1)){ 402 if(c == (batches - 1)){
401 batchB = Z() - (batches - 1) * batchB; //if this is the last batch, calculate the remaining # of bands 403 batchB = Z() - (batches - 1) * batchB; //if this is the last batch, calculate the remaining # of bands
402 jump = (Z() - batchB) * X() * sizeof(T); 404 jump = (Z() - batchB) * X() * sizeof(T);
403 } 405 }
404 - for(size_t y = 0; y < Y(); y++){ //for each line, store an XB slice in ptrDest 406 + for(size_t y = 0; y < Y(); y++){ //for each line, store an XB slice in ptrDest
405 ptrDst = ptrSlice; //initialize ptrDst to the start of the XB output slice 407 ptrDst = ptrSlice; //initialize ptrDst to the start of the XB output slice
406 ptrSrc = ptrBatch + (y * X()); 408 ptrSrc = ptrBatch + (y * X());
407 for(size_t b = 0; b < batchB; b++){ //for each band in the current line 409 for(size_t b = 0; b < batchB; b++){ //for each band in the current line
408 memcpy(ptrDst, ptrSrc, X() * sizeof(T)); //copy the band line from the source to the destination 410 memcpy(ptrDst, ptrSrc, X() * sizeof(T)); //copy the band line from the source to the destination
409 - ptrDst += X();  
410 - ptrSrc += X() * Y(); 411 + ptrDst += X(); //increment the pointer within the XB slice (to be output)
  412 + ptrSrc += X() * Y(); //increment the pointer within the current buffer array (batch)
411 } 413 }
  414 +
412 target.write((char*)ptrSlice, sizeof(T) * X() * batchB); //write the XB slice to disk 415 target.write((char*)ptrSlice, sizeof(T) * X() * batchB); //write the XB slice to disk
413 - target.seekp(jump, std::ios::cur); //seek to the beginning of the next XB slice in the batch 416 + target.seekp(jump, std::ios::cur); //seek to the beginning of the next XB slice in the batch
  417 + if(PROGRESS) progress = (double)( c * Y() + y + 1 ) / (Y() * batches) * 100;
414 } 418 }
415 - if(PROGRESS) progress = (double)(c+1)/(double)(batches) * 100;  
416 } 419 }
417 420
418 - //if(PROGRESS) progress = 100;  
419 -  
420 free(ptrBatch); 421 free(ptrBatch);
421 free(ptrSlice); 422 free(ptrSlice);
422 target.close(); 423 target.close();
423 424
424 return true; 425 return true;
425 } 426 }
426 - /// @param outname is the name of the output BIL file to be saved to disk.  
427 - /*bool bil(std::string outname, bool PROGRESS = false){  
428 -  
429 - //simplify image resolution  
430 - unsigned long long jump = (Y() - 1) * X() * sizeof(T);  
431 -  
432 - std::ofstream target(outname.c_str(), std::ios::binary);  
433 - std::string headername = outname + ".hdr";  
434 -  
435 - unsigned long long L = X(); //calculate the number of pixels in a line  
436 - T* line = (T*)malloc(sizeof(T) * L); //allocate space for a line  
437 -  
438 - for ( unsigned long long y = 0; y < Y(); y++) //for each y position  
439 - {  
440 - file.seekg(y * X() * sizeof(T), std::ios::beg); //seek to the beginning of the xz slice  
441 - for ( unsigned long long z = 0; z < Z(); z++ ) //for each band  
442 - {  
443 - file.read((char *)line, sizeof(T) * X()); //read a line  
444 - target.write((char*)line, sizeof(T) * X()); //write the line to the output file  
445 - file.seekg(jump, std::ios::cur); //seek to the next band  
446 - if(PROGRESS) progress = (double)((y+1) * Z() + z + 1) / (Z() * Y()) * 100; //update the progress counter  
447 - }  
448 - }  
449 -  
450 - free(line);  
451 - target.close();  
452 -  
453 - return true;  
454 - }*/  
455 427
456 /// Return a baseline corrected band given two adjacent baseline points and their bands. The result is stored in a pre-allocated array. 428 /// Return a baseline corrected band given two adjacent baseline points and their bands. The result is stored in a pre-allocated array.
457 429
@@ -77,6 +77,40 @@ public: @@ -77,6 +77,40 @@ public:
77 return alloc_array(header.samples * header.lines); 77 return alloc_array(header.samples * header.lines);
78 } 78 }
79 79
  80 + void set_buffer(double memfrac = 0.5){
  81 + if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
  82 + if(header.data_type ==envi_header::float32)
  83 + ((bsq<float>*)file)->set_buffer(memfrac);
  84 + else if(header.data_type == envi_header::float64)
  85 + ((bsq<double>*)file)->set_buffer(memfrac);
  86 + else
  87 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  88 + }
  89 +
  90 + else if(header.interleave == envi_header::BIL){ //if the infile is bil file
  91 + if(header.data_type ==envi_header::float32)
  92 + ((bil<float>*)file)->set_buffer(memfrac);
  93 + else if(header.data_type == envi_header::float64)
  94 + ((bil<double>*)file)->set_buffer(memfrac);
  95 + else
  96 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  97 + }
  98 +
  99 + else if(header.interleave == envi_header::BIP){ //if the infile is bip file
  100 + if(header.data_type ==envi_header::float32)
  101 + ((bip<float>*)file)->set_buffer(memfrac);
  102 + else if(header.data_type == envi_header::float64)
  103 + ((bip<double>*)file)->set_buffer(memfrac);
  104 + else
  105 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  106 + }
  107 +
  108 + else{
  109 + std::cout<<"ERROR: unidentified file type"<<std::endl;
  110 + exit(1);
  111 + }
  112 + }
  113 +
80 /// Returns the size of the data type in bytes 114 /// Returns the size of the data type in bytes
81 unsigned int type_size(){ 115 unsigned int type_size(){
82 if(header.data_type == envi_header::float32) return 4; 116 if(header.data_type == envi_header::float32) return 4;