Commit 008ae9901b1fa4b72a720dee4737956db42cacd0

Authored by David Mayerich
1 parent 3c25631c

Implemented Windows-specific code that automatically estimates buffer sizes from the available physical memory

Showing 3 changed files with 74 additions and 49 deletions   Show diff stats
stim/envi/binary.h
... ... @@ -8,6 +8,10 @@
8 8 #include <fstream>
9 9 #include <sys/stat.h>
10 10  
  11 +#ifdef _WIN32
  12 +#include <Windows.h>
  13 +#endif
  14 +
11 15 namespace stim{
12 16  
13 17 /** This class manages the streaming of large multidimensional binary files.
... ... @@ -30,16 +34,16 @@ protected:
30 34  
31 35 double progress; //stores the progress on the current operation (accessible using a thread)
32 36  
33   - static const size_t bufferSize = 1000000000;
34   -
  37 + size_t buffer_size; //available memory for processing large files
35 38  
36 39 /// Private initialization function used to set default parameters in the data structure.
37 40 void init(){
38 41 memset(R, 0, sizeof(unsigned long long) * D); //initialize the resolution to zero
39   - header = 0; //initialize the header size to zero
  42 + header = 0; //initialize the header size to zero
40 43 mask = NULL;
41 44  
42 45 progress = 0;
  46 + set_buffer(); //set the maximum buffer size to the default
43 47 }
44 48  
45 49 /// Private helper function that returns the size of the file on disk using system functions.
... ... @@ -107,6 +111,11 @@ protected:
107 111  
108 112 public:
109 113  
  114 + //default constructor
  115 + binary(){
  116 + init();
  117 + }
  118 +
110 119 double get_progress(){
111 120 return progress;
112 121 }
... ... @@ -115,6 +124,16 @@ public:
115 124 progress = 0;
116 125 }
117 126  
  127 + //specify the maximum fraction of available memory that this class will use for buffering
  128 + void set_buffer(double mem_frac = 0.5){ //default to 50%
  129 +#ifdef _WIN32
  130 + MEMORYSTATUSEX statex;
  131 + statex.dwLength = sizeof (statex);
  132 + GlobalMemoryStatusEx (&statex);
  133 + buffer_size = (size_t)(statex.ullAvailPhys * mem_frac);
  134 +#endif
  135 + }
  136 +
118 137 /// Open a binary file for streaming.
119 138  
120 139 /// @param filename is the name of the binary file
... ...
stim/envi/bsq.h
... ... @@ -378,80 +378,52 @@ public:
378 378  
379 379 /// Convert the current BSQ file to a BIL file with the specified file name.
380 380 bool bil(std::string outname, bool PROGRESS = false){
381   - size_t XY = X() * Y(); //number of elements in a band
382   - size_t XYbytes = XY * sizeof(T); //number of bytes in a band
383   - size_t batchB = binary<T>::bufferSize / (XYbytes); //calculate the number of slices that can fit in memory
384   - size_t batchXB = X() * batchB; //number of elements in a batch
  381 + size_t XY = X() * Y(); //number of elements in a band
  382 + size_t XYbytes = XY * sizeof(T); //number of bytes in a band
  383 + size_t batchB = binary<T>::buffer_size / (XYbytes); //calculate the number of slices that can fit in memory
  384 + if(Z() < batchB) batchB = Z(); //if the entire data set will fit in memory, do it
  385 + size_t batchXB = X() * batchB; //number of elements in a batch
  386 + T* ptrBatch = (T*) malloc(batchB * XYbytes); //allocate a large buffer storing the read data
  387 + T* ptrSlice = (T*) malloc(sizeof(T) * batchB * X()); //allocate space for storing an output buffer
385 388  
386   - T* ptrBatch = (T*) malloc(batchB * XYbytes); //allocate a large buffer storing the read data
387   - T* ptrSlice = (T*) malloc(sizeof(T) * batchB * X()); //allocate space for storing an output buffer
388   -
389   - size_t jump = (Z() - batchB) * X() * sizeof(T); //jump between writes in the output file
  389 + size_t jump = (Z() - batchB) * X() * sizeof(T); //jump between writes in the output file
390 390  
391 391 std::ofstream target(outname.c_str(), std::ios::binary);
392 392 std::string headername = outname + ".hdr";
393 393  
394   - size_t batches = ceil((double)(Z()) / (double)batchB); //calculate the number of batches
  394 + size_t batches = ceil((double)(Z()) / (double)batchB); //calculate the number of batches
395 395 T* ptrDst;
396 396 T* ptrSrc;
397 397 for(size_t c = 0; c < batches; c++){
398 398 target.seekp(c * batchB * sizeof(T) * X(), std::ios::beg); //seek to the start of the current batch in the output file
399   - file.read((char*)ptrBatch, sizeof(T) * X() * Y() * batchB); //read a batch
  399 + file.read((char*)ptrBatch, sizeof(T) * X() * Y() * batchB); //read a batch
  400 +
  401 + auto pbegin = std::chrono::high_resolution_clock::now();
400 402 if(c == (batches - 1)){
401 403 batchB = Z() - (batches - 1) * batchB; //if this is the last batch, calculate the remaining # of bands
402 404 jump = (Z() - batchB) * X() * sizeof(T);
403 405 }
404   - for(size_t y = 0; y < Y(); y++){ //for each line, store an XB slice in ptrDest
  406 + for(size_t y = 0; y < Y(); y++){ //for each line, store an XB slice in ptrDest
405 407 ptrDst = ptrSlice; //initialize ptrDst to the start of the XB output slice
406 408 ptrSrc = ptrBatch + (y * X());
407 409 for(size_t b = 0; b < batchB; b++){ //for each band in the current line
408 410 memcpy(ptrDst, ptrSrc, X() * sizeof(T)); //copy the band line from the source to the destination
409   - ptrDst += X();
410   - ptrSrc += X() * Y();
  411 + ptrDst += X(); //increment the pointer within the XB slice (to be output)
  412 + ptrSrc += X() * Y(); //increment the pointer within the current buffer array (batch)
411 413 }
  414 +
412 415 target.write((char*)ptrSlice, sizeof(T) * X() * batchB); //write the XB slice to disk
413   - target.seekp(jump, std::ios::cur); //seek to the beginning of the next XB slice in the batch
  416 + target.seekp(jump, std::ios::cur); //seek to the beginning of the next XB slice in the batch
  417 + if(PROGRESS) progress = (double)( c * Y() + y + 1 ) / (Y() * batches) * 100;
414 418 }
415   - if(PROGRESS) progress = (double)(c+1)/(double)(batches) * 100;
416 419 }
417 420  
418   - //if(PROGRESS) progress = 100;
419   -
420 421 free(ptrBatch);
421 422 free(ptrSlice);
422 423 target.close();
423 424  
424 425 return true;
425 426 }
426   - /// @param outname is the name of the output BIL file to be saved to disk.
427   - /*bool bil(std::string outname, bool PROGRESS = false){
428   -
429   - //simplify image resolution
430   - unsigned long long jump = (Y() - 1) * X() * sizeof(T);
431   -
432   - std::ofstream target(outname.c_str(), std::ios::binary);
433   - std::string headername = outname + ".hdr";
434   -
435   - unsigned long long L = X(); //calculate the number of pixels in a line
436   - T* line = (T*)malloc(sizeof(T) * L); //allocate space for a line
437   -
438   - for ( unsigned long long y = 0; y < Y(); y++) //for each y position
439   - {
440   - file.seekg(y * X() * sizeof(T), std::ios::beg); //seek to the beginning of the xz slice
441   - for ( unsigned long long z = 0; z < Z(); z++ ) //for each band
442   - {
443   - file.read((char *)line, sizeof(T) * X()); //read a line
444   - target.write((char*)line, sizeof(T) * X()); //write the line to the output file
445   - file.seekg(jump, std::ios::cur); //seek to the next band
446   - if(PROGRESS) progress = (double)((y+1) * Z() + z + 1) / (Z() * Y()) * 100; //update the progress counter
447   - }
448   - }
449   -
450   - free(line);
451   - target.close();
452   -
453   - return true;
454   - }*/
455 427  
456 428 /// Return a baseline corrected band given two adjacent baseline points and their bands. The result is stored in a pre-allocated array.
457 429  
... ...
stim/envi/envi.h
... ... @@ -77,6 +77,40 @@ public:
77 77 return alloc_array(header.samples * header.lines);
78 78 }
79 79  
  80 + void set_buffer(double memfrac = 0.5){
  81 + if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
  82 + if(header.data_type ==envi_header::float32)
  83 + ((bsq<float>*)file)->set_buffer(memfrac);
  84 + else if(header.data_type == envi_header::float64)
  85 + ((bsq<double>*)file)->set_buffer(memfrac);
  86 + else
  87 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  88 + }
  89 +
  90 + else if(header.interleave == envi_header::BIL){ //if the infile is bil file
  91 + if(header.data_type ==envi_header::float32)
  92 + ((bil<float>*)file)->set_buffer(memfrac);
  93 + else if(header.data_type == envi_header::float64)
  94 + ((bil<double>*)file)->set_buffer(memfrac);
  95 + else
  96 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  97 + }
  98 +
  99 + else if(header.interleave == envi_header::BIP){ //if the infile is bip file
  100 + if(header.data_type ==envi_header::float32)
  101 + ((bip<float>*)file)->set_buffer(memfrac);
  102 + else if(header.data_type == envi_header::float64)
  103 + ((bip<double>*)file)->set_buffer(memfrac);
  104 + else
  105 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  106 + }
  107 +
  108 + else{
  109 + std::cout<<"ERROR: unidentified file type"<<std::endl;
  110 + exit(1);
  111 + }
  112 + }
  113 +
80 114 /// Returns the size of the data type in bytes
81 115 unsigned int type_size(){
82 116 if(header.data_type == envi_header::float32) return 4;
... ...