diff --git a/stim/envi/binary.h b/stim/envi/binary.h index f6d0dc2..2fd0a6b 100644 --- a/stim/envi/binary.h +++ b/stim/envi/binary.h @@ -446,6 +446,53 @@ public: return false; } + // permutes a block of data from the current interleave to the interleave specified (re-arranged dimensions to the order specified by [d0, d1, d2]) + + void permute(T* dest, T* src, size_t sx, size_t sy, size_t sz, size_t d0, size_t d1, size_t d2){ + size_t d[3] = {d0, d1, d2}; + size_t s[3] = {sx, sy, sz}; + size_t p[3];// = {x, y, z}; + + if(d[0] == 0 && d[1] == 1 && d[2] == 2){ + //this isn't actually a permute - just copy the data + memcpy(dest, src, sizeof(T) * sx * sy * sz); + } + else if(d[0] == 0){ //the individual lines are contiguous, so you can memcpy line-by-line + size_t y, z; + size_t src_idx, dest_idx; + size_t x_bytes = sizeof(T) * sx; + for(z = 0; z < sz; z++){ + p[2] = z; + for(y = 0; y < sy; y++){ + p[1] = y; + src_idx = z * sx * sy + y * sx; + dest_idx = p[d[2]] * s[d[0]] * s[d[1]] + p[d[1]] * s[d[0]]; + //std::cout<::read(dest, 0, start, 0, X(), n, Z()); } + /// Convert this BSQ file to a BIL bool bil(std::string outname, bool PROGRESS = false){ - size_t in_time, out_time, calc_time; //initialize the timing variables - in_time = out_time = calc_time = 0; - - size_t XY = X() * Y(); //number of elements in an input slice - size_t XB = X() * Z(); //number of elements in an output slice - size_t XYbytes = XY * sizeof(T); //number of bytes in an input slice - size_t XBbytes = XB * sizeof(T); //number of bytes in an output slice - size_t batch_slices[2]; - batch_slices[0] = binary::buffer_size / (4*XBbytes); //calculate the number of slices that can fit in memory - batch_slices[1] = batch_slices[0]; - if(batch_slices == 0){ + const size_t buffers = 4; //number of buffers required for this algorithm + size_t mem_per_batch = binary::buffer_size / buffers; //calculate the maximum memory available for a batch + + size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case) + size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints + if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error std::cout<<"error, insufficient memory for stim::bsq::bil()"< rthread; + std::future wthread; //create asynchronous threads for reading and writing - readlines(ptrIn[0], 0, batch_slices[0]); - y += batch_slices[i]; - - std::future wthread; + readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer + y_load += N[0]; //increment the loaded slice counter + int b = 1; std::chrono::high_resolution_clock::time_point t_start; //high-resolution timers std::chrono::high_resolution_clock::time_point t_end; size_t t_batch; //number of milliseconds to process a batch size_t t_total = 0; - - for(size_t c = 0; c < batches; c++){ + while(y_proc < Y()){ //while there are still slices to be processed t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch - if(c == (batches - 2)){ - batch_slices[!i] = Y() - (batches - 1) * batch_slices[!i]; //if this is the last batch, calculate the remaining # of bands - } - jump = (Y() - batch_slices[i]) * X() * sizeof(T); - batchN = XB * batch_slices[i]; - batch_bytes = batchN * sizeof(T); - - rthread = std::async(&stim::bsq::readlines, this, ptrIn[!i], y, batch_slices[!i]); //start reading the next batch - y += batch_slices[i]; - - for(size_t b = 0; b < Z(); b++){ //for each line, store an XB slice in ptrDest - ptrSrc = ptrIn[i] + (b * X() * batch_slices[i]); - ptrDst = ptrOut[i] + (b * X()); //initialize ptrDst to the start of the XB output slice + if(y_load < Y()){ //if there are still slices to be loaded, load them + if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size + rthread = std::async(std::launch::async, &stim::bsq::readlines, this, src[b], y_load, N[b]); - for(size_t y = 0; y < batch_slices[i]; y++){ //for each band in the current line - memcpy(ptrDst, ptrSrc, X() * sizeof(T)); //copy the band line from the source to the destination - ptrSrc += X(); //increment the pointer within the current buffer array (batch) - ptrDst += X() * Z(); //increment the pointer within the XB slice (to be output) - } + y_load += N[b]; //increment the number of loaded slices } - - wthread = std::async( &std::fstream::write, &target, (char*)ptrOut[i], batch_bytes); - if(PROGRESS) progress = (double)( c + 1 ) / (batches) * 100; - i = !i; + b = !b; //swap the double-buffer - rthread.wait(); - wthread.wait(); + binary::permute(dst[b], src[b], X(), N[b], Z(), 0, 2, 1); //permute the batch to a BIL file + target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file + y_proc += N[b]; //increment the counter of processed pixels + if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels t_end = std::chrono::high_resolution_clock::now(); t_batch = std::chrono::duration_cast(t_end-t_start).count(); t_total += t_batch; + rthread.wait(); } - - std::cout<<"Total time to execute: "<BIL reads: "<<(double)in_time / (1000 * 60)<<" min"<BIL calculations: "<<(double)calc_time / (1000 * 60)<<" min"<BIL writes: "<<(double)out_time / (1000 * 60)<<" min"<::buffer_size / (2*XBbytes); //calculate the number of slices that can fit in memory - if(Y() < batch_slices) batch_slices = Y(); //if the entire data set will fit in memory, do it - size_t batchN = XB * batch_slices; //number of elements in a batch - size_t batch_bytes = batchN * sizeof(T); //calculate the number of bytes in a batch - - T* ptrIn = (T*) malloc(batch_bytes); //allocate a large buffer storing the read data - T* ptrOut = (T*) malloc(batch_bytes); //allocate space for storing an output buffer + /// Convert this BSQ file to a BIP + bool bip(std::string outname, bool PROGRESS = false){ - size_t jump = (Y() - batch_slices) * X() * sizeof(T); //jump between reads in the input file + const size_t buffers = 4; //number of buffers required for this algorithm + size_t mem_per_batch = binary::buffer_size / buffers; //calculate the maximum memory available for a batch - std::ofstream target(outname.c_str(), std::ios::binary); - std::string headername = outname + ".hdr"; - - size_t batches = (size_t)ceil((double)(Y()) / (double)batch_slices); //calculate the number of batches - T* ptrDst; - T* ptrSrc; - for(size_t c = 0; c < batches; c++){ - file.seekg(c * X() * batch_slices * sizeof(T), std::ios::beg); - - if(c == (batches - 1)){ - batch_slices = Y() - (batches - 1) * batch_slices; //if this is the last batch, calculate the remaining # of bands - jump = (Y() - batch_slices) * X() * sizeof(T); - batchN = XB * batch_slices; - batch_bytes = batchN * sizeof(T); - } - - auto in_begin = std::chrono::high_resolution_clock::now(); - for(size_t b = 0; b < Z(); b++){ - file.read((char*)(ptrIn + b * X() * batch_slices), sizeof(T) * X() * batch_slices); //read a number of lines equal to "batch_slices" - file.seekg(jump, std::ios::cur); //jump to the next band - } - auto in_end = std::chrono::high_resolution_clock::now(); - in_time += std::chrono::duration_cast(in_end-in_begin).count(); - - auto calc_begin = std::chrono::high_resolution_clock::now(); - - for(size_t b = 0; b < Z(); b++){ //for each line, store an XB slice in ptrDest - ptrSrc = ptrIn + (b * X() * batch_slices); - ptrDst = ptrOut + (b * X()); //initialize ptrDst to the start of the XB output slice - - for(size_t y = 0; y < batch_slices; y++){ //for each band in the current line - memcpy(ptrDst, ptrSrc, X() * sizeof(T)); //copy the band line from the source to the destination - ptrSrc += X(); //increment the pointer within the current buffer array (batch) - ptrDst += X() * Z(); //increment the pointer within the XB slice (to be output) - } - } - auto calc_end = std::chrono::high_resolution_clock::now(); - calc_time += std::chrono::duration_cast(calc_end-calc_begin).count(); - - auto out_begin = std::chrono::high_resolution_clock::now(); - target.write((char*)ptrOut, batch_bytes); //write the batch to disk - auto out_end = std::chrono::high_resolution_clock::now(); - out_time += std::chrono::duration_cast(out_end-out_begin).count(); - if(PROGRESS) progress = (double)( c + 1 ) / (batches) * 100; + size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case) + size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints + if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error + std::cout<<"error, insufficient memory for stim::bsq::bil()"< rthread; + std::future wthread; //create asynchronous threads for reading and writing - free(ptrIn); - free(ptrOut); - target.close(); - - std::cout<<"BSQ->BIL reads: "<<(double)in_time / (1000 * 60)<<" min"<BIL calculations: "<<(double)calc_time / (1000 * 60)<<" min"<BIL writes: "<<(double)out_time / (1000 * 60)<<" min"<::buffer_size / (2*XYbytes); //calculate the number of slices that can fit in memory - if(Z() < batch_bands) batch_bands = Z(); //if the entire data set will fit in memory, do it - size_t batchXB = X() * batch_bands; //number of elements in a batch - - size_t batch_bytes = batch_bands * XYbytes; //calculate the number of bytes in a batch - T* ptrIn = (T*) malloc(batch_bytes); //allocate a large buffer storing the read data - T* ptrOut = (T*) malloc(batch_bytes); //allocate space for storing an output buffer - - size_t jump = (Z() - batch_bands) * X() * sizeof(T); //jump between writes in the output file - - std::ofstream target(outname.c_str(), std::ios::binary); - std::string headername = outname + ".hdr"; - - size_t batches = ceil((double)(Z()) / (double)batch_bands); //calculate the number of batches - T* ptrDst; - T* ptrSrc; - for(size_t c = 0; c < batches; c++){ - auto in_begin = std::chrono::high_resolution_clock::now(); - target.seekp(c * batch_bands * sizeof(T) * X(), std::ios::beg); //seek to the start of the current batch in the output file - file.read((char*)ptrIn, sizeof(T) * X() * Y() * batch_bands); //read a batch - auto in_end = std::chrono::high_resolution_clock::now(); - std::cout << std::chrono::duration_cast(in_end-in_begin).count() << "ms" << std::endl; - - auto calc_begin = std::chrono::high_resolution_clock::now(); - if(c == (batches - 1)){ - batch_bands = Z() - (batches - 1) * batch_bands; //if this is the last batch, calculate the remaining # of bands - jump = (Z() - batch_bands) * X() * sizeof(T); - } - for(size_t y = 0; y < Y(); y++){ //for each line, store an XB slice in ptrDest - ptrDst = ptrOut + (y * X() * batch_bands); //initialize ptrDst to the start of the XB output slice - ptrSrc = ptrIn + (y * X()); - for(size_t b = 0; b < batch_bands; b++){ //for each band in the current line - memcpy(ptrDst, ptrSrc, X() * sizeof(T)); //copy the band line from the source to the destination - ptrDst += X(); //increment the pointer within the XB slice (to be output) - ptrSrc += X() * Y(); //increment the pointer within the current buffer array (batch) - } - } - auto calc_end = std::chrono::high_resolution_clock::now(); - std::cout << std::chrono::duration_cast(calc_end-calc_begin).count() << "ms" << std::endl; - - auto out_begin = std::chrono::high_resolution_clock::now(); - target.seekp(0, std::ios::beg); - for(size_t y = 0; y < Y(); y++){ //for each y-slice - target.write((char*)(ptrOut + y * X() * batch_bands), sizeof(T) * X() * batch_bands); //write the XB slice to disk - target.seekp(jump, std::ios::cur); //seek to the beginning of the next XB slice in the batch + std::chrono::high_resolution_clock::time_point t_start; //high-resolution timers + std::chrono::high_resolution_clock::time_point t_end; + size_t t_batch; //number of milliseconds to process a batch + size_t t_total = 0; + while(y_proc < Y()){ //while there are still slices to be processed + t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch + if(y_load < Y()){ //if there are still slices to be loaded, load them + if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size + rthread = std::async(std::launch::async, &stim::bsq::readlines, this, src[b], y_load, N[b]); + + y_load += N[b]; //increment the number of loaded slices } - auto out_end = std::chrono::high_resolution_clock::now(); - std::cout << std::chrono::duration_cast(out_end-out_begin).count() << "ms" << std::endl; - if(PROGRESS) progress = (double)( c + 1 ) / (batches) * 100; - } - - free(ptrIn); - free(ptrOut); - target.close(); - return true; - }*/ - /* - /// @param outname is the name of the output BIL file to be saved to disk. - bool bil(std::string outname, bool PROGRESS = false) - { - //simplify image resolution - unsigned long long jump = (Y() - 1) * X() * sizeof(T); + b = !b; //swap the double-buffer - std::ofstream target(outname.c_str(), std::ios::binary); - std::string headername = outname + ".hdr"; - - unsigned long long L = X(); - T* line = (T*)malloc(sizeof(T) * L); - - for ( unsigned long long y = 0; y < Y(); y++) //for each y position - { - file.seekg(y * X() * sizeof(T), std::ios::beg); //seek to the beginning of the xz slice - for ( unsigned long long z = 0; z < Z(); z++ ) //for each band - { - file.read((char *)line, sizeof(T) * X()); //read a line - target.write((char*)line, sizeof(T) * X()); //write the line to the output file - file.seekg(jump, std::ios::cur); //seek to the next band - if(PROGRESS) progress = (double)((y+1) * Z() + z + 1) / (Z() * Y()) * 100; //update the progress counter - } + binary::permute(dst[b], src[b], X(), N[b], Z(), 2, 0, 1); //permute the batch to a BIP file + target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file + y_proc += N[b]; //increment the counter of processed pixels + if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels + t_end = std::chrono::high_resolution_clock::now(); + t_batch = std::chrono::duration_cast(t_end-t_start).count(); + t_total += t_batch; + rthread.wait(); } - free(line); - target.close(); - - return true; - }*/ + std::cout<<"Total time to execute: "< BIL ((bsq*)file)->bil(outfile, PROGRESS); else if(interleave == envi_header::BIP){ //ERROR - std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<*)file)->bip(outfile, PROGRESS); - exit(1); + //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<*)file)->bip(outfile, PROGRESS); + //exit(1); } } @@ -535,9 +535,9 @@ public: else if(interleave == envi_header::BIL) //convert BSQ -> BIL ((bsq*)file)->bil(outfile, PROGRESS); else if(interleave == envi_header::BIP){ //ERROR - std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<*)file)->bip(outfile, PROGRESS); - exit(1); + //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<*)file)->bip(outfile, PROGRESS); + //exit(1); } } -- libgit2 0.21.4