Commit 9d34229ee0ce59cdfcd71da4a4589be0326428b6

Authored by David Mayerich
1 parent b4cea01a

restructured the FFT, allow FFT on interferogram mosaics

stim/envi/agilent_binary.h
... ... @@ -11,6 +11,7 @@
11 11 #include <cuda_runtime.h>
12 12 #include "cufft.h"
13 13 #include <stim/cuda/cudatools/error.h>
  14 +#include <stim/envi/envi_header.h>
14 15 //#endif
15 16  
16 17 namespace stim{
... ... @@ -275,6 +276,21 @@ public:
275 276  
276 277 return result;
277 278 }
  279 +
  280 + //saves the binary as an ENVI file with a BIP interleave format
  281 + int bip(T* bip_ptr){
  282 + //std::ofstream out(outfile.c_str(), std::ios::binary); //create a binary file stream for output
  283 + size_t XY = R[0] * R[1];
  284 + size_t B = R[2];
  285 + size_t b;
  286 +
  287 + for(size_t xy = 0; xy < XY; xy++){
  288 + for(b = 0; b < B; b++){
  289 + bip_ptr[xy * B + b] = ptr[b * XY + xy];
  290 + }
  291 + }
  292 + return 0;
  293 + }
278 294 //#endif
279 295  
280 296 };
... ...
stim/envi/bip.h
... ... @@ -5,6 +5,7 @@
5 5 #include "../envi/bil.h"
6 6 #include "../envi/hsi.h"
7 7 #include <cstring>
  8 +#include <complex>
8 9 #include <utility>
9 10  
10 11 //CUDA
... ... @@ -12,6 +13,7 @@
12 13 #include <stim/cuda/cudatools/error.h>
13 14 #include <cuda_runtime.h>
14 15 #include "cublas_v2.h"
  16 +#include "cufft.h"
15 17 //#endif
16 18  
17 19 namespace stim{
... ... @@ -1720,7 +1722,117 @@ public:
1720 1722 return true;
1721 1723 }
1722 1724  
  1725 + int fft(std::string outname, size_t bandmin, size_t bandmax, size_t samples = 0, T* ratio = NULL, size_t rx = 0, size_t ry = 0, bool PROGRESS = false, int device = 0){
  1726 + if(device == -1){
  1727 + std::cout<<"ERROR: GPU required for FFT (uses cuFFT)."<<std::endl;
  1728 + exit(1);
  1729 + }
  1730 + if(samples == 0) samples = Z(); //if samples are specified, use all of them
  1731 + if(samples > Z()){
  1732 + std::cout<<"ERROR: stim::envi doesn't support FFT padding just yet."<<std::endl;
  1733 + exit(1);
  1734 + }
  1735 + int nd; //stores the number of CUDA devices
  1736 + HANDLE_ERROR(cudaGetDeviceCount(&nd)); //get the number of CUDA devices
  1737 + if(device >= nd){ //test for the existence of the requested device
  1738 + std::cout<<"ERROR: requested CUDA device for stim::envi::fft() doesn't exist"<<std::endl;
  1739 + exit(1);
  1740 + }
  1741 + HANDLE_ERROR(cudaSetDevice(device)); //set the CUDA device
  1742 + cudaDeviceProp prop;
  1743 + HANDLE_ERROR(cudaGetDeviceProperties(&prop, device)); //get the CUDA device properties
  1744 +
  1745 + size_t B = Z();
  1746 + size_t S = samples;
  1747 + size_t fft_size = S * sizeof(T); //number of bytes for each FFT
  1748 + size_t cuda_bytes = prop.totalGlobalMem; //get the number of bytes of global memory available
  1749 + size_t cuda_use = (size_t)floor(cuda_bytes * 0.2); //only use 80%
  1750 + size_t nS = cuda_use / fft_size; //calculate the number of spectra that can be loaded onto the GPU as a single batch
  1751 + size_t batch_bytes = nS * fft_size; //calculate the size of a batch (in bytes)
  1752 + size_t fft_bytes = nS * (S/2 + 1) * sizeof(cufftComplex);
  1753 + T* batch = (T*) malloc(batch_bytes); //allocate space in host memory to store a batch
  1754 + memset(batch, 0, batch_bytes);
  1755 + std::complex<T>* batch_fft = (std::complex<T>*) malloc(fft_bytes);
  1756 + T* gpu_batch; //device pointer to the batch
  1757 + HANDLE_ERROR(cudaMalloc(&gpu_batch, batch_bytes)); //allocate space on the device for the FFT batch
  1758 + cufftComplex* gpu_batch_fft; //allocate space for the FFT result
  1759 + HANDLE_ERROR(cudaMalloc(&gpu_batch_fft, fft_bytes));
  1760 + int N[1]; //create an array with the interferogram size (required for cuFFT input)
  1761 + N[0] = (int)S; //set the only array value to the interferogram size
  1762 +
  1763 + //if a background is provided for a ratio
  1764 + std::complex<T>* ratio_fft = NULL; //create a pointer for the FFT of the ratio image (if it exists)
  1765 + if(ratio){
  1766 + size_t bkg_bytes = rx * ry * S * sizeof(T); //calculate the total number of bytes in the background image
  1767 + T* bkg_copy = (T*) malloc(bkg_bytes); //allocate space to copy the background
  1768 + if(S == Z()) memcpy(bkg_copy, ratio, bkg_bytes); //if the number of samples used in processing equals the number of available samples
  1769 + else{
  1770 + for(size_t xyi = 0; xyi < rx*ry; xyi++)
  1771 + memcpy(&bkg_copy[xyi * S], &ratio[xyi * B], S * sizeof(T));
  1772 + }
  1773 + T* gpu_ratio;
  1774 + HANDLE_ERROR(cudaMalloc(&gpu_ratio, bkg_bytes));
  1775 + HANDLE_ERROR(cudaMemcpy(gpu_ratio, bkg_copy, bkg_bytes, cudaMemcpyHostToDevice));
  1776 + cufftHandle bkg_plan;
  1777 + CUFFT_HANDLE_ERROR(cufftPlanMany(&bkg_plan, 1, N, NULL, 1, N[0], NULL, 1, N[0], CUFFT_R2C, (int)(rx * ry)));
  1778 + size_t bkg_fft_bytes = rx * ry * (S / 2 + 1) * sizeof(cufftComplex);
  1779 + T* gpu_ratio_fft;
  1780 + HANDLE_ERROR(cudaMalloc(&gpu_ratio_fft, bkg_fft_bytes));
  1781 + CUFFT_HANDLE_ERROR(cufftExecR2C(bkg_plan, (cufftReal*)gpu_ratio, (cufftComplex*)gpu_ratio_fft));
  1782 + ratio_fft = (std::complex<T>*) malloc(bkg_fft_bytes);
  1783 + HANDLE_ERROR(cudaMemcpy(ratio_fft, gpu_ratio_fft, bkg_fft_bytes, cudaMemcpyDeviceToHost));
  1784 + HANDLE_ERROR(cudaFree(gpu_ratio));
  1785 + HANDLE_ERROR(cudaFree(gpu_ratio_fft));
  1786 + CUFFT_HANDLE_ERROR(cufftDestroy(bkg_plan));
  1787 + }
1723 1788  
  1789 + cufftHandle plan; //create a CUFFT plan
  1790 + CUFFT_HANDLE_ERROR(cufftPlanMany(&plan, 1, N, NULL, 1, N[0], NULL, 1, N[0], CUFFT_R2C, (int)nS));
  1791 +
  1792 + std::ofstream outfile(outname, std::ios::binary); //open a file for writing
  1793 +
  1794 + size_t XY = X() * Y(); //calculate the number of spectra
  1795 + size_t xy = 0;
  1796 + size_t bs; //stores the number of spectra in the current batch
  1797 + size_t s, b;
  1798 + size_t S_fft = S/2 + 1;
  1799 + size_t bandkeep = bandmax - bandmin + 1;
  1800 + size_t x, y;
  1801 + size_t ratio_i;
  1802 + T* temp_spec = (T*) malloc(Z() * sizeof(T)); //allocate space to hold a single pixel
  1803 + while(xy < XY){ //while there are unprocessed spectra
  1804 + bs = min(XY - xy, nS); //calculate the number of spectra to include in the batch
  1805 + for(s = 0; s < bs; s++){ //for each spectrum in the batch
  1806 + pixel(temp_spec, xy + s); //read a pixel from disk
  1807 + memcpy(&batch[s * S], temp_spec, S * sizeof(T));
  1808 + //pixel(&batch[s * S], xy + s); //read the next spectrum
  1809 + }
  1810 + HANDLE_ERROR(cudaMemcpy(gpu_batch, batch, batch_bytes, cudaMemcpyHostToDevice));
  1811 + CUFFT_HANDLE_ERROR(cufftExecR2C(plan, (cufftReal*)gpu_batch, gpu_batch_fft)); //execute the (implicitly forward) transform
  1812 + HANDLE_ERROR(cudaMemcpy(batch_fft, gpu_batch_fft, fft_bytes, cudaMemcpyDeviceToHost)); //copy the data back to the GPU
  1813 + for(s = 0; s < bs; s++){ //for each spectrum in the batch
  1814 + y = (xy + s)/X();
  1815 + x = xy + s - y * X();
  1816 + if(ratio_fft) ratio_i = (y % ry) * rx + (x % rx); //if a background is used, calculate the coordinates into it
  1817 + for(b = 0; b < S/2 + 1; b++){ //for each sample
  1818 + if(ratio_fft)
  1819 + batch[s * S + b] = -log(abs(batch_fft[s * S_fft + b]) / abs(ratio_fft[ratio_i * S_fft + b]));
  1820 + else
  1821 + batch[s * S + b] = abs(batch_fft[s * S_fft + b]); //calculate the magnitude of the spectrum
  1822 + }
  1823 + outfile.write((char*)&batch[s * S + bandmin], bandkeep * sizeof(T)); //save the resulting spectrum
  1824 + }
  1825 + xy += bs; //increment xy by the number of spectra processed
  1826 + if(PROGRESS) progress = (double)xy / (double)XY * 100;
  1827 + }
  1828 + outfile.close();
  1829 + free(ratio_fft);
  1830 + free(batch_fft);
  1831 + free(batch);
  1832 + HANDLE_ERROR(cudaFree(gpu_batch));
  1833 + HANDLE_ERROR(cudaFree(gpu_batch_fft));
  1834 + return 0;
  1835 + }
1724 1836  
1725 1837 /// Close the file.
1726 1838 bool close(){
... ...
stim/envi/envi.h
... ... @@ -6,6 +6,7 @@
6 6 #include "../envi/bip.h"
7 7 #include "../envi/bil.h"
8 8 #include "../math/fd_coefficients.h"
  9 +#include <stim/parser/filename.h>
9 10 #include <iostream>
10 11 #include <fstream>
11 12 //#include "../image/image.h"
... ... @@ -76,7 +77,32 @@ public:
76 77  
77 78 allocate();
78 79 }
  80 + //used to test if the current ENVI file is valid
  81 + operator bool(){
  82 + if(file == NULL) return false;
  83 + return true;
  84 + }
79 85  
  86 + //test to determine if the specified file is an ENVI file
  87 + static bool is_envi(std::string fname, std::string hname = ""){
  88 + stim::filename data_file(fname);
  89 + stim::filename header_file;
  90 + if(hname == ""){ //if the header isn't provided
  91 + header_file = data_file; //assume that it's the same name as the data file, with a .hdr extension
  92 + header_file = header_file.extension("hdr");
  93 + }
  94 + else header_file = hname; //otherwise load the passed header
  95 +
  96 + stim::envi_header H;
  97 + if(H.load(header_file) == false) //load the header file, if it doesn't load return false
  98 + return false;
  99 + size_t targetBytes = H.data_bytes(); //get the number of bytes that SHOULD be in the data file
  100 + std::ifstream infile(data_file.str(), std::ifstream::ate | std::ifstream::binary); //load the data file
  101 + if(!infile) return false; //if the data file doesn't load, return false
  102 + size_t bytes = infile.tellg(); //get the number of actual bytes in the file
  103 + if(bytes != targetBytes) return false; //if the data doesn't match the header, return false
  104 + return true; //otherwise everything looks fine
  105 + }
80 106  
81 107  
82 108 void* malloc_spectrum(){
... ... @@ -1870,6 +1896,43 @@ public:
1870 1896 }
1871 1897 exit(1);
1872 1898 }
  1899 +
  1900 +
  1901 +
  1902 +
  1903 + void fft(std::string outfile, double band_min, double band_max, size_t samples = 0, void* ratio = NULL, size_t rx = 0, size_t ry = 0, bool PROGRESS = false, int cuda_device = 0){
  1904 +
  1905 + double B = (double)header.bands;
  1906 + double delta = header.wavelength[1] - header.wavelength[0]; //calculate spacing in the current domain
  1907 + double span = samples * delta; //calculate the span in the current domain
  1908 + double fft_delta = 1.0 / span; //calculate the span in the FFT domain
  1909 + double fft_max = fft_delta * samples/2; //calculate the maximum range of the FFT
  1910 +
  1911 + if(band_max > fft_max) band_max = fft_max; //the user gave a band outside of the FFT range, reset the band to the maximum available
  1912 + size_t start_i = (size_t)std::ceil(band_min / fft_delta); //calculate the first band to store
  1913 + size_t size_i = (size_t)std::floor(band_max / fft_delta) - start_i + 1; //calculate the number of bands to store
  1914 + size_t end_i = start_i + size_i - 1; //last band number
  1915 +
  1916 + envi_header new_header = header;
  1917 + new_header.bands = size_i;
  1918 + new_header.set_wavelengths(start_i * fft_delta, fft_delta);
  1919 + new_header.save(outfile + ".hdr");
  1920 +
  1921 + if (header.interleave == envi_header::BIP){
  1922 + if (header.data_type == envi_header::float32)
  1923 + ((bip<float>*)file)->fft(outfile, start_i, end_i, samples, (float*)ratio, rx, ry, PROGRESS, cuda_device);
  1924 + else if (header.data_type == envi_header::float64)
  1925 + ((bip<double>*)file)->fft(outfile, start_i, end_i, samples, (double*)ratio, rx, ry, PROGRESS, cuda_device);
  1926 + else{
  1927 + std::cout << "ERROR: unidentified data type" << std::endl;
  1928 + exit(1);
  1929 + }
  1930 + }
  1931 + else{
  1932 + std::cout<<"ERROR: only BIP files supported for FFT"<<std::endl;
  1933 + exit(1);
  1934 + }
  1935 + }
1873 1936 }; //end ENVI
1874 1937  
1875 1938 } //end namespace rts
... ...
stim/envi/envi_header.h
... ... @@ -78,6 +78,14 @@ struct envi_header
78 78 load(name);
79 79 }
80 80  
  81 + //sets the wavelength vector given a starting value and uniform step size
  82 + void set_wavelengths(double start, double step){
  83 + size_t B = bands; //get the number of bands
  84 + wavelength.resize(B);
  85 + for(size_t b = 0; b < B; b++)
  86 + wavelength[b] = start + b * step;
  87 + }
  88 +
81 89 std::string trim(std::string line){
82 90  
83 91 if(line.length() == 0)
... ... @@ -417,8 +425,13 @@ struct envi_header
417 425 default:
418 426 return 0;
419 427 }
  428 + }
420 429  
  430 + //return the number of bytes that SHOULD be in the data file
  431 + size_t data_bytes(){
  432 + return samples * lines * bands * valsize() + header_offset;
421 433 }
  434 +
422 435  
423 436 /// Convert an interleave type to a string
424 437 static std::string interleave_str(interleaveType t){
... ...