Commit 9d34229ee0ce59cdfcd71da4a4589be0326428b6
1 parent
b4cea01a
restructured the FFT, allow FFT on interferogram mosaics
Showing
4 changed files
with
204 additions
and
0 deletions
Show diff stats
stim/envi/agilent_binary.h
... | ... | @@ -11,6 +11,7 @@ |
11 | 11 | #include <cuda_runtime.h> |
12 | 12 | #include "cufft.h" |
13 | 13 | #include <stim/cuda/cudatools/error.h> |
14 | +#include <stim/envi/envi_header.h> | |
14 | 15 | //#endif |
15 | 16 | |
16 | 17 | namespace stim{ |
... | ... | @@ -275,6 +276,21 @@ public: |
275 | 276 | |
276 | 277 | return result; |
277 | 278 | } |
279 | + | |
280 | + //saves the binary as an ENVI file with a BIP interleave format | |
281 | + int bip(T* bip_ptr){ | |
282 | + //std::ofstream out(outfile.c_str(), std::ios::binary); //create a binary file stream for output | |
283 | + size_t XY = R[0] * R[1]; | |
284 | + size_t B = R[2]; | |
285 | + size_t b; | |
286 | + | |
287 | + for(size_t xy = 0; xy < XY; xy++){ | |
288 | + for(b = 0; b < B; b++){ | |
289 | + bip_ptr[xy * B + b] = ptr[b * XY + xy]; | |
290 | + } | |
291 | + } | |
292 | + return 0; | |
293 | + } | |
278 | 294 | //#endif |
279 | 295 | |
280 | 296 | }; | ... | ... |
stim/envi/bip.h
... | ... | @@ -5,6 +5,7 @@ |
5 | 5 | #include "../envi/bil.h" |
6 | 6 | #include "../envi/hsi.h" |
7 | 7 | #include <cstring> |
8 | +#include <complex> | |
8 | 9 | #include <utility> |
9 | 10 | |
10 | 11 | //CUDA |
... | ... | @@ -12,6 +13,7 @@ |
12 | 13 | #include <stim/cuda/cudatools/error.h> |
13 | 14 | #include <cuda_runtime.h> |
14 | 15 | #include "cublas_v2.h" |
16 | +#include "cufft.h" | |
15 | 17 | //#endif |
16 | 18 | |
17 | 19 | namespace stim{ |
... | ... | @@ -1720,7 +1722,117 @@ public: |
1720 | 1722 | return true; |
1721 | 1723 | } |
1722 | 1724 | |
1725 | + int fft(std::string outname, size_t bandmin, size_t bandmax, size_t samples = 0, T* ratio = NULL, size_t rx = 0, size_t ry = 0, bool PROGRESS = false, int device = 0){ | |
1726 | + if(device == -1){ | |
1727 | + std::cout<<"ERROR: GPU required for FFT (uses cuFFT)."<<std::endl; | |
1728 | + exit(1); | |
1729 | + } | |
1730 | + if(samples == 0) samples = Z(); //if samples are specified, use all of them | |
1731 | + if(samples > Z()){ | |
1732 | + std::cout<<"ERROR: stim::envi doesn't support FFT padding just yet."<<std::endl; | |
1733 | + exit(1); | |
1734 | + } | |
1735 | + int nd; //stores the number of CUDA devices | |
1736 | + HANDLE_ERROR(cudaGetDeviceCount(&nd)); //get the number of CUDA devices | |
1737 | + if(device >= nd){ //test for the existence of the requested device | |
1738 | + std::cout<<"ERROR: requested CUDA device for stim::envi::fft() doesn't exist"<<std::endl; | |
1739 | + exit(1); | |
1740 | + } | |
1741 | + HANDLE_ERROR(cudaSetDevice(device)); //set the CUDA device | |
1742 | + cudaDeviceProp prop; | |
1743 | + HANDLE_ERROR(cudaGetDeviceProperties(&prop, device)); //get the CUDA device properties | |
1744 | + | |
1745 | + size_t B = Z(); | |
1746 | + size_t S = samples; | |
1747 | + size_t fft_size = S * sizeof(T); //number of bytes for each FFT | |
1748 | + size_t cuda_bytes = prop.totalGlobalMem; //get the number of bytes of global memory available | |
1749 | + size_t cuda_use = (size_t)floor(cuda_bytes * 0.2); //only use 80% | |
1750 | + size_t nS = cuda_use / fft_size; //calculate the number of spectra that can be loaded onto the GPU as a single batch | |
1751 | + size_t batch_bytes = nS * fft_size; //calculate the size of a batch (in bytes) | |
1752 | + size_t fft_bytes = nS * (S/2 + 1) * sizeof(cufftComplex); | |
1753 | + T* batch = (T*) malloc(batch_bytes); //allocate space in host memory to store a batch | |
1754 | + memset(batch, 0, batch_bytes); | |
1755 | + std::complex<T>* batch_fft = (std::complex<T>*) malloc(fft_bytes); | |
1756 | + T* gpu_batch; //device pointer to the batch | |
1757 | + HANDLE_ERROR(cudaMalloc(&gpu_batch, batch_bytes)); //allocate space on the device for the FFT batch | |
1758 | + cufftComplex* gpu_batch_fft; //allocate space for the FFT result | |
1759 | + HANDLE_ERROR(cudaMalloc(&gpu_batch_fft, fft_bytes)); | |
1760 | + int N[1]; //create an array with the interferogram size (required for cuFFT input) | |
1761 | + N[0] = (int)S; //set the only array value to the interferogram size | |
1762 | + | |
1763 | + //if a background is provided for a ratio | |
1764 | + std::complex<T>* ratio_fft = NULL; //create a pointer for the FFT of the ratio image (if it exists) | |
1765 | + if(ratio){ | |
1766 | + size_t bkg_bytes = rx * ry * S * sizeof(T); //calculate the total number of bytes in the background image | |
1767 | + T* bkg_copy = (T*) malloc(bkg_bytes); //allocate space to copy the background | |
1768 | + if(S == Z()) memcpy(bkg_copy, ratio, bkg_bytes); //if the number of samples used in processing equals the number of available samples | |
1769 | + else{ | |
1770 | + for(size_t xyi = 0; xyi < rx*ry; xyi++) | |
1771 | + memcpy(&bkg_copy[xyi * S], &ratio[xyi * B], S * sizeof(T)); | |
1772 | + } | |
1773 | + T* gpu_ratio; | |
1774 | + HANDLE_ERROR(cudaMalloc(&gpu_ratio, bkg_bytes)); | |
1775 | + HANDLE_ERROR(cudaMemcpy(gpu_ratio, bkg_copy, bkg_bytes, cudaMemcpyHostToDevice)); | |
1776 | + cufftHandle bkg_plan; | |
1777 | + CUFFT_HANDLE_ERROR(cufftPlanMany(&bkg_plan, 1, N, NULL, 1, N[0], NULL, 1, N[0], CUFFT_R2C, (int)(rx * ry))); | |
1778 | + size_t bkg_fft_bytes = rx * ry * (S / 2 + 1) * sizeof(cufftComplex); | |
1779 | + T* gpu_ratio_fft; | |
1780 | + HANDLE_ERROR(cudaMalloc(&gpu_ratio_fft, bkg_fft_bytes)); | |
1781 | + CUFFT_HANDLE_ERROR(cufftExecR2C(bkg_plan, (cufftReal*)gpu_ratio, (cufftComplex*)gpu_ratio_fft)); | |
1782 | + ratio_fft = (std::complex<T>*) malloc(bkg_fft_bytes); | |
1783 | + HANDLE_ERROR(cudaMemcpy(ratio_fft, gpu_ratio_fft, bkg_fft_bytes, cudaMemcpyDeviceToHost)); | |
1784 | + HANDLE_ERROR(cudaFree(gpu_ratio)); | |
1785 | + HANDLE_ERROR(cudaFree(gpu_ratio_fft)); | |
1786 | + CUFFT_HANDLE_ERROR(cufftDestroy(bkg_plan)); | |
1787 | + } | |
1723 | 1788 | |
1789 | + cufftHandle plan; //create a CUFFT plan | |
1790 | + CUFFT_HANDLE_ERROR(cufftPlanMany(&plan, 1, N, NULL, 1, N[0], NULL, 1, N[0], CUFFT_R2C, (int)nS)); | |
1791 | + | |
1792 | + std::ofstream outfile(outname, std::ios::binary); //open a file for writing | |
1793 | + | |
1794 | + size_t XY = X() * Y(); //calculate the number of spectra | |
1795 | + size_t xy = 0; | |
1796 | + size_t bs; //stores the number of spectra in the current batch | |
1797 | + size_t s, b; | |
1798 | + size_t S_fft = S/2 + 1; | |
1799 | + size_t bandkeep = bandmax - bandmin + 1; | |
1800 | + size_t x, y; | |
1801 | + size_t ratio_i; | |
1802 | + T* temp_spec = (T*) malloc(Z() * sizeof(T)); //allocate space to hold a single pixel | |
1803 | + while(xy < XY){ //while there are unprocessed spectra | |
1804 | + bs = min(XY - xy, nS); //calculate the number of spectra to include in the batch | |
1805 | + for(s = 0; s < bs; s++){ //for each spectrum in the batch | |
1806 | + pixel(temp_spec, xy + s); //read a pixel from disk | |
1807 | + memcpy(&batch[s * S], temp_spec, S * sizeof(T)); | |
1808 | + //pixel(&batch[s * S], xy + s); //read the next spectrum | |
1809 | + } | |
1810 | + HANDLE_ERROR(cudaMemcpy(gpu_batch, batch, batch_bytes, cudaMemcpyHostToDevice)); | |
1811 | + CUFFT_HANDLE_ERROR(cufftExecR2C(plan, (cufftReal*)gpu_batch, gpu_batch_fft)); //execute the (implicitly forward) transform | |
1812 | + HANDLE_ERROR(cudaMemcpy(batch_fft, gpu_batch_fft, fft_bytes, cudaMemcpyDeviceToHost)); //copy the data back to the GPU | |
1813 | + for(s = 0; s < bs; s++){ //for each spectrum in the batch | |
1814 | + y = (xy + s)/X(); | |
1815 | + x = xy + s - y * X(); | |
1816 | + if(ratio_fft) ratio_i = (y % ry) * rx + (x % rx); //if a background is used, calculate the coordinates into it | |
1817 | + for(b = 0; b < S/2 + 1; b++){ //for each sample | |
1818 | + if(ratio_fft) | |
1819 | + batch[s * S + b] = -log(abs(batch_fft[s * S_fft + b]) / abs(ratio_fft[ratio_i * S_fft + b])); | |
1820 | + else | |
1821 | + batch[s * S + b] = abs(batch_fft[s * S_fft + b]); //calculate the magnitude of the spectrum | |
1822 | + } | |
1823 | + outfile.write((char*)&batch[s * S + bandmin], bandkeep * sizeof(T)); //save the resulting spectrum | |
1824 | + } | |
1825 | + xy += bs; //increment xy by the number of spectra processed | |
1826 | + if(PROGRESS) progress = (double)xy / (double)XY * 100; | |
1827 | + } | |
1828 | + outfile.close(); | |
1829 | + free(ratio_fft); | |
1830 | + free(batch_fft); | |
1831 | + free(batch); | |
1832 | + HANDLE_ERROR(cudaFree(gpu_batch)); | |
1833 | + HANDLE_ERROR(cudaFree(gpu_batch_fft)); | |
1834 | + return 0; | |
1835 | + } | |
1724 | 1836 | |
1725 | 1837 | /// Close the file. |
1726 | 1838 | bool close(){ | ... | ... |
stim/envi/envi.h
... | ... | @@ -6,6 +6,7 @@ |
6 | 6 | #include "../envi/bip.h" |
7 | 7 | #include "../envi/bil.h" |
8 | 8 | #include "../math/fd_coefficients.h" |
9 | +#include <stim/parser/filename.h> | |
9 | 10 | #include <iostream> |
10 | 11 | #include <fstream> |
11 | 12 | //#include "../image/image.h" |
... | ... | @@ -76,7 +77,32 @@ public: |
76 | 77 | |
77 | 78 | allocate(); |
78 | 79 | } |
80 | + //used to test if the current ENVI file is valid | |
81 | + operator bool(){ | |
82 | + if(file == NULL) return false; | |
83 | + return true; | |
84 | + } | |
79 | 85 | |
86 | + //test to determine if the specified file is an ENVI file | |
87 | + static bool is_envi(std::string fname, std::string hname = ""){ | |
88 | + stim::filename data_file(fname); | |
89 | + stim::filename header_file; | |
90 | + if(hname == ""){ //if the header isn't provided | |
91 | + header_file = data_file; //assume that it's the same name as the data file, with a .hdr extension | |
92 | + header_file = header_file.extension("hdr"); | |
93 | + } | |
94 | + else header_file = hname; //otherwise load the passed header | |
95 | + | |
96 | + stim::envi_header H; | |
97 | + if(H.load(header_file) == false) //load the header file, if it doesn't load return false | |
98 | + return false; | |
99 | + size_t targetBytes = H.data_bytes(); //get the number of bytes that SHOULD be in the data file | |
100 | + std::ifstream infile(data_file.str(), std::ifstream::ate | std::ifstream::binary); //load the data file | |
101 | + if(!infile) return false; //if the data file doesn't load, return false | |
102 | + size_t bytes = infile.tellg(); //get the number of actual bytes in the file | |
103 | + if(bytes != targetBytes) return false; //if the data doesn't match the header, return false | |
104 | + return true; //otherwise everything looks fine | |
105 | + } | |
80 | 106 | |
81 | 107 | |
82 | 108 | void* malloc_spectrum(){ |
... | ... | @@ -1870,6 +1896,43 @@ public: |
1870 | 1896 | } |
1871 | 1897 | exit(1); |
1872 | 1898 | } |
1899 | + | |
1900 | + | |
1901 | + | |
1902 | + | |
1903 | + void fft(std::string outfile, double band_min, double band_max, size_t samples = 0, void* ratio = NULL, size_t rx = 0, size_t ry = 0, bool PROGRESS = false, int cuda_device = 0){ | |
1904 | + | |
1905 | + double B = (double)header.bands; | |
1906 | + double delta = header.wavelength[1] - header.wavelength[0]; //calculate spacing in the current domain | |
1907 | + double span = samples * delta; //calculate the span in the current domain | |
1908 | + double fft_delta = 1.0 / span; //calculate the span in the FFT domain | |
1909 | + double fft_max = fft_delta * samples/2; //calculate the maximum range of the FFT | |
1910 | + | |
1911 | + if(band_max > fft_max) band_max = fft_max; //the user gave a band outside of the FFT range, reset the band to the maximum available | |
1912 | + size_t start_i = (size_t)std::ceil(band_min / fft_delta); //calculate the first band to store | |
1913 | + size_t size_i = (size_t)std::floor(band_max / fft_delta) - start_i + 1; //calculate the number of bands to store | |
1914 | + size_t end_i = start_i + size_i - 1; //last band number | |
1915 | + | |
1916 | + envi_header new_header = header; | |
1917 | + new_header.bands = size_i; | |
1918 | + new_header.set_wavelengths(start_i * fft_delta, fft_delta); | |
1919 | + new_header.save(outfile + ".hdr"); | |
1920 | + | |
1921 | + if (header.interleave == envi_header::BIP){ | |
1922 | + if (header.data_type == envi_header::float32) | |
1923 | + ((bip<float>*)file)->fft(outfile, start_i, end_i, samples, (float*)ratio, rx, ry, PROGRESS, cuda_device); | |
1924 | + else if (header.data_type == envi_header::float64) | |
1925 | + ((bip<double>*)file)->fft(outfile, start_i, end_i, samples, (double*)ratio, rx, ry, PROGRESS, cuda_device); | |
1926 | + else{ | |
1927 | + std::cout << "ERROR: unidentified data type" << std::endl; | |
1928 | + exit(1); | |
1929 | + } | |
1930 | + } | |
1931 | + else{ | |
1932 | + std::cout<<"ERROR: only BIP files supported for FFT"<<std::endl; | |
1933 | + exit(1); | |
1934 | + } | |
1935 | + } | |
1873 | 1936 | }; //end ENVI |
1874 | 1937 | |
1875 | 1938 | } //end namespace rts | ... | ... |
stim/envi/envi_header.h
... | ... | @@ -78,6 +78,14 @@ struct envi_header |
78 | 78 | load(name); |
79 | 79 | } |
80 | 80 | |
81 | + //sets the wavelength vector given a starting value and uniform step size | |
82 | + void set_wavelengths(double start, double step){ | |
83 | + size_t B = bands; //get the number of bands | |
84 | + wavelength.resize(B); | |
85 | + for(size_t b = 0; b < B; b++) | |
86 | + wavelength[b] = start + b * step; | |
87 | + } | |
88 | + | |
81 | 89 | std::string trim(std::string line){ |
82 | 90 | |
83 | 91 | if(line.length() == 0) |
... | ... | @@ -417,8 +425,13 @@ struct envi_header |
417 | 425 | default: |
418 | 426 | return 0; |
419 | 427 | } |
428 | + } | |
420 | 429 | |
430 | + //return the number of bytes that SHOULD be in the data file | |
431 | + size_t data_bytes(){ | |
432 | + return samples * lines * bands * valsize() + header_offset; | |
421 | 433 | } |
434 | + | |
422 | 435 | |
423 | 436 | /// Convert an interleave type to a string |
424 | 437 | static std::string interleave_str(interleaveType t){ | ... | ... |