restructured the FFT, allow FFT on interferogram mosaics

David Mayerich
1 parent b4cea01a
Showing 4 changed files with 204 additions and 0 deletions Show diff stats
stim/envi/agilent_binary.h
stim/envi/bip.h
stim/envi/envi.h
stim/envi/envi_header.h
@@ -11,6 +11,7 @@
 #include <cuda_runtime.h>
 #include "cufft.h"
 #include <stim/cuda/cudatools/error.h>
+#include <stim/envi/envi_header.h>
 //#endif
  
 namespace stim{
@@ -275,6 +276,21 @@ public:
  
 		return result;
 	}
+
+	//saves the binary as an ENVI file with a BIP interleave format
+	int bip(T* bip_ptr){
+		//std::ofstream out(outfile.c_str(), std::ios::binary);			//create a binary file stream for output
+		size_t XY = R[0] * R[1];
+		size_t B = R[2];
+		size_t b;
+
+		for(size_t xy = 0; xy < XY; xy++){
+			for(b = 0; b < B; b++){
+				bip_ptr[xy * B + b] = ptr[b * XY + xy];
+			}
+		}
+		return 0;
+	}
 //#endif
  
 };
@@ -5,6 +5,7 @@
 #include "../envi/bil.h"
 #include "../envi/hsi.h"
 #include <cstring>
+#include <complex>
 #include <utility>
  
 //CUDA
@@ -12,6 +13,7 @@
 #include <stim/cuda/cudatools/error.h>
 #include <cuda_runtime.h>
 #include "cublas_v2.h"
+#include "cufft.h"
 //#endif
  
 namespace stim{
@@ -1720,7 +1722,117 @@ public:
 		return true;
 	}
  
+	int fft(std::string outname, size_t bandmin, size_t bandmax, size_t samples = 0, T* ratio = NULL, size_t rx = 0, size_t ry = 0, bool PROGRESS = false, int device = 0){
+		if(device == -1){
+			std::cout<<"ERROR: GPU required for FFT (uses cuFFT)."<<std::endl;
+			exit(1);
+		}
+		if(samples == 0) samples = Z();								//if samples are specified, use all of them
+		if(samples > Z()){
+			std::cout<<"ERROR: stim::envi doesn't support FFT padding just yet."<<std::endl;
+			exit(1);
+		}
+		int nd;															//stores the number of CUDA devices
+		HANDLE_ERROR(cudaGetDeviceCount(&nd));							//get the number of CUDA devices
+		if(device >= nd){												//test for the existence of the requested device
+			std::cout<<"ERROR: requested CUDA device for stim::envi::fft() doesn't exist"<<std::endl;
+			exit(1);
+		}
+		HANDLE_ERROR(cudaSetDevice(device));							//set the CUDA device
+		cudaDeviceProp prop;
+		HANDLE_ERROR(cudaGetDeviceProperties(&prop, device));			//get the CUDA device properties
+
+		size_t B = Z();
+		size_t S = samples;
+		size_t fft_size = S * sizeof(T);								//number of bytes for each FFT
+		size_t cuda_bytes = prop.totalGlobalMem;						//get the number of bytes of global memory available
+		size_t cuda_use = (size_t)floor(cuda_bytes * 0.2);								//only use 80%
+		size_t nS = cuda_use / fft_size;								//calculate the number of spectra that can be loaded onto the GPU as a single batch
+		size_t batch_bytes = nS * fft_size;								//calculate the size of a batch (in bytes)
+		size_t fft_bytes = nS * (S/2 + 1) * sizeof(cufftComplex);
+		T* batch = (T*) malloc(batch_bytes);							//allocate space in host memory to store a batch
+		memset(batch, 0, batch_bytes);
+		std::complex<T>* batch_fft = (std::complex<T>*) malloc(fft_bytes);
+		T* gpu_batch;													//device pointer to the batch
+		HANDLE_ERROR(cudaMalloc(&gpu_batch, batch_bytes));				//allocate space on the device for the FFT batch
+		cufftComplex* gpu_batch_fft;												//allocate space for the FFT result
+		HANDLE_ERROR(cudaMalloc(&gpu_batch_fft, fft_bytes));
+		int N[1];														//create an array with the interferogram size (required for cuFFT input)
+		N[0] = (int)S;													//set the only array value to the interferogram size
+
+		//if a background is provided for a ratio
+		std::complex<T>* ratio_fft = NULL;											//create a pointer for the FFT of the ratio image (if it exists)
+		if(ratio){
+			size_t bkg_bytes = rx * ry * S * sizeof(T);								//calculate the total number of bytes in the background image
+			T* bkg_copy = (T*) malloc(bkg_bytes);									//allocate space to copy the background
+			if(S == Z()) memcpy(bkg_copy, ratio, bkg_bytes);						//if the number of samples used in processing equals the number of available samples
+			else{
+				for(size_t xyi = 0; xyi < rx*ry; xyi++)
+					memcpy(&bkg_copy[xyi * S], &ratio[xyi * B], S * sizeof(T));
+			}
+			T* gpu_ratio;
+			HANDLE_ERROR(cudaMalloc(&gpu_ratio, bkg_bytes));
+			HANDLE_ERROR(cudaMemcpy(gpu_ratio, bkg_copy, bkg_bytes, cudaMemcpyHostToDevice));
+			cufftHandle bkg_plan;
+			CUFFT_HANDLE_ERROR(cufftPlanMany(&bkg_plan, 1, N, NULL, 1, N[0], NULL, 1, N[0], CUFFT_R2C, (int)(rx * ry)));
+			size_t bkg_fft_bytes = rx * ry * (S / 2 + 1) * sizeof(cufftComplex);
+			T* gpu_ratio_fft;
+			HANDLE_ERROR(cudaMalloc(&gpu_ratio_fft, bkg_fft_bytes));
+			CUFFT_HANDLE_ERROR(cufftExecR2C(bkg_plan, (cufftReal*)gpu_ratio, (cufftComplex*)gpu_ratio_fft));
+			ratio_fft = (std::complex<T>*) malloc(bkg_fft_bytes);
+			HANDLE_ERROR(cudaMemcpy(ratio_fft, gpu_ratio_fft, bkg_fft_bytes, cudaMemcpyDeviceToHost));
+			HANDLE_ERROR(cudaFree(gpu_ratio));
+			HANDLE_ERROR(cudaFree(gpu_ratio_fft));
+			CUFFT_HANDLE_ERROR(cufftDestroy(bkg_plan));
+		}
  
+		cufftHandle plan;												//create a CUFFT plan
+		CUFFT_HANDLE_ERROR(cufftPlanMany(&plan, 1, N, NULL, 1, N[0], NULL, 1, N[0], CUFFT_R2C, (int)nS));
+
+		std::ofstream outfile(outname, std::ios::binary);				//open a file for writing
+
+		size_t XY = X() * Y();											//calculate the number of spectra
+		size_t xy = 0;
+		size_t bs;														//stores the number of spectra in the current batch
+		size_t s, b;
+		size_t S_fft = S/2 + 1;
+		size_t bandkeep = bandmax - bandmin + 1;
+		size_t x, y;
+		size_t ratio_i;
+		T* temp_spec = (T*) malloc(Z() * sizeof(T));					//allocate space to hold a single pixel
+		while(xy < XY){													//while there are unprocessed spectra
+			bs = min(XY - xy, nS);										//calculate the number of spectra to include in the batch
+			for(s = 0; s < bs; s++){									//for each spectrum in the batch
+				pixel(temp_spec, xy + s);						//read a pixel from disk
+				memcpy(&batch[s * S], temp_spec, S * sizeof(T));
+				//pixel(&batch[s * S], xy + s);							//read the next spectrum
+			}
+			HANDLE_ERROR(cudaMemcpy(gpu_batch, batch, batch_bytes, cudaMemcpyHostToDevice));
+			CUFFT_HANDLE_ERROR(cufftExecR2C(plan, (cufftReal*)gpu_batch, gpu_batch_fft));			//execute the (implicitly forward) transform
+			HANDLE_ERROR(cudaMemcpy(batch_fft, gpu_batch_fft, fft_bytes, cudaMemcpyDeviceToHost));	//copy the data back to the GPU
+			for(s = 0; s < bs; s++){															//for each spectrum in the batch
+				y = (xy + s)/X();
+				x = xy + s - y * X();
+				if(ratio_fft)	ratio_i = (y % ry) * rx + (x % rx);								//if a background is used, calculate the coordinates into it
+				for(b = 0; b < S/2 + 1; b++){														//for each sample
+					if(ratio_fft)						
+						batch[s * S + b] = -log(abs(batch_fft[s * S_fft + b]) / abs(ratio_fft[ratio_i * S_fft + b]));
+					else
+						batch[s * S + b] = abs(batch_fft[s * S_fft + b]);		//calculate the magnitude of the spectrum					
+				}
+				outfile.write((char*)&batch[s * S + bandmin], bandkeep * sizeof(T));							//save the resulting spectrum
+			}
+			xy += bs;													//increment xy by the number of spectra processed
+			if(PROGRESS) progress = (double)xy / (double)XY * 100;
+		}
+		outfile.close();
+		free(ratio_fft);
+		free(batch_fft);
+		free(batch);
+		HANDLE_ERROR(cudaFree(gpu_batch));
+		HANDLE_ERROR(cudaFree(gpu_batch_fft));
+		return 0;
+	}
  
 	/// Close the file.
 	bool close(){
@@ -6,6 +6,7 @@
 #include "../envi/bip.h"
 #include "../envi/bil.h"
 #include "../math/fd_coefficients.h"
+#include <stim/parser/filename.h>
 #include <iostream>
 #include <fstream>
 //#include "../image/image.h"
@@ -76,7 +77,32 @@ public:
  
 		allocate();
 	}
+	//used to test if the current ENVI file is valid
+	operator bool(){
+		if(file == NULL) return false;
+		return true;
+	}
  
+	//test to determine if the specified file is an ENVI file
+	static bool is_envi(std::string fname, std::string hname = ""){
+		stim::filename data_file(fname);
+		stim::filename header_file;
+		if(hname == ""){								//if the header isn't provided
+			header_file = data_file;					//assume that it's the same name as the data file, with a .hdr extension
+			header_file = header_file.extension("hdr");
+		}
+		else header_file = hname;						//otherwise load the passed header
+
+		stim::envi_header H;
+		if(H.load(header_file) == false)				//load the header file, if it doesn't load return false
+			return false;
+		size_t targetBytes = H.data_bytes();			//get the number of bytes that SHOULD be in the data file
+		std::ifstream infile(data_file.str(), std::ifstream::ate | std::ifstream::binary);			//load the data file
+		if(!infile) return false;						//if the data file doesn't load, return false
+		size_t bytes = infile.tellg();					//get the number of actual bytes in the file
+		if(bytes != targetBytes) return false;			//if the data doesn't match the header, return false
+		return true;									//otherwise everything looks fine
+	}
  
  
 	void* malloc_spectrum(){
@@ -1870,6 +1896,43 @@ public:
 		}
 		exit(1);
 	}
+
+	
+
+
+	void fft(std::string outfile, double band_min, double band_max, size_t samples = 0, void* ratio = NULL, size_t rx = 0, size_t ry = 0, bool PROGRESS = false, int cuda_device = 0){
+		
+		double B = (double)header.bands;
+		double delta = header.wavelength[1] - header.wavelength[0];					//calculate spacing in the current domain
+		double span = samples * delta;											//calculate the span in the current domain
+		double fft_delta = 1.0 / span;												//calculate the span in the FFT domain
+		double fft_max = fft_delta * samples/2;										//calculate the maximum range of the FFT
+
+		if(band_max > fft_max) band_max = fft_max;									//the user gave a band outside of the FFT range, reset the band to the maximum available
+		size_t start_i = (size_t)std::ceil(band_min / fft_delta);					//calculate the first band to store
+		size_t size_i = (size_t)std::floor(band_max / fft_delta) - start_i + 1;		//calculate the number of bands to store
+		size_t end_i = start_i + size_i - 1;										//last band number
+
+		envi_header new_header = header;
+		new_header.bands = size_i;
+		new_header.set_wavelengths(start_i * fft_delta, fft_delta);
+		new_header.save(outfile + ".hdr");
+		
+		if (header.interleave == envi_header::BIP){
+			if (header.data_type == envi_header::float32)
+				((bip<float>*)file)->fft(outfile, start_i, end_i, samples, (float*)ratio, rx, ry, PROGRESS, cuda_device);
+			else if (header.data_type == envi_header::float64)
+				((bip<double>*)file)->fft(outfile, start_i, end_i, samples, (double*)ratio, rx, ry, PROGRESS, cuda_device);
+			else{
+				std::cout << "ERROR: unidentified data type" << std::endl;
+				exit(1);
+			}
+		}
+		else{
+			std::cout<<"ERROR: only BIP files supported for FFT"<<std::endl;
+			exit(1);
+		}
+	}
 };	//end ENVI
  
 }	//end namespace rts
@@ -78,6 +78,14 @@ struct envi_header
 		load(name);
 	}
  
+	//sets the wavelength vector given a starting value and uniform step size
+	void set_wavelengths(double start, double step){
+		size_t B = bands;						//get the number of bands
+		wavelength.resize(B);
+		for(size_t b = 0; b < B; b++)
+			wavelength[b] = start + b * step;
+	}
+
 	std::string trim(std::string line){
  
 		if(line.length() == 0)
@@ -417,8 +425,13 @@ struct envi_header
 		default:
 			return 0;
 		}
+	}
  
+	//return the number of bytes that SHOULD be in the data file
+	size_t data_bytes(){
+		return samples * lines * bands * valsize() + header_offset;
 	}
+	
  
 	/// Convert an interleave type to a string
 	static std::string interleave_str(interleaveType t){