stim/envi/agilent_binary.h

//make sure that this header file is only loaded once
#ifndef STIM_AGILENT_BINARY_H
#define STIM_AGILENT_BINARY_H
#include <string>
#include <fstream>
#include <complex>
//CUDA
//#ifdef CUDA_FOUND
#include <cuda_runtime.h>
#include "cufft.h"
#include <stim/cuda/cudatools/error.h>
#include <stim/envi/envi_header.h>
//#endif
namespace stim{
template<typename T>
class agilent_binary{
protected:
	std::string fname;
	T* ptr;														//pointer to the image data
	size_t R[3];												//size of the binary image in X, Y, and Z
	static const size_t header = 1020;							//header size
	double Z[2];												//range of z values (position or wavelength)
public:
	size_t size(){
		return (size_t)R[0] * (size_t)R[1] * (size_t)R[2];
	}
	size_t bytes(){
		return size() * sizeof(T);
	}
	void alloc(){
		ptr = (T*) malloc(bytes());
	}
	void alloc(size_t x, size_t y, size_t z){
		R[0] = x;
		R[1] = y;
		R[2] = z;
		alloc();
	}
	size_t dim(size_t i){
		return R[i];
	}
	/// Create a deep copy of an agileng_binary object
	void deep_copy(agilent_binary<T>* dst, const agilent_binary<T>* src){
		dst->alloc(src->R[0], src->R[1], src->R[2]);			//allocate memory
		memcpy(dst->ptr, src->ptr, bytes());					//copy the data
		memcpy(dst->Z, src->Z, sizeof(double) * 2);				//copy the data z range
	}
	/// Default constructor, sets the resolution to zero and the data pointer to NULL
	agilent_binary(){
		memset(R, 0, sizeof(size_t) * 3);				//set the resolution to zero
		ptr = NULL;
	}
	/// Constructor with resolution
	agilent_binary(size_t x, size_t y, size_t z){
		alloc(x, y, z);
	}
	/// Constructor with filename
	agilent_binary(std::string filename){
		ptr = NULL;
		load(filename);
	}
	
	/// Copy constructor
	agilent_binary(const agilent_binary<T> &obj){
		deep_copy(this, &obj);
	}
	agilent_binary<T>& operator=(const agilent_binary<T> rhs){
		if(this != &rhs){								//check for self-assignment
			deep_copy(this, &rhs);						//make a deep copy
		}
		return *this;									//return the result
	}
	~agilent_binary(){
		free(ptr);
	}
	void load(std::string filename){
		if(ptr != NULL) free(ptr);						//if memory has been allocated, free it
		fname = filename;						//save the filename
		short x, y, z;
		std::ifstream infile(fname, std::ios::binary);				//open the input file
		infile.seekg(9, std::ios::beg);				//seek past 9 bytes from the beginning of the file
		infile.read((char*)(&z), 2);							//read two bytes of data (the number of samples is stored as a 16-bit integer)
		infile.seekg(13, std::ios::cur);				//skip another 13 bytes
		infile.read((char*)(&x), 2);				//read the X and Y dimensions
		infile.read((char*)(&y), 2);
		infile.seekg(header, std::ios::beg);			//seek to the start of the data
		alloc(x, y, z);
		ptr = (T*) malloc(bytes());							//allocate space for the data
		infile.read((char*)ptr, bytes());				//read the data		
		infile.close();
	}
	void save(std::string filename){
		std::ofstream outfile(filename, std::ios::binary);			//create an output file
		char zero = 0;
		for(size_t i = 0; i < 9; i++) outfile.write(&zero, 1);		//write 9 zeros
		outfile.write((char*)&R[2], 2);
		for(size_t i = 0; i < 13; i++) outfile.write(&zero, 1);		//write 13 zeros
		outfile.write((char*)&R[0], 2);
		outfile.write((char*)&R[1], 2);
		for(size_t i = 0; i < 992; i++) outfile.write(&zero, 1);		//write 992 zeros
		size_t b = bytes();
		outfile.write((char*)ptr, b);							//write the data to the output file
		outfile.close();
	}
	stim::envi_header create_header(){
		stim::envi_header header;
		header.samples = R[0];
		header.lines = R[1];
		header.bands = R[2];
		double z_delta = (double)(Z[1] - Z[0]) / (double)(R[2] - 1);
		header.wavelength.resize(R[2]);
		for(size_t i = 0; i < R[2]; i++)
			header.wavelength[i] = i * z_delta + Z[0];
		return header;
	}
	/// Subtract the mean from each pixel. Generally used for centering an interferogram.
	void meancenter(){
		size_t Z = R[2];											//store the number of bands
		size_t XY = R[0] * R[1];									//store the number of pixels in the image
		T sum = (T)0;
		T mean;
		for(size_t xy = 0; xy < XY; xy++){							//for each pixel
			sum = 0;
			for(size_t z = 0; z < Z; z++){							//for each band
				sum += ptr[ z * XY + xy ];							//add the band value to a running sum
			}
			mean = sum / (T)Z;										//calculate the pixel mean
			for(size_t z = 0; z < Z; z++){
				ptr[ z * XY + xy ] -= mean;							//subtract the mean from each band
			}
		}
	}
	/// adds n bands of zero padding to the end of the file
	void zeropad(size_t n){
		size_t newZ = R[2] + n;
		T* temp = (T*) calloc(R[0] * R[1] * newZ, sizeof(T));	//allocate space for the new image
		memcpy(temp, ptr, size() * sizeof(T));					//copy the old data to the new image
		
		free(ptr);												//free the old data
		ptr = temp;												//swap in the new data
		R[2] = newZ;											//set the z-dimension to the new zero value
	}
	//pads to the nearest power-of-two
	void zeropad(){
		size_t newZ = (size_t)pow(2, ceil(log(R[2])/log(2)));			//find the nearest power-of-two
		size_t n = newZ - R[2];									//calculate the number of bands to add
		zeropad(n);												//add the padding
	}
	/// Calculate the absorbance spectrum from the transmission spectrum given a background
	void absorbance(stim::agilent_binary<T>* background){
		size_t N = size();											//calculate the number of values to be ratioed
		if(N != background->size()){
			std::cerr<<"ERROR in stim::agilent_binary::absorbance() - transmission image size doesn't match background"<<std::endl;
			exit(1);
		}
		for(size_t i = 0; i < N; i++)
			ptr[i] = -log10(ptr[i] / background->ptr[i]);
	}
//#ifdef CUDA_FOUND
	/// Perform an FFT and return a binary file with bands in the specified range
	agilent_binary<T> fft(double band_min, double band_max, double ELWN = 15798, int UDR = 2){
		auto total_start = std::chrono::high_resolution_clock::now();
		auto start = std::chrono::high_resolution_clock::now();
		T* cpu_data = (T*) malloc( bytes() );										//allocate space for the transposed data
		for(size_t b = 0; b < R[2]; b++){
			for(size_t x = 0; x < R[0] * R[1]; x++){
				cpu_data[x * R[2] + b] = ptr[b * R[0] * R[1] + x];
			}
		}
		auto end = std::chrono::high_resolution_clock::now();
		std::chrono::duration<double> diff = end-start;
       // std::cout << "Transpose data: " << diff.count() << " s\n";
		start = std::chrono::high_resolution_clock::now();
		cufftHandle plan;															//allocate space for a cufft plan
		cufftReal* gpu_data;														//create a pointer to the data
		size_t batch = R[0] * R[1];													//calculate the batch size (X * Y)
		HANDLE_ERROR(cudaMalloc((void**)&gpu_data, bytes()));						//allocate space on the GPU
		HANDLE_ERROR(cudaMemcpy(gpu_data, cpu_data, bytes(), cudaMemcpyHostToDevice));	//copy the data to the GPU
		cufftComplex* gpu_fft;
		HANDLE_ERROR(cudaMalloc((void**)&gpu_fft, R[0] * R[1] * (R[2]/2 + 1) * sizeof(cufftComplex)));
		end = std::chrono::high_resolution_clock::now();
		diff = end-start;
		//std::cout << "Allocate/copy: " << diff.count() << " s\n";
		start = std::chrono::high_resolution_clock::now();
		int N[1];					//create an array with the interferogram size (required for cuFFT input)
		N[0] = (int)R[2];				//set the only array value to the interferogram size
		if(cufftPlanMany(&plan, 1, N, NULL, 1, (int)R[2], NULL, 1, (int)R[2], CUFFT_R2C, (int)batch) != CUFFT_SUCCESS){
			std::cout<<"cuFFT Error: unable to create 1D plan."<<std::endl;
			exit(1);
		}
		end = std::chrono::high_resolution_clock::now();
		diff = end-start;
		//std::cout << "Create a plan: " << diff.count() << " s\n";
		start = std::chrono::high_resolution_clock::now();
		if (cufftExecR2C(plan, gpu_data, gpu_fft) != CUFFT_SUCCESS){		//execute the (implicitly forward) transform
			std::cout<<"CUFFT error: ExecR2C Forward failed";
			exit(1);
		}
		end = std::chrono::high_resolution_clock::now();
		diff = end-start;
		//std::cout << "Perform FFT: " << diff.count() << " s\n";
		start = std::chrono::high_resolution_clock::now();
		std::complex<T>* cpu_fft = (std::complex<T>*) malloc( R[0] * R[1] * (R[2]/2+1) * sizeof(std::complex<T>) );
		HANDLE_ERROR(cudaMemcpy(cpu_fft, gpu_fft, R[0] * R[1] * (R[2]/2+1) * sizeof(cufftComplex), cudaMemcpyDeviceToHost));	//copy data from the host to the device
		//double int_delta = 0.00012656;									//interferogram sample spacing in centimeters
		double int_delta = (1.0 / ELWN) * ((double)UDR / 2.0);			//calculate the interferogram spacing
		double int_length = int_delta * R[2];							//interferogram length in centimeters
		double fft_delta = 1/int_length;								//spectrum spacing (in inverse centimeters, wavenumber)
		double fft_max = fft_delta * R[2]/2;							//get the maximum wavenumber value supported by the specified number of interferogram samples
		if(band_max > fft_max) band_max = fft_max;						//the user gave a band outside of the FFT range, reset the band to the maximum available
		size_t start_i = (size_t)std::ceil(band_min / fft_delta);				//calculate the first band to store
		size_t size_i = (size_t)std::floor(band_max / fft_delta) - start_i;		//calculate the number of bands to store
		size_t end_i = start_i + size_i;								//last band number
		agilent_binary<T> result(R[0], R[1], size_i);
		result.Z[0] = start_i * fft_delta;								//set the range for the FFT result
		result.Z[1] = end_i * fft_delta;
		for(size_t b = start_i; b < end_i; b++){
			for(size_t x = 0; x < R[0] * R[1]; x++){
				result.ptr[(b - start_i) * R[0] * R[1] + x] = abs(cpu_fft[x * (R[2]/2+1) + b]);
			}
		}
		end = std::chrono::high_resolution_clock::now();
		diff = end-start;
		//std::cout << "Transpose/Crop: " << diff.count() << " s\n";
		auto total_end = std::chrono::high_resolution_clock::now();
		diff = total_end-total_start;
		cufftDestroy(plan);
		HANDLE_ERROR(cudaFree(gpu_data));
		HANDLE_ERROR(cudaFree(gpu_fft));
		free(cpu_data);
		free(cpu_fft);
		return result;
	}
	//saves the binary as an ENVI file with a BIP interleave format
	int bip(T* bip_ptr){
		//std::ofstream out(outfile.c_str(), std::ios::binary);			//create a binary file stream for output
		size_t XY = R[0] * R[1];
		size_t B = R[2];
		size_t b;
		for(size_t xy = 0; xy < XY; xy++){
			for(b = 0; b < B; b++){
				bip_ptr[xy * B + b] = ptr[b * XY + xy];
			}
		}
		return 0;
	}
//#endif
};
}
#endif