stim/envi/agilent_binary.h

//make sure that this header file is only loaded once
#ifndef STIM_AGILENT_BINARY_H
#define STIM_AGILENT_BINARY_H
#include <string>
#include <fstream>
//CUDA
#ifdef CUDA_FOUND
	#include <cuda_runtime.h>
	#include "cufft.h"
	#include <stim/cuda/cudatools/error.h>
#endif
namespace stim{
template<typename T>
class agilent_binary{
protected:
	std::string fname;
	T* ptr;
	size_t R[3];
	static const size_t header = 1020;
	double Z[2];
public:
	size_t size(){
		return (size_t)R[0] * (size_t)R[1] * (size_t)R[2];
	}
	size_t bytes(){
		return size() * sizeof(T);
	}
	void alloc(){
		ptr = (T*) malloc(bytes());
	}
	void alloc(short x, short y, short z){
		R[0] = x;
		R[1] = y;
		R[2] = z;
		alloc();
	}
	void deep_copy(agilent_binary<T>* dst, const agilent_binary<T>* src){
		dst->alloc(src->R[0], src->R[1], src->R[2]);			//allocate memory
		memcpy(dst->ptr, src->ptr, bytes());					//copy the data
		memcpy(dst->Z, src->Z, sizeof(double) * 2);				//copy the data z range
	}
	agilent_binary(){
		memset(R, 0, sizeof(short) * 3);				//set the resolution to zero
		ptr = NULL;
	}
	/// Constructor with resolution
	agilent_binary(short x, short y, short z){
		alloc(x, y, z);
	}
	/// Constructor with filename
	agilent_binary(std::string filename){
		ptr = NULL;
		load(filename);
	}
	
	/// Copy constructor
	agilent_binary(const agilent_binary<T> &obj){
		deep_copy(this, &obj);
	}
	agilent_binary<T>& operator=(const agilent_binary<T> rhs){
		if(this != &rhs){								//check for self-assignment
			deep_copy(this, &rhs);						//make a deep copy
		}
		return *this;									//return the result
	}
	~agilent_binary(){
		free(ptr);
	}
	void load(std::string filename){
		if(ptr != NULL) free(ptr);						//if memory has been allocated, free it
		fname = filename;						//save the filename
		short x, y, z;
		std::ifstream infile(fname, std::ios::binary);				//open the input file
		infile.seekg(9, std::ios::beg);				//seek past 9 bytes from the beginning of the file
		infile.read((char*)(&z), 2);							//read two bytes of data (the number of samples is stored as a 16-bit integer)
		infile.seekg(13, std::ios::cur);				//skip another 13 bytes
		infile.read((char*)(&x), 2);				//read the X and Y dimensions
		infile.read((char*)(&y), 2);
		infile.seekg(header, std::ios::beg);			//seek to the start of the data
		alloc(x, y, z);
		ptr = (T*) malloc(bytes());							//allocate space for the data
		infile.read((char*)ptr, bytes());				//read the data		
		infile.close();
	}
	void save(std::string filename){
		std::ofstream outfile(filename, std::ios::binary);			//create an output file
		char zero = 0;
		for(size_t i = 0; i < 9; i++) outfile.write(&zero, 1);		//write 9 zeros
		outfile.write((char*)&R[0], 2);
		for(size_t i = 0; i < 13; i++) outfile.write(&zero, 1);		//write 13 zeros
		outfile.write((char*)&R[1], 2);
		outfile.write((char*)&R[2], 2);
		for(size_t i = 0; i < 992; i++) outfile.write(&zero, 1);		//write 992 zeros
		//char zerovec[1020];
		//outfile.write((char*)zerovec, 1020);
		size_t b = bytes();
		outfile.write((char*)ptr, b);							//write the data to the output file
		outfile.close();
	}
	stim::envi_header create_header(){
		stim::envi_header header;
		header.samples = R[0];
		header.lines = R[1];
		header.bands = R[2];
		double z_delta = (double)(Z[1] - Z[0]) / (double)(R[2] - 1);
		header.wavelength.resize(R[2]);
		for(size_t i = 0; i < R[2]; i++)
			header.wavelength[i] = i * z_delta + Z[0];
		return header;
	}
	/// Calculate the absorbance spectrum from the transmission spectrum given a background
	void absorbance(stim::agilent_binary<T>* background){
		size_t N = size();											//calculate the number of values to be ratioed
		if(N != background->size()){
			std::cerr<<"ERROR in stim::agilent_binary::absorbance() - transmission image size doesn't match background"<<std::endl;
			exit(1);
		}
		for(size_t i = 0; i < N; i++)
			ptr[i] = -log10(ptr[i] / background->ptr[i]);
	}
#ifdef CUDA_FOUND
	/// Perform an FFT and return a binary file with bands in the specified range
	agilent_binary<T> fft(float band_min, float band_max){
		auto total_start = std::chrono::high_resolution_clock::now();
		auto start = std::chrono::high_resolution_clock::now();
		T* cpu_data = (T*) malloc( bytes() );										//allocate space for the transposed data
		for(size_t b = 0; b < R[2]; b++){
			for(size_t x = 0; x < R[0] * R[1]; x++){
				cpu_data[x * R[2] + b] = ptr[b * R[0] * R[1] + x];
			}
		}
		auto end = std::chrono::high_resolution_clock::now();
		std::chrono::duration<double> diff = end-start;
       // std::cout << "Transpose data: " << diff.count() << " s\n";
		start = std::chrono::high_resolution_clock::now();
		cufftHandle plan;															//allocate space for a cufft plan
		cufftReal* gpu_data;														//create a pointer to the data
		size_t batch = R[0] * R[1];													//calculate the batch size (X * Y)
		HANDLE_ERROR(cudaMalloc((void**)&gpu_data, bytes()));						//allocate space on the GPU
		HANDLE_ERROR(cudaMemcpy(gpu_data, cpu_data, bytes(), cudaMemcpyHostToDevice));	//copy the data to the GPU
		cufftComplex* gpu_fft;
		HANDLE_ERROR(cudaMalloc((void**)&gpu_fft, R[0] * R[1] * (R[2]/2 + 1) * sizeof(cufftComplex)));
		end = std::chrono::high_resolution_clock::now();
		diff = end-start;
		//std::cout << "Allocate/copy: " << diff.count() << " s\n";
		start = std::chrono::high_resolution_clock::now();
		int N[1];					//create an array with the interferogram size (required for cuFFT input)
		N[0] = R[2];				//set the only array value to the interferogram size
		if(cufftPlanMany(&plan, 1, N, NULL, 1, R[2], NULL, 1, R[2], CUFFT_R2C, batch) != CUFFT_SUCCESS){
			std::cout<<"cuFFT Error: unable to create 1D plan."<<std::endl;
			exit(1);
		}
		end = std::chrono::high_resolution_clock::now();
		diff = end-start;
		//std::cout << "Create a plan: " << diff.count() << " s\n";
		start = std::chrono::high_resolution_clock::now();
		if (cufftExecR2C(plan, gpu_data, gpu_fft) != CUFFT_SUCCESS){		//execute the (implicitly forward) transform
			std::cout<<"CUFFT error: ExecR2C Forward failed";
			exit(1);
		}
		end = std::chrono::high_resolution_clock::now();
		diff = end-start;
		//std::cout << "Perform FFT: " << diff.count() << " s\n";
		start = std::chrono::high_resolution_clock::now();
		std::complex<T>* cpu_fft = (std::complex<T>*) malloc( R[0] * R[1] * (R[2]/2+1) * sizeof(std::complex<T>) );
		HANDLE_ERROR(cudaMemcpy(cpu_fft, gpu_fft, R[0] * R[1] * (R[2]/2+1) * sizeof(cufftComplex), cudaMemcpyDeviceToHost));	//copy data from the host to the device
		double int_delta = 0.00012656;									//interferogram sample spacing in centimeters
		double int_length = int_delta * R[2];							//interferogram length in centimeters
		double fft_delta = 1/int_length;								//spectrum spacing (in inverse centimeters, wavenumber
		size_t start_i = std::ceil(band_min / fft_delta);				//calculate the first band to store
		size_t size_i = std::floor(band_max / fft_delta) - start_i;		//calculate the number of bands to store
		size_t end_i = start_i + size_i;								//last band number
		agilent_binary<T> result(R[0], R[1], size_i);
		result.Z[0] = start_i * fft_delta;								//set the range for the FFT result
		result.Z[1] = end_i * fft_delta;
		for(size_t b = start_i; b < end_i; b++){
			for(size_t x = 0; x < R[0] * R[1]; x++){
				result.ptr[(b - start_i) * R[0] * R[1] + x] = abs(cpu_fft[x * (R[2]/2+1) + b]);
			}
		}
		end = std::chrono::high_resolution_clock::now();
		diff = end-start;
		//std::cout << "Transpose/Crop: " << diff.count() << " s\n";
		auto total_end = std::chrono::high_resolution_clock::now();
		diff = total_end-total_start;
		cufftDestroy(plan);
		HANDLE_ERROR(cudaFree(gpu_data));
		HANDLE_ERROR(cudaFree(gpu_fft));
		free(cpu_data);
		free(cpu_fft);
		return result;
	}
#endif
};
}
#endif