stim/optics/mirst-1d.cuh

#include "../optics/material.h"
#include "../math/complexfield.cuh"
#include "../math/constants.h"
//#include "../envi/bil.h"

#include "cufft.h"

#include <vector>
#include <sstream>

namespace stim{

//this function writes a sinc function to "dest" such that an iFFT produces a slab
template<typename T>
__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri, 
									  T* src, T* zf, 
									  T w, unsigned int zR, unsigned int nuR){
	//dest = complex field representing the sample
	//ri = refractive indices for each wavelength
	//src = intensity of the light source for each wavelength
	//zf = z position of the slab interface for each wavelength (accounting for optical path length)
	//w = width of the slab (in pixels)
	//zR = number of z-axis samples
	//nuR = number of wavelengths

    //get the current coordinate in the plane slice
	int ifz = blockIdx.x * blockDim.x + threadIdx.x;
	int inu = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(inu >= nuR || ifz >= zR) return;

	int i = inu * zR + ifz;

    T fz;
    if(ifz < zR/2)
        fz = ifz / (T)zR;
    else
        fz = -(zR - ifz) / (T)zR;

    //if the slab starts outside of the simulation domain, just return
    if(zf[inu] >= zR) return;

	//fill the array along z with a sinc function representing the Fourier transform of the layer

	T opl = w * ri[inu].real();			//optical path length

	//handle the case where the slab goes outside the simulation domain
	if(zf[inu] + opl >= zR)
		opl = zR - zf[inu];

	if(opl == 0) return;

	//T l = w * ri[inu].real();
	//complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0));
	complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));

	complex<T> eta = ri[inu] * ri[inu] - 1;

	//dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz);
	if(ifz == 0)
        dest[i] += opl * exp(e) * eta * src[inu];
    else
        dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
}

template<typename T>
__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){
	//zf = current z depth (optical path length) in pixels
	//ri = refractive index of the material
	//w = actual width of the layer (in pixels)


	//compute the index for this thread
	int i = blockIdx.x * blockDim.x + threadIdx.x;
	if(i >= S) return;

	if(ri == NULL)
		zf[i] += w;
	else
		zf[i] += ri[i].real() * w;
}

//apply the 1D MIRST filter to an existing sample (overwriting the sample)
template<typename T>
__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda, 
								 T dFz,
								 T inNA, T outNA, 
								 unsigned int lambdaR, unsigned int zR, 
								 T sigma = 0){
	//sampleFFT = the sample in the Fourier domain (will be overwritten)
	//lambda = list of wavelengths
	//dFz = delta along the Fz axis in the frequency domain
	//inNA = NA of the internal obscuration
	//outNA = NA of the objective
	//zR = number of pixels along the Fz axis (same as the z-axis)
	//lambdaR = number of wavelengths
	//sigma = width of the Gaussian source
	int ifz = blockIdx.x * blockDim.x + threadIdx.x;
	int inu = blockIdx.y * blockDim.y + threadIdx.y;

	if(inu >= lambdaR || ifz >= zR) return;

	//calculate the index into the sample FT
	int i = inu * zR + ifz;

	//compute the frequency (and set all negative spatial frequencies to zero)
	T fz;
	if(ifz < zR / 2)
	    fz = ifz * dFz;
	//if the spatial frequency is negative, set it to zero and exit
	else{
	    sampleFFT[i] = 0;
	    return;
	}

	//compute the frequency in inverse microns
	T nu = 1/lambda[inu];

	//determine the radius of the integration circle
	T nu_sq = nu * nu;
	T fz_sq = (fz * fz) / 4;

	//cut off frequencies above the diffraction limit
	T r;
	if(fz_sq < nu_sq)
	    r = sqrt(nu_sq - fz_sq);
	else
	    r = 0;

	//account for the optics
	T Q = 0;
	if(r > nu * inNA && r < nu * outNA)
	    Q = 1;

	//account for the source
	//T sigma = 30.0;
	T s = exp( - (r*r * sigma*sigma) / 2 );
	//T s=1;

	//compute the final filter
	T mirst = 0;
	if(fz != 0)
	    mirst = 2 * stimPI * r * s * Q * (1/fz);

	sampleFFT[i] *= mirst;

}

/*This object performs a 1-dimensional (layered) MIRST simulation
*/
template<typename T>
class mirst1d{

private:
	unsigned int Z;	//z-axis resolution
	unsigned int pad;	//pixel padding on either side of the sample

	std::vector< material<T> > matlist;	//list of materials
	std::vector< T > layers;				//list of layer thicknesses

	std::vector< T > lambdas;		//list of wavelengths that are being simulated
	unsigned int S;					//number of wavelengths (size of "lambdas")

	T NA[2];						//numerical aperature (central obscuration and outer diameter)

	function<T, T> source_profile;	//profile (spectrum) of the source (expressed in inverse centimeters)

	complexfield<T, 1> scratch;		//scratch GPU memory used to build samples, transforms, etc.

	void fft(int direction = CUFFT_FORWARD){

		unsigned padZ = Z + pad;
		
		//create cuFFT handles
		cufftHandle plan;
		cufftResult result;
		
		if(sizeof(T) == 4)
			result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size());	//single precision
		else
			result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size());	//double precision

		//check for Plan 1D errors
		if(result != CUFFT_SUCCESS){
			std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;
			CufftError(result);
			exit(1);
		}

		if(sizeof(T) == 4)
			result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);
		else
			result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);

		//check for FFT errors
		if(result != CUFFT_SUCCESS){
			std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;
			CufftError(result);
			exit(1);
		}

		cufftDestroy(plan);
	}


	//initialize the scratch memory
	void init_scratch(){
		scratch = complexfield<T, 1>(Z + pad , lambdas.size());
		scratch = 0;
	}

	//get the list of scattering efficiency (eta) values for a specified layer
	std::vector< complex<T> > layer_etas(unsigned int l){

		std::vector< complex<T> > etas;

		//fill the list of etas
		for(unsigned int i=0; i<lambdas.size(); i++)
			etas.push_back( matlist[l].eta(lambdas[i]) );
		return etas;
	}

	//calculates the optimal block and grid sizes using information from the GPU
	void cuda_params(dim3& grids, dim3& blocks){
		int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
		int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);

		//create one thread for each detector pixel
		blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
		grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);
	}

	//add the fourier transform of layer n to the scratch space
	void build_layer_fft(unsigned int n, T* zf){
		unsigned int paddedZ = Z + pad;

		T wpx = layers[n] / dz();	//calculate the width of the layer in pixels

		//allocate memory for the refractive index
		complex<T>* gpuRi;
		HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));

		//allocate memory for the source profile
		T* gpuSrc;
		HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));

		complex<T> ri;
		T source;
		//store the refractive index and source profile in a CPU array
		for(int inu=0; inu<S; inu++){
			//save the refractive index to the GPU
			ri = matlist[n].getN(lambdas[inu]);
			HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice ));

			//save the source profile to the GPU
			source = source_profile(10000 / lambdas[inu]);
			HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice ));

		}

		//create one thread for each pixel of the field slice
		dim3 gridDim, blockDim;
		cuda_params(gridDim, blockDim);
		stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);

		int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size
		int linGrid = S / linBlock + 1;
		stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);

		//free memory
		HANDLE_ERROR(cudaFree(gpuRi));
		HANDLE_ERROR(cudaFree(gpuSrc));
	}

	void build_sample(){
		init_scratch();		//initialize the GPU scratch space
		//build_layer(1);

		T* zf;
		HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));
		HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));

		//render each layer of the sample
		for(unsigned int l=0; l<layers.size(); l++){
			build_layer_fft(l, zf);
		}

		HANDLE_ERROR(cudaFree(zf));
	}

	void apply_filter(){
		dim3 gridDim, blockDim;
		cuda_params(gridDim, blockDim);

		unsigned int Zpad = Z + pad;

		T sim_range = dz() * Zpad;
    	T dFz = 1 / sim_range;

		//copy the array of wavelengths to the GPU
		T* gpuLambdas;
		HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad));
		HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice));
		stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas, 
								 dFz,
								 NA[0], NA[1], 
								 S, Zpad);
	}

	//crop the image to the sample thickness - keep in mind that sample thickness != optical path length
	void crop(){

		scratch = scratch.crop(Z, S);
	}
	
	//save the scratch field as a binary file
	void to_binary(std::string filename){

	}


public:

	//constructor
	mirst1d(unsigned int rZ = 100,
			unsigned int padding = 0){
		Z = rZ;
		pad = padding;
		NA[0] = 0;
		NA[1] = 0.8;
		S = 0;
		source_profile = 1;
	}

	//add a layer, thickness = microns
	void add_layer(material<T> mat, T thickness){
		matlist.push_back(mat);
		layers.push_back(thickness);
	}

	void add_layer(std::string filename, T thickness){
		add_layer(material<T>(filename), thickness);
	}

	//adds a profile spectrum for the light source
	void set_source(std::string filename){
		source_profile.load(filename);
	}

	//adds a block of wavenumbers (cm^-1) to the simulation parameters
	void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){
		unsigned int nu = start;
		while(nu <= stop){
			lambdas.push_back((T)10000 / nu);
			nu += step;
		}
		S = lambdas.size();		//increment the number of wavelengths (shorthand for later)
	}

	T thickness(){
		T t = 0;
		for(unsigned int l=0; l<layers.size(); l++)
			t += layers[l];
		return t;
	}

	void padding(unsigned int padding = 0){
		pad = padding;
	}

	T dz(){
		return thickness() / Z;		//calculate the z-axis step size
	}

	void na(T in, T out){
		NA[0] = in;
		NA[1] = out;
	}

	void na(T out){
		na(0, out);
	}

	stim::function<T, T> get_source(){
		return source_profile;
	}

	void save_sample(std::string filename){
		//create a sample and save the magnitude as an image
		build_sample();
		fft(CUFFT_INVERSE);
		scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
	}

	void save_mirst(std::string filename, bool binary = true){
		//apply the MIRST filter to a sample and save the image

		//build the sample in the Fourier domain
		build_sample();

		//apply the MIRST filter
		apply_filter();

		//apply an inverse FFT to bring the results back into the spatial domain
		fft(CUFFT_INVERSE);

		crop();

		//save the image
		if(binary)
			to_binary(filename);
		else
			scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
	}


	std::string str(){

		stringstream ss;
		ss<<"1D MIRST Simulation========================="<<std::endl;
		ss<<"z-axis resolution: "<<Z<<std::endl;
		ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;
		ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;
		ss<<"padding: "<<pad<<std::endl;
		ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;
		ss<<"dz: "<<dz()<<" um"<<std::endl;
		ss<<std::endl;
		ss<<layers.size()<<" layers-------------"<<std::endl;
		for(unsigned int l=0; l<layers.size(); l++)
			ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;

		ss<<"source profile-----------"<<std::endl;
		ss<<get_source().str()<<std::endl;

		return ss.str();


	}


};

}