scalarwave.h 13.7 KB
``````#ifndef STIM_SCALARWAVE_H
#define STIM_SCALARWAVE_H

#include <string>
#include <sstream>
#include <cmath>

//#include "../math/vector.h"
#include "../math/vec3.h"
#include "../math/quaternion.h"
#include "../math/constants.h"
#include "../math/plane.h"
#include "../math/complex.h"

//CUDA
#include "../cuda/cudatools/devices.h"
#include "../cuda/cudatools/error.h"
#include "../cuda/sharedmem.cuh"

namespace stim{

template<typename T>
class scalarwave{

public:

stim::vec3<T> k;							//k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda
stim::complex<T> E0;						//amplitude

/// Bend a plane wave via refraction, given that the new propagation direction is known
CUDA_CALLABLE scalarwave<T> bend(stim::vec3<T> kn) const{
return scalarwave<T>(kn.norm() * kmag(), E0);
}

public:

///constructor: create a plane wave propagating along k
CUDA_CALLABLE scalarwave(vec3<T> kvec = stim::vec3<T>(0, 0, (T)stim::TAU), complex<T> E = 1){
k = kvec;
E0 = E;
}

CUDA_CALLABLE scalarwave(T kx, T ky, T kz, complex<T> E = 1){
k = vec3<T>(kx, ky, kz);
E0 = E;
}

///multiplication operator: scale E0
CUDA_CALLABLE scalarwave<T> & operator* (const T & rhs){
E0 = E0 * rhs;
return *this;
}

CUDA_CALLABLE T lambda() const{
return stim::TAU / k.len();
}

CUDA_CALLABLE T kmag() const{
return k.len();
}

CUDA_CALLABLE complex<T> E(){
return E0;
}

CUDA_CALLABLE vec3<T> kvec(){
return k;
}

/// calculate the value of the field produced by the plane wave given a three-dimensional position
CUDA_CALLABLE complex<T> pos(T x, T y, T z){
return pos( stim::vec3<T>(x, y, z) );
}

CUDA_CALLABLE complex<T> pos(vec3<T> p = vec3<T>(0, 0, 0)){
return E0 * exp(complex<T>(0, k.dot(p)));
}

//scales k based on a transition from material ni to material nt
CUDA_CALLABLE scalarwave<T> n(T ni, T nt){
return scalarwave<T>(k * (nt / ni), E0);
}

CUDA_CALLABLE scalarwave<T> refract(stim::vec3<T> kn) const{
return bend(kn);
}

/// Calculate the result of a plane wave hitting an interface between two refractive indices

/// @param P is a plane representing the position and orientation of the surface
/// @param n0 is the refractive index outside of the surface (in the direction of the normal)
/// @param n1 is the refractive index inside the surface (in the direction away from the normal)
/// @param r is the reflected component of the plane wave
/// @param t is the transmitted component of the plane wave
void scatter(stim::plane<T> P, T n0, T n1, scalarwave<T> &r, scalarwave<T> &t){
scatter(P, n1/n0, r, t);
}

/// Calculate the scattering result when nr = n1/n0

/// @param P is a plane representing the position and orientation of the surface
/// @param r is the ration n1/n0
/// @param n1 is the refractive index inside the surface (in the direction away from the normal)
/// @param r is the reflected component of the plane wave
/// @param t is the transmitted component of the plane wave
void scatter(stim::plane<T> P, T nr, scalarwave<T> &r, scalarwave<T> &t){
/*
int facing = P.face(k);		//determine which direction the plane wave is coming in

if(facing == -1){		//if the wave hits the back of the plane, invert the plane and nr
P = P.flip();			//flip the plane
nr = 1/nr;				//invert the refractive index (now nr = n0/n1)
}

//use Snell's Law to calculate the transmitted angle
T cos_theta_i = k.norm().dot(-P.norm());				//compute the cosine of theta_i
T theta_i = acos(cos_theta_i);							//compute theta_i
T sin_theta_t = (1/nr) * sin(theta_i);						//compute the sine of theta_t using Snell's law
T theta_t = asin(sin_theta_t);							//compute the cosine of theta_t

bool tir = false;						//flag for total internal reflection
if(theta_t != theta_t){
tir = true;
theta_t = stim::PI / (T)2;
}

//handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
if(theta_i == 0){
T rp = (1 - nr) / (1 + nr);		//compute the Fresnel coefficients
T tp = 2 / (1 + nr);
vec3<T> kr = -k;
vec3<T> kt = k * nr;			//set the k vectors for theta_i = 0
vec3< complex<T> > Er = E0 * rp;		//compute the E vectors
vec3< complex<T> > Et = E0 * tp;
T phase_t = P.p().dot(k - kt);	//compute the phase offset
T phase_r = P.p().dot(k - kr);

//create the plane waves
r = planewave<T>(kr, Er, phase_r);
t = planewave<T>(kt, Et, phase_t);
return;
}

//compute the Fresnel coefficients
T rp, rs, tp, ts;
rp = tan(theta_t - theta_i) / tan(theta_t + theta_i);
rs = sin(theta_t - theta_i) / sin(theta_t + theta_i);

if(tir){
tp = ts = 0;
}
else{
tp = ( 2 * sin(theta_t) * cos(theta_i) ) / ( sin(theta_t + theta_i) * cos(theta_t - theta_i) );
ts = ( 2 * sin(theta_t) * cos(theta_i) ) / sin(theta_t + theta_i);
}

//compute the coordinate space for the plane of incidence
vec3<T> z_hat = -P.norm();
vec3<T> y_hat = P.parallel(k).norm();
vec3<T> x_hat = y_hat.cross(z_hat).norm();

//compute the k vectors for r and t
vec3<T> kr, kt;
kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag();
kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr;

//compute the magnitude of the p- and s-polarized components of the incident E vector
complex<T> Ei_s = E0.dot(x_hat);
int sgn = E0.dot(y_hat).sgn();
vec3< complex<T> > cx_hat = x_hat;
complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
//compute the magnitude of the p- and s-polarized components of the reflected E vector
complex<T> Er_s = Ei_s * rs;
complex<T> Er_p = Ei_p * rp;
//compute the magnitude of the p- and s-polarized components of the transmitted E vector
complex<T> Et_s = Ei_s * ts;
complex<T> Et_p = Ei_p * tp;

//compute the reflected E vector
vec3< complex<T> > Er = vec3< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
//compute the transmitted E vector
vec3< complex<T> > Et = vec3< complex<T> >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s;

T phase_t = P.p().dot(k - kt);
T phase_r = P.p().dot(k - kr);

//create the plane waves
r.k = kr;
r.E0 = Er * exp( complex<T>(0, phase_r) );

t.k = kt;
t.E0 = Et * exp( complex<T>(0, phase_t) );
*/
}

std::string str()
{
std::stringstream ss;
ss<<"Plane Wave:"<<std::endl;
ss<<"	"<<E0<<" e^i ( "<<k<<" . r )";
return ss.str();
}
};					//end planewave class

/// CUDA kernel for computing the field produced by a batch of plane waves at an array of locations
template<typename T>
__global__ void cuda_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t n_waves){
extern __shared__ stim::scalarwave<T> shared_W[];		//declare the list of waves in shared memory

stim::cuda::threadedMemcpy(shared_W, W, n_waves, threadIdx.x, blockDim.x);	//copy the plane waves into shared memory for faster access

size_t i = blockIdx.x * blockDim.x + threadIdx.x;				//get the index into the array
if(i >= N) return;												//exit if this thread is outside the array
T px, py, pz;
(x == NULL) ? px = 0 : px = x[i];								// test for NULL values and set positions
(y == NULL) ? py = 0 : py = y[i];
(z == NULL) ? pz = 0 : pz = z[i];

stim::complex<T> f = 0;											//create a register to store the result
for(size_t w = 0; w < n_waves; w++)
f += shared_W[w].pos(px, py, pz);							//evaluate the plane wave
F[i] += f;														//copy the result to device memory
}

/// evaluate a scalar wave at several points, where all arrays are on the GPU
template<typename T>
void gpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){

dim3 blocks(N / threads + 1);						//calculate the optimal number of blocks
cuda_scalarwave<T><<< blocks, threads >>>(F, N, x, y, z, w);			//call the kernel
}

template<typename T>
void gpu_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW){

size_t wave_bytes = sizeof(stim::scalarwave<T>);
size_t shared_bytes = stim::sharedMemPerBlock();									//calculate the maximum amount of shared memory available
size_t max_batch = shared_bytes / wave_bytes;				//calculate number of plane waves that will fit into shared memory
size_t batch_bytes = min(nW, max_batch) * wave_bytes;				//initialize the batch size (in bytes) to the maximum batch required

stim::scalarwave<T>* batch_W;
HANDLE_ERROR(cudaMalloc(&batch_W, batch_bytes));										//allocate memory for a single batch of plane waves

dim3 blocks((unsigned)(N / threads + 1));										//calculate the optimal number of blocks

size_t batch_size;																	//declare a variable to store the size of the current batch
size_t waves_processed = 0;															//initialize the number of waves processed to zero
while(waves_processed < nW){												//while there are still waves to be processed
batch_size = min<size_t>(max_batch, nW - waves_processed);			//process either a whole batch, or whatever is left
batch_bytes = batch_size * sizeof(stim::scalarwave<T>);
HANDLE_ERROR(cudaMemcpy(batch_W, W + waves_processed, batch_bytes, cudaMemcpyDeviceToDevice));	//copy the plane waves into global memory
cuda_scalarwave<T><<< blocks, threads, batch_bytes >>>(F, N, x, y, z, batch_W, batch_size);	//call the kernel
waves_processed += batch_size;													//increment the counter indicating how many waves have been processed
}
cudaFree(batch_W);
}

/// Sums a series of coherent plane waves at a specified point
/// @param field is the output array of field values corresponding to each input point
/// @param x is an array of x coordinates for the field point
/// @param y is an array of y coordinates for the field point
/// @param z is an array of z coordinates for the field point
/// @param N is the number of points in the input and output arrays
/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
/// @param A is the list of amplitudes for each wave
/// @param S is the list of propagation directions for each wave
template<typename T>
void cpu_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > W){
size_t S = W.size();											//store the number of waves
#ifdef __CUDACC__
stim::complex<T>* dev_F;										//allocate space for the field
cudaMalloc(&dev_F, N * sizeof(stim::complex<T>));
cudaMemcpy(dev_F, F, N * sizeof(stim::complex<T>), cudaMemcpyHostToDevice);
//cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>));				//set the field to zero (necessary because a sum is used)

T* dev_x = NULL;												//allocate space and copy the X coordinate (if specified)
if(x != NULL){
HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T)));
HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice));
}

T* dev_y = NULL;												//allocate space and copy the Y coordinate (if specified)
if(y != NULL){
HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T)));
HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice));
}

T* dev_z = NULL;												//allocate space and copy the Z coordinate (if specified)
if(z != NULL){
HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T)));
HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice));
}

stim::scalarwave<T>* dev_W;
HANDLE_ERROR( cudaMalloc(&dev_W, sizeof(stim::scalarwave<T>) * W.size()) );
HANDLE_ERROR( cudaMemcpy(dev_W, &W[0], sizeof(stim::scalarwave<T>) * W.size(), cudaMemcpyHostToDevice) );

gpu_scalarwaves(dev_F, N, dev_x, dev_y, dev_z, dev_W, W.size());

cudaMemcpy(F, dev_F, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost);			//copy the field from device memory

if(x != NULL) cudaFree(dev_x);														//free everything
if(y != NULL) cudaFree(dev_y);
if(z != NULL) cudaFree(dev_z);
cudaFree(dev_F);
#else
memset(F, 0, N * sizeof(stim::complex<T>));
T px, py, pz;
for(size_t i = 0; i < N; i++){										// for each element in the array
(x == NULL) ? px = 0 : px = x[i];								// test for NULL values
(y == NULL) ? py = 0 : py = y[i];
(z == NULL) ? pz = 0 : pz = z[i];

for(size_t s = 0; s < S; s++){
F[i] += w_array[s].pos(px, py, pz);						//sum all plane waves at this point
}
}
#endif
}

template<typename T>
void cpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
std::vector< stim::scalarwave<T> > w_array(1, w);
cpu_scalarwaves(F, N, x, y, z, w_array);
}

template<typename T>
void cpu_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
std::vector< stim::scalarwave<T> > w_array(1, w);
cpu_scalarwaves(F, N, x, y, z, w_array);
}

/// Sums a series of coherent plane waves at a specified point
/// @param x is the x coordinate of the field point
/// @param y is the y coordinate of the field point
/// @param z is the z coordinate of the field point
/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
/// @param A is the list of amplitudes for each wave
/// @param S is the list of propagation directions for each wave
template<typename T>
CUDA_CALLABLE stim::complex<T> cpu_scalarwaves(T x, T y, T z, std::vector< stim::scalarwave<T> > W){
size_t N = W.size();												//get the number of plane wave samples
stim::complex<T> field(0, 0);										//initialize the field to zero (0)
stim::vec3<T> k;													//allocate space for the direction vector
for(size_t i = 0; i < N; i++){
field += W[i].pos(x, y, z);
}
return field;
}

}					//end namespace stim

template <typename T>
std::ostream& operator<<(std::ostream& os, stim::scalarwave<T> p)
{
os<<p.str();
return os;
}

#endif``````