From 8e4f836425e5bee321a75e5ee33dee9867df0a85 Mon Sep 17 00:00:00 2001 From: David Date: Mon, 6 Jun 2016 14:57:04 -0500 Subject: [PATCH] started a new optical framework focused on scalar simulation --- stim/cuda/cudatools/devices.h | 12 +++++++++++- stim/cuda/sharedmem.cuh | 15 ++++++++++++++- stim/math/bessel.h | 61 ++++++++++++++++++++++++++++++++++++++----------------------- stim/math/complex.h | 67 +++++++++++++++++++++++++++---------------------------------------- stim/math/constants.h | 10 ++++++---- stim/math/matrix.h | 18 +++++++++++------- stim/math/meshgrid.h | 42 ++++++++++++++++++++++++++++++++++++++++++ stim/math/quaternion.h | 10 +++++----- stim/math/vec3.h | 242 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ stim/math/vector.h | 6 ++---- stim/optics/beam.h | 203 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- stim/optics/efield.cuh | 409 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- stim/optics/esphere.cuh | 162 ------------------------------------------------------------------------------------------------------------------------------------------------------------------ stim/optics/halfspace.cuh | 339 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- stim/optics/material.h | 153 --------------------------------------------------------------------------------------------------------------------------------------------------------- stim/optics/mirst-1d.cuh | 447 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- stim/optics/planewave.h | 211 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------------------------------------------- stim/optics/scalarbeam.h | 217 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ stim/optics/scalarwave.h | 353 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ stim/optics_old/beam.h | 203 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ stim/optics_old/efield.cuh | 409 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ stim/optics_old/esphere.cuh | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ stim/optics_old/halfspace.cuh | 339 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ stim/optics_old/material.h | 153 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ stim/optics_old/mirst-1d.cuh | 447 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ stim/optics_old/planewave.h | 320 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 26 files changed, 3096 insertions(+), 1914 deletions(-) create mode 100644 stim/math/meshgrid.h create mode 100644 stim/math/vec3.h delete mode 100644 stim/optics/beam.h delete mode 100644 stim/optics/efield.cuh delete mode 100644 stim/optics/esphere.cuh delete mode 100644 stim/optics/halfspace.cuh delete mode 100644 stim/optics/material.h delete mode 100644 stim/optics/mirst-1d.cuh create mode 100644 stim/optics/scalarbeam.h create mode 100644 stim/optics/scalarwave.h create mode 100644 stim/optics_old/beam.h create mode 100644 stim/optics_old/efield.cuh create mode 100644 stim/optics_old/esphere.cuh create mode 100644 stim/optics_old/halfspace.cuh create mode 100644 stim/optics_old/material.h create mode 100644 stim/optics_old/mirst-1d.cuh create mode 100644 stim/optics_old/planewave.h diff --git a/stim/cuda/cudatools/devices.h b/stim/cuda/cudatools/devices.h index b794d40..74509ee 100644 --- a/stim/cuda/cudatools/devices.h +++ b/stim/cuda/cudatools/devices.h @@ -15,7 +15,7 @@ int maxThreadsPerBlock() } extern "C" -int sharedMemPerBlock() +size_t sharedMemPerBlock() { int device; cudaGetDevice(&device); //get the id of the current device @@ -23,6 +23,16 @@ int sharedMemPerBlock() cudaGetDeviceProperties(&props, device); return props.sharedMemPerBlock; } + +extern "C" +size_t constMem() +{ + int device; + cudaGetDevice(&device); //get the id of the current device + cudaDeviceProp props; //device property structure + cudaGetDeviceProperties(&props, device); + return props.totalConstMem; +} } //end namespace rts #endif diff --git a/stim/cuda/sharedmem.cuh b/stim/cuda/sharedmem.cuh index 1ce22d3..b8d4105 100644 --- a/stim/cuda/sharedmem.cuh +++ b/stim/cuda/sharedmem.cuh @@ -5,7 +5,7 @@ namespace stim{ namespace cuda{ - // Copies values from global memory to shared memory, optimizing threads + // Copies values from texture memory to shared memory, optimizing threads template __device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src, unsigned int x, unsigned int y, unsigned int X, unsigned int Y, @@ -35,6 +35,19 @@ namespace stim{ } } + // Copies values from global memory to shared memory, optimizing threads + template + __device__ void sharedMemcpy(T* dest, T* src, size_t N, size_t tid, size_t nt){ + + size_t I = N / nt + 1; //calculate the number of iterations required to make the copy + size_t xi = tid; //initialize the source and destination index to the thread ID + for(size_t i = 0; i < I; i++){ //for each iteration + if(xi < N) //if the index is within the copy region + dest[xi] = src[xi]; //perform the copy + xi += nt; + } + } + } } diff --git a/stim/math/bessel.h b/stim/math/bessel.h index d291deb..1be8607 100644 --- a/stim/math/bessel.h +++ b/stim/math/bessel.h @@ -17,6 +17,11 @@ static complex czero(0.0,0.0); template< typename P > P gamma(P x) { + const P EPS = numeric_limits

::epsilon(); + const P FPMIN_MAG = numeric_limits

::min(); + const P FPMIN = numeric_limits

::lowest(); + const P FPMAX = numeric_limits

::max(); + int i,k,m; P ga,gr,r,z; @@ -47,7 +52,7 @@ P gamma(P x) -0.54e-14, 0.14e-14}; - if (x > 171.0) return 1e308; // This value is an overflow flag. + if (x > 171.0) return FPMAX; // This value is an overflow flag. if (x == (int)x) { if (x > 0.0) { ga = 1.0; // use factorial @@ -56,7 +61,7 @@ P gamma(P x) } } else - ga = 1e308; + ga = FPMAX; } else { if (fabs(x) > 1.0) { @@ -89,6 +94,11 @@ template int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1, P &j0p,P &j1p,P &y0p,P &y1p) { + const P EPS = numeric_limits

::epsilon(); + const P FPMIN_MAG = numeric_limits

::min(); + const P FPMIN = numeric_limits

::lowest(); + const P FPMAX = numeric_limits

::max(); + P x2,r,ec,w0,w1,r0,r1,cs0,cs1; P cu,p0,q0,p1,q1,t1,t2; int k,kz; @@ -157,12 +167,12 @@ int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1, if (x == 0.0) { j0 = 1.0; j1 = 0.0; - y0 = -1e308; - y1 = -1e308; + y0 = -FPMIN; + y1 = -FPMIN; j0p = 0.0; j1p = 0.5; - y0p = 1e308; - y1p = 1e308; + y0p = FPMAX; + y1p = FPMAX; return 0; } x2 = x*x; @@ -329,7 +339,7 @@ int msta1(P x,int mp) for (i=0;i<20;i++) { nn = (int)(n1-(n1-n0)/(1.0-f0/f1)); f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-mp; - if (abs(nn-n1) < 1) break; + if (std::abs(nn-n1) < 1) break; n0 = n1; f0 = f1; n1 = nn; @@ -361,7 +371,7 @@ int msta2(P x,int n,int mp) for (i=0;i<20;i++) { nn = (int)(n1-(n1-n0)/(1.0-f0/f1)); f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-obj; - if (abs(nn-n1) < 1) break; + if (std::abs(nn-n1) < 1) break; n0 = n1; f0 = f1; n1 = nn; @@ -596,21 +606,26 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv, P b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk; int j,k,l,m,n,kz; + const P EPS = numeric_limits

::epsilon(); + const P FPMIN_MAG = numeric_limits

::min(); + const P FPMIN = numeric_limits

::lowest(); + const P FPMAX = numeric_limits

::max(); + x2 = x*x; n = (int)v; v0 = v-n; if ((x < 0.0) || (v < 0.0)) return 1; - if (x < 1e-15) { + if (x < EPS) { for (k=0;k<=n;k++) { jv[k] = 0.0; - yv[k] = -1e308; + yv[k] = FPMIN; djv[k] = 0.0; - dyv[k] = 1e308; + dyv[k] = FPMAX; if (v0 == 0.0) { jv[0] = 1.0; djv[1] = 0.5; } - else djv[0] = 1e308; + else djv[0] = FPMAX; } vm = v; return 0; @@ -623,7 +638,7 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv, for (k=1;k<=40;k++) { r *= -0.25*x2/(k*(k+vl)); bjvl += r; - if (fabs(r) < fabs(bjvl)*1e-15) break; + if (fabs(r) < fabs(bjvl)*EPS) break; } vg = 1.0 + vl; a = pow(0.5*x,vl)/gamma(vg); @@ -686,7 +701,7 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv, if (m < n) n = m; else m = msta2(x,n,15); f2 = 0.0; - f1 = 1.0e-100; + f1 = FPMIN_MAG; for (k=m;k>=0;k--) { f = 2.0*(v0+k+1.0)*f1/x-f2; if (k <= n) jv[k] = f; @@ -766,17 +781,17 @@ int bessjyv_sph(int v, P z, P &vm, P* cjv, P* cyv, P* cjvp, P* cyvp) { //first, compute the bessel functions of fractional order - bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); + bessjyv

(v + (P)0.5, z, vm, cjv, cyv, cjvp, cyvp); //iterate through each and scale for(int n = 0; n<=v; n++) { - cjv[n] = cjv[n] * sqrt(rtsPI/(z * 2.0)); - cyv[n] = cyv[n] * sqrt(rtsPI/(z * 2.0)); + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); - cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(rtsPI / (z * 2.0)); - cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(rtsPI / (z * 2.0)); + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); } return 0; @@ -1498,11 +1513,11 @@ int cbessjyva_sph(int v,complex

z,P &vm,complex

*cjv, for(int n = 0; n<=v; n++) { - cjv[n] = cjv[n] * sqrt(rtsPI/(z * 2.0)); - cyv[n] = cyv[n] * sqrt(rtsPI/(z * 2.0)); + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); - cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(rtsPI / (z * 2.0)); - cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(rtsPI / (z * 2.0)); + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); } return 0; diff --git a/stim/math/complex.h b/stim/math/complex.h index 84ba67e..e5d0a35 100644 --- a/stim/math/complex.h +++ b/stim/math/complex.h @@ -1,11 +1,9 @@ -/*RTS Complex number class. This class is CUDA compatible, -and can therefore be used in CUDA code and on CUDA devices. -*/ +/// CUDA compatible complex number class -#ifndef RTS_COMPLEX -#define RTS_COMPLEX +#ifndef STIM_COMPLEX +#define STIM_COMPLEX -#include "../cuda/callable.h" +#include "../cuda/cudatools/callable.h" #include #include #include @@ -230,12 +228,6 @@ struct complex return result; } - /*CUDA_CALLABLE complex pow(int y) - { - - return pow((double)y); - }*/ - CUDA_CALLABLE complex pow(T y) { complex result; @@ -328,8 +320,31 @@ struct complex return *this; } + + }; +/// Cast an array of complex values to an array of real values +template +static void real(T* r, complex* c, size_t n){ + for(size_t i = 0; i < n; i++) + r[i] = c[i].real(); +} + +/// Cast an array of complex values to an array of real values +template +static void imag(T* r, complex* c, size_t n){ + for(size_t i = 0; i < n; i++) + r[i] = c[i].imag(); +} + +/// Calculate the magnitude of an array of complex values +template +static void abs(T* m, complex* c, size_t n){ + for(size_t i = 0; i < n; i++) + m[i] = c[i].abs(); +} + } //end RTS namespace //addition @@ -432,17 +447,6 @@ CUDA_CALLABLE static T imag(stim::complex a) return a.i; } -//trigonometric functions -//template -/*CUDA_CALLABLE static stim::complex sinf(const stim::complex x) -{ - stim::complex result; - result.r = sinf(x.r) * coshf(x.i); - result.i = cosf(x.r) * sinhf(x.i); - - return result; -}*/ - template CUDA_CALLABLE stim::complex sin(const stim::complex x) { @@ -453,17 +457,6 @@ CUDA_CALLABLE stim::complex sin(const stim::complex x) return result; } -//floating point template -//template -/*CUDA_CALLABLE static stim::complex cosf(const stim::complex x) -{ - stim::complex result; - result.r = cosf(x.r) * coshf(x.i); - result.i = -(sinf(x.r) * sinhf(x.i)); - - return result; -}*/ - template CUDA_CALLABLE stim::complex cos(const stim::complex x) { @@ -496,10 +489,4 @@ std::istream& operator>>(std::istream& is, stim::complex& x) return is; //return the stream } -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7 -//template using rtsComplex = stim::complex; -//#endif - - - #endif diff --git a/stim/math/constants.h b/stim/math/constants.h index d333887..3ea11f9 100644 --- a/stim/math/constants.h +++ b/stim/math/constants.h @@ -1,7 +1,9 @@ -#ifndef RTS_CONSTANTS_H -#define RTS_CONSTANTS_H +#ifndef STIM_CONSTANTS_H +#define STIM_CONSTANTS_H -#define stimPI 3.14159 -#define stimTAU 2 * rtsPI +namespace stim{ + const double PI = 3.1415926535897932384626433832795028841971693993751058209749445923078164062862; + const double TAU = 2 * stim::PI; +} #endif diff --git a/stim/math/matrix.h b/stim/math/matrix.h index 777ba9e..b9bebb6 100644 --- a/stim/math/matrix.h +++ b/stim/math/matrix.h @@ -50,10 +50,8 @@ struct matrix return *this; } - template - CUDA_CALLABLE vec operator*(vec rhs) - { + vec operator*(vec rhs){ unsigned int N = rhs.size(); vec result; @@ -66,6 +64,16 @@ struct matrix return result; } + template + CUDA_CALLABLE vec3 operator*(vec3 rhs){ + vec3 result = 0; + for(int r=0; r<3; r++) + for(int c=0; c<3; c++) + result[r] += (*this)(r, c) * rhs[c]; + + return result; + } + std::string toStr() { std::stringstream ss; @@ -82,10 +90,6 @@ struct matrix return ss.str(); } - - - - }; } //end namespace rts diff --git a/stim/math/meshgrid.h b/stim/math/meshgrid.h new file mode 100644 index 0000000..acfa8bb --- /dev/null +++ b/stim/math/meshgrid.h @@ -0,0 +1,42 @@ +#ifndef STIM_MESHGRID_H +#define STIM_MESHGRID_H + +namespace stim{ + + /// Create a 2D grid based on a pair of vectors representing the grid spacing (see Matlab) + /// @param X is an [nx x ny] array that will store the X coordinates for each 2D point + /// @param Y is an [nx x ny] array that will store the Y coordinates for each 2D point + /// @param x is an [nx] array that provides the positions of grid points in the x direction + /// @param nx is the number of grid points in the x direction + /// @param y is an [ny] array that provides the positions of grid points in the y direction + /// @param ny is the number of grid points in the y direction + template + void meshgrid(T* X, T* Y, T* x, size_t nx, T* y, size_t ny){ + size_t xi, yi; //allocate index variables + for(yi = 0; yi < ny; yi++){ //iterate through each column + for(xi = 0; xi < nx; xi++){ //iterate through each row + X[yi * nx + xi] = x[xi]; + Y[yi * nx + xi] = y[yi]; + } + } + } + + /// Creates an array of n equally spaced values in the range [xmin xmax] + /// @param X is an array of length n that stores the values + /// @param xmin is the start point of the array + /// @param xmax is the end point of the array + /// @param n is the number of points in the array + template + void linspace(T* X, T xmin, T xmax, size_t n){ + T alpha; + for(size_t i = 0; i < n; i++){ + alpha = (T)i / (T)n; + X[i] = (1 - alpha) * xmin + alpha * xmax; + } + } + + +} + + +#endif \ No newline at end of file diff --git a/stim/math/quaternion.h b/stim/math/quaternion.h index 1475037..1fde328 100644 --- a/stim/math/quaternion.h +++ b/stim/math/quaternion.h @@ -26,13 +26,13 @@ public: CUDA_CALLABLE void CreateRotation(T theta, T ux, T uy, T uz){ - vec u(ux, uy, uz); + vec3 u(ux, uy, uz); CreateRotation(theta, u); } - CUDA_CALLABLE void CreateRotation(T theta, vec u){ + CUDA_CALLABLE void CreateRotation(T theta, vec3 u){ - vec u_hat = u.norm(); + vec3 u_hat = u.norm(); //assign the given Euler rotation to this quaternion w = (T)cos(theta/2); @@ -41,9 +41,9 @@ public: z = u_hat[2]*(T)sin(theta/2); } - void CreateRotation(vec from, vec to){ + CUDA_CALLABLE void CreateRotation(vec3 from, vec3 to){ - vec r = from.cross(to); //compute the rotation vector + vec3 r = from.cross(to); //compute the rotation vector T theta = asin(r.len()); //compute the angle of the rotation about r //deal with a zero vector (both k and kn point in the same direction) if(theta == (T)0){ diff --git a/stim/math/vec3.h b/stim/math/vec3.h new file mode 100644 index 0000000..0fd4dc0 --- /dev/null +++ b/stim/math/vec3.h @@ -0,0 +1,242 @@ +#ifndef STIM_VEC3_H +#define STIM_VEC3_H + + +#include + + +namespace stim{ + + +/// A class designed to act as a 3D vector with CUDA compatibility +template +class vec3{ + +protected: + T ptr[3]; + +public: + + CUDA_CALLABLE vec3(){} + + CUDA_CALLABLE vec3(T v){ + ptr[0] = ptr[1] = ptr[2] = v; + } + + CUDA_CALLABLE vec3(T x, T y, T z){ + ptr[0] = x; + ptr[1] = y; + ptr[2] = z; + } + + //copy constructor + CUDA_CALLABLE vec3( const vec3& other){ + ptr[0] = other.ptr[0]; + ptr[1] = other.ptr[1]; + ptr[2] = other.ptr[2]; + } + + //access an element using an index + CUDA_CALLABLE T& operator[](int idx){ + return ptr[idx]; + } + +/// Casting operator. Creates a new vector with a new type U. + template< typename U > + CUDA_CALLABLE operator vec3(){ + vec3 result; + result.ptr[0] = (U)ptr[0]; + result.ptr[1] = (U)ptr[1]; + result.ptr[2] = (U)ptr[2]; + + return result; + } + + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter) + CUDA_CALLABLE T len_sq() const{ + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2]; + } + + /// computes the Euclidean length of the vector + CUDA_CALLABLE T len() const{ + return sqrt(len_sq()); + } + + + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi]) + CUDA_CALLABLE vec3 cart2sph() const{ + vec3 sph; + sph.ptr[0] = len(); + sph.ptr[1] = std::atan2(ptr[1], ptr[0]); + if(sph.ptr[0] == 0) + sph.ptr[2] = 0; + else + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]); + return sph; + } + + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi]) + CUDA_CALLABLE vec3 sph2cart() const{ + vec3 cart; + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]); + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]); + cart.ptr[2] = ptr[0] * std::cos(ptr[2]); + + return cart; + } + + /// Computes the normalized vector (where each coordinate is divided by the L2 norm) + CUDA_CALLABLE vec3 norm() const{ + vec3 result; + T l = len(); //compute the vector length + return (*this) / l; + } + + /// Computes the cross product of a 3-dimensional vector + CUDA_CALLABLE vec3 cross(const vec3 rhs) const{ + + vec3 result; + + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]); + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]); + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]); + + return result; + } + + /// Compute the Euclidean inner (dot) product + CUDA_CALLABLE T dot(vec3 rhs) const{ + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2]; + } + + /// Arithmetic addition operator + + /// @param rhs is the right-hand-side operator for the addition + CUDA_CALLABLE vec3 operator+(vec3 rhs) const{ + vec3 result; + result.ptr[0] = ptr[0] + rhs[0]; + result.ptr[1] = ptr[1] + rhs[1]; + result.ptr[2] = ptr[2] + rhs[2]; + return result; + } + + /// Arithmetic addition to a scalar + + /// @param rhs is the right-hand-side operator for the addition + CUDA_CALLABLE vec3 operator+(T rhs) const{ + vec3 result; + result.ptr[0] = ptr[0] + rhs; + result.ptr[1] = ptr[1] + rhs; + result.ptr[2] = ptr[2] + rhs; + return result; + } + + /// Arithmetic subtraction operator + + /// @param rhs is the right-hand-side operator for the subtraction + CUDA_CALLABLE vec3 operator-(vec3 rhs) const{ + vec3 result; + result.ptr[0] = ptr[0] - rhs[0]; + result.ptr[1] = ptr[1] - rhs[1]; + result.ptr[2] = ptr[2] - rhs[2]; + return result; + } + /// Arithmetic subtraction to a scalar + + /// @param rhs is the right-hand-side operator for the addition + CUDA_CALLABLE vec3 operator-(T rhs) const{ + vec3 result; + result.ptr[0] = ptr[0] - rhs; + result.ptr[1] = ptr[1] - rhs; + result.ptr[2] = ptr[2] - rhs; + return result; + } + + /// Arithmetic scalar multiplication operator + + /// @param rhs is the right-hand-side operator for the subtraction + CUDA_CALLABLE vec3 operator*(T rhs) const{ + vec3 result; + result.ptr[0] = ptr[0] * rhs; + result.ptr[1] = ptr[1] * rhs; + result.ptr[2] = ptr[2] * rhs; + return result; + } + + /// Arithmetic scalar division operator + + /// @param rhs is the right-hand-side operator for the subtraction + CUDA_CALLABLE vec3 operator/(T rhs) const{ + return (*this) * ((T)1.0/rhs); + } + + /// Multiplication by a scalar, followed by assignment + CUDA_CALLABLE vec3 operator*=(T rhs){ + ptr[0] = ptr[0] * rhs; + ptr[1] = ptr[1] * rhs; + ptr[2] = ptr[2] * rhs; + return *this; + } + + /// Addition and assignment + CUDA_CALLABLE vec3 operator+=(vec3 rhs){ + ptr[0] = ptr[0] + rhs; + ptr[1] = ptr[1] + rhs; + ptr[2] = ptr[2] + rhs; + return *this; + } + + /// Assign a scalar to all values + CUDA_CALLABLE vec3 & operator=(T rhs){ + ptr[0] = ptr[0] = rhs; + ptr[1] = ptr[1] = rhs; + ptr[2] = ptr[2] = rhs; + return *this; + } + + /// Casting and assignment + template + CUDA_CALLABLE vec3 & operator=(vec3 rhs){ + ptr[0] = (T)rhs.ptr[0]; + ptr[1] = (T)rhs.ptr[1]; + ptr[2] = (T)rhs.ptr[2]; + return *this; + } + + /// Unary minus (returns the negative of the vector) + CUDA_CALLABLE vec3 operator-() const{ + vec3 result; + result.ptr[0] = -ptr[0]; + result.ptr[1] = -ptr[1]; + result.ptr[2] = -ptr[2]; + return result; + } + + + /// Outputs the vector as a string + std::string str() const{ + std::stringstream ss; + + size_t N = size(); + + ss<<"["; + for(size_t i=0; i +stim::vec3 operator*(T lhs, stim::vec3 rhs){ + return rhs * lhs; +} + +#endif \ No newline at end of file diff --git a/stim/math/vector.h b/stim/math/vector.h index ef40f93..5db6c69 100644 --- a/stim/math/vector.h +++ b/stim/math/vector.h @@ -1,5 +1,5 @@ -#ifndef RTS_VECTOR_H -#define RTS_VECTOR_H +#ifndef STIM_VECTOR_H +#define STIM_VECTOR_H #include #include @@ -11,8 +11,6 @@ namespace stim { - - template struct vec : public std::vector { diff --git a/stim/optics/beam.h b/stim/optics/beam.h deleted file mode 100644 index 8587bb4..0000000 --- a/stim/optics/beam.h +++ /dev/null @@ -1,203 +0,0 @@ -#ifndef RTS_BEAM -#define RTS_BEAM - -#include "../math/vector.h" -#include "../math/function.h" -#include "../optics/planewave.h" -#include - -namespace stim{ - -template -class beam : public planewave

-{ -public: - enum beam_type {Uniform, Bartlett, Hamming, Hanning}; - -private: - - P _na[2]; //numerical aperature of the focusing optics - vec

f; //focal point - function apod; //apodization function - unsigned int apod_res; //resolution of apodization filter functions - - void apod_uniform() - { - apod = (P)1; - } - void apod_bartlett() - { - apod = (P)1; - apod.insert((P)1, (P)0); - } - void apod_hanning() - { - apod = (P)0; - P x, y; - for(unsigned int n=0; n k = rts::vec

(0, 0, rtsTAU), - vec

_E0 = rts::vec

(1, 0, 0), - beam_type _apod = Uniform) - : planewave

(k, _E0) - { - _na[0] = (P)0.0; - _na[1] = (P)1.0; - f = vec

( (P)0, (P)0, (P)0 ); - apod_res = 256; //set the default resolution for apodization filters - set_apod(_apod); //set the apodization function type - } - - beam

refract(rts::vec

kn) const{ - - beam

new_beam; - new_beam._na[0] = _na[0]; - new_beam._na[1] = _na[1]; - - - rts::planewave

pw = planewave

::bend(kn); - //std::cout< > mc(unsigned int N = 100000, unsigned int seed = 0) const - { - /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling - of a sphere and projecting these samples onto an inscribed sphere. - - seed = seed for the random number generator - */ - srand(seed); //seed the random number generator - - vec

k_hat = beam::k.norm(); - - ///compute the rotation operator to transform (0, 0, 1) to k - P cos_angle = k_hat.dot(rts::vec

(0, 0, 1)); - rts::matrix rotation; - - //if the cosine of the angle is -1, the rotation is just a flip across the z axis - if(cos_angle == -1){ - rotation(2, 2) = -1; - } - else if(cos_angle != 1.0) - { - rts::vec

r_axis = rts::vec

(0, 0, 1).cross(k_hat).norm(); //compute the axis of rotation - P angle = acos(cos_angle); //compute the angle of rotation - rts::quaternion

quat; //create a quaternion describing the rotation - quat.CreateRotation(angle, r_axis); - rotation = quat.toMatrix3(); //compute the rotation matrix - } - - //find the phi values associated with the cassegrain ring - P PHI[2]; - PHI[0] = (P)asin(_na[0]); - PHI[1] = (P)asin(_na[1]); - - //calculate the z-axis cylinder coordinates associated with these angles - P Z[2]; - Z[0] = cos(PHI[0]); - Z[1] = cos(PHI[1]); - P range = Z[0] - Z[1]; - - std::vector< planewave

> samples; //create a vector of plane waves - - //draw a distribution of random phi, z values - P z, phi, theta; - for(int i=0; i spherical(1, theta, phi); //convert from spherical to cartesian coordinates - rts::vec

cart = spherical.sph2cart(); - vec

k_prime = rotation * cart; //create a sample vector - - //store a wave refracted along the given direction - //std::cout<<"k prime: "<::refract(k_prime) * apod(phi/PHI[1])); - } - - return samples; - } - - std::string str() - { - std::stringstream ss; - ss<<"Beam:"< -__global__ void gpu_planewave2efield(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, - planewave w, rect q) -{ - int iu = blockIdx.x * blockDim.x + threadIdx.x; - int iv = blockIdx.y * blockDim.y + threadIdx.y; - - //make sure that the thread indices are in-bounds - if(iu >= r0 || iv >= r1) return; - - //compute the index into the field - int i = iv*r0 + iu; - - //get the current position - vec p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); - vec r(p[0], p[1], p[2]); - - complex x( 0.0f, w.k.dot(r) ); - - if(Y == NULL) //if this is a scalar simulation - X[i] += w.E0.len() * exp(x); //use the vector magnitude as the plane wave amplitude - else - { - X[i] += w.E0[0] * exp(x); - Y[i] += w.E0[1] * exp(x); - Z[i] += w.E0[2] * exp(x); - } -} - -template -__global__ void gpu_efield_magnitude(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, T* M) -{ - int iu = blockIdx.x * blockDim.x + threadIdx.x; - int iv = blockIdx.y * blockDim.y + threadIdx.y; - - //make sure that the thread indices are in-bounds - if(iu >= r0 || iv >= r1) return; - - //compute the index into the field - int i = iv*r0 + iu; - - if(Y == NULL) //if this is a scalar simulation - M[i] = X[i].abs(); //compute the magnitude of the X component - else - { - M[i] = rts::vec(X[i].abs(), Y[i].abs(), Z[i].abs()).len(); - //M[i] = Z[i].abs(); - } -} - -template -__global__ void gpu_efield_real_magnitude(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, T* M) -{ - int iu = blockIdx.x * blockDim.x + threadIdx.x; - int iv = blockIdx.y * blockDim.y + threadIdx.y; - - //make sure that the thread indices are in-bounds - if(iu >= r0 || iv >= r1) return; - - //compute the index into the field - int i = iv*r0 + iu; - - if(Y == NULL) //if this is a scalar simulation - M[i] = X[i].abs(); //compute the magnitude of the X component - else - { - M[i] = rts::vec(X[i].real(), Y[i].real(), Z[i].real()).len(); - //M[i] = Z[i].abs(); - } -} - -template -__global__ void gpu_efield_polarization(complex* X, complex* Y, complex* Z, - unsigned int r0, unsigned int r1, - T* Px, T* Py, T* Pz) -{ - int iu = blockIdx.x * blockDim.x + threadIdx.x; - int iv = blockIdx.y * blockDim.y + threadIdx.y; - - //make sure that the thread indices are in-bounds - if(iu >= r0 || iv >= r1) return; - - //compute the index into the field - int i = iv*r0 + iu; - - //compute the field polarization - Px[i] = X[i].abs(); - Py[i] = Y[i].abs(); - Pz[i] = Z[i].abs(); - -} - -/* This function computes the sum of two complex fields and stores the result in *dest -*/ -template -__global__ void gpu_efield_sum(complex* dest, complex* src, unsigned int r0, unsigned int r1) -{ - int iu = blockIdx.x * blockDim.x + threadIdx.x; - int iv = blockIdx.y * blockDim.y + threadIdx.y; - - //make sure that the thread indices are in-bounds - if(iu >= r0 || iv >= r1) return; - - //compute the index into the field - int i = iv*r0 + iu; - - //sum the fields - dest[i] += src[i]; -} - -/* This class implements a discrete representation of an electromagnetic field - in 2D. The majority of this representation is done on the GPU. -*/ -template -class efield -{ -protected: - - bool vector; - - //gpu pointer to the field data - rts::complex* X; - rts::complex* Y; - rts::complex* Z; - - //resolution of the discrete field - unsigned int R[2]; - - //shape of the 2D field - rect pos; - - void from_planewave(planewave p) - { - unsigned int SQRT_BLOCK = 16; - //create one thread for each detector pixel - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); - - - gpu_planewave2efield <<>> (X, Y, Z, R[0], R[1], p, pos); - } - - void init(unsigned int res0, unsigned int res1, bool _vector) - { - vector = _vector; //initialize field type - - X = Y = Z = NULL; //initialize all pointers to NULL - R[0] = res0; - R[1] = res1; - - //allocate memory on the gpu - cudaMalloc(&X, sizeof(rts::complex) * R[0] * R[1]); - cudaMemset(X, 0, sizeof(rts::complex) * R[0] * R[1]); - - if(vector) - { - cudaMalloc(&Y, sizeof(rts::complex) * R[0] * R[1]); - cudaMemset(Y, 0, sizeof(rts::complex) * R[0] * R[1]); - - cudaMalloc(&Z, sizeof(rts::complex) * R[0] * R[1]); - cudaMemset(Z, 0, sizeof(rts::complex) * R[0] * R[1]); - } - } - - void destroy() - { - if(X != NULL) cudaFree(X); - if(Y != NULL) cudaFree(Y); - if(Z != NULL) cudaFree(Z); - } - - void shallowcpy(const rts::efield & src) - { - vector = src.vector; - R[0] = src.R[0]; - R[1] = src.R[1]; - } - - void deepcpy(const rts::efield & src) - { - //perform a shallow copy - shallowcpy(src); - - //allocate memory on the gpu - if(src.X != NULL) - { - cudaMalloc(&X, sizeof(rts::complex) * R[0] * R[1]); - cudaMemcpy(X, src.X, sizeof(rts::complex) * R[0] * R[1], cudaMemcpyDeviceToDevice); - } - if(src.Y != NULL) - { - cudaMalloc(&Y, sizeof(rts::complex) * R[0] * R[1]); - cudaMemcpy(Y, src.Y, sizeof(rts::complex) * R[0] * R[1], cudaMemcpyDeviceToDevice); - } - if(src.Z != NULL) - { - cudaMalloc(&Z, sizeof(rts::complex) * R[0] * R[1]); - cudaMemcpy(Z, src.Z, sizeof(rts::complex) * R[0] * R[1], cudaMemcpyDeviceToDevice); - } - } - -public: - efield(unsigned int res0, unsigned int res1, bool _vector = true) - { - init(res0, res1, _vector); - //pos = rts::rect(rts::vec(-10, 0, -10), rts::vec(-10, 0, 10), rts::vec(10, 0, 10)); - } - - //destructor - ~efield() - { - destroy(); - } - - ///Clear the field - set all points to zero - void clear() - { - cudaMemset(X, 0, sizeof(rts::complex) * R[0] * R[1]); - - if(vector) - { - cudaMemset(Y, 0, sizeof(rts::complex) * R[0] * R[1]); - cudaMemset(Z, 0, sizeof(rts::complex) * R[0] * R[1]); - } - } - - void position(rect _p) - { - pos = _p; - } - - //access functions - complex* x(){ - return X; - } - complex* y(){ - return Y; - } - complex* z(){ - return Z; - } - unsigned int Ru(){ - return R[0]; - } - unsigned int Rv(){ - return R[1]; - } - rect p(){ - return pos; - } - - std::string str() - { - stringstream ss; - ss< & operator= (const efield & rhs) - { - destroy(); //destroy any previous information about this field - deepcpy(rhs); //create a deep copy - return *this; //return the current object - } - - //assignment operator: build an electric field from a plane wave - efield & operator= (const planewave & rhs) - { - - clear(); //clear any previous field data - from_planewave(rhs); //create a field from the planewave - return *this; //return the current object - } - - //assignment operator: add an existing electric field - efield & operator+= (const efield & rhs) - { - //if this field and the source field represent the same regions in space - if(R[0] == rhs.R[0] && R[1] == rhs.R[1] && pos == rhs.pos) - { - int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); - - //create one thread for each detector pixel - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); - - //sum the fields - gpu_efield_sum <<>> (X, rhs.X, R[0], R[1]); - if(Y != NULL) - { - gpu_efield_sum <<>> (Y, rhs.Y, R[0], R[1]); - gpu_efield_sum <<>> (Z, rhs.Z, R[0], R[1]); - } - } - else - { - std::cout<<"ERROR in efield: The two summed fields do not represent the same geometry."< & operator= (const rts::beam & rhs) - { - //get a vector of monte-carlo samples - std::vector< rts::planewave > p_list = rhs.mc(); - - clear(); //clear any previous field data - for(unsigned int i = 0; i < p_list.size(); i++) - from_planewave(p_list[i]); - return *this; - } - - - //return a scalar field representing field magnitude - realfield mag() - { - int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); - - //create a scalar field to store the result - realfield M(R[0], R[1]); - - //create one thread for each detector pixel - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); - - //compute the magnitude and store it in a scalar field - gpu_efield_magnitude <<>> (X, Y, Z, R[0], R[1], M.ptr(0)); - - return M; - } - - //return a sacalar field representing the field magnitude at an infinitely small point in time - realfield real_mag() - { - int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); - - //create a scalar field to store the result - realfield M(R[0], R[1]); - - //create one thread for each detector pixel - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); - - //compute the magnitude and store it in a scalar field - gpu_efield_real_magnitude <<>> (X, Y, Z, R[0], R[1], M.ptr(0)); - - return M; - } - - //return a vector field representing field polarization - realfield polarization() - { - if(!vector) - { - std::cout<<"ERROR: Cannot compute polarization of a scalar field."< Pol(R[0], R[1]); //create a vector field to store the result - - //compute the polarization and store it in the vector field - gpu_efield_polarization <<>> (X, Y, Z, R[0], R[1], Pol.ptr(0), Pol.ptr(1), Pol.ptr(2)); - - return Pol; //return the vector field - } - - ////////FRIENDSHIP - //friend void operator<< (rts::efield &ef, rts::halfspace hs); - - -}; - - - -} //end namespace rts - -#endif diff --git a/stim/optics/esphere.cuh b/stim/optics/esphere.cuh deleted file mode 100644 index 1b5a559..0000000 --- a/stim/optics/esphere.cuh +++ /dev/null @@ -1,162 +0,0 @@ -#ifndef RTS_ESPHERE -#define RTS_ESPHERE - -#include "../math/complex.h" -#include "../math/bessel.h" -#include "../visualization/colormap.h" -#include "../optics/planewave.h" -#include "../cuda/devices.h" -#include "../optics/efield.cuh" - -namespace stim{ - -/* This class implements a discrete representation of an electromagnetic field - in 2D scattered by a sphere. This class implements Mie scattering. -*/ -template -class esphere : public efield

-{ -private: - stim::complex

n; //sphere refractive index - P a; //sphere radius - - //parameters dependent on wavelength - unsigned int Nl; //number of orders for the calculation - rts::complex

* A; //internal scattering coefficients - rts::complex

* B; //external scattering coefficients - - void calcNl(P kmag) - { - //return ceil( ((P)6.282 * a) / lambda + 4 * pow( ((P)6.282 * a) / lambda, ((P)1/(P)3)) + 2); - Nl = ceil( kmag*a + 4 * pow(kmag * a, (P)1/(P)3) + 2); - } - - void calcAB(P k, unsigned int Nl, rts::complex

* A, rts::complex

* B) - { - /* These calculations require double precision, so they are computed - using doubles and converted to P at the end. - - Input: - - k = magnitude of the k vector (tau/lambda) - Nl = highest order coefficient ([0 Nl] are computed) - */ - - //clear the previous coefficients - rts::complex

* cpuA = (rts::complex

*)malloc(sizeof(rts::complex

) * (Nl+1)); - rts::complex

* cpuB = (rts::complex

*)malloc(sizeof(rts::complex

) * (Nl+1)); - - //convert to an std complex value - complex nc = (rts::complex)n; - - //compute the magnitude of the k vector - complex kna = nc * k * (double)a; - - //compute the arguments k*a and k*n*a - complex ka(k * a, 0.0); - - //allocate space for the Bessel functions of the first and second kind (and derivatives) - unsigned int bytes = sizeof(complex) * (Nl + 1); - complex* cjv_ka = (complex*)malloc(bytes); - complex* cyv_ka = (complex*)malloc(bytes); - complex* cjvp_ka = (complex*)malloc(bytes); - complex* cyvp_ka = (complex*)malloc(bytes); - complex* cjv_kna = (complex*)malloc(bytes); - complex* cyv_kna = (complex*)malloc(bytes); - complex* cjvp_kna = (complex*)malloc(bytes); - complex* cyvp_kna = (complex*)malloc(bytes); - - //allocate space for the spherical Hankel functions and derivative - complex* chv_ka = (complex*)malloc(bytes); - complex* chvp_ka = (complex*)malloc(bytes); - - //compute the bessel functions using the CPU-based algorithm - double vm; - cbessjyva_sph(Nl, ka, vm, cjv_ka, cyv_ka, cjvp_ka, cyvp_ka); - cbessjyva_sph(Nl, kna, vm, cjv_kna, cyv_kna, cjvp_kna, cyvp_kna); - - //compute A for each order - complex i(0, 1); - complex a, b, c, d; - complex An, Bn; - for(int l=0; l<=Nl; l++) - { - //compute the Hankel functions from j and y - chv_ka[l] = cjv_ka[l] + i * cyv_ka[l]; - chvp_ka[l] = cjvp_ka[l] + i * cyvp_ka[l]; - - //Compute A (internal scattering coefficient) - //compute the numerator and denominator for A - a = cjv_ka[l] * chvp_ka[l] - cjvp_ka[l] * chv_ka[l]; - b = cjv_kna[l] * chvp_ka[l] - chv_ka[l] * cjvp_kna[l] * nc; - - //calculate A and add it to the list - rts::complex An = (2.0 * l + 1.0) * pow(i, l) * (a / b); - cpuA[l] = (rts::complex

)An; - - //Compute B (external scattering coefficient) - c = cjv_ka[l] * cjvp_kna[l] * nc - cjv_kna[l] * cjvp_ka[l]; - d = cjv_kna[l] * chvp_ka[l] - chv_ka[l] * cjvp_kna[l] * nc; - - //calculate B and add it to the list - rts::complex Bn = (2.0 * l + 1.0) * pow(i, l) * (c / d); - cpuB[l] = (rts::complex

)Bn; - - std::cout<<"A: "<) * (Nl+1)); //allocate memory for new coefficients - cudaMalloc(&B, sizeof(rts::complex

) * (Nl+1)); - - //copy the calculations from the CPU to the GPU - cudaMemcpy(A, cpuA, sizeof(rts::complex

) * (Nl+1), cudaMemcpyDeviceToHost); - cudaMemcpy(B, cpuB, sizeof(rts::complex

) * (Nl+1), cudaMemcpyDeviceToHost); - } - -public: - - esphere(unsigned int res0, unsigned int res1, P _a, rts::complex

_n, bool _scalar = false) : - efield

(res0, res1, _scalar) - { - std::cout<<"Sphere scattered field created."< & operator= (const planewave

& rhs) - { - calcNl(rhs.kmag()); //compute the number of scattering coefficients - std::cout<<"Nl: "<::str()< -__global__ void gpu_halfspace_pw2ef(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, - plane P, planewave w, rect q, bool at_surface = false) -{ - int iu = blockIdx.x * blockDim.x + threadIdx.x; - int iv = blockIdx.y * blockDim.y + threadIdx.y; - - //make sure that the thread indices are in-bounds - if(iu >= r0 || iv >= r1) return; - - //compute the index into the field - int i = iv*r0 + iu; - - //get the current position - vec p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); - - if(at_surface){ - if(P.side(p) > 0) - return; - } - else{ - if(P.side(p) >= 0) - return; - } - - //if the current position is on the wrong side of the plane - - //vec r(p[0], p[1], p[2]); - - complex x( 0.0f, w.kvec().dot(p) ); - //complex phase( 0.0f, w.phase()); - - if(Y == NULL) //if this is a scalar simulation - X[i] += w.E().len() * exp(x); //use the vector magnitude as the plane wave amplitude - else - { - //X[i] = Y[i] = Z[i] = 1; - X[i] += w.E()[0] * exp(x);// * exp(phase); - Y[i] += w.E()[1] * exp(x);// * exp(phase); - Z[i] += w.E()[2] * exp(x);// * exp(phase); - } -} - -//GPU kernel to compute the electric displacement field -template -__global__ void gpu_halfspace_pw2df(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, - plane P, planewave w, rect q, T n, bool at_surface = false) -{ - int iu = blockIdx.x * blockDim.x + threadIdx.x; - int iv = blockIdx.y * blockDim.y + threadIdx.y; - - //make sure that the thread indices are in-bounds - if(iu >= r0 || iv >= r1) return; - - //compute the index into the field - int i = iv*r0 + iu; - - //get the current position - vec p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); - - if(at_surface){ - if(P.side(p) > 0) - return; - } - else{ - if(P.side(p) >= 0) - return; - } - - //if the current position is on the wrong side of the plane - - //vec r(p[0], p[1], p[2]); - - complex x( 0.0f, w.kvec().dot(p) ); - //complex phase( 0.0f, w.phase()); - - //vec< complex > testE(1, 0, 0); - - - - if(Y == NULL) //if this is a scalar simulation - X[i] += w.E().len() * exp(x); //use the vector magnitude as the plane wave amplitude - else - { - plane< complex > cplane = plane< complex, 3>(P); - vec< complex > E_para;// = cplane.parallel(w.E()); - vec< complex > E_perp;// = cplane.perpendicular(w.E()) * (n*n); - cplane.decompose(w.E(), E_para, E_perp); - T epsilon = n*n; - - X[i] += (E_para[0] + E_perp[0] * epsilon) * exp(x); - Y[i] += (E_para[1] + E_perp[1] * epsilon) * exp(x); - Z[i] += (E_para[2] + E_perp[2] * epsilon) * exp(x); - } -} - -//computes a scalar field containing the refractive index of the half-space at each point -template -__global__ void gpu_halfspace_n(T* n, unsigned int r0, unsigned int r1, rect q, plane P, T n0, T n1){ - - int iu = blockIdx.x * blockDim.x + threadIdx.x; - int iv = blockIdx.y * blockDim.y + threadIdx.y; - - //make sure that the thread indices are in-bounds - if(iu >= r0 || iv >= r1) return; - - //compute the index into the field - int i = iv*r0 + iu; - - //get the current position - vec p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); - - //set the appropriate refractive index - if(P.side(p) < 0) n[i] = n0; - else n[i] = n1; -} - -template -class halfspace -{ -private: - rts::plane S; //surface plane splitting the half space - rts::complex n0; //refractive index at the front of the plane - rts::complex n1; //refractive index at the back of the plane - - //lists of waves in front (pw0) and behind (pw1) the plane - std::vector< rts::planewave > w0; - std::vector< rts::planewave > w1; - - //rts::planewave pi; //incident plane wave - //rts::planewave pr; //plane wave reflected from the surface - //rts::planewave pt; //plane wave transmitted through the surface - - void init(){ - n0 = 1.0; - n1 = 1.5; - } - - //compute which side of the interface is hit by the incoming plane wave (0 = front, 1 = back) - bool facing(planewave p){ - if(p.kvec().dot(S.norm()) > 0) - return 1; - else - return 0; - } - - T calc_theta_i(vec v){ - - vec v_hat = v.norm(); - - //compute the cosine of the angle between k_hat and the plane normal - T cos_theta_i = v_hat.dot(S.norm()); - - return acos(abs(cos_theta_i)); - } - - T calc_theta_t(T ni_nt, T theta_i){ - - T sin_theta_t = ni_nt * sin(theta_i); - return asin(sin_theta_t); - } - - -public: - - //constructors - halfspace(){ - init(); - } - - halfspace(T na, T nb){ - init(); - n0 = na; - n1 = nb; - } - - halfspace(T na, T nb, plane p){ - n0 = na; - n1 = nb; - S = p; - } - - //compute the transmitted and reflective waves given the incident (vacuum) plane wave p - void incident(rts::planewave p){ - - planewave r, t; - p.scatter(S, n1.real()/n0.real(), r, t); - - //std::cout<<"i+r: "< b, unsigned int N = 10000){ - - //generate a plane wave decomposition for the beam - std::vector< planewave > pw_list = b.mc(N); - - //calculate the reflected and refracted waves for each incident wave - for(unsigned int w = 0; w < pw_list.size(); w++){ - incident(pw_list[w]); - } - } - - //return the electric field at the specified resolution and position - rts::efield E(unsigned int r0, unsigned int r1, rect R){ - - int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); - - //create one thread for each detector pixel - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); - dim3 dimGrid((r0 + SQRT_BLOCK -1)/SQRT_BLOCK, (r1 + SQRT_BLOCK - 1)/SQRT_BLOCK); - - //create an electric field - rts::efield ef(r0, r1); - ef.position(R); - - //render each plane wave - - //plane waves at the surface front - for(unsigned int w = 0; w < w0.size(); w++){ - //std::cout<<"w0 "< <<>> (ef.x(), ef.y(), ef.z(), r0, r1, S, w0[w], ef.p()); - } - - //plane waves at the surface back - for(unsigned int w = 0; w < w1.size(); w++){ - //std::cout<<"w1 "< <<>> (ef.x(), ef.y(), ef.z(), r0, r1, -S, w1[w], ef.p(), true); - } - - return ef; - } - - //return the electric displacement at the specified resolution and position - rts::efield D(unsigned int r0, unsigned int r1, rect R){ - - int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); - - //create one thread for each detector pixel - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); - dim3 dimGrid((r0 + SQRT_BLOCK -1)/SQRT_BLOCK, (r1 + SQRT_BLOCK - 1)/SQRT_BLOCK); - - //create a complex vector field - rts::efield df(r0, r1); - df.position(R); - - //render each plane wave - - //plane waves at the surface front - for(unsigned int w = 0; w < w0.size(); w++){ - //std::cout<<"w0 "< <<>> (df.x(), df.y(), df.z(), r0, r1, S, w0[w], df.p(), n0.real()); - } - - //plane waves at the surface back - for(unsigned int w = 0; w < w1.size(); w++){ - //std::cout<<"w1 "< <<>> (df.x(), df.y(), df.z(), r0, r1, -S, w1[w], df.p(), n1.real(), true); - } - - return df; - } - - realfield nfield(unsigned int Ru, unsigned int Rv, rect p){ - - int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); - - //create one thread for each detector pixel - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); - dim3 dimGrid((Ru + SQRT_BLOCK -1)/SQRT_BLOCK, (Rv + SQRT_BLOCK - 1)/SQRT_BLOCK); - - realfield n(Ru, Rv); //create a scalar field to store the result of n - - rts::gpu_halfspace_n <<>> (n.ptr(), Ru, Rv, p, S, n0.real(), n1.real()); - - return n; - } - - std::string str(){ - std::stringstream ss; - ss<<"Half Space-------------"< (rts::efield &ef, rts::halfspace hs); - -}; - - - - -} - - -#endif diff --git a/stim/optics/material.h b/stim/optics/material.h deleted file mode 100644 index fb97cd6..0000000 --- a/stim/optics/material.h +++ /dev/null @@ -1,153 +0,0 @@ -#ifndef RTS_MATERIAL_H -#define RTS_MATERIAL_H - -#include -#include -#include -#include -#include -#include -#include -#include "../math/complex.h" -#include "../math/constants.h" -#include "../math/function.h" - -namespace stim{ - -//Material class - default representation for the material property is the refractive index (RI) -template -class material : public function< T, complex >{ - -public: - enum wave_property{microns, inverse_cm}; - enum material_property{ri, absorbance}; - -private: - - using function< T, complex >::X; - using function< T, complex >::Y; - using function< T, complex >::insert; - using function< T, complex >::bounding; - - std::string name; //name for the material (defaults to file name) - - void process_header(std::string str, wave_property& wp, material_property& mp){ - - std::stringstream ss(str); //create a stream from the data string - std::string line; - std::getline(ss, line); //get the first line as a string - while(line[0] == '#'){ //continue looping while the line is a comment - - std::stringstream lstream(line); //create a stream from the line - lstream.ignore(); //ignore the first character ('#') - - std::string prop; //get the property name - lstream>>prop; - - if(prop == "X"){ - std::string wp_name; - lstream>>wp_name; - if(wp_name == "microns") wp = microns; - else if(wp_name == "inverse_cm") wp = inverse_cm; - } - else if(prop == "Y"){ - std::string mp_name; - lstream>>mp_name; - if(mp_name == "ri") mp = ri; - else if(mp_name == "absorbance") mp = absorbance; - } - - std::getline(ss, line); //get the next line - } - - function< T, stim::complex >::process_string(str); - } - - void from_inverse_cm(){ - //convert inverse centimeters to wavelength (in microns) - for(unsigned int i=0; i(1, 0); - } - - -public: - - material(std::string filename, wave_property wp, material_property mp){ - name = filename; - load(filename, wp, mp); - } - - material(std::string filename){ - name = filename; - load(filename); - } - - material(){ - init(); - } - - complex getN(T lambda){ - return function< T, complex >::linear(lambda); - } - - void load(std::string filename, wave_property wp, material_property mp){ - - //load the file as a function - function< T, complex >::load(filename); - } - - void load(std::string filename){ - - wave_property wp = inverse_cm; - material_property mp = ri; - //turn the file into a string - std::ifstream t(filename.c_str()); //open the file as a stream - - if(!t){ - std::cout<<"ERROR: Couldn't open the material file '"<(t)), - std::istreambuf_iterator()); - - //process the header information - process_header(str, wp, mp); - - //convert units - if(wp == inverse_cm) - from_inverse_cm(); - //set the bounding values - bounding[0] = Y[0]; - bounding[1] = Y.back(); - } - std::string str(){ - std::stringstream ss; - ss< >::str(); - return ss.str(); - } - std::string get_name(){ - return name; - } - - void set_name(std::string str){ - name = str; - } - -}; - -} - - - - -#endif diff --git a/stim/optics/mirst-1d.cuh b/stim/optics/mirst-1d.cuh deleted file mode 100644 index 9279087..0000000 --- a/stim/optics/mirst-1d.cuh +++ /dev/null @@ -1,447 +0,0 @@ -#include "../optics/material.h" -#include "../math/complexfield.cuh" -#include "../math/constants.h" -//#include "../envi/bil.h" - -#include "cufft.h" - -#include -#include - -namespace stim{ - -//this function writes a sinc function to "dest" such that an iFFT produces a slab -template -__global__ void gpu_mirst1d_layer_fft(complex* dest, complex* ri, - T* src, T* zf, - T w, unsigned int zR, unsigned int nuR){ - //dest = complex field representing the sample - //ri = refractive indices for each wavelength - //src = intensity of the light source for each wavelength - //zf = z position of the slab interface for each wavelength (accounting for optical path length) - //w = width of the slab (in pixels) - //zR = number of z-axis samples - //nuR = number of wavelengths - - //get the current coordinate in the plane slice - int ifz = blockIdx.x * blockDim.x + threadIdx.x; - int inu = blockIdx.y * blockDim.y + threadIdx.y; - - //make sure that the thread indices are in-bounds - if(inu >= nuR || ifz >= zR) return; - - int i = inu * zR + ifz; - - T fz; - if(ifz < zR/2) - fz = ifz / (T)zR; - else - fz = -(zR - ifz) / (T)zR; - - //if the slab starts outside of the simulation domain, just return - if(zf[inu] >= zR) return; - - //fill the array along z with a sinc function representing the Fourier transform of the layer - - T opl = w * ri[inu].real(); //optical path length - - //handle the case where the slab goes outside the simulation domain - if(zf[inu] + opl >= zR) - opl = zR - zf[inu]; - - if(opl == 0) return; - - //T l = w * ri[inu].real(); - //complex e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0)); - complex e(0, -2 * stimPI * fz * (zf[inu] + opl/2)); - - complex eta = ri[inu] * ri[inu] - 1; - - //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz); - if(ifz == 0) - dest[i] += opl * exp(e) * eta * src[inu]; - else - dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl); -} - -template -__global__ void gpu_mirst1d_increment_z(T* zf, complex* ri, T w, unsigned int S){ - //zf = current z depth (optical path length) in pixels - //ri = refractive index of the material - //w = actual width of the layer (in pixels) - - - //compute the index for this thread - int i = blockIdx.x * blockDim.x + threadIdx.x; - if(i >= S) return; - - if(ri == NULL) - zf[i] += w; - else - zf[i] += ri[i].real() * w; -} - -//apply the 1D MIRST filter to an existing sample (overwriting the sample) -template -__global__ void gpu_mirst1d_apply_filter(complex* sampleFFT, T* lambda, - T dFz, - T inNA, T outNA, - unsigned int lambdaR, unsigned int zR, - T sigma = 0){ - //sampleFFT = the sample in the Fourier domain (will be overwritten) - //lambda = list of wavelengths - //dFz = delta along the Fz axis in the frequency domain - //inNA = NA of the internal obscuration - //outNA = NA of the objective - //zR = number of pixels along the Fz axis (same as the z-axis) - //lambdaR = number of wavelengths - //sigma = width of the Gaussian source - int ifz = blockIdx.x * blockDim.x + threadIdx.x; - int inu = blockIdx.y * blockDim.y + threadIdx.y; - - if(inu >= lambdaR || ifz >= zR) return; - - //calculate the index into the sample FT - int i = inu * zR + ifz; - - //compute the frequency (and set all negative spatial frequencies to zero) - T fz; - if(ifz < zR / 2) - fz = ifz * dFz; - //if the spatial frequency is negative, set it to zero and exit - else{ - sampleFFT[i] = 0; - return; - } - - //compute the frequency in inverse microns - T nu = 1/lambda[inu]; - - //determine the radius of the integration circle - T nu_sq = nu * nu; - T fz_sq = (fz * fz) / 4; - - //cut off frequencies above the diffraction limit - T r; - if(fz_sq < nu_sq) - r = sqrt(nu_sq - fz_sq); - else - r = 0; - - //account for the optics - T Q = 0; - if(r > nu * inNA && r < nu * outNA) - Q = 1; - - //account for the source - //T sigma = 30.0; - T s = exp( - (r*r * sigma*sigma) / 2 ); - //T s=1; - - //compute the final filter - T mirst = 0; - if(fz != 0) - mirst = 2 * stimPI * r * s * Q * (1/fz); - - sampleFFT[i] *= mirst; - -} - -/*This object performs a 1-dimensional (layered) MIRST simulation -*/ -template -class mirst1d{ - -private: - unsigned int Z; //z-axis resolution - unsigned int pad; //pixel padding on either side of the sample - - std::vector< material > matlist; //list of materials - std::vector< T > layers; //list of layer thicknesses - - std::vector< T > lambdas; //list of wavelengths that are being simulated - unsigned int S; //number of wavelengths (size of "lambdas") - - T NA[2]; //numerical aperature (central obscuration and outer diameter) - - function source_profile; //profile (spectrum) of the source (expressed in inverse centimeters) - - complexfield scratch; //scratch GPU memory used to build samples, transforms, etc. - - void fft(int direction = CUFFT_FORWARD){ - - unsigned padZ = Z + pad; - - //create cuFFT handles - cufftHandle plan; - cufftResult result; - - if(sizeof(T) == 4) - result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision - else - result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision - - //check for Plan 1D errors - if(result != CUFFT_SUCCESS){ - std::cout<<"Error creating CUFFT plan for computing the FFT:"<(Z + pad , lambdas.size()); - scratch = 0; - } - - //get the list of scattering efficiency (eta) values for a specified layer - std::vector< complex > layer_etas(unsigned int l){ - - std::vector< complex > etas; - - //fill the list of etas - for(unsigned int i=0; i* gpuRi; - HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex) * S)); - - //allocate memory for the source profile - T* gpuSrc; - HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S)); - - complex ri; - T source; - //store the refractive index and source profile in a CPU array - for(int inu=0; inu), cudaMemcpyHostToDevice )); - - //save the source profile to the GPU - source = source_profile(10000 / lambdas[inu]); - HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice )); - - } - - //create one thread for each pixel of the field slice - dim3 gridDim, blockDim; - cuda_params(gridDim, blockDim); - stim::gpu_mirst1d_layer_fft<<>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S); - - int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size - int linGrid = S / linBlock + 1; - stim::gpu_mirst1d_increment_z <<>>(zf, gpuRi, wpx, S); - - //free memory - HANDLE_ERROR(cudaFree(gpuRi)); - HANDLE_ERROR(cudaFree(gpuSrc)); - } - - void build_sample(){ - init_scratch(); //initialize the GPU scratch space - //build_layer(1); - - T* zf; - HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S)); - HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S)); - - //render each layer of the sample - for(unsigned int l=0; l>>(scratch.ptr(), gpuLambdas, - dFz, - NA[0], NA[1], - S, Zpad); - } - - //crop the image to the sample thickness - keep in mind that sample thickness != optical path length - void crop(){ - - scratch = scratch.crop(Z, S); - } - - //save the scratch field as a binary file - void to_binary(std::string filename){ - - } - - -public: - - //constructor - mirst1d(unsigned int rZ = 100, - unsigned int padding = 0){ - Z = rZ; - pad = padding; - NA[0] = 0; - NA[1] = 0.8; - S = 0; - source_profile = 1; - } - - //add a layer, thickness = microns - void add_layer(material mat, T thickness){ - matlist.push_back(mat); - layers.push_back(thickness); - } - - void add_layer(std::string filename, T thickness){ - add_layer(material(filename), thickness); - } - - //adds a profile spectrum for the light source - void set_source(std::string filename){ - source_profile.load(filename); - } - - //adds a block of wavenumbers (cm^-1) to the simulation parameters - void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){ - unsigned int nu = start; - while(nu <= stop){ - lambdas.push_back((T)10000 / nu); - nu += step; - } - S = lambdas.size(); //increment the number of wavelengths (shorthand for later) - } - - T thickness(){ - T t = 0; - for(unsigned int l=0; l get_source(){ - return source_profile; - } - - void save_sample(std::string filename){ - //create a sample and save the magnitude as an image - build_sample(); - fft(CUFFT_INVERSE); - scratch.toImage(filename, stim::complexfield::magnitude); - } - - void save_mirst(std::string filename, bool binary = true){ - //apply the MIRST filter to a sample and save the image - - //build the sample in the Fourier domain - build_sample(); - - //apply the MIRST filter - apply_filter(); - - //apply an inverse FFT to bring the results back into the spatial domain - fft(CUFFT_INVERSE); - - crop(); - - //save the image - if(binary) - to_binary(filename); - else - scratch.toImage(filename, stim::complexfield::magnitude); - } - - - - - std::string str(){ - - stringstream ss; - ss<<"1D MIRST Simulation========================="< #include +#include #include "../math/vector.h" #include "../math/quaternion.h" #include "../math/constants.h" #include "../math/plane.h" -#include "../cuda/callable.h" - -/*Basic conversions used here (assuming a vacuum) - lambda = -*/ +#include "../math/complex.h" namespace stim{ + namespace optics{ + + /// evaluate the scalar field produced by a plane wave at a point (x, y, z) + + /// @param x is the x-coordinate of the point + /// @param y is the y-coordinate of the point + /// @param z is the z-coordinate of the point + /// @param A is the amplitude of the plane wave, specifically the field at (0, 0, 0) + /// @param kx is the k-vector component in the x direction + /// @param ky is the k-vector component in the y direction + /// @param kz is the k-vector component in the z direction + template + stim::complex planewave_scalar(T x, T y, T z, stim::complex A, T kx, T ky, T kz){ + T d = x * kx + y * ky + z * kz; //calculate the dot product between k and p = (x, y, z) to find the distance p is along the propagation direction + stim::complex di = stim::complex(0, d); //calculate the phase shift that will have to be applied to propagate the wave distance d + return A * exp(di); //multiply the phase term by the amplitude at (0, 0, 0) to propagate the wave to p + } + + /// evaluate the scalar field produced by a plane wave at several positions + + /// @param field is a pre-allocated block of memory that will store the complex field at all points + /// @param N is the number of field values to be evaluated + /// @param x is a set of x coordinates defining positions within the field (NULL implies that all values are zero) + /// @param y is a set of y coordinates defining positions within the field (NULL implies that all values are zero) + /// @param z is a set of z coordinates defining positions within the field (NULL implies that all values are zero) + /// @param A is the amplitude of the plane wave, specifically the field at (0, 0, 0) + /// @param kx is the k-vector component in the x direction + /// @param ky is the k-vector component in the y direction + /// @param kz is the k-vector component in the z direction + template + void cpu_planewave_scalar(stim::complex* field, size_t N, T* x, T* y = NULL, T* z = NULL, stim::complex A = 1.0, T kx = 0.0, T ky = 0.0, T kz = 0.0){ + T px, py, pz; + for(size_t i = 0; i < N; i++){ // for each element in the array + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values + (y == NULL) ? py = 0 : py = y[i]; + (z == NULL) ? pz = 0 : pz = z[i]; + + field[i] = planewave_scalar(px, py, pz, A, kx, ky, kz); // call the single-value plane wave function + } + } template class planewave{ protected: - vec k; //k = tau / lambda - vec< complex > E0; //amplitude - //T phi; - - CUDA_CALLABLE planewave bend(rts::vec kn) const{ + stim::vec k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda + stim::vec< stim::complex > E0; //amplitude (for a scalar plane wave, only E0[0] is used) - vec kn_hat = kn.norm(); //normalize the new k - vec k_hat = k.norm(); //normalize the current k + /// Bend a plane wave via refraction, given that the new propagation direction is known + CUDA_CALLABLE planewave bend(stim::vec kn) const{ - //std::cout<<"PLANE WAVE BENDING------------------"< kn_hat = kn.norm(); //normalize the new k + stim::vec k_hat = k.norm(); //normalize the current k - planewave new_p; //create a new plane wave + planewave new_p; //create a new plane wave - //if kn is equal to k or -k, handle the degenerate case - T k_dot_kn = k_hat.dot(kn_hat); + T k_dot_kn = k_hat.dot(kn_hat); //if kn is equal to k or -k, handle the degenerate case //if k . n < 0, then the bend is a reflection - //flip k_hat - if(k_dot_kn < 0) k_hat = -k_hat; + if(k_dot_kn < 0) k_hat = -k_hat; //flip k_hat - //std::cout<<"k dot kn: "< r = k_hat.cross(kn_hat); //compute the rotation vector - - //std::cout<<"r: "< q; - q.CreateRotation(theta, r.norm()); - - //apply the rotation to E0 - vec< complex > E0n = q.toMatrix3() * E0; - + vec r = k_hat.cross(kn_hat); //compute the rotation vector + T theta = asin(r.len()); //compute the angle of the rotation about r + quaternion q; //create a quaternion to capture the rotation + q.CreateRotation(theta, r.norm()); + vec< complex > E0n = q.toMatrix3() * E0; //apply the rotation to E0 new_p.k = kn_hat * kmag(); new_p.E0 = E0n; @@ -86,16 +98,9 @@ protected: public: - - ///constructor: create a plane wave propagating along z, polarized along x - /*planewave(T lambda = (T)1) - { - k = rts::vec(0, 0, 1) * (TAU/lambda); - E0 = rts::vec(1, 0, 0); - }*/ - ///constructor: create a plane wave propagating along k, polarized along _E0, at frequency _omega - CUDA_CALLABLE planewave(vec kvec = rts::vec(0, 0, rtsTAU), - vec< complex > E = rts::vec(1, 0, 0), T phase = 0) + ///constructor: create a plane wave propagating along k + CUDA_CALLABLE planewave(vec kvec = stim::vec(0, 0, stim::TAU), + vec< complex > E = stim::vec(1, 0, 0)) { //phi = phase; @@ -107,27 +112,23 @@ public: else{ vec< complex > s = (k_hat.cross(E)).norm(); //compute an orthogonal side vector vec< complex > E_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector - E0 = E_hat * E_hat.dot(E); //compute the projection of _E0 onto E0_hat + E0 = E_hat;// * E_hat.dot(E); //compute the projection of _E0 onto E0_hat } E0 = E0 * exp( complex(0, phase) ); } ///multiplication operator: scale E0 - CUDA_CALLABLE planewave & operator* (const T & rhs) - { - + CUDA_CALLABLE planewave & operator* (const T & rhs){ E0 = E0 * rhs; return *this; } - CUDA_CALLABLE T lambda() const - { - return rtsTAU / k.len(); + CUDA_CALLABLE T lambda() const{ + return stim::TAU / k.len(); } - CUDA_CALLABLE T kmag() const - { + CUDA_CALLABLE T kmag() const{ return k.len(); } @@ -139,14 +140,11 @@ public: return k; } - /*CUDA_CALLABLE T phase(){ - return phi; + /// calculate the value of the field produced by the plane wave given a three-dimensional position + CUDA_CALLABLE vec< complex > pos(T x, T y, T z){ + return pos( stim::vec(x, y, z) ); } - CUDA_CALLABLE void phase(T p){ - phi = p; - }*/ - CUDA_CALLABLE vec< complex > pos(vec p = vec(0, 0, 0)){ vec< complex > result; @@ -166,18 +164,32 @@ public: return planewave(k * (nt / ni), E0); } - CUDA_CALLABLE planewave refract(rts::vec kn) const - { + CUDA_CALLABLE planewave refract(stim::vec kn) const{ return bend(kn); } - void scatter(rts::plane P, T nr, planewave &r, planewave &t){ + /// Calculate the result of a plane wave hitting an interface between two refractive indices + + /// @param P is a plane representing the position and orientation of the surface + /// @param n0 is the refractive index outside of the surface (in the direction of the normal) + /// @param n1 is the refractive index inside the surface (in the direction away from the normal) + /// @param r is the reflected component of the plane wave + /// @param t is the transmitted component of the plane wave + void scatter(stim::plane P, T n0, T n1, planewave &r, planewave &t){ + scatter(P, n1/n0, r, t); + } + + /// Calculate the scattering result when nr = n1/n0 + + /// @param P is a plane representing the position and orientation of the surface + /// @param r is the ration n1/n0 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal) + /// @param r is the reflected component of the plane wave + /// @param t is the transmitted component of the plane wave + void scatter(stim::plane P, T nr, planewave &r, planewave &t){ int facing = P.face(k); //determine which direction the plane wave is coming in - //if(facing == 0) //if the wave is tangent to the plane, return an identical wave - // return *this; - //else if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr P = P.flip(); //flip the plane nr = 1/nr; //invert the refractive index (now nr = n0/n1) @@ -192,7 +204,7 @@ public: bool tir = false; //flag for total internal reflection if(theta_t != theta_t){ tir = true; - theta_t = rtsPI / (T)2; + theta_t = stim::PI / (T)2; } //handle the degenerate case where theta_i is 0 (the plane wave hits head-on) @@ -205,17 +217,10 @@ public: vec< complex > Et = E0 * tp; T phase_t = P.p().dot(k - kt); //compute the phase offset T phase_r = P.p().dot(k - kr); - //std::cout<<"Degeneracy: Head-On"<(kr, Er, phase_r); t = planewave(kt, Et, phase_t); - - //std::cout<<"i + r: "< Ei_s = E0.dot(x_hat); - //int sgn = (0 < E0.dot(y_hat)) - (E0.dot(y_hat) < 0); int sgn = E0.dot(y_hat).sgn(); vec< complex > cx_hat = x_hat; complex Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn; - //T Ei_p = ( E0 - x_hat * Ei_s ).len(); //compute the magnitude of the p- and s-polarized components of the reflected E vector complex Er_s = Ei_s * rs; complex Er_p = Ei_p * rp; @@ -257,14 +260,6 @@ public: complex Et_s = Ei_s * ts; complex Et_p = Ei_p * tp; - //std::cout<<"E0: "< > Er = vec< complex >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s; //compute the transmitted E vector @@ -273,29 +268,12 @@ public: T phase_t = P.p().dot(k - kt); T phase_r = P.p().dot(k - kr); - //std::cout<<"phase r: "<(0, phase_r) ); - //r.phi = phase_r; - - //t = bend(kt); - //t.k = t.k * nr; t.k = kt; t.E0 = Et * exp( complex(0, phase_t) ); - //t.phi = phase_t; - //std::cout<<"i: "< -std::ostream& operator<<(std::ostream& os, rts::planewave p) +std::ostream& operator<<(std::ostream& os, stim::optics::planewave p) { os< + +//Boost +//#include +//#include + +namespace stim{ + + /// Function returns the value of the scalar field produced by a beam with the specified parameters + + template + std::vector< stim::vec3 > generate_focusing_vectors(size_t N, stim::vec3 d, T NA, T NA_in = 0){ + + std::vector< stim::vec3 > dirs(N); //allocate an array to store the focusing vectors + + ///compute the rotation operator to transform (0, 0, 1) to k + T cos_angle = d.dot(vec3(0, 0, 1)); + stim::matrix rotation; + + //if the cosine of the angle is -1, the rotation is just a flip across the z axis + if(cos_angle == -1){ + rotation(2, 2) = -1; + } + else if(cos_angle != 1.0) + { + vec3 r_axis = vec3(0, 0, 1).cross(d).norm(); //compute the axis of rotation + T angle = acos(cos_angle); //compute the angle of rotation + quaternion quat; //create a quaternion describing the rotation + quat.CreateRotation(angle, r_axis); + rotation = quat.toMatrix3(); //compute the rotation matrix + } + + //find the phi values associated with the cassegrain ring + T PHI[2]; + PHI[0] = (T)asin(NA); + PHI[1] = (T)asin(NA_in); + + //calculate the z-axis cylinder coordinates associated with these angles + T Z[2]; + Z[0] = cos(PHI[0]); + Z[1] = cos(PHI[1]); + T range = Z[0] - Z[1]; + + //draw a distribution of random phi, z values + T z, phi, theta; + //T kmag = stim::TAU / lambda; + for(int i=0; i spherical(1, theta, phi); //convert from spherical to cartesian coordinates + vec3 cart = spherical.sph2cart(); + dirs[i] = rotation * cart; //create a sample vector + } + return dirs; + } + +/// Class stim::beam represents a beam of light focused at a point and composed of several plane waves +template +class scalarbeam +{ +public: + //enum beam_type {Uniform, Bartlett, Hamming, Hanning}; + +private: + + T NA[2]; //numerical aperature of the focusing optics + vec3 f; //focal point + vec3 d; //propagation direction + stim::complex A; //beam amplitude + T lambda; //beam wavelength +public: + + ///constructor: build a default beam (NA=1.0) + scalarbeam(T wavelength = 1, stim::complex amplitude = 1, vec3 focal_point = vec3(0, 0, 0), vec3 direction = vec3(0, 0, 1), T numerical_aperture = 1, T center_obsc = 0){ + lambda = wavelength; + A = amplitude; + f = focal_point; + d = direction.norm(); //make sure that the direction vector is normalized (makes calculations more efficient later on) + NA[0] = numerical_aperture; + NA[1] = center_obsc; + } + + ///Numerical Aperature functions + void setNA(T na) + { + NA[0] = (T)0; + NA[1] = na; + } + void setNA(T na0, T na1) + { + NA[0] = na0; + NA[1] = na1; + } + + //Monte-Carlo decomposition into plane waves + std::vector< scalarwave > mc(size_t N = 100000) const{ + + std::vector< stim::vec3 > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]); //generate a random set of N vectors forming a focus + std::vector< scalarwave > samples(N); //create a vector of plane waves + T kmag = (T)stim::TAU / lambda; //calculate the wavenumber + stim::complex apw; //allocate space for the amplitude at the focal point + stim::vec3 kpw; //declare the new k-vector based on the focused plane wave direction + for(size_t i=0; i(0, kpw.dot(-f))); //calculate the amplitude for the new plane wave + samples[i] = scalarwave(kpw, apw); //create a plane wave based on the direction + } + + return samples; + } + + /// Calculate the field at a given point + /// @param x is the x-coordinate of the field point + /// @O is the approximation accuracy + stim::complex field(T x, T y, T z, size_t O){ + std::vector< scalarwave > W = mc(O); + T result = 0; //initialize the result to zero (0) + for(size_t i = 0; i < O; i++){ //for each plane wave + result += W[i].pos(x, y, z); + } + return result; + } + + /// Calculate the field at a set of positions + /*void field(stim::complex* F, T* x, T* y, T* z, size_t N, size_t O){ + + memset(F, 0, N * sizeof(stim::complex)); + std::vector< scalarwave > W = mc(O); //get a random set of plane waves representing the beam + size_t o, n; + T px, py, pz; + for(n = 0; n < N; n++){ //for each point in the field + (x == NULL) ? px = 0 : px = x[n]; // test for NULL values + (y == NULL) ? py = 0 : py = y[n]; + (z == NULL) ? pz = 0 : pz = z[n]; + for(o = 0; o < O; o++){ //for each plane wave + F[n] += W[o].pos(px, py, pz); + } + } + }*/ + + std::string str() + { + std::stringstream ss; + ss<<"Beam:"< +void cpu_scalar_psf(stim::complex* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3 f, T NA, T NA_in, int Nl){ + + memset(F, 0, N * sizeof(stim::complex)); + T k = stim::TAU / lambda; + T jl, Pl, C, kr, cos_phi; + T cos_alpha_1 = cos(asin(NA_in)); + T cos_alpha_2 = cos(asin(NA)); + stim::vec3 p, ps; + + /*double vm; + size_t table_bytes = (Nl + 1) * sizeof(double); + double* jv = (double*) malloc( table_bytes ); + double* yv = (double*) malloc( table_bytes ); + double* djv= (double*) malloc( table_bytes ); + double* dyv= (double*) malloc( table_bytes ); + */ + + T vm; + size_t table_bytes = (Nl + 1) * sizeof(T); + T* jv = (T*) malloc( table_bytes ); + T* yv = (T*) malloc( table_bytes ); + T* djv= (T*) malloc( table_bytes ); + T* dyv= (T*) malloc( table_bytes ); + + for(size_t n = 0; n < N; n++){ + (x == NULL) ? p[0] = 0 : p[0] = x[n]; // test for NULL values and set positions + (y == NULL) ? p[1] = 0 : p[1] = y[n]; + (z == NULL) ? p[2] = 0 : p[2] = z[n]; + + ps = p.cart2sph(); //convert this point to spherical coordinates + kr = k * ps[0]; + cos_phi = std::cos(ps[2]); + stim::bessjyv_sph(Nl, kr, vm, jv, yv, djv, dyv); + + for(int l = 0; l <= Nl; l++){ + //jl = boost::math::sph_bessel(l, kr); + //jl = stim::bessjyv(l, kr + jl = (T)jv[l]; + Pl = 1;//boost::math::legendre_p(l, cos_phi); + C = 1;//boost::math::legendre_p(l+1, cos_alpha_1) - boost::math::legendre_p(l + 1, cos_alpha_2) - boost::math::legendre_p(l - 1, cos_alpha_1) + boost::math::legendre_p(l - 1, cos_alpha_2); + F[n] += pow(complex(0, 1), l) * jl * Pl * C; + } + F[n] *= A * stim::TAU; + } +} + +} //end namespace stim + +#endif diff --git a/stim/optics/scalarwave.h b/stim/optics/scalarwave.h new file mode 100644 index 0000000..54ba253 --- /dev/null +++ b/stim/optics/scalarwave.h @@ -0,0 +1,353 @@ +#ifndef STIM_SCALARWAVE_H +#define STIM_SCALARWAVE_H + + +#include +#include +#include + +//#include "../math/vector.h" +#include "../math/vec3.h" +#include "../math/quaternion.h" +#include "../math/constants.h" +#include "../math/plane.h" +#include "../math/complex.h" + +//CUDA +#include "../cuda/cudatools/devices.h" +#include "../cuda/cudatools/error.h" +#include "../cuda/sharedmem.cuh" + +namespace stim{ + +template +class scalarwave{ + +protected: + + stim::vec3 k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda + stim::complex E0; //amplitude + + /// Bend a plane wave via refraction, given that the new propagation direction is known + CUDA_CALLABLE scalarwave bend(stim::vec3 kn) const{ + return scalarwave(kn.norm() * kmag(), E0); + } + +public: + + ///constructor: create a plane wave propagating along k + CUDA_CALLABLE scalarwave(vec3 kvec = stim::vec3(0, 0, (T)stim::TAU), complex E = 1){ + k = kvec; + E0 = E; + } + + CUDA_CALLABLE scalarwave(T kx, T ky, T kz, complex E = 1){ + k = vec3(kx, ky, kz); + E0 = E; + } + + ///multiplication operator: scale E0 + CUDA_CALLABLE scalarwave & operator* (const T & rhs){ + E0 = E0 * rhs; + return *this; + } + + CUDA_CALLABLE T lambda() const{ + return stim::TAU / k.len(); + } + + CUDA_CALLABLE T kmag() const{ + return k.len(); + } + + CUDA_CALLABLE vec3< complex > E(){ + return E0; + } + + CUDA_CALLABLE vec3 kvec(){ + return k; + } + + /// calculate the value of the field produced by the plane wave given a three-dimensional position + CUDA_CALLABLE complex pos(T x, T y, T z){ + return pos( stim::vec3(x, y, z) ); + } + + CUDA_CALLABLE complex pos(vec3 p = vec3(0, 0, 0)){ + return E0 * exp(complex(0, k.dot(p))); + } + + //scales k based on a transition from material ni to material nt + CUDA_CALLABLE scalarwave n(T ni, T nt){ + return scalarwave(k * (nt / ni), E0); + } + + CUDA_CALLABLE scalarwave refract(stim::vec3 kn) const{ + return bend(kn); + } + + /// Calculate the result of a plane wave hitting an interface between two refractive indices + + /// @param P is a plane representing the position and orientation of the surface + /// @param n0 is the refractive index outside of the surface (in the direction of the normal) + /// @param n1 is the refractive index inside the surface (in the direction away from the normal) + /// @param r is the reflected component of the plane wave + /// @param t is the transmitted component of the plane wave + void scatter(stim::plane P, T n0, T n1, scalarwave &r, scalarwave &t){ + scatter(P, n1/n0, r, t); + } + + /// Calculate the scattering result when nr = n1/n0 + + /// @param P is a plane representing the position and orientation of the surface + /// @param r is the ration n1/n0 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal) + /// @param r is the reflected component of the plane wave + /// @param t is the transmitted component of the plane wave + void scatter(stim::plane P, T nr, scalarwave &r, scalarwave &t){ + /* + int facing = P.face(k); //determine which direction the plane wave is coming in + + if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr + P = P.flip(); //flip the plane + nr = 1/nr; //invert the refractive index (now nr = n0/n1) + } + + //use Snell's Law to calculate the transmitted angle + T cos_theta_i = k.norm().dot(-P.norm()); //compute the cosine of theta_i + T theta_i = acos(cos_theta_i); //compute theta_i + T sin_theta_t = (1/nr) * sin(theta_i); //compute the sine of theta_t using Snell's law + T theta_t = asin(sin_theta_t); //compute the cosine of theta_t + + bool tir = false; //flag for total internal reflection + if(theta_t != theta_t){ + tir = true; + theta_t = stim::PI / (T)2; + } + + //handle the degenerate case where theta_i is 0 (the plane wave hits head-on) + if(theta_i == 0){ + T rp = (1 - nr) / (1 + nr); //compute the Fresnel coefficients + T tp = 2 / (1 + nr); + vec3 kr = -k; + vec3 kt = k * nr; //set the k vectors for theta_i = 0 + vec3< complex > Er = E0 * rp; //compute the E vectors + vec3< complex > Et = E0 * tp; + T phase_t = P.p().dot(k - kt); //compute the phase offset + T phase_r = P.p().dot(k - kr); + + //create the plane waves + r = planewave(kr, Er, phase_r); + t = planewave(kt, Et, phase_t); + return; + } + + + //compute the Fresnel coefficients + T rp, rs, tp, ts; + rp = tan(theta_t - theta_i) / tan(theta_t + theta_i); + rs = sin(theta_t - theta_i) / sin(theta_t + theta_i); + + if(tir){ + tp = ts = 0; + } + else{ + tp = ( 2 * sin(theta_t) * cos(theta_i) ) / ( sin(theta_t + theta_i) * cos(theta_t - theta_i) ); + ts = ( 2 * sin(theta_t) * cos(theta_i) ) / sin(theta_t + theta_i); + } + + //compute the coordinate space for the plane of incidence + vec3 z_hat = -P.norm(); + vec3 y_hat = P.parallel(k).norm(); + vec3 x_hat = y_hat.cross(z_hat).norm(); + + //compute the k vectors for r and t + vec3 kr, kt; + kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag(); + kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr; + + //compute the magnitude of the p- and s-polarized components of the incident E vector + complex Ei_s = E0.dot(x_hat); + int sgn = E0.dot(y_hat).sgn(); + vec3< complex > cx_hat = x_hat; + complex Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn; + //compute the magnitude of the p- and s-polarized components of the reflected E vector + complex Er_s = Ei_s * rs; + complex Er_p = Ei_p * rp; + //compute the magnitude of the p- and s-polarized components of the transmitted E vector + complex Et_s = Ei_s * ts; + complex Et_p = Ei_p * tp; + + //compute the reflected E vector + vec3< complex > Er = vec3< complex >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s; + //compute the transmitted E vector + vec3< complex > Et = vec3< complex >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s; + + T phase_t = P.p().dot(k - kt); + T phase_r = P.p().dot(k - kr); + + //create the plane waves + r.k = kr; + r.E0 = Er * exp( complex(0, phase_r) ); + + t.k = kt; + t.E0 = Et * exp( complex(0, phase_t) ); + */ + } + + std::string str() + { + std::stringstream ss; + ss<<"Plane Wave:"< +__global__ void cuda_scalarwave(stim::complex* F, size_t N, T* x, T* y, T* z, stim::scalarwave* W, size_t n_waves){ + extern __shared__ stim::scalarwave shared_W[]; //declare the list of waves in shared memory + + stim::cuda::sharedMemcpy(shared_W, W, n_waves, threadIdx.x, blockDim.x); //copy the plane waves into shared memory for faster access + __syncthreads(); //synchronize threads to insure all data is copied + + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array + if(i >= N) return; //exit if this thread is outside the array + T px, py, pz; + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values and set positions + (y == NULL) ? py = 0 : py = y[i]; + (z == NULL) ? pz = 0 : pz = z[i]; + + stim::complex f = 0; //create a register to store the result + for(size_t w = 0; w < n_waves; w++) + f += shared_W[w].pos(px, py, pz); //evaluate the plane wave + F[i] += f; //copy the result to device memory +} + +/// evaluate a scalar wave at several points, where all arrays are on the GPU +template +void gpu_scalarwave(stim::complex* F, size_t N, T* x, T* y, T* z, stim::scalarwave w){ + + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device + dim3 blocks(N / threads + 1); //calculate the optimal number of blocks + cuda_scalarwave<<< blocks, threads >>>(F, N, x, y, z, w); //call the kernel +} + +/// Sums a series of coherent plane waves at a specified point +/// @param field is the output array of field values corresponding to each input point +/// @param x is an array of x coordinates for the field point +/// @param y is an array of y coordinates for the field point +/// @param z is an array of z coordinates for the field point +/// @param N is the number of points in the input and output arrays +/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength) +/// @param A is the list of amplitudes for each wave +/// @param S is the list of propagation directions for each wave +template +void cpu_sum_scalarwaves(stim::complex* F, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave > w_array){ + size_t S = w_array.size(); //store the number of waves +#ifdef NO_CUDA + memset(F, 0, N * sizeof(stim::complex)); + T px, py, pz; + for(size_t i = 0; i < N; i++){ // for each element in the array + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values + (y == NULL) ? py = 0 : py = y[i]; + (z == NULL) ? pz = 0 : pz = z[i]; + + for(size_t s = 0; s < S; s++){ + F[i] += w_array[s].pos(px, py, pz); //sum all plane waves at this point + } + } +#else + stim::complex* dev_F; //allocate space for the field + cudaMalloc(&dev_F, N * sizeof(stim::complex)); + cudaMemset(dev_F, 0, N * sizeof(stim::complex)); //set the field to zero (necessary because a sum is used) + + T* dev_x = NULL; //allocate space and copy the X coordinate (if specified) + if(x != NULL){ + HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T))); + HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice)); + } + + T* dev_y = NULL; //allocate space and copy the Y coordinate (if specified) + if(y != NULL){ + HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T))); + HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice)); + } + + T* dev_z = NULL; //allocate space and copy the Z coordinate (if specified) + if(z != NULL){ + HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T))); + HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice)); + } + + size_t wave_bytes = sizeof(stim::scalarwave); + size_t shared_bytes = stim::sharedMemPerBlock(); //calculate the maximum amount of shared memory available + size_t array_bytes = w_array.size() * wave_bytes; //calculate the maximum number of bytes required for the planewave array + size_t max_batch = shared_bytes / wave_bytes; //calculate number of plane waves that will fit into shared memory + size_t num_batches = w_array.size() / max_batch + 1; //calculate the number of batches required to process all plane waves + size_t batch_bytes = min(w_array.size(), max_batch) * wave_bytes; //initialize the batch size (in bytes) to the maximum batch required + + stim::scalarwave* dev_w; + HANDLE_ERROR(cudaMalloc(&dev_w, batch_bytes)); //allocate memory for a single batch of plane waves + + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device + dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks + + size_t batch_size; //declare a variable to store the size of the current batch + size_t waves_processed = 0; //initialize the number of waves processed to zero + while(waves_processed < w_array.size()){ //while there are still waves to be processed + batch_size = min(max_batch, w_array.size() - waves_processed); //process either a whole batch, or whatever is left + batch_bytes = batch_size * sizeof(stim::scalarwave); + HANDLE_ERROR(cudaMemcpy(dev_w, &w_array[waves_processed], batch_bytes, cudaMemcpyHostToDevice)); //copy the plane waves into global memory + cuda_scalarwave<<< blocks, threads, batch_bytes >>>(dev_F, N, dev_x, dev_y, dev_z, dev_w, batch_size); //call the kernel + waves_processed += batch_size; //increment the counter indicating how many waves have been processed + } + + cudaMemcpy(F, dev_F, N * sizeof(stim::complex), cudaMemcpyDeviceToHost); //copy the field from device memory + + if(x != NULL) cudaFree(dev_x); //free everything + if(y != NULL) cudaFree(dev_y); + if(z != NULL) cudaFree(dev_z); + cudaFree(dev_F); + cudaFree(dev_w); + +#endif +} + +template +void cpu_scalarwave(stim::complex* F, size_t N, T* x, T* y, T* z, stim::scalarwave w){ + std::vector< stim::scalarwave > w_array(1, w); + cpu_sum_scalarwaves(F, N, x, y, z, w_array); +} + + +/// Sums a series of coherent plane waves at a specified point +/// @param x is the x coordinate of the field point +/// @param y is the y coordinate of the field point +/// @param z is the z coordinate of the field point +/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength) +/// @param A is the list of amplitudes for each wave +/// @param S is the list of propagation directions for each wave +template +CUDA_CALLABLE stim::complex sum_scalarwaves(T x, T y, T z, std::vector< stim::scalarwave > W){ + size_t N = W.size(); //get the number of plane wave samples + stim::complex field(0, 0); //initialize the field to zero (0) + stim::vec3 k; //allocate space for the direction vector + for(size_t i = 0; i < N; i++){ + field += W[i].pos(x, y, z); + } + return field; +} + +} //end namespace stim + +template +std::ostream& operator<<(std::ostream& os, stim::scalarwave p) +{ + os< + +namespace stim{ + +template +class beam : public planewave

+{ +public: + enum beam_type {Uniform, Bartlett, Hamming, Hanning}; + +private: + + P _na[2]; //numerical aperature of the focusing optics + vec

f; //focal point + function apod; //apodization function + unsigned int apod_res; //resolution of apodization filter functions + + void apod_uniform() + { + apod = (P)1; + } + void apod_bartlett() + { + apod = (P)1; + apod.insert((P)1, (P)0); + } + void apod_hanning() + { + apod = (P)0; + P x, y; + for(unsigned int n=0; n k = rts::vec

(0, 0, rtsTAU), + vec

_E0 = rts::vec

(1, 0, 0), + beam_type _apod = Uniform) + : planewave

(k, _E0) + { + _na[0] = (P)0.0; + _na[1] = (P)1.0; + f = vec

( (P)0, (P)0, (P)0 ); + apod_res = 256; //set the default resolution for apodization filters + set_apod(_apod); //set the apodization function type + } + + beam

refract(rts::vec

kn) const{ + + beam

new_beam; + new_beam._na[0] = _na[0]; + new_beam._na[1] = _na[1]; + + + rts::planewave

pw = planewave

::bend(kn); + //std::cout< > mc(unsigned int N = 100000, unsigned int seed = 0) const + { + /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling + of a sphere and projecting these samples onto an inscribed sphere. + + seed = seed for the random number generator + */ + srand(seed); //seed the random number generator + + vec

k_hat = beam::k.norm(); + + ///compute the rotation operator to transform (0, 0, 1) to k + P cos_angle = k_hat.dot(rts::vec

(0, 0, 1)); + rts::matrix rotation; + + //if the cosine of the angle is -1, the rotation is just a flip across the z axis + if(cos_angle == -1){ + rotation(2, 2) = -1; + } + else if(cos_angle != 1.0) + { + rts::vec

r_axis = rts::vec

(0, 0, 1).cross(k_hat).norm(); //compute the axis of rotation + P angle = acos(cos_angle); //compute the angle of rotation + rts::quaternion

quat; //create a quaternion describing the rotation + quat.CreateRotation(angle, r_axis); + rotation = quat.toMatrix3(); //compute the rotation matrix + } + + //find the phi values associated with the cassegrain ring + P PHI[2]; + PHI[0] = (P)asin(_na[0]); + PHI[1] = (P)asin(_na[1]); + + //calculate the z-axis cylinder coordinates associated with these angles + P Z[2]; + Z[0] = cos(PHI[0]); + Z[1] = cos(PHI[1]); + P range = Z[0] - Z[1]; + + std::vector< planewave

> samples; //create a vector of plane waves + + //draw a distribution of random phi, z values + P z, phi, theta; + for(int i=0; i spherical(1, theta, phi); //convert from spherical to cartesian coordinates + rts::vec

cart = spherical.sph2cart(); + vec

k_prime = rotation * cart; //create a sample vector + + //store a wave refracted along the given direction + //std::cout<<"k prime: "<::refract(k_prime) * apod(phi/PHI[1])); + } + + return samples; + } + + std::string str() + { + std::stringstream ss; + ss<<"Beam:"< +__global__ void gpu_planewave2efield(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, + planewave w, rect q) +{ + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= r0 || iv >= r1) return; + + //compute the index into the field + int i = iv*r0 + iu; + + //get the current position + vec p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); + vec r(p[0], p[1], p[2]); + + complex x( 0.0f, w.k.dot(r) ); + + if(Y == NULL) //if this is a scalar simulation + X[i] += w.E0.len() * exp(x); //use the vector magnitude as the plane wave amplitude + else + { + X[i] += w.E0[0] * exp(x); + Y[i] += w.E0[1] * exp(x); + Z[i] += w.E0[2] * exp(x); + } +} + +template +__global__ void gpu_efield_magnitude(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, T* M) +{ + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= r0 || iv >= r1) return; + + //compute the index into the field + int i = iv*r0 + iu; + + if(Y == NULL) //if this is a scalar simulation + M[i] = X[i].abs(); //compute the magnitude of the X component + else + { + M[i] = rts::vec(X[i].abs(), Y[i].abs(), Z[i].abs()).len(); + //M[i] = Z[i].abs(); + } +} + +template +__global__ void gpu_efield_real_magnitude(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, T* M) +{ + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= r0 || iv >= r1) return; + + //compute the index into the field + int i = iv*r0 + iu; + + if(Y == NULL) //if this is a scalar simulation + M[i] = X[i].abs(); //compute the magnitude of the X component + else + { + M[i] = rts::vec(X[i].real(), Y[i].real(), Z[i].real()).len(); + //M[i] = Z[i].abs(); + } +} + +template +__global__ void gpu_efield_polarization(complex* X, complex* Y, complex* Z, + unsigned int r0, unsigned int r1, + T* Px, T* Py, T* Pz) +{ + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= r0 || iv >= r1) return; + + //compute the index into the field + int i = iv*r0 + iu; + + //compute the field polarization + Px[i] = X[i].abs(); + Py[i] = Y[i].abs(); + Pz[i] = Z[i].abs(); + +} + +/* This function computes the sum of two complex fields and stores the result in *dest +*/ +template +__global__ void gpu_efield_sum(complex* dest, complex* src, unsigned int r0, unsigned int r1) +{ + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= r0 || iv >= r1) return; + + //compute the index into the field + int i = iv*r0 + iu; + + //sum the fields + dest[i] += src[i]; +} + +/* This class implements a discrete representation of an electromagnetic field + in 2D. The majority of this representation is done on the GPU. +*/ +template +class efield +{ +protected: + + bool vector; + + //gpu pointer to the field data + rts::complex* X; + rts::complex* Y; + rts::complex* Z; + + //resolution of the discrete field + unsigned int R[2]; + + //shape of the 2D field + rect pos; + + void from_planewave(planewave p) + { + unsigned int SQRT_BLOCK = 16; + //create one thread for each detector pixel + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + + gpu_planewave2efield <<>> (X, Y, Z, R[0], R[1], p, pos); + } + + void init(unsigned int res0, unsigned int res1, bool _vector) + { + vector = _vector; //initialize field type + + X = Y = Z = NULL; //initialize all pointers to NULL + R[0] = res0; + R[1] = res1; + + //allocate memory on the gpu + cudaMalloc(&X, sizeof(rts::complex) * R[0] * R[1]); + cudaMemset(X, 0, sizeof(rts::complex) * R[0] * R[1]); + + if(vector) + { + cudaMalloc(&Y, sizeof(rts::complex) * R[0] * R[1]); + cudaMemset(Y, 0, sizeof(rts::complex) * R[0] * R[1]); + + cudaMalloc(&Z, sizeof(rts::complex) * R[0] * R[1]); + cudaMemset(Z, 0, sizeof(rts::complex) * R[0] * R[1]); + } + } + + void destroy() + { + if(X != NULL) cudaFree(X); + if(Y != NULL) cudaFree(Y); + if(Z != NULL) cudaFree(Z); + } + + void shallowcpy(const rts::efield & src) + { + vector = src.vector; + R[0] = src.R[0]; + R[1] = src.R[1]; + } + + void deepcpy(const rts::efield & src) + { + //perform a shallow copy + shallowcpy(src); + + //allocate memory on the gpu + if(src.X != NULL) + { + cudaMalloc(&X, sizeof(rts::complex) * R[0] * R[1]); + cudaMemcpy(X, src.X, sizeof(rts::complex) * R[0] * R[1], cudaMemcpyDeviceToDevice); + } + if(src.Y != NULL) + { + cudaMalloc(&Y, sizeof(rts::complex) * R[0] * R[1]); + cudaMemcpy(Y, src.Y, sizeof(rts::complex) * R[0] * R[1], cudaMemcpyDeviceToDevice); + } + if(src.Z != NULL) + { + cudaMalloc(&Z, sizeof(rts::complex) * R[0] * R[1]); + cudaMemcpy(Z, src.Z, sizeof(rts::complex) * R[0] * R[1], cudaMemcpyDeviceToDevice); + } + } + +public: + efield(unsigned int res0, unsigned int res1, bool _vector = true) + { + init(res0, res1, _vector); + //pos = rts::rect(rts::vec(-10, 0, -10), rts::vec(-10, 0, 10), rts::vec(10, 0, 10)); + } + + //destructor + ~efield() + { + destroy(); + } + + ///Clear the field - set all points to zero + void clear() + { + cudaMemset(X, 0, sizeof(rts::complex) * R[0] * R[1]); + + if(vector) + { + cudaMemset(Y, 0, sizeof(rts::complex) * R[0] * R[1]); + cudaMemset(Z, 0, sizeof(rts::complex) * R[0] * R[1]); + } + } + + void position(rect _p) + { + pos = _p; + } + + //access functions + complex* x(){ + return X; + } + complex* y(){ + return Y; + } + complex* z(){ + return Z; + } + unsigned int Ru(){ + return R[0]; + } + unsigned int Rv(){ + return R[1]; + } + rect p(){ + return pos; + } + + std::string str() + { + stringstream ss; + ss< & operator= (const efield & rhs) + { + destroy(); //destroy any previous information about this field + deepcpy(rhs); //create a deep copy + return *this; //return the current object + } + + //assignment operator: build an electric field from a plane wave + efield & operator= (const planewave & rhs) + { + + clear(); //clear any previous field data + from_planewave(rhs); //create a field from the planewave + return *this; //return the current object + } + + //assignment operator: add an existing electric field + efield & operator+= (const efield & rhs) + { + //if this field and the source field represent the same regions in space + if(R[0] == rhs.R[0] && R[1] == rhs.R[1] && pos == rhs.pos) + { + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); + + //create one thread for each detector pixel + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + //sum the fields + gpu_efield_sum <<>> (X, rhs.X, R[0], R[1]); + if(Y != NULL) + { + gpu_efield_sum <<>> (Y, rhs.Y, R[0], R[1]); + gpu_efield_sum <<>> (Z, rhs.Z, R[0], R[1]); + } + } + else + { + std::cout<<"ERROR in efield: The two summed fields do not represent the same geometry."< & operator= (const rts::beam & rhs) + { + //get a vector of monte-carlo samples + std::vector< rts::planewave > p_list = rhs.mc(); + + clear(); //clear any previous field data + for(unsigned int i = 0; i < p_list.size(); i++) + from_planewave(p_list[i]); + return *this; + } + + + //return a scalar field representing field magnitude + realfield mag() + { + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); + + //create a scalar field to store the result + realfield M(R[0], R[1]); + + //create one thread for each detector pixel + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + //compute the magnitude and store it in a scalar field + gpu_efield_magnitude <<>> (X, Y, Z, R[0], R[1], M.ptr(0)); + + return M; + } + + //return a sacalar field representing the field magnitude at an infinitely small point in time + realfield real_mag() + { + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); + + //create a scalar field to store the result + realfield M(R[0], R[1]); + + //create one thread for each detector pixel + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + //compute the magnitude and store it in a scalar field + gpu_efield_real_magnitude <<>> (X, Y, Z, R[0], R[1], M.ptr(0)); + + return M; + } + + //return a vector field representing field polarization + realfield polarization() + { + if(!vector) + { + std::cout<<"ERROR: Cannot compute polarization of a scalar field."< Pol(R[0], R[1]); //create a vector field to store the result + + //compute the polarization and store it in the vector field + gpu_efield_polarization <<>> (X, Y, Z, R[0], R[1], Pol.ptr(0), Pol.ptr(1), Pol.ptr(2)); + + return Pol; //return the vector field + } + + ////////FRIENDSHIP + //friend void operator<< (rts::efield &ef, rts::halfspace hs); + + +}; + + + +} //end namespace rts + +#endif diff --git a/stim/optics_old/esphere.cuh b/stim/optics_old/esphere.cuh new file mode 100644 index 0000000..1b5a559 --- /dev/null +++ b/stim/optics_old/esphere.cuh @@ -0,0 +1,162 @@ +#ifndef RTS_ESPHERE +#define RTS_ESPHERE + +#include "../math/complex.h" +#include "../math/bessel.h" +#include "../visualization/colormap.h" +#include "../optics/planewave.h" +#include "../cuda/devices.h" +#include "../optics/efield.cuh" + +namespace stim{ + +/* This class implements a discrete representation of an electromagnetic field + in 2D scattered by a sphere. This class implements Mie scattering. +*/ +template +class esphere : public efield

+{ +private: + stim::complex

n; //sphere refractive index + P a; //sphere radius + + //parameters dependent on wavelength + unsigned int Nl; //number of orders for the calculation + rts::complex

* A; //internal scattering coefficients + rts::complex

* B; //external scattering coefficients + + void calcNl(P kmag) + { + //return ceil( ((P)6.282 * a) / lambda + 4 * pow( ((P)6.282 * a) / lambda, ((P)1/(P)3)) + 2); + Nl = ceil( kmag*a + 4 * pow(kmag * a, (P)1/(P)3) + 2); + } + + void calcAB(P k, unsigned int Nl, rts::complex

* A, rts::complex

* B) + { + /* These calculations require double precision, so they are computed + using doubles and converted to P at the end. + + Input: + + k = magnitude of the k vector (tau/lambda) + Nl = highest order coefficient ([0 Nl] are computed) + */ + + //clear the previous coefficients + rts::complex

* cpuA = (rts::complex

*)malloc(sizeof(rts::complex

) * (Nl+1)); + rts::complex

* cpuB = (rts::complex

*)malloc(sizeof(rts::complex

) * (Nl+1)); + + //convert to an std complex value + complex nc = (rts::complex)n; + + //compute the magnitude of the k vector + complex kna = nc * k * (double)a; + + //compute the arguments k*a and k*n*a + complex ka(k * a, 0.0); + + //allocate space for the Bessel functions of the first and second kind (and derivatives) + unsigned int bytes = sizeof(complex) * (Nl + 1); + complex* cjv_ka = (complex*)malloc(bytes); + complex* cyv_ka = (complex*)malloc(bytes); + complex* cjvp_ka = (complex*)malloc(bytes); + complex* cyvp_ka = (complex*)malloc(bytes); + complex* cjv_kna = (complex*)malloc(bytes); + complex* cyv_kna = (complex*)malloc(bytes); + complex* cjvp_kna = (complex*)malloc(bytes); + complex* cyvp_kna = (complex*)malloc(bytes); + + //allocate space for the spherical Hankel functions and derivative + complex* chv_ka = (complex*)malloc(bytes); + complex* chvp_ka = (complex*)malloc(bytes); + + //compute the bessel functions using the CPU-based algorithm + double vm; + cbessjyva_sph(Nl, ka, vm, cjv_ka, cyv_ka, cjvp_ka, cyvp_ka); + cbessjyva_sph(Nl, kna, vm, cjv_kna, cyv_kna, cjvp_kna, cyvp_kna); + + //compute A for each order + complex i(0, 1); + complex a, b, c, d; + complex An, Bn; + for(int l=0; l<=Nl; l++) + { + //compute the Hankel functions from j and y + chv_ka[l] = cjv_ka[l] + i * cyv_ka[l]; + chvp_ka[l] = cjvp_ka[l] + i * cyvp_ka[l]; + + //Compute A (internal scattering coefficient) + //compute the numerator and denominator for A + a = cjv_ka[l] * chvp_ka[l] - cjvp_ka[l] * chv_ka[l]; + b = cjv_kna[l] * chvp_ka[l] - chv_ka[l] * cjvp_kna[l] * nc; + + //calculate A and add it to the list + rts::complex An = (2.0 * l + 1.0) * pow(i, l) * (a / b); + cpuA[l] = (rts::complex

)An; + + //Compute B (external scattering coefficient) + c = cjv_ka[l] * cjvp_kna[l] * nc - cjv_kna[l] * cjvp_ka[l]; + d = cjv_kna[l] * chvp_ka[l] - chv_ka[l] * cjvp_kna[l] * nc; + + //calculate B and add it to the list + rts::complex Bn = (2.0 * l + 1.0) * pow(i, l) * (c / d); + cpuB[l] = (rts::complex

)Bn; + + std::cout<<"A: "<) * (Nl+1)); //allocate memory for new coefficients + cudaMalloc(&B, sizeof(rts::complex

) * (Nl+1)); + + //copy the calculations from the CPU to the GPU + cudaMemcpy(A, cpuA, sizeof(rts::complex

) * (Nl+1), cudaMemcpyDeviceToHost); + cudaMemcpy(B, cpuB, sizeof(rts::complex

) * (Nl+1), cudaMemcpyDeviceToHost); + } + +public: + + esphere(unsigned int res0, unsigned int res1, P _a, rts::complex

_n, bool _scalar = false) : + efield

(res0, res1, _scalar) + { + std::cout<<"Sphere scattered field created."< & operator= (const planewave

& rhs) + { + calcNl(rhs.kmag()); //compute the number of scattering coefficients + std::cout<<"Nl: "<::str()< +__global__ void gpu_halfspace_pw2ef(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, + plane P, planewave w, rect q, bool at_surface = false) +{ + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= r0 || iv >= r1) return; + + //compute the index into the field + int i = iv*r0 + iu; + + //get the current position + vec p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); + + if(at_surface){ + if(P.side(p) > 0) + return; + } + else{ + if(P.side(p) >= 0) + return; + } + + //if the current position is on the wrong side of the plane + + //vec r(p[0], p[1], p[2]); + + complex x( 0.0f, w.kvec().dot(p) ); + //complex phase( 0.0f, w.phase()); + + if(Y == NULL) //if this is a scalar simulation + X[i] += w.E().len() * exp(x); //use the vector magnitude as the plane wave amplitude + else + { + //X[i] = Y[i] = Z[i] = 1; + X[i] += w.E()[0] * exp(x);// * exp(phase); + Y[i] += w.E()[1] * exp(x);// * exp(phase); + Z[i] += w.E()[2] * exp(x);// * exp(phase); + } +} + +//GPU kernel to compute the electric displacement field +template +__global__ void gpu_halfspace_pw2df(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, + plane P, planewave w, rect q, T n, bool at_surface = false) +{ + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= r0 || iv >= r1) return; + + //compute the index into the field + int i = iv*r0 + iu; + + //get the current position + vec p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); + + if(at_surface){ + if(P.side(p) > 0) + return; + } + else{ + if(P.side(p) >= 0) + return; + } + + //if the current position is on the wrong side of the plane + + //vec r(p[0], p[1], p[2]); + + complex x( 0.0f, w.kvec().dot(p) ); + //complex phase( 0.0f, w.phase()); + + //vec< complex > testE(1, 0, 0); + + + + if(Y == NULL) //if this is a scalar simulation + X[i] += w.E().len() * exp(x); //use the vector magnitude as the plane wave amplitude + else + { + plane< complex > cplane = plane< complex, 3>(P); + vec< complex > E_para;// = cplane.parallel(w.E()); + vec< complex > E_perp;// = cplane.perpendicular(w.E()) * (n*n); + cplane.decompose(w.E(), E_para, E_perp); + T epsilon = n*n; + + X[i] += (E_para[0] + E_perp[0] * epsilon) * exp(x); + Y[i] += (E_para[1] + E_perp[1] * epsilon) * exp(x); + Z[i] += (E_para[2] + E_perp[2] * epsilon) * exp(x); + } +} + +//computes a scalar field containing the refractive index of the half-space at each point +template +__global__ void gpu_halfspace_n(T* n, unsigned int r0, unsigned int r1, rect q, plane P, T n0, T n1){ + + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= r0 || iv >= r1) return; + + //compute the index into the field + int i = iv*r0 + iu; + + //get the current position + vec p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); + + //set the appropriate refractive index + if(P.side(p) < 0) n[i] = n0; + else n[i] = n1; +} + +template +class halfspace +{ +private: + rts::plane S; //surface plane splitting the half space + rts::complex n0; //refractive index at the front of the plane + rts::complex n1; //refractive index at the back of the plane + + //lists of waves in front (pw0) and behind (pw1) the plane + std::vector< rts::planewave > w0; + std::vector< rts::planewave > w1; + + //rts::planewave pi; //incident plane wave + //rts::planewave pr; //plane wave reflected from the surface + //rts::planewave pt; //plane wave transmitted through the surface + + void init(){ + n0 = 1.0; + n1 = 1.5; + } + + //compute which side of the interface is hit by the incoming plane wave (0 = front, 1 = back) + bool facing(planewave p){ + if(p.kvec().dot(S.norm()) > 0) + return 1; + else + return 0; + } + + T calc_theta_i(vec v){ + + vec v_hat = v.norm(); + + //compute the cosine of the angle between k_hat and the plane normal + T cos_theta_i = v_hat.dot(S.norm()); + + return acos(abs(cos_theta_i)); + } + + T calc_theta_t(T ni_nt, T theta_i){ + + T sin_theta_t = ni_nt * sin(theta_i); + return asin(sin_theta_t); + } + + +public: + + //constructors + halfspace(){ + init(); + } + + halfspace(T na, T nb){ + init(); + n0 = na; + n1 = nb; + } + + halfspace(T na, T nb, plane p){ + n0 = na; + n1 = nb; + S = p; + } + + //compute the transmitted and reflective waves given the incident (vacuum) plane wave p + void incident(rts::planewave p){ + + planewave r, t; + p.scatter(S, n1.real()/n0.real(), r, t); + + //std::cout<<"i+r: "< b, unsigned int N = 10000){ + + //generate a plane wave decomposition for the beam + std::vector< planewave > pw_list = b.mc(N); + + //calculate the reflected and refracted waves for each incident wave + for(unsigned int w = 0; w < pw_list.size(); w++){ + incident(pw_list[w]); + } + } + + //return the electric field at the specified resolution and position + rts::efield E(unsigned int r0, unsigned int r1, rect R){ + + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); + + //create one thread for each detector pixel + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((r0 + SQRT_BLOCK -1)/SQRT_BLOCK, (r1 + SQRT_BLOCK - 1)/SQRT_BLOCK); + + //create an electric field + rts::efield ef(r0, r1); + ef.position(R); + + //render each plane wave + + //plane waves at the surface front + for(unsigned int w = 0; w < w0.size(); w++){ + //std::cout<<"w0 "< <<>> (ef.x(), ef.y(), ef.z(), r0, r1, S, w0[w], ef.p()); + } + + //plane waves at the surface back + for(unsigned int w = 0; w < w1.size(); w++){ + //std::cout<<"w1 "< <<>> (ef.x(), ef.y(), ef.z(), r0, r1, -S, w1[w], ef.p(), true); + } + + return ef; + } + + //return the electric displacement at the specified resolution and position + rts::efield D(unsigned int r0, unsigned int r1, rect R){ + + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); + + //create one thread for each detector pixel + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((r0 + SQRT_BLOCK -1)/SQRT_BLOCK, (r1 + SQRT_BLOCK - 1)/SQRT_BLOCK); + + //create a complex vector field + rts::efield df(r0, r1); + df.position(R); + + //render each plane wave + + //plane waves at the surface front + for(unsigned int w = 0; w < w0.size(); w++){ + //std::cout<<"w0 "< <<>> (df.x(), df.y(), df.z(), r0, r1, S, w0[w], df.p(), n0.real()); + } + + //plane waves at the surface back + for(unsigned int w = 0; w < w1.size(); w++){ + //std::cout<<"w1 "< <<>> (df.x(), df.y(), df.z(), r0, r1, -S, w1[w], df.p(), n1.real(), true); + } + + return df; + } + + realfield nfield(unsigned int Ru, unsigned int Rv, rect p){ + + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); + + //create one thread for each detector pixel + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((Ru + SQRT_BLOCK -1)/SQRT_BLOCK, (Rv + SQRT_BLOCK - 1)/SQRT_BLOCK); + + realfield n(Ru, Rv); //create a scalar field to store the result of n + + rts::gpu_halfspace_n <<>> (n.ptr(), Ru, Rv, p, S, n0.real(), n1.real()); + + return n; + } + + std::string str(){ + std::stringstream ss; + ss<<"Half Space-------------"< (rts::efield &ef, rts::halfspace hs); + +}; + + + + +} + + +#endif diff --git a/stim/optics_old/material.h b/stim/optics_old/material.h new file mode 100644 index 0000000..f22fa0d --- /dev/null +++ b/stim/optics_old/material.h @@ -0,0 +1,153 @@ +#ifndef RTS_MATERIAL_H +#define RTS_MATERIAL_H + +#include +#include +#include +#include +#include +#include +#include +#include "../math/complex.h" +#include "../math/constants.h" +#include "../math/function.h" + +namespace stim{ + +//Material class - default representation for the material property is the refractive index (RI) +template +class material : public function< T, complex >{ + +public: + enum wave_property{microns, inverse_cm}; + enum material_property{ri, absorbance}; + +private: + + using function< T, complex >::X; + using function< T, complex >::Y; + using function< T, complex >::insert; + using function< T, complex >::bounding; + + std::string name; //name for the material (defaults to file name) + + void process_header(std::string str, wave_property& wp, material_property& mp){ + + std::stringstream ss(str); //create a stream from the data string + std::string line; + std::getline(ss, line); //get the first line as a string + while(line[0] == '#'){ //continue looping while the line is a comment + + std::stringstream lstream(line); //create a stream from the line + lstream.ignore(); //ignore the first character ('#') + + std::string prop; //get the property name + lstream>>prop; + + if(prop == "X"){ + std::string wp_name; + lstream>>wp_name; + if(wp_name == "microns") wp = microns; + else if(wp_name == "inverse_cm") wp = inverse_cm; + } + else if(prop == "Y"){ + std::string mp_name; + lstream>>mp_name; + if(mp_name == "ri") mp = ri; + else if(mp_name == "absorbance") mp = absorbance; + } + + std::getline(ss, line); //get the next line + } + + function< T, stim::complex >::process_string(str); + } + + void from_inverse_cm(){ + //convert inverse centimeters to wavelength (in microns) + for(unsigned int i=0; i(1, 0); + } + + +public: + + material(std::string filename, wave_property wp, material_property mp){ + name = filename; + load(filename, wp, mp); + } + + material(std::string filename){ + name = filename; + load(filename); + } + + material(){ + init(); + } + + complex getN(T lambda){ + return function< T, complex >::linear(lambda); + } + + void load(std::string filename, wave_property wp, material_property mp){ + + //load the file as a function + function< T, complex >::load(filename); + } + + void load(std::string filename){ + + wave_property wp = inverse_cm; + material_property mp = ri; + //turn the file into a string + std::ifstream t(filename.c_str()); //open the file as a stream + + if(!t){ + std::cout<<"ERROR: Couldn't open the material file '"<(t)), + std::istreambuf_iterator()); + + //process the header information + process_header(str, wp, mp); + + //convert units + if(wp == inverse_cm) + from_inverse_cm(); + //set the bounding values + bounding[0] = Y[0]; + bounding[1] = Y.back(); + } + std::string str(){ + std::stringstream ss; + ss< >::str(); + return ss.str(); + } + std::string get_name(){ + return name; + } + + void set_name(std::string str){ + name = str; + } + +}; + +} + + + + +#endif diff --git a/stim/optics_old/mirst-1d.cuh b/stim/optics_old/mirst-1d.cuh new file mode 100644 index 0000000..e7fde75 --- /dev/null +++ b/stim/optics_old/mirst-1d.cuh @@ -0,0 +1,447 @@ +#include "../optics/material.h" +#include "../math/complexfield.cuh" +#include "../math/constants.h" +//#include "../envi/bil.h" + +#include "cufft.h" + +#include +#include + +namespace stim{ + +//this function writes a sinc function to "dest" such that an iFFT produces a slab +template +__global__ void gpu_mirst1d_layer_fft(complex* dest, complex* ri, + T* src, T* zf, + T w, unsigned int zR, unsigned int nuR){ + //dest = complex field representing the sample + //ri = refractive indices for each wavelength + //src = intensity of the light source for each wavelength + //zf = z position of the slab interface for each wavelength (accounting for optical path length) + //w = width of the slab (in pixels) + //zR = number of z-axis samples + //nuR = number of wavelengths + + //get the current coordinate in the plane slice + int ifz = blockIdx.x * blockDim.x + threadIdx.x; + int inu = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(inu >= nuR || ifz >= zR) return; + + int i = inu * zR + ifz; + + T fz; + if(ifz < zR/2) + fz = ifz / (T)zR; + else + fz = -(zR - ifz) / (T)zR; + + //if the slab starts outside of the simulation domain, just return + if(zf[inu] >= zR) return; + + //fill the array along z with a sinc function representing the Fourier transform of the layer + + T opl = w * ri[inu].real(); //optical path length + + //handle the case where the slab goes outside the simulation domain + if(zf[inu] + opl >= zR) + opl = zR - zf[inu]; + + if(opl == 0) return; + + //T l = w * ri[inu].real(); + //complex e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0)); + complex e(0, -2 * stimPI * fz * (zf[inu] + opl/2)); + + complex eta = ri[inu] * ri[inu] - 1; + + //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz); + if(ifz == 0) + dest[i] += opl * exp(e) * eta * src[inu]; + else + dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl); +} + +template +__global__ void gpu_mirst1d_increment_z(T* zf, complex* ri, T w, unsigned int S){ + //zf = current z depth (optical path length) in pixels + //ri = refractive index of the material + //w = actual width of the layer (in pixels) + + + //compute the index for this thread + int i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= S) return; + + if(ri == NULL) + zf[i] += w; + else + zf[i] += ri[i].real() * w; +} + +//apply the 1D MIRST filter to an existing sample (overwriting the sample) +template +__global__ void gpu_mirst1d_apply_filter(complex* sampleFFT, T* lambda, + T dFz, + T inNA, T outNA, + unsigned int lambdaR, unsigned int zR, + T sigma = 0){ + //sampleFFT = the sample in the Fourier domain (will be overwritten) + //lambda = list of wavelengths + //dFz = delta along the Fz axis in the frequency domain + //inNA = NA of the internal obscuration + //outNA = NA of the objective + //zR = number of pixels along the Fz axis (same as the z-axis) + //lambdaR = number of wavelengths + //sigma = width of the Gaussian source + int ifz = blockIdx.x * blockDim.x + threadIdx.x; + int inu = blockIdx.y * blockDim.y + threadIdx.y; + + if(inu >= lambdaR || ifz >= zR) return; + + //calculate the index into the sample FT + int i = inu * zR + ifz; + + //compute the frequency (and set all negative spatial frequencies to zero) + T fz; + if(ifz < zR / 2) + fz = ifz * dFz; + //if the spatial frequency is negative, set it to zero and exit + else{ + sampleFFT[i] = 0; + return; + } + + //compute the frequency in inverse microns + T nu = 1/lambda[inu]; + + //determine the radius of the integration circle + T nu_sq = nu * nu; + T fz_sq = (fz * fz) / 4; + + //cut off frequencies above the diffraction limit + T r; + if(fz_sq < nu_sq) + r = sqrt(nu_sq - fz_sq); + else + r = 0; + + //account for the optics + T Q = 0; + if(r > nu * inNA && r < nu * outNA) + Q = 1; + + //account for the source + //T sigma = 30.0; + T s = exp( - (r*r * sigma*sigma) / 2 ); + //T s=1; + + //compute the final filter + T mirst = 0; + if(fz != 0) + mirst = 2 * stimPI * r * s * Q * (1/fz); + + sampleFFT[i] *= mirst; + +} + +/*This object performs a 1-dimensional (layered) MIRST simulation +*/ +template +class mirst1d{ + +private: + unsigned int Z; //z-axis resolution + unsigned int pad; //pixel padding on either side of the sample + + std::vector< material > matlist; //list of materials + std::vector< T > layers; //list of layer thicknesses + + std::vector< T > lambdas; //list of wavelengths that are being simulated + unsigned int S; //number of wavelengths (size of "lambdas") + + T NA[2]; //numerical aperature (central obscuration and outer diameter) + + function source_profile; //profile (spectrum) of the source (expressed in inverse centimeters) + + complexfield scratch; //scratch GPU memory used to build samples, transforms, etc. + + void fft(int direction = CUFFT_FORWARD){ + + unsigned padZ = Z + pad; + + //create cuFFT handles + cufftHandle plan; + cufftResult result; + + if(sizeof(T) == 4) + result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision + else + result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision + + //check for Plan 1D errors + if(result != CUFFT_SUCCESS){ + std::cout<<"Error creating CUFFT plan for computing the FFT:"<(Z + pad , lambdas.size()); + scratch = 0; + } + + //get the list of scattering efficiency (eta) values for a specified layer + std::vector< complex > layer_etas(unsigned int l){ + + std::vector< complex > etas; + + //fill the list of etas + for(unsigned int i=0; i* gpuRi; + HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex) * S)); + + //allocate memory for the source profile + T* gpuSrc; + HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S)); + + complex ri; + T source; + //store the refractive index and source profile in a CPU array + for(int inu=0; inu), cudaMemcpyHostToDevice )); + + //save the source profile to the GPU + source = source_profile(10000 / lambdas[inu]); + HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice )); + + } + + //create one thread for each pixel of the field slice + dim3 gridDim, blockDim; + cuda_params(gridDim, blockDim); + stim::gpu_mirst1d_layer_fft<<>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S); + + int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size + int linGrid = S / linBlock + 1; + stim::gpu_mirst1d_increment_z <<>>(zf, gpuRi, wpx, S); + + //free memory + HANDLE_ERROR(cudaFree(gpuRi)); + HANDLE_ERROR(cudaFree(gpuSrc)); + } + + void build_sample(){ + init_scratch(); //initialize the GPU scratch space + //build_layer(1); + + T* zf; + HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S)); + HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S)); + + //render each layer of the sample + for(unsigned int l=0; l>>(scratch.ptr(), gpuLambdas, + dFz, + NA[0], NA[1], + S, Zpad); + } + + //crop the image to the sample thickness - keep in mind that sample thickness != optical path length + void crop(){ + + scratch = scratch.crop(Z, S); + } + + //save the scratch field as a binary file + void to_binary(std::string filename){ + + } + + +public: + + //constructor + mirst1d(unsigned int rZ = 100, + unsigned int padding = 0){ + Z = rZ; + pad = padding; + NA[0] = 0; + NA[1] = 0.8; + S = 0; + source_profile = 1; + } + + //add a layer, thickness = microns + void add_layer(material mat, T thickness){ + matlist.push_back(mat); + layers.push_back(thickness); + } + + void add_layer(std::string filename, T thickness){ + add_layer(material(filename), thickness); + } + + //adds a profile spectrum for the light source + void set_source(std::string filename){ + source_profile.load(filename); + } + + //adds a block of wavenumbers (cm^-1) to the simulation parameters + void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){ + unsigned int nu = start; + while(nu <= stop){ + lambdas.push_back((T)10000 / nu); + nu += step; + } + S = lambdas.size(); //increment the number of wavelengths (shorthand for later) + } + + T thickness(){ + T t = 0; + for(unsigned int l=0; l get_source(){ + return source_profile; + } + + void save_sample(std::string filename){ + //create a sample and save the magnitude as an image + build_sample(); + fft(CUFFT_INVERSE); + scratch.toImage(filename, stim::complexfield::magnitude); + } + + void save_mirst(std::string filename, bool binary = true){ + //apply the MIRST filter to a sample and save the image + + //build the sample in the Fourier domain + build_sample(); + + //apply the MIRST filter + apply_filter(); + + //apply an inverse FFT to bring the results back into the spatial domain + fft(CUFFT_INVERSE); + + crop(); + + //save the image + if(binary) + to_binary(filename); + else + scratch.toImage(filename, stim::complexfield::magnitude); + } + + + + + std::string str(){ + + stringstream ss; + ss<<"1D MIRST Simulation========================="< +#include + +#include "../math/vector.h" +#include "../math/quaternion.h" +#include "../math/constants.h" +#include "../math/plane.h" +#include "../cuda/callable.h" + +/*Basic conversions used here (assuming a vacuum) + lambda = +*/ + +namespace stim{ + namespace optics{ + +template +class planewave{ + +protected: + + vec k; //k = tau / lambda + vec< complex > E0; //amplitude + //T phi; + + CUDA_CALLABLE planewave bend(rts::vec kn) const{ + + vec kn_hat = kn.norm(); //normalize the new k + vec k_hat = k.norm(); //normalize the current k + + //std::cout<<"PLANE WAVE BENDING------------------"< new_p; //create a new plane wave + + //if kn is equal to k or -k, handle the degenerate case + T k_dot_kn = k_hat.dot(kn_hat); + + //if k . n < 0, then the bend is a reflection + //flip k_hat + if(k_dot_kn < 0) k_hat = -k_hat; + + //std::cout<<"k dot kn: "< r = k_hat.cross(kn_hat); //compute the rotation vector + + //std::cout<<"r: "< q; + q.CreateRotation(theta, r.norm()); + + //apply the rotation to E0 + vec< complex > E0n = q.toMatrix3() * E0; + + new_p.k = kn_hat * kmag(); + new_p.E0 = E0n; + + return new_p; + } + +public: + + + ///constructor: create a plane wave propagating along z, polarized along x + /*planewave(T lambda = (T)1) + { + k = rts::vec(0, 0, 1) * (TAU/lambda); + E0 = rts::vec(1, 0, 0); + }*/ + ///constructor: create a plane wave propagating along k, polarized along _E0, at frequency _omega + CUDA_CALLABLE planewave(vec kvec = rts::vec(0, 0, rtsTAU), + vec< complex > E = rts::vec(1, 0, 0), T phase = 0) + { + //phi = phase; + + k = kvec; + vec< complex > k_hat = k.norm(); + + if(E.len() == 0) //if the plane wave has an amplitude of 0 + E0 = vec(0); //just return it + else{ + vec< complex > s = (k_hat.cross(E)).norm(); //compute an orthogonal side vector + vec< complex > E_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector + E0 = E_hat * E_hat.dot(E); //compute the projection of _E0 onto E0_hat + } + + E0 = E0 * exp( complex(0, phase) ); + } + + ///multiplication operator: scale E0 + CUDA_CALLABLE planewave & operator* (const T & rhs) + { + + E0 = E0 * rhs; + return *this; + } + + CUDA_CALLABLE T lambda() const + { + return rtsTAU / k.len(); + } + + CUDA_CALLABLE T kmag() const + { + return k.len(); + } + + CUDA_CALLABLE vec< complex > E(){ + return E0; + } + + CUDA_CALLABLE vec kvec(){ + return k; + } + + /*CUDA_CALLABLE T phase(){ + return phi; + } + + CUDA_CALLABLE void phase(T p){ + phi = p; + }*/ + + CUDA_CALLABLE vec< complex > pos(vec p = vec(0, 0, 0)){ + vec< complex > result; + + T kdp = k.dot(p); + complex x = complex(0, kdp); + complex expx = exp(x); + + result[0] = E0[0] * expx; + result[1] = E0[1] * expx; + result[2] = E0[2] * expx; + + return result; + } + + //scales k based on a transition from material ni to material nt + CUDA_CALLABLE planewave n(T ni, T nt){ + return planewave(k * (nt / ni), E0); + } + + CUDA_CALLABLE planewave refract(rts::vec kn) const + { + return bend(kn); + } + + void scatter(rts::plane P, T nr, planewave &r, planewave &t){ + + int facing = P.face(k); //determine which direction the plane wave is coming in + + //if(facing == 0) //if the wave is tangent to the plane, return an identical wave + // return *this; + //else + if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr + P = P.flip(); //flip the plane + nr = 1/nr; //invert the refractive index (now nr = n0/n1) + } + + //use Snell's Law to calculate the transmitted angle + T cos_theta_i = k.norm().dot(-P.norm()); //compute the cosine of theta_i + T theta_i = acos(cos_theta_i); //compute theta_i + T sin_theta_t = (1/nr) * sin(theta_i); //compute the sine of theta_t using Snell's law + T theta_t = asin(sin_theta_t); //compute the cosine of theta_t + + bool tir = false; //flag for total internal reflection + if(theta_t != theta_t){ + tir = true; + theta_t = rtsPI / (T)2; + } + + //handle the degenerate case where theta_i is 0 (the plane wave hits head-on) + if(theta_i == 0){ + T rp = (1 - nr) / (1 + nr); //compute the Fresnel coefficients + T tp = 2 / (1 + nr); + vec kr = -k; + vec kt = k * nr; //set the k vectors for theta_i = 0 + vec< complex > Er = E0 * rp; //compute the E vectors + vec< complex > Et = E0 * tp; + T phase_t = P.p().dot(k - kt); //compute the phase offset + T phase_r = P.p().dot(k - kr); + //std::cout<<"Degeneracy: Head-On"<(kr, Er, phase_r); + t = planewave(kt, Et, phase_t); + + //std::cout<<"i + r: "< z_hat = -P.norm(); + vec y_hat = P.parallel(k).norm(); + vec x_hat = y_hat.cross(z_hat).norm(); + + //compute the k vectors for r and t + vec kr, kt; + kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag(); + kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr; + + //compute the magnitude of the p- and s-polarized components of the incident E vector + complex Ei_s = E0.dot(x_hat); + //int sgn = (0 < E0.dot(y_hat)) - (E0.dot(y_hat) < 0); + int sgn = E0.dot(y_hat).sgn(); + vec< complex > cx_hat = x_hat; + complex Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn; + //T Ei_p = ( E0 - x_hat * Ei_s ).len(); + //compute the magnitude of the p- and s-polarized components of the reflected E vector + complex Er_s = Ei_s * rs; + complex Er_p = Ei_p * rp; + //compute the magnitude of the p- and s-polarized components of the transmitted E vector + complex Et_s = Ei_s * ts; + complex Et_p = Ei_p * tp; + + //std::cout<<"E0: "< > Er = vec< complex >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s; + //compute the transmitted E vector + vec< complex > Et = vec< complex >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s; + + T phase_t = P.p().dot(k - kt); + T phase_r = P.p().dot(k - kr); + + //std::cout<<"phase r: "<(0, phase_r) ); + //r.phi = phase_r; + + //t = bend(kt); + //t.k = t.k * nr; + + t.k = kt; + t.E0 = Et * exp( complex(0, phase_t) ); + //t.phi = phase_t; + //std::cout<<"i: "< +std::ostream& operator<<(std::ostream& os, rts::planewave p) +{ + os<