Commit 8e4f836425e5bee321a75e5ee33dee9867df0a85

Authored by David Mayerich
1 parent 3ff136b3

started a new optical framework focused on scalar simulation

stim/cuda/cudatools/devices.h
... ... @@ -15,7 +15,7 @@ int maxThreadsPerBlock()
15 15 }
16 16  
17 17 extern "C"
18   -int sharedMemPerBlock()
  18 +size_t sharedMemPerBlock()
19 19 {
20 20 int device;
21 21 cudaGetDevice(&device); //get the id of the current device
... ... @@ -23,6 +23,16 @@ int sharedMemPerBlock()
23 23 cudaGetDeviceProperties(&props, device);
24 24 return props.sharedMemPerBlock;
25 25 }
  26 +
  27 +extern "C"
  28 +size_t constMem()
  29 +{
  30 + int device;
  31 + cudaGetDevice(&device); //get the id of the current device
  32 + cudaDeviceProp props; //device property structure
  33 + cudaGetDeviceProperties(&props, device);
  34 + return props.totalConstMem;
  35 +}
26 36 } //end namespace rts
27 37  
28 38 #endif
... ...
stim/cuda/sharedmem.cuh
... ... @@ -5,7 +5,7 @@
5 5 namespace stim{
6 6 namespace cuda{
7 7  
8   - // Copies values from global memory to shared memory, optimizing threads
  8 + // Copies values from texture memory to shared memory, optimizing threads
9 9 template<typename T>
10 10 __device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,
11 11 unsigned int x, unsigned int y, unsigned int X, unsigned int Y,
... ... @@ -35,6 +35,19 @@ namespace stim{
35 35 }
36 36 }
37 37  
  38 + // Copies values from global memory to shared memory, optimizing threads
  39 + template<typename T>
  40 + __device__ void sharedMemcpy(T* dest, T* src, size_t N, size_t tid, size_t nt){
  41 +
  42 + size_t I = N / nt + 1; //calculate the number of iterations required to make the copy
  43 + size_t xi = tid; //initialize the source and destination index to the thread ID
  44 + for(size_t i = 0; i < I; i++){ //for each iteration
  45 + if(xi < N) //if the index is within the copy region
  46 + dest[xi] = src[xi]; //perform the copy
  47 + xi += nt;
  48 + }
  49 + }
  50 +
38 51  
39 52 }
40 53 }
... ...
stim/math/bessel.h
... ... @@ -17,6 +17,11 @@ static complex&lt;double&gt; czero(0.0,0.0);
17 17 template< typename P >
18 18 P gamma(P x)
19 19 {
  20 + const P EPS = numeric_limits<P>::epsilon();
  21 + const P FPMIN_MAG = numeric_limits<P>::min();
  22 + const P FPMIN = numeric_limits<P>::lowest();
  23 + const P FPMAX = numeric_limits<P>::max();
  24 +
20 25 int i,k,m;
21 26 P ga,gr,r,z;
22 27  
... ... @@ -47,7 +52,7 @@ P gamma(P x)
47 52 -0.54e-14,
48 53 0.14e-14};
49 54  
50   - if (x > 171.0) return 1e308; // This value is an overflow flag.
  55 + if (x > 171.0) return FPMAX; // This value is an overflow flag.
51 56 if (x == (int)x) {
52 57 if (x > 0.0) {
53 58 ga = 1.0; // use factorial
... ... @@ -56,7 +61,7 @@ P gamma(P x)
56 61 }
57 62 }
58 63 else
59   - ga = 1e308;
  64 + ga = FPMAX;
60 65 }
61 66 else {
62 67 if (fabs(x) > 1.0) {
... ... @@ -89,6 +94,11 @@ template&lt;typename P&gt;
89 94 int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1,
90 95 P &j0p,P &j1p,P &y0p,P &y1p)
91 96 {
  97 + const P EPS = numeric_limits<P>::epsilon();
  98 + const P FPMIN_MAG = numeric_limits<P>::min();
  99 + const P FPMIN = numeric_limits<P>::lowest();
  100 + const P FPMAX = numeric_limits<P>::max();
  101 +
92 102 P x2,r,ec,w0,w1,r0,r1,cs0,cs1;
93 103 P cu,p0,q0,p1,q1,t1,t2;
94 104 int k,kz;
... ... @@ -157,12 +167,12 @@ int bessjy01a(P x,P &amp;j0,P &amp;j1,P &amp;y0,P &amp;y1,
157 167 if (x == 0.0) {
158 168 j0 = 1.0;
159 169 j1 = 0.0;
160   - y0 = -1e308;
161   - y1 = -1e308;
  170 + y0 = -FPMIN;
  171 + y1 = -FPMIN;
162 172 j0p = 0.0;
163 173 j1p = 0.5;
164   - y0p = 1e308;
165   - y1p = 1e308;
  174 + y0p = FPMAX;
  175 + y1p = FPMAX;
166 176 return 0;
167 177 }
168 178 x2 = x*x;
... ... @@ -329,7 +339,7 @@ int msta1(P x,int mp)
329 339 for (i=0;i<20;i++) {
330 340 nn = (int)(n1-(n1-n0)/(1.0-f0/f1));
331 341 f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-mp;
332   - if (abs(nn-n1) < 1) break;
  342 + if (std::abs(nn-n1) < 1) break;
333 343 n0 = n1;
334 344 f0 = f1;
335 345 n1 = nn;
... ... @@ -361,7 +371,7 @@ int msta2(P x,int n,int mp)
361 371 for (i=0;i<20;i++) {
362 372 nn = (int)(n1-(n1-n0)/(1.0-f0/f1));
363 373 f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-obj;
364   - if (abs(nn-n1) < 1) break;
  374 + if (std::abs(nn-n1) < 1) break;
365 375 n0 = n1;
366 376 f0 = f1;
367 377 n1 = nn;
... ... @@ -596,21 +606,26 @@ int bessjyv(P v,P x,P &amp;vm,P *jv,P *yv,
596 606 P b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk;
597 607 int j,k,l,m,n,kz;
598 608  
  609 + const P EPS = numeric_limits<P>::epsilon();
  610 + const P FPMIN_MAG = numeric_limits<P>::min();
  611 + const P FPMIN = numeric_limits<P>::lowest();
  612 + const P FPMAX = numeric_limits<P>::max();
  613 +
599 614 x2 = x*x;
600 615 n = (int)v;
601 616 v0 = v-n;
602 617 if ((x < 0.0) || (v < 0.0)) return 1;
603   - if (x < 1e-15) {
  618 + if (x < EPS) {
604 619 for (k=0;k<=n;k++) {
605 620 jv[k] = 0.0;
606   - yv[k] = -1e308;
  621 + yv[k] = FPMIN;
607 622 djv[k] = 0.0;
608   - dyv[k] = 1e308;
  623 + dyv[k] = FPMAX;
609 624 if (v0 == 0.0) {
610 625 jv[0] = 1.0;
611 626 djv[1] = 0.5;
612 627 }
613   - else djv[0] = 1e308;
  628 + else djv[0] = FPMAX;
614 629 }
615 630 vm = v;
616 631 return 0;
... ... @@ -623,7 +638,7 @@ int bessjyv(P v,P x,P &amp;vm,P *jv,P *yv,
623 638 for (k=1;k<=40;k++) {
624 639 r *= -0.25*x2/(k*(k+vl));
625 640 bjvl += r;
626   - if (fabs(r) < fabs(bjvl)*1e-15) break;
  641 + if (fabs(r) < fabs(bjvl)*EPS) break;
627 642 }
628 643 vg = 1.0 + vl;
629 644 a = pow(0.5*x,vl)/gamma(vg);
... ... @@ -686,7 +701,7 @@ int bessjyv(P v,P x,P &amp;vm,P *jv,P *yv,
686 701 if (m < n) n = m;
687 702 else m = msta2(x,n,15);
688 703 f2 = 0.0;
689   - f1 = 1.0e-100;
  704 + f1 = FPMIN_MAG;
690 705 for (k=m;k>=0;k--) {
691 706 f = 2.0*(v0+k+1.0)*f1/x-f2;
692 707 if (k <= n) jv[k] = f;
... ... @@ -766,17 +781,17 @@ int bessjyv_sph(int v, P z, P &amp;vm, P* cjv,
766 781 P* cyv, P* cjvp, P* cyvp)
767 782 {
768 783 //first, compute the bessel functions of fractional order
769   - bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
  784 + bessjyv<P>(v + (P)0.5, z, vm, cjv, cyv, cjvp, cyvp);
770 785  
771 786 //iterate through each and scale
772 787 for(int n = 0; n<=v; n++)
773 788 {
774 789  
775   - cjv[n] = cjv[n] * sqrt(rtsPI/(z * 2.0));
776   - cyv[n] = cyv[n] * sqrt(rtsPI/(z * 2.0));
  790 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  791 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
777 792  
778   - cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(rtsPI / (z * 2.0));
779   - cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(rtsPI / (z * 2.0));
  793 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
  794 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
780 795 }
781 796  
782 797 return 0;
... ... @@ -1498,11 +1513,11 @@ int cbessjyva_sph(int v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv,
1498 1513 for(int n = 0; n<=v; n++)
1499 1514 {
1500 1515  
1501   - cjv[n] = cjv[n] * sqrt(rtsPI/(z * 2.0));
1502   - cyv[n] = cyv[n] * sqrt(rtsPI/(z * 2.0));
  1516 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  1517 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
1503 1518  
1504   - cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(rtsPI / (z * 2.0));
1505   - cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(rtsPI / (z * 2.0));
  1519 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
  1520 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
1506 1521 }
1507 1522  
1508 1523 return 0;
... ...
stim/math/complex.h
1   -/*RTS Complex number class. This class is CUDA compatible,
2   -and can therefore be used in CUDA code and on CUDA devices.
3   -*/
  1 +/// CUDA compatible complex number class
4 2  
5   -#ifndef RTS_COMPLEX
6   -#define RTS_COMPLEX
  3 +#ifndef STIM_COMPLEX
  4 +#define STIM_COMPLEX
7 5  
8   -#include "../cuda/callable.h"
  6 +#include "../cuda/cudatools/callable.h"
9 7 #include <cmath>
10 8 #include <string>
11 9 #include <sstream>
... ... @@ -230,12 +228,6 @@ struct complex
230 228 return result;
231 229 }
232 230  
233   - /*CUDA_CALLABLE complex<T> pow(int y)
234   - {
235   -
236   - return pow((double)y);
237   - }*/
238   -
239 231 CUDA_CALLABLE complex<T> pow(T y)
240 232 {
241 233 complex<T> result;
... ... @@ -328,8 +320,31 @@ struct complex
328 320 return *this;
329 321 }
330 322  
  323 +
  324 +
331 325 };
332 326  
  327 +/// Cast an array of complex values to an array of real values
  328 +template<typename T>
  329 +static void real(T* r, complex<T>* c, size_t n){
  330 + for(size_t i = 0; i < n; i++)
  331 + r[i] = c[i].real();
  332 +}
  333 +
  334 +/// Cast an array of complex values to an array of real values
  335 +template<typename T>
  336 +static void imag(T* r, complex<T>* c, size_t n){
  337 + for(size_t i = 0; i < n; i++)
  338 + r[i] = c[i].imag();
  339 +}
  340 +
  341 +/// Calculate the magnitude of an array of complex values
  342 +template<typename T>
  343 +static void abs(T* m, complex<T>* c, size_t n){
  344 + for(size_t i = 0; i < n; i++)
  345 + m[i] = c[i].abs();
  346 +}
  347 +
333 348 } //end RTS namespace
334 349  
335 350 //addition
... ... @@ -432,17 +447,6 @@ CUDA_CALLABLE static T imag(stim::complex&lt;T&gt; a)
432 447 return a.i;
433 448 }
434 449  
435   -//trigonometric functions
436   -//template<class A>
437   -/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x)
438   -{
439   - stim::complex<float> result;
440   - result.r = sinf(x.r) * coshf(x.i);
441   - result.i = cosf(x.r) * sinhf(x.i);
442   -
443   - return result;
444   -}*/
445   -
446 450 template<class A>
447 451 CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)
448 452 {
... ... @@ -453,17 +457,6 @@ CUDA_CALLABLE stim::complex&lt;A&gt; sin(const stim::complex&lt;A&gt; x)
453 457 return result;
454 458 }
455 459  
456   -//floating point template
457   -//template<class A>
458   -/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x)
459   -{
460   - stim::complex<float> result;
461   - result.r = cosf(x.r) * coshf(x.i);
462   - result.i = -(sinf(x.r) * sinhf(x.i));
463   -
464   - return result;
465   -}*/
466   -
467 460 template<class A>
468 461 CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x)
469 462 {
... ... @@ -496,10 +489,4 @@ std::istream&amp; operator&gt;&gt;(std::istream&amp; is, stim::complex&lt;A&gt;&amp; x)
496 489 return is; //return the stream
497 490 }
498 491  
499   -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
500   -//template<class T> using rtsComplex = stim::complex<T>;
501   -//#endif
502   -
503   -
504   -
505 492 #endif
... ...
stim/math/constants.h
1   -#ifndef RTS_CONSTANTS_H
2   -#define RTS_CONSTANTS_H
  1 +#ifndef STIM_CONSTANTS_H
  2 +#define STIM_CONSTANTS_H
3 3  
4   -#define stimPI 3.14159
5   -#define stimTAU 2 * rtsPI
  4 +namespace stim{
  5 + const double PI = 3.1415926535897932384626433832795028841971693993751058209749445923078164062862;
  6 + const double TAU = 2 * stim::PI;
  7 +}
6 8  
7 9 #endif
... ...
stim/math/matrix.h
... ... @@ -50,10 +50,8 @@ struct matrix
50 50 return *this;
51 51 }
52 52  
53   -
54 53 template<typename Y>
55   - CUDA_CALLABLE vec<Y> operator*(vec<Y> rhs)
56   - {
  54 + vec<Y> operator*(vec<Y> rhs){
57 55 unsigned int N = rhs.size();
58 56  
59 57 vec<Y> result;
... ... @@ -66,6 +64,16 @@ struct matrix
66 64 return result;
67 65 }
68 66  
  67 + template<typename Y>
  68 + CUDA_CALLABLE vec3<Y> operator*(vec3<Y> rhs){
  69 + vec3<Y> result = 0;
  70 + for(int r=0; r<3; r++)
  71 + for(int c=0; c<3; c++)
  72 + result[r] += (*this)(r, c) * rhs[c];
  73 +
  74 + return result;
  75 + }
  76 +
69 77 std::string toStr()
70 78 {
71 79 std::stringstream ss;
... ... @@ -82,10 +90,6 @@ struct matrix
82 90  
83 91 return ss.str();
84 92 }
85   -
86   -
87   -
88   -
89 93 };
90 94  
91 95 } //end namespace rts
... ...
stim/math/meshgrid.h 0 โ†’ 100644
  1 +#ifndef STIM_MESHGRID_H
  2 +#define STIM_MESHGRID_H
  3 +
  4 +namespace stim{
  5 +
  6 + /// Create a 2D grid based on a pair of vectors representing the grid spacing (see Matlab)
  7 + /// @param X is an [nx x ny] array that will store the X coordinates for each 2D point
  8 + /// @param Y is an [nx x ny] array that will store the Y coordinates for each 2D point
  9 + /// @param x is an [nx] array that provides the positions of grid points in the x direction
  10 + /// @param nx is the number of grid points in the x direction
  11 + /// @param y is an [ny] array that provides the positions of grid points in the y direction
  12 + /// @param ny is the number of grid points in the y direction
  13 + template<typename T>
  14 + void meshgrid(T* X, T* Y, T* x, size_t nx, T* y, size_t ny){
  15 + size_t xi, yi; //allocate index variables
  16 + for(yi = 0; yi < ny; yi++){ //iterate through each column
  17 + for(xi = 0; xi < nx; xi++){ //iterate through each row
  18 + X[yi * nx + xi] = x[xi];
  19 + Y[yi * nx + xi] = y[yi];
  20 + }
  21 + }
  22 + }
  23 +
  24 + /// Creates an array of n equally spaced values in the range [xmin xmax]
  25 + /// @param X is an array of length n that stores the values
  26 + /// @param xmin is the start point of the array
  27 + /// @param xmax is the end point of the array
  28 + /// @param n is the number of points in the array
  29 + template<typename T>
  30 + void linspace(T* X, T xmin, T xmax, size_t n){
  31 + T alpha;
  32 + for(size_t i = 0; i < n; i++){
  33 + alpha = (T)i / (T)n;
  34 + X[i] = (1 - alpha) * xmin + alpha * xmax;
  35 + }
  36 + }
  37 +
  38 +
  39 +}
  40 +
  41 +
  42 +#endif
0 43 \ No newline at end of file
... ...
stim/math/quaternion.h
... ... @@ -26,13 +26,13 @@ public:
26 26  
27 27 CUDA_CALLABLE void CreateRotation(T theta, T ux, T uy, T uz){
28 28  
29   - vec<T> u(ux, uy, uz);
  29 + vec3<T> u(ux, uy, uz);
30 30 CreateRotation(theta, u);
31 31 }
32 32  
33   - CUDA_CALLABLE void CreateRotation(T theta, vec<T> u){
  33 + CUDA_CALLABLE void CreateRotation(T theta, vec3<T> u){
34 34  
35   - vec<T> u_hat = u.norm();
  35 + vec3<T> u_hat = u.norm();
36 36  
37 37 //assign the given Euler rotation to this quaternion
38 38 w = (T)cos(theta/2);
... ... @@ -41,9 +41,9 @@ public:
41 41 z = u_hat[2]*(T)sin(theta/2);
42 42 }
43 43  
44   - void CreateRotation(vec<T> from, vec<T> to){
  44 + CUDA_CALLABLE void CreateRotation(vec3<T> from, vec3<T> to){
45 45  
46   - vec<T> r = from.cross(to); //compute the rotation vector
  46 + vec3<T> r = from.cross(to); //compute the rotation vector
47 47 T theta = asin(r.len()); //compute the angle of the rotation about r
48 48 //deal with a zero vector (both k and kn point in the same direction)
49 49 if(theta == (T)0){
... ...
stim/math/vec3.h 0 โ†’ 100644
  1 +#ifndef STIM_VEC3_H
  2 +#define STIM_VEC3_H
  3 +
  4 +
  5 +#include <stim/cuda/cudatools/callable.h>
  6 +
  7 +
  8 +namespace stim{
  9 +
  10 +
  11 +/// A class designed to act as a 3D vector with CUDA compatibility
  12 +template<typename T>
  13 +class vec3{
  14 +
  15 +protected:
  16 + T ptr[3];
  17 +
  18 +public:
  19 +
  20 + CUDA_CALLABLE vec3(){}
  21 +
  22 + CUDA_CALLABLE vec3(T v){
  23 + ptr[0] = ptr[1] = ptr[2] = v;
  24 + }
  25 +
  26 + CUDA_CALLABLE vec3(T x, T y, T z){
  27 + ptr[0] = x;
  28 + ptr[1] = y;
  29 + ptr[2] = z;
  30 + }
  31 +
  32 + //copy constructor
  33 + CUDA_CALLABLE vec3( const vec3<T>& other){
  34 + ptr[0] = other.ptr[0];
  35 + ptr[1] = other.ptr[1];
  36 + ptr[2] = other.ptr[2];
  37 + }
  38 +
  39 + //access an element using an index
  40 + CUDA_CALLABLE T& operator[](int idx){
  41 + return ptr[idx];
  42 + }
  43 +
  44 +/// Casting operator. Creates a new vector with a new type U.
  45 + template< typename U >
  46 + CUDA_CALLABLE operator vec3<U>(){
  47 + vec3<U> result;
  48 + result.ptr[0] = (U)ptr[0];
  49 + result.ptr[1] = (U)ptr[1];
  50 + result.ptr[2] = (U)ptr[2];
  51 +
  52 + return result;
  53 + }
  54 +
  55 + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
  56 + CUDA_CALLABLE T len_sq() const{
  57 + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
  58 + }
  59 +
  60 + /// computes the Euclidean length of the vector
  61 + CUDA_CALLABLE T len() const{
  62 + return sqrt(len_sq());
  63 + }
  64 +
  65 +
  66 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  67 + CUDA_CALLABLE vec3<T> cart2sph() const{
  68 + vec3<T> sph;
  69 + sph.ptr[0] = len();
  70 + sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
  71 + if(sph.ptr[0] == 0)
  72 + sph.ptr[2] = 0;
  73 + else
  74 + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
  75 + return sph;
  76 + }
  77 +
  78 + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi])
  79 + CUDA_CALLABLE vec3<T> sph2cart() const{
  80 + vec3<T> cart;
  81 + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
  82 + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
  83 + cart.ptr[2] = ptr[0] * std::cos(ptr[2]);
  84 +
  85 + return cart;
  86 + }
  87 +
  88 + /// Computes the normalized vector (where each coordinate is divided by the L2 norm)
  89 + CUDA_CALLABLE vec3<T> norm() const{
  90 + vec3<T> result;
  91 + T l = len(); //compute the vector length
  92 + return (*this) / l;
  93 + }
  94 +
  95 + /// Computes the cross product of a 3-dimensional vector
  96 + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{
  97 +
  98 + vec3<T> result;
  99 +
  100 + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
  101 + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
  102 + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
  103 +
  104 + return result;
  105 + }
  106 +
  107 + /// Compute the Euclidean inner (dot) product
  108 + CUDA_CALLABLE T dot(vec3<T> rhs) const{
  109 + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
  110 + }
  111 +
  112 + /// Arithmetic addition operator
  113 +
  114 + /// @param rhs is the right-hand-side operator for the addition
  115 + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
  116 + vec3<T> result;
  117 + result.ptr[0] = ptr[0] + rhs[0];
  118 + result.ptr[1] = ptr[1] + rhs[1];
  119 + result.ptr[2] = ptr[2] + rhs[2];
  120 + return result;
  121 + }
  122 +
  123 + /// Arithmetic addition to a scalar
  124 +
  125 + /// @param rhs is the right-hand-side operator for the addition
  126 + CUDA_CALLABLE vec3<T> operator+(T rhs) const{
  127 + vec3<T> result;
  128 + result.ptr[0] = ptr[0] + rhs;
  129 + result.ptr[1] = ptr[1] + rhs;
  130 + result.ptr[2] = ptr[2] + rhs;
  131 + return result;
  132 + }
  133 +
  134 + /// Arithmetic subtraction operator
  135 +
  136 + /// @param rhs is the right-hand-side operator for the subtraction
  137 + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
  138 + vec3<T> result;
  139 + result.ptr[0] = ptr[0] - rhs[0];
  140 + result.ptr[1] = ptr[1] - rhs[1];
  141 + result.ptr[2] = ptr[2] - rhs[2];
  142 + return result;
  143 + }
  144 + /// Arithmetic subtraction to a scalar
  145 +
  146 + /// @param rhs is the right-hand-side operator for the addition
  147 + CUDA_CALLABLE vec3<T> operator-(T rhs) const{
  148 + vec3<T> result;
  149 + result.ptr[0] = ptr[0] - rhs;
  150 + result.ptr[1] = ptr[1] - rhs;
  151 + result.ptr[2] = ptr[2] - rhs;
  152 + return result;
  153 + }
  154 +
  155 + /// Arithmetic scalar multiplication operator
  156 +
  157 + /// @param rhs is the right-hand-side operator for the subtraction
  158 + CUDA_CALLABLE vec3<T> operator*(T rhs) const{
  159 + vec3<T> result;
  160 + result.ptr[0] = ptr[0] * rhs;
  161 + result.ptr[1] = ptr[1] * rhs;
  162 + result.ptr[2] = ptr[2] * rhs;
  163 + return result;
  164 + }
  165 +
  166 + /// Arithmetic scalar division operator
  167 +
  168 + /// @param rhs is the right-hand-side operator for the subtraction
  169 + CUDA_CALLABLE vec3<T> operator/(T rhs) const{
  170 + return (*this) * ((T)1.0/rhs);
  171 + }
  172 +
  173 + /// Multiplication by a scalar, followed by assignment
  174 + CUDA_CALLABLE vec3<T> operator*=(T rhs){
  175 + ptr[0] = ptr[0] * rhs;
  176 + ptr[1] = ptr[1] * rhs;
  177 + ptr[2] = ptr[2] * rhs;
  178 + return *this;
  179 + }
  180 +
  181 + /// Addition and assignment
  182 + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
  183 + ptr[0] = ptr[0] + rhs;
  184 + ptr[1] = ptr[1] + rhs;
  185 + ptr[2] = ptr[2] + rhs;
  186 + return *this;
  187 + }
  188 +
  189 + /// Assign a scalar to all values
  190 + CUDA_CALLABLE vec3<T> & operator=(T rhs){
  191 + ptr[0] = ptr[0] = rhs;
  192 + ptr[1] = ptr[1] = rhs;
  193 + ptr[2] = ptr[2] = rhs;
  194 + return *this;
  195 + }
  196 +
  197 + /// Casting and assignment
  198 + template<typename Y>
  199 + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
  200 + ptr[0] = (T)rhs.ptr[0];
  201 + ptr[1] = (T)rhs.ptr[1];
  202 + ptr[2] = (T)rhs.ptr[2];
  203 + return *this;
  204 + }
  205 +
  206 + /// Unary minus (returns the negative of the vector)
  207 + CUDA_CALLABLE vec3<T> operator-() const{
  208 + vec3<T> result;
  209 + result.ptr[0] = -ptr[0];
  210 + result.ptr[1] = -ptr[1];
  211 + result.ptr[2] = -ptr[2];
  212 + return result;
  213 + }
  214 +
  215 +
  216 + /// Outputs the vector as a string
  217 + std::string str() const{
  218 + std::stringstream ss;
  219 +
  220 + size_t N = size();
  221 +
  222 + ss<<"[";
  223 + for(size_t i=0; i<N; i++)
  224 + {
  225 + ss<<at(i);
  226 + if(i != N-1)
  227 + ss<<", ";
  228 + }
  229 + ss<<"]";
  230 +
  231 + return ss.str();
  232 + }
  233 + }; //end class triple
  234 +} //end namespace stim
  235 +
  236 +/// Multiply a vector by a constant when the vector is on the right hand side
  237 +template <typename T>
  238 +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){
  239 + return rhs * lhs;
  240 +}
  241 +
  242 +#endif
0 243 \ No newline at end of file
... ...
stim/math/vector.h
1   -#ifndef RTS_VECTOR_H
2   -#define RTS_VECTOR_H
  1 +#ifndef STIM_VECTOR_H
  2 +#define STIM_VECTOR_H
3 3  
4 4 #include <iostream>
5 5 #include <cmath>
... ... @@ -11,8 +11,6 @@
11 11 namespace stim
12 12 {
13 13  
14   -
15   -
16 14 template <class T>
17 15 struct vec : public std::vector<T>
18 16 {
... ...
stim/optics/planewave.h
1   -#ifndef RTS_PLANEWAVE
2   -#define RTS_PLANEWAVE
  1 +#ifndef STIM_PLANEWAVE_H
  2 +#define STIM_PLANEWAVE_H
3 3  
4 4 #include <string>
5 5 #include <sstream>
  6 +#include <cmath>
6 7  
7 8 #include "../math/vector.h"
8 9 #include "../math/quaternion.h"
9 10 #include "../math/constants.h"
10 11 #include "../math/plane.h"
11   -#include "../cuda/callable.h"
12   -
13   -/*Basic conversions used here (assuming a vacuum)
14   - lambda =
15   -*/
  12 +#include "../math/complex.h"
16 13  
17 14 namespace stim{
  15 + namespace optics{
  16 +
  17 + /// evaluate the scalar field produced by a plane wave at a point (x, y, z)
  18 +
  19 + /// @param x is the x-coordinate of the point
  20 + /// @param y is the y-coordinate of the point
  21 + /// @param z is the z-coordinate of the point
  22 + /// @param A is the amplitude of the plane wave, specifically the field at (0, 0, 0)
  23 + /// @param kx is the k-vector component in the x direction
  24 + /// @param ky is the k-vector component in the y direction
  25 + /// @param kz is the k-vector component in the z direction
  26 + template<typename T>
  27 + stim::complex<T> planewave_scalar(T x, T y, T z, stim::complex<T> A, T kx, T ky, T kz){
  28 + T d = x * kx + y * ky + z * kz; //calculate the dot product between k and p = (x, y, z) to find the distance p is along the propagation direction
  29 + stim::complex<T> di = stim::complex<T>(0, d); //calculate the phase shift that will have to be applied to propagate the wave distance d
  30 + return A * exp(di); //multiply the phase term by the amplitude at (0, 0, 0) to propagate the wave to p
  31 + }
  32 +
  33 + /// evaluate the scalar field produced by a plane wave at several positions
  34 +
  35 + /// @param field is a pre-allocated block of memory that will store the complex field at all points
  36 + /// @param N is the number of field values to be evaluated
  37 + /// @param x is a set of x coordinates defining positions within the field (NULL implies that all values are zero)
  38 + /// @param y is a set of y coordinates defining positions within the field (NULL implies that all values are zero)
  39 + /// @param z is a set of z coordinates defining positions within the field (NULL implies that all values are zero)
  40 + /// @param A is the amplitude of the plane wave, specifically the field at (0, 0, 0)
  41 + /// @param kx is the k-vector component in the x direction
  42 + /// @param ky is the k-vector component in the y direction
  43 + /// @param kz is the k-vector component in the z direction
  44 + template<typename T>
  45 + void cpu_planewave_scalar(stim::complex<T>* field, size_t N, T* x, T* y = NULL, T* z = NULL, stim::complex<T> A = 1.0, T kx = 0.0, T ky = 0.0, T kz = 0.0){
  46 + T px, py, pz;
  47 + for(size_t i = 0; i < N; i++){ // for each element in the array
  48 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values
  49 + (y == NULL) ? py = 0 : py = y[i];
  50 + (z == NULL) ? pz = 0 : pz = z[i];
  51 +
  52 + field[i] = planewave_scalar(px, py, pz, A, kx, ky, kz); // call the single-value plane wave function
  53 + }
  54 + }
18 55  
19 56 template<typename T>
20 57 class planewave{
21 58  
22 59 protected:
23 60  
24   - vec<T> k; //k = tau / lambda
25   - vec< complex<T> > E0; //amplitude
26   - //T phi;
27   -
28   - CUDA_CALLABLE planewave<T> bend(rts::vec<T> kn) const{
  61 + stim::vec<T> k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda
  62 + stim::vec< stim::complex<T> > E0; //amplitude (for a scalar plane wave, only E0[0] is used)
29 63  
30   - vec<T> kn_hat = kn.norm(); //normalize the new k
31   - vec<T> k_hat = k.norm(); //normalize the current k
  64 + /// Bend a plane wave via refraction, given that the new propagation direction is known
  65 + CUDA_CALLABLE planewave<T> bend(stim::vec<T> kn) const{
32 66  
33   - //std::cout<<"PLANE WAVE BENDING------------------"<<std::endl;
34   - //std::cout<<"kn_hat: "<<kn_hat<<" k_hat: "<<k_hat<<std::endl;
  67 + stim::vec<T> kn_hat = kn.norm(); //normalize the new k
  68 + stim::vec<T> k_hat = k.norm(); //normalize the current k
35 69  
36   - planewave<T> new_p; //create a new plane wave
  70 + planewave<T> new_p; //create a new plane wave
37 71  
38   - //if kn is equal to k or -k, handle the degenerate case
39   - T k_dot_kn = k_hat.dot(kn_hat);
  72 + T k_dot_kn = k_hat.dot(kn_hat); //if kn is equal to k or -k, handle the degenerate case
40 73  
41 74 //if k . n < 0, then the bend is a reflection
42   - //flip k_hat
43   - if(k_dot_kn < 0) k_hat = -k_hat;
  75 + if(k_dot_kn < 0) k_hat = -k_hat; //flip k_hat
44 76  
45   - //std::cout<<"k dot kn: "<<k_dot_kn<<std::endl;
46   -
47   - //std::cout<<"k_dot_kn: "<<k_dot_kn<<std::endl;
48 77 if(k_dot_kn == -1){
49 78 new_p.k = -k;
50 79 new_p.E0 = E0;
... ... @@ -56,28 +85,11 @@ protected:
56 85 return new_p;
57 86 }
58 87  
59   - vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector
60   -
61   - //std::cout<<"r: "<<r<<std::endl;
62   -
63   - T theta = asin(r.len()); //compute the angle of the rotation about r
64   -
65   -
66   -
67   - //deal with a zero vector (both k and kn point in the same direction)
68   - //if(theta == (T)0)
69   - //{
70   - // new_p = *this;
71   - // return new_p;
72   - //}
73   -
74   - //create a quaternion to capture the rotation
75   - quaternion<T> q;
76   - q.CreateRotation(theta, r.norm());
77   -
78   - //apply the rotation to E0
79   - vec< complex<T> > E0n = q.toMatrix3() * E0;
80   -
  88 + vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector
  89 + T theta = asin(r.len()); //compute the angle of the rotation about r
  90 + quaternion<T> q; //create a quaternion to capture the rotation
  91 + q.CreateRotation(theta, r.norm());
  92 + vec< complex<T> > E0n = q.toMatrix3() * E0; //apply the rotation to E0
81 93 new_p.k = kn_hat * kmag();
82 94 new_p.E0 = E0n;
83 95  
... ... @@ -86,16 +98,9 @@ protected:
86 98  
87 99 public:
88 100  
89   -
90   - ///constructor: create a plane wave propagating along z, polarized along x
91   - /*planewave(T lambda = (T)1)
92   - {
93   - k = rts::vec<T>(0, 0, 1) * (TAU/lambda);
94   - E0 = rts::vec<T>(1, 0, 0);
95   - }*/
96   - ///constructor: create a plane wave propagating along k, polarized along _E0, at frequency _omega
97   - CUDA_CALLABLE planewave(vec<T> kvec = rts::vec<T>(0, 0, rtsTAU),
98   - vec< complex<T> > E = rts::vec<T>(1, 0, 0), T phase = 0)
  101 + ///constructor: create a plane wave propagating along k
  102 + CUDA_CALLABLE planewave(vec<T> kvec = stim::vec<T>(0, 0, stim::TAU),
  103 + vec< complex<T> > E = stim::vec<T>(1, 0, 0))
99 104 {
100 105 //phi = phase;
101 106  
... ... @@ -107,27 +112,23 @@ public:
107 112 else{
108 113 vec< complex<T> > s = (k_hat.cross(E)).norm(); //compute an orthogonal side vector
109 114 vec< complex<T> > E_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector
110   - E0 = E_hat * E_hat.dot(E); //compute the projection of _E0 onto E0_hat
  115 + E0 = E_hat;// * E_hat.dot(E); //compute the projection of _E0 onto E0_hat
111 116 }
112 117  
113 118 E0 = E0 * exp( complex<T>(0, phase) );
114 119 }
115 120  
116 121 ///multiplication operator: scale E0
117   - CUDA_CALLABLE planewave<T> & operator* (const T & rhs)
118   - {
119   -
  122 + CUDA_CALLABLE planewave<T> & operator* (const T & rhs){
120 123 E0 = E0 * rhs;
121 124 return *this;
122 125 }
123 126  
124   - CUDA_CALLABLE T lambda() const
125   - {
126   - return rtsTAU / k.len();
  127 + CUDA_CALLABLE T lambda() const{
  128 + return stim::TAU / k.len();
127 129 }
128 130  
129   - CUDA_CALLABLE T kmag() const
130   - {
  131 + CUDA_CALLABLE T kmag() const{
131 132 return k.len();
132 133 }
133 134  
... ... @@ -139,14 +140,11 @@ public:
139 140 return k;
140 141 }
141 142  
142   - /*CUDA_CALLABLE T phase(){
143   - return phi;
  143 + /// calculate the value of the field produced by the plane wave given a three-dimensional position
  144 + CUDA_CALLABLE vec< complex<T> > pos(T x, T y, T z){
  145 + return pos( stim::vec<T>(x, y, z) );
144 146 }
145 147  
146   - CUDA_CALLABLE void phase(T p){
147   - phi = p;
148   - }*/
149   -
150 148 CUDA_CALLABLE vec< complex<T> > pos(vec<T> p = vec<T>(0, 0, 0)){
151 149 vec< complex<T> > result;
152 150  
... ... @@ -166,18 +164,32 @@ public:
166 164 return planewave<T>(k * (nt / ni), E0);
167 165 }
168 166  
169   - CUDA_CALLABLE planewave<T> refract(rts::vec<T> kn) const
170   - {
  167 + CUDA_CALLABLE planewave<T> refract(stim::vec<T> kn) const{
171 168 return bend(kn);
172 169 }
173 170  
174   - void scatter(rts::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){
  171 + /// Calculate the result of a plane wave hitting an interface between two refractive indices
  172 +
  173 + /// @param P is a plane representing the position and orientation of the surface
  174 + /// @param n0 is the refractive index outside of the surface (in the direction of the normal)
  175 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  176 + /// @param r is the reflected component of the plane wave
  177 + /// @param t is the transmitted component of the plane wave
  178 + void scatter(stim::plane<T> P, T n0, T n1, planewave<T> &r, planewave<T> &t){
  179 + scatter(P, n1/n0, r, t);
  180 + }
  181 +
  182 + /// Calculate the scattering result when nr = n1/n0
  183 +
  184 + /// @param P is a plane representing the position and orientation of the surface
  185 + /// @param r is the ration n1/n0
  186 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  187 + /// @param r is the reflected component of the plane wave
  188 + /// @param t is the transmitted component of the plane wave
  189 + void scatter(stim::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){
175 190  
176 191 int facing = P.face(k); //determine which direction the plane wave is coming in
177 192  
178   - //if(facing == 0) //if the wave is tangent to the plane, return an identical wave
179   - // return *this;
180   - //else
181 193 if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr
182 194 P = P.flip(); //flip the plane
183 195 nr = 1/nr; //invert the refractive index (now nr = n0/n1)
... ... @@ -192,7 +204,7 @@ public:
192 204 bool tir = false; //flag for total internal reflection
193 205 if(theta_t != theta_t){
194 206 tir = true;
195   - theta_t = rtsPI / (T)2;
  207 + theta_t = stim::PI / (T)2;
196 208 }
197 209  
198 210 //handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
... ... @@ -205,17 +217,10 @@ public:
205 217 vec< complex<T> > Et = E0 * tp;
206 218 T phase_t = P.p().dot(k - kt); //compute the phase offset
207 219 T phase_r = P.p().dot(k - kr);
208   - //std::cout<<"Degeneracy: Head-On"<<std::endl;
209   - //std::cout<<"rs: "<<rp<<" rp: "<<rp<<" ts: "<<tp<<" tp: "<<tp<<std::endl;
210   - //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;
211 220  
212 221 //create the plane waves
213 222 r = planewave<T>(kr, Er, phase_r);
214 223 t = planewave<T>(kt, Et, phase_t);
215   -
216   - //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;
217   - //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;
218   - //std::cout<<"--------------------------------"<<std::endl;
219 224 return;
220 225 }
221 226  
... ... @@ -245,11 +250,9 @@ public:
245 250  
246 251 //compute the magnitude of the p- and s-polarized components of the incident E vector
247 252 complex<T> Ei_s = E0.dot(x_hat);
248   - //int sgn = (0 < E0.dot(y_hat)) - (E0.dot(y_hat) < 0);
249 253 int sgn = E0.dot(y_hat).sgn();
250 254 vec< complex<T> > cx_hat = x_hat;
251 255 complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
252   - //T Ei_p = ( E0 - x_hat * Ei_s ).len();
253 256 //compute the magnitude of the p- and s-polarized components of the reflected E vector
254 257 complex<T> Er_s = Ei_s * rs;
255 258 complex<T> Er_p = Ei_p * rp;
... ... @@ -257,14 +260,6 @@ public:
257 260 complex<T> Et_s = Ei_s * ts;
258 261 complex<T> Et_p = Ei_p * tp;
259 262  
260   - //std::cout<<"E0: "<<E0<<std::endl;
261   - //std::cout<<"E0 dot y_hat: "<<E0.dot(y_hat)<<std::endl;
262   - //std::cout<<"theta i: "<<theta_i<<" theta t: "<<theta_t<<std::endl;
263   - //std::cout<<"x_hat: "<<x_hat<<" y_hat: "<<y_hat<<" z_hat: "<<z_hat<<std::endl;
264   - //std::cout<<"Ei_s: "<<Ei_s<<" Ei_p: "<<Ei_p<<" Er_s: "<<Er_s<<" Er_p: "<<Er_p<<" Et_s: "<<Et_s<<" Et_p: "<<Et_p<<std::endl;
265   - //std::cout<<"rs: "<<rs<<" rp: "<<rp<<" ts: "<<ts<<" tp: "<<tp<<std::endl;
266   -
267   -
268 263 //compute the reflected E vector
269 264 vec< complex<T> > Er = vec< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
270 265 //compute the transmitted E vector
... ... @@ -273,29 +268,12 @@ public:
273 268 T phase_t = P.p().dot(k - kt);
274 269 T phase_r = P.p().dot(k - kr);
275 270  
276   - //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;
277   -
278   - //std::cout<<"phase: "<<phase<<std::endl;
279   -
280 271 //create the plane waves
281 272 r.k = kr;
282 273 r.E0 = Er * exp( complex<T>(0, phase_r) );
283   - //r.phi = phase_r;
284   -
285   - //t = bend(kt);
286   - //t.k = t.k * nr;
287 274  
288 275 t.k = kt;
289 276 t.E0 = Et * exp( complex<T>(0, phase_t) );
290   - //t.phi = phase_t;
291   - //std::cout<<"i: "<<str()<<std::endl;
292   - //std::cout<<"r: "<<r.str()<<std::endl;
293   - //std::cout<<"t: "<<t.str()<<std::endl;
294   -
295   - //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;
296   - //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;
297   - //std::cout<<"--------------------------------"<<std::endl;
298   -
299 277 }
300 278  
301 279 std::string str()
... ... @@ -305,14 +283,15 @@ public:
305 283 ss<<" "<<E0<<" e^i ( "<<k<<" . r )";
306 284 return ss.str();
307 285 }
308   -};
309   -}
  286 +}; //end planewave class
  287 +} //end namespace optics
  288 +} //end namespace stim
310 289  
311 290 template <typename T>
312   -std::ostream& operator<<(std::ostream& os, rts::planewave<T> p)
  291 +std::ostream& operator<<(std::ostream& os, stim::optics::planewave<T> p)
313 292 {
314 293 os<<p.str();
315 294 return os;
316 295 }
317 296  
318 297 -#endif
  298 +#endif
319 299 \ No newline at end of file
... ...
stim/optics/scalarbeam.h 0 โ†’ 100644
  1 +#ifndef RTS_BEAM
  2 +#define RTS_BEAM
  3 +
  4 +#include "../math/vec3.h"
  5 +#include "../optics/scalarwave.h"
  6 +#include "../math/bessel.h"
  7 +#include <vector>
  8 +
  9 +//Boost
  10 +//#include <boost/math/special_functions/bessel.hpp>
  11 +//#include <boost/math/special_functions/legendre.hpp>
  12 +
  13 +namespace stim{
  14 +
  15 + /// Function returns the value of the scalar field produced by a beam with the specified parameters
  16 +
  17 + template<typename T>
  18 + std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d, T NA, T NA_in = 0){
  19 +
  20 + std::vector< stim::vec3<T> > dirs(N); //allocate an array to store the focusing vectors
  21 +
  22 + ///compute the rotation operator to transform (0, 0, 1) to k
  23 + T cos_angle = d.dot(vec3<T>(0, 0, 1));
  24 + stim::matrix<T, 3> rotation;
  25 +
  26 + //if the cosine of the angle is -1, the rotation is just a flip across the z axis
  27 + if(cos_angle == -1){
  28 + rotation(2, 2) = -1;
  29 + }
  30 + else if(cos_angle != 1.0)
  31 + {
  32 + vec3<T> r_axis = vec3<T>(0, 0, 1).cross(d).norm(); //compute the axis of rotation
  33 + T angle = acos(cos_angle); //compute the angle of rotation
  34 + quaternion<T> quat; //create a quaternion describing the rotation
  35 + quat.CreateRotation(angle, r_axis);
  36 + rotation = quat.toMatrix3(); //compute the rotation matrix
  37 + }
  38 +
  39 + //find the phi values associated with the cassegrain ring
  40 + T PHI[2];
  41 + PHI[0] = (T)asin(NA);
  42 + PHI[1] = (T)asin(NA_in);
  43 +
  44 + //calculate the z-axis cylinder coordinates associated with these angles
  45 + T Z[2];
  46 + Z[0] = cos(PHI[0]);
  47 + Z[1] = cos(PHI[1]);
  48 + T range = Z[0] - Z[1];
  49 +
  50 + //draw a distribution of random phi, z values
  51 + T z, phi, theta;
  52 + //T kmag = stim::TAU / lambda;
  53 + for(int i=0; i<N; i++){ //for each sample
  54 + z = (T)((double)rand() / (double)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder
  55 + theta = (T)(((double)rand() / (double)RAND_MAX) * stim::TAU);
  56 + phi = acos(z); //project onto the sphere, computing phi in spherical coordinates
  57 +
  58 + //compute and store cartesian coordinates
  59 + vec3<T> spherical(1, theta, phi); //convert from spherical to cartesian coordinates
  60 + vec3<T> cart = spherical.sph2cart();
  61 + dirs[i] = rotation * cart; //create a sample vector
  62 + }
  63 + return dirs;
  64 + }
  65 +
  66 +/// Class stim::beam represents a beam of light focused at a point and composed of several plane waves
  67 +template<typename T>
  68 +class scalarbeam
  69 +{
  70 +public:
  71 + //enum beam_type {Uniform, Bartlett, Hamming, Hanning};
  72 +
  73 +private:
  74 +
  75 + T NA[2]; //numerical aperature of the focusing optics
  76 + vec3<T> f; //focal point
  77 + vec3<T> d; //propagation direction
  78 + stim::complex<T> A; //beam amplitude
  79 + T lambda; //beam wavelength
  80 +public:
  81 +
  82 + ///constructor: build a default beam (NA=1.0)
  83 + scalarbeam(T wavelength = 1, stim::complex<T> amplitude = 1, vec3<T> focal_point = vec3<T>(0, 0, 0), vec3<T> direction = vec3<T>(0, 0, 1), T numerical_aperture = 1, T center_obsc = 0){
  84 + lambda = wavelength;
  85 + A = amplitude;
  86 + f = focal_point;
  87 + d = direction.norm(); //make sure that the direction vector is normalized (makes calculations more efficient later on)
  88 + NA[0] = numerical_aperture;
  89 + NA[1] = center_obsc;
  90 + }
  91 +
  92 + ///Numerical Aperature functions
  93 + void setNA(T na)
  94 + {
  95 + NA[0] = (T)0;
  96 + NA[1] = na;
  97 + }
  98 + void setNA(T na0, T na1)
  99 + {
  100 + NA[0] = na0;
  101 + NA[1] = na1;
  102 + }
  103 +
  104 + //Monte-Carlo decomposition into plane waves
  105 + std::vector< scalarwave<T> > mc(size_t N = 100000) const{
  106 +
  107 + std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]); //generate a random set of N vectors forming a focus
  108 + std::vector< scalarwave<T> > samples(N); //create a vector of plane waves
  109 + T kmag = (T)stim::TAU / lambda; //calculate the wavenumber
  110 + stim::complex<T> apw; //allocate space for the amplitude at the focal point
  111 + stim::vec3<T> kpw; //declare the new k-vector based on the focused plane wave direction
  112 + for(size_t i=0; i<N; i++){ //for each sample
  113 + kpw = dirs[i] * kmag; //calculate the k-vector for the new plane wave
  114 + apw = exp(stim::complex<T>(0, kpw.dot(-f))); //calculate the amplitude for the new plane wave
  115 + samples[i] = scalarwave<T>(kpw, apw); //create a plane wave based on the direction
  116 + }
  117 +
  118 + return samples;
  119 + }
  120 +
  121 + /// Calculate the field at a given point
  122 + /// @param x is the x-coordinate of the field point
  123 + /// @O is the approximation accuracy
  124 + stim::complex<T> field(T x, T y, T z, size_t O){
  125 + std::vector< scalarwave<T> > W = mc(O);
  126 + T result = 0; //initialize the result to zero (0)
  127 + for(size_t i = 0; i < O; i++){ //for each plane wave
  128 + result += W[i].pos(x, y, z);
  129 + }
  130 + return result;
  131 + }
  132 +
  133 + /// Calculate the field at a set of positions
  134 + /*void field(stim::complex<T>* F, T* x, T* y, T* z, size_t N, size_t O){
  135 +
  136 + memset(F, 0, N * sizeof(stim::complex<T>));
  137 + std::vector< scalarwave<T> > W = mc(O); //get a random set of plane waves representing the beam
  138 + size_t o, n;
  139 + T px, py, pz;
  140 + for(n = 0; n < N; n++){ //for each point in the field
  141 + (x == NULL) ? px = 0 : px = x[n]; // test for NULL values
  142 + (y == NULL) ? py = 0 : py = y[n];
  143 + (z == NULL) ? pz = 0 : pz = z[n];
  144 + for(o = 0; o < O; o++){ //for each plane wave
  145 + F[n] += W[o].pos(px, py, pz);
  146 + }
  147 + }
  148 + }*/
  149 +
  150 + std::string str()
  151 + {
  152 + std::stringstream ss;
  153 + ss<<"Beam:"<<std::endl;
  154 + //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
  155 + ss<<" Beam Direction: "<<d<<std::endl;
  156 + if(NA[0] == 0)
  157 + ss<<" NA: "<<NA[1];
  158 + else
  159 + ss<<" NA: "<<NA[0]<<" -- "<<NA[1];
  160 +
  161 + return ss.str();
  162 + }
  163 +
  164 +
  165 +
  166 +}; //end beam
  167 +
  168 +template<typename T>
  169 +void cpu_scalar_psf(stim::complex<T>* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, T NA, T NA_in, int Nl){
  170 +
  171 + memset(F, 0, N * sizeof(stim::complex<T>));
  172 + T k = stim::TAU / lambda;
  173 + T jl, Pl, C, kr, cos_phi;
  174 + T cos_alpha_1 = cos(asin(NA_in));
  175 + T cos_alpha_2 = cos(asin(NA));
  176 + stim::vec3<T> p, ps;
  177 +
  178 + /*double vm;
  179 + size_t table_bytes = (Nl + 1) * sizeof(double);
  180 + double* jv = (double*) malloc( table_bytes );
  181 + double* yv = (double*) malloc( table_bytes );
  182 + double* djv= (double*) malloc( table_bytes );
  183 + double* dyv= (double*) malloc( table_bytes );
  184 + */
  185 +
  186 + T vm;
  187 + size_t table_bytes = (Nl + 1) * sizeof(T);
  188 + T* jv = (T*) malloc( table_bytes );
  189 + T* yv = (T*) malloc( table_bytes );
  190 + T* djv= (T*) malloc( table_bytes );
  191 + T* dyv= (T*) malloc( table_bytes );
  192 +
  193 + for(size_t n = 0; n < N; n++){
  194 + (x == NULL) ? p[0] = 0 : p[0] = x[n]; // test for NULL values and set positions
  195 + (y == NULL) ? p[1] = 0 : p[1] = y[n];
  196 + (z == NULL) ? p[2] = 0 : p[2] = z[n];
  197 +
  198 + ps = p.cart2sph(); //convert this point to spherical coordinates
  199 + kr = k * ps[0];
  200 + cos_phi = std::cos(ps[2]);
  201 + stim::bessjyv_sph<T>(Nl, kr, vm, jv, yv, djv, dyv);
  202 +
  203 + for(int l = 0; l <= Nl; l++){
  204 + //jl = boost::math::sph_bessel<T>(l, kr);
  205 + //jl = stim::bessjyv(l, kr
  206 + jl = (T)jv[l];
  207 + Pl = 1;//boost::math::legendre_p<T>(l, cos_phi);
  208 + C = 1;//boost::math::legendre_p<T>(l+1, cos_alpha_1) - boost::math::legendre_p<T>(l + 1, cos_alpha_2) - boost::math::legendre_p<T>(l - 1, cos_alpha_1) + boost::math::legendre_p<T>(l - 1, cos_alpha_2);
  209 + F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C;
  210 + }
  211 + F[n] *= A * stim::TAU;
  212 + }
  213 +}
  214 +
  215 +} //end namespace stim
  216 +
  217 +#endif
... ...
stim/optics/scalarwave.h 0 โ†’ 100644
  1 +#ifndef STIM_SCALARWAVE_H
  2 +#define STIM_SCALARWAVE_H
  3 +
  4 +
  5 +#include <string>
  6 +#include <sstream>
  7 +#include <cmath>
  8 +
  9 +//#include "../math/vector.h"
  10 +#include "../math/vec3.h"
  11 +#include "../math/quaternion.h"
  12 +#include "../math/constants.h"
  13 +#include "../math/plane.h"
  14 +#include "../math/complex.h"
  15 +
  16 +//CUDA
  17 +#include "../cuda/cudatools/devices.h"
  18 +#include "../cuda/cudatools/error.h"
  19 +#include "../cuda/sharedmem.cuh"
  20 +
  21 +namespace stim{
  22 +
  23 +template<typename T>
  24 +class scalarwave{
  25 +
  26 +protected:
  27 +
  28 + stim::vec3<T> k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda
  29 + stim::complex<T> E0; //amplitude
  30 +
  31 + /// Bend a plane wave via refraction, given that the new propagation direction is known
  32 + CUDA_CALLABLE scalarwave<T> bend(stim::vec3<T> kn) const{
  33 + return scalarwave<T>(kn.norm() * kmag(), E0);
  34 + }
  35 +
  36 +public:
  37 +
  38 + ///constructor: create a plane wave propagating along k
  39 + CUDA_CALLABLE scalarwave(vec3<T> kvec = stim::vec3<T>(0, 0, (T)stim::TAU), complex<T> E = 1){
  40 + k = kvec;
  41 + E0 = E;
  42 + }
  43 +
  44 + CUDA_CALLABLE scalarwave(T kx, T ky, T kz, complex<T> E = 1){
  45 + k = vec3<T>(kx, ky, kz);
  46 + E0 = E;
  47 + }
  48 +
  49 + ///multiplication operator: scale E0
  50 + CUDA_CALLABLE scalarwave<T> & operator* (const T & rhs){
  51 + E0 = E0 * rhs;
  52 + return *this;
  53 + }
  54 +
  55 + CUDA_CALLABLE T lambda() const{
  56 + return stim::TAU / k.len();
  57 + }
  58 +
  59 + CUDA_CALLABLE T kmag() const{
  60 + return k.len();
  61 + }
  62 +
  63 + CUDA_CALLABLE vec3< complex<T> > E(){
  64 + return E0;
  65 + }
  66 +
  67 + CUDA_CALLABLE vec3<T> kvec(){
  68 + return k;
  69 + }
  70 +
  71 + /// calculate the value of the field produced by the plane wave given a three-dimensional position
  72 + CUDA_CALLABLE complex<T> pos(T x, T y, T z){
  73 + return pos( stim::vec3<T>(x, y, z) );
  74 + }
  75 +
  76 + CUDA_CALLABLE complex<T> pos(vec3<T> p = vec3<T>(0, 0, 0)){
  77 + return E0 * exp(complex<T>(0, k.dot(p)));
  78 + }
  79 +
  80 + //scales k based on a transition from material ni to material nt
  81 + CUDA_CALLABLE scalarwave<T> n(T ni, T nt){
  82 + return scalarwave<T>(k * (nt / ni), E0);
  83 + }
  84 +
  85 + CUDA_CALLABLE scalarwave<T> refract(stim::vec3<T> kn) const{
  86 + return bend(kn);
  87 + }
  88 +
  89 + /// Calculate the result of a plane wave hitting an interface between two refractive indices
  90 +
  91 + /// @param P is a plane representing the position and orientation of the surface
  92 + /// @param n0 is the refractive index outside of the surface (in the direction of the normal)
  93 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  94 + /// @param r is the reflected component of the plane wave
  95 + /// @param t is the transmitted component of the plane wave
  96 + void scatter(stim::plane<T> P, T n0, T n1, scalarwave<T> &r, scalarwave<T> &t){
  97 + scatter(P, n1/n0, r, t);
  98 + }
  99 +
  100 + /// Calculate the scattering result when nr = n1/n0
  101 +
  102 + /// @param P is a plane representing the position and orientation of the surface
  103 + /// @param r is the ration n1/n0
  104 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  105 + /// @param r is the reflected component of the plane wave
  106 + /// @param t is the transmitted component of the plane wave
  107 + void scatter(stim::plane<T> P, T nr, scalarwave<T> &r, scalarwave<T> &t){
  108 + /*
  109 + int facing = P.face(k); //determine which direction the plane wave is coming in
  110 +
  111 + if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr
  112 + P = P.flip(); //flip the plane
  113 + nr = 1/nr; //invert the refractive index (now nr = n0/n1)
  114 + }
  115 +
  116 + //use Snell's Law to calculate the transmitted angle
  117 + T cos_theta_i = k.norm().dot(-P.norm()); //compute the cosine of theta_i
  118 + T theta_i = acos(cos_theta_i); //compute theta_i
  119 + T sin_theta_t = (1/nr) * sin(theta_i); //compute the sine of theta_t using Snell's law
  120 + T theta_t = asin(sin_theta_t); //compute the cosine of theta_t
  121 +
  122 + bool tir = false; //flag for total internal reflection
  123 + if(theta_t != theta_t){
  124 + tir = true;
  125 + theta_t = stim::PI / (T)2;
  126 + }
  127 +
  128 + //handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
  129 + if(theta_i == 0){
  130 + T rp = (1 - nr) / (1 + nr); //compute the Fresnel coefficients
  131 + T tp = 2 / (1 + nr);
  132 + vec3<T> kr = -k;
  133 + vec3<T> kt = k * nr; //set the k vectors for theta_i = 0
  134 + vec3< complex<T> > Er = E0 * rp; //compute the E vectors
  135 + vec3< complex<T> > Et = E0 * tp;
  136 + T phase_t = P.p().dot(k - kt); //compute the phase offset
  137 + T phase_r = P.p().dot(k - kr);
  138 +
  139 + //create the plane waves
  140 + r = planewave<T>(kr, Er, phase_r);
  141 + t = planewave<T>(kt, Et, phase_t);
  142 + return;
  143 + }
  144 +
  145 +
  146 + //compute the Fresnel coefficients
  147 + T rp, rs, tp, ts;
  148 + rp = tan(theta_t - theta_i) / tan(theta_t + theta_i);
  149 + rs = sin(theta_t - theta_i) / sin(theta_t + theta_i);
  150 +
  151 + if(tir){
  152 + tp = ts = 0;
  153 + }
  154 + else{
  155 + tp = ( 2 * sin(theta_t) * cos(theta_i) ) / ( sin(theta_t + theta_i) * cos(theta_t - theta_i) );
  156 + ts = ( 2 * sin(theta_t) * cos(theta_i) ) / sin(theta_t + theta_i);
  157 + }
  158 +
  159 + //compute the coordinate space for the plane of incidence
  160 + vec3<T> z_hat = -P.norm();
  161 + vec3<T> y_hat = P.parallel(k).norm();
  162 + vec3<T> x_hat = y_hat.cross(z_hat).norm();
  163 +
  164 + //compute the k vectors for r and t
  165 + vec3<T> kr, kt;
  166 + kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag();
  167 + kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr;
  168 +
  169 + //compute the magnitude of the p- and s-polarized components of the incident E vector
  170 + complex<T> Ei_s = E0.dot(x_hat);
  171 + int sgn = E0.dot(y_hat).sgn();
  172 + vec3< complex<T> > cx_hat = x_hat;
  173 + complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
  174 + //compute the magnitude of the p- and s-polarized components of the reflected E vector
  175 + complex<T> Er_s = Ei_s * rs;
  176 + complex<T> Er_p = Ei_p * rp;
  177 + //compute the magnitude of the p- and s-polarized components of the transmitted E vector
  178 + complex<T> Et_s = Ei_s * ts;
  179 + complex<T> Et_p = Ei_p * tp;
  180 +
  181 + //compute the reflected E vector
  182 + vec3< complex<T> > Er = vec3< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
  183 + //compute the transmitted E vector
  184 + vec3< complex<T> > Et = vec3< complex<T> >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s;
  185 +
  186 + T phase_t = P.p().dot(k - kt);
  187 + T phase_r = P.p().dot(k - kr);
  188 +
  189 + //create the plane waves
  190 + r.k = kr;
  191 + r.E0 = Er * exp( complex<T>(0, phase_r) );
  192 +
  193 + t.k = kt;
  194 + t.E0 = Et * exp( complex<T>(0, phase_t) );
  195 + */
  196 + }
  197 +
  198 + std::string str()
  199 + {
  200 + std::stringstream ss;
  201 + ss<<"Plane Wave:"<<std::endl;
  202 + ss<<" "<<E0<<" e^i ( "<<k<<" . r )";
  203 + return ss.str();
  204 + }
  205 +}; //end planewave class
  206 +
  207 +
  208 +/// CUDA kernel for computing the field produced by a batch of plane waves at an array of locations
  209 +template<typename T>
  210 +__global__ void cuda_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t n_waves){
  211 + extern __shared__ stim::scalarwave<T> shared_W[]; //declare the list of waves in shared memory
  212 +
  213 + stim::cuda::sharedMemcpy(shared_W, W, n_waves, threadIdx.x, blockDim.x); //copy the plane waves into shared memory for faster access
  214 + __syncthreads(); //synchronize threads to insure all data is copied
  215 +
  216 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  217 + if(i >= N) return; //exit if this thread is outside the array
  218 + T px, py, pz;
  219 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values and set positions
  220 + (y == NULL) ? py = 0 : py = y[i];
  221 + (z == NULL) ? pz = 0 : pz = z[i];
  222 +
  223 + stim::complex<T> f = 0; //create a register to store the result
  224 + for(size_t w = 0; w < n_waves; w++)
  225 + f += shared_W[w].pos(px, py, pz); //evaluate the plane wave
  226 + F[i] += f; //copy the result to device memory
  227 +}
  228 +
  229 +/// evaluate a scalar wave at several points, where all arrays are on the GPU
  230 +template<typename T>
  231 +void gpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  232 +
  233 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  234 + dim3 blocks(N / threads + 1); //calculate the optimal number of blocks
  235 + cuda_scalarwave<T><<< blocks, threads >>>(F, N, x, y, z, w); //call the kernel
  236 +}
  237 +
  238 +/// Sums a series of coherent plane waves at a specified point
  239 +/// @param field is the output array of field values corresponding to each input point
  240 +/// @param x is an array of x coordinates for the field point
  241 +/// @param y is an array of y coordinates for the field point
  242 +/// @param z is an array of z coordinates for the field point
  243 +/// @param N is the number of points in the input and output arrays
  244 +/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
  245 +/// @param A is the list of amplitudes for each wave
  246 +/// @param S is the list of propagation directions for each wave
  247 +template<typename T>
  248 +void cpu_sum_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > w_array){
  249 + size_t S = w_array.size(); //store the number of waves
  250 +#ifdef NO_CUDA
  251 + memset(F, 0, N * sizeof(stim::complex<T>));
  252 + T px, py, pz;
  253 + for(size_t i = 0; i < N; i++){ // for each element in the array
  254 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values
  255 + (y == NULL) ? py = 0 : py = y[i];
  256 + (z == NULL) ? pz = 0 : pz = z[i];
  257 +
  258 + for(size_t s = 0; s < S; s++){
  259 + F[i] += w_array[s].pos(px, py, pz); //sum all plane waves at this point
  260 + }
  261 + }
  262 +#else
  263 + stim::complex<T>* dev_F; //allocate space for the field
  264 + cudaMalloc(&dev_F, N * sizeof(stim::complex<T>));
  265 + cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>)); //set the field to zero (necessary because a sum is used)
  266 +
  267 + T* dev_x = NULL; //allocate space and copy the X coordinate (if specified)
  268 + if(x != NULL){
  269 + HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T)));
  270 + HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice));
  271 + }
  272 +
  273 + T* dev_y = NULL; //allocate space and copy the Y coordinate (if specified)
  274 + if(y != NULL){
  275 + HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T)));
  276 + HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice));
  277 + }
  278 +
  279 + T* dev_z = NULL; //allocate space and copy the Z coordinate (if specified)
  280 + if(z != NULL){
  281 + HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T)));
  282 + HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice));
  283 + }
  284 +
  285 + size_t wave_bytes = sizeof(stim::scalarwave<T>);
  286 + size_t shared_bytes = stim::sharedMemPerBlock(); //calculate the maximum amount of shared memory available
  287 + size_t array_bytes = w_array.size() * wave_bytes; //calculate the maximum number of bytes required for the planewave array
  288 + size_t max_batch = shared_bytes / wave_bytes; //calculate number of plane waves that will fit into shared memory
  289 + size_t num_batches = w_array.size() / max_batch + 1; //calculate the number of batches required to process all plane waves
  290 + size_t batch_bytes = min(w_array.size(), max_batch) * wave_bytes; //initialize the batch size (in bytes) to the maximum batch required
  291 +
  292 + stim::scalarwave<T>* dev_w;
  293 + HANDLE_ERROR(cudaMalloc(&dev_w, batch_bytes)); //allocate memory for a single batch of plane waves
  294 +
  295 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  296 + dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  297 +
  298 + size_t batch_size; //declare a variable to store the size of the current batch
  299 + size_t waves_processed = 0; //initialize the number of waves processed to zero
  300 + while(waves_processed < w_array.size()){ //while there are still waves to be processed
  301 + batch_size = min<size_t>(max_batch, w_array.size() - waves_processed); //process either a whole batch, or whatever is left
  302 + batch_bytes = batch_size * sizeof(stim::scalarwave<T>);
  303 + HANDLE_ERROR(cudaMemcpy(dev_w, &w_array[waves_processed], batch_bytes, cudaMemcpyHostToDevice)); //copy the plane waves into global memory
  304 + cuda_scalarwave<T><<< blocks, threads, batch_bytes >>>(dev_F, N, dev_x, dev_y, dev_z, dev_w, batch_size); //call the kernel
  305 + waves_processed += batch_size; //increment the counter indicating how many waves have been processed
  306 + }
  307 +
  308 + cudaMemcpy(F, dev_F, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost); //copy the field from device memory
  309 +
  310 + if(x != NULL) cudaFree(dev_x); //free everything
  311 + if(y != NULL) cudaFree(dev_y);
  312 + if(z != NULL) cudaFree(dev_z);
  313 + cudaFree(dev_F);
  314 + cudaFree(dev_w);
  315 +
  316 +#endif
  317 +}
  318 +
  319 +template<typename T>
  320 +void cpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  321 + std::vector< stim::scalarwave<T> > w_array(1, w);
  322 + cpu_sum_scalarwaves(F, N, x, y, z, w_array);
  323 +}
  324 +
  325 +
  326 +/// Sums a series of coherent plane waves at a specified point
  327 +/// @param x is the x coordinate of the field point
  328 +/// @param y is the y coordinate of the field point
  329 +/// @param z is the z coordinate of the field point
  330 +/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
  331 +/// @param A is the list of amplitudes for each wave
  332 +/// @param S is the list of propagation directions for each wave
  333 +template<typename T>
  334 +CUDA_CALLABLE stim::complex<T> sum_scalarwaves(T x, T y, T z, std::vector< stim::scalarwave<T> > W){
  335 + size_t N = W.size(); //get the number of plane wave samples
  336 + stim::complex<T> field(0, 0); //initialize the field to zero (0)
  337 + stim::vec3<T> k; //allocate space for the direction vector
  338 + for(size_t i = 0; i < N; i++){
  339 + field += W[i].pos(x, y, z);
  340 + }
  341 + return field;
  342 +}
  343 +
  344 +} //end namespace stim
  345 +
  346 +template <typename T>
  347 +std::ostream& operator<<(std::ostream& os, stim::scalarwave<T> p)
  348 +{
  349 + os<<p.str();
  350 + return os;
  351 +}
  352 +
  353 +#endif
0 354 \ No newline at end of file
... ...
stim/optics/beam.h renamed to stim/optics_old/beam.h
1   -#ifndef RTS_BEAM
2   -#define RTS_BEAM
3   -
4   -#include "../math/vector.h"
5   -#include "../math/function.h"
6   -#include "../optics/planewave.h"
7   -#include <vector>
8   -
9   -namespace stim{
10   -
11   -template<typename P>
12   -class beam : public planewave<P>
13   -{
14   -public:
15   - enum beam_type {Uniform, Bartlett, Hamming, Hanning};
16   -
17   -private:
18   -
19   - P _na[2]; //numerical aperature of the focusing optics
20   - vec<P> f; //focal point
21   - function<P, P> apod; //apodization function
22   - unsigned int apod_res; //resolution of apodization filter functions
23   -
24   - void apod_uniform()
25   - {
26   - apod = (P)1;
27   - }
28   - void apod_bartlett()
29   - {
30   - apod = (P)1;
31   - apod.insert((P)1, (P)0);
32   - }
33   - void apod_hanning()
34   - {
35   - apod = (P)0;
36   - P x, y;
37   - for(unsigned int n=0; n<apod_res; n++)
38   - {
39   - x = (P)n/(P)apod_res;
40   - y = pow( cos( ((P)3.14159 * x) / 2 ), 2);
41   - apod.insert(x, y);
42   - }
43   - }
44   - void apod_hamming()
45   - {
46   - apod = (P)0;
47   - P x, y;
48   - for(unsigned int n=0; n<apod_res; n++)
49   - {
50   - x = (P)n/(P)apod_res;
51   - y = (P)27/(P)50 + ( (P)23/(P)50 ) * cos((P)3.14159 * x);
52   - apod.insert(x, y);
53   - }
54   - }
55   -
56   - void set_apod(beam_type type)
57   - {
58   - if(type == Uniform)
59   - apod_uniform();
60   - if(type == Bartlett)
61   - apod_bartlett();
62   - if(type == Hanning)
63   - apod_hanning();
64   - if(type == Hamming)
65   - apod_hamming();
66   - }
67   -
68   -public:
69   -
70   - ///constructor: build a default beam (NA=1.0)
71   - beam(
72   - vec<P> k = rts::vec<P>(0, 0, rtsTAU),
73   - vec<P> _E0 = rts::vec<P>(1, 0, 0),
74   - beam_type _apod = Uniform)
75   - : planewave<P>(k, _E0)
76   - {
77   - _na[0] = (P)0.0;
78   - _na[1] = (P)1.0;
79   - f = vec<P>( (P)0, (P)0, (P)0 );
80   - apod_res = 256; //set the default resolution for apodization filters
81   - set_apod(_apod); //set the apodization function type
82   - }
83   -
84   - beam<P> refract(rts::vec<P> kn) const{
85   -
86   - beam<P> new_beam;
87   - new_beam._na[0] = _na[0];
88   - new_beam._na[1] = _na[1];
89   -
90   -
91   - rts::planewave<P> pw = planewave<P>::bend(kn);
92   - //std::cout<<pw.str()<<std::endl;
93   -
94   - new_beam.k = pw.kvec();
95   - new_beam.E0 = pw.E();
96   -
97   - return new_beam;
98   - }
99   -
100   - ///Numerical Aperature functions
101   - void NA(P na)
102   - {
103   - _na[0] = (P)0;
104   - _na[1] = na;
105   - }
106   - void NA(P na0, P na1)
107   - {
108   - _na[0] = na0;
109   - _na[1] = na1;
110   - }
111   -
112   - /*string str() :
113   - {
114   - stringstream ss;
115   - ss<<"Beam Center: "<<k<<std::endl;
116   -
117   - return ss.str();
118   - }*/
119   -
120   - //Monte-Carlo decomposition into plane waves
121   - std::vector< planewave<P> > mc(unsigned int N = 100000, unsigned int seed = 0) const
122   - {
123   - /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling
124   - of a sphere and projecting these samples onto an inscribed sphere.
125   -
126   - seed = seed for the random number generator
127   - */
128   - srand(seed); //seed the random number generator
129   -
130   - vec<P> k_hat = beam::k.norm();
131   -
132   - ///compute the rotation operator to transform (0, 0, 1) to k
133   - P cos_angle = k_hat.dot(rts::vec<P>(0, 0, 1));
134   - rts::matrix<P, 3> rotation;
135   -
136   - //if the cosine of the angle is -1, the rotation is just a flip across the z axis
137   - if(cos_angle == -1){
138   - rotation(2, 2) = -1;
139   - }
140   - else if(cos_angle != 1.0)
141   - {
142   - rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k_hat).norm(); //compute the axis of rotation
143   - P angle = acos(cos_angle); //compute the angle of rotation
144   - rts::quaternion<P> quat; //create a quaternion describing the rotation
145   - quat.CreateRotation(angle, r_axis);
146   - rotation = quat.toMatrix3(); //compute the rotation matrix
147   - }
148   -
149   - //find the phi values associated with the cassegrain ring
150   - P PHI[2];
151   - PHI[0] = (P)asin(_na[0]);
152   - PHI[1] = (P)asin(_na[1]);
153   -
154   - //calculate the z-axis cylinder coordinates associated with these angles
155   - P Z[2];
156   - Z[0] = cos(PHI[0]);
157   - Z[1] = cos(PHI[1]);
158   - P range = Z[0] - Z[1];
159   -
160   - std::vector< planewave<P> > samples; //create a vector of plane waves
161   -
162   - //draw a distribution of random phi, z values
163   - P z, phi, theta;
164   - for(int i=0; i<N; i++) //for each sample
165   - {
166   - z = ((P)rand() / (P)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder
167   - theta = ((P)rand() / (P)RAND_MAX) * 2 * (P)3.14159;
168   - phi = acos(z); //project onto the sphere, computing phi in spherical coordinates
169   -
170   - //compute and store cartesian coordinates
171   - rts::vec<P> spherical(1, theta, phi); //convert from spherical to cartesian coordinates
172   - rts::vec<P> cart = spherical.sph2cart();
173   - vec<P> k_prime = rotation * cart; //create a sample vector
174   -
175   - //store a wave refracted along the given direction
176   - //std::cout<<"k prime: "<<rotation<<std::endl;
177   - samples.push_back(planewave<P>::refract(k_prime) * apod(phi/PHI[1]));
178   - }
179   -
180   - return samples;
181   - }
182   -
183   - std::string str()
184   - {
185   - std::stringstream ss;
186   - ss<<"Beam:"<<std::endl;
187   - //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
188   - ss<<" Central Plane Wave: "<<beam::k<<std::endl;
189   - if(_na[0] == 0)
190   - ss<<" NA: "<<_na[1];
191   - else
192   - ss<<" NA: "<<_na[0]<<" -- "<<_na[1];
193   -
194   - return ss.str();
195   - }
196   -
197   -
198   -
199   -};
200   -
201   -}
202   -
203   -#endif
  1 +#ifndef RTS_BEAM
  2 +#define RTS_BEAM
  3 +
  4 +#include "../math/vector.h"
  5 +#include "../math/function.h"
  6 +#include "../optics/planewave.h"
  7 +#include <vector>
  8 +
  9 +namespace stim{
  10 +
  11 +template<typename P>
  12 +class beam : public planewave<P>
  13 +{
  14 +public:
  15 + enum beam_type {Uniform, Bartlett, Hamming, Hanning};
  16 +
  17 +private:
  18 +
  19 + P _na[2]; //numerical aperature of the focusing optics
  20 + vec<P> f; //focal point
  21 + function<P, P> apod; //apodization function
  22 + unsigned int apod_res; //resolution of apodization filter functions
  23 +
  24 + void apod_uniform()
  25 + {
  26 + apod = (P)1;
  27 + }
  28 + void apod_bartlett()
  29 + {
  30 + apod = (P)1;
  31 + apod.insert((P)1, (P)0);
  32 + }
  33 + void apod_hanning()
  34 + {
  35 + apod = (P)0;
  36 + P x, y;
  37 + for(unsigned int n=0; n<apod_res; n++)
  38 + {
  39 + x = (P)n/(P)apod_res;
  40 + y = pow( cos( ((P)3.14159 * x) / 2 ), 2);
  41 + apod.insert(x, y);
  42 + }
  43 + }
  44 + void apod_hamming()
  45 + {
  46 + apod = (P)0;
  47 + P x, y;
  48 + for(unsigned int n=0; n<apod_res; n++)
  49 + {
  50 + x = (P)n/(P)apod_res;
  51 + y = (P)27/(P)50 + ( (P)23/(P)50 ) * cos((P)3.14159 * x);
  52 + apod.insert(x, y);
  53 + }
  54 + }
  55 +
  56 + void set_apod(beam_type type)
  57 + {
  58 + if(type == Uniform)
  59 + apod_uniform();
  60 + if(type == Bartlett)
  61 + apod_bartlett();
  62 + if(type == Hanning)
  63 + apod_hanning();
  64 + if(type == Hamming)
  65 + apod_hamming();
  66 + }
  67 +
  68 +public:
  69 +
  70 + ///constructor: build a default beam (NA=1.0)
  71 + beam(
  72 + vec<P> k = rts::vec<P>(0, 0, rtsTAU),
  73 + vec<P> _E0 = rts::vec<P>(1, 0, 0),
  74 + beam_type _apod = Uniform)
  75 + : planewave<P>(k, _E0)
  76 + {
  77 + _na[0] = (P)0.0;
  78 + _na[1] = (P)1.0;
  79 + f = vec<P>( (P)0, (P)0, (P)0 );
  80 + apod_res = 256; //set the default resolution for apodization filters
  81 + set_apod(_apod); //set the apodization function type
  82 + }
  83 +
  84 + beam<P> refract(rts::vec<P> kn) const{
  85 +
  86 + beam<P> new_beam;
  87 + new_beam._na[0] = _na[0];
  88 + new_beam._na[1] = _na[1];
  89 +
  90 +
  91 + rts::planewave<P> pw = planewave<P>::bend(kn);
  92 + //std::cout<<pw.str()<<std::endl;
  93 +
  94 + new_beam.k = pw.kvec();
  95 + new_beam.E0 = pw.E();
  96 +
  97 + return new_beam;
  98 + }
  99 +
  100 + ///Numerical Aperature functions
  101 + void NA(P na)
  102 + {
  103 + _na[0] = (P)0;
  104 + _na[1] = na;
  105 + }
  106 + void NA(P na0, P na1)
  107 + {
  108 + _na[0] = na0;
  109 + _na[1] = na1;
  110 + }
  111 +
  112 + /*string str() :
  113 + {
  114 + stringstream ss;
  115 + ss<<"Beam Center: "<<k<<std::endl;
  116 +
  117 + return ss.str();
  118 + }*/
  119 +
  120 + //Monte-Carlo decomposition into plane waves
  121 + std::vector< planewave<P> > mc(unsigned int N = 100000, unsigned int seed = 0) const
  122 + {
  123 + /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling
  124 + of a sphere and projecting these samples onto an inscribed sphere.
  125 +
  126 + seed = seed for the random number generator
  127 + */
  128 + srand(seed); //seed the random number generator
  129 +
  130 + vec<P> k_hat = beam::k.norm();
  131 +
  132 + ///compute the rotation operator to transform (0, 0, 1) to k
  133 + P cos_angle = k_hat.dot(rts::vec<P>(0, 0, 1));
  134 + rts::matrix<P, 3> rotation;
  135 +
  136 + //if the cosine of the angle is -1, the rotation is just a flip across the z axis
  137 + if(cos_angle == -1){
  138 + rotation(2, 2) = -1;
  139 + }
  140 + else if(cos_angle != 1.0)
  141 + {
  142 + rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k_hat).norm(); //compute the axis of rotation
  143 + P angle = acos(cos_angle); //compute the angle of rotation
  144 + rts::quaternion<P> quat; //create a quaternion describing the rotation
  145 + quat.CreateRotation(angle, r_axis);
  146 + rotation = quat.toMatrix3(); //compute the rotation matrix
  147 + }
  148 +
  149 + //find the phi values associated with the cassegrain ring
  150 + P PHI[2];
  151 + PHI[0] = (P)asin(_na[0]);
  152 + PHI[1] = (P)asin(_na[1]);
  153 +
  154 + //calculate the z-axis cylinder coordinates associated with these angles
  155 + P Z[2];
  156 + Z[0] = cos(PHI[0]);
  157 + Z[1] = cos(PHI[1]);
  158 + P range = Z[0] - Z[1];
  159 +
  160 + std::vector< planewave<P> > samples; //create a vector of plane waves
  161 +
  162 + //draw a distribution of random phi, z values
  163 + P z, phi, theta;
  164 + for(int i=0; i<N; i++) //for each sample
  165 + {
  166 + z = ((P)rand() / (P)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder
  167 + theta = ((P)rand() / (P)RAND_MAX) * 2 * (P)3.14159;
  168 + phi = acos(z); //project onto the sphere, computing phi in spherical coordinates
  169 +
  170 + //compute and store cartesian coordinates
  171 + rts::vec<P> spherical(1, theta, phi); //convert from spherical to cartesian coordinates
  172 + rts::vec<P> cart = spherical.sph2cart();
  173 + vec<P> k_prime = rotation * cart; //create a sample vector
  174 +
  175 + //store a wave refracted along the given direction
  176 + //std::cout<<"k prime: "<<rotation<<std::endl;
  177 + samples.push_back(planewave<P>::refract(k_prime) * apod(phi/PHI[1]));
  178 + }
  179 +
  180 + return samples;
  181 + }
  182 +
  183 + std::string str()
  184 + {
  185 + std::stringstream ss;
  186 + ss<<"Beam:"<<std::endl;
  187 + //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
  188 + ss<<" Central Plane Wave: "<<beam::k<<std::endl;
  189 + if(_na[0] == 0)
  190 + ss<<" NA: "<<_na[1];
  191 + else
  192 + ss<<" NA: "<<_na[0]<<" -- "<<_na[1];
  193 +
  194 + return ss.str();
  195 + }
  196 +
  197 +
  198 +
  199 +};
  200 +
  201 +}
  202 +
  203 +#endif
... ...
stim/optics/efield.cuh renamed to stim/optics_old/efield.cuh
stim/optics/esphere.cuh renamed to stim/optics_old/esphere.cuh
stim/optics/halfspace.cuh renamed to stim/optics_old/halfspace.cuh
stim/optics/material.h renamed to stim/optics_old/material.h
1   -#ifndef RTS_MATERIAL_H
2   -#define RTS_MATERIAL_H
3   -
4   -#include <vector>
5   -#include <ostream>
6   -#include <iostream>
7   -#include <fstream>
8   -#include <complex>
9   -#include <algorithm>
10   -#include <sstream>
11   -#include "../math/complex.h"
12   -#include "../math/constants.h"
13   -#include "../math/function.h"
14   -
15   -namespace stim{
16   -
17   -//Material class - default representation for the material property is the refractive index (RI)
18   -template<typename T>
19   -class material : public function< T, complex<T> >{
20   -
21   -public:
22   - enum wave_property{microns, inverse_cm};
23   - enum material_property{ri, absorbance};
24   -
25   -private:
26   -
27   - using function< T, complex<T> >::X;
28   - using function< T, complex<T> >::Y;
29   - using function< T, complex<T> >::insert;
30   - using function< T, complex<T> >::bounding;
31   -
32   - std::string name; //name for the material (defaults to file name)
33   -
34   - void process_header(std::string str, wave_property& wp, material_property& mp){
35   -
36   - std::stringstream ss(str); //create a stream from the data string
37   - std::string line;
38   - std::getline(ss, line); //get the first line as a string
39   - while(line[0] == '#'){ //continue looping while the line is a comment
40   -
41   - std::stringstream lstream(line); //create a stream from the line
42   - lstream.ignore(); //ignore the first character ('#')
43   -
44   - std::string prop; //get the property name
45   - lstream>>prop;
46   -
47   - if(prop == "X"){
48   - std::string wp_name;
49   - lstream>>wp_name;
50   - if(wp_name == "microns") wp = microns;
51   - else if(wp_name == "inverse_cm") wp = inverse_cm;
52   - }
53   - else if(prop == "Y"){
54   - std::string mp_name;
55   - lstream>>mp_name;
56   - if(mp_name == "ri") mp = ri;
57   - else if(mp_name == "absorbance") mp = absorbance;
58   - }
59   -
60   - std::getline(ss, line); //get the next line
61   - }
62   -
63   - function< T, stim::complex<T> >::process_string(str);
64   - }
65   -
66   - void from_inverse_cm(){
67   - //convert inverse centimeters to wavelength (in microns)
68   - for(unsigned int i=0; i<X.size(); i++)
69   - X[i] = 10000 / X[i];
70   -
71   - //reverse the function array
72   - std::reverse(X.begin(), X.end());
73   - std::reverse(Y.begin(), Y.end());
74   -
75   - }
76   -
77   - void init(){
78   - bounding[0] = bounding[1] = stim::complex<T>(1, 0);
79   - }
80   -
81   -
82   -public:
83   -
84   - material(std::string filename, wave_property wp, material_property mp){
85   - name = filename;
86   - load(filename, wp, mp);
87   - }
88   -
89   - material(std::string filename){
90   - name = filename;
91   - load(filename);
92   - }
93   -
94   - material(){
95   - init();
96   - }
97   -
98   - complex<T> getN(T lambda){
99   - return function< T, complex<T> >::linear(lambda);
100   - }
101   -
102   - void load(std::string filename, wave_property wp, material_property mp){
103   -
104   - //load the file as a function
105   - function< T, complex<T> >::load(filename);
106   - }
107   -
108   - void load(std::string filename){
109   -
110   - wave_property wp = inverse_cm;
111   - material_property mp = ri;
112   - //turn the file into a string
113   - std::ifstream t(filename.c_str()); //open the file as a stream
114   -
115   - if(!t){
116   - std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl;
117   - exit(1);
118   - }
119   - std::string str((std::istreambuf_iterator<char>(t)),
120   - std::istreambuf_iterator<char>());
121   -
122   - //process the header information
123   - process_header(str, wp, mp);
124   -
125   - //convert units
126   - if(wp == inverse_cm)
127   - from_inverse_cm();
128   - //set the bounding values
129   - bounding[0] = Y[0];
130   - bounding[1] = Y.back();
131   - }
132   - std::string str(){
133   - std::stringstream ss;
134   - ss<<name<<std::endl;
135   - ss<<function< T, complex<T> >::str();
136   - return ss.str();
137   - }
138   - std::string get_name(){
139   - return name;
140   - }
141   -
142   - void set_name(std::string str){
143   - name = str;
144   - }
145   -
146   -};
147   -
148   -}
149   -
150   -
151   -
152   -
153   -#endif
  1 +#ifndef RTS_MATERIAL_H
  2 +#define RTS_MATERIAL_H
  3 +
  4 +#include <vector>
  5 +#include <ostream>
  6 +#include <iostream>
  7 +#include <fstream>
  8 +#include <complex>
  9 +#include <algorithm>
  10 +#include <sstream>
  11 +#include "../math/complex.h"
  12 +#include "../math/constants.h"
  13 +#include "../math/function.h"
  14 +
  15 +namespace stim{
  16 +
  17 +//Material class - default representation for the material property is the refractive index (RI)
  18 +template<typename T>
  19 +class material : public function< T, complex<T> >{
  20 +
  21 +public:
  22 + enum wave_property{microns, inverse_cm};
  23 + enum material_property{ri, absorbance};
  24 +
  25 +private:
  26 +
  27 + using function< T, complex<T> >::X;
  28 + using function< T, complex<T> >::Y;
  29 + using function< T, complex<T> >::insert;
  30 + using function< T, complex<T> >::bounding;
  31 +
  32 + std::string name; //name for the material (defaults to file name)
  33 +
  34 + void process_header(std::string str, wave_property& wp, material_property& mp){
  35 +
  36 + std::stringstream ss(str); //create a stream from the data string
  37 + std::string line;
  38 + std::getline(ss, line); //get the first line as a string
  39 + while(line[0] == '#'){ //continue looping while the line is a comment
  40 +
  41 + std::stringstream lstream(line); //create a stream from the line
  42 + lstream.ignore(); //ignore the first character ('#')
  43 +
  44 + std::string prop; //get the property name
  45 + lstream>>prop;
  46 +
  47 + if(prop == "X"){
  48 + std::string wp_name;
  49 + lstream>>wp_name;
  50 + if(wp_name == "microns") wp = microns;
  51 + else if(wp_name == "inverse_cm") wp = inverse_cm;
  52 + }
  53 + else if(prop == "Y"){
  54 + std::string mp_name;
  55 + lstream>>mp_name;
  56 + if(mp_name == "ri") mp = ri;
  57 + else if(mp_name == "absorbance") mp = absorbance;
  58 + }
  59 +
  60 + std::getline(ss, line); //get the next line
  61 + }
  62 +
  63 + function< T, stim::complex<T> >::process_string(str);
  64 + }
  65 +
  66 + void from_inverse_cm(){
  67 + //convert inverse centimeters to wavelength (in microns)
  68 + for(unsigned int i=0; i<X.size(); i++)
  69 + X[i] = 10000 / X[i];
  70 +
  71 + //reverse the function array
  72 + std::reverse(X.begin(), X.end());
  73 + std::reverse(Y.begin(), Y.end());
  74 +
  75 + }
  76 +
  77 + void init(){
  78 + bounding[0] = bounding[1] = stim::complex<T>(1, 0);
  79 + }
  80 +
  81 +
  82 +public:
  83 +
  84 + material(std::string filename, wave_property wp, material_property mp){
  85 + name = filename;
  86 + load(filename, wp, mp);
  87 + }
  88 +
  89 + material(std::string filename){
  90 + name = filename;
  91 + load(filename);
  92 + }
  93 +
  94 + material(){
  95 + init();
  96 + }
  97 +
  98 + complex<T> getN(T lambda){
  99 + return function< T, complex<T> >::linear(lambda);
  100 + }
  101 +
  102 + void load(std::string filename, wave_property wp, material_property mp){
  103 +
  104 + //load the file as a function
  105 + function< T, complex<T> >::load(filename);
  106 + }
  107 +
  108 + void load(std::string filename){
  109 +
  110 + wave_property wp = inverse_cm;
  111 + material_property mp = ri;
  112 + //turn the file into a string
  113 + std::ifstream t(filename.c_str()); //open the file as a stream
  114 +
  115 + if(!t){
  116 + std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl;
  117 + exit(1);
  118 + }
  119 + std::string str((std::istreambuf_iterator<char>(t)),
  120 + std::istreambuf_iterator<char>());
  121 +
  122 + //process the header information
  123 + process_header(str, wp, mp);
  124 +
  125 + //convert units
  126 + if(wp == inverse_cm)
  127 + from_inverse_cm();
  128 + //set the bounding values
  129 + bounding[0] = Y[0];
  130 + bounding[1] = Y.back();
  131 + }
  132 + std::string str(){
  133 + std::stringstream ss;
  134 + ss<<name<<std::endl;
  135 + ss<<function< T, complex<T> >::str();
  136 + return ss.str();
  137 + }
  138 + std::string get_name(){
  139 + return name;
  140 + }
  141 +
  142 + void set_name(std::string str){
  143 + name = str;
  144 + }
  145 +
  146 +};
  147 +
  148 +}
  149 +
  150 +
  151 +
  152 +
  153 +#endif
... ...
stim/optics/mirst-1d.cuh renamed to stim/optics_old/mirst-1d.cuh
1   -#include "../optics/material.h"
2   -#include "../math/complexfield.cuh"
3   -#include "../math/constants.h"
4   -//#include "../envi/bil.h"
5   -
6   -#include "cufft.h"
7   -
8   -#include <vector>
9   -#include <sstream>
10   -
11   -namespace stim{
12   -
13   -//this function writes a sinc function to "dest" such that an iFFT produces a slab
14   -template<typename T>
15   -__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri,
16   - T* src, T* zf,
17   - T w, unsigned int zR, unsigned int nuR){
18   - //dest = complex field representing the sample
19   - //ri = refractive indices for each wavelength
20   - //src = intensity of the light source for each wavelength
21   - //zf = z position of the slab interface for each wavelength (accounting for optical path length)
22   - //w = width of the slab (in pixels)
23   - //zR = number of z-axis samples
24   - //nuR = number of wavelengths
25   -
26   - //get the current coordinate in the plane slice
27   - int ifz = blockIdx.x * blockDim.x + threadIdx.x;
28   - int inu = blockIdx.y * blockDim.y + threadIdx.y;
29   -
30   - //make sure that the thread indices are in-bounds
31   - if(inu >= nuR || ifz >= zR) return;
32   -
33   - int i = inu * zR + ifz;
34   -
35   - T fz;
36   - if(ifz < zR/2)
37   - fz = ifz / (T)zR;
38   - else
39   - fz = -(zR - ifz) / (T)zR;
40   -
41   - //if the slab starts outside of the simulation domain, just return
42   - if(zf[inu] >= zR) return;
43   -
44   - //fill the array along z with a sinc function representing the Fourier transform of the layer
45   -
46   - T opl = w * ri[inu].real(); //optical path length
47   -
48   - //handle the case where the slab goes outside the simulation domain
49   - if(zf[inu] + opl >= zR)
50   - opl = zR - zf[inu];
51   -
52   - if(opl == 0) return;
53   -
54   - //T l = w * ri[inu].real();
55   - //complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0));
56   - complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));
57   -
58   - complex<T> eta = ri[inu] * ri[inu] - 1;
59   -
60   - //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz);
61   - if(ifz == 0)
62   - dest[i] += opl * exp(e) * eta * src[inu];
63   - else
64   - dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
65   -}
66   -
67   -template<typename T>
68   -__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){
69   - //zf = current z depth (optical path length) in pixels
70   - //ri = refractive index of the material
71   - //w = actual width of the layer (in pixels)
72   -
73   -
74   - //compute the index for this thread
75   - int i = blockIdx.x * blockDim.x + threadIdx.x;
76   - if(i >= S) return;
77   -
78   - if(ri == NULL)
79   - zf[i] += w;
80   - else
81   - zf[i] += ri[i].real() * w;
82   -}
83   -
84   -//apply the 1D MIRST filter to an existing sample (overwriting the sample)
85   -template<typename T>
86   -__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda,
87   - T dFz,
88   - T inNA, T outNA,
89   - unsigned int lambdaR, unsigned int zR,
90   - T sigma = 0){
91   - //sampleFFT = the sample in the Fourier domain (will be overwritten)
92   - //lambda = list of wavelengths
93   - //dFz = delta along the Fz axis in the frequency domain
94   - //inNA = NA of the internal obscuration
95   - //outNA = NA of the objective
96   - //zR = number of pixels along the Fz axis (same as the z-axis)
97   - //lambdaR = number of wavelengths
98   - //sigma = width of the Gaussian source
99   - int ifz = blockIdx.x * blockDim.x + threadIdx.x;
100   - int inu = blockIdx.y * blockDim.y + threadIdx.y;
101   -
102   - if(inu >= lambdaR || ifz >= zR) return;
103   -
104   - //calculate the index into the sample FT
105   - int i = inu * zR + ifz;
106   -
107   - //compute the frequency (and set all negative spatial frequencies to zero)
108   - T fz;
109   - if(ifz < zR / 2)
110   - fz = ifz * dFz;
111   - //if the spatial frequency is negative, set it to zero and exit
112   - else{
113   - sampleFFT[i] = 0;
114   - return;
115   - }
116   -
117   - //compute the frequency in inverse microns
118   - T nu = 1/lambda[inu];
119   -
120   - //determine the radius of the integration circle
121   - T nu_sq = nu * nu;
122   - T fz_sq = (fz * fz) / 4;
123   -
124   - //cut off frequencies above the diffraction limit
125   - T r;
126   - if(fz_sq < nu_sq)
127   - r = sqrt(nu_sq - fz_sq);
128   - else
129   - r = 0;
130   -
131   - //account for the optics
132   - T Q = 0;
133   - if(r > nu * inNA && r < nu * outNA)
134   - Q = 1;
135   -
136   - //account for the source
137   - //T sigma = 30.0;
138   - T s = exp( - (r*r * sigma*sigma) / 2 );
139   - //T s=1;
140   -
141   - //compute the final filter
142   - T mirst = 0;
143   - if(fz != 0)
144   - mirst = 2 * stimPI * r * s * Q * (1/fz);
145   -
146   - sampleFFT[i] *= mirst;
147   -
148   -}
149   -
150   -/*This object performs a 1-dimensional (layered) MIRST simulation
151   -*/
152   -template<typename T>
153   -class mirst1d{
154   -
155   -private:
156   - unsigned int Z; //z-axis resolution
157   - unsigned int pad; //pixel padding on either side of the sample
158   -
159   - std::vector< material<T> > matlist; //list of materials
160   - std::vector< T > layers; //list of layer thicknesses
161   -
162   - std::vector< T > lambdas; //list of wavelengths that are being simulated
163   - unsigned int S; //number of wavelengths (size of "lambdas")
164   -
165   - T NA[2]; //numerical aperature (central obscuration and outer diameter)
166   -
167   - function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters)
168   -
169   - complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc.
170   -
171   - void fft(int direction = CUFFT_FORWARD){
172   -
173   - unsigned padZ = Z + pad;
174   -
175   - //create cuFFT handles
176   - cufftHandle plan;
177   - cufftResult result;
178   -
179   - if(sizeof(T) == 4)
180   - result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision
181   - else
182   - result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision
183   -
184   - //check for Plan 1D errors
185   - if(result != CUFFT_SUCCESS){
186   - std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;
187   - CufftError(result);
188   - exit(1);
189   - }
190   -
191   - if(sizeof(T) == 4)
192   - result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);
193   - else
194   - result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);
195   -
196   - //check for FFT errors
197   - if(result != CUFFT_SUCCESS){
198   - std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;
199   - CufftError(result);
200   - exit(1);
201   - }
202   -
203   - cufftDestroy(plan);
204   - }
205   -
206   -
207   - //initialize the scratch memory
208   - void init_scratch(){
209   - scratch = complexfield<T, 1>(Z + pad , lambdas.size());
210   - scratch = 0;
211   - }
212   -
213   - //get the list of scattering efficiency (eta) values for a specified layer
214   - std::vector< complex<T> > layer_etas(unsigned int l){
215   -
216   - std::vector< complex<T> > etas;
217   -
218   - //fill the list of etas
219   - for(unsigned int i=0; i<lambdas.size(); i++)
220   - etas.push_back( matlist[l].eta(lambdas[i]) );
221   - return etas;
222   - }
223   -
224   - //calculates the optimal block and grid sizes using information from the GPU
225   - void cuda_params(dim3& grids, dim3& blocks){
226   - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
227   - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
228   -
229   - //create one thread for each detector pixel
230   - blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
231   - grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);
232   - }
233   -
234   - //add the fourier transform of layer n to the scratch space
235   - void build_layer_fft(unsigned int n, T* zf){
236   - unsigned int paddedZ = Z + pad;
237   -
238   - T wpx = layers[n] / dz(); //calculate the width of the layer in pixels
239   -
240   - //allocate memory for the refractive index
241   - complex<T>* gpuRi;
242   - HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));
243   -
244   - //allocate memory for the source profile
245   - T* gpuSrc;
246   - HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));
247   -
248   - complex<T> ri;
249   - T source;
250   - //store the refractive index and source profile in a CPU array
251   - for(int inu=0; inu<S; inu++){
252   - //save the refractive index to the GPU
253   - ri = matlist[n].getN(lambdas[inu]);
254   - HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice ));
255   -
256   - //save the source profile to the GPU
257   - source = source_profile(10000 / lambdas[inu]);
258   - HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice ));
259   -
260   - }
261   -
262   - //create one thread for each pixel of the field slice
263   - dim3 gridDim, blockDim;
264   - cuda_params(gridDim, blockDim);
265   - stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);
266   -
267   - int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size
268   - int linGrid = S / linBlock + 1;
269   - stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);
270   -
271   - //free memory
272   - HANDLE_ERROR(cudaFree(gpuRi));
273   - HANDLE_ERROR(cudaFree(gpuSrc));
274   - }
275   -
276   - void build_sample(){
277   - init_scratch(); //initialize the GPU scratch space
278   - //build_layer(1);
279   -
280   - T* zf;
281   - HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));
282   - HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));
283   -
284   - //render each layer of the sample
285   - for(unsigned int l=0; l<layers.size(); l++){
286   - build_layer_fft(l, zf);
287   - }
288   -
289   - HANDLE_ERROR(cudaFree(zf));
290   - }
291   -
292   - void apply_filter(){
293   - dim3 gridDim, blockDim;
294   - cuda_params(gridDim, blockDim);
295   -
296   - unsigned int Zpad = Z + pad;
297   -
298   - T sim_range = dz() * Zpad;
299   - T dFz = 1 / sim_range;
300   -
301   - //copy the array of wavelengths to the GPU
302   - T* gpuLambdas;
303   - HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad));
304   - HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice));
305   - stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas,
306   - dFz,
307   - NA[0], NA[1],
308   - S, Zpad);
309   - }
310   -
311   - //crop the image to the sample thickness - keep in mind that sample thickness != optical path length
312   - void crop(){
313   -
314   - scratch = scratch.crop(Z, S);
315   - }
316   -
317   - //save the scratch field as a binary file
318   - void to_binary(std::string filename){
319   -
320   - }
321   -
322   -
323   -public:
324   -
325   - //constructor
326   - mirst1d(unsigned int rZ = 100,
327   - unsigned int padding = 0){
328   - Z = rZ;
329   - pad = padding;
330   - NA[0] = 0;
331   - NA[1] = 0.8;
332   - S = 0;
333   - source_profile = 1;
334   - }
335   -
336   - //add a layer, thickness = microns
337   - void add_layer(material<T> mat, T thickness){
338   - matlist.push_back(mat);
339   - layers.push_back(thickness);
340   - }
341   -
342   - void add_layer(std::string filename, T thickness){
343   - add_layer(material<T>(filename), thickness);
344   - }
345   -
346   - //adds a profile spectrum for the light source
347   - void set_source(std::string filename){
348   - source_profile.load(filename);
349   - }
350   -
351   - //adds a block of wavenumbers (cm^-1) to the simulation parameters
352   - void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){
353   - unsigned int nu = start;
354   - while(nu <= stop){
355   - lambdas.push_back((T)10000 / nu);
356   - nu += step;
357   - }
358   - S = lambdas.size(); //increment the number of wavelengths (shorthand for later)
359   - }
360   -
361   - T thickness(){
362   - T t = 0;
363   - for(unsigned int l=0; l<layers.size(); l++)
364   - t += layers[l];
365   - return t;
366   - }
367   -
368   - void padding(unsigned int padding = 0){
369   - pad = padding;
370   - }
371   -
372   - T dz(){
373   - return thickness() / Z; //calculate the z-axis step size
374   - }
375   -
376   - void na(T in, T out){
377   - NA[0] = in;
378   - NA[1] = out;
379   - }
380   -
381   - void na(T out){
382   - na(0, out);
383   - }
384   -
385   - stim::function<T, T> get_source(){
386   - return source_profile;
387   - }
388   -
389   - void save_sample(std::string filename){
390   - //create a sample and save the magnitude as an image
391   - build_sample();
392   - fft(CUFFT_INVERSE);
393   - scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
394   - }
395   -
396   - void save_mirst(std::string filename, bool binary = true){
397   - //apply the MIRST filter to a sample and save the image
398   -
399   - //build the sample in the Fourier domain
400   - build_sample();
401   -
402   - //apply the MIRST filter
403   - apply_filter();
404   -
405   - //apply an inverse FFT to bring the results back into the spatial domain
406   - fft(CUFFT_INVERSE);
407   -
408   - crop();
409   -
410   - //save the image
411   - if(binary)
412   - to_binary(filename);
413   - else
414   - scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
415   - }
416   -
417   -
418   -
419   -
420   - std::string str(){
421   -
422   - stringstream ss;
423   - ss<<"1D MIRST Simulation========================="<<std::endl;
424   - ss<<"z-axis resolution: "<<Z<<std::endl;
425   - ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;
426   - ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;
427   - ss<<"padding: "<<pad<<std::endl;
428   - ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;
429   - ss<<"dz: "<<dz()<<" um"<<std::endl;
430   - ss<<std::endl;
431   - ss<<layers.size()<<" layers-------------"<<std::endl;
432   - for(unsigned int l=0; l<layers.size(); l++)
433   - ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;
434   -
435   - ss<<"source profile-----------"<<std::endl;
436   - ss<<get_source().str()<<std::endl;
437   -
438   - return ss.str();
439   -
440   -
441   - }
442   -
443   -
444   -
445   -};
446   -
447   -}
  1 +#include "../optics/material.h"
  2 +#include "../math/complexfield.cuh"
  3 +#include "../math/constants.h"
  4 +//#include "../envi/bil.h"
  5 +
  6 +#include "cufft.h"
  7 +
  8 +#include <vector>
  9 +#include <sstream>
  10 +
  11 +namespace stim{
  12 +
  13 +//this function writes a sinc function to "dest" such that an iFFT produces a slab
  14 +template<typename T>
  15 +__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri,
  16 + T* src, T* zf,
  17 + T w, unsigned int zR, unsigned int nuR){
  18 + //dest = complex field representing the sample
  19 + //ri = refractive indices for each wavelength
  20 + //src = intensity of the light source for each wavelength
  21 + //zf = z position of the slab interface for each wavelength (accounting for optical path length)
  22 + //w = width of the slab (in pixels)
  23 + //zR = number of z-axis samples
  24 + //nuR = number of wavelengths
  25 +
  26 + //get the current coordinate in the plane slice
  27 + int ifz = blockIdx.x * blockDim.x + threadIdx.x;
  28 + int inu = blockIdx.y * blockDim.y + threadIdx.y;
  29 +
  30 + //make sure that the thread indices are in-bounds
  31 + if(inu >= nuR || ifz >= zR) return;
  32 +
  33 + int i = inu * zR + ifz;
  34 +
  35 + T fz;
  36 + if(ifz < zR/2)
  37 + fz = ifz / (T)zR;
  38 + else
  39 + fz = -(zR - ifz) / (T)zR;
  40 +
  41 + //if the slab starts outside of the simulation domain, just return
  42 + if(zf[inu] >= zR) return;
  43 +
  44 + //fill the array along z with a sinc function representing the Fourier transform of the layer
  45 +
  46 + T opl = w * ri[inu].real(); //optical path length
  47 +
  48 + //handle the case where the slab goes outside the simulation domain
  49 + if(zf[inu] + opl >= zR)
  50 + opl = zR - zf[inu];
  51 +
  52 + if(opl == 0) return;
  53 +
  54 + //T l = w * ri[inu].real();
  55 + //complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0));
  56 + complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));
  57 +
  58 + complex<T> eta = ri[inu] * ri[inu] - 1;
  59 +
  60 + //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz);
  61 + if(ifz == 0)
  62 + dest[i] += opl * exp(e) * eta * src[inu];
  63 + else
  64 + dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
  65 +}
  66 +
  67 +template<typename T>
  68 +__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){
  69 + //zf = current z depth (optical path length) in pixels
  70 + //ri = refractive index of the material
  71 + //w = actual width of the layer (in pixels)
  72 +
  73 +
  74 + //compute the index for this thread
  75 + int i = blockIdx.x * blockDim.x + threadIdx.x;
  76 + if(i >= S) return;
  77 +
  78 + if(ri == NULL)
  79 + zf[i] += w;
  80 + else
  81 + zf[i] += ri[i].real() * w;
  82 +}
  83 +
  84 +//apply the 1D MIRST filter to an existing sample (overwriting the sample)
  85 +template<typename T>
  86 +__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda,
  87 + T dFz,
  88 + T inNA, T outNA,
  89 + unsigned int lambdaR, unsigned int zR,
  90 + T sigma = 0){
  91 + //sampleFFT = the sample in the Fourier domain (will be overwritten)
  92 + //lambda = list of wavelengths
  93 + //dFz = delta along the Fz axis in the frequency domain
  94 + //inNA = NA of the internal obscuration
  95 + //outNA = NA of the objective
  96 + //zR = number of pixels along the Fz axis (same as the z-axis)
  97 + //lambdaR = number of wavelengths
  98 + //sigma = width of the Gaussian source
  99 + int ifz = blockIdx.x * blockDim.x + threadIdx.x;
  100 + int inu = blockIdx.y * blockDim.y + threadIdx.y;
  101 +
  102 + if(inu >= lambdaR || ifz >= zR) return;
  103 +
  104 + //calculate the index into the sample FT
  105 + int i = inu * zR + ifz;
  106 +
  107 + //compute the frequency (and set all negative spatial frequencies to zero)
  108 + T fz;
  109 + if(ifz < zR / 2)
  110 + fz = ifz * dFz;
  111 + //if the spatial frequency is negative, set it to zero and exit
  112 + else{
  113 + sampleFFT[i] = 0;
  114 + return;
  115 + }
  116 +
  117 + //compute the frequency in inverse microns
  118 + T nu = 1/lambda[inu];
  119 +
  120 + //determine the radius of the integration circle
  121 + T nu_sq = nu * nu;
  122 + T fz_sq = (fz * fz) / 4;
  123 +
  124 + //cut off frequencies above the diffraction limit
  125 + T r;
  126 + if(fz_sq < nu_sq)
  127 + r = sqrt(nu_sq - fz_sq);
  128 + else
  129 + r = 0;
  130 +
  131 + //account for the optics
  132 + T Q = 0;
  133 + if(r > nu * inNA && r < nu * outNA)
  134 + Q = 1;
  135 +
  136 + //account for the source
  137 + //T sigma = 30.0;
  138 + T s = exp( - (r*r * sigma*sigma) / 2 );
  139 + //T s=1;
  140 +
  141 + //compute the final filter
  142 + T mirst = 0;
  143 + if(fz != 0)
  144 + mirst = 2 * stimPI * r * s * Q * (1/fz);
  145 +
  146 + sampleFFT[i] *= mirst;
  147 +
  148 +}
  149 +
  150 +/*This object performs a 1-dimensional (layered) MIRST simulation
  151 +*/
  152 +template<typename T>
  153 +class mirst1d{
  154 +
  155 +private:
  156 + unsigned int Z; //z-axis resolution
  157 + unsigned int pad; //pixel padding on either side of the sample
  158 +
  159 + std::vector< material<T> > matlist; //list of materials
  160 + std::vector< T > layers; //list of layer thicknesses
  161 +
  162 + std::vector< T > lambdas; //list of wavelengths that are being simulated
  163 + unsigned int S; //number of wavelengths (size of "lambdas")
  164 +
  165 + T NA[2]; //numerical aperature (central obscuration and outer diameter)
  166 +
  167 + function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters)
  168 +
  169 + complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc.
  170 +
  171 + void fft(int direction = CUFFT_FORWARD){
  172 +
  173 + unsigned padZ = Z + pad;
  174 +
  175 + //create cuFFT handles
  176 + cufftHandle plan;
  177 + cufftResult result;
  178 +
  179 + if(sizeof(T) == 4)
  180 + result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision
  181 + else
  182 + result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision
  183 +
  184 + //check for Plan 1D errors
  185 + if(result != CUFFT_SUCCESS){
  186 + std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;
  187 + CufftError(result);
  188 + exit(1);
  189 + }
  190 +
  191 + if(sizeof(T) == 4)
  192 + result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);
  193 + else
  194 + result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);
  195 +
  196 + //check for FFT errors
  197 + if(result != CUFFT_SUCCESS){
  198 + std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;
  199 + CufftError(result);
  200 + exit(1);
  201 + }
  202 +
  203 + cufftDestroy(plan);
  204 + }
  205 +
  206 +
  207 + //initialize the scratch memory
  208 + void init_scratch(){
  209 + scratch = complexfield<T, 1>(Z + pad , lambdas.size());
  210 + scratch = 0;
  211 + }
  212 +
  213 + //get the list of scattering efficiency (eta) values for a specified layer
  214 + std::vector< complex<T> > layer_etas(unsigned int l){
  215 +
  216 + std::vector< complex<T> > etas;
  217 +
  218 + //fill the list of etas
  219 + for(unsigned int i=0; i<lambdas.size(); i++)
  220 + etas.push_back( matlist[l].eta(lambdas[i]) );
  221 + return etas;
  222 + }
  223 +
  224 + //calculates the optimal block and grid sizes using information from the GPU
  225 + void cuda_params(dim3& grids, dim3& blocks){
  226 + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
  227 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  228 +
  229 + //create one thread for each detector pixel
  230 + blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
  231 + grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);
  232 + }
  233 +
  234 + //add the fourier transform of layer n to the scratch space
  235 + void build_layer_fft(unsigned int n, T* zf){
  236 + unsigned int paddedZ = Z + pad;
  237 +
  238 + T wpx = layers[n] / dz(); //calculate the width of the layer in pixels
  239 +
  240 + //allocate memory for the refractive index
  241 + complex<T>* gpuRi;
  242 + HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));
  243 +
  244 + //allocate memory for the source profile
  245 + T* gpuSrc;
  246 + HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));
  247 +
  248 + complex<T> ri;
  249 + T source;
  250 + //store the refractive index and source profile in a CPU array
  251 + for(int inu=0; inu<S; inu++){
  252 + //save the refractive index to the GPU
  253 + ri = matlist[n].getN(lambdas[inu]);
  254 + HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice ));
  255 +
  256 + //save the source profile to the GPU
  257 + source = source_profile(10000 / lambdas[inu]);
  258 + HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice ));
  259 +
  260 + }
  261 +
  262 + //create one thread for each pixel of the field slice
  263 + dim3 gridDim, blockDim;
  264 + cuda_params(gridDim, blockDim);
  265 + stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);
  266 +
  267 + int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size
  268 + int linGrid = S / linBlock + 1;
  269 + stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);
  270 +
  271 + //free memory
  272 + HANDLE_ERROR(cudaFree(gpuRi));
  273 + HANDLE_ERROR(cudaFree(gpuSrc));
  274 + }
  275 +
  276 + void build_sample(){
  277 + init_scratch(); //initialize the GPU scratch space
  278 + //build_layer(1);
  279 +
  280 + T* zf;
  281 + HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));
  282 + HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));
  283 +
  284 + //render each layer of the sample
  285 + for(unsigned int l=0; l<layers.size(); l++){
  286 + build_layer_fft(l, zf);
  287 + }
  288 +
  289 + HANDLE_ERROR(cudaFree(zf));
  290 + }
  291 +
  292 + void apply_filter(){
  293 + dim3 gridDim, blockDim;
  294 + cuda_params(gridDim, blockDim);
  295 +
  296 + unsigned int Zpad = Z + pad;
  297 +
  298 + T sim_range = dz() * Zpad;
  299 + T dFz = 1 / sim_range;
  300 +
  301 + //copy the array of wavelengths to the GPU
  302 + T* gpuLambdas;
  303 + HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad));
  304 + HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice));
  305 + stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas,
  306 + dFz,
  307 + NA[0], NA[1],
  308 + S, Zpad);
  309 + }
  310 +
  311 + //crop the image to the sample thickness - keep in mind that sample thickness != optical path length
  312 + void crop(){
  313 +
  314 + scratch = scratch.crop(Z, S);
  315 + }
  316 +
  317 + //save the scratch field as a binary file
  318 + void to_binary(std::string filename){
  319 +
  320 + }
  321 +
  322 +
  323 +public:
  324 +
  325 + //constructor
  326 + mirst1d(unsigned int rZ = 100,
  327 + unsigned int padding = 0){
  328 + Z = rZ;
  329 + pad = padding;
  330 + NA[0] = 0;
  331 + NA[1] = 0.8;
  332 + S = 0;
  333 + source_profile = 1;
  334 + }
  335 +
  336 + //add a layer, thickness = microns
  337 + void add_layer(material<T> mat, T thickness){
  338 + matlist.push_back(mat);
  339 + layers.push_back(thickness);
  340 + }
  341 +
  342 + void add_layer(std::string filename, T thickness){
  343 + add_layer(material<T>(filename), thickness);
  344 + }
  345 +
  346 + //adds a profile spectrum for the light source
  347 + void set_source(std::string filename){
  348 + source_profile.load(filename);
  349 + }
  350 +
  351 + //adds a block of wavenumbers (cm^-1) to the simulation parameters
  352 + void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){
  353 + unsigned int nu = start;
  354 + while(nu <= stop){
  355 + lambdas.push_back((T)10000 / nu);
  356 + nu += step;
  357 + }
  358 + S = lambdas.size(); //increment the number of wavelengths (shorthand for later)
  359 + }
  360 +
  361 + T thickness(){
  362 + T t = 0;
  363 + for(unsigned int l=0; l<layers.size(); l++)
  364 + t += layers[l];
  365 + return t;
  366 + }
  367 +
  368 + void padding(unsigned int padding = 0){
  369 + pad = padding;
  370 + }
  371 +
  372 + T dz(){
  373 + return thickness() / Z; //calculate the z-axis step size
  374 + }
  375 +
  376 + void na(T in, T out){
  377 + NA[0] = in;
  378 + NA[1] = out;
  379 + }
  380 +
  381 + void na(T out){
  382 + na(0, out);
  383 + }
  384 +
  385 + stim::function<T, T> get_source(){
  386 + return source_profile;
  387 + }
  388 +
  389 + void save_sample(std::string filename){
  390 + //create a sample and save the magnitude as an image
  391 + build_sample();
  392 + fft(CUFFT_INVERSE);
  393 + scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
  394 + }
  395 +
  396 + void save_mirst(std::string filename, bool binary = true){
  397 + //apply the MIRST filter to a sample and save the image
  398 +
  399 + //build the sample in the Fourier domain
  400 + build_sample();
  401 +
  402 + //apply the MIRST filter
  403 + apply_filter();
  404 +
  405 + //apply an inverse FFT to bring the results back into the spatial domain
  406 + fft(CUFFT_INVERSE);
  407 +
  408 + crop();
  409 +
  410 + //save the image
  411 + if(binary)
  412 + to_binary(filename);
  413 + else
  414 + scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
  415 + }
  416 +
  417 +
  418 +
  419 +
  420 + std::string str(){
  421 +
  422 + stringstream ss;
  423 + ss<<"1D MIRST Simulation========================="<<std::endl;
  424 + ss<<"z-axis resolution: "<<Z<<std::endl;
  425 + ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;
  426 + ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;
  427 + ss<<"padding: "<<pad<<std::endl;
  428 + ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;
  429 + ss<<"dz: "<<dz()<<" um"<<std::endl;
  430 + ss<<std::endl;
  431 + ss<<layers.size()<<" layers-------------"<<std::endl;
  432 + for(unsigned int l=0; l<layers.size(); l++)
  433 + ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;
  434 +
  435 + ss<<"source profile-----------"<<std::endl;
  436 + ss<<get_source().str()<<std::endl;
  437 +
  438 + return ss.str();
  439 +
  440 +
  441 + }
  442 +
  443 +
  444 +
  445 +};
  446 +
  447 +}
... ...
stim/optics_old/planewave.h 0 โ†’ 100644
  1 +#ifndef RTS_PLANEWAVE
  2 +#define RTS_PLANEWAVE
  3 +
  4 +#include <string>
  5 +#include <sstream>
  6 +
  7 +#include "../math/vector.h"
  8 +#include "../math/quaternion.h"
  9 +#include "../math/constants.h"
  10 +#include "../math/plane.h"
  11 +#include "../cuda/callable.h"
  12 +
  13 +/*Basic conversions used here (assuming a vacuum)
  14 + lambda =
  15 +*/
  16 +
  17 +namespace stim{
  18 + namespace optics{
  19 +
  20 +template<typename T>
  21 +class planewave{
  22 +
  23 +protected:
  24 +
  25 + vec<T> k; //k = tau / lambda
  26 + vec< complex<T> > E0; //amplitude
  27 + //T phi;
  28 +
  29 + CUDA_CALLABLE planewave<T> bend(rts::vec<T> kn) const{
  30 +
  31 + vec<T> kn_hat = kn.norm(); //normalize the new k
  32 + vec<T> k_hat = k.norm(); //normalize the current k
  33 +
  34 + //std::cout<<"PLANE WAVE BENDING------------------"<<std::endl;
  35 + //std::cout<<"kn_hat: "<<kn_hat<<" k_hat: "<<k_hat<<std::endl;
  36 +
  37 + planewave<T> new_p; //create a new plane wave
  38 +
  39 + //if kn is equal to k or -k, handle the degenerate case
  40 + T k_dot_kn = k_hat.dot(kn_hat);
  41 +
  42 + //if k . n < 0, then the bend is a reflection
  43 + //flip k_hat
  44 + if(k_dot_kn < 0) k_hat = -k_hat;
  45 +
  46 + //std::cout<<"k dot kn: "<<k_dot_kn<<std::endl;
  47 +
  48 + //std::cout<<"k_dot_kn: "<<k_dot_kn<<std::endl;
  49 + if(k_dot_kn == -1){
  50 + new_p.k = -k;
  51 + new_p.E0 = E0;
  52 + return new_p;
  53 + }
  54 + else if(k_dot_kn == 1){
  55 + new_p.k = k;
  56 + new_p.E0 = E0;
  57 + return new_p;
  58 + }
  59 +
  60 + vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector
  61 +
  62 + //std::cout<<"r: "<<r<<std::endl;
  63 +
  64 + T theta = asin(r.len()); //compute the angle of the rotation about r
  65 +
  66 +
  67 +
  68 + //deal with a zero vector (both k and kn point in the same direction)
  69 + //if(theta == (T)0)
  70 + //{
  71 + // new_p = *this;
  72 + // return new_p;
  73 + //}
  74 +
  75 + //create a quaternion to capture the rotation
  76 + quaternion<T> q;
  77 + q.CreateRotation(theta, r.norm());
  78 +
  79 + //apply the rotation to E0
  80 + vec< complex<T> > E0n = q.toMatrix3() * E0;
  81 +
  82 + new_p.k = kn_hat * kmag();
  83 + new_p.E0 = E0n;
  84 +
  85 + return new_p;
  86 + }
  87 +
  88 +public:
  89 +
  90 +
  91 + ///constructor: create a plane wave propagating along z, polarized along x
  92 + /*planewave(T lambda = (T)1)
  93 + {
  94 + k = rts::vec<T>(0, 0, 1) * (TAU/lambda);
  95 + E0 = rts::vec<T>(1, 0, 0);
  96 + }*/
  97 + ///constructor: create a plane wave propagating along k, polarized along _E0, at frequency _omega
  98 + CUDA_CALLABLE planewave(vec<T> kvec = rts::vec<T>(0, 0, rtsTAU),
  99 + vec< complex<T> > E = rts::vec<T>(1, 0, 0), T phase = 0)
  100 + {
  101 + //phi = phase;
  102 +
  103 + k = kvec;
  104 + vec< complex<T> > k_hat = k.norm();
  105 +
  106 + if(E.len() == 0) //if the plane wave has an amplitude of 0
  107 + E0 = vec<T>(0); //just return it
  108 + else{
  109 + vec< complex<T> > s = (k_hat.cross(E)).norm(); //compute an orthogonal side vector
  110 + vec< complex<T> > E_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector
  111 + E0 = E_hat * E_hat.dot(E); //compute the projection of _E0 onto E0_hat
  112 + }
  113 +
  114 + E0 = E0 * exp( complex<T>(0, phase) );
  115 + }
  116 +
  117 + ///multiplication operator: scale E0
  118 + CUDA_CALLABLE planewave<T> & operator* (const T & rhs)
  119 + {
  120 +
  121 + E0 = E0 * rhs;
  122 + return *this;
  123 + }
  124 +
  125 + CUDA_CALLABLE T lambda() const
  126 + {
  127 + return rtsTAU / k.len();
  128 + }
  129 +
  130 + CUDA_CALLABLE T kmag() const
  131 + {
  132 + return k.len();
  133 + }
  134 +
  135 + CUDA_CALLABLE vec< complex<T> > E(){
  136 + return E0;
  137 + }
  138 +
  139 + CUDA_CALLABLE vec<T> kvec(){
  140 + return k;
  141 + }
  142 +
  143 + /*CUDA_CALLABLE T phase(){
  144 + return phi;
  145 + }
  146 +
  147 + CUDA_CALLABLE void phase(T p){
  148 + phi = p;
  149 + }*/
  150 +
  151 + CUDA_CALLABLE vec< complex<T> > pos(vec<T> p = vec<T>(0, 0, 0)){
  152 + vec< complex<T> > result;
  153 +
  154 + T kdp = k.dot(p);
  155 + complex<T> x = complex<T>(0, kdp);
  156 + complex<T> expx = exp(x);
  157 +
  158 + result[0] = E0[0] * expx;
  159 + result[1] = E0[1] * expx;
  160 + result[2] = E0[2] * expx;
  161 +
  162 + return result;
  163 + }
  164 +
  165 + //scales k based on a transition from material ni to material nt
  166 + CUDA_CALLABLE planewave<T> n(T ni, T nt){
  167 + return planewave<T>(k * (nt / ni), E0);
  168 + }
  169 +
  170 + CUDA_CALLABLE planewave<T> refract(rts::vec<T> kn) const
  171 + {
  172 + return bend(kn);
  173 + }
  174 +
  175 + void scatter(rts::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){
  176 +
  177 + int facing = P.face(k); //determine which direction the plane wave is coming in
  178 +
  179 + //if(facing == 0) //if the wave is tangent to the plane, return an identical wave
  180 + // return *this;
  181 + //else
  182 + if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr
  183 + P = P.flip(); //flip the plane
  184 + nr = 1/nr; //invert the refractive index (now nr = n0/n1)
  185 + }
  186 +
  187 + //use Snell's Law to calculate the transmitted angle
  188 + T cos_theta_i = k.norm().dot(-P.norm()); //compute the cosine of theta_i
  189 + T theta_i = acos(cos_theta_i); //compute theta_i
  190 + T sin_theta_t = (1/nr) * sin(theta_i); //compute the sine of theta_t using Snell's law
  191 + T theta_t = asin(sin_theta_t); //compute the cosine of theta_t
  192 +
  193 + bool tir = false; //flag for total internal reflection
  194 + if(theta_t != theta_t){
  195 + tir = true;
  196 + theta_t = rtsPI / (T)2;
  197 + }
  198 +
  199 + //handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
  200 + if(theta_i == 0){
  201 + T rp = (1 - nr) / (1 + nr); //compute the Fresnel coefficients
  202 + T tp = 2 / (1 + nr);
  203 + vec<T> kr = -k;
  204 + vec<T> kt = k * nr; //set the k vectors for theta_i = 0
  205 + vec< complex<T> > Er = E0 * rp; //compute the E vectors
  206 + vec< complex<T> > Et = E0 * tp;
  207 + T phase_t = P.p().dot(k - kt); //compute the phase offset
  208 + T phase_r = P.p().dot(k - kr);
  209 + //std::cout<<"Degeneracy: Head-On"<<std::endl;
  210 + //std::cout<<"rs: "<<rp<<" rp: "<<rp<<" ts: "<<tp<<" tp: "<<tp<<std::endl;
  211 + //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;
  212 +
  213 + //create the plane waves
  214 + r = planewave<T>(kr, Er, phase_r);
  215 + t = planewave<T>(kt, Et, phase_t);
  216 +
  217 + //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;
  218 + //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;
  219 + //std::cout<<"--------------------------------"<<std::endl;
  220 + return;
  221 + }
  222 +
  223 +
  224 + //compute the Fresnel coefficients
  225 + T rp, rs, tp, ts;
  226 + rp = tan(theta_t - theta_i) / tan(theta_t + theta_i);
  227 + rs = sin(theta_t - theta_i) / sin(theta_t + theta_i);
  228 +
  229 + if(tir){
  230 + tp = ts = 0;
  231 + }
  232 + else{
  233 + tp = ( 2 * sin(theta_t) * cos(theta_i) ) / ( sin(theta_t + theta_i) * cos(theta_t - theta_i) );
  234 + ts = ( 2 * sin(theta_t) * cos(theta_i) ) / sin(theta_t + theta_i);
  235 + }
  236 +
  237 + //compute the coordinate space for the plane of incidence
  238 + vec<T> z_hat = -P.norm();
  239 + vec<T> y_hat = P.parallel(k).norm();
  240 + vec<T> x_hat = y_hat.cross(z_hat).norm();
  241 +
  242 + //compute the k vectors for r and t
  243 + vec<T> kr, kt;
  244 + kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag();
  245 + kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr;
  246 +
  247 + //compute the magnitude of the p- and s-polarized components of the incident E vector
  248 + complex<T> Ei_s = E0.dot(x_hat);
  249 + //int sgn = (0 < E0.dot(y_hat)) - (E0.dot(y_hat) < 0);
  250 + int sgn = E0.dot(y_hat).sgn();
  251 + vec< complex<T> > cx_hat = x_hat;
  252 + complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
  253 + //T Ei_p = ( E0 - x_hat * Ei_s ).len();
  254 + //compute the magnitude of the p- and s-polarized components of the reflected E vector
  255 + complex<T> Er_s = Ei_s * rs;
  256 + complex<T> Er_p = Ei_p * rp;
  257 + //compute the magnitude of the p- and s-polarized components of the transmitted E vector
  258 + complex<T> Et_s = Ei_s * ts;
  259 + complex<T> Et_p = Ei_p * tp;
  260 +
  261 + //std::cout<<"E0: "<<E0<<std::endl;
  262 + //std::cout<<"E0 dot y_hat: "<<E0.dot(y_hat)<<std::endl;
  263 + //std::cout<<"theta i: "<<theta_i<<" theta t: "<<theta_t<<std::endl;
  264 + //std::cout<<"x_hat: "<<x_hat<<" y_hat: "<<y_hat<<" z_hat: "<<z_hat<<std::endl;
  265 + //std::cout<<"Ei_s: "<<Ei_s<<" Ei_p: "<<Ei_p<<" Er_s: "<<Er_s<<" Er_p: "<<Er_p<<" Et_s: "<<Et_s<<" Et_p: "<<Et_p<<std::endl;
  266 + //std::cout<<"rs: "<<rs<<" rp: "<<rp<<" ts: "<<ts<<" tp: "<<tp<<std::endl;
  267 +
  268 +
  269 + //compute the reflected E vector
  270 + vec< complex<T> > Er = vec< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
  271 + //compute the transmitted E vector
  272 + vec< complex<T> > Et = vec< complex<T> >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s;
  273 +
  274 + T phase_t = P.p().dot(k - kt);
  275 + T phase_r = P.p().dot(k - kr);
  276 +
  277 + //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;
  278 +
  279 + //std::cout<<"phase: "<<phase<<std::endl;
  280 +
  281 + //create the plane waves
  282 + r.k = kr;
  283 + r.E0 = Er * exp( complex<T>(0, phase_r) );
  284 + //r.phi = phase_r;
  285 +
  286 + //t = bend(kt);
  287 + //t.k = t.k * nr;
  288 +
  289 + t.k = kt;
  290 + t.E0 = Et * exp( complex<T>(0, phase_t) );
  291 + //t.phi = phase_t;
  292 + //std::cout<<"i: "<<str()<<std::endl;
  293 + //std::cout<<"r: "<<r.str()<<std::endl;
  294 + //std::cout<<"t: "<<t.str()<<std::endl;
  295 +
  296 + //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;
  297 + //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;
  298 + //std::cout<<"--------------------------------"<<std::endl;
  299 +
  300 + }
  301 +
  302 + std::string str()
  303 + {
  304 + std::stringstream ss;
  305 + ss<<"Plane Wave:"<<std::endl;
  306 + ss<<" "<<E0<<" e^i ( "<<k<<" . r )";
  307 + return ss.str();
  308 + }
  309 +}; //end planewave class
  310 +} //end namespace optics
  311 +} //end namespace stim
  312 +
  313 +template <typename T>
  314 +std::ostream& operator<<(std::ostream& os, rts::planewave<T> p)
  315 +{
  316 + os<<p.str();
  317 + return os;
  318 +}
  319 +
  320 +#endif
... ...