Commit 8e4f836425e5bee321a75e5ee33dee9867df0a85

Authored by David Mayerich
1 parent 3ff136b3

started a new optical framework focused on scalar simulation

stim/cuda/cudatools/devices.h
@@ -15,7 +15,7 @@ int maxThreadsPerBlock() @@ -15,7 +15,7 @@ int maxThreadsPerBlock()
15 } 15 }
16 16
17 extern "C" 17 extern "C"
18 -int sharedMemPerBlock() 18 +size_t sharedMemPerBlock()
19 { 19 {
20 int device; 20 int device;
21 cudaGetDevice(&device); //get the id of the current device 21 cudaGetDevice(&device); //get the id of the current device
@@ -23,6 +23,16 @@ int sharedMemPerBlock() @@ -23,6 +23,16 @@ int sharedMemPerBlock()
23 cudaGetDeviceProperties(&props, device); 23 cudaGetDeviceProperties(&props, device);
24 return props.sharedMemPerBlock; 24 return props.sharedMemPerBlock;
25 } 25 }
  26 +
  27 +extern "C"
  28 +size_t constMem()
  29 +{
  30 + int device;
  31 + cudaGetDevice(&device); //get the id of the current device
  32 + cudaDeviceProp props; //device property structure
  33 + cudaGetDeviceProperties(&props, device);
  34 + return props.totalConstMem;
  35 +}
26 } //end namespace rts 36 } //end namespace rts
27 37
28 #endif 38 #endif
stim/cuda/sharedmem.cuh
@@ -5,7 +5,7 @@ @@ -5,7 +5,7 @@
5 namespace stim{ 5 namespace stim{
6 namespace cuda{ 6 namespace cuda{
7 7
8 - // Copies values from global memory to shared memory, optimizing threads 8 + // Copies values from texture memory to shared memory, optimizing threads
9 template<typename T> 9 template<typename T>
10 __device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src, 10 __device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,
11 unsigned int x, unsigned int y, unsigned int X, unsigned int Y, 11 unsigned int x, unsigned int y, unsigned int X, unsigned int Y,
@@ -35,6 +35,19 @@ namespace stim{ @@ -35,6 +35,19 @@ namespace stim{
35 } 35 }
36 } 36 }
37 37
  38 + // Copies values from global memory to shared memory, optimizing threads
  39 + template<typename T>
  40 + __device__ void sharedMemcpy(T* dest, T* src, size_t N, size_t tid, size_t nt){
  41 +
  42 + size_t I = N / nt + 1; //calculate the number of iterations required to make the copy
  43 + size_t xi = tid; //initialize the source and destination index to the thread ID
  44 + for(size_t i = 0; i < I; i++){ //for each iteration
  45 + if(xi < N) //if the index is within the copy region
  46 + dest[xi] = src[xi]; //perform the copy
  47 + xi += nt;
  48 + }
  49 + }
  50 +
38 51
39 } 52 }
40 } 53 }
stim/math/bessel.h
@@ -17,6 +17,11 @@ static complex&lt;double&gt; czero(0.0,0.0); @@ -17,6 +17,11 @@ static complex&lt;double&gt; czero(0.0,0.0);
17 template< typename P > 17 template< typename P >
18 P gamma(P x) 18 P gamma(P x)
19 { 19 {
  20 + const P EPS = numeric_limits<P>::epsilon();
  21 + const P FPMIN_MAG = numeric_limits<P>::min();
  22 + const P FPMIN = numeric_limits<P>::lowest();
  23 + const P FPMAX = numeric_limits<P>::max();
  24 +
20 int i,k,m; 25 int i,k,m;
21 P ga,gr,r,z; 26 P ga,gr,r,z;
22 27
@@ -47,7 +52,7 @@ P gamma(P x) @@ -47,7 +52,7 @@ P gamma(P x)
47 -0.54e-14, 52 -0.54e-14,
48 0.14e-14}; 53 0.14e-14};
49 54
50 - if (x > 171.0) return 1e308; // This value is an overflow flag. 55 + if (x > 171.0) return FPMAX; // This value is an overflow flag.
51 if (x == (int)x) { 56 if (x == (int)x) {
52 if (x > 0.0) { 57 if (x > 0.0) {
53 ga = 1.0; // use factorial 58 ga = 1.0; // use factorial
@@ -56,7 +61,7 @@ P gamma(P x) @@ -56,7 +61,7 @@ P gamma(P x)
56 } 61 }
57 } 62 }
58 else 63 else
59 - ga = 1e308; 64 + ga = FPMAX;
60 } 65 }
61 else { 66 else {
62 if (fabs(x) > 1.0) { 67 if (fabs(x) > 1.0) {
@@ -89,6 +94,11 @@ template&lt;typename P&gt; @@ -89,6 +94,11 @@ template&lt;typename P&gt;
89 int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1, 94 int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1,
90 P &j0p,P &j1p,P &y0p,P &y1p) 95 P &j0p,P &j1p,P &y0p,P &y1p)
91 { 96 {
  97 + const P EPS = numeric_limits<P>::epsilon();
  98 + const P FPMIN_MAG = numeric_limits<P>::min();
  99 + const P FPMIN = numeric_limits<P>::lowest();
  100 + const P FPMAX = numeric_limits<P>::max();
  101 +
92 P x2,r,ec,w0,w1,r0,r1,cs0,cs1; 102 P x2,r,ec,w0,w1,r0,r1,cs0,cs1;
93 P cu,p0,q0,p1,q1,t1,t2; 103 P cu,p0,q0,p1,q1,t1,t2;
94 int k,kz; 104 int k,kz;
@@ -157,12 +167,12 @@ int bessjy01a(P x,P &amp;j0,P &amp;j1,P &amp;y0,P &amp;y1, @@ -157,12 +167,12 @@ int bessjy01a(P x,P &amp;j0,P &amp;j1,P &amp;y0,P &amp;y1,
157 if (x == 0.0) { 167 if (x == 0.0) {
158 j0 = 1.0; 168 j0 = 1.0;
159 j1 = 0.0; 169 j1 = 0.0;
160 - y0 = -1e308;  
161 - y1 = -1e308; 170 + y0 = -FPMIN;
  171 + y1 = -FPMIN;
162 j0p = 0.0; 172 j0p = 0.0;
163 j1p = 0.5; 173 j1p = 0.5;
164 - y0p = 1e308;  
165 - y1p = 1e308; 174 + y0p = FPMAX;
  175 + y1p = FPMAX;
166 return 0; 176 return 0;
167 } 177 }
168 x2 = x*x; 178 x2 = x*x;
@@ -329,7 +339,7 @@ int msta1(P x,int mp) @@ -329,7 +339,7 @@ int msta1(P x,int mp)
329 for (i=0;i<20;i++) { 339 for (i=0;i<20;i++) {
330 nn = (int)(n1-(n1-n0)/(1.0-f0/f1)); 340 nn = (int)(n1-(n1-n0)/(1.0-f0/f1));
331 f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-mp; 341 f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-mp;
332 - if (abs(nn-n1) < 1) break; 342 + if (std::abs(nn-n1) < 1) break;
333 n0 = n1; 343 n0 = n1;
334 f0 = f1; 344 f0 = f1;
335 n1 = nn; 345 n1 = nn;
@@ -361,7 +371,7 @@ int msta2(P x,int n,int mp) @@ -361,7 +371,7 @@ int msta2(P x,int n,int mp)
361 for (i=0;i<20;i++) { 371 for (i=0;i<20;i++) {
362 nn = (int)(n1-(n1-n0)/(1.0-f0/f1)); 372 nn = (int)(n1-(n1-n0)/(1.0-f0/f1));
363 f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-obj; 373 f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-obj;
364 - if (abs(nn-n1) < 1) break; 374 + if (std::abs(nn-n1) < 1) break;
365 n0 = n1; 375 n0 = n1;
366 f0 = f1; 376 f0 = f1;
367 n1 = nn; 377 n1 = nn;
@@ -596,21 +606,26 @@ int bessjyv(P v,P x,P &amp;vm,P *jv,P *yv, @@ -596,21 +606,26 @@ int bessjyv(P v,P x,P &amp;vm,P *jv,P *yv,
596 P b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk; 606 P b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk;
597 int j,k,l,m,n,kz; 607 int j,k,l,m,n,kz;
598 608
  609 + const P EPS = numeric_limits<P>::epsilon();
  610 + const P FPMIN_MAG = numeric_limits<P>::min();
  611 + const P FPMIN = numeric_limits<P>::lowest();
  612 + const P FPMAX = numeric_limits<P>::max();
  613 +
599 x2 = x*x; 614 x2 = x*x;
600 n = (int)v; 615 n = (int)v;
601 v0 = v-n; 616 v0 = v-n;
602 if ((x < 0.0) || (v < 0.0)) return 1; 617 if ((x < 0.0) || (v < 0.0)) return 1;
603 - if (x < 1e-15) { 618 + if (x < EPS) {
604 for (k=0;k<=n;k++) { 619 for (k=0;k<=n;k++) {
605 jv[k] = 0.0; 620 jv[k] = 0.0;
606 - yv[k] = -1e308; 621 + yv[k] = FPMIN;
607 djv[k] = 0.0; 622 djv[k] = 0.0;
608 - dyv[k] = 1e308; 623 + dyv[k] = FPMAX;
609 if (v0 == 0.0) { 624 if (v0 == 0.0) {
610 jv[0] = 1.0; 625 jv[0] = 1.0;
611 djv[1] = 0.5; 626 djv[1] = 0.5;
612 } 627 }
613 - else djv[0] = 1e308; 628 + else djv[0] = FPMAX;
614 } 629 }
615 vm = v; 630 vm = v;
616 return 0; 631 return 0;
@@ -623,7 +638,7 @@ int bessjyv(P v,P x,P &amp;vm,P *jv,P *yv, @@ -623,7 +638,7 @@ int bessjyv(P v,P x,P &amp;vm,P *jv,P *yv,
623 for (k=1;k<=40;k++) { 638 for (k=1;k<=40;k++) {
624 r *= -0.25*x2/(k*(k+vl)); 639 r *= -0.25*x2/(k*(k+vl));
625 bjvl += r; 640 bjvl += r;
626 - if (fabs(r) < fabs(bjvl)*1e-15) break; 641 + if (fabs(r) < fabs(bjvl)*EPS) break;
627 } 642 }
628 vg = 1.0 + vl; 643 vg = 1.0 + vl;
629 a = pow(0.5*x,vl)/gamma(vg); 644 a = pow(0.5*x,vl)/gamma(vg);
@@ -686,7 +701,7 @@ int bessjyv(P v,P x,P &amp;vm,P *jv,P *yv, @@ -686,7 +701,7 @@ int bessjyv(P v,P x,P &amp;vm,P *jv,P *yv,
686 if (m < n) n = m; 701 if (m < n) n = m;
687 else m = msta2(x,n,15); 702 else m = msta2(x,n,15);
688 f2 = 0.0; 703 f2 = 0.0;
689 - f1 = 1.0e-100; 704 + f1 = FPMIN_MAG;
690 for (k=m;k>=0;k--) { 705 for (k=m;k>=0;k--) {
691 f = 2.0*(v0+k+1.0)*f1/x-f2; 706 f = 2.0*(v0+k+1.0)*f1/x-f2;
692 if (k <= n) jv[k] = f; 707 if (k <= n) jv[k] = f;
@@ -766,17 +781,17 @@ int bessjyv_sph(int v, P z, P &amp;vm, P* cjv, @@ -766,17 +781,17 @@ int bessjyv_sph(int v, P z, P &amp;vm, P* cjv,
766 P* cyv, P* cjvp, P* cyvp) 781 P* cyv, P* cjvp, P* cyvp)
767 { 782 {
768 //first, compute the bessel functions of fractional order 783 //first, compute the bessel functions of fractional order
769 - bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); 784 + bessjyv<P>(v + (P)0.5, z, vm, cjv, cyv, cjvp, cyvp);
770 785
771 //iterate through each and scale 786 //iterate through each and scale
772 for(int n = 0; n<=v; n++) 787 for(int n = 0; n<=v; n++)
773 { 788 {
774 789
775 - cjv[n] = cjv[n] * sqrt(rtsPI/(z * 2.0));  
776 - cyv[n] = cyv[n] * sqrt(rtsPI/(z * 2.0)); 790 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  791 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
777 792
778 - cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(rtsPI / (z * 2.0));  
779 - cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(rtsPI / (z * 2.0)); 793 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
  794 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
780 } 795 }
781 796
782 return 0; 797 return 0;
@@ -1498,11 +1513,11 @@ int cbessjyva_sph(int v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv, @@ -1498,11 +1513,11 @@ int cbessjyva_sph(int v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv,
1498 for(int n = 0; n<=v; n++) 1513 for(int n = 0; n<=v; n++)
1499 { 1514 {
1500 1515
1501 - cjv[n] = cjv[n] * sqrt(rtsPI/(z * 2.0));  
1502 - cyv[n] = cyv[n] * sqrt(rtsPI/(z * 2.0)); 1516 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  1517 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
1503 1518
1504 - cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(rtsPI / (z * 2.0));  
1505 - cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(rtsPI / (z * 2.0)); 1519 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
  1520 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
1506 } 1521 }
1507 1522
1508 return 0; 1523 return 0;
stim/math/complex.h
1 -/*RTS Complex number class. This class is CUDA compatible,  
2 -and can therefore be used in CUDA code and on CUDA devices.  
3 -*/ 1 +/// CUDA compatible complex number class
4 2
5 -#ifndef RTS_COMPLEX  
6 -#define RTS_COMPLEX 3 +#ifndef STIM_COMPLEX
  4 +#define STIM_COMPLEX
7 5
8 -#include "../cuda/callable.h" 6 +#include "../cuda/cudatools/callable.h"
9 #include <cmath> 7 #include <cmath>
10 #include <string> 8 #include <string>
11 #include <sstream> 9 #include <sstream>
@@ -230,12 +228,6 @@ struct complex @@ -230,12 +228,6 @@ struct complex
230 return result; 228 return result;
231 } 229 }
232 230
233 - /*CUDA_CALLABLE complex<T> pow(int y)  
234 - {  
235 -  
236 - return pow((double)y);  
237 - }*/  
238 -  
239 CUDA_CALLABLE complex<T> pow(T y) 231 CUDA_CALLABLE complex<T> pow(T y)
240 { 232 {
241 complex<T> result; 233 complex<T> result;
@@ -328,8 +320,31 @@ struct complex @@ -328,8 +320,31 @@ struct complex
328 return *this; 320 return *this;
329 } 321 }
330 322
  323 +
  324 +
331 }; 325 };
332 326
  327 +/// Cast an array of complex values to an array of real values
  328 +template<typename T>
  329 +static void real(T* r, complex<T>* c, size_t n){
  330 + for(size_t i = 0; i < n; i++)
  331 + r[i] = c[i].real();
  332 +}
  333 +
  334 +/// Cast an array of complex values to an array of real values
  335 +template<typename T>
  336 +static void imag(T* r, complex<T>* c, size_t n){
  337 + for(size_t i = 0; i < n; i++)
  338 + r[i] = c[i].imag();
  339 +}
  340 +
  341 +/// Calculate the magnitude of an array of complex values
  342 +template<typename T>
  343 +static void abs(T* m, complex<T>* c, size_t n){
  344 + for(size_t i = 0; i < n; i++)
  345 + m[i] = c[i].abs();
  346 +}
  347 +
333 } //end RTS namespace 348 } //end RTS namespace
334 349
335 //addition 350 //addition
@@ -432,17 +447,6 @@ CUDA_CALLABLE static T imag(stim::complex&lt;T&gt; a) @@ -432,17 +447,6 @@ CUDA_CALLABLE static T imag(stim::complex&lt;T&gt; a)
432 return a.i; 447 return a.i;
433 } 448 }
434 449
435 -//trigonometric functions  
436 -//template<class A>  
437 -/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x)  
438 -{  
439 - stim::complex<float> result;  
440 - result.r = sinf(x.r) * coshf(x.i);  
441 - result.i = cosf(x.r) * sinhf(x.i);  
442 -  
443 - return result;  
444 -}*/  
445 -  
446 template<class A> 450 template<class A>
447 CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x) 451 CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)
448 { 452 {
@@ -453,17 +457,6 @@ CUDA_CALLABLE stim::complex&lt;A&gt; sin(const stim::complex&lt;A&gt; x) @@ -453,17 +457,6 @@ CUDA_CALLABLE stim::complex&lt;A&gt; sin(const stim::complex&lt;A&gt; x)
453 return result; 457 return result;
454 } 458 }
455 459
456 -//floating point template  
457 -//template<class A>  
458 -/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x)  
459 -{  
460 - stim::complex<float> result;  
461 - result.r = cosf(x.r) * coshf(x.i);  
462 - result.i = -(sinf(x.r) * sinhf(x.i));  
463 -  
464 - return result;  
465 -}*/  
466 -  
467 template<class A> 460 template<class A>
468 CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x) 461 CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x)
469 { 462 {
@@ -496,10 +489,4 @@ std::istream&amp; operator&gt;&gt;(std::istream&amp; is, stim::complex&lt;A&gt;&amp; x) @@ -496,10 +489,4 @@ std::istream&amp; operator&gt;&gt;(std::istream&amp; is, stim::complex&lt;A&gt;&amp; x)
496 return is; //return the stream 489 return is; //return the stream
497 } 490 }
498 491
499 -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7  
500 -//template<class T> using rtsComplex = stim::complex<T>;  
501 -//#endif  
502 -  
503 -  
504 -  
505 #endif 492 #endif
stim/math/constants.h
1 -#ifndef RTS_CONSTANTS_H  
2 -#define RTS_CONSTANTS_H 1 +#ifndef STIM_CONSTANTS_H
  2 +#define STIM_CONSTANTS_H
3 3
4 -#define stimPI 3.14159  
5 -#define stimTAU 2 * rtsPI 4 +namespace stim{
  5 + const double PI = 3.1415926535897932384626433832795028841971693993751058209749445923078164062862;
  6 + const double TAU = 2 * stim::PI;
  7 +}
6 8
7 #endif 9 #endif
stim/math/matrix.h
@@ -50,10 +50,8 @@ struct matrix @@ -50,10 +50,8 @@ struct matrix
50 return *this; 50 return *this;
51 } 51 }
52 52
53 -  
54 template<typename Y> 53 template<typename Y>
55 - CUDA_CALLABLE vec<Y> operator*(vec<Y> rhs)  
56 - { 54 + vec<Y> operator*(vec<Y> rhs){
57 unsigned int N = rhs.size(); 55 unsigned int N = rhs.size();
58 56
59 vec<Y> result; 57 vec<Y> result;
@@ -66,6 +64,16 @@ struct matrix @@ -66,6 +64,16 @@ struct matrix
66 return result; 64 return result;
67 } 65 }
68 66
  67 + template<typename Y>
  68 + CUDA_CALLABLE vec3<Y> operator*(vec3<Y> rhs){
  69 + vec3<Y> result = 0;
  70 + for(int r=0; r<3; r++)
  71 + for(int c=0; c<3; c++)
  72 + result[r] += (*this)(r, c) * rhs[c];
  73 +
  74 + return result;
  75 + }
  76 +
69 std::string toStr() 77 std::string toStr()
70 { 78 {
71 std::stringstream ss; 79 std::stringstream ss;
@@ -82,10 +90,6 @@ struct matrix @@ -82,10 +90,6 @@ struct matrix
82 90
83 return ss.str(); 91 return ss.str();
84 } 92 }
85 -  
86 -  
87 -  
88 -  
89 }; 93 };
90 94
91 } //end namespace rts 95 } //end namespace rts
stim/math/meshgrid.h 0 โ†’ 100644
  1 +#ifndef STIM_MESHGRID_H
  2 +#define STIM_MESHGRID_H
  3 +
  4 +namespace stim{
  5 +
  6 + /// Create a 2D grid based on a pair of vectors representing the grid spacing (see Matlab)
  7 + /// @param X is an [nx x ny] array that will store the X coordinates for each 2D point
  8 + /// @param Y is an [nx x ny] array that will store the Y coordinates for each 2D point
  9 + /// @param x is an [nx] array that provides the positions of grid points in the x direction
  10 + /// @param nx is the number of grid points in the x direction
  11 + /// @param y is an [ny] array that provides the positions of grid points in the y direction
  12 + /// @param ny is the number of grid points in the y direction
  13 + template<typename T>
  14 + void meshgrid(T* X, T* Y, T* x, size_t nx, T* y, size_t ny){
  15 + size_t xi, yi; //allocate index variables
  16 + for(yi = 0; yi < ny; yi++){ //iterate through each column
  17 + for(xi = 0; xi < nx; xi++){ //iterate through each row
  18 + X[yi * nx + xi] = x[xi];
  19 + Y[yi * nx + xi] = y[yi];
  20 + }
  21 + }
  22 + }
  23 +
  24 + /// Creates an array of n equally spaced values in the range [xmin xmax]
  25 + /// @param X is an array of length n that stores the values
  26 + /// @param xmin is the start point of the array
  27 + /// @param xmax is the end point of the array
  28 + /// @param n is the number of points in the array
  29 + template<typename T>
  30 + void linspace(T* X, T xmin, T xmax, size_t n){
  31 + T alpha;
  32 + for(size_t i = 0; i < n; i++){
  33 + alpha = (T)i / (T)n;
  34 + X[i] = (1 - alpha) * xmin + alpha * xmax;
  35 + }
  36 + }
  37 +
  38 +
  39 +}
  40 +
  41 +
  42 +#endif
0 \ No newline at end of file 43 \ No newline at end of file
stim/math/quaternion.h
@@ -26,13 +26,13 @@ public: @@ -26,13 +26,13 @@ public:
26 26
27 CUDA_CALLABLE void CreateRotation(T theta, T ux, T uy, T uz){ 27 CUDA_CALLABLE void CreateRotation(T theta, T ux, T uy, T uz){
28 28
29 - vec<T> u(ux, uy, uz); 29 + vec3<T> u(ux, uy, uz);
30 CreateRotation(theta, u); 30 CreateRotation(theta, u);
31 } 31 }
32 32
33 - CUDA_CALLABLE void CreateRotation(T theta, vec<T> u){ 33 + CUDA_CALLABLE void CreateRotation(T theta, vec3<T> u){
34 34
35 - vec<T> u_hat = u.norm(); 35 + vec3<T> u_hat = u.norm();
36 36
37 //assign the given Euler rotation to this quaternion 37 //assign the given Euler rotation to this quaternion
38 w = (T)cos(theta/2); 38 w = (T)cos(theta/2);
@@ -41,9 +41,9 @@ public: @@ -41,9 +41,9 @@ public:
41 z = u_hat[2]*(T)sin(theta/2); 41 z = u_hat[2]*(T)sin(theta/2);
42 } 42 }
43 43
44 - void CreateRotation(vec<T> from, vec<T> to){ 44 + CUDA_CALLABLE void CreateRotation(vec3<T> from, vec3<T> to){
45 45
46 - vec<T> r = from.cross(to); //compute the rotation vector 46 + vec3<T> r = from.cross(to); //compute the rotation vector
47 T theta = asin(r.len()); //compute the angle of the rotation about r 47 T theta = asin(r.len()); //compute the angle of the rotation about r
48 //deal with a zero vector (both k and kn point in the same direction) 48 //deal with a zero vector (both k and kn point in the same direction)
49 if(theta == (T)0){ 49 if(theta == (T)0){
stim/math/vec3.h 0 โ†’ 100644
  1 +#ifndef STIM_VEC3_H
  2 +#define STIM_VEC3_H
  3 +
  4 +
  5 +#include <stim/cuda/cudatools/callable.h>
  6 +
  7 +
  8 +namespace stim{
  9 +
  10 +
  11 +/// A class designed to act as a 3D vector with CUDA compatibility
  12 +template<typename T>
  13 +class vec3{
  14 +
  15 +protected:
  16 + T ptr[3];
  17 +
  18 +public:
  19 +
  20 + CUDA_CALLABLE vec3(){}
  21 +
  22 + CUDA_CALLABLE vec3(T v){
  23 + ptr[0] = ptr[1] = ptr[2] = v;
  24 + }
  25 +
  26 + CUDA_CALLABLE vec3(T x, T y, T z){
  27 + ptr[0] = x;
  28 + ptr[1] = y;
  29 + ptr[2] = z;
  30 + }
  31 +
  32 + //copy constructor
  33 + CUDA_CALLABLE vec3( const vec3<T>& other){
  34 + ptr[0] = other.ptr[0];
  35 + ptr[1] = other.ptr[1];
  36 + ptr[2] = other.ptr[2];
  37 + }
  38 +
  39 + //access an element using an index
  40 + CUDA_CALLABLE T& operator[](int idx){
  41 + return ptr[idx];
  42 + }
  43 +
  44 +/// Casting operator. Creates a new vector with a new type U.
  45 + template< typename U >
  46 + CUDA_CALLABLE operator vec3<U>(){
  47 + vec3<U> result;
  48 + result.ptr[0] = (U)ptr[0];
  49 + result.ptr[1] = (U)ptr[1];
  50 + result.ptr[2] = (U)ptr[2];
  51 +
  52 + return result;
  53 + }
  54 +
  55 + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
  56 + CUDA_CALLABLE T len_sq() const{
  57 + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
  58 + }
  59 +
  60 + /// computes the Euclidean length of the vector
  61 + CUDA_CALLABLE T len() const{
  62 + return sqrt(len_sq());
  63 + }
  64 +
  65 +
  66 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  67 + CUDA_CALLABLE vec3<T> cart2sph() const{
  68 + vec3<T> sph;
  69 + sph.ptr[0] = len();
  70 + sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
  71 + if(sph.ptr[0] == 0)
  72 + sph.ptr[2] = 0;
  73 + else
  74 + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
  75 + return sph;
  76 + }
  77 +
  78 + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi])
  79 + CUDA_CALLABLE vec3<T> sph2cart() const{
  80 + vec3<T> cart;
  81 + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
  82 + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
  83 + cart.ptr[2] = ptr[0] * std::cos(ptr[2]);
  84 +
  85 + return cart;
  86 + }
  87 +
  88 + /// Computes the normalized vector (where each coordinate is divided by the L2 norm)
  89 + CUDA_CALLABLE vec3<T> norm() const{
  90 + vec3<T> result;
  91 + T l = len(); //compute the vector length
  92 + return (*this) / l;
  93 + }
  94 +
  95 + /// Computes the cross product of a 3-dimensional vector
  96 + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{
  97 +
  98 + vec3<T> result;
  99 +
  100 + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
  101 + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
  102 + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
  103 +
  104 + return result;
  105 + }
  106 +
  107 + /// Compute the Euclidean inner (dot) product
  108 + CUDA_CALLABLE T dot(vec3<T> rhs) const{
  109 + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
  110 + }
  111 +
  112 + /// Arithmetic addition operator
  113 +
  114 + /// @param rhs is the right-hand-side operator for the addition
  115 + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
  116 + vec3<T> result;
  117 + result.ptr[0] = ptr[0] + rhs[0];
  118 + result.ptr[1] = ptr[1] + rhs[1];
  119 + result.ptr[2] = ptr[2] + rhs[2];
  120 + return result;
  121 + }
  122 +
  123 + /// Arithmetic addition to a scalar
  124 +
  125 + /// @param rhs is the right-hand-side operator for the addition
  126 + CUDA_CALLABLE vec3<T> operator+(T rhs) const{
  127 + vec3<T> result;
  128 + result.ptr[0] = ptr[0] + rhs;
  129 + result.ptr[1] = ptr[1] + rhs;
  130 + result.ptr[2] = ptr[2] + rhs;
  131 + return result;
  132 + }
  133 +
  134 + /// Arithmetic subtraction operator
  135 +
  136 + /// @param rhs is the right-hand-side operator for the subtraction
  137 + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
  138 + vec3<T> result;
  139 + result.ptr[0] = ptr[0] - rhs[0];
  140 + result.ptr[1] = ptr[1] - rhs[1];
  141 + result.ptr[2] = ptr[2] - rhs[2];
  142 + return result;
  143 + }
  144 + /// Arithmetic subtraction to a scalar
  145 +
  146 + /// @param rhs is the right-hand-side operator for the addition
  147 + CUDA_CALLABLE vec3<T> operator-(T rhs) const{
  148 + vec3<T> result;
  149 + result.ptr[0] = ptr[0] - rhs;
  150 + result.ptr[1] = ptr[1] - rhs;
  151 + result.ptr[2] = ptr[2] - rhs;
  152 + return result;
  153 + }
  154 +
  155 + /// Arithmetic scalar multiplication operator
  156 +
  157 + /// @param rhs is the right-hand-side operator for the subtraction
  158 + CUDA_CALLABLE vec3<T> operator*(T rhs) const{
  159 + vec3<T> result;
  160 + result.ptr[0] = ptr[0] * rhs;
  161 + result.ptr[1] = ptr[1] * rhs;
  162 + result.ptr[2] = ptr[2] * rhs;
  163 + return result;
  164 + }
  165 +
  166 + /// Arithmetic scalar division operator
  167 +
  168 + /// @param rhs is the right-hand-side operator for the subtraction
  169 + CUDA_CALLABLE vec3<T> operator/(T rhs) const{
  170 + return (*this) * ((T)1.0/rhs);
  171 + }
  172 +
  173 + /// Multiplication by a scalar, followed by assignment
  174 + CUDA_CALLABLE vec3<T> operator*=(T rhs){
  175 + ptr[0] = ptr[0] * rhs;
  176 + ptr[1] = ptr[1] * rhs;
  177 + ptr[2] = ptr[2] * rhs;
  178 + return *this;
  179 + }
  180 +
  181 + /// Addition and assignment
  182 + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
  183 + ptr[0] = ptr[0] + rhs;
  184 + ptr[1] = ptr[1] + rhs;
  185 + ptr[2] = ptr[2] + rhs;
  186 + return *this;
  187 + }
  188 +
  189 + /// Assign a scalar to all values
  190 + CUDA_CALLABLE vec3<T> & operator=(T rhs){
  191 + ptr[0] = ptr[0] = rhs;
  192 + ptr[1] = ptr[1] = rhs;
  193 + ptr[2] = ptr[2] = rhs;
  194 + return *this;
  195 + }
  196 +
  197 + /// Casting and assignment
  198 + template<typename Y>
  199 + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
  200 + ptr[0] = (T)rhs.ptr[0];
  201 + ptr[1] = (T)rhs.ptr[1];
  202 + ptr[2] = (T)rhs.ptr[2];
  203 + return *this;
  204 + }
  205 +
  206 + /// Unary minus (returns the negative of the vector)
  207 + CUDA_CALLABLE vec3<T> operator-() const{
  208 + vec3<T> result;
  209 + result.ptr[0] = -ptr[0];
  210 + result.ptr[1] = -ptr[1];
  211 + result.ptr[2] = -ptr[2];
  212 + return result;
  213 + }
  214 +
  215 +
  216 + /// Outputs the vector as a string
  217 + std::string str() const{
  218 + std::stringstream ss;
  219 +
  220 + size_t N = size();
  221 +
  222 + ss<<"[";
  223 + for(size_t i=0; i<N; i++)
  224 + {
  225 + ss<<at(i);
  226 + if(i != N-1)
  227 + ss<<", ";
  228 + }
  229 + ss<<"]";
  230 +
  231 + return ss.str();
  232 + }
  233 + }; //end class triple
  234 +} //end namespace stim
  235 +
  236 +/// Multiply a vector by a constant when the vector is on the right hand side
  237 +template <typename T>
  238 +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){
  239 + return rhs * lhs;
  240 +}
  241 +
  242 +#endif
0 \ No newline at end of file 243 \ No newline at end of file
stim/math/vector.h
1 -#ifndef RTS_VECTOR_H  
2 -#define RTS_VECTOR_H 1 +#ifndef STIM_VECTOR_H
  2 +#define STIM_VECTOR_H
3 3
4 #include <iostream> 4 #include <iostream>
5 #include <cmath> 5 #include <cmath>
@@ -11,8 +11,6 @@ @@ -11,8 +11,6 @@
11 namespace stim 11 namespace stim
12 { 12 {
13 13
14 -  
15 -  
16 template <class T> 14 template <class T>
17 struct vec : public std::vector<T> 15 struct vec : public std::vector<T>
18 { 16 {
stim/optics/planewave.h
1 -#ifndef RTS_PLANEWAVE  
2 -#define RTS_PLANEWAVE 1 +#ifndef STIM_PLANEWAVE_H
  2 +#define STIM_PLANEWAVE_H
3 3
4 #include <string> 4 #include <string>
5 #include <sstream> 5 #include <sstream>
  6 +#include <cmath>
6 7
7 #include "../math/vector.h" 8 #include "../math/vector.h"
8 #include "../math/quaternion.h" 9 #include "../math/quaternion.h"
9 #include "../math/constants.h" 10 #include "../math/constants.h"
10 #include "../math/plane.h" 11 #include "../math/plane.h"
11 -#include "../cuda/callable.h"  
12 -  
13 -/*Basic conversions used here (assuming a vacuum)  
14 - lambda =  
15 -*/ 12 +#include "../math/complex.h"
16 13
17 namespace stim{ 14 namespace stim{
  15 + namespace optics{
  16 +
  17 + /// evaluate the scalar field produced by a plane wave at a point (x, y, z)
  18 +
  19 + /// @param x is the x-coordinate of the point
  20 + /// @param y is the y-coordinate of the point
  21 + /// @param z is the z-coordinate of the point
  22 + /// @param A is the amplitude of the plane wave, specifically the field at (0, 0, 0)
  23 + /// @param kx is the k-vector component in the x direction
  24 + /// @param ky is the k-vector component in the y direction
  25 + /// @param kz is the k-vector component in the z direction
  26 + template<typename T>
  27 + stim::complex<T> planewave_scalar(T x, T y, T z, stim::complex<T> A, T kx, T ky, T kz){
  28 + T d = x * kx + y * ky + z * kz; //calculate the dot product between k and p = (x, y, z) to find the distance p is along the propagation direction
  29 + stim::complex<T> di = stim::complex<T>(0, d); //calculate the phase shift that will have to be applied to propagate the wave distance d
  30 + return A * exp(di); //multiply the phase term by the amplitude at (0, 0, 0) to propagate the wave to p
  31 + }
  32 +
  33 + /// evaluate the scalar field produced by a plane wave at several positions
  34 +
  35 + /// @param field is a pre-allocated block of memory that will store the complex field at all points
  36 + /// @param N is the number of field values to be evaluated
  37 + /// @param x is a set of x coordinates defining positions within the field (NULL implies that all values are zero)
  38 + /// @param y is a set of y coordinates defining positions within the field (NULL implies that all values are zero)
  39 + /// @param z is a set of z coordinates defining positions within the field (NULL implies that all values are zero)
  40 + /// @param A is the amplitude of the plane wave, specifically the field at (0, 0, 0)
  41 + /// @param kx is the k-vector component in the x direction
  42 + /// @param ky is the k-vector component in the y direction
  43 + /// @param kz is the k-vector component in the z direction
  44 + template<typename T>
  45 + void cpu_planewave_scalar(stim::complex<T>* field, size_t N, T* x, T* y = NULL, T* z = NULL, stim::complex<T> A = 1.0, T kx = 0.0, T ky = 0.0, T kz = 0.0){
  46 + T px, py, pz;
  47 + for(size_t i = 0; i < N; i++){ // for each element in the array
  48 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values
  49 + (y == NULL) ? py = 0 : py = y[i];
  50 + (z == NULL) ? pz = 0 : pz = z[i];
  51 +
  52 + field[i] = planewave_scalar(px, py, pz, A, kx, ky, kz); // call the single-value plane wave function
  53 + }
  54 + }
18 55
19 template<typename T> 56 template<typename T>
20 class planewave{ 57 class planewave{
21 58
22 protected: 59 protected:
23 60
24 - vec<T> k; //k = tau / lambda  
25 - vec< complex<T> > E0; //amplitude  
26 - //T phi;  
27 -  
28 - CUDA_CALLABLE planewave<T> bend(rts::vec<T> kn) const{ 61 + stim::vec<T> k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda
  62 + stim::vec< stim::complex<T> > E0; //amplitude (for a scalar plane wave, only E0[0] is used)
29 63
30 - vec<T> kn_hat = kn.norm(); //normalize the new k  
31 - vec<T> k_hat = k.norm(); //normalize the current k 64 + /// Bend a plane wave via refraction, given that the new propagation direction is known
  65 + CUDA_CALLABLE planewave<T> bend(stim::vec<T> kn) const{
32 66
33 - //std::cout<<"PLANE WAVE BENDING------------------"<<std::endl;  
34 - //std::cout<<"kn_hat: "<<kn_hat<<" k_hat: "<<k_hat<<std::endl; 67 + stim::vec<T> kn_hat = kn.norm(); //normalize the new k
  68 + stim::vec<T> k_hat = k.norm(); //normalize the current k
35 69
36 - planewave<T> new_p; //create a new plane wave 70 + planewave<T> new_p; //create a new plane wave
37 71
38 - //if kn is equal to k or -k, handle the degenerate case  
39 - T k_dot_kn = k_hat.dot(kn_hat); 72 + T k_dot_kn = k_hat.dot(kn_hat); //if kn is equal to k or -k, handle the degenerate case
40 73
41 //if k . n < 0, then the bend is a reflection 74 //if k . n < 0, then the bend is a reflection
42 - //flip k_hat  
43 - if(k_dot_kn < 0) k_hat = -k_hat; 75 + if(k_dot_kn < 0) k_hat = -k_hat; //flip k_hat
44 76
45 - //std::cout<<"k dot kn: "<<k_dot_kn<<std::endl;  
46 -  
47 - //std::cout<<"k_dot_kn: "<<k_dot_kn<<std::endl;  
48 if(k_dot_kn == -1){ 77 if(k_dot_kn == -1){
49 new_p.k = -k; 78 new_p.k = -k;
50 new_p.E0 = E0; 79 new_p.E0 = E0;
@@ -56,28 +85,11 @@ protected: @@ -56,28 +85,11 @@ protected:
56 return new_p; 85 return new_p;
57 } 86 }
58 87
59 - vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector  
60 -  
61 - //std::cout<<"r: "<<r<<std::endl;  
62 -  
63 - T theta = asin(r.len()); //compute the angle of the rotation about r  
64 -  
65 -  
66 -  
67 - //deal with a zero vector (both k and kn point in the same direction)  
68 - //if(theta == (T)0)  
69 - //{  
70 - // new_p = *this;  
71 - // return new_p;  
72 - //}  
73 -  
74 - //create a quaternion to capture the rotation  
75 - quaternion<T> q;  
76 - q.CreateRotation(theta, r.norm());  
77 -  
78 - //apply the rotation to E0  
79 - vec< complex<T> > E0n = q.toMatrix3() * E0;  
80 - 88 + vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector
  89 + T theta = asin(r.len()); //compute the angle of the rotation about r
  90 + quaternion<T> q; //create a quaternion to capture the rotation
  91 + q.CreateRotation(theta, r.norm());
  92 + vec< complex<T> > E0n = q.toMatrix3() * E0; //apply the rotation to E0
81 new_p.k = kn_hat * kmag(); 93 new_p.k = kn_hat * kmag();
82 new_p.E0 = E0n; 94 new_p.E0 = E0n;
83 95
@@ -86,16 +98,9 @@ protected: @@ -86,16 +98,9 @@ protected:
86 98
87 public: 99 public:
88 100
89 -  
90 - ///constructor: create a plane wave propagating along z, polarized along x  
91 - /*planewave(T lambda = (T)1)  
92 - {  
93 - k = rts::vec<T>(0, 0, 1) * (TAU/lambda);  
94 - E0 = rts::vec<T>(1, 0, 0);  
95 - }*/  
96 - ///constructor: create a plane wave propagating along k, polarized along _E0, at frequency _omega  
97 - CUDA_CALLABLE planewave(vec<T> kvec = rts::vec<T>(0, 0, rtsTAU),  
98 - vec< complex<T> > E = rts::vec<T>(1, 0, 0), T phase = 0) 101 + ///constructor: create a plane wave propagating along k
  102 + CUDA_CALLABLE planewave(vec<T> kvec = stim::vec<T>(0, 0, stim::TAU),
  103 + vec< complex<T> > E = stim::vec<T>(1, 0, 0))
99 { 104 {
100 //phi = phase; 105 //phi = phase;
101 106
@@ -107,27 +112,23 @@ public: @@ -107,27 +112,23 @@ public:
107 else{ 112 else{
108 vec< complex<T> > s = (k_hat.cross(E)).norm(); //compute an orthogonal side vector 113 vec< complex<T> > s = (k_hat.cross(E)).norm(); //compute an orthogonal side vector
109 vec< complex<T> > E_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector 114 vec< complex<T> > E_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector
110 - E0 = E_hat * E_hat.dot(E); //compute the projection of _E0 onto E0_hat 115 + E0 = E_hat;// * E_hat.dot(E); //compute the projection of _E0 onto E0_hat
111 } 116 }
112 117
113 E0 = E0 * exp( complex<T>(0, phase) ); 118 E0 = E0 * exp( complex<T>(0, phase) );
114 } 119 }
115 120
116 ///multiplication operator: scale E0 121 ///multiplication operator: scale E0
117 - CUDA_CALLABLE planewave<T> & operator* (const T & rhs)  
118 - {  
119 - 122 + CUDA_CALLABLE planewave<T> & operator* (const T & rhs){
120 E0 = E0 * rhs; 123 E0 = E0 * rhs;
121 return *this; 124 return *this;
122 } 125 }
123 126
124 - CUDA_CALLABLE T lambda() const  
125 - {  
126 - return rtsTAU / k.len(); 127 + CUDA_CALLABLE T lambda() const{
  128 + return stim::TAU / k.len();
127 } 129 }
128 130
129 - CUDA_CALLABLE T kmag() const  
130 - { 131 + CUDA_CALLABLE T kmag() const{
131 return k.len(); 132 return k.len();
132 } 133 }
133 134
@@ -139,14 +140,11 @@ public: @@ -139,14 +140,11 @@ public:
139 return k; 140 return k;
140 } 141 }
141 142
142 - /*CUDA_CALLABLE T phase(){  
143 - return phi; 143 + /// calculate the value of the field produced by the plane wave given a three-dimensional position
  144 + CUDA_CALLABLE vec< complex<T> > pos(T x, T y, T z){
  145 + return pos( stim::vec<T>(x, y, z) );
144 } 146 }
145 147
146 - CUDA_CALLABLE void phase(T p){  
147 - phi = p;  
148 - }*/  
149 -  
150 CUDA_CALLABLE vec< complex<T> > pos(vec<T> p = vec<T>(0, 0, 0)){ 148 CUDA_CALLABLE vec< complex<T> > pos(vec<T> p = vec<T>(0, 0, 0)){
151 vec< complex<T> > result; 149 vec< complex<T> > result;
152 150
@@ -166,18 +164,32 @@ public: @@ -166,18 +164,32 @@ public:
166 return planewave<T>(k * (nt / ni), E0); 164 return planewave<T>(k * (nt / ni), E0);
167 } 165 }
168 166
169 - CUDA_CALLABLE planewave<T> refract(rts::vec<T> kn) const  
170 - { 167 + CUDA_CALLABLE planewave<T> refract(stim::vec<T> kn) const{
171 return bend(kn); 168 return bend(kn);
172 } 169 }
173 170
174 - void scatter(rts::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){ 171 + /// Calculate the result of a plane wave hitting an interface between two refractive indices
  172 +
  173 + /// @param P is a plane representing the position and orientation of the surface
  174 + /// @param n0 is the refractive index outside of the surface (in the direction of the normal)
  175 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  176 + /// @param r is the reflected component of the plane wave
  177 + /// @param t is the transmitted component of the plane wave
  178 + void scatter(stim::plane<T> P, T n0, T n1, planewave<T> &r, planewave<T> &t){
  179 + scatter(P, n1/n0, r, t);
  180 + }
  181 +
  182 + /// Calculate the scattering result when nr = n1/n0
  183 +
  184 + /// @param P is a plane representing the position and orientation of the surface
  185 + /// @param r is the ration n1/n0
  186 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  187 + /// @param r is the reflected component of the plane wave
  188 + /// @param t is the transmitted component of the plane wave
  189 + void scatter(stim::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){
175 190
176 int facing = P.face(k); //determine which direction the plane wave is coming in 191 int facing = P.face(k); //determine which direction the plane wave is coming in
177 192
178 - //if(facing == 0) //if the wave is tangent to the plane, return an identical wave  
179 - // return *this;  
180 - //else  
181 if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr 193 if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr
182 P = P.flip(); //flip the plane 194 P = P.flip(); //flip the plane
183 nr = 1/nr; //invert the refractive index (now nr = n0/n1) 195 nr = 1/nr; //invert the refractive index (now nr = n0/n1)
@@ -192,7 +204,7 @@ public: @@ -192,7 +204,7 @@ public:
192 bool tir = false; //flag for total internal reflection 204 bool tir = false; //flag for total internal reflection
193 if(theta_t != theta_t){ 205 if(theta_t != theta_t){
194 tir = true; 206 tir = true;
195 - theta_t = rtsPI / (T)2; 207 + theta_t = stim::PI / (T)2;
196 } 208 }
197 209
198 //handle the degenerate case where theta_i is 0 (the plane wave hits head-on) 210 //handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
@@ -205,17 +217,10 @@ public: @@ -205,17 +217,10 @@ public:
205 vec< complex<T> > Et = E0 * tp; 217 vec< complex<T> > Et = E0 * tp;
206 T phase_t = P.p().dot(k - kt); //compute the phase offset 218 T phase_t = P.p().dot(k - kt); //compute the phase offset
207 T phase_r = P.p().dot(k - kr); 219 T phase_r = P.p().dot(k - kr);
208 - //std::cout<<"Degeneracy: Head-On"<<std::endl;  
209 - //std::cout<<"rs: "<<rp<<" rp: "<<rp<<" ts: "<<tp<<" tp: "<<tp<<std::endl;  
210 - //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;  
211 220
212 //create the plane waves 221 //create the plane waves
213 r = planewave<T>(kr, Er, phase_r); 222 r = planewave<T>(kr, Er, phase_r);
214 t = planewave<T>(kt, Et, phase_t); 223 t = planewave<T>(kt, Et, phase_t);
215 -  
216 - //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;  
217 - //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;  
218 - //std::cout<<"--------------------------------"<<std::endl;  
219 return; 224 return;
220 } 225 }
221 226
@@ -245,11 +250,9 @@ public: @@ -245,11 +250,9 @@ public:
245 250
246 //compute the magnitude of the p- and s-polarized components of the incident E vector 251 //compute the magnitude of the p- and s-polarized components of the incident E vector
247 complex<T> Ei_s = E0.dot(x_hat); 252 complex<T> Ei_s = E0.dot(x_hat);
248 - //int sgn = (0 < E0.dot(y_hat)) - (E0.dot(y_hat) < 0);  
249 int sgn = E0.dot(y_hat).sgn(); 253 int sgn = E0.dot(y_hat).sgn();
250 vec< complex<T> > cx_hat = x_hat; 254 vec< complex<T> > cx_hat = x_hat;
251 complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn; 255 complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
252 - //T Ei_p = ( E0 - x_hat * Ei_s ).len();  
253 //compute the magnitude of the p- and s-polarized components of the reflected E vector 256 //compute the magnitude of the p- and s-polarized components of the reflected E vector
254 complex<T> Er_s = Ei_s * rs; 257 complex<T> Er_s = Ei_s * rs;
255 complex<T> Er_p = Ei_p * rp; 258 complex<T> Er_p = Ei_p * rp;
@@ -257,14 +260,6 @@ public: @@ -257,14 +260,6 @@ public:
257 complex<T> Et_s = Ei_s * ts; 260 complex<T> Et_s = Ei_s * ts;
258 complex<T> Et_p = Ei_p * tp; 261 complex<T> Et_p = Ei_p * tp;
259 262
260 - //std::cout<<"E0: "<<E0<<std::endl;  
261 - //std::cout<<"E0 dot y_hat: "<<E0.dot(y_hat)<<std::endl;  
262 - //std::cout<<"theta i: "<<theta_i<<" theta t: "<<theta_t<<std::endl;  
263 - //std::cout<<"x_hat: "<<x_hat<<" y_hat: "<<y_hat<<" z_hat: "<<z_hat<<std::endl;  
264 - //std::cout<<"Ei_s: "<<Ei_s<<" Ei_p: "<<Ei_p<<" Er_s: "<<Er_s<<" Er_p: "<<Er_p<<" Et_s: "<<Et_s<<" Et_p: "<<Et_p<<std::endl;  
265 - //std::cout<<"rs: "<<rs<<" rp: "<<rp<<" ts: "<<ts<<" tp: "<<tp<<std::endl;  
266 -  
267 -  
268 //compute the reflected E vector 263 //compute the reflected E vector
269 vec< complex<T> > Er = vec< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s; 264 vec< complex<T> > Er = vec< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
270 //compute the transmitted E vector 265 //compute the transmitted E vector
@@ -273,29 +268,12 @@ public: @@ -273,29 +268,12 @@ public:
273 T phase_t = P.p().dot(k - kt); 268 T phase_t = P.p().dot(k - kt);
274 T phase_r = P.p().dot(k - kr); 269 T phase_r = P.p().dot(k - kr);
275 270
276 - //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;  
277 -  
278 - //std::cout<<"phase: "<<phase<<std::endl;  
279 -  
280 //create the plane waves 271 //create the plane waves
281 r.k = kr; 272 r.k = kr;
282 r.E0 = Er * exp( complex<T>(0, phase_r) ); 273 r.E0 = Er * exp( complex<T>(0, phase_r) );
283 - //r.phi = phase_r;  
284 -  
285 - //t = bend(kt);  
286 - //t.k = t.k * nr;  
287 274
288 t.k = kt; 275 t.k = kt;
289 t.E0 = Et * exp( complex<T>(0, phase_t) ); 276 t.E0 = Et * exp( complex<T>(0, phase_t) );
290 - //t.phi = phase_t;  
291 - //std::cout<<"i: "<<str()<<std::endl;  
292 - //std::cout<<"r: "<<r.str()<<std::endl;  
293 - //std::cout<<"t: "<<t.str()<<std::endl;  
294 -  
295 - //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;  
296 - //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;  
297 - //std::cout<<"--------------------------------"<<std::endl;  
298 -  
299 } 277 }
300 278
301 std::string str() 279 std::string str()
@@ -305,14 +283,15 @@ public: @@ -305,14 +283,15 @@ public:
305 ss<<" "<<E0<<" e^i ( "<<k<<" . r )"; 283 ss<<" "<<E0<<" e^i ( "<<k<<" . r )";
306 return ss.str(); 284 return ss.str();
307 } 285 }
308 -};  
309 -} 286 +}; //end planewave class
  287 +} //end namespace optics
  288 +} //end namespace stim
310 289
311 template <typename T> 290 template <typename T>
312 -std::ostream& operator<<(std::ostream& os, rts::planewave<T> p) 291 +std::ostream& operator<<(std::ostream& os, stim::optics::planewave<T> p)
313 { 292 {
314 os<<p.str(); 293 os<<p.str();
315 return os; 294 return os;
316 } 295 }
317 296
318 -#endif 297 -#endif
  298 +#endif
319 \ No newline at end of file 299 \ No newline at end of file
stim/optics/scalarbeam.h 0 โ†’ 100644
  1 +#ifndef RTS_BEAM
  2 +#define RTS_BEAM
  3 +
  4 +#include "../math/vec3.h"
  5 +#include "../optics/scalarwave.h"
  6 +#include "../math/bessel.h"
  7 +#include <vector>
  8 +
  9 +//Boost
  10 +//#include <boost/math/special_functions/bessel.hpp>
  11 +//#include <boost/math/special_functions/legendre.hpp>
  12 +
  13 +namespace stim{
  14 +
  15 + /// Function returns the value of the scalar field produced by a beam with the specified parameters
  16 +
  17 + template<typename T>
  18 + std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d, T NA, T NA_in = 0){
  19 +
  20 + std::vector< stim::vec3<T> > dirs(N); //allocate an array to store the focusing vectors
  21 +
  22 + ///compute the rotation operator to transform (0, 0, 1) to k
  23 + T cos_angle = d.dot(vec3<T>(0, 0, 1));
  24 + stim::matrix<T, 3> rotation;
  25 +
  26 + //if the cosine of the angle is -1, the rotation is just a flip across the z axis
  27 + if(cos_angle == -1){
  28 + rotation(2, 2) = -1;
  29 + }
  30 + else if(cos_angle != 1.0)
  31 + {
  32 + vec3<T> r_axis = vec3<T>(0, 0, 1).cross(d).norm(); //compute the axis of rotation
  33 + T angle = acos(cos_angle); //compute the angle of rotation
  34 + quaternion<T> quat; //create a quaternion describing the rotation
  35 + quat.CreateRotation(angle, r_axis);
  36 + rotation = quat.toMatrix3(); //compute the rotation matrix
  37 + }
  38 +
  39 + //find the phi values associated with the cassegrain ring
  40 + T PHI[2];
  41 + PHI[0] = (T)asin(NA);
  42 + PHI[1] = (T)asin(NA_in);
  43 +
  44 + //calculate the z-axis cylinder coordinates associated with these angles
  45 + T Z[2];
  46 + Z[0] = cos(PHI[0]);
  47 + Z[1] = cos(PHI[1]);
  48 + T range = Z[0] - Z[1];
  49 +
  50 + //draw a distribution of random phi, z values
  51 + T z, phi, theta;
  52 + //T kmag = stim::TAU / lambda;
  53 + for(int i=0; i<N; i++){ //for each sample
  54 + z = (T)((double)rand() / (double)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder
  55 + theta = (T)(((double)rand() / (double)RAND_MAX) * stim::TAU);
  56 + phi = acos(z); //project onto the sphere, computing phi in spherical coordinates
  57 +
  58 + //compute and store cartesian coordinates
  59 + vec3<T> spherical(1, theta, phi); //convert from spherical to cartesian coordinates
  60 + vec3<T> cart = spherical.sph2cart();
  61 + dirs[i] = rotation * cart; //create a sample vector
  62 + }
  63 + return dirs;
  64 + }
  65 +
  66 +/// Class stim::beam represents a beam of light focused at a point and composed of several plane waves
  67 +template<typename T>
  68 +class scalarbeam
  69 +{
  70 +public:
  71 + //enum beam_type {Uniform, Bartlett, Hamming, Hanning};
  72 +
  73 +private:
  74 +
  75 + T NA[2]; //numerical aperature of the focusing optics
  76 + vec3<T> f; //focal point
  77 + vec3<T> d; //propagation direction
  78 + stim::complex<T> A; //beam amplitude
  79 + T lambda; //beam wavelength
  80 +public:
  81 +
  82 + ///constructor: build a default beam (NA=1.0)
  83 + scalarbeam(T wavelength = 1, stim::complex<T> amplitude = 1, vec3<T> focal_point = vec3<T>(0, 0, 0), vec3<T> direction = vec3<T>(0, 0, 1), T numerical_aperture = 1, T center_obsc = 0){
  84 + lambda = wavelength;
  85 + A = amplitude;
  86 + f = focal_point;
  87 + d = direction.norm(); //make sure that the direction vector is normalized (makes calculations more efficient later on)
  88 + NA[0] = numerical_aperture;
  89 + NA[1] = center_obsc;
  90 + }
  91 +
  92 + ///Numerical Aperature functions
  93 + void setNA(T na)
  94 + {
  95 + NA[0] = (T)0;
  96 + NA[1] = na;
  97 + }
  98 + void setNA(T na0, T na1)
  99 + {
  100 + NA[0] = na0;
  101 + NA[1] = na1;
  102 + }
  103 +
  104 + //Monte-Carlo decomposition into plane waves
  105 + std::vector< scalarwave<T> > mc(size_t N = 100000) const{
  106 +
  107 + std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]); //generate a random set of N vectors forming a focus
  108 + std::vector< scalarwave<T> > samples(N); //create a vector of plane waves
  109 + T kmag = (T)stim::TAU / lambda; //calculate the wavenumber
  110 + stim::complex<T> apw; //allocate space for the amplitude at the focal point
  111 + stim::vec3<T> kpw; //declare the new k-vector based on the focused plane wave direction
  112 + for(size_t i=0; i<N; i++){ //for each sample
  113 + kpw = dirs[i] * kmag; //calculate the k-vector for the new plane wave
  114 + apw = exp(stim::complex<T>(0, kpw.dot(-f))); //calculate the amplitude for the new plane wave
  115 + samples[i] = scalarwave<T>(kpw, apw); //create a plane wave based on the direction
  116 + }
  117 +
  118 + return samples;
  119 + }
  120 +
  121 + /// Calculate the field at a given point
  122 + /// @param x is the x-coordinate of the field point
  123 + /// @O is the approximation accuracy
  124 + stim::complex<T> field(T x, T y, T z, size_t O){
  125 + std::vector< scalarwave<T> > W = mc(O);
  126 + T result = 0; //initialize the result to zero (0)
  127 + for(size_t i = 0; i < O; i++){ //for each plane wave
  128 + result += W[i].pos(x, y, z);
  129 + }
  130 + return result;
  131 + }
  132 +
  133 + /// Calculate the field at a set of positions
  134 + /*void field(stim::complex<T>* F, T* x, T* y, T* z, size_t N, size_t O){
  135 +
  136 + memset(F, 0, N * sizeof(stim::complex<T>));
  137 + std::vector< scalarwave<T> > W = mc(O); //get a random set of plane waves representing the beam
  138 + size_t o, n;
  139 + T px, py, pz;
  140 + for(n = 0; n < N; n++){ //for each point in the field
  141 + (x == NULL) ? px = 0 : px = x[n]; // test for NULL values
  142 + (y == NULL) ? py = 0 : py = y[n];
  143 + (z == NULL) ? pz = 0 : pz = z[n];
  144 + for(o = 0; o < O; o++){ //for each plane wave
  145 + F[n] += W[o].pos(px, py, pz);
  146 + }
  147 + }
  148 + }*/
  149 +
  150 + std::string str()
  151 + {
  152 + std::stringstream ss;
  153 + ss<<"Beam:"<<std::endl;
  154 + //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
  155 + ss<<" Beam Direction: "<<d<<std::endl;
  156 + if(NA[0] == 0)
  157 + ss<<" NA: "<<NA[1];
  158 + else
  159 + ss<<" NA: "<<NA[0]<<" -- "<<NA[1];
  160 +
  161 + return ss.str();
  162 + }
  163 +
  164 +
  165 +
  166 +}; //end beam
  167 +
  168 +template<typename T>
  169 +void cpu_scalar_psf(stim::complex<T>* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, T NA, T NA_in, int Nl){
  170 +
  171 + memset(F, 0, N * sizeof(stim::complex<T>));
  172 + T k = stim::TAU / lambda;
  173 + T jl, Pl, C, kr, cos_phi;
  174 + T cos_alpha_1 = cos(asin(NA_in));
  175 + T cos_alpha_2 = cos(asin(NA));
  176 + stim::vec3<T> p, ps;
  177 +
  178 + /*double vm;
  179 + size_t table_bytes = (Nl + 1) * sizeof(double);
  180 + double* jv = (double*) malloc( table_bytes );
  181 + double* yv = (double*) malloc( table_bytes );
  182 + double* djv= (double*) malloc( table_bytes );
  183 + double* dyv= (double*) malloc( table_bytes );
  184 + */
  185 +
  186 + T vm;
  187 + size_t table_bytes = (Nl + 1) * sizeof(T);
  188 + T* jv = (T*) malloc( table_bytes );
  189 + T* yv = (T*) malloc( table_bytes );
  190 + T* djv= (T*) malloc( table_bytes );
  191 + T* dyv= (T*) malloc( table_bytes );
  192 +
  193 + for(size_t n = 0; n < N; n++){
  194 + (x == NULL) ? p[0] = 0 : p[0] = x[n]; // test for NULL values and set positions
  195 + (y == NULL) ? p[1] = 0 : p[1] = y[n];
  196 + (z == NULL) ? p[2] = 0 : p[2] = z[n];
  197 +
  198 + ps = p.cart2sph(); //convert this point to spherical coordinates
  199 + kr = k * ps[0];
  200 + cos_phi = std::cos(ps[2]);
  201 + stim::bessjyv_sph<T>(Nl, kr, vm, jv, yv, djv, dyv);
  202 +
  203 + for(int l = 0; l <= Nl; l++){
  204 + //jl = boost::math::sph_bessel<T>(l, kr);
  205 + //jl = stim::bessjyv(l, kr
  206 + jl = (T)jv[l];
  207 + Pl = 1;//boost::math::legendre_p<T>(l, cos_phi);
  208 + C = 1;//boost::math::legendre_p<T>(l+1, cos_alpha_1) - boost::math::legendre_p<T>(l + 1, cos_alpha_2) - boost::math::legendre_p<T>(l - 1, cos_alpha_1) + boost::math::legendre_p<T>(l - 1, cos_alpha_2);
  209 + F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C;
  210 + }
  211 + F[n] *= A * stim::TAU;
  212 + }
  213 +}
  214 +
  215 +} //end namespace stim
  216 +
  217 +#endif
stim/optics/scalarwave.h 0 โ†’ 100644
  1 +#ifndef STIM_SCALARWAVE_H
  2 +#define STIM_SCALARWAVE_H
  3 +
  4 +
  5 +#include <string>
  6 +#include <sstream>
  7 +#include <cmath>
  8 +
  9 +//#include "../math/vector.h"
  10 +#include "../math/vec3.h"
  11 +#include "../math/quaternion.h"
  12 +#include "../math/constants.h"
  13 +#include "../math/plane.h"
  14 +#include "../math/complex.h"
  15 +
  16 +//CUDA
  17 +#include "../cuda/cudatools/devices.h"
  18 +#include "../cuda/cudatools/error.h"
  19 +#include "../cuda/sharedmem.cuh"
  20 +
  21 +namespace stim{
  22 +
  23 +template<typename T>
  24 +class scalarwave{
  25 +
  26 +protected:
  27 +
  28 + stim::vec3<T> k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda
  29 + stim::complex<T> E0; //amplitude
  30 +
  31 + /// Bend a plane wave via refraction, given that the new propagation direction is known
  32 + CUDA_CALLABLE scalarwave<T> bend(stim::vec3<T> kn) const{
  33 + return scalarwave<T>(kn.norm() * kmag(), E0);
  34 + }
  35 +
  36 +public:
  37 +
  38 + ///constructor: create a plane wave propagating along k
  39 + CUDA_CALLABLE scalarwave(vec3<T> kvec = stim::vec3<T>(0, 0, (T)stim::TAU), complex<T> E = 1){
  40 + k = kvec;
  41 + E0 = E;
  42 + }
  43 +
  44 + CUDA_CALLABLE scalarwave(T kx, T ky, T kz, complex<T> E = 1){
  45 + k = vec3<T>(kx, ky, kz);
  46 + E0 = E;
  47 + }
  48 +
  49 + ///multiplication operator: scale E0
  50 + CUDA_CALLABLE scalarwave<T> & operator* (const T & rhs){
  51 + E0 = E0 * rhs;
  52 + return *this;
  53 + }
  54 +
  55 + CUDA_CALLABLE T lambda() const{
  56 + return stim::TAU / k.len();
  57 + }
  58 +
  59 + CUDA_CALLABLE T kmag() const{
  60 + return k.len();
  61 + }
  62 +
  63 + CUDA_CALLABLE vec3< complex<T> > E(){
  64 + return E0;
  65 + }
  66 +
  67 + CUDA_CALLABLE vec3<T> kvec(){
  68 + return k;
  69 + }
  70 +
  71 + /// calculate the value of the field produced by the plane wave given a three-dimensional position
  72 + CUDA_CALLABLE complex<T> pos(T x, T y, T z){
  73 + return pos( stim::vec3<T>(x, y, z) );
  74 + }
  75 +
  76 + CUDA_CALLABLE complex<T> pos(vec3<T> p = vec3<T>(0, 0, 0)){
  77 + return E0 * exp(complex<T>(0, k.dot(p)));
  78 + }
  79 +
  80 + //scales k based on a transition from material ni to material nt
  81 + CUDA_CALLABLE scalarwave<T> n(T ni, T nt){
  82 + return scalarwave<T>(k * (nt / ni), E0);
  83 + }
  84 +
  85 + CUDA_CALLABLE scalarwave<T> refract(stim::vec3<T> kn) const{
  86 + return bend(kn);
  87 + }
  88 +
  89 + /// Calculate the result of a plane wave hitting an interface between two refractive indices
  90 +
  91 + /// @param P is a plane representing the position and orientation of the surface
  92 + /// @param n0 is the refractive index outside of the surface (in the direction of the normal)
  93 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  94 + /// @param r is the reflected component of the plane wave
  95 + /// @param t is the transmitted component of the plane wave
  96 + void scatter(stim::plane<T> P, T n0, T n1, scalarwave<T> &r, scalarwave<T> &t){
  97 + scatter(P, n1/n0, r, t);
  98 + }
  99 +
  100 + /// Calculate the scattering result when nr = n1/n0
  101 +
  102 + /// @param P is a plane representing the position and orientation of the surface
  103 + /// @param r is the ration n1/n0
  104 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  105 + /// @param r is the reflected component of the plane wave
  106 + /// @param t is the transmitted component of the plane wave
  107 + void scatter(stim::plane<T> P, T nr, scalarwave<T> &r, scalarwave<T> &t){
  108 + /*
  109 + int facing = P.face(k); //determine which direction the plane wave is coming in
  110 +
  111 + if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr
  112 + P = P.flip(); //flip the plane
  113 + nr = 1/nr; //invert the refractive index (now nr = n0/n1)
  114 + }
  115 +
  116 + //use Snell's Law to calculate the transmitted angle
  117 + T cos_theta_i = k.norm().dot(-P.norm()); //compute the cosine of theta_i
  118 + T theta_i = acos(cos_theta_i); //compute theta_i
  119 + T sin_theta_t = (1/nr) * sin(theta_i); //compute the sine of theta_t using Snell's law
  120 + T theta_t = asin(sin_theta_t); //compute the cosine of theta_t
  121 +
  122 + bool tir = false; //flag for total internal reflection
  123 + if(theta_t != theta_t){
  124 + tir = true;
  125 + theta_t = stim::PI / (T)2;
  126 + }
  127 +
  128 + //handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
  129 + if(theta_i == 0){
  130 + T rp = (1 - nr) / (1 + nr); //compute the Fresnel coefficients
  131 + T tp = 2 / (1 + nr);
  132 + vec3<T> kr = -k;
  133 + vec3<T> kt = k * nr; //set the k vectors for theta_i = 0
  134 + vec3< complex<T> > Er = E0 * rp; //compute the E vectors
  135 + vec3< complex<T> > Et = E0 * tp;
  136 + T phase_t = P.p().dot(k - kt); //compute the phase offset
  137 + T phase_r = P.p().dot(k - kr);
  138 +
  139 + //create the plane waves
  140 + r = planewave<T>(kr, Er, phase_r);
  141 + t = planewave<T>(kt, Et, phase_t);
  142 + return;
  143 + }
  144 +
  145 +
  146 + //compute the Fresnel coefficients
  147 + T rp, rs, tp, ts;
  148 + rp = tan(theta_t - theta_i) / tan(theta_t + theta_i);
  149 + rs = sin(theta_t - theta_i) / sin(theta_t + theta_i);
  150 +
  151 + if(tir){
  152 + tp = ts = 0;
  153 + }
  154 + else{
  155 + tp = ( 2 * sin(theta_t) * cos(theta_i) ) / ( sin(theta_t + theta_i) * cos(theta_t - theta_i) );
  156 + ts = ( 2 * sin(theta_t) * cos(theta_i) ) / sin(theta_t + theta_i);
  157 + }
  158 +
  159 + //compute the coordinate space for the plane of incidence
  160 + vec3<T> z_hat = -P.norm();
  161 + vec3<T> y_hat = P.parallel(k).norm();
  162 + vec3<T> x_hat = y_hat.cross(z_hat).norm();
  163 +
  164 + //compute the k vectors for r and t
  165 + vec3<T> kr, kt;
  166 + kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag();
  167 + kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr;
  168 +
  169 + //compute the magnitude of the p- and s-polarized components of the incident E vector
  170 + complex<T> Ei_s = E0.dot(x_hat);
  171 + int sgn = E0.dot(y_hat).sgn();
  172 + vec3< complex<T> > cx_hat = x_hat;
  173 + complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
  174 + //compute the magnitude of the p- and s-polarized components of the reflected E vector
  175 + complex<T> Er_s = Ei_s * rs;
  176 + complex<T> Er_p = Ei_p * rp;
  177 + //compute the magnitude of the p- and s-polarized components of the transmitted E vector
  178 + complex<T> Et_s = Ei_s * ts;
  179 + complex<T> Et_p = Ei_p * tp;
  180 +
  181 + //compute the reflected E vector
  182 + vec3< complex<T> > Er = vec3< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
  183 + //compute the transmitted E vector
  184 + vec3< complex<T> > Et = vec3< complex<T> >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s;
  185 +
  186 + T phase_t = P.p().dot(k - kt);
  187 + T phase_r = P.p().dot(k - kr);
  188 +
  189 + //create the plane waves
  190 + r.k = kr;
  191 + r.E0 = Er * exp( complex<T>(0, phase_r) );
  192 +
  193 + t.k = kt;
  194 + t.E0 = Et * exp( complex<T>(0, phase_t) );
  195 + */
  196 + }
  197 +
  198 + std::string str()
  199 + {
  200 + std::stringstream ss;
  201 + ss<<"Plane Wave:"<<std::endl;
  202 + ss<<" "<<E0<<" e^i ( "<<k<<" . r )";
  203 + return ss.str();
  204 + }
  205 +}; //end planewave class
  206 +
  207 +
  208 +/// CUDA kernel for computing the field produced by a batch of plane waves at an array of locations
  209 +template<typename T>
  210 +__global__ void cuda_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t n_waves){
  211 + extern __shared__ stim::scalarwave<T> shared_W[]; //declare the list of waves in shared memory
  212 +
  213 + stim::cuda::sharedMemcpy(shared_W, W, n_waves, threadIdx.x, blockDim.x); //copy the plane waves into shared memory for faster access
  214 + __syncthreads(); //synchronize threads to insure all data is copied
  215 +
  216 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  217 + if(i >= N) return; //exit if this thread is outside the array
  218 + T px, py, pz;
  219 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values and set positions
  220 + (y == NULL) ? py = 0 : py = y[i];
  221 + (z == NULL) ? pz = 0 : pz = z[i];
  222 +
  223 + stim::complex<T> f = 0; //create a register to store the result
  224 + for(size_t w = 0; w < n_waves; w++)
  225 + f += shared_W[w].pos(px, py, pz); //evaluate the plane wave
  226 + F[i] += f; //copy the result to device memory
  227 +}
  228 +
  229 +/// evaluate a scalar wave at several points, where all arrays are on the GPU
  230 +template<typename T>
  231 +void gpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  232 +
  233 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  234 + dim3 blocks(N / threads + 1); //calculate the optimal number of blocks
  235 + cuda_scalarwave<T><<< blocks, threads >>>(F, N, x, y, z, w); //call the kernel
  236 +}
  237 +
  238 +/// Sums a series of coherent plane waves at a specified point
  239 +/// @param field is the output array of field values corresponding to each input point
  240 +/// @param x is an array of x coordinates for the field point
  241 +/// @param y is an array of y coordinates for the field point
  242 +/// @param z is an array of z coordinates for the field point
  243 +/// @param N is the number of points in the input and output arrays
  244 +/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
  245 +/// @param A is the list of amplitudes for each wave
  246 +/// @param S is the list of propagation directions for each wave
  247 +template<typename T>
  248 +void cpu_sum_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > w_array){
  249 + size_t S = w_array.size(); //store the number of waves
  250 +#ifdef NO_CUDA
  251 + memset(F, 0, N * sizeof(stim::complex<T>));
  252 + T px, py, pz;
  253 + for(size_t i = 0; i < N; i++){ // for each element in the array
  254 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values
  255 + (y == NULL) ? py = 0 : py = y[i];
  256 + (z == NULL) ? pz = 0 : pz = z[i];
  257 +
  258 + for(size_t s = 0; s < S; s++){
  259 + F[i] += w_array[s].pos(px, py, pz); //sum all plane waves at this point
  260 + }
  261 + }
  262 +#else
  263 + stim::complex<T>* dev_F; //allocate space for the field
  264 + cudaMalloc(&dev_F, N * sizeof(stim::complex<T>));
  265 + cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>)); //set the field to zero (necessary because a sum is used)
  266 +
  267 + T* dev_x = NULL; //allocate space and copy the X coordinate (if specified)
  268 + if(x != NULL){
  269 + HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T)));
  270 + HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice));
  271 + }
  272 +
  273 + T* dev_y = NULL; //allocate space and copy the Y coordinate (if specified)
  274 + if(y != NULL){
  275 + HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T)));
  276 + HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice));
  277 + }
  278 +
  279 + T* dev_z = NULL; //allocate space and copy the Z coordinate (if specified)
  280 + if(z != NULL){
  281 + HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T)));
  282 + HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice));
  283 + }
  284 +
  285 + size_t wave_bytes = sizeof(stim::scalarwave<T>);
  286 + size_t shared_bytes = stim::sharedMemPerBlock(); //calculate the maximum amount of shared memory available
  287 + size_t array_bytes = w_array.size() * wave_bytes; //calculate the maximum number of bytes required for the planewave array
  288 + size_t max_batch = shared_bytes / wave_bytes; //calculate number of plane waves that will fit into shared memory
  289 + size_t num_batches = w_array.size() / max_batch + 1; //calculate the number of batches required to process all plane waves
  290 + size_t batch_bytes = min(w_array.size(), max_batch) * wave_bytes; //initialize the batch size (in bytes) to the maximum batch required
  291 +
  292 + stim::scalarwave<T>* dev_w;
  293 + HANDLE_ERROR(cudaMalloc(&dev_w, batch_bytes)); //allocate memory for a single batch of plane waves
  294 +
  295 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  296 + dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  297 +
  298 + size_t batch_size; //declare a variable to store the size of the current batch
  299 + size_t waves_processed = 0; //initialize the number of waves processed to zero
  300 + while(waves_processed < w_array.size()){ //while there are still waves to be processed
  301 + batch_size = min<size_t>(max_batch, w_array.size() - waves_processed); //process either a whole batch, or whatever is left
  302 + batch_bytes = batch_size * sizeof(stim::scalarwave<T>);
  303 + HANDLE_ERROR(cudaMemcpy(dev_w, &w_array[waves_processed], batch_bytes, cudaMemcpyHostToDevice)); //copy the plane waves into global memory
  304 + cuda_scalarwave<T><<< blocks, threads, batch_bytes >>>(dev_F, N, dev_x, dev_y, dev_z, dev_w, batch_size); //call the kernel
  305 + waves_processed += batch_size; //increment the counter indicating how many waves have been processed
  306 + }
  307 +
  308 + cudaMemcpy(F, dev_F, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost); //copy the field from device memory
  309 +
  310 + if(x != NULL) cudaFree(dev_x); //free everything
  311 + if(y != NULL) cudaFree(dev_y);
  312 + if(z != NULL) cudaFree(dev_z);
  313 + cudaFree(dev_F);
  314 + cudaFree(dev_w);
  315 +
  316 +#endif
  317 +}
  318 +
  319 +template<typename T>
  320 +void cpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  321 + std::vector< stim::scalarwave<T> > w_array(1, w);
  322 + cpu_sum_scalarwaves(F, N, x, y, z, w_array);
  323 +}
  324 +
  325 +
  326 +/// Sums a series of coherent plane waves at a specified point
  327 +/// @param x is the x coordinate of the field point
  328 +/// @param y is the y coordinate of the field point
  329 +/// @param z is the z coordinate of the field point
  330 +/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
  331 +/// @param A is the list of amplitudes for each wave
  332 +/// @param S is the list of propagation directions for each wave
  333 +template<typename T>
  334 +CUDA_CALLABLE stim::complex<T> sum_scalarwaves(T x, T y, T z, std::vector< stim::scalarwave<T> > W){
  335 + size_t N = W.size(); //get the number of plane wave samples
  336 + stim::complex<T> field(0, 0); //initialize the field to zero (0)
  337 + stim::vec3<T> k; //allocate space for the direction vector
  338 + for(size_t i = 0; i < N; i++){
  339 + field += W[i].pos(x, y, z);
  340 + }
  341 + return field;
  342 +}
  343 +
  344 +} //end namespace stim
  345 +
  346 +template <typename T>
  347 +std::ostream& operator<<(std::ostream& os, stim::scalarwave<T> p)
  348 +{
  349 + os<<p.str();
  350 + return os;
  351 +}
  352 +
  353 +#endif
0 \ No newline at end of file 354 \ No newline at end of file
stim/optics/beam.h renamed to stim/optics_old/beam.h
1 -#ifndef RTS_BEAM  
2 -#define RTS_BEAM  
3 -  
4 -#include "../math/vector.h"  
5 -#include "../math/function.h"  
6 -#include "../optics/planewave.h"  
7 -#include <vector>  
8 -  
9 -namespace stim{  
10 -  
11 -template<typename P>  
12 -class beam : public planewave<P>  
13 -{  
14 -public:  
15 - enum beam_type {Uniform, Bartlett, Hamming, Hanning};  
16 -  
17 -private:  
18 -  
19 - P _na[2]; //numerical aperature of the focusing optics  
20 - vec<P> f; //focal point  
21 - function<P, P> apod; //apodization function  
22 - unsigned int apod_res; //resolution of apodization filter functions  
23 -  
24 - void apod_uniform()  
25 - {  
26 - apod = (P)1;  
27 - }  
28 - void apod_bartlett()  
29 - {  
30 - apod = (P)1;  
31 - apod.insert((P)1, (P)0);  
32 - }  
33 - void apod_hanning()  
34 - {  
35 - apod = (P)0;  
36 - P x, y;  
37 - for(unsigned int n=0; n<apod_res; n++)  
38 - {  
39 - x = (P)n/(P)apod_res;  
40 - y = pow( cos( ((P)3.14159 * x) / 2 ), 2);  
41 - apod.insert(x, y);  
42 - }  
43 - }  
44 - void apod_hamming()  
45 - {  
46 - apod = (P)0;  
47 - P x, y;  
48 - for(unsigned int n=0; n<apod_res; n++)  
49 - {  
50 - x = (P)n/(P)apod_res;  
51 - y = (P)27/(P)50 + ( (P)23/(P)50 ) * cos((P)3.14159 * x);  
52 - apod.insert(x, y);  
53 - }  
54 - }  
55 -  
56 - void set_apod(beam_type type)  
57 - {  
58 - if(type == Uniform)  
59 - apod_uniform();  
60 - if(type == Bartlett)  
61 - apod_bartlett();  
62 - if(type == Hanning)  
63 - apod_hanning();  
64 - if(type == Hamming)  
65 - apod_hamming();  
66 - }  
67 -  
68 -public:  
69 -  
70 - ///constructor: build a default beam (NA=1.0)  
71 - beam(  
72 - vec<P> k = rts::vec<P>(0, 0, rtsTAU),  
73 - vec<P> _E0 = rts::vec<P>(1, 0, 0),  
74 - beam_type _apod = Uniform)  
75 - : planewave<P>(k, _E0)  
76 - {  
77 - _na[0] = (P)0.0;  
78 - _na[1] = (P)1.0;  
79 - f = vec<P>( (P)0, (P)0, (P)0 );  
80 - apod_res = 256; //set the default resolution for apodization filters  
81 - set_apod(_apod); //set the apodization function type  
82 - }  
83 -  
84 - beam<P> refract(rts::vec<P> kn) const{  
85 -  
86 - beam<P> new_beam;  
87 - new_beam._na[0] = _na[0];  
88 - new_beam._na[1] = _na[1];  
89 -  
90 -  
91 - rts::planewave<P> pw = planewave<P>::bend(kn);  
92 - //std::cout<<pw.str()<<std::endl;  
93 -  
94 - new_beam.k = pw.kvec();  
95 - new_beam.E0 = pw.E();  
96 -  
97 - return new_beam;  
98 - }  
99 -  
100 - ///Numerical Aperature functions  
101 - void NA(P na)  
102 - {  
103 - _na[0] = (P)0;  
104 - _na[1] = na;  
105 - }  
106 - void NA(P na0, P na1)  
107 - {  
108 - _na[0] = na0;  
109 - _na[1] = na1;  
110 - }  
111 -  
112 - /*string str() :  
113 - {  
114 - stringstream ss;  
115 - ss<<"Beam Center: "<<k<<std::endl;  
116 -  
117 - return ss.str();  
118 - }*/  
119 -  
120 - //Monte-Carlo decomposition into plane waves  
121 - std::vector< planewave<P> > mc(unsigned int N = 100000, unsigned int seed = 0) const  
122 - {  
123 - /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling  
124 - of a sphere and projecting these samples onto an inscribed sphere.  
125 -  
126 - seed = seed for the random number generator  
127 - */  
128 - srand(seed); //seed the random number generator  
129 -  
130 - vec<P> k_hat = beam::k.norm();  
131 -  
132 - ///compute the rotation operator to transform (0, 0, 1) to k  
133 - P cos_angle = k_hat.dot(rts::vec<P>(0, 0, 1));  
134 - rts::matrix<P, 3> rotation;  
135 -  
136 - //if the cosine of the angle is -1, the rotation is just a flip across the z axis  
137 - if(cos_angle == -1){  
138 - rotation(2, 2) = -1;  
139 - }  
140 - else if(cos_angle != 1.0)  
141 - {  
142 - rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k_hat).norm(); //compute the axis of rotation  
143 - P angle = acos(cos_angle); //compute the angle of rotation  
144 - rts::quaternion<P> quat; //create a quaternion describing the rotation  
145 - quat.CreateRotation(angle, r_axis);  
146 - rotation = quat.toMatrix3(); //compute the rotation matrix  
147 - }  
148 -  
149 - //find the phi values associated with the cassegrain ring  
150 - P PHI[2];  
151 - PHI[0] = (P)asin(_na[0]);  
152 - PHI[1] = (P)asin(_na[1]);  
153 -  
154 - //calculate the z-axis cylinder coordinates associated with these angles  
155 - P Z[2];  
156 - Z[0] = cos(PHI[0]);  
157 - Z[1] = cos(PHI[1]);  
158 - P range = Z[0] - Z[1];  
159 -  
160 - std::vector< planewave<P> > samples; //create a vector of plane waves  
161 -  
162 - //draw a distribution of random phi, z values  
163 - P z, phi, theta;  
164 - for(int i=0; i<N; i++) //for each sample  
165 - {  
166 - z = ((P)rand() / (P)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder  
167 - theta = ((P)rand() / (P)RAND_MAX) * 2 * (P)3.14159;  
168 - phi = acos(z); //project onto the sphere, computing phi in spherical coordinates  
169 -  
170 - //compute and store cartesian coordinates  
171 - rts::vec<P> spherical(1, theta, phi); //convert from spherical to cartesian coordinates  
172 - rts::vec<P> cart = spherical.sph2cart();  
173 - vec<P> k_prime = rotation * cart; //create a sample vector  
174 -  
175 - //store a wave refracted along the given direction  
176 - //std::cout<<"k prime: "<<rotation<<std::endl;  
177 - samples.push_back(planewave<P>::refract(k_prime) * apod(phi/PHI[1]));  
178 - }  
179 -  
180 - return samples;  
181 - }  
182 -  
183 - std::string str()  
184 - {  
185 - std::stringstream ss;  
186 - ss<<"Beam:"<<std::endl;  
187 - //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;  
188 - ss<<" Central Plane Wave: "<<beam::k<<std::endl;  
189 - if(_na[0] == 0)  
190 - ss<<" NA: "<<_na[1];  
191 - else  
192 - ss<<" NA: "<<_na[0]<<" -- "<<_na[1];  
193 -  
194 - return ss.str();  
195 - }  
196 -  
197 -  
198 -  
199 -};  
200 -  
201 -}  
202 -  
203 -#endif 1 +#ifndef RTS_BEAM
  2 +#define RTS_BEAM
  3 +
  4 +#include "../math/vector.h"
  5 +#include "../math/function.h"
  6 +#include "../optics/planewave.h"
  7 +#include <vector>
  8 +
  9 +namespace stim{
  10 +
  11 +template<typename P>
  12 +class beam : public planewave<P>
  13 +{
  14 +public:
  15 + enum beam_type {Uniform, Bartlett, Hamming, Hanning};
  16 +
  17 +private:
  18 +
  19 + P _na[2]; //numerical aperature of the focusing optics
  20 + vec<P> f; //focal point
  21 + function<P, P> apod; //apodization function
  22 + unsigned int apod_res; //resolution of apodization filter functions
  23 +
  24 + void apod_uniform()
  25 + {
  26 + apod = (P)1;
  27 + }
  28 + void apod_bartlett()
  29 + {
  30 + apod = (P)1;
  31 + apod.insert((P)1, (P)0);
  32 + }
  33 + void apod_hanning()
  34 + {
  35 + apod = (P)0;
  36 + P x, y;
  37 + for(unsigned int n=0; n<apod_res; n++)
  38 + {
  39 + x = (P)n/(P)apod_res;
  40 + y = pow( cos( ((P)3.14159 * x) / 2 ), 2);
  41 + apod.insert(x, y);
  42 + }
  43 + }
  44 + void apod_hamming()
  45 + {
  46 + apod = (P)0;
  47 + P x, y;
  48 + for(unsigned int n=0; n<apod_res; n++)
  49 + {
  50 + x = (P)n/(P)apod_res;
  51 + y = (P)27/(P)50 + ( (P)23/(P)50 ) * cos((P)3.14159 * x);
  52 + apod.insert(x, y);
  53 + }
  54 + }
  55 +
  56 + void set_apod(beam_type type)
  57 + {
  58 + if(type == Uniform)
  59 + apod_uniform();
  60 + if(type == Bartlett)
  61 + apod_bartlett();
  62 + if(type == Hanning)
  63 + apod_hanning();
  64 + if(type == Hamming)
  65 + apod_hamming();
  66 + }
  67 +
  68 +public:
  69 +
  70 + ///constructor: build a default beam (NA=1.0)
  71 + beam(
  72 + vec<P> k = rts::vec<P>(0, 0, rtsTAU),
  73 + vec<P> _E0 = rts::vec<P>(1, 0, 0),
  74 + beam_type _apod = Uniform)
  75 + : planewave<P>(k, _E0)
  76 + {
  77 + _na[0] = (P)0.0;
  78 + _na[1] = (P)1.0;
  79 + f = vec<P>( (P)0, (P)0, (P)0 );
  80 + apod_res = 256; //set the default resolution for apodization filters
  81 + set_apod(_apod); //set the apodization function type
  82 + }
  83 +
  84 + beam<P> refract(rts::vec<P> kn) const{
  85 +
  86 + beam<P> new_beam;
  87 + new_beam._na[0] = _na[0];
  88 + new_beam._na[1] = _na[1];
  89 +
  90 +
  91 + rts::planewave<P> pw = planewave<P>::bend(kn);
  92 + //std::cout<<pw.str()<<std::endl;
  93 +
  94 + new_beam.k = pw.kvec();
  95 + new_beam.E0 = pw.E();
  96 +
  97 + return new_beam;
  98 + }
  99 +
  100 + ///Numerical Aperature functions
  101 + void NA(P na)
  102 + {
  103 + _na[0] = (P)0;
  104 + _na[1] = na;
  105 + }
  106 + void NA(P na0, P na1)
  107 + {
  108 + _na[0] = na0;
  109 + _na[1] = na1;
  110 + }
  111 +
  112 + /*string str() :
  113 + {
  114 + stringstream ss;
  115 + ss<<"Beam Center: "<<k<<std::endl;
  116 +
  117 + return ss.str();
  118 + }*/
  119 +
  120 + //Monte-Carlo decomposition into plane waves
  121 + std::vector< planewave<P> > mc(unsigned int N = 100000, unsigned int seed = 0) const
  122 + {
  123 + /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling
  124 + of a sphere and projecting these samples onto an inscribed sphere.
  125 +
  126 + seed = seed for the random number generator
  127 + */
  128 + srand(seed); //seed the random number generator
  129 +
  130 + vec<P> k_hat = beam::k.norm();
  131 +
  132 + ///compute the rotation operator to transform (0, 0, 1) to k
  133 + P cos_angle = k_hat.dot(rts::vec<P>(0, 0, 1));
  134 + rts::matrix<P, 3> rotation;
  135 +
  136 + //if the cosine of the angle is -1, the rotation is just a flip across the z axis
  137 + if(cos_angle == -1){
  138 + rotation(2, 2) = -1;
  139 + }
  140 + else if(cos_angle != 1.0)
  141 + {
  142 + rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k_hat).norm(); //compute the axis of rotation
  143 + P angle = acos(cos_angle); //compute the angle of rotation
  144 + rts::quaternion<P> quat; //create a quaternion describing the rotation
  145 + quat.CreateRotation(angle, r_axis);
  146 + rotation = quat.toMatrix3(); //compute the rotation matrix
  147 + }
  148 +
  149 + //find the phi values associated with the cassegrain ring
  150 + P PHI[2];
  151 + PHI[0] = (P)asin(_na[0]);
  152 + PHI[1] = (P)asin(_na[1]);
  153 +
  154 + //calculate the z-axis cylinder coordinates associated with these angles
  155 + P Z[2];
  156 + Z[0] = cos(PHI[0]);
  157 + Z[1] = cos(PHI[1]);
  158 + P range = Z[0] - Z[1];
  159 +
  160 + std::vector< planewave<P> > samples; //create a vector of plane waves
  161 +
  162 + //draw a distribution of random phi, z values
  163 + P z, phi, theta;
  164 + for(int i=0; i<N; i++) //for each sample
  165 + {
  166 + z = ((P)rand() / (P)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder
  167 + theta = ((P)rand() / (P)RAND_MAX) * 2 * (P)3.14159;
  168 + phi = acos(z); //project onto the sphere, computing phi in spherical coordinates
  169 +
  170 + //compute and store cartesian coordinates
  171 + rts::vec<P> spherical(1, theta, phi); //convert from spherical to cartesian coordinates
  172 + rts::vec<P> cart = spherical.sph2cart();
  173 + vec<P> k_prime = rotation * cart; //create a sample vector
  174 +
  175 + //store a wave refracted along the given direction
  176 + //std::cout<<"k prime: "<<rotation<<std::endl;
  177 + samples.push_back(planewave<P>::refract(k_prime) * apod(phi/PHI[1]));
  178 + }
  179 +
  180 + return samples;
  181 + }
  182 +
  183 + std::string str()
  184 + {
  185 + std::stringstream ss;
  186 + ss<<"Beam:"<<std::endl;
  187 + //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
  188 + ss<<" Central Plane Wave: "<<beam::k<<std::endl;
  189 + if(_na[0] == 0)
  190 + ss<<" NA: "<<_na[1];
  191 + else
  192 + ss<<" NA: "<<_na[0]<<" -- "<<_na[1];
  193 +
  194 + return ss.str();
  195 + }
  196 +
  197 +
  198 +
  199 +};
  200 +
  201 +}
  202 +
  203 +#endif
stim/optics/efield.cuh renamed to stim/optics_old/efield.cuh
stim/optics/esphere.cuh renamed to stim/optics_old/esphere.cuh
stim/optics/halfspace.cuh renamed to stim/optics_old/halfspace.cuh
stim/optics/material.h renamed to stim/optics_old/material.h
1 -#ifndef RTS_MATERIAL_H  
2 -#define RTS_MATERIAL_H  
3 -  
4 -#include <vector>  
5 -#include <ostream>  
6 -#include <iostream>  
7 -#include <fstream>  
8 -#include <complex>  
9 -#include <algorithm>  
10 -#include <sstream>  
11 -#include "../math/complex.h"  
12 -#include "../math/constants.h"  
13 -#include "../math/function.h"  
14 -  
15 -namespace stim{  
16 -  
17 -//Material class - default representation for the material property is the refractive index (RI)  
18 -template<typename T>  
19 -class material : public function< T, complex<T> >{  
20 -  
21 -public:  
22 - enum wave_property{microns, inverse_cm};  
23 - enum material_property{ri, absorbance};  
24 -  
25 -private:  
26 -  
27 - using function< T, complex<T> >::X;  
28 - using function< T, complex<T> >::Y;  
29 - using function< T, complex<T> >::insert;  
30 - using function< T, complex<T> >::bounding;  
31 -  
32 - std::string name; //name for the material (defaults to file name)  
33 -  
34 - void process_header(std::string str, wave_property& wp, material_property& mp){  
35 -  
36 - std::stringstream ss(str); //create a stream from the data string  
37 - std::string line;  
38 - std::getline(ss, line); //get the first line as a string  
39 - while(line[0] == '#'){ //continue looping while the line is a comment  
40 -  
41 - std::stringstream lstream(line); //create a stream from the line  
42 - lstream.ignore(); //ignore the first character ('#')  
43 -  
44 - std::string prop; //get the property name  
45 - lstream>>prop;  
46 -  
47 - if(prop == "X"){  
48 - std::string wp_name;  
49 - lstream>>wp_name;  
50 - if(wp_name == "microns") wp = microns;  
51 - else if(wp_name == "inverse_cm") wp = inverse_cm;  
52 - }  
53 - else if(prop == "Y"){  
54 - std::string mp_name;  
55 - lstream>>mp_name;  
56 - if(mp_name == "ri") mp = ri;  
57 - else if(mp_name == "absorbance") mp = absorbance;  
58 - }  
59 -  
60 - std::getline(ss, line); //get the next line  
61 - }  
62 -  
63 - function< T, stim::complex<T> >::process_string(str);  
64 - }  
65 -  
66 - void from_inverse_cm(){  
67 - //convert inverse centimeters to wavelength (in microns)  
68 - for(unsigned int i=0; i<X.size(); i++)  
69 - X[i] = 10000 / X[i];  
70 -  
71 - //reverse the function array  
72 - std::reverse(X.begin(), X.end());  
73 - std::reverse(Y.begin(), Y.end());  
74 -  
75 - }  
76 -  
77 - void init(){  
78 - bounding[0] = bounding[1] = stim::complex<T>(1, 0);  
79 - }  
80 -  
81 -  
82 -public:  
83 -  
84 - material(std::string filename, wave_property wp, material_property mp){  
85 - name = filename;  
86 - load(filename, wp, mp);  
87 - }  
88 -  
89 - material(std::string filename){  
90 - name = filename;  
91 - load(filename);  
92 - }  
93 -  
94 - material(){  
95 - init();  
96 - }  
97 -  
98 - complex<T> getN(T lambda){  
99 - return function< T, complex<T> >::linear(lambda);  
100 - }  
101 -  
102 - void load(std::string filename, wave_property wp, material_property mp){  
103 -  
104 - //load the file as a function  
105 - function< T, complex<T> >::load(filename);  
106 - }  
107 -  
108 - void load(std::string filename){  
109 -  
110 - wave_property wp = inverse_cm;  
111 - material_property mp = ri;  
112 - //turn the file into a string  
113 - std::ifstream t(filename.c_str()); //open the file as a stream  
114 -  
115 - if(!t){  
116 - std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl;  
117 - exit(1);  
118 - }  
119 - std::string str((std::istreambuf_iterator<char>(t)),  
120 - std::istreambuf_iterator<char>());  
121 -  
122 - //process the header information  
123 - process_header(str, wp, mp);  
124 -  
125 - //convert units  
126 - if(wp == inverse_cm)  
127 - from_inverse_cm();  
128 - //set the bounding values  
129 - bounding[0] = Y[0];  
130 - bounding[1] = Y.back();  
131 - }  
132 - std::string str(){  
133 - std::stringstream ss;  
134 - ss<<name<<std::endl;  
135 - ss<<function< T, complex<T> >::str();  
136 - return ss.str();  
137 - }  
138 - std::string get_name(){  
139 - return name;  
140 - }  
141 -  
142 - void set_name(std::string str){  
143 - name = str;  
144 - }  
145 -  
146 -};  
147 -  
148 -}  
149 -  
150 -  
151 -  
152 -  
153 -#endif 1 +#ifndef RTS_MATERIAL_H
  2 +#define RTS_MATERIAL_H
  3 +
  4 +#include <vector>
  5 +#include <ostream>
  6 +#include <iostream>
  7 +#include <fstream>
  8 +#include <complex>
  9 +#include <algorithm>
  10 +#include <sstream>
  11 +#include "../math/complex.h"
  12 +#include "../math/constants.h"
  13 +#include "../math/function.h"
  14 +
  15 +namespace stim{
  16 +
  17 +//Material class - default representation for the material property is the refractive index (RI)
  18 +template<typename T>
  19 +class material : public function< T, complex<T> >{
  20 +
  21 +public:
  22 + enum wave_property{microns, inverse_cm};
  23 + enum material_property{ri, absorbance};
  24 +
  25 +private:
  26 +
  27 + using function< T, complex<T> >::X;
  28 + using function< T, complex<T> >::Y;
  29 + using function< T, complex<T> >::insert;
  30 + using function< T, complex<T> >::bounding;
  31 +
  32 + std::string name; //name for the material (defaults to file name)
  33 +
  34 + void process_header(std::string str, wave_property& wp, material_property& mp){
  35 +
  36 + std::stringstream ss(str); //create a stream from the data string
  37 + std::string line;
  38 + std::getline(ss, line); //get the first line as a string
  39 + while(line[0] == '#'){ //continue looping while the line is a comment
  40 +
  41 + std::stringstream lstream(line); //create a stream from the line
  42 + lstream.ignore(); //ignore the first character ('#')
  43 +
  44 + std::string prop; //get the property name
  45 + lstream>>prop;
  46 +
  47 + if(prop == "X"){
  48 + std::string wp_name;
  49 + lstream>>wp_name;
  50 + if(wp_name == "microns") wp = microns;
  51 + else if(wp_name == "inverse_cm") wp = inverse_cm;
  52 + }
  53 + else if(prop == "Y"){
  54 + std::string mp_name;
  55 + lstream>>mp_name;
  56 + if(mp_name == "ri") mp = ri;
  57 + else if(mp_name == "absorbance") mp = absorbance;
  58 + }
  59 +
  60 + std::getline(ss, line); //get the next line
  61 + }
  62 +
  63 + function< T, stim::complex<T> >::process_string(str);
  64 + }
  65 +
  66 + void from_inverse_cm(){
  67 + //convert inverse centimeters to wavelength (in microns)
  68 + for(unsigned int i=0; i<X.size(); i++)
  69 + X[i] = 10000 / X[i];
  70 +
  71 + //reverse the function array
  72 + std::reverse(X.begin(), X.end());
  73 + std::reverse(Y.begin(), Y.end());
  74 +
  75 + }
  76 +
  77 + void init(){
  78 + bounding[0] = bounding[1] = stim::complex<T>(1, 0);
  79 + }
  80 +
  81 +
  82 +public:
  83 +
  84 + material(std::string filename, wave_property wp, material_property mp){
  85 + name = filename;
  86 + load(filename, wp, mp);
  87 + }
  88 +
  89 + material(std::string filename){
  90 + name = filename;
  91 + load(filename);
  92 + }
  93 +
  94 + material(){
  95 + init();
  96 + }
  97 +
  98 + complex<T> getN(T lambda){
  99 + return function< T, complex<T> >::linear(lambda);
  100 + }
  101 +
  102 + void load(std::string filename, wave_property wp, material_property mp){
  103 +
  104 + //load the file as a function
  105 + function< T, complex<T> >::load(filename);
  106 + }
  107 +
  108 + void load(std::string filename){
  109 +
  110 + wave_property wp = inverse_cm;
  111 + material_property mp = ri;
  112 + //turn the file into a string
  113 + std::ifstream t(filename.c_str()); //open the file as a stream
  114 +
  115 + if(!t){
  116 + std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl;
  117 + exit(1);
  118 + }
  119 + std::string str((std::istreambuf_iterator<char>(t)),
  120 + std::istreambuf_iterator<char>());
  121 +
  122 + //process the header information
  123 + process_header(str, wp, mp);
  124 +
  125 + //convert units
  126 + if(wp == inverse_cm)
  127 + from_inverse_cm();
  128 + //set the bounding values
  129 + bounding[0] = Y[0];
  130 + bounding[1] = Y.back();
  131 + }
  132 + std::string str(){
  133 + std::stringstream ss;
  134 + ss<<name<<std::endl;
  135 + ss<<function< T, complex<T> >::str();
  136 + return ss.str();
  137 + }
  138 + std::string get_name(){
  139 + return name;
  140 + }
  141 +
  142 + void set_name(std::string str){
  143 + name = str;
  144 + }
  145 +
  146 +};
  147 +
  148 +}
  149 +
  150 +
  151 +
  152 +
  153 +#endif
stim/optics/mirst-1d.cuh renamed to stim/optics_old/mirst-1d.cuh
1 -#include "../optics/material.h"  
2 -#include "../math/complexfield.cuh"  
3 -#include "../math/constants.h"  
4 -//#include "../envi/bil.h"  
5 -  
6 -#include "cufft.h"  
7 -  
8 -#include <vector>  
9 -#include <sstream>  
10 -  
11 -namespace stim{  
12 -  
13 -//this function writes a sinc function to "dest" such that an iFFT produces a slab  
14 -template<typename T>  
15 -__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri,  
16 - T* src, T* zf,  
17 - T w, unsigned int zR, unsigned int nuR){  
18 - //dest = complex field representing the sample  
19 - //ri = refractive indices for each wavelength  
20 - //src = intensity of the light source for each wavelength  
21 - //zf = z position of the slab interface for each wavelength (accounting for optical path length)  
22 - //w = width of the slab (in pixels)  
23 - //zR = number of z-axis samples  
24 - //nuR = number of wavelengths  
25 -  
26 - //get the current coordinate in the plane slice  
27 - int ifz = blockIdx.x * blockDim.x + threadIdx.x;  
28 - int inu = blockIdx.y * blockDim.y + threadIdx.y;  
29 -  
30 - //make sure that the thread indices are in-bounds  
31 - if(inu >= nuR || ifz >= zR) return;  
32 -  
33 - int i = inu * zR + ifz;  
34 -  
35 - T fz;  
36 - if(ifz < zR/2)  
37 - fz = ifz / (T)zR;  
38 - else  
39 - fz = -(zR - ifz) / (T)zR;  
40 -  
41 - //if the slab starts outside of the simulation domain, just return  
42 - if(zf[inu] >= zR) return;  
43 -  
44 - //fill the array along z with a sinc function representing the Fourier transform of the layer  
45 -  
46 - T opl = w * ri[inu].real(); //optical path length  
47 -  
48 - //handle the case where the slab goes outside the simulation domain  
49 - if(zf[inu] + opl >= zR)  
50 - opl = zR - zf[inu];  
51 -  
52 - if(opl == 0) return;  
53 -  
54 - //T l = w * ri[inu].real();  
55 - //complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0));  
56 - complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));  
57 -  
58 - complex<T> eta = ri[inu] * ri[inu] - 1;  
59 -  
60 - //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz);  
61 - if(ifz == 0)  
62 - dest[i] += opl * exp(e) * eta * src[inu];  
63 - else  
64 - dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);  
65 -}  
66 -  
67 -template<typename T>  
68 -__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){  
69 - //zf = current z depth (optical path length) in pixels  
70 - //ri = refractive index of the material  
71 - //w = actual width of the layer (in pixels)  
72 -  
73 -  
74 - //compute the index for this thread  
75 - int i = blockIdx.x * blockDim.x + threadIdx.x;  
76 - if(i >= S) return;  
77 -  
78 - if(ri == NULL)  
79 - zf[i] += w;  
80 - else  
81 - zf[i] += ri[i].real() * w;  
82 -}  
83 -  
84 -//apply the 1D MIRST filter to an existing sample (overwriting the sample)  
85 -template<typename T>  
86 -__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda,  
87 - T dFz,  
88 - T inNA, T outNA,  
89 - unsigned int lambdaR, unsigned int zR,  
90 - T sigma = 0){  
91 - //sampleFFT = the sample in the Fourier domain (will be overwritten)  
92 - //lambda = list of wavelengths  
93 - //dFz = delta along the Fz axis in the frequency domain  
94 - //inNA = NA of the internal obscuration  
95 - //outNA = NA of the objective  
96 - //zR = number of pixels along the Fz axis (same as the z-axis)  
97 - //lambdaR = number of wavelengths  
98 - //sigma = width of the Gaussian source  
99 - int ifz = blockIdx.x * blockDim.x + threadIdx.x;  
100 - int inu = blockIdx.y * blockDim.y + threadIdx.y;  
101 -  
102 - if(inu >= lambdaR || ifz >= zR) return;  
103 -  
104 - //calculate the index into the sample FT  
105 - int i = inu * zR + ifz;  
106 -  
107 - //compute the frequency (and set all negative spatial frequencies to zero)  
108 - T fz;  
109 - if(ifz < zR / 2)  
110 - fz = ifz * dFz;  
111 - //if the spatial frequency is negative, set it to zero and exit  
112 - else{  
113 - sampleFFT[i] = 0;  
114 - return;  
115 - }  
116 -  
117 - //compute the frequency in inverse microns  
118 - T nu = 1/lambda[inu];  
119 -  
120 - //determine the radius of the integration circle  
121 - T nu_sq = nu * nu;  
122 - T fz_sq = (fz * fz) / 4;  
123 -  
124 - //cut off frequencies above the diffraction limit  
125 - T r;  
126 - if(fz_sq < nu_sq)  
127 - r = sqrt(nu_sq - fz_sq);  
128 - else  
129 - r = 0;  
130 -  
131 - //account for the optics  
132 - T Q = 0;  
133 - if(r > nu * inNA && r < nu * outNA)  
134 - Q = 1;  
135 -  
136 - //account for the source  
137 - //T sigma = 30.0;  
138 - T s = exp( - (r*r * sigma*sigma) / 2 );  
139 - //T s=1;  
140 -  
141 - //compute the final filter  
142 - T mirst = 0;  
143 - if(fz != 0)  
144 - mirst = 2 * stimPI * r * s * Q * (1/fz);  
145 -  
146 - sampleFFT[i] *= mirst;  
147 -  
148 -}  
149 -  
150 -/*This object performs a 1-dimensional (layered) MIRST simulation  
151 -*/  
152 -template<typename T>  
153 -class mirst1d{  
154 -  
155 -private:  
156 - unsigned int Z; //z-axis resolution  
157 - unsigned int pad; //pixel padding on either side of the sample  
158 -  
159 - std::vector< material<T> > matlist; //list of materials  
160 - std::vector< T > layers; //list of layer thicknesses  
161 -  
162 - std::vector< T > lambdas; //list of wavelengths that are being simulated  
163 - unsigned int S; //number of wavelengths (size of "lambdas")  
164 -  
165 - T NA[2]; //numerical aperature (central obscuration and outer diameter)  
166 -  
167 - function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters)  
168 -  
169 - complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc.  
170 -  
171 - void fft(int direction = CUFFT_FORWARD){  
172 -  
173 - unsigned padZ = Z + pad;  
174 -  
175 - //create cuFFT handles  
176 - cufftHandle plan;  
177 - cufftResult result;  
178 -  
179 - if(sizeof(T) == 4)  
180 - result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision  
181 - else  
182 - result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision  
183 -  
184 - //check for Plan 1D errors  
185 - if(result != CUFFT_SUCCESS){  
186 - std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;  
187 - CufftError(result);  
188 - exit(1);  
189 - }  
190 -  
191 - if(sizeof(T) == 4)  
192 - result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);  
193 - else  
194 - result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);  
195 -  
196 - //check for FFT errors  
197 - if(result != CUFFT_SUCCESS){  
198 - std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;  
199 - CufftError(result);  
200 - exit(1);  
201 - }  
202 -  
203 - cufftDestroy(plan);  
204 - }  
205 -  
206 -  
207 - //initialize the scratch memory  
208 - void init_scratch(){  
209 - scratch = complexfield<T, 1>(Z + pad , lambdas.size());  
210 - scratch = 0;  
211 - }  
212 -  
213 - //get the list of scattering efficiency (eta) values for a specified layer  
214 - std::vector< complex<T> > layer_etas(unsigned int l){  
215 -  
216 - std::vector< complex<T> > etas;  
217 -  
218 - //fill the list of etas  
219 - for(unsigned int i=0; i<lambdas.size(); i++)  
220 - etas.push_back( matlist[l].eta(lambdas[i]) );  
221 - return etas;  
222 - }  
223 -  
224 - //calculates the optimal block and grid sizes using information from the GPU  
225 - void cuda_params(dim3& grids, dim3& blocks){  
226 - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size  
227 - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);  
228 -  
229 - //create one thread for each detector pixel  
230 - blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);  
231 - grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);  
232 - }  
233 -  
234 - //add the fourier transform of layer n to the scratch space  
235 - void build_layer_fft(unsigned int n, T* zf){  
236 - unsigned int paddedZ = Z + pad;  
237 -  
238 - T wpx = layers[n] / dz(); //calculate the width of the layer in pixels  
239 -  
240 - //allocate memory for the refractive index  
241 - complex<T>* gpuRi;  
242 - HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));  
243 -  
244 - //allocate memory for the source profile  
245 - T* gpuSrc;  
246 - HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));  
247 -  
248 - complex<T> ri;  
249 - T source;  
250 - //store the refractive index and source profile in a CPU array  
251 - for(int inu=0; inu<S; inu++){  
252 - //save the refractive index to the GPU  
253 - ri = matlist[n].getN(lambdas[inu]);  
254 - HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice ));  
255 -  
256 - //save the source profile to the GPU  
257 - source = source_profile(10000 / lambdas[inu]);  
258 - HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice ));  
259 -  
260 - }  
261 -  
262 - //create one thread for each pixel of the field slice  
263 - dim3 gridDim, blockDim;  
264 - cuda_params(gridDim, blockDim);  
265 - stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);  
266 -  
267 - int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size  
268 - int linGrid = S / linBlock + 1;  
269 - stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);  
270 -  
271 - //free memory  
272 - HANDLE_ERROR(cudaFree(gpuRi));  
273 - HANDLE_ERROR(cudaFree(gpuSrc));  
274 - }  
275 -  
276 - void build_sample(){  
277 - init_scratch(); //initialize the GPU scratch space  
278 - //build_layer(1);  
279 -  
280 - T* zf;  
281 - HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));  
282 - HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));  
283 -  
284 - //render each layer of the sample  
285 - for(unsigned int l=0; l<layers.size(); l++){  
286 - build_layer_fft(l, zf);  
287 - }  
288 -  
289 - HANDLE_ERROR(cudaFree(zf));  
290 - }  
291 -  
292 - void apply_filter(){  
293 - dim3 gridDim, blockDim;  
294 - cuda_params(gridDim, blockDim);  
295 -  
296 - unsigned int Zpad = Z + pad;  
297 -  
298 - T sim_range = dz() * Zpad;  
299 - T dFz = 1 / sim_range;  
300 -  
301 - //copy the array of wavelengths to the GPU  
302 - T* gpuLambdas;  
303 - HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad));  
304 - HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice));  
305 - stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas,  
306 - dFz,  
307 - NA[0], NA[1],  
308 - S, Zpad);  
309 - }  
310 -  
311 - //crop the image to the sample thickness - keep in mind that sample thickness != optical path length  
312 - void crop(){  
313 -  
314 - scratch = scratch.crop(Z, S);  
315 - }  
316 -  
317 - //save the scratch field as a binary file  
318 - void to_binary(std::string filename){  
319 -  
320 - }  
321 -  
322 -  
323 -public:  
324 -  
325 - //constructor  
326 - mirst1d(unsigned int rZ = 100,  
327 - unsigned int padding = 0){  
328 - Z = rZ;  
329 - pad = padding;  
330 - NA[0] = 0;  
331 - NA[1] = 0.8;  
332 - S = 0;  
333 - source_profile = 1;  
334 - }  
335 -  
336 - //add a layer, thickness = microns  
337 - void add_layer(material<T> mat, T thickness){  
338 - matlist.push_back(mat);  
339 - layers.push_back(thickness);  
340 - }  
341 -  
342 - void add_layer(std::string filename, T thickness){  
343 - add_layer(material<T>(filename), thickness);  
344 - }  
345 -  
346 - //adds a profile spectrum for the light source  
347 - void set_source(std::string filename){  
348 - source_profile.load(filename);  
349 - }  
350 -  
351 - //adds a block of wavenumbers (cm^-1) to the simulation parameters  
352 - void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){  
353 - unsigned int nu = start;  
354 - while(nu <= stop){  
355 - lambdas.push_back((T)10000 / nu);  
356 - nu += step;  
357 - }  
358 - S = lambdas.size(); //increment the number of wavelengths (shorthand for later)  
359 - }  
360 -  
361 - T thickness(){  
362 - T t = 0;  
363 - for(unsigned int l=0; l<layers.size(); l++)  
364 - t += layers[l];  
365 - return t;  
366 - }  
367 -  
368 - void padding(unsigned int padding = 0){  
369 - pad = padding;  
370 - }  
371 -  
372 - T dz(){  
373 - return thickness() / Z; //calculate the z-axis step size  
374 - }  
375 -  
376 - void na(T in, T out){  
377 - NA[0] = in;  
378 - NA[1] = out;  
379 - }  
380 -  
381 - void na(T out){  
382 - na(0, out);  
383 - }  
384 -  
385 - stim::function<T, T> get_source(){  
386 - return source_profile;  
387 - }  
388 -  
389 - void save_sample(std::string filename){  
390 - //create a sample and save the magnitude as an image  
391 - build_sample();  
392 - fft(CUFFT_INVERSE);  
393 - scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);  
394 - }  
395 -  
396 - void save_mirst(std::string filename, bool binary = true){  
397 - //apply the MIRST filter to a sample and save the image  
398 -  
399 - //build the sample in the Fourier domain  
400 - build_sample();  
401 -  
402 - //apply the MIRST filter  
403 - apply_filter();  
404 -  
405 - //apply an inverse FFT to bring the results back into the spatial domain  
406 - fft(CUFFT_INVERSE);  
407 -  
408 - crop();  
409 -  
410 - //save the image  
411 - if(binary)  
412 - to_binary(filename);  
413 - else  
414 - scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);  
415 - }  
416 -  
417 -  
418 -  
419 -  
420 - std::string str(){  
421 -  
422 - stringstream ss;  
423 - ss<<"1D MIRST Simulation========================="<<std::endl;  
424 - ss<<"z-axis resolution: "<<Z<<std::endl;  
425 - ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;  
426 - ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;  
427 - ss<<"padding: "<<pad<<std::endl;  
428 - ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;  
429 - ss<<"dz: "<<dz()<<" um"<<std::endl;  
430 - ss<<std::endl;  
431 - ss<<layers.size()<<" layers-------------"<<std::endl;  
432 - for(unsigned int l=0; l<layers.size(); l++)  
433 - ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;  
434 -  
435 - ss<<"source profile-----------"<<std::endl;  
436 - ss<<get_source().str()<<std::endl;  
437 -  
438 - return ss.str();  
439 -  
440 -  
441 - }  
442 -  
443 -  
444 -  
445 -};  
446 -  
447 -} 1 +#include "../optics/material.h"
  2 +#include "../math/complexfield.cuh"
  3 +#include "../math/constants.h"
  4 +//#include "../envi/bil.h"
  5 +
  6 +#include "cufft.h"
  7 +
  8 +#include <vector>
  9 +#include <sstream>
  10 +
  11 +namespace stim{
  12 +
  13 +//this function writes a sinc function to "dest" such that an iFFT produces a slab
  14 +template<typename T>
  15 +__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri,
  16 + T* src, T* zf,
  17 + T w, unsigned int zR, unsigned int nuR){
  18 + //dest = complex field representing the sample
  19 + //ri = refractive indices for each wavelength
  20 + //src = intensity of the light source for each wavelength
  21 + //zf = z position of the slab interface for each wavelength (accounting for optical path length)
  22 + //w = width of the slab (in pixels)
  23 + //zR = number of z-axis samples
  24 + //nuR = number of wavelengths
  25 +
  26 + //get the current coordinate in the plane slice
  27 + int ifz = blockIdx.x * blockDim.x + threadIdx.x;
  28 + int inu = blockIdx.y * blockDim.y + threadIdx.y;
  29 +
  30 + //make sure that the thread indices are in-bounds
  31 + if(inu >= nuR || ifz >= zR) return;
  32 +
  33 + int i = inu * zR + ifz;
  34 +
  35 + T fz;
  36 + if(ifz < zR/2)
  37 + fz = ifz / (T)zR;
  38 + else
  39 + fz = -(zR - ifz) / (T)zR;
  40 +
  41 + //if the slab starts outside of the simulation domain, just return
  42 + if(zf[inu] >= zR) return;
  43 +
  44 + //fill the array along z with a sinc function representing the Fourier transform of the layer
  45 +
  46 + T opl = w * ri[inu].real(); //optical path length
  47 +
  48 + //handle the case where the slab goes outside the simulation domain
  49 + if(zf[inu] + opl >= zR)
  50 + opl = zR - zf[inu];
  51 +
  52 + if(opl == 0) return;
  53 +
  54 + //T l = w * ri[inu].real();
  55 + //complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0));
  56 + complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));
  57 +
  58 + complex<T> eta = ri[inu] * ri[inu] - 1;
  59 +
  60 + //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz);
  61 + if(ifz == 0)
  62 + dest[i] += opl * exp(e) * eta * src[inu];
  63 + else
  64 + dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
  65 +}
  66 +
  67 +template<typename T>
  68 +__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){
  69 + //zf = current z depth (optical path length) in pixels
  70 + //ri = refractive index of the material
  71 + //w = actual width of the layer (in pixels)
  72 +
  73 +
  74 + //compute the index for this thread
  75 + int i = blockIdx.x * blockDim.x + threadIdx.x;
  76 + if(i >= S) return;
  77 +
  78 + if(ri == NULL)
  79 + zf[i] += w;
  80 + else
  81 + zf[i] += ri[i].real() * w;
  82 +}
  83 +
  84 +//apply the 1D MIRST filter to an existing sample (overwriting the sample)
  85 +template<typename T>
  86 +__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda,
  87 + T dFz,
  88 + T inNA, T outNA,
  89 + unsigned int lambdaR, unsigned int zR,
  90 + T sigma = 0){
  91 + //sampleFFT = the sample in the Fourier domain (will be overwritten)
  92 + //lambda = list of wavelengths
  93 + //dFz = delta along the Fz axis in the frequency domain
  94 + //inNA = NA of the internal obscuration
  95 + //outNA = NA of the objective
  96 + //zR = number of pixels along the Fz axis (same as the z-axis)
  97 + //lambdaR = number of wavelengths
  98 + //sigma = width of the Gaussian source
  99 + int ifz = blockIdx.x * blockDim.x + threadIdx.x;
  100 + int inu = blockIdx.y * blockDim.y + threadIdx.y;
  101 +
  102 + if(inu >= lambdaR || ifz >= zR) return;
  103 +
  104 + //calculate the index into the sample FT
  105 + int i = inu * zR + ifz;
  106 +
  107 + //compute the frequency (and set all negative spatial frequencies to zero)
  108 + T fz;
  109 + if(ifz < zR / 2)
  110 + fz = ifz * dFz;
  111 + //if the spatial frequency is negative, set it to zero and exit
  112 + else{
  113 + sampleFFT[i] = 0;
  114 + return;
  115 + }
  116 +
  117 + //compute the frequency in inverse microns
  118 + T nu = 1/lambda[inu];
  119 +
  120 + //determine the radius of the integration circle
  121 + T nu_sq = nu * nu;
  122 + T fz_sq = (fz * fz) / 4;
  123 +
  124 + //cut off frequencies above the diffraction limit
  125 + T r;
  126 + if(fz_sq < nu_sq)
  127 + r = sqrt(nu_sq - fz_sq);
  128 + else
  129 + r = 0;
  130 +
  131 + //account for the optics
  132 + T Q = 0;
  133 + if(r > nu * inNA && r < nu * outNA)
  134 + Q = 1;
  135 +
  136 + //account for the source
  137 + //T sigma = 30.0;
  138 + T s = exp( - (r*r * sigma*sigma) / 2 );
  139 + //T s=1;
  140 +
  141 + //compute the final filter
  142 + T mirst = 0;
  143 + if(fz != 0)
  144 + mirst = 2 * stimPI * r * s * Q * (1/fz);
  145 +
  146 + sampleFFT[i] *= mirst;
  147 +
  148 +}
  149 +
  150 +/*This object performs a 1-dimensional (layered) MIRST simulation
  151 +*/
  152 +template<typename T>
  153 +class mirst1d{
  154 +
  155 +private:
  156 + unsigned int Z; //z-axis resolution
  157 + unsigned int pad; //pixel padding on either side of the sample
  158 +
  159 + std::vector< material<T> > matlist; //list of materials
  160 + std::vector< T > layers; //list of layer thicknesses
  161 +
  162 + std::vector< T > lambdas; //list of wavelengths that are being simulated
  163 + unsigned int S; //number of wavelengths (size of "lambdas")
  164 +
  165 + T NA[2]; //numerical aperature (central obscuration and outer diameter)
  166 +
  167 + function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters)
  168 +
  169 + complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc.
  170 +
  171 + void fft(int direction = CUFFT_FORWARD){
  172 +
  173 + unsigned padZ = Z + pad;
  174 +
  175 + //create cuFFT handles
  176 + cufftHandle plan;
  177 + cufftResult result;
  178 +
  179 + if(sizeof(T) == 4)
  180 + result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision
  181 + else
  182 + result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision
  183 +
  184 + //check for Plan 1D errors
  185 + if(result != CUFFT_SUCCESS){
  186 + std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;
  187 + CufftError(result);
  188 + exit(1);
  189 + }
  190 +
  191 + if(sizeof(T) == 4)
  192 + result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);
  193 + else
  194 + result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);
  195 +
  196 + //check for FFT errors
  197 + if(result != CUFFT_SUCCESS){
  198 + std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;
  199 + CufftError(result);
  200 + exit(1);
  201 + }
  202 +
  203 + cufftDestroy(plan);
  204 + }
  205 +
  206 +
  207 + //initialize the scratch memory
  208 + void init_scratch(){
  209 + scratch = complexfield<T, 1>(Z + pad , lambdas.size());
  210 + scratch = 0;
  211 + }
  212 +
  213 + //get the list of scattering efficiency (eta) values for a specified layer
  214 + std::vector< complex<T> > layer_etas(unsigned int l){
  215 +
  216 + std::vector< complex<T> > etas;
  217 +
  218 + //fill the list of etas
  219 + for(unsigned int i=0; i<lambdas.size(); i++)
  220 + etas.push_back( matlist[l].eta(lambdas[i]) );
  221 + return etas;
  222 + }
  223 +
  224 + //calculates the optimal block and grid sizes using information from the GPU
  225 + void cuda_params(dim3& grids, dim3& blocks){
  226 + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
  227 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  228 +
  229 + //create one thread for each detector pixel
  230 + blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
  231 + grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);
  232 + }
  233 +
  234 + //add the fourier transform of layer n to the scratch space
  235 + void build_layer_fft(unsigned int n, T* zf){
  236 + unsigned int paddedZ = Z + pad;
  237 +
  238 + T wpx = layers[n] / dz(); //calculate the width of the layer in pixels
  239 +
  240 + //allocate memory for the refractive index
  241 + complex<T>* gpuRi;
  242 + HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));
  243 +
  244 + //allocate memory for the source profile
  245 + T* gpuSrc;
  246 + HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));
  247 +
  248 + complex<T> ri;
  249 + T source;
  250 + //store the refractive index and source profile in a CPU array
  251 + for(int inu=0; inu<S; inu++){
  252 + //save the refractive index to the GPU
  253 + ri = matlist[n].getN(lambdas[inu]);
  254 + HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice ));
  255 +
  256 + //save the source profile to the GPU
  257 + source = source_profile(10000 / lambdas[inu]);
  258 + HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice ));
  259 +
  260 + }
  261 +
  262 + //create one thread for each pixel of the field slice
  263 + dim3 gridDim, blockDim;
  264 + cuda_params(gridDim, blockDim);
  265 + stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);
  266 +
  267 + int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size
  268 + int linGrid = S / linBlock + 1;
  269 + stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);
  270 +
  271 + //free memory
  272 + HANDLE_ERROR(cudaFree(gpuRi));
  273 + HANDLE_ERROR(cudaFree(gpuSrc));
  274 + }
  275 +
  276 + void build_sample(){
  277 + init_scratch(); //initialize the GPU scratch space
  278 + //build_layer(1);
  279 +
  280 + T* zf;
  281 + HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));
  282 + HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));
  283 +
  284 + //render each layer of the sample
  285 + for(unsigned int l=0; l<layers.size(); l++){
  286 + build_layer_fft(l, zf);
  287 + }
  288 +
  289 + HANDLE_ERROR(cudaFree(zf));
  290 + }
  291 +
  292 + void apply_filter(){
  293 + dim3 gridDim, blockDim;
  294 + cuda_params(gridDim, blockDim);
  295 +
  296 + unsigned int Zpad = Z + pad;
  297 +
  298 + T sim_range = dz() * Zpad;
  299 + T dFz = 1 / sim_range;
  300 +
  301 + //copy the array of wavelengths to the GPU
  302 + T* gpuLambdas;
  303 + HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad));
  304 + HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice));
  305 + stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas,
  306 + dFz,
  307 + NA[0], NA[1],
  308 + S, Zpad);
  309 + }
  310 +
  311 + //crop the image to the sample thickness - keep in mind that sample thickness != optical path length
  312 + void crop(){
  313 +
  314 + scratch = scratch.crop(Z, S);
  315 + }
  316 +
  317 + //save the scratch field as a binary file
  318 + void to_binary(std::string filename){
  319 +
  320 + }
  321 +
  322 +
  323 +public:
  324 +
  325 + //constructor
  326 + mirst1d(unsigned int rZ = 100,
  327 + unsigned int padding = 0){
  328 + Z = rZ;
  329 + pad = padding;
  330 + NA[0] = 0;
  331 + NA[1] = 0.8;
  332 + S = 0;
  333 + source_profile = 1;
  334 + }
  335 +
  336 + //add a layer, thickness = microns
  337 + void add_layer(material<T> mat, T thickness){
  338 + matlist.push_back(mat);
  339 + layers.push_back(thickness);
  340 + }
  341 +
  342 + void add_layer(std::string filename, T thickness){
  343 + add_layer(material<T>(filename), thickness);
  344 + }
  345 +
  346 + //adds a profile spectrum for the light source
  347 + void set_source(std::string filename){
  348 + source_profile.load(filename);
  349 + }
  350 +
  351 + //adds a block of wavenumbers (cm^-1) to the simulation parameters
  352 + void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){
  353 + unsigned int nu = start;
  354 + while(nu <= stop){
  355 + lambdas.push_back((T)10000 / nu);
  356 + nu += step;
  357 + }
  358 + S = lambdas.size(); //increment the number of wavelengths (shorthand for later)
  359 + }
  360 +
  361 + T thickness(){
  362 + T t = 0;
  363 + for(unsigned int l=0; l<layers.size(); l++)
  364 + t += layers[l];
  365 + return t;
  366 + }
  367 +
  368 + void padding(unsigned int padding = 0){
  369 + pad = padding;
  370 + }
  371 +
  372 + T dz(){
  373 + return thickness() / Z; //calculate the z-axis step size
  374 + }
  375 +
  376 + void na(T in, T out){
  377 + NA[0] = in;
  378 + NA[1] = out;
  379 + }
  380 +
  381 + void na(T out){
  382 + na(0, out);
  383 + }
  384 +
  385 + stim::function<T, T> get_source(){
  386 + return source_profile;
  387 + }
  388 +
  389 + void save_sample(std::string filename){
  390 + //create a sample and save the magnitude as an image
  391 + build_sample();
  392 + fft(CUFFT_INVERSE);
  393 + scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
  394 + }
  395 +
  396 + void save_mirst(std::string filename, bool binary = true){
  397 + //apply the MIRST filter to a sample and save the image
  398 +
  399 + //build the sample in the Fourier domain
  400 + build_sample();
  401 +
  402 + //apply the MIRST filter
  403 + apply_filter();
  404 +
  405 + //apply an inverse FFT to bring the results back into the spatial domain
  406 + fft(CUFFT_INVERSE);
  407 +
  408 + crop();
  409 +
  410 + //save the image
  411 + if(binary)
  412 + to_binary(filename);
  413 + else
  414 + scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
  415 + }
  416 +
  417 +
  418 +
  419 +
  420 + std::string str(){
  421 +
  422 + stringstream ss;
  423 + ss<<"1D MIRST Simulation========================="<<std::endl;
  424 + ss<<"z-axis resolution: "<<Z<<std::endl;
  425 + ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;
  426 + ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;
  427 + ss<<"padding: "<<pad<<std::endl;
  428 + ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;
  429 + ss<<"dz: "<<dz()<<" um"<<std::endl;
  430 + ss<<std::endl;
  431 + ss<<layers.size()<<" layers-------------"<<std::endl;
  432 + for(unsigned int l=0; l<layers.size(); l++)
  433 + ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;
  434 +
  435 + ss<<"source profile-----------"<<std::endl;
  436 + ss<<get_source().str()<<std::endl;
  437 +
  438 + return ss.str();
  439 +
  440 +
  441 + }
  442 +
  443 +
  444 +
  445 +};
  446 +
  447 +}
stim/optics_old/planewave.h 0 โ†’ 100644
  1 +#ifndef RTS_PLANEWAVE
  2 +#define RTS_PLANEWAVE
  3 +
  4 +#include <string>
  5 +#include <sstream>
  6 +
  7 +#include "../math/vector.h"
  8 +#include "../math/quaternion.h"
  9 +#include "../math/constants.h"
  10 +#include "../math/plane.h"
  11 +#include "../cuda/callable.h"
  12 +
  13 +/*Basic conversions used here (assuming a vacuum)
  14 + lambda =
  15 +*/
  16 +
  17 +namespace stim{
  18 + namespace optics{
  19 +
  20 +template<typename T>
  21 +class planewave{
  22 +
  23 +protected:
  24 +
  25 + vec<T> k; //k = tau / lambda
  26 + vec< complex<T> > E0; //amplitude
  27 + //T phi;
  28 +
  29 + CUDA_CALLABLE planewave<T> bend(rts::vec<T> kn) const{
  30 +
  31 + vec<T> kn_hat = kn.norm(); //normalize the new k
  32 + vec<T> k_hat = k.norm(); //normalize the current k
  33 +
  34 + //std::cout<<"PLANE WAVE BENDING------------------"<<std::endl;
  35 + //std::cout<<"kn_hat: "<<kn_hat<<" k_hat: "<<k_hat<<std::endl;
  36 +
  37 + planewave<T> new_p; //create a new plane wave
  38 +
  39 + //if kn is equal to k or -k, handle the degenerate case
  40 + T k_dot_kn = k_hat.dot(kn_hat);
  41 +
  42 + //if k . n < 0, then the bend is a reflection
  43 + //flip k_hat
  44 + if(k_dot_kn < 0) k_hat = -k_hat;
  45 +
  46 + //std::cout<<"k dot kn: "<<k_dot_kn<<std::endl;
  47 +
  48 + //std::cout<<"k_dot_kn: "<<k_dot_kn<<std::endl;
  49 + if(k_dot_kn == -1){
  50 + new_p.k = -k;
  51 + new_p.E0 = E0;
  52 + return new_p;
  53 + }
  54 + else if(k_dot_kn == 1){
  55 + new_p.k = k;
  56 + new_p.E0 = E0;
  57 + return new_p;
  58 + }
  59 +
  60 + vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector
  61 +
  62 + //std::cout<<"r: "<<r<<std::endl;
  63 +
  64 + T theta = asin(r.len()); //compute the angle of the rotation about r
  65 +
  66 +
  67 +
  68 + //deal with a zero vector (both k and kn point in the same direction)
  69 + //if(theta == (T)0)
  70 + //{
  71 + // new_p = *this;
  72 + // return new_p;
  73 + //}
  74 +
  75 + //create a quaternion to capture the rotation
  76 + quaternion<T> q;
  77 + q.CreateRotation(theta, r.norm());
  78 +
  79 + //apply the rotation to E0
  80 + vec< complex<T> > E0n = q.toMatrix3() * E0;
  81 +
  82 + new_p.k = kn_hat * kmag();
  83 + new_p.E0 = E0n;
  84 +
  85 + return new_p;
  86 + }
  87 +
  88 +public:
  89 +
  90 +
  91 + ///constructor: create a plane wave propagating along z, polarized along x
  92 + /*planewave(T lambda = (T)1)
  93 + {
  94 + k = rts::vec<T>(0, 0, 1) * (TAU/lambda);
  95 + E0 = rts::vec<T>(1, 0, 0);
  96 + }*/
  97 + ///constructor: create a plane wave propagating along k, polarized along _E0, at frequency _omega
  98 + CUDA_CALLABLE planewave(vec<T> kvec = rts::vec<T>(0, 0, rtsTAU),
  99 + vec< complex<T> > E = rts::vec<T>(1, 0, 0), T phase = 0)
  100 + {
  101 + //phi = phase;
  102 +
  103 + k = kvec;
  104 + vec< complex<T> > k_hat = k.norm();
  105 +
  106 + if(E.len() == 0) //if the plane wave has an amplitude of 0
  107 + E0 = vec<T>(0); //just return it
  108 + else{
  109 + vec< complex<T> > s = (k_hat.cross(E)).norm(); //compute an orthogonal side vector
  110 + vec< complex<T> > E_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector
  111 + E0 = E_hat * E_hat.dot(E); //compute the projection of _E0 onto E0_hat
  112 + }
  113 +
  114 + E0 = E0 * exp( complex<T>(0, phase) );
  115 + }
  116 +
  117 + ///multiplication operator: scale E0
  118 + CUDA_CALLABLE planewave<T> & operator* (const T & rhs)
  119 + {
  120 +
  121 + E0 = E0 * rhs;
  122 + return *this;
  123 + }
  124 +
  125 + CUDA_CALLABLE T lambda() const
  126 + {
  127 + return rtsTAU / k.len();
  128 + }
  129 +
  130 + CUDA_CALLABLE T kmag() const
  131 + {
  132 + return k.len();
  133 + }
  134 +
  135 + CUDA_CALLABLE vec< complex<T> > E(){
  136 + return E0;
  137 + }
  138 +
  139 + CUDA_CALLABLE vec<T> kvec(){
  140 + return k;
  141 + }
  142 +
  143 + /*CUDA_CALLABLE T phase(){
  144 + return phi;
  145 + }
  146 +
  147 + CUDA_CALLABLE void phase(T p){
  148 + phi = p;
  149 + }*/
  150 +
  151 + CUDA_CALLABLE vec< complex<T> > pos(vec<T> p = vec<T>(0, 0, 0)){
  152 + vec< complex<T> > result;
  153 +
  154 + T kdp = k.dot(p);
  155 + complex<T> x = complex<T>(0, kdp);
  156 + complex<T> expx = exp(x);
  157 +
  158 + result[0] = E0[0] * expx;
  159 + result[1] = E0[1] * expx;
  160 + result[2] = E0[2] * expx;
  161 +
  162 + return result;
  163 + }
  164 +
  165 + //scales k based on a transition from material ni to material nt
  166 + CUDA_CALLABLE planewave<T> n(T ni, T nt){
  167 + return planewave<T>(k * (nt / ni), E0);
  168 + }
  169 +
  170 + CUDA_CALLABLE planewave<T> refract(rts::vec<T> kn) const
  171 + {
  172 + return bend(kn);
  173 + }
  174 +
  175 + void scatter(rts::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){
  176 +
  177 + int facing = P.face(k); //determine which direction the plane wave is coming in
  178 +
  179 + //if(facing == 0) //if the wave is tangent to the plane, return an identical wave
  180 + // return *this;
  181 + //else
  182 + if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr
  183 + P = P.flip(); //flip the plane
  184 + nr = 1/nr; //invert the refractive index (now nr = n0/n1)
  185 + }
  186 +
  187 + //use Snell's Law to calculate the transmitted angle
  188 + T cos_theta_i = k.norm().dot(-P.norm()); //compute the cosine of theta_i
  189 + T theta_i = acos(cos_theta_i); //compute theta_i
  190 + T sin_theta_t = (1/nr) * sin(theta_i); //compute the sine of theta_t using Snell's law
  191 + T theta_t = asin(sin_theta_t); //compute the cosine of theta_t
  192 +
  193 + bool tir = false; //flag for total internal reflection
  194 + if(theta_t != theta_t){
  195 + tir = true;
  196 + theta_t = rtsPI / (T)2;
  197 + }
  198 +
  199 + //handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
  200 + if(theta_i == 0){
  201 + T rp = (1 - nr) / (1 + nr); //compute the Fresnel coefficients
  202 + T tp = 2 / (1 + nr);
  203 + vec<T> kr = -k;
  204 + vec<T> kt = k * nr; //set the k vectors for theta_i = 0
  205 + vec< complex<T> > Er = E0 * rp; //compute the E vectors
  206 + vec< complex<T> > Et = E0 * tp;
  207 + T phase_t = P.p().dot(k - kt); //compute the phase offset
  208 + T phase_r = P.p().dot(k - kr);
  209 + //std::cout<<"Degeneracy: Head-On"<<std::endl;
  210 + //std::cout<<"rs: "<<rp<<" rp: "<<rp<<" ts: "<<tp<<" tp: "<<tp<<std::endl;
  211 + //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;
  212 +
  213 + //create the plane waves
  214 + r = planewave<T>(kr, Er, phase_r);
  215 + t = planewave<T>(kt, Et, phase_t);
  216 +
  217 + //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;
  218 + //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;
  219 + //std::cout<<"--------------------------------"<<std::endl;
  220 + return;
  221 + }
  222 +
  223 +
  224 + //compute the Fresnel coefficients
  225 + T rp, rs, tp, ts;
  226 + rp = tan(theta_t - theta_i) / tan(theta_t + theta_i);
  227 + rs = sin(theta_t - theta_i) / sin(theta_t + theta_i);
  228 +
  229 + if(tir){
  230 + tp = ts = 0;
  231 + }
  232 + else{
  233 + tp = ( 2 * sin(theta_t) * cos(theta_i) ) / ( sin(theta_t + theta_i) * cos(theta_t - theta_i) );
  234 + ts = ( 2 * sin(theta_t) * cos(theta_i) ) / sin(theta_t + theta_i);
  235 + }
  236 +
  237 + //compute the coordinate space for the plane of incidence
  238 + vec<T> z_hat = -P.norm();
  239 + vec<T> y_hat = P.parallel(k).norm();
  240 + vec<T> x_hat = y_hat.cross(z_hat).norm();
  241 +
  242 + //compute the k vectors for r and t
  243 + vec<T> kr, kt;
  244 + kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag();
  245 + kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr;
  246 +
  247 + //compute the magnitude of the p- and s-polarized components of the incident E vector
  248 + complex<T> Ei_s = E0.dot(x_hat);
  249 + //int sgn = (0 < E0.dot(y_hat)) - (E0.dot(y_hat) < 0);
  250 + int sgn = E0.dot(y_hat).sgn();
  251 + vec< complex<T> > cx_hat = x_hat;
  252 + complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
  253 + //T Ei_p = ( E0 - x_hat * Ei_s ).len();
  254 + //compute the magnitude of the p- and s-polarized components of the reflected E vector
  255 + complex<T> Er_s = Ei_s * rs;
  256 + complex<T> Er_p = Ei_p * rp;
  257 + //compute the magnitude of the p- and s-polarized components of the transmitted E vector
  258 + complex<T> Et_s = Ei_s * ts;
  259 + complex<T> Et_p = Ei_p * tp;
  260 +
  261 + //std::cout<<"E0: "<<E0<<std::endl;
  262 + //std::cout<<"E0 dot y_hat: "<<E0.dot(y_hat)<<std::endl;
  263 + //std::cout<<"theta i: "<<theta_i<<" theta t: "<<theta_t<<std::endl;
  264 + //std::cout<<"x_hat: "<<x_hat<<" y_hat: "<<y_hat<<" z_hat: "<<z_hat<<std::endl;
  265 + //std::cout<<"Ei_s: "<<Ei_s<<" Ei_p: "<<Ei_p<<" Er_s: "<<Er_s<<" Er_p: "<<Er_p<<" Et_s: "<<Et_s<<" Et_p: "<<Et_p<<std::endl;
  266 + //std::cout<<"rs: "<<rs<<" rp: "<<rp<<" ts: "<<ts<<" tp: "<<tp<<std::endl;
  267 +
  268 +
  269 + //compute the reflected E vector
  270 + vec< complex<T> > Er = vec< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
  271 + //compute the transmitted E vector
  272 + vec< complex<T> > Et = vec< complex<T> >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s;
  273 +
  274 + T phase_t = P.p().dot(k - kt);
  275 + T phase_r = P.p().dot(k - kr);
  276 +
  277 + //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;
  278 +
  279 + //std::cout<<"phase: "<<phase<<std::endl;
  280 +
  281 + //create the plane waves
  282 + r.k = kr;
  283 + r.E0 = Er * exp( complex<T>(0, phase_r) );
  284 + //r.phi = phase_r;
  285 +
  286 + //t = bend(kt);
  287 + //t.k = t.k * nr;
  288 +
  289 + t.k = kt;
  290 + t.E0 = Et * exp( complex<T>(0, phase_t) );
  291 + //t.phi = phase_t;
  292 + //std::cout<<"i: "<<str()<<std::endl;
  293 + //std::cout<<"r: "<<r.str()<<std::endl;
  294 + //std::cout<<"t: "<<t.str()<<std::endl;
  295 +
  296 + //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;
  297 + //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;
  298 + //std::cout<<"--------------------------------"<<std::endl;
  299 +
  300 + }
  301 +
  302 + std::string str()
  303 + {
  304 + std::stringstream ss;
  305 + ss<<"Plane Wave:"<<std::endl;
  306 + ss<<" "<<E0<<" e^i ( "<<k<<" . r )";
  307 + return ss.str();
  308 + }
  309 +}; //end planewave class
  310 +} //end namespace optics
  311 +} //end namespace stim
  312 +
  313 +template <typename T>
  314 +std::ostream& operator<<(std::ostream& os, rts::planewave<T> p)
  315 +{
  316 + os<<p.str();
  317 + return os;
  318 +}
  319 +
  320 +#endif