Commit 8e4f836425e5bee321a75e5ee33dee9867df0a85

Authored by David Mayerich
1 parent 3ff136b3

started a new optical framework focused on scalar simulation

stim/cuda/cudatools/devices.h
... ... @@ -15,7 +15,7 @@ int maxThreadsPerBlock()
15 15 }
16 16  
17 17 extern "C"
18   -int sharedMemPerBlock()
  18 +size_t sharedMemPerBlock()
19 19 {
20 20 int device;
21 21 cudaGetDevice(&device); //get the id of the current device
... ... @@ -23,6 +23,16 @@ int sharedMemPerBlock()
23 23 cudaGetDeviceProperties(&props, device);
24 24 return props.sharedMemPerBlock;
25 25 }
  26 +
  27 +extern "C"
  28 +size_t constMem()
  29 +{
  30 + int device;
  31 + cudaGetDevice(&device); //get the id of the current device
  32 + cudaDeviceProp props; //device property structure
  33 + cudaGetDeviceProperties(&props, device);
  34 + return props.totalConstMem;
  35 +}
26 36 } //end namespace rts
27 37  
28 38 #endif
... ...
stim/cuda/sharedmem.cuh
... ... @@ -5,7 +5,7 @@
5 5 namespace stim{
6 6 namespace cuda{
7 7  
8   - // Copies values from global memory to shared memory, optimizing threads
  8 + // Copies values from texture memory to shared memory, optimizing threads
9 9 template<typename T>
10 10 __device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,
11 11 unsigned int x, unsigned int y, unsigned int X, unsigned int Y,
... ... @@ -35,6 +35,19 @@ namespace stim{
35 35 }
36 36 }
37 37  
  38 + // Copies values from global memory to shared memory, optimizing threads
  39 + template<typename T>
  40 + __device__ void sharedMemcpy(T* dest, T* src, size_t N, size_t tid, size_t nt){
  41 +
  42 + size_t I = N / nt + 1; //calculate the number of iterations required to make the copy
  43 + size_t xi = tid; //initialize the source and destination index to the thread ID
  44 + for(size_t i = 0; i < I; i++){ //for each iteration
  45 + if(xi < N) //if the index is within the copy region
  46 + dest[xi] = src[xi]; //perform the copy
  47 + xi += nt;
  48 + }
  49 + }
  50 +
38 51  
39 52 }
40 53 }
... ...
stim/math/bessel.h
... ... @@ -17,6 +17,11 @@ static complex<double> czero(0.0,0.0);
17 17 template< typename P >
18 18 P gamma(P x)
19 19 {
  20 + const P EPS = numeric_limits<P>::epsilon();
  21 + const P FPMIN_MAG = numeric_limits<P>::min();
  22 + const P FPMIN = numeric_limits<P>::lowest();
  23 + const P FPMAX = numeric_limits<P>::max();
  24 +
20 25 int i,k,m;
21 26 P ga,gr,r,z;
22 27  
... ... @@ -47,7 +52,7 @@ P gamma(P x)
47 52 -0.54e-14,
48 53 0.14e-14};
49 54  
50   - if (x > 171.0) return 1e308; // This value is an overflow flag.
  55 + if (x > 171.0) return FPMAX; // This value is an overflow flag.
51 56 if (x == (int)x) {
52 57 if (x > 0.0) {
53 58 ga = 1.0; // use factorial
... ... @@ -56,7 +61,7 @@ P gamma(P x)
56 61 }
57 62 }
58 63 else
59   - ga = 1e308;
  64 + ga = FPMAX;
60 65 }
61 66 else {
62 67 if (fabs(x) > 1.0) {
... ... @@ -89,6 +94,11 @@ template<typename P>
89 94 int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1,
90 95 P &j0p,P &j1p,P &y0p,P &y1p)
91 96 {
  97 + const P EPS = numeric_limits<P>::epsilon();
  98 + const P FPMIN_MAG = numeric_limits<P>::min();
  99 + const P FPMIN = numeric_limits<P>::lowest();
  100 + const P FPMAX = numeric_limits<P>::max();
  101 +
92 102 P x2,r,ec,w0,w1,r0,r1,cs0,cs1;
93 103 P cu,p0,q0,p1,q1,t1,t2;
94 104 int k,kz;
... ... @@ -157,12 +167,12 @@ int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1,
157 167 if (x == 0.0) {
158 168 j0 = 1.0;
159 169 j1 = 0.0;
160   - y0 = -1e308;
161   - y1 = -1e308;
  170 + y0 = -FPMIN;
  171 + y1 = -FPMIN;
162 172 j0p = 0.0;
163 173 j1p = 0.5;
164   - y0p = 1e308;
165   - y1p = 1e308;
  174 + y0p = FPMAX;
  175 + y1p = FPMAX;
166 176 return 0;
167 177 }
168 178 x2 = x*x;
... ... @@ -329,7 +339,7 @@ int msta1(P x,int mp)
329 339 for (i=0;i<20;i++) {
330 340 nn = (int)(n1-(n1-n0)/(1.0-f0/f1));
331 341 f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-mp;
332   - if (abs(nn-n1) < 1) break;
  342 + if (std::abs(nn-n1) < 1) break;
333 343 n0 = n1;
334 344 f0 = f1;
335 345 n1 = nn;
... ... @@ -361,7 +371,7 @@ int msta2(P x,int n,int mp)
361 371 for (i=0;i<20;i++) {
362 372 nn = (int)(n1-(n1-n0)/(1.0-f0/f1));
363 373 f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-obj;
364   - if (abs(nn-n1) < 1) break;
  374 + if (std::abs(nn-n1) < 1) break;
365 375 n0 = n1;
366 376 f0 = f1;
367 377 n1 = nn;
... ... @@ -596,21 +606,26 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv,
596 606 P b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk;
597 607 int j,k,l,m,n,kz;
598 608  
  609 + const P EPS = numeric_limits<P>::epsilon();
  610 + const P FPMIN_MAG = numeric_limits<P>::min();
  611 + const P FPMIN = numeric_limits<P>::lowest();
  612 + const P FPMAX = numeric_limits<P>::max();
  613 +
599 614 x2 = x*x;
600 615 n = (int)v;
601 616 v0 = v-n;
602 617 if ((x < 0.0) || (v < 0.0)) return 1;
603   - if (x < 1e-15) {
  618 + if (x < EPS) {
604 619 for (k=0;k<=n;k++) {
605 620 jv[k] = 0.0;
606   - yv[k] = -1e308;
  621 + yv[k] = FPMIN;
607 622 djv[k] = 0.0;
608   - dyv[k] = 1e308;
  623 + dyv[k] = FPMAX;
609 624 if (v0 == 0.0) {
610 625 jv[0] = 1.0;
611 626 djv[1] = 0.5;
612 627 }
613   - else djv[0] = 1e308;
  628 + else djv[0] = FPMAX;
614 629 }
615 630 vm = v;
616 631 return 0;
... ... @@ -623,7 +638,7 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv,
623 638 for (k=1;k<=40;k++) {
624 639 r *= -0.25*x2/(k*(k+vl));
625 640 bjvl += r;
626   - if (fabs(r) < fabs(bjvl)*1e-15) break;
  641 + if (fabs(r) < fabs(bjvl)*EPS) break;
627 642 }
628 643 vg = 1.0 + vl;
629 644 a = pow(0.5*x,vl)/gamma(vg);
... ... @@ -686,7 +701,7 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv,
686 701 if (m < n) n = m;
687 702 else m = msta2(x,n,15);
688 703 f2 = 0.0;
689   - f1 = 1.0e-100;
  704 + f1 = FPMIN_MAG;
690 705 for (k=m;k>=0;k--) {
691 706 f = 2.0*(v0+k+1.0)*f1/x-f2;
692 707 if (k <= n) jv[k] = f;
... ... @@ -766,17 +781,17 @@ int bessjyv_sph(int v, P z, P &vm, P* cjv,
766 781 P* cyv, P* cjvp, P* cyvp)
767 782 {
768 783 //first, compute the bessel functions of fractional order
769   - bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
  784 + bessjyv<P>(v + (P)0.5, z, vm, cjv, cyv, cjvp, cyvp);
770 785  
771 786 //iterate through each and scale
772 787 for(int n = 0; n<=v; n++)
773 788 {
774 789  
775   - cjv[n] = cjv[n] * sqrt(rtsPI/(z * 2.0));
776   - cyv[n] = cyv[n] * sqrt(rtsPI/(z * 2.0));
  790 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  791 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
777 792  
778   - cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(rtsPI / (z * 2.0));
779   - cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(rtsPI / (z * 2.0));
  793 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
  794 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
780 795 }
781 796  
782 797 return 0;
... ... @@ -1498,11 +1513,11 @@ int cbessjyva_sph(int v,complex<P> z,P &vm,complex<P>*cjv,
1498 1513 for(int n = 0; n<=v; n++)
1499 1514 {
1500 1515  
1501   - cjv[n] = cjv[n] * sqrt(rtsPI/(z * 2.0));
1502   - cyv[n] = cyv[n] * sqrt(rtsPI/(z * 2.0));
  1516 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  1517 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
1503 1518  
1504   - cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(rtsPI / (z * 2.0));
1505   - cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(rtsPI / (z * 2.0));
  1519 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
  1520 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
1506 1521 }
1507 1522  
1508 1523 return 0;
... ...
stim/math/complex.h
1   -/*RTS Complex number class. This class is CUDA compatible,
2   -and can therefore be used in CUDA code and on CUDA devices.
3   -*/
  1 +/// CUDA compatible complex number class
4 2  
5   -#ifndef RTS_COMPLEX
6   -#define RTS_COMPLEX
  3 +#ifndef STIM_COMPLEX
  4 +#define STIM_COMPLEX
7 5  
8   -#include "../cuda/callable.h"
  6 +#include "../cuda/cudatools/callable.h"
9 7 #include <cmath>
10 8 #include <string>
11 9 #include <sstream>
... ... @@ -230,12 +228,6 @@ struct complex
230 228 return result;
231 229 }
232 230  
233   - /*CUDA_CALLABLE complex<T> pow(int y)
234   - {
235   -
236   - return pow((double)y);
237   - }*/
238   -
239 231 CUDA_CALLABLE complex<T> pow(T y)
240 232 {
241 233 complex<T> result;
... ... @@ -328,8 +320,31 @@ struct complex
328 320 return *this;
329 321 }
330 322  
  323 +
  324 +
331 325 };
332 326  
  327 +/// Cast an array of complex values to an array of real values
  328 +template<typename T>
  329 +static void real(T* r, complex<T>* c, size_t n){
  330 + for(size_t i = 0; i < n; i++)
  331 + r[i] = c[i].real();
  332 +}
  333 +
  334 +/// Cast an array of complex values to an array of real values
  335 +template<typename T>
  336 +static void imag(T* r, complex<T>* c, size_t n){
  337 + for(size_t i = 0; i < n; i++)
  338 + r[i] = c[i].imag();
  339 +}
  340 +
  341 +/// Calculate the magnitude of an array of complex values
  342 +template<typename T>
  343 +static void abs(T* m, complex<T>* c, size_t n){
  344 + for(size_t i = 0; i < n; i++)
  345 + m[i] = c[i].abs();
  346 +}
  347 +
333 348 } //end RTS namespace
334 349  
335 350 //addition
... ... @@ -432,17 +447,6 @@ CUDA_CALLABLE static T imag(stim::complex<T> a)
432 447 return a.i;
433 448 }
434 449  
435   -//trigonometric functions
436   -//template<class A>
437   -/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x)
438   -{
439   - stim::complex<float> result;
440   - result.r = sinf(x.r) * coshf(x.i);
441   - result.i = cosf(x.r) * sinhf(x.i);
442   -
443   - return result;
444   -}*/
445   -
446 450 template<class A>
447 451 CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)
448 452 {
... ... @@ -453,17 +457,6 @@ CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)
453 457 return result;
454 458 }
455 459  
456   -//floating point template
457   -//template<class A>
458   -/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x)
459   -{
460   - stim::complex<float> result;
461   - result.r = cosf(x.r) * coshf(x.i);
462   - result.i = -(sinf(x.r) * sinhf(x.i));
463   -
464   - return result;
465   -}*/
466   -
467 460 template<class A>
468 461 CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x)
469 462 {
... ... @@ -496,10 +489,4 @@ std::istream& operator>>(std::istream& is, stim::complex<A>& x)
496 489 return is; //return the stream
497 490 }
498 491  
499   -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
500   -//template<class T> using rtsComplex = stim::complex<T>;
501   -//#endif
502   -
503   -
504   -
505 492 #endif
... ...
stim/math/constants.h
1   -#ifndef RTS_CONSTANTS_H
2   -#define RTS_CONSTANTS_H
  1 +#ifndef STIM_CONSTANTS_H
  2 +#define STIM_CONSTANTS_H
3 3  
4   -#define stimPI 3.14159
5   -#define stimTAU 2 * rtsPI
namespace stim{
	/// The constant pi, stored in double precision
	const double PI = 3.1415926535897932384626433832795028841971693993751058209749445923078164062862;

	/// The constant tau, defined as twice pi
	const double TAU = 2 * PI;
}
6 8  
7 9 #endif
... ...
stim/math/matrix.h
... ... @@ -50,10 +50,8 @@ struct matrix
50 50 return *this;
51 51 }
52 52  
53   -
54 53 template<typename Y>
55   - CUDA_CALLABLE vec<Y> operator*(vec<Y> rhs)
56   - {
  54 + vec<Y> operator*(vec<Y> rhs){
57 55 unsigned int N = rhs.size();
58 56  
59 57 vec<Y> result;
... ... @@ -66,6 +64,16 @@ struct matrix
66 64 return result;
67 65 }
68 66  
  67 + template<typename Y>
  68 + CUDA_CALLABLE vec3<Y> operator*(vec3<Y> rhs){
  69 + vec3<Y> result = 0;
  70 + for(int r=0; r<3; r++)
  71 + for(int c=0; c<3; c++)
  72 + result[r] += (*this)(r, c) * rhs[c];
  73 +
  74 + return result;
  75 + }
  76 +
69 77 std::string toStr()
70 78 {
71 79 std::stringstream ss;
... ... @@ -82,10 +90,6 @@ struct matrix
82 90  
83 91 return ss.str();
84 92 }
85   -
86   -
87   -
88   -
89 93 };
90 94  
91 95 } //end namespace rts
... ...
stim/math/meshgrid.h 0 → 100644
  1 +#ifndef STIM_MESHGRID_H
  2 +#define STIM_MESHGRID_H
  3 +
  4 +namespace stim{
  5 +
  6 + /// Create a 2D grid based on a pair of vectors representing the grid spacing (see Matlab)
  7 + /// @param X is an [nx x ny] array that will store the X coordinates for each 2D point
  8 + /// @param Y is an [nx x ny] array that will store the Y coordinates for each 2D point
  9 + /// @param x is an [nx] array that provides the positions of grid points in the x direction
  10 + /// @param nx is the number of grid points in the x direction
  11 + /// @param y is an [ny] array that provides the positions of grid points in the y direction
  12 + /// @param ny is the number of grid points in the y direction
  13 + template<typename T>
  14 + void meshgrid(T* X, T* Y, T* x, size_t nx, T* y, size_t ny){
  15 + size_t xi, yi; //allocate index variables
  16 + for(yi = 0; yi < ny; yi++){ //iterate through each column
  17 + for(xi = 0; xi < nx; xi++){ //iterate through each row
  18 + X[yi * nx + xi] = x[xi];
  19 + Y[yi * nx + xi] = y[yi];
  20 + }
  21 + }
  22 + }
  23 +
  24 + /// Creates an array of n equally spaced values in the range [xmin xmax]
  25 + /// @param X is an array of length n that stores the values
  26 + /// @param xmin is the start point of the array
  27 + /// @param xmax is the end point of the array
  28 + /// @param n is the number of points in the array
  29 + template<typename T>
  30 + void linspace(T* X, T xmin, T xmax, size_t n){
  31 + T alpha;
  32 + for(size_t i = 0; i < n; i++){
  33 + alpha = (T)i / (T)n;
  34 + X[i] = (1 - alpha) * xmin + alpha * xmax;
  35 + }
  36 + }
  37 +
  38 +
  39 +}
  40 +
  41 +
  42 +#endif
0 43 \ No newline at end of file
... ...
stim/math/quaternion.h
... ... @@ -26,13 +26,13 @@ public:
26 26  
27 27 CUDA_CALLABLE void CreateRotation(T theta, T ux, T uy, T uz){
28 28  
29   - vec<T> u(ux, uy, uz);
  29 + vec3<T> u(ux, uy, uz);
30 30 CreateRotation(theta, u);
31 31 }
32 32  
33   - CUDA_CALLABLE void CreateRotation(T theta, vec<T> u){
  33 + CUDA_CALLABLE void CreateRotation(T theta, vec3<T> u){
34 34  
35   - vec<T> u_hat = u.norm();
  35 + vec3<T> u_hat = u.norm();
36 36  
37 37 //assign the given Euler rotation to this quaternion
38 38 w = (T)cos(theta/2);
... ... @@ -41,9 +41,9 @@ public:
41 41 z = u_hat[2]*(T)sin(theta/2);
42 42 }
43 43  
44   - void CreateRotation(vec<T> from, vec<T> to){
  44 + CUDA_CALLABLE void CreateRotation(vec3<T> from, vec3<T> to){
45 45  
46   - vec<T> r = from.cross(to); //compute the rotation vector
  46 + vec3<T> r = from.cross(to); //compute the rotation vector
47 47 T theta = asin(r.len()); //compute the angle of the rotation about r
48 48 //deal with a zero vector (both k and kn point in the same direction)
49 49 if(theta == (T)0){
... ...
stim/math/vec3.h 0 → 100644
  1 +#ifndef STIM_VEC3_H
  2 +#define STIM_VEC3_H
  3 +
  4 +
  5 +#include <stim/cuda/cudatools/callable.h>
  6 +
  7 +
  8 +namespace stim{
  9 +
  10 +
  11 +/// A class designed to act as a 3D vector with CUDA compatibility
  12 +template<typename T>
  13 +class vec3{
  14 +
  15 +protected:
  16 + T ptr[3];
  17 +
  18 +public:
  19 +
  20 + CUDA_CALLABLE vec3(){}
  21 +
  22 + CUDA_CALLABLE vec3(T v){
  23 + ptr[0] = ptr[1] = ptr[2] = v;
  24 + }
  25 +
  26 + CUDA_CALLABLE vec3(T x, T y, T z){
  27 + ptr[0] = x;
  28 + ptr[1] = y;
  29 + ptr[2] = z;
  30 + }
  31 +
  32 + //copy constructor
  33 + CUDA_CALLABLE vec3( const vec3<T>& other){
  34 + ptr[0] = other.ptr[0];
  35 + ptr[1] = other.ptr[1];
  36 + ptr[2] = other.ptr[2];
  37 + }
  38 +
  39 + //access an element using an index
  40 + CUDA_CALLABLE T& operator[](int idx){
  41 + return ptr[idx];
  42 + }
  43 +
  44 +/// Casting operator. Creates a new vector with a new type U.
  45 + template< typename U >
  46 + CUDA_CALLABLE operator vec3<U>(){
  47 + vec3<U> result;
  48 + result.ptr[0] = (U)ptr[0];
  49 + result.ptr[1] = (U)ptr[1];
  50 + result.ptr[2] = (U)ptr[2];
  51 +
  52 + return result;
  53 + }
  54 +
  55 + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
  56 + CUDA_CALLABLE T len_sq() const{
  57 + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
  58 + }
  59 +
  60 + /// computes the Euclidean length of the vector
  61 + CUDA_CALLABLE T len() const{
  62 + return sqrt(len_sq());
  63 + }
  64 +
  65 +
  66 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  67 + CUDA_CALLABLE vec3<T> cart2sph() const{
  68 + vec3<T> sph;
  69 + sph.ptr[0] = len();
  70 + sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
  71 + if(sph.ptr[0] == 0)
  72 + sph.ptr[2] = 0;
  73 + else
  74 + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
  75 + return sph;
  76 + }
  77 +
  78 + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi])
  79 + CUDA_CALLABLE vec3<T> sph2cart() const{
  80 + vec3<T> cart;
  81 + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
  82 + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
  83 + cart.ptr[2] = ptr[0] * std::cos(ptr[2]);
  84 +
  85 + return cart;
  86 + }
  87 +
  88 + /// Computes the normalized vector (where each coordinate is divided by the L2 norm)
  89 + CUDA_CALLABLE vec3<T> norm() const{
  90 + vec3<T> result;
  91 + T l = len(); //compute the vector length
  92 + return (*this) / l;
  93 + }
  94 +
  95 + /// Computes the cross product of a 3-dimensional vector
  96 + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{
  97 +
  98 + vec3<T> result;
  99 +
  100 + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
  101 + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
  102 + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
  103 +
  104 + return result;
  105 + }
  106 +
  107 + /// Compute the Euclidean inner (dot) product
  108 + CUDA_CALLABLE T dot(vec3<T> rhs) const{
  109 + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
  110 + }
  111 +
  112 + /// Arithmetic addition operator
  113 +
  114 + /// @param rhs is the right-hand-side operator for the addition
  115 + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
  116 + vec3<T> result;
  117 + result.ptr[0] = ptr[0] + rhs[0];
  118 + result.ptr[1] = ptr[1] + rhs[1];
  119 + result.ptr[2] = ptr[2] + rhs[2];
  120 + return result;
  121 + }
  122 +
  123 + /// Arithmetic addition to a scalar
  124 +
  125 + /// @param rhs is the right-hand-side operator for the addition
  126 + CUDA_CALLABLE vec3<T> operator+(T rhs) const{
  127 + vec3<T> result;
  128 + result.ptr[0] = ptr[0] + rhs;
  129 + result.ptr[1] = ptr[1] + rhs;
  130 + result.ptr[2] = ptr[2] + rhs;
  131 + return result;
  132 + }
  133 +
  134 + /// Arithmetic subtraction operator
  135 +
  136 + /// @param rhs is the right-hand-side operator for the subtraction
  137 + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
  138 + vec3<T> result;
  139 + result.ptr[0] = ptr[0] - rhs[0];
  140 + result.ptr[1] = ptr[1] - rhs[1];
  141 + result.ptr[2] = ptr[2] - rhs[2];
  142 + return result;
  143 + }
  144 + /// Arithmetic subtraction to a scalar
  145 +
  146 + /// @param rhs is the right-hand-side operator for the addition
  147 + CUDA_CALLABLE vec3<T> operator-(T rhs) const{
  148 + vec3<T> result;
  149 + result.ptr[0] = ptr[0] - rhs;
  150 + result.ptr[1] = ptr[1] - rhs;
  151 + result.ptr[2] = ptr[2] - rhs;
  152 + return result;
  153 + }
  154 +
  155 + /// Arithmetic scalar multiplication operator
  156 +
  157 + /// @param rhs is the right-hand-side operator for the subtraction
  158 + CUDA_CALLABLE vec3<T> operator*(T rhs) const{
  159 + vec3<T> result;
  160 + result.ptr[0] = ptr[0] * rhs;
  161 + result.ptr[1] = ptr[1] * rhs;
  162 + result.ptr[2] = ptr[2] * rhs;
  163 + return result;
  164 + }
  165 +
  166 + /// Arithmetic scalar division operator
  167 +
  168 + /// @param rhs is the right-hand-side operator for the subtraction
  169 + CUDA_CALLABLE vec3<T> operator/(T rhs) const{
  170 + return (*this) * ((T)1.0/rhs);
  171 + }
  172 +
  173 + /// Multiplication by a scalar, followed by assignment
  174 + CUDA_CALLABLE vec3<T> operator*=(T rhs){
  175 + ptr[0] = ptr[0] * rhs;
  176 + ptr[1] = ptr[1] * rhs;
  177 + ptr[2] = ptr[2] * rhs;
  178 + return *this;
  179 + }
  180 +
  181 + /// Addition and assignment
  182 + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
  183 + ptr[0] = ptr[0] + rhs;
  184 + ptr[1] = ptr[1] + rhs;
  185 + ptr[2] = ptr[2] + rhs;
  186 + return *this;
  187 + }
  188 +
  189 + /// Assign a scalar to all values
  190 + CUDA_CALLABLE vec3<T> & operator=(T rhs){
  191 + ptr[0] = ptr[0] = rhs;
  192 + ptr[1] = ptr[1] = rhs;
  193 + ptr[2] = ptr[2] = rhs;
  194 + return *this;
  195 + }
  196 +
  197 + /// Casting and assignment
  198 + template<typename Y>
  199 + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
  200 + ptr[0] = (T)rhs.ptr[0];
  201 + ptr[1] = (T)rhs.ptr[1];
  202 + ptr[2] = (T)rhs.ptr[2];
  203 + return *this;
  204 + }
  205 +
  206 + /// Unary minus (returns the negative of the vector)
  207 + CUDA_CALLABLE vec3<T> operator-() const{
  208 + vec3<T> result;
  209 + result.ptr[0] = -ptr[0];
  210 + result.ptr[1] = -ptr[1];
  211 + result.ptr[2] = -ptr[2];
  212 + return result;
  213 + }
  214 +
  215 +
  216 + /// Outputs the vector as a string
  217 + std::string str() const{
  218 + std::stringstream ss;
  219 +
  220 + size_t N = size();
  221 +
  222 + ss<<"[";
  223 + for(size_t i=0; i<N; i++)
  224 + {
  225 + ss<<at(i);
  226 + if(i != N-1)
  227 + ss<<", ";
  228 + }
  229 + ss<<"]";
  230 +
  231 + return ss.str();
  232 + }
  233 + }; //end class triple
  234 +} //end namespace stim
  235 +
  236 +/// Multiply a vector by a constant when the vector is on the right hand side
  237 +template <typename T>
  238 +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){
  239 + return rhs * lhs;
  240 +}
  241 +
  242 +#endif
0 243 \ No newline at end of file
... ...
stim/math/vector.h
1   -#ifndef RTS_VECTOR_H
2   -#define RTS_VECTOR_H
  1 +#ifndef STIM_VECTOR_H
  2 +#define STIM_VECTOR_H
3 3  
4 4 #include <iostream>
5 5 #include <cmath>
... ... @@ -11,8 +11,6 @@
11 11 namespace stim
12 12 {
13 13  
14   -
15   -
16 14 template <class T>
17 15 struct vec : public std::vector<T>
18 16 {
... ...
stim/optics/planewave.h
1   -#ifndef RTS_PLANEWAVE
2   -#define RTS_PLANEWAVE
  1 +#ifndef STIM_PLANEWAVE_H
  2 +#define STIM_PLANEWAVE_H
3 3  
4 4 #include <string>
5 5 #include <sstream>
  6 +#include <cmath>
6 7  
7 8 #include "../math/vector.h"
8 9 #include "../math/quaternion.h"
9 10 #include "../math/constants.h"
10 11 #include "../math/plane.h"
11   -#include "../cuda/callable.h"
12   -
13   -/*Basic conversions used here (assuming a vacuum)
14   - lambda =
15   -*/
  12 +#include "../math/complex.h"
16 13  
17 14 namespace stim{
  15 + namespace optics{
  16 +
  17 + /// evaluate the scalar field produced by a plane wave at a point (x, y, z)
  18 +
  19 + /// @param x is the x-coordinate of the point
  20 + /// @param y is the y-coordinate of the point
  21 + /// @param z is the z-coordinate of the point
  22 + /// @param A is the amplitude of the plane wave, specifically the field at (0, 0, 0)
  23 + /// @param kx is the k-vector component in the x direction
  24 + /// @param ky is the k-vector component in the y direction
  25 + /// @param kz is the k-vector component in the z direction
  26 + template<typename T>
  27 + stim::complex<T> planewave_scalar(T x, T y, T z, stim::complex<T> A, T kx, T ky, T kz){
  28 + T d = x * kx + y * ky + z * kz; //calculate the dot product between k and p = (x, y, z) to find the distance p is along the propagation direction
  29 + stim::complex<T> di = stim::complex<T>(0, d); //calculate the phase shift that will have to be applied to propagate the wave distance d
  30 + return A * exp(di); //multiply the phase term by the amplitude at (0, 0, 0) to propagate the wave to p
  31 + }
  32 +
  33 + /// evaluate the scalar field produced by a plane wave at several positions
  34 +
  35 + /// @param field is a pre-allocated block of memory that will store the complex field at all points
  36 + /// @param N is the number of field values to be evaluated
  37 + /// @param x is a set of x coordinates defining positions within the field (NULL implies that all values are zero)
  38 + /// @param y is a set of y coordinates defining positions within the field (NULL implies that all values are zero)
  39 + /// @param z is a set of z coordinates defining positions within the field (NULL implies that all values are zero)
  40 + /// @param A is the amplitude of the plane wave, specifically the field at (0, 0, 0)
  41 + /// @param kx is the k-vector component in the x direction
  42 + /// @param ky is the k-vector component in the y direction
  43 + /// @param kz is the k-vector component in the z direction
  44 + template<typename T>
  45 + void cpu_planewave_scalar(stim::complex<T>* field, size_t N, T* x, T* y = NULL, T* z = NULL, stim::complex<T> A = 1.0, T kx = 0.0, T ky = 0.0, T kz = 0.0){
  46 + T px, py, pz;
  47 + for(size_t i = 0; i < N; i++){ // for each element in the array
  48 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values
  49 + (y == NULL) ? py = 0 : py = y[i];
  50 + (z == NULL) ? pz = 0 : pz = z[i];
  51 +
  52 + field[i] = planewave_scalar(px, py, pz, A, kx, ky, kz); // call the single-value plane wave function
  53 + }
  54 + }
18 55  
19 56 template<typename T>
20 57 class planewave{
21 58  
22 59 protected:
23 60  
24   - vec<T> k; //k = tau / lambda
25   - vec< complex<T> > E0; //amplitude
26   - //T phi;
27   -
28   - CUDA_CALLABLE planewave<T> bend(rts::vec<T> kn) const{
  61 + stim::vec<T> k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda
  62 + stim::vec< stim::complex<T> > E0; //amplitude (for a scalar plane wave, only E0[0] is used)
29 63  
30   - vec<T> kn_hat = kn.norm(); //normalize the new k
31   - vec<T> k_hat = k.norm(); //normalize the current k
  64 + /// Bend a plane wave via refraction, given that the new propagation direction is known
  65 + CUDA_CALLABLE planewave<T> bend(stim::vec<T> kn) const{
32 66  
33   - //std::cout<<"PLANE WAVE BENDING------------------"<<std::endl;
34   - //std::cout<<"kn_hat: "<<kn_hat<<" k_hat: "<<k_hat<<std::endl;
  67 + stim::vec<T> kn_hat = kn.norm(); //normalize the new k
  68 + stim::vec<T> k_hat = k.norm(); //normalize the current k
35 69  
36   - planewave<T> new_p; //create a new plane wave
  70 + planewave<T> new_p; //create a new plane wave
37 71  
38   - //if kn is equal to k or -k, handle the degenerate case
39   - T k_dot_kn = k_hat.dot(kn_hat);
  72 + T k_dot_kn = k_hat.dot(kn_hat); //if kn is equal to k or -k, handle the degenerate case
40 73  
41 74 //if k . n < 0, then the bend is a reflection
42   - //flip k_hat
43   - if(k_dot_kn < 0) k_hat = -k_hat;
  75 + if(k_dot_kn < 0) k_hat = -k_hat; //flip k_hat
44 76  
45   - //std::cout<<"k dot kn: "<<k_dot_kn<<std::endl;
46   -
47   - //std::cout<<"k_dot_kn: "<<k_dot_kn<<std::endl;
48 77 if(k_dot_kn == -1){
49 78 new_p.k = -k;
50 79 new_p.E0 = E0;
... ... @@ -56,28 +85,11 @@ protected:
56 85 return new_p;
57 86 }
58 87  
59   - vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector
60   -
61   - //std::cout<<"r: "<<r<<std::endl;
62   -
63   - T theta = asin(r.len()); //compute the angle of the rotation about r
64   -
65   -
66   -
67   - //deal with a zero vector (both k and kn point in the same direction)
68   - //if(theta == (T)0)
69   - //{
70   - // new_p = *this;
71   - // return new_p;
72   - //}
73   -
74   - //create a quaternion to capture the rotation
75   - quaternion<T> q;
76   - q.CreateRotation(theta, r.norm());
77   -
78   - //apply the rotation to E0
79   - vec< complex<T> > E0n = q.toMatrix3() * E0;
80   -
  88 + vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector
  89 + T theta = asin(r.len()); //compute the angle of the rotation about r
  90 + quaternion<T> q; //create a quaternion to capture the rotation
  91 + q.CreateRotation(theta, r.norm());
  92 + vec< complex<T> > E0n = q.toMatrix3() * E0; //apply the rotation to E0
81 93 new_p.k = kn_hat * kmag();
82 94 new_p.E0 = E0n;
83 95  
... ... @@ -86,16 +98,9 @@ protected:
86 98  
87 99 public:
88 100  
89   -
90   - ///constructor: create a plane wave propagating along z, polarized along x
91   - /*planewave(T lambda = (T)1)
92   - {
93   - k = rts::vec<T>(0, 0, 1) * (TAU/lambda);
94   - E0 = rts::vec<T>(1, 0, 0);
95   - }*/
96   - ///constructor: create a plane wave propagating along k, polarized along _E0, at frequency _omega
97   - CUDA_CALLABLE planewave(vec<T> kvec = rts::vec<T>(0, 0, rtsTAU),
98   - vec< complex<T> > E = rts::vec<T>(1, 0, 0), T phase = 0)
  101 + ///constructor: create a plane wave propagating along k
  102 + CUDA_CALLABLE planewave(vec<T> kvec = stim::vec<T>(0, 0, stim::TAU),
  103 + vec< complex<T> > E = stim::vec<T>(1, 0, 0))
99 104 {
100 105 //phi = phase;
101 106  
... ... @@ -107,27 +112,23 @@ public:
107 112 else{
108 113 vec< complex<T> > s = (k_hat.cross(E)).norm(); //compute an orthogonal side vector
109 114 vec< complex<T> > E_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector
110   - E0 = E_hat * E_hat.dot(E); //compute the projection of _E0 onto E0_hat
  115 + E0 = E_hat;// * E_hat.dot(E); //compute the projection of _E0 onto E0_hat
111 116 }
112 117  
113 118 E0 = E0 * exp( complex<T>(0, phase) );
114 119 }
115 120  
116 121 ///multiplication operator: scale E0
117   - CUDA_CALLABLE planewave<T> & operator* (const T & rhs)
118   - {
119   -
  122 + CUDA_CALLABLE planewave<T> & operator* (const T & rhs){
120 123 E0 = E0 * rhs;
121 124 return *this;
122 125 }
123 126  
124   - CUDA_CALLABLE T lambda() const
125   - {
126   - return rtsTAU / k.len();
  127 + CUDA_CALLABLE T lambda() const{
  128 + return stim::TAU / k.len();
127 129 }
128 130  
129   - CUDA_CALLABLE T kmag() const
130   - {
  131 + CUDA_CALLABLE T kmag() const{
131 132 return k.len();
132 133 }
133 134  
... ... @@ -139,14 +140,11 @@ public:
139 140 return k;
140 141 }
141 142  
142   - /*CUDA_CALLABLE T phase(){
143   - return phi;
  143 + /// calculate the value of the field produced by the plane wave given a three-dimensional position
  144 + CUDA_CALLABLE vec< complex<T> > pos(T x, T y, T z){
  145 + return pos( stim::vec<T>(x, y, z) );
144 146 }
145 147  
146   - CUDA_CALLABLE void phase(T p){
147   - phi = p;
148   - }*/
149   -
150 148 CUDA_CALLABLE vec< complex<T> > pos(vec<T> p = vec<T>(0, 0, 0)){
151 149 vec< complex<T> > result;
152 150  
... ... @@ -166,18 +164,32 @@ public:
166 164 return planewave<T>(k * (nt / ni), E0);
167 165 }
168 166  
169   - CUDA_CALLABLE planewave<T> refract(rts::vec<T> kn) const
170   - {
  167 + CUDA_CALLABLE planewave<T> refract(stim::vec<T> kn) const{
171 168 return bend(kn);
172 169 }
173 170  
174   - void scatter(rts::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){
  171 + /// Calculate the result of a plane wave hitting an interface between two refractive indices
  172 +
  173 + /// @param P is a plane representing the position and orientation of the surface
  174 + /// @param n0 is the refractive index outside of the surface (in the direction of the normal)
  175 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  176 + /// @param r is the reflected component of the plane wave
  177 + /// @param t is the transmitted component of the plane wave
  178 + void scatter(stim::plane<T> P, T n0, T n1, planewave<T> &r, planewave<T> &t){
  179 + scatter(P, n1/n0, r, t);
  180 + }
  181 +
  182 + /// Calculate the scattering result when nr = n1/n0
  183 +
  184 + /// @param P is a plane representing the position and orientation of the surface
  185 + /// @param nr is the ratio n1/n0, where n0 is the refractive index outside of the surface (in the direction of the normal)
  186 + /// and n1 is the refractive index inside the surface (in the direction away from the normal)
  187 + /// @param r is the reflected component of the plane wave
  188 + /// @param t is the transmitted component of the plane wave
  189 + void scatter(stim::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){
175 190  
176 191 int facing = P.face(k); //determine which direction the plane wave is coming in
177 192  
178   - //if(facing == 0) //if the wave is tangent to the plane, return an identical wave
179   - // return *this;
180   - //else
181 193 if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr
182 194 P = P.flip(); //flip the plane
183 195 nr = 1/nr; //invert the refractive index (now nr = n0/n1)
... ... @@ -192,7 +204,7 @@ public:
192 204 bool tir = false; //flag for total internal reflection
193 205 if(theta_t != theta_t){
194 206 tir = true;
195   - theta_t = rtsPI / (T)2;
  207 + theta_t = stim::PI / (T)2;
196 208 }
197 209  
198 210 //handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
... ... @@ -205,17 +217,10 @@ public:
205 217 vec< complex<T> > Et = E0 * tp;
206 218 T phase_t = P.p().dot(k - kt); //compute the phase offset
207 219 T phase_r = P.p().dot(k - kr);
208   - //std::cout<<"Degeneracy: Head-On"<<std::endl;
209   - //std::cout<<"rs: "<<rp<<" rp: "<<rp<<" ts: "<<tp<<" tp: "<<tp<<std::endl;
210   - //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;
211 220  
212 221 //create the plane waves
213 222 r = planewave<T>(kr, Er, phase_r);
214 223 t = planewave<T>(kt, Et, phase_t);
215   -
216   - //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;
217   - //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;
218   - //std::cout<<"--------------------------------"<<std::endl;
219 224 return;
220 225 }
221 226  
... ... @@ -245,11 +250,9 @@ public:
245 250  
246 251 //compute the magnitude of the p- and s-polarized components of the incident E vector
247 252 complex<T> Ei_s = E0.dot(x_hat);
248   - //int sgn = (0 < E0.dot(y_hat)) - (E0.dot(y_hat) < 0);
249 253 int sgn = E0.dot(y_hat).sgn();
250 254 vec< complex<T> > cx_hat = x_hat;
251 255 complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
252   - //T Ei_p = ( E0 - x_hat * Ei_s ).len();
253 256 //compute the magnitude of the p- and s-polarized components of the reflected E vector
254 257 complex<T> Er_s = Ei_s * rs;
255 258 complex<T> Er_p = Ei_p * rp;
... ... @@ -257,14 +260,6 @@ public:
257 260 complex<T> Et_s = Ei_s * ts;
258 261 complex<T> Et_p = Ei_p * tp;
259 262  
260   - //std::cout<<"E0: "<<E0<<std::endl;
261   - //std::cout<<"E0 dot y_hat: "<<E0.dot(y_hat)<<std::endl;
262   - //std::cout<<"theta i: "<<theta_i<<" theta t: "<<theta_t<<std::endl;
263   - //std::cout<<"x_hat: "<<x_hat<<" y_hat: "<<y_hat<<" z_hat: "<<z_hat<<std::endl;
264   - //std::cout<<"Ei_s: "<<Ei_s<<" Ei_p: "<<Ei_p<<" Er_s: "<<Er_s<<" Er_p: "<<Er_p<<" Et_s: "<<Et_s<<" Et_p: "<<Et_p<<std::endl;
265   - //std::cout<<"rs: "<<rs<<" rp: "<<rp<<" ts: "<<ts<<" tp: "<<tp<<std::endl;
266   -
267   -
268 263 //compute the reflected E vector
269 264 vec< complex<T> > Er = vec< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
270 265 //compute the transmitted E vector
... ... @@ -273,29 +268,12 @@ public:
273 268 T phase_t = P.p().dot(k - kt);
274 269 T phase_r = P.p().dot(k - kr);
275 270  
276   - //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;
277   -
278   - //std::cout<<"phase: "<<phase<<std::endl;
279   -
280 271 //create the plane waves
281 272 r.k = kr;
282 273 r.E0 = Er * exp( complex<T>(0, phase_r) );
283   - //r.phi = phase_r;
284   -
285   - //t = bend(kt);
286   - //t.k = t.k * nr;
287 274  
288 275 t.k = kt;
289 276 t.E0 = Et * exp( complex<T>(0, phase_t) );
290   - //t.phi = phase_t;
291   - //std::cout<<"i: "<<str()<<std::endl;
292   - //std::cout<<"r: "<<r.str()<<std::endl;
293   - //std::cout<<"t: "<<t.str()<<std::endl;
294   -
295   - //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;
296   - //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;
297   - //std::cout<<"--------------------------------"<<std::endl;
298   -
299 277 }
300 278  
301 279 std::string str()
... ... @@ -305,14 +283,15 @@ public:
305 283 ss<<" "<<E0<<" e^i ( "<<k<<" . r )";
306 284 return ss.str();
307 285 }
308   -};
309   -}
  286 +}; //end planewave class
  287 +} //end namespace optics
  288 +} //end namespace stim
310 289  
311 290 template <typename T>
312   -std::ostream& operator<<(std::ostream& os, rts::planewave<T> p)
  291 +std::ostream& operator<<(std::ostream& os, stim::optics::planewave<T> p)
313 292 {
314 293 os<<p.str();
315 294 return os;
316 295 }
317 296  
318 297 -#endif
  298 +#endif
319 299 \ No newline at end of file
... ...
stim/optics/scalarbeam.h 0 → 100644
  1 +#ifndef RTS_BEAM
  2 +#define RTS_BEAM
  3 +
  4 +#include "../math/vec3.h"
  5 +#include "../optics/scalarwave.h"
  6 +#include "../math/bessel.h"
  7 +#include <vector>
  8 +
  9 +//Boost
  10 +//#include <boost/math/special_functions/bessel.hpp>
  11 +//#include <boost/math/special_functions/legendre.hpp>
  12 +
  13 +namespace stim{
  14 +
  15 + /// Function returns N randomly sampled unit direction vectors that focus along d, drawn from the cone between NA_in and NA
  16 +
  17 + template<typename T>
  18 + std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d, T NA, T NA_in = 0){
  19 +
  20 + std::vector< stim::vec3<T> > dirs(N); //allocate an array to store the focusing vectors
  21 +
  22 + ///compute the rotation operator to transform (0, 0, 1) to d
  23 + T cos_angle = d.dot(vec3<T>(0, 0, 1));
  24 + stim::matrix<T, 3> rotation;
  25 +
  26 + //if the cosine of the angle is -1, the rotation is just a flip across the z axis
  27 + if(cos_angle == -1){
  28 + rotation(2, 2) = -1;
  29 + }
  30 + else if(cos_angle != 1.0)
  31 + {
  32 + vec3<T> r_axis = vec3<T>(0, 0, 1).cross(d).norm(); //compute the axis of rotation
  33 + T angle = acos(cos_angle); //compute the angle of rotation
  34 + quaternion<T> quat; //create a quaternion describing the rotation
  35 + quat.CreateRotation(angle, r_axis);
  36 + rotation = quat.toMatrix3(); //compute the rotation matrix
  37 + }
  38 +
  39 + //find the phi values associated with the cassegrain ring
  40 + T PHI[2];
  41 + PHI[0] = (T)asin(NA);
  42 + PHI[1] = (T)asin(NA_in);
  43 +
  44 + //calculate the z-axis cylinder coordinates associated with these angles
  45 + T Z[2];
  46 + Z[0] = cos(PHI[0]);
  47 + Z[1] = cos(PHI[1]);
  48 + T range = Z[0] - Z[1];
  49 +
  50 + //draw a distribution of random phi, z values
  51 + T z, phi, theta;
  52 + //T kmag = stim::TAU / lambda;
  53 + for(int i=0; i<N; i++){ //for each sample
  54 + z = (T)((double)rand() / (double)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder
  55 + theta = (T)(((double)rand() / (double)RAND_MAX) * stim::TAU);
  56 + phi = acos(z); //project onto the sphere, computing phi in spherical coordinates
  57 +
  58 + //compute and store cartesian coordinates
  59 + vec3<T> spherical(1, theta, phi); //convert from spherical to cartesian coordinates
  60 + vec3<T> cart = spherical.sph2cart();
  61 + dirs[i] = rotation * cart; //create a sample vector
  62 + }
  63 + return dirs;
  64 + }
  65 +
  66 +/// Class stim::scalarbeam represents a beam of light focused at a point and composed of several plane waves
  67 +template<typename T>
  68 +class scalarbeam
  69 +{
  70 +public:
  71 + //enum beam_type {Uniform, Bartlett, Hamming, Hanning};
  72 +
  73 +private:
  74 +
  75 + T NA[2]; //numerical aperture of the focusing optics
  76 + vec3<T> f; //focal point
  77 + vec3<T> d; //propagation direction
  78 + stim::complex<T> A; //beam amplitude
  79 + T lambda; //beam wavelength
  80 +public:
  81 +
  82 + ///constructor: build a default beam (NA=1.0)
  83 + scalarbeam(T wavelength = 1, stim::complex<T> amplitude = 1, vec3<T> focal_point = vec3<T>(0, 0, 0), vec3<T> direction = vec3<T>(0, 0, 1), T numerical_aperture = 1, T center_obsc = 0){
  84 + lambda = wavelength;
  85 + A = amplitude;
  86 + f = focal_point;
  87 + d = direction.norm(); //make sure that the direction vector is normalized (makes calculations more efficient later on)
  88 + NA[0] = numerical_aperture;
  89 + NA[1] = center_obsc;
  90 + }
  91 +
  92 + ///Numerical Aperture functions
  93 + void setNA(T na)
  94 + {
  95 + NA[0] = (T)0;
  96 + NA[1] = na;
  97 + }
  98 + void setNA(T na0, T na1)
  99 + {
  100 + NA[0] = na0;
  101 + NA[1] = na1;
  102 + }
  103 +
  104 + //Monte-Carlo decomposition into plane waves
  105 + std::vector< scalarwave<T> > mc(size_t N = 100000) const{
  106 +
  107 + std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]); //generate a random set of N vectors forming a focus
  108 + std::vector< scalarwave<T> > samples(N); //create a vector of plane waves
  109 + T kmag = (T)stim::TAU / lambda; //calculate the wavenumber
  110 + stim::complex<T> apw; //allocate space for the amplitude at the focal point
  111 + stim::vec3<T> kpw; //declare the new k-vector based on the focused plane wave direction
  112 + for(size_t i=0; i<N; i++){ //for each sample
  113 + kpw = dirs[i] * kmag; //calculate the k-vector for the new plane wave
  114 + apw = exp(stim::complex<T>(0, kpw.dot(-f))); //calculate the amplitude for the new plane wave
  115 + samples[i] = scalarwave<T>(kpw, apw); //create a plane wave based on the direction
  116 + }
  117 +
  118 + return samples;
  119 + }
  120 +
  121 + /// Calculate the field at a given point
  122 + /// @param x is the x-coordinate of the field point
  123 + /// @param O is the number of Monte-Carlo plane-wave samples used to approximate the field
  124 + stim::complex<T> field(T x, T y, T z, size_t O){
  125 + std::vector< scalarwave<T> > W = mc(O);
  126 + T result = 0; //initialize the result to zero (0)
  127 + for(size_t i = 0; i < O; i++){ //for each plane wave
  128 + result += W[i].pos(x, y, z);
  129 + }
  130 + return result;
  131 + }
  132 +
  133 + /// Calculate the field at a set of positions
  134 + /*void field(stim::complex<T>* F, T* x, T* y, T* z, size_t N, size_t O){
  135 +
  136 + memset(F, 0, N * sizeof(stim::complex<T>));
  137 + std::vector< scalarwave<T> > W = mc(O); //get a random set of plane waves representing the beam
  138 + size_t o, n;
  139 + T px, py, pz;
  140 + for(n = 0; n < N; n++){ //for each point in the field
  141 + (x == NULL) ? px = 0 : px = x[n]; // test for NULL values
  142 + (y == NULL) ? py = 0 : py = y[n];
  143 + (z == NULL) ? pz = 0 : pz = z[n];
  144 + for(o = 0; o < O; o++){ //for each plane wave
  145 + F[n] += W[o].pos(px, py, pz);
  146 + }
  147 + }
  148 + }*/
  149 +
  150 + std::string str()
  151 + {
  152 + std::stringstream ss;
  153 + ss<<"Beam:"<<std::endl;
  154 + //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
  155 + ss<<" Beam Direction: "<<d<<std::endl;
  156 + if(NA[0] == 0)
  157 + ss<<" NA: "<<NA[1];
  158 + else
  159 + ss<<" NA: "<<NA[0]<<" -- "<<NA[1];
  160 +
  161 + return ss.str();
  162 + }
  163 +
  164 +
  165 +
  166 +}; //end beam
  167 +
  168 +template<typename T>
  169 +void cpu_scalar_psf(stim::complex<T>* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, T NA, T NA_in, int Nl){
  170 +
  171 + memset(F, 0, N * sizeof(stim::complex<T>));
  172 + T k = stim::TAU / lambda;
  173 + T jl, Pl, C, kr, cos_phi;
  174 + T cos_alpha_1 = cos(asin(NA_in));
  175 + T cos_alpha_2 = cos(asin(NA));
  176 + stim::vec3<T> p, ps;
  177 +
  178 + /*double vm;
  179 + size_t table_bytes = (Nl + 1) * sizeof(double);
  180 + double* jv = (double*) malloc( table_bytes );
  181 + double* yv = (double*) malloc( table_bytes );
  182 + double* djv= (double*) malloc( table_bytes );
  183 + double* dyv= (double*) malloc( table_bytes );
  184 + */
  185 +
  186 + T vm;
  187 + size_t table_bytes = (Nl + 1) * sizeof(T);
  188 + T* jv = (T*) malloc( table_bytes );
  189 + T* yv = (T*) malloc( table_bytes );
  190 + T* djv= (T*) malloc( table_bytes );
  191 + T* dyv= (T*) malloc( table_bytes );
  192 +
  193 + for(size_t n = 0; n < N; n++){
  194 + (x == NULL) ? p[0] = 0 : p[0] = x[n]; // test for NULL values and set positions
  195 + (y == NULL) ? p[1] = 0 : p[1] = y[n];
  196 + (z == NULL) ? p[2] = 0 : p[2] = z[n];
  197 +
  198 + ps = p.cart2sph(); //convert this point to spherical coordinates
  199 + kr = k * ps[0];
  200 + cos_phi = std::cos(ps[2]);
  201 + stim::bessjyv_sph<T>(Nl, kr, vm, jv, yv, djv, dyv);
  202 +
  203 + for(int l = 0; l <= Nl; l++){
  204 + //jl = boost::math::sph_bessel<T>(l, kr);
  205 + //jl = stim::bessjyv(l, kr
  206 + jl = (T)jv[l];
  207 + Pl = 1;//boost::math::legendre_p<T>(l, cos_phi);
  208 + C = 1;//boost::math::legendre_p<T>(l+1, cos_alpha_1) - boost::math::legendre_p<T>(l + 1, cos_alpha_2) - boost::math::legendre_p<T>(l - 1, cos_alpha_1) + boost::math::legendre_p<T>(l - 1, cos_alpha_2);
  209 + F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C;
  210 + }
  211 + F[n] *= A * stim::TAU;
  212 + }
  213 +}
  214 +
  215 +} //end namespace stim
  216 +
  217 +#endif
... ...
stim/optics/scalarwave.h 0 → 100644
  1 +#ifndef STIM_SCALARWAVE_H
  2 +#define STIM_SCALARWAVE_H
  3 +
  4 +
  5 +#include <string>
  6 +#include <sstream>
  7 +#include <cmath>
  8 +
  9 +//#include "../math/vector.h"
  10 +#include "../math/vec3.h"
  11 +#include "../math/quaternion.h"
  12 +#include "../math/constants.h"
  13 +#include "../math/plane.h"
  14 +#include "../math/complex.h"
  15 +
  16 +//CUDA
  17 +#include "../cuda/cudatools/devices.h"
  18 +#include "../cuda/cudatools/error.h"
  19 +#include "../cuda/sharedmem.cuh"
  20 +
  21 +namespace stim{
  22 +
  23 +template<typename T>
  24 +class scalarwave{
  25 +
  26 +protected:
  27 +
  28 + stim::vec3<T> k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda
  29 + stim::complex<T> E0; //amplitude
  30 +
  31 + /// Bend a plane wave via refraction, given that the new propagation direction is known
  32 + CUDA_CALLABLE scalarwave<T> bend(stim::vec3<T> kn) const{
  33 + return scalarwave<T>(kn.norm() * kmag(), E0);
  34 + }
  35 +
  36 +public:
  37 +
  38 + ///constructor: create a plane wave propagating along k
  39 + CUDA_CALLABLE scalarwave(vec3<T> kvec = stim::vec3<T>(0, 0, (T)stim::TAU), complex<T> E = 1){
  40 + k = kvec;
  41 + E0 = E;
  42 + }
  43 +
  44 + CUDA_CALLABLE scalarwave(T kx, T ky, T kz, complex<T> E = 1){
  45 + k = vec3<T>(kx, ky, kz);
  46 + E0 = E;
  47 + }
  48 +
  49 + ///multiplication operator: scale E0
  50 + CUDA_CALLABLE scalarwave<T> & operator* (const T & rhs){
  51 + E0 = E0 * rhs;
  52 + return *this;
  53 + }
  54 +
  55 + CUDA_CALLABLE T lambda() const{
  56 + return stim::TAU / k.len();
  57 + }
  58 +
  59 + CUDA_CALLABLE T kmag() const{
  60 + return k.len();
  61 + }
  62 +
  63 + CUDA_CALLABLE vec3< complex<T> > E(){
  64 + return E0;
  65 + }
  66 +
  67 + CUDA_CALLABLE vec3<T> kvec(){
  68 + return k;
  69 + }
  70 +
  71 + /// calculate the value of the field produced by the plane wave given a three-dimensional position
  72 + CUDA_CALLABLE complex<T> pos(T x, T y, T z){
  73 + return pos( stim::vec3<T>(x, y, z) );
  74 + }
  75 +
  76 + CUDA_CALLABLE complex<T> pos(vec3<T> p = vec3<T>(0, 0, 0)){
  77 + return E0 * exp(complex<T>(0, k.dot(p)));
  78 + }
  79 +
  80 + //scales k based on a transition from material ni to material nt
  81 + CUDA_CALLABLE scalarwave<T> n(T ni, T nt){
  82 + return scalarwave<T>(k * (nt / ni), E0);
  83 + }
  84 +
  85 + CUDA_CALLABLE scalarwave<T> refract(stim::vec3<T> kn) const{
  86 + return bend(kn);
  87 + }
  88 +
  89 + /// Calculate the result of a plane wave hitting an interface between two refractive indices
  90 +
  91 + /// @param P is a plane representing the position and orientation of the surface
  92 + /// @param n0 is the refractive index outside of the surface (in the direction of the normal)
  93 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  94 + /// @param r is the reflected component of the plane wave
  95 + /// @param t is the transmitted component of the plane wave
  96 + void scatter(stim::plane<T> P, T n0, T n1, scalarwave<T> &r, scalarwave<T> &t){
  97 + scatter(P, n1/n0, r, t);
  98 + }
  99 +
  100 + /// Calculate the scattering result when nr = n1/n0
  101 +
  102 + /// @param P is a plane representing the position and orientation of the surface
  103 + /// @param nr is the ratio n1/n0
  104 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  105 + /// @param r is the reflected component of the plane wave
  106 + /// @param t is the transmitted component of the plane wave
  107 + void scatter(stim::plane<T> P, T nr, scalarwave<T> &r, scalarwave<T> &t){
  108 + /*
  109 + int facing = P.face(k); //determine which direction the plane wave is coming in
  110 +
  111 + if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr
  112 + P = P.flip(); //flip the plane
  113 + nr = 1/nr; //invert the refractive index (now nr = n0/n1)
  114 + }
  115 +
  116 + //use Snell's Law to calculate the transmitted angle
  117 + T cos_theta_i = k.norm().dot(-P.norm()); //compute the cosine of theta_i
  118 + T theta_i = acos(cos_theta_i); //compute theta_i
  119 + T sin_theta_t = (1/nr) * sin(theta_i); //compute the sine of theta_t using Snell's law
  120 + T theta_t = asin(sin_theta_t); //compute the cosine of theta_t
  121 +
  122 + bool tir = false; //flag for total internal reflection
  123 + if(theta_t != theta_t){
  124 + tir = true;
  125 + theta_t = stim::PI / (T)2;
  126 + }
  127 +
  128 + //handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
  129 + if(theta_i == 0){
  130 + T rp = (1 - nr) / (1 + nr); //compute the Fresnel coefficients
  131 + T tp = 2 / (1 + nr);
  132 + vec3<T> kr = -k;
  133 + vec3<T> kt = k * nr; //set the k vectors for theta_i = 0
  134 + vec3< complex<T> > Er = E0 * rp; //compute the E vectors
  135 + vec3< complex<T> > Et = E0 * tp;
  136 + T phase_t = P.p().dot(k - kt); //compute the phase offset
  137 + T phase_r = P.p().dot(k - kr);
  138 +
  139 + //create the plane waves
  140 + r = planewave<T>(kr, Er, phase_r);
  141 + t = planewave<T>(kt, Et, phase_t);
  142 + return;
  143 + }
  144 +
  145 +
  146 + //compute the Fresnel coefficients
  147 + T rp, rs, tp, ts;
  148 + rp = tan(theta_t - theta_i) / tan(theta_t + theta_i);
  149 + rs = sin(theta_t - theta_i) / sin(theta_t + theta_i);
  150 +
  151 + if(tir){
  152 + tp = ts = 0;
  153 + }
  154 + else{
  155 + tp = ( 2 * sin(theta_t) * cos(theta_i) ) / ( sin(theta_t + theta_i) * cos(theta_t - theta_i) );
  156 + ts = ( 2 * sin(theta_t) * cos(theta_i) ) / sin(theta_t + theta_i);
  157 + }
  158 +
  159 + //compute the coordinate space for the plane of incidence
  160 + vec3<T> z_hat = -P.norm();
  161 + vec3<T> y_hat = P.parallel(k).norm();
  162 + vec3<T> x_hat = y_hat.cross(z_hat).norm();
  163 +
  164 + //compute the k vectors for r and t
  165 + vec3<T> kr, kt;
  166 + kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag();
  167 + kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr;
  168 +
  169 + //compute the magnitude of the p- and s-polarized components of the incident E vector
  170 + complex<T> Ei_s = E0.dot(x_hat);
  171 + int sgn = E0.dot(y_hat).sgn();
  172 + vec3< complex<T> > cx_hat = x_hat;
  173 + complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
  174 + //compute the magnitude of the p- and s-polarized components of the reflected E vector
  175 + complex<T> Er_s = Ei_s * rs;
  176 + complex<T> Er_p = Ei_p * rp;
  177 + //compute the magnitude of the p- and s-polarized components of the transmitted E vector
  178 + complex<T> Et_s = Ei_s * ts;
  179 + complex<T> Et_p = Ei_p * tp;
  180 +
  181 + //compute the reflected E vector
  182 + vec3< complex<T> > Er = vec3< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
  183 + //compute the transmitted E vector
  184 + vec3< complex<T> > Et = vec3< complex<T> >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s;
  185 +
  186 + T phase_t = P.p().dot(k - kt);
  187 + T phase_r = P.p().dot(k - kr);
  188 +
  189 + //create the plane waves
  190 + r.k = kr;
  191 + r.E0 = Er * exp( complex<T>(0, phase_r) );
  192 +
  193 + t.k = kt;
  194 + t.E0 = Et * exp( complex<T>(0, phase_t) );
  195 + */
  196 + }
  197 +
  198 + std::string str()
  199 + {
  200 + std::stringstream ss;
  201 + ss<<"Plane Wave:"<<std::endl;
  202 + ss<<" "<<E0<<" e^i ( "<<k<<" . r )";
  203 + return ss.str();
  204 + }
  205 +}; //end scalarwave class
  206 +
  207 +
  208 +/// CUDA kernel for computing the field produced by a batch of plane waves at an array of locations
  209 +template<typename T>
  210 +__global__ void cuda_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t n_waves){
  211 + extern __shared__ stim::scalarwave<T> shared_W[]; //declare the list of waves in shared memory
  212 +
  213 + stim::cuda::sharedMemcpy(shared_W, W, n_waves, threadIdx.x, blockDim.x); //copy the plane waves into shared memory for faster access
  214 + __syncthreads(); //synchronize threads to ensure all data is copied
  215 +
  216 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  217 + if(i >= N) return; //exit if this thread is outside the array
  218 + T px, py, pz;
  219 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values and set positions
  220 + (y == NULL) ? py = 0 : py = y[i];
  221 + (z == NULL) ? pz = 0 : pz = z[i];
  222 +
  223 + stim::complex<T> f = 0; //create a register to store the result
  224 + for(size_t w = 0; w < n_waves; w++)
  225 + f += shared_W[w].pos(px, py, pz); //evaluate the plane wave
  226 + F[i] += f; //copy the result to device memory
  227 +}
  228 +
  229 +/// evaluate a scalar wave at several points, where all arrays are on the GPU
  230 +template<typename T>
  231 +void gpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  232 +
  233 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  234 + dim3 blocks(N / threads + 1); //calculate the optimal number of blocks
  235 + cuda_scalarwave<T><<< blocks, threads >>>(F, N, x, y, z, w); //call the kernel
  236 +}
  237 +
  238 +/// Sums a series of coherent plane waves at a specified point
  239 +/// @param field is the output array of field values corresponding to each input point
  240 +/// @param x is an array of x coordinates for the field point
  241 +/// @param y is an array of y coordinates for the field point
  242 +/// @param z is an array of z coordinates for the field point
  243 +/// @param N is the number of points in the input and output arrays
  244 +/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
  245 +/// @param A is the list of amplitudes for each wave
  246 +/// @param S is the list of propagation directions for each wave
  247 +template<typename T>
  248 +void cpu_sum_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > w_array){
  249 + size_t S = w_array.size(); //store the number of waves
  250 +#ifdef NO_CUDA
  251 + memset(F, 0, N * sizeof(stim::complex<T>));
  252 + T px, py, pz;
  253 + for(size_t i = 0; i < N; i++){ // for each element in the array
  254 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values
  255 + (y == NULL) ? py = 0 : py = y[i];
  256 + (z == NULL) ? pz = 0 : pz = z[i];
  257 +
  258 + for(size_t s = 0; s < S; s++){
  259 + F[i] += w_array[s].pos(px, py, pz); //sum all plane waves at this point
  260 + }
  261 + }
  262 +#else
  263 + stim::complex<T>* dev_F; //allocate space for the field
  264 + cudaMalloc(&dev_F, N * sizeof(stim::complex<T>));
  265 + cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>)); //set the field to zero (necessary because a sum is used)
  266 +
  267 + T* dev_x = NULL; //allocate space and copy the X coordinate (if specified)
  268 + if(x != NULL){
  269 + HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T)));
  270 + HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice));
  271 + }
  272 +
  273 + T* dev_y = NULL; //allocate space and copy the Y coordinate (if specified)
  274 + if(y != NULL){
  275 + HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T)));
  276 + HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice));
  277 + }
  278 +
  279 + T* dev_z = NULL; //allocate space and copy the Z coordinate (if specified)
  280 + if(z != NULL){
  281 + HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T)));
  282 + HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice));
  283 + }
  284 +
  285 + size_t wave_bytes = sizeof(stim::scalarwave<T>);
  286 + size_t shared_bytes = stim::sharedMemPerBlock(); //calculate the maximum amount of shared memory available
  287 + size_t array_bytes = w_array.size() * wave_bytes; //calculate the maximum number of bytes required for the planewave array
  288 + size_t max_batch = shared_bytes / wave_bytes; //calculate number of plane waves that will fit into shared memory
  289 + size_t num_batches = w_array.size() / max_batch + 1; //calculate the number of batches required to process all plane waves
  290 + size_t batch_bytes = min(w_array.size(), max_batch) * wave_bytes; //initialize the batch size (in bytes) to the maximum batch required
  291 +
  292 + stim::scalarwave<T>* dev_w;
  293 + HANDLE_ERROR(cudaMalloc(&dev_w, batch_bytes)); //allocate memory for a single batch of plane waves
  294 +
  295 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  296 + dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  297 +
  298 + size_t batch_size; //declare a variable to store the size of the current batch
  299 + size_t waves_processed = 0; //initialize the number of waves processed to zero
  300 + while(waves_processed < w_array.size()){ //while there are still waves to be processed
  301 + batch_size = min<size_t>(max_batch, w_array.size() - waves_processed); //process either a whole batch, or whatever is left
  302 + batch_bytes = batch_size * sizeof(stim::scalarwave<T>);
  303 + HANDLE_ERROR(cudaMemcpy(dev_w, &w_array[waves_processed], batch_bytes, cudaMemcpyHostToDevice)); //copy the plane waves into global memory
  304 + cuda_scalarwave<T><<< blocks, threads, batch_bytes >>>(dev_F, N, dev_x, dev_y, dev_z, dev_w, batch_size); //call the kernel
  305 + waves_processed += batch_size; //increment the counter indicating how many waves have been processed
  306 + }
  307 +
  308 + cudaMemcpy(F, dev_F, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost); //copy the field from device memory
  309 +
  310 + if(x != NULL) cudaFree(dev_x); //free everything
  311 + if(y != NULL) cudaFree(dev_y);
  312 + if(z != NULL) cudaFree(dev_z);
  313 + cudaFree(dev_F);
  314 + cudaFree(dev_w);
  315 +
  316 +#endif
  317 +}
  318 +
  319 +template<typename T>
  320 +void cpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  321 + std::vector< stim::scalarwave<T> > w_array(1, w);
  322 + cpu_sum_scalarwaves(F, N, x, y, z, w_array);
  323 +}
  324 +
  325 +
  326 +/// Sums a series of coherent plane waves at a specified point
  327 +/// @param x is the x coordinate of the field point
  328 +/// @param y is the y coordinate of the field point
  329 +/// @param z is the z coordinate of the field point
  330 +/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
  331 +/// @param A is the list of amplitudes for each wave
  332 +/// @param S is the list of propagation directions for each wave
  333 +template<typename T>
  334 +CUDA_CALLABLE stim::complex<T> sum_scalarwaves(T x, T y, T z, std::vector< stim::scalarwave<T> > W){
  335 + size_t N = W.size(); //get the number of plane wave samples
  336 + stim::complex<T> field(0, 0); //initialize the field to zero (0)
  337 + stim::vec3<T> k; //allocate space for the direction vector
  338 + for(size_t i = 0; i < N; i++){
  339 + field += W[i].pos(x, y, z);
  340 + }
  341 + return field;
  342 +}
  343 +
  344 +} //end namespace stim
  345 +
  346 +template <typename T>
  347 +std::ostream& operator<<(std::ostream& os, stim::scalarwave<T> p)
  348 +{
  349 + os<<p.str();
  350 + return os;
  351 +}
  352 +
  353 +#endif
0 354 \ No newline at end of file
... ...
stim/optics/beam.h renamed to stim/optics_old/beam.h
1   -#ifndef RTS_BEAM
2   -#define RTS_BEAM
3   -
4   -#include "../math/vector.h"
5   -#include "../math/function.h"
6   -#include "../optics/planewave.h"
7   -#include <vector>
8   -
9   -namespace stim{
10   -
11   -template<typename P>
12   -class beam : public planewave<P>
13   -{
14   -public:
15   - enum beam_type {Uniform, Bartlett, Hamming, Hanning};
16   -
17   -private:
18   -
19   - P _na[2]; //numerical aperature of the focusing optics
20   - vec<P> f; //focal point
21   - function<P, P> apod; //apodization function
22   - unsigned int apod_res; //resolution of apodization filter functions
23   -
24   - void apod_uniform()
25   - {
26   - apod = (P)1;
27   - }
28   - void apod_bartlett()
29   - {
30   - apod = (P)1;
31   - apod.insert((P)1, (P)0);
32   - }
33   - void apod_hanning()
34   - {
35   - apod = (P)0;
36   - P x, y;
37   - for(unsigned int n=0; n<apod_res; n++)
38   - {
39   - x = (P)n/(P)apod_res;
40   - y = pow( cos( ((P)3.14159 * x) / 2 ), 2);
41   - apod.insert(x, y);
42   - }
43   - }
44   - void apod_hamming()
45   - {
46   - apod = (P)0;
47   - P x, y;
48   - for(unsigned int n=0; n<apod_res; n++)
49   - {
50   - x = (P)n/(P)apod_res;
51   - y = (P)27/(P)50 + ( (P)23/(P)50 ) * cos((P)3.14159 * x);
52   - apod.insert(x, y);
53   - }
54   - }
55   -
56   - void set_apod(beam_type type)
57   - {
58   - if(type == Uniform)
59   - apod_uniform();
60   - if(type == Bartlett)
61   - apod_bartlett();
62   - if(type == Hanning)
63   - apod_hanning();
64   - if(type == Hamming)
65   - apod_hamming();
66   - }
67   -
68   -public:
69   -
70   - ///constructor: build a default beam (NA=1.0)
71   - beam(
72   - vec<P> k = rts::vec<P>(0, 0, rtsTAU),
73   - vec<P> _E0 = rts::vec<P>(1, 0, 0),
74   - beam_type _apod = Uniform)
75   - : planewave<P>(k, _E0)
76   - {
77   - _na[0] = (P)0.0;
78   - _na[1] = (P)1.0;
79   - f = vec<P>( (P)0, (P)0, (P)0 );
80   - apod_res = 256; //set the default resolution for apodization filters
81   - set_apod(_apod); //set the apodization function type
82   - }
83   -
84   - beam<P> refract(rts::vec<P> kn) const{
85   -
86   - beam<P> new_beam;
87   - new_beam._na[0] = _na[0];
88   - new_beam._na[1] = _na[1];
89   -
90   -
91   - rts::planewave<P> pw = planewave<P>::bend(kn);
92   - //std::cout<<pw.str()<<std::endl;
93   -
94   - new_beam.k = pw.kvec();
95   - new_beam.E0 = pw.E();
96   -
97   - return new_beam;
98   - }
99   -
100   - ///Numerical Aperature functions
101   - void NA(P na)
102   - {
103   - _na[0] = (P)0;
104   - _na[1] = na;
105   - }
106   - void NA(P na0, P na1)
107   - {
108   - _na[0] = na0;
109   - _na[1] = na1;
110   - }
111   -
112   - /*string str() :
113   - {
114   - stringstream ss;
115