Commit 8e4f836425e5bee321a75e5ee33dee9867df0a85
1 parent
3ff136b3
started a new optical framework focused on scalar simulation
Showing
20 changed files
with
2186 additions
and
1004 deletions
Show diff stats
stim/cuda/cudatools/devices.h
... | ... | @@ -15,7 +15,7 @@ int maxThreadsPerBlock() |
15 | 15 | } |
16 | 16 | |
17 | 17 | extern "C" |
18 | -int sharedMemPerBlock() | |
18 | +size_t sharedMemPerBlock() | |
19 | 19 | { |
20 | 20 | int device; |
21 | 21 | cudaGetDevice(&device); //get the id of the current device |
... | ... | @@ -23,6 +23,16 @@ int sharedMemPerBlock() |
23 | 23 | cudaGetDeviceProperties(&props, device); |
24 | 24 | return props.sharedMemPerBlock; |
25 | 25 | } |
26 | + | |
27 | +extern "C" | |
28 | +size_t constMem() | |
29 | +{ | |
30 | + int device; | |
31 | + cudaGetDevice(&device); //get the id of the current device | |
32 | + cudaDeviceProp props; //device property structure | |
33 | + cudaGetDeviceProperties(&props, device); | |
34 | + return props.totalConstMem; | |
35 | +} | |
26 | 36 | } //end namespace rts |
27 | 37 | |
28 | 38 | #endif | ... | ... |
stim/cuda/sharedmem.cuh
... | ... | @@ -5,7 +5,7 @@ |
5 | 5 | namespace stim{ |
6 | 6 | namespace cuda{ |
7 | 7 | |
8 | - // Copies values from global memory to shared memory, optimizing threads | |
8 | + // Copies values from texture memory to shared memory, optimizing threads | |
9 | 9 | template<typename T> |
10 | 10 | __device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src, |
11 | 11 | unsigned int x, unsigned int y, unsigned int X, unsigned int Y, |
... | ... | @@ -35,6 +35,19 @@ namespace stim{ |
35 | 35 | } |
36 | 36 | } |
37 | 37 | |
38 | + // Copies values from global memory to shared memory, optimizing threads | |
39 | + template<typename T> | |
40 | + __device__ void sharedMemcpy(T* dest, T* src, size_t N, size_t tid, size_t nt){ | |
41 | + | |
42 | + size_t I = N / nt + 1; //calculate the number of iterations required to make the copy | |
43 | + size_t xi = tid; //initialize the source and destination index to the thread ID | |
44 | + for(size_t i = 0; i < I; i++){ //for each iteration | |
45 | + if(xi < N) //if the index is within the copy region | |
46 | + dest[xi] = src[xi]; //perform the copy | |
47 | + xi += nt; | |
48 | + } | |
49 | + } | |
50 | + | |
38 | 51 | |
39 | 52 | } |
40 | 53 | } | ... | ... |
stim/math/bessel.h
... | ... | @@ -17,6 +17,11 @@ static complex<double> czero(0.0,0.0); |
17 | 17 | template< typename P > |
18 | 18 | P gamma(P x) |
19 | 19 | { |
20 | + const P EPS = numeric_limits<P>::epsilon(); | |
21 | + const P FPMIN_MAG = numeric_limits<P>::min(); | |
22 | + const P FPMIN = numeric_limits<P>::lowest(); | |
23 | + const P FPMAX = numeric_limits<P>::max(); | |
24 | + | |
20 | 25 | int i,k,m; |
21 | 26 | P ga,gr,r,z; |
22 | 27 | |
... | ... | @@ -47,7 +52,7 @@ P gamma(P x) |
47 | 52 | -0.54e-14, |
48 | 53 | 0.14e-14}; |
49 | 54 | |
50 | - if (x > 171.0) return 1e308; // This value is an overflow flag. | |
55 | + if (x > 171.0) return FPMAX; // This value is an overflow flag. | |
51 | 56 | if (x == (int)x) { |
52 | 57 | if (x > 0.0) { |
53 | 58 | ga = 1.0; // use factorial |
... | ... | @@ -56,7 +61,7 @@ P gamma(P x) |
56 | 61 | } |
57 | 62 | } |
58 | 63 | else |
59 | - ga = 1e308; | |
64 | + ga = FPMAX; | |
60 | 65 | } |
61 | 66 | else { |
62 | 67 | if (fabs(x) > 1.0) { |
... | ... | @@ -89,6 +94,11 @@ template<typename P> |
89 | 94 | int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1, |
90 | 95 | P &j0p,P &j1p,P &y0p,P &y1p) |
91 | 96 | { |
97 | + const P EPS = numeric_limits<P>::epsilon(); | |
98 | + const P FPMIN_MAG = numeric_limits<P>::min(); | |
99 | + const P FPMIN = numeric_limits<P>::lowest(); | |
100 | + const P FPMAX = numeric_limits<P>::max(); | |
101 | + | |
92 | 102 | P x2,r,ec,w0,w1,r0,r1,cs0,cs1; |
93 | 103 | P cu,p0,q0,p1,q1,t1,t2; |
94 | 104 | int k,kz; |
... | ... | @@ -157,12 +167,12 @@ int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1, |
157 | 167 | if (x == 0.0) { |
158 | 168 | j0 = 1.0; |
159 | 169 | j1 = 0.0; |
160 | - y0 = -1e308; | |
161 | - y1 = -1e308; | |
170 | + y0 = -FPMIN; | |
171 | + y1 = -FPMIN; | |
162 | 172 | j0p = 0.0; |
163 | 173 | j1p = 0.5; |
164 | - y0p = 1e308; | |
165 | - y1p = 1e308; | |
174 | + y0p = FPMAX; | |
175 | + y1p = FPMAX; | |
166 | 176 | return 0; |
167 | 177 | } |
168 | 178 | x2 = x*x; |
... | ... | @@ -329,7 +339,7 @@ int msta1(P x,int mp) |
329 | 339 | for (i=0;i<20;i++) { |
330 | 340 | nn = (int)(n1-(n1-n0)/(1.0-f0/f1)); |
331 | 341 | f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-mp; |
332 | - if (abs(nn-n1) < 1) break; | |
342 | + if (std::abs(nn-n1) < 1) break; | |
333 | 343 | n0 = n1; |
334 | 344 | f0 = f1; |
335 | 345 | n1 = nn; |
... | ... | @@ -361,7 +371,7 @@ int msta2(P x,int n,int mp) |
361 | 371 | for (i=0;i<20;i++) { |
362 | 372 | nn = (int)(n1-(n1-n0)/(1.0-f0/f1)); |
363 | 373 | f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-obj; |
364 | - if (abs(nn-n1) < 1) break; | |
374 | + if (std::abs(nn-n1) < 1) break; | |
365 | 375 | n0 = n1; |
366 | 376 | f0 = f1; |
367 | 377 | n1 = nn; |
... | ... | @@ -596,21 +606,26 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv, |
596 | 606 | P b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk; |
597 | 607 | int j,k,l,m,n,kz; |
598 | 608 | |
609 | + const P EPS = numeric_limits<P>::epsilon(); | |
610 | + const P FPMIN_MAG = numeric_limits<P>::min(); | |
611 | + const P FPMIN = numeric_limits<P>::lowest(); | |
612 | + const P FPMAX = numeric_limits<P>::max(); | |
613 | + | |
599 | 614 | x2 = x*x; |
600 | 615 | n = (int)v; |
601 | 616 | v0 = v-n; |
602 | 617 | if ((x < 0.0) || (v < 0.0)) return 1; |
603 | - if (x < 1e-15) { | |
618 | + if (x < EPS) { | |
604 | 619 | for (k=0;k<=n;k++) { |
605 | 620 | jv[k] = 0.0; |
606 | - yv[k] = -1e308; | |
621 | + yv[k] = FPMIN; | |
607 | 622 | djv[k] = 0.0; |
608 | - dyv[k] = 1e308; | |
623 | + dyv[k] = FPMAX; | |
609 | 624 | if (v0 == 0.0) { |
610 | 625 | jv[0] = 1.0; |
611 | 626 | djv[1] = 0.5; |
612 | 627 | } |
613 | - else djv[0] = 1e308; | |
628 | + else djv[0] = FPMAX; | |
614 | 629 | } |
615 | 630 | vm = v; |
616 | 631 | return 0; |
... | ... | @@ -623,7 +638,7 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv, |
623 | 638 | for (k=1;k<=40;k++) { |
624 | 639 | r *= -0.25*x2/(k*(k+vl)); |
625 | 640 | bjvl += r; |
626 | - if (fabs(r) < fabs(bjvl)*1e-15) break; | |
641 | + if (fabs(r) < fabs(bjvl)*EPS) break; | |
627 | 642 | } |
628 | 643 | vg = 1.0 + vl; |
629 | 644 | a = pow(0.5*x,vl)/gamma(vg); |
... | ... | @@ -686,7 +701,7 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv, |
686 | 701 | if (m < n) n = m; |
687 | 702 | else m = msta2(x,n,15); |
688 | 703 | f2 = 0.0; |
689 | - f1 = 1.0e-100; | |
704 | + f1 = FPMIN_MAG; | |
690 | 705 | for (k=m;k>=0;k--) { |
691 | 706 | f = 2.0*(v0+k+1.0)*f1/x-f2; |
692 | 707 | if (k <= n) jv[k] = f; |
... | ... | @@ -766,17 +781,17 @@ int bessjyv_sph(int v, P z, P &vm, P* cjv, |
766 | 781 | P* cyv, P* cjvp, P* cyvp) |
767 | 782 | { |
768 | 783 | //first, compute the bessel functions of fractional order |
769 | - bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); | |
784 | + bessjyv<P>(v + (P)0.5, z, vm, cjv, cyv, cjvp, cyvp); | |
770 | 785 | |
771 | 786 | //iterate through each and scale |
772 | 787 | for(int n = 0; n<=v; n++) |
773 | 788 | { |
774 | 789 | |
775 | - cjv[n] = cjv[n] * sqrt(rtsPI/(z * 2.0)); | |
776 | - cyv[n] = cyv[n] * sqrt(rtsPI/(z * 2.0)); | |
790 | + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); | |
791 | + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); | |
777 | 792 | |
778 | - cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(rtsPI / (z * 2.0)); | |
779 | - cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(rtsPI / (z * 2.0)); | |
793 | + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); | |
794 | + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); | |
780 | 795 | } |
781 | 796 | |
782 | 797 | return 0; |
... | ... | @@ -1498,11 +1513,11 @@ int cbessjyva_sph(int v,complex<P> z,P &vm,complex<P>*cjv, |
1498 | 1513 | for(int n = 0; n<=v; n++) |
1499 | 1514 | { |
1500 | 1515 | |
1501 | - cjv[n] = cjv[n] * sqrt(rtsPI/(z * 2.0)); | |
1502 | - cyv[n] = cyv[n] * sqrt(rtsPI/(z * 2.0)); | |
1516 | + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); | |
1517 | + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); | |
1503 | 1518 | |
1504 | - cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(rtsPI / (z * 2.0)); | |
1505 | - cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(rtsPI / (z * 2.0)); | |
1519 | + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); | |
1520 | + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); | |
1506 | 1521 | } |
1507 | 1522 | |
1508 | 1523 | return 0; | ... | ... |
stim/math/complex.h
1 | -/*RTS Complex number class. This class is CUDA compatible, | |
2 | -and can therefore be used in CUDA code and on CUDA devices. | |
3 | -*/ | |
1 | +/// CUDA compatible complex number class | |
4 | 2 | |
5 | -#ifndef RTS_COMPLEX | |
6 | -#define RTS_COMPLEX | |
3 | +#ifndef STIM_COMPLEX | |
4 | +#define STIM_COMPLEX | |
7 | 5 | |
8 | -#include "../cuda/callable.h" | |
6 | +#include "../cuda/cudatools/callable.h" | |
9 | 7 | #include <cmath> |
10 | 8 | #include <string> |
11 | 9 | #include <sstream> |
... | ... | @@ -230,12 +228,6 @@ struct complex |
230 | 228 | return result; |
231 | 229 | } |
232 | 230 | |
233 | - /*CUDA_CALLABLE complex<T> pow(int y) | |
234 | - { | |
235 | - | |
236 | - return pow((double)y); | |
237 | - }*/ | |
238 | - | |
239 | 231 | CUDA_CALLABLE complex<T> pow(T y) |
240 | 232 | { |
241 | 233 | complex<T> result; |
... | ... | @@ -328,8 +320,31 @@ struct complex |
328 | 320 | return *this; |
329 | 321 | } |
330 | 322 | |
323 | + | |
324 | + | |
331 | 325 | }; |
332 | 326 | |
327 | +/// Cast an array of complex values to an array of real values | |
328 | +template<typename T> | |
329 | +static void real(T* r, complex<T>* c, size_t n){ | |
330 | + for(size_t i = 0; i < n; i++) | |
331 | + r[i] = c[i].real(); | |
332 | +} | |
333 | + | |
334 | +/// Cast an array of complex values to an array of real values | |
335 | +template<typename T> | |
336 | +static void imag(T* r, complex<T>* c, size_t n){ | |
337 | + for(size_t i = 0; i < n; i++) | |
338 | + r[i] = c[i].imag(); | |
339 | +} | |
340 | + | |
341 | +/// Calculate the magnitude of an array of complex values | |
342 | +template<typename T> | |
343 | +static void abs(T* m, complex<T>* c, size_t n){ | |
344 | + for(size_t i = 0; i < n; i++) | |
345 | + m[i] = c[i].abs(); | |
346 | +} | |
347 | + | |
333 | 348 | } //end RTS namespace |
334 | 349 | |
335 | 350 | //addition |
... | ... | @@ -432,17 +447,6 @@ CUDA_CALLABLE static T imag(stim::complex<T> a) |
432 | 447 | return a.i; |
433 | 448 | } |
434 | 449 | |
435 | -//trigonometric functions | |
436 | -//template<class A> | |
437 | -/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x) | |
438 | -{ | |
439 | - stim::complex<float> result; | |
440 | - result.r = sinf(x.r) * coshf(x.i); | |
441 | - result.i = cosf(x.r) * sinhf(x.i); | |
442 | - | |
443 | - return result; | |
444 | -}*/ | |
445 | - | |
446 | 450 | template<class A> |
447 | 451 | CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x) |
448 | 452 | { |
... | ... | @@ -453,17 +457,6 @@ CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x) |
453 | 457 | return result; |
454 | 458 | } |
455 | 459 | |
456 | -//floating point template | |
457 | -//template<class A> | |
458 | -/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x) | |
459 | -{ | |
460 | - stim::complex<float> result; | |
461 | - result.r = cosf(x.r) * coshf(x.i); | |
462 | - result.i = -(sinf(x.r) * sinhf(x.i)); | |
463 | - | |
464 | - return result; | |
465 | -}*/ | |
466 | - | |
467 | 460 | template<class A> |
468 | 461 | CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x) |
469 | 462 | { |
... | ... | @@ -496,10 +489,4 @@ std::istream& operator>>(std::istream& is, stim::complex<A>& x) |
496 | 489 | return is; //return the stream |
497 | 490 | } |
498 | 491 | |
499 | -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7 | |
500 | -//template<class T> using rtsComplex = stim::complex<T>; | |
501 | -//#endif | |
502 | - | |
503 | - | |
504 | - | |
505 | 492 | #endif | ... | ... |
stim/math/constants.h
1 | -#ifndef RTS_CONSTANTS_H | |
2 | -#define RTS_CONSTANTS_H | |
1 | +#ifndef STIM_CONSTANTS_H | |
2 | +#define STIM_CONSTANTS_H | |
3 | 3 | |
4 | -#define stimPI 3.14159 | |
5 | -#define stimTAU 2 * rtsPI | |
4 | +namespace stim{ | |
5 | + const double PI = 3.1415926535897932384626433832795028841971693993751058209749445923078164062862; | |
6 | + const double TAU = 2 * stim::PI; | |
7 | +} | |
6 | 8 | |
7 | 9 | #endif | ... | ... |
stim/math/matrix.h
... | ... | @@ -50,10 +50,8 @@ struct matrix |
50 | 50 | return *this; |
51 | 51 | } |
52 | 52 | |
53 | - | |
54 | 53 | template<typename Y> |
55 | - CUDA_CALLABLE vec<Y> operator*(vec<Y> rhs) | |
56 | - { | |
54 | + vec<Y> operator*(vec<Y> rhs){ | |
57 | 55 | unsigned int N = rhs.size(); |
58 | 56 | |
59 | 57 | vec<Y> result; |
... | ... | @@ -66,6 +64,16 @@ struct matrix |
66 | 64 | return result; |
67 | 65 | } |
68 | 66 | |
67 | + template<typename Y> | |
68 | + CUDA_CALLABLE vec3<Y> operator*(vec3<Y> rhs){ | |
69 | + vec3<Y> result = 0; | |
70 | + for(int r=0; r<3; r++) | |
71 | + for(int c=0; c<3; c++) | |
72 | + result[r] += (*this)(r, c) * rhs[c]; | |
73 | + | |
74 | + return result; | |
75 | + } | |
76 | + | |
69 | 77 | std::string toStr() |
70 | 78 | { |
71 | 79 | std::stringstream ss; |
... | ... | @@ -82,10 +90,6 @@ struct matrix |
82 | 90 | |
83 | 91 | return ss.str(); |
84 | 92 | } |
85 | - | |
86 | - | |
87 | - | |
88 | - | |
89 | 93 | }; |
90 | 94 | |
91 | 95 | } //end namespace rts | ... | ... |
1 | +#ifndef STIM_MESHGRID_H | |
2 | +#define STIM_MESHGRID_H | |
3 | + | |
4 | +namespace stim{ | |
5 | + | |
6 | + /// Create a 2D grid based on a pair of vectors representing the grid spacing (see Matlab) | |
7 | + /// @param X is an [nx x ny] array that will store the X coordinates for each 2D point | |
8 | + /// @param Y is an [nx x ny] array that will store the Y coordinates for each 2D point | |
9 | + /// @param x is an [nx] array that provides the positions of grid points in the x direction | |
10 | + /// @param nx is the number of grid points in the x direction | |
11 | + /// @param y is an [ny] array that provides the positions of grid points in the y direction | |
12 | + /// @param ny is the number of grid points in the y direction | |
13 | + template<typename T> | |
14 | + void meshgrid(T* X, T* Y, T* x, size_t nx, T* y, size_t ny){ | |
15 | + size_t xi, yi; //allocate index variables | |
16 | + for(yi = 0; yi < ny; yi++){ //iterate through each column | |
17 | + for(xi = 0; xi < nx; xi++){ //iterate through each row | |
18 | + X[yi * nx + xi] = x[xi]; | |
19 | + Y[yi * nx + xi] = y[yi]; | |
20 | + } | |
21 | + } | |
22 | + } | |
23 | + | |
24 | + /// Creates an array of n equally spaced values in the range [xmin xmax] | |
25 | + /// @param X is an array of length n that stores the values | |
26 | + /// @param xmin is the start point of the array | |
27 | + /// @param xmax is the end point of the array | |
28 | + /// @param n is the number of points in the array | |
29 | + template<typename T> | |
30 | + void linspace(T* X, T xmin, T xmax, size_t n){ | |
31 | + T alpha; | |
32 | + for(size_t i = 0; i < n; i++){ | |
33 | + alpha = (T)i / (T)n; | |
34 | + X[i] = (1 - alpha) * xmin + alpha * xmax; | |
35 | + } | |
36 | + } | |
37 | + | |
38 | + | |
39 | +} | |
40 | + | |
41 | + | |
42 | +#endif | |
0 | 43 | \ No newline at end of file | ... | ... |
stim/math/quaternion.h
... | ... | @@ -26,13 +26,13 @@ public: |
26 | 26 | |
27 | 27 | CUDA_CALLABLE void CreateRotation(T theta, T ux, T uy, T uz){ |
28 | 28 | |
29 | - vec<T> u(ux, uy, uz); | |
29 | + vec3<T> u(ux, uy, uz); | |
30 | 30 | CreateRotation(theta, u); |
31 | 31 | } |
32 | 32 | |
33 | - CUDA_CALLABLE void CreateRotation(T theta, vec<T> u){ | |
33 | + CUDA_CALLABLE void CreateRotation(T theta, vec3<T> u){ | |
34 | 34 | |
35 | - vec<T> u_hat = u.norm(); | |
35 | + vec3<T> u_hat = u.norm(); | |
36 | 36 | |
37 | 37 | //assign the given Euler rotation to this quaternion |
38 | 38 | w = (T)cos(theta/2); |
... | ... | @@ -41,9 +41,9 @@ public: |
41 | 41 | z = u_hat[2]*(T)sin(theta/2); |
42 | 42 | } |
43 | 43 | |
44 | - void CreateRotation(vec<T> from, vec<T> to){ | |
44 | + CUDA_CALLABLE void CreateRotation(vec3<T> from, vec3<T> to){ | |
45 | 45 | |
46 | - vec<T> r = from.cross(to); //compute the rotation vector | |
46 | + vec3<T> r = from.cross(to); //compute the rotation vector | |
47 | 47 | T theta = asin(r.len()); //compute the angle of the rotation about r |
48 | 48 | //deal with a zero vector (both k and kn point in the same direction) |
49 | 49 | if(theta == (T)0){ | ... | ... |
1 | +#ifndef STIM_VEC3_H | |
2 | +#define STIM_VEC3_H | |
3 | + | |
4 | + | |
5 | +#include <stim/cuda/cudatools/callable.h> | |
6 | + | |
7 | + | |
8 | +namespace stim{ | |
9 | + | |
10 | + | |
11 | +/// A class designed to act as a 3D vector with CUDA compatibility | |
12 | +template<typename T> | |
13 | +class vec3{ | |
14 | + | |
15 | +protected: | |
16 | + T ptr[3]; | |
17 | + | |
18 | +public: | |
19 | + | |
20 | + CUDA_CALLABLE vec3(){} | |
21 | + | |
22 | + CUDA_CALLABLE vec3(T v){ | |
23 | + ptr[0] = ptr[1] = ptr[2] = v; | |
24 | + } | |
25 | + | |
26 | + CUDA_CALLABLE vec3(T x, T y, T z){ | |
27 | + ptr[0] = x; | |
28 | + ptr[1] = y; | |
29 | + ptr[2] = z; | |
30 | + } | |
31 | + | |
32 | + //copy constructor | |
33 | + CUDA_CALLABLE vec3( const vec3<T>& other){ | |
34 | + ptr[0] = other.ptr[0]; | |
35 | + ptr[1] = other.ptr[1]; | |
36 | + ptr[2] = other.ptr[2]; | |
37 | + } | |
38 | + | |
39 | + //access an element using an index | |
40 | + CUDA_CALLABLE T& operator[](int idx){ | |
41 | + return ptr[idx]; | |
42 | + } | |
43 | + | |
44 | +/// Casting operator. Creates a new vector with a new type U. | |
45 | + template< typename U > | |
46 | + CUDA_CALLABLE operator vec3<U>(){ | |
47 | + vec3<U> result; | |
48 | + result.ptr[0] = (U)ptr[0]; | |
49 | + result.ptr[1] = (U)ptr[1]; | |
50 | + result.ptr[2] = (U)ptr[2]; | |
51 | + | |
52 | + return result; | |
53 | + } | |
54 | + | |
55 | + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter) | |
56 | + CUDA_CALLABLE T len_sq() const{ | |
57 | + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2]; | |
58 | + } | |
59 | + | |
60 | + /// computes the Euclidean length of the vector | |
61 | + CUDA_CALLABLE T len() const{ | |
62 | + return sqrt(len_sq()); | |
63 | + } | |
64 | + | |
65 | + | |
66 | + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi]) | |
67 | + CUDA_CALLABLE vec3<T> cart2sph() const{ | |
68 | + vec3<T> sph; | |
69 | + sph.ptr[0] = len(); | |
70 | + sph.ptr[1] = std::atan2(ptr[1], ptr[0]); | |
71 | + if(sph.ptr[0] == 0) | |
72 | + sph.ptr[2] = 0; | |
73 | + else | |
74 | + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]); | |
75 | + return sph; | |
76 | + } | |
77 | + | |
78 | + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi]) | |
79 | + CUDA_CALLABLE vec3<T> sph2cart() const{ | |
80 | + vec3<T> cart; | |
81 | + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]); | |
82 | + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]); | |
83 | + cart.ptr[2] = ptr[0] * std::cos(ptr[2]); | |
84 | + | |
85 | + return cart; | |
86 | + } | |
87 | + | |
88 | + /// Computes the normalized vector (where each coordinate is divided by the L2 norm) | |
89 | + CUDA_CALLABLE vec3<T> norm() const{ | |
90 | + vec3<T> result; | |
91 | + T l = len(); //compute the vector length | |
92 | + return (*this) / l; | |
93 | + } | |
94 | + | |
95 | + /// Computes the cross product of a 3-dimensional vector | |
96 | + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{ | |
97 | + | |
98 | + vec3<T> result; | |
99 | + | |
100 | + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]); | |
101 | + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]); | |
102 | + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]); | |
103 | + | |
104 | + return result; | |
105 | + } | |
106 | + | |
107 | + /// Compute the Euclidean inner (dot) product | |
108 | + CUDA_CALLABLE T dot(vec3<T> rhs) const{ | |
109 | + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2]; | |
110 | + } | |
111 | + | |
112 | + /// Arithmetic addition operator | |
113 | + | |
114 | + /// @param rhs is the right-hand-side operator for the addition | |
115 | + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{ | |
116 | + vec3<T> result; | |
117 | + result.ptr[0] = ptr[0] + rhs[0]; | |
118 | + result.ptr[1] = ptr[1] + rhs[1]; | |
119 | + result.ptr[2] = ptr[2] + rhs[2]; | |
120 | + return result; | |
121 | + } | |
122 | + | |
123 | + /// Arithmetic addition to a scalar | |
124 | + | |
125 | + /// @param rhs is the right-hand-side operator for the addition | |
126 | + CUDA_CALLABLE vec3<T> operator+(T rhs) const{ | |
127 | + vec3<T> result; | |
128 | + result.ptr[0] = ptr[0] + rhs; | |
129 | + result.ptr[1] = ptr[1] + rhs; | |
130 | + result.ptr[2] = ptr[2] + rhs; | |
131 | + return result; | |
132 | + } | |
133 | + | |
134 | + /// Arithmetic subtraction operator | |
135 | + | |
136 | + /// @param rhs is the right-hand-side operator for the subtraction | |
137 | + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{ | |
138 | + vec3<T> result; | |
139 | + result.ptr[0] = ptr[0] - rhs[0]; | |
140 | + result.ptr[1] = ptr[1] - rhs[1]; | |
141 | + result.ptr[2] = ptr[2] - rhs[2]; | |
142 | + return result; | |
143 | + } | |
144 | + /// Arithmetic subtraction to a scalar | |
145 | + | |
146 | + /// @param rhs is the right-hand-side operator for the addition | |
147 | + CUDA_CALLABLE vec3<T> operator-(T rhs) const{ | |
148 | + vec3<T> result; | |
149 | + result.ptr[0] = ptr[0] - rhs; | |
150 | + result.ptr[1] = ptr[1] - rhs; | |
151 | + result.ptr[2] = ptr[2] - rhs; | |
152 | + return result; | |
153 | + } | |
154 | + | |
155 | + /// Arithmetic scalar multiplication operator | |
156 | + | |
157 | + /// @param rhs is the right-hand-side operator for the subtraction | |
158 | + CUDA_CALLABLE vec3<T> operator*(T rhs) const{ | |
159 | + vec3<T> result; | |
160 | + result.ptr[0] = ptr[0] * rhs; | |
161 | + result.ptr[1] = ptr[1] * rhs; | |
162 | + result.ptr[2] = ptr[2] * rhs; | |
163 | + return result; | |
164 | + } | |
165 | + | |
166 | + /// Arithmetic scalar division operator | |
167 | + | |
168 | + /// @param rhs is the right-hand-side operator for the subtraction | |
169 | + CUDA_CALLABLE vec3<T> operator/(T rhs) const{ | |
170 | + return (*this) * ((T)1.0/rhs); | |
171 | + } | |
172 | + | |
173 | + /// Multiplication by a scalar, followed by assignment | |
174 | + CUDA_CALLABLE vec3<T> operator*=(T rhs){ | |
175 | + ptr[0] = ptr[0] * rhs; | |
176 | + ptr[1] = ptr[1] * rhs; | |
177 | + ptr[2] = ptr[2] * rhs; | |
178 | + return *this; | |
179 | + } | |
180 | + | |
181 | + /// Addition and assignment | |
182 | + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){ | |
183 | + ptr[0] = ptr[0] + rhs; | |
184 | + ptr[1] = ptr[1] + rhs; | |
185 | + ptr[2] = ptr[2] + rhs; | |
186 | + return *this; | |
187 | + } | |
188 | + | |
189 | + /// Assign a scalar to all values | |
190 | + CUDA_CALLABLE vec3<T> & operator=(T rhs){ | |
191 | + ptr[0] = ptr[0] = rhs; | |
192 | + ptr[1] = ptr[1] = rhs; | |
193 | + ptr[2] = ptr[2] = rhs; | |
194 | + return *this; | |
195 | + } | |
196 | + | |
197 | + /// Casting and assignment | |
198 | + template<typename Y> | |
199 | + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){ | |
200 | + ptr[0] = (T)rhs.ptr[0]; | |
201 | + ptr[1] = (T)rhs.ptr[1]; | |
202 | + ptr[2] = (T)rhs.ptr[2]; | |
203 | + return *this; | |
204 | + } | |
205 | + | |
206 | + /// Unary minus (returns the negative of the vector) | |
207 | + CUDA_CALLABLE vec3<T> operator-() const{ | |
208 | + vec3<T> result; | |
209 | + result.ptr[0] = -ptr[0]; | |
210 | + result.ptr[1] = -ptr[1]; | |
211 | + result.ptr[2] = -ptr[2]; | |
212 | + return result; | |
213 | + } | |
214 | + | |
215 | + | |
216 | + /// Outputs the vector as a string | |
217 | + std::string str() const{ | |
218 | + std::stringstream ss; | |
219 | + | |
220 | + size_t N = size(); | |
221 | + | |
222 | + ss<<"["; | |
223 | + for(size_t i=0; i<N; i++) | |
224 | + { | |
225 | + ss<<at(i); | |
226 | + if(i != N-1) | |
227 | + ss<<", "; | |
228 | + } | |
229 | + ss<<"]"; | |
230 | + | |
231 | + return ss.str(); | |
232 | + } | |
233 | + }; //end class triple | |
234 | +} //end namespace stim | |
235 | + | |
236 | +/// Multiply a vector by a constant when the vector is on the right hand side | |
237 | +template <typename T> | |
238 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | |
239 | + return rhs * lhs; | |
240 | +} | |
241 | + | |
242 | +#endif | |
0 | 243 | \ No newline at end of file | ... | ... |
stim/math/vector.h
stim/optics/planewave.h
1 | -#ifndef RTS_PLANEWAVE | |
2 | -#define RTS_PLANEWAVE | |
1 | +#ifndef STIM_PLANEWAVE_H | |
2 | +#define STIM_PLANEWAVE_H | |
3 | 3 | |
4 | 4 | #include <string> |
5 | 5 | #include <sstream> |
6 | +#include <cmath> | |
6 | 7 | |
7 | 8 | #include "../math/vector.h" |
8 | 9 | #include "../math/quaternion.h" |
9 | 10 | #include "../math/constants.h" |
10 | 11 | #include "../math/plane.h" |
11 | -#include "../cuda/callable.h" | |
12 | - | |
13 | -/*Basic conversions used here (assuming a vacuum) | |
14 | - lambda = | |
15 | -*/ | |
12 | +#include "../math/complex.h" | |
16 | 13 | |
17 | 14 | namespace stim{ |
15 | + namespace optics{ | |
16 | + | |
17 | + /// evaluate the scalar field produced by a plane wave at a point (x, y, z) | |
18 | + | |
19 | + /// @param x is the x-coordinate of the point | |
20 | + /// @param y is the y-coordinate of the point | |
21 | + /// @param z is the z-coordinate of the point | |
22 | + /// @param A is the amplitude of the plane wave, specifically the field at (0, 0, 0) | |
23 | + /// @param kx is the k-vector component in the x direction | |
24 | + /// @param ky is the k-vector component in the y direction | |
25 | + /// @param kz is the k-vector component in the z direction | |
26 | + template<typename T> | |
27 | + stim::complex<T> planewave_scalar(T x, T y, T z, stim::complex<T> A, T kx, T ky, T kz){ | |
28 | + T d = x * kx + y * ky + z * kz; //calculate the dot product between k and p = (x, y, z) to find the distance p is along the propagation direction | |
29 | + stim::complex<T> di = stim::complex<T>(0, d); //calculate the phase shift that will have to be applied to propagate the wave distance d | |
30 | + return A * exp(di); //multiply the phase term by the amplitude at (0, 0, 0) to propagate the wave to p | |
31 | + } | |
32 | + | |
33 | + /// evaluate the scalar field produced by a plane wave at several positions | |
34 | + | |
35 | + /// @param field is a pre-allocated block of memory that will store the complex field at all points | |
36 | + /// @param N is the number of field values to be evaluated | |
37 | + /// @param x is a set of x coordinates defining positions within the field (NULL implies that all values are zero) | |
38 | + /// @param y is a set of y coordinates defining positions within the field (NULL implies that all values are zero) | |
39 | + /// @param z is a set of z coordinates defining positions within the field (NULL implies that all values are zero) | |
40 | + /// @param A is the amplitude of the plane wave, specifically the field at (0, 0, 0) | |
41 | + /// @param kx is the k-vector component in the x direction | |
42 | + /// @param ky is the k-vector component in the y direction | |
43 | + /// @param kz is the k-vector component in the z direction | |
44 | + template<typename T> | |
45 | + void cpu_planewave_scalar(stim::complex<T>* field, size_t N, T* x, T* y = NULL, T* z = NULL, stim::complex<T> A = 1.0, T kx = 0.0, T ky = 0.0, T kz = 0.0){ | |
46 | + T px, py, pz; | |
47 | + for(size_t i = 0; i < N; i++){ // for each element in the array | |
48 | + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values | |
49 | + (y == NULL) ? py = 0 : py = y[i]; | |
50 | + (z == NULL) ? pz = 0 : pz = z[i]; | |
51 | + | |
52 | + field[i] = planewave_scalar(px, py, pz, A, kx, ky, kz); // call the single-value plane wave function | |
53 | + } | |
54 | + } | |
18 | 55 | |
19 | 56 | template<typename T> |
20 | 57 | class planewave{ |
21 | 58 | |
22 | 59 | protected: |
23 | 60 | |
24 | - vec<T> k; //k = tau / lambda | |
25 | - vec< complex<T> > E0; //amplitude | |
26 | - //T phi; | |
27 | - | |
28 | - CUDA_CALLABLE planewave<T> bend(rts::vec<T> kn) const{ | |
61 | + stim::vec<T> k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda | |
62 | + stim::vec< stim::complex<T> > E0; //amplitude (for a scalar plane wave, only E0[0] is used) | |
29 | 63 | |
30 | - vec<T> kn_hat = kn.norm(); //normalize the new k | |
31 | - vec<T> k_hat = k.norm(); //normalize the current k | |
64 | + /// Bend a plane wave via refraction, given that the new propagation direction is known | |
65 | + CUDA_CALLABLE planewave<T> bend(stim::vec<T> kn) const{ | |
32 | 66 | |
33 | - //std::cout<<"PLANE WAVE BENDING------------------"<<std::endl; | |
34 | - //std::cout<<"kn_hat: "<<kn_hat<<" k_hat: "<<k_hat<<std::endl; | |
67 | + stim::vec<T> kn_hat = kn.norm(); //normalize the new k | |
68 | + stim::vec<T> k_hat = k.norm(); //normalize the current k | |
35 | 69 | |
36 | - planewave<T> new_p; //create a new plane wave | |
70 | + planewave<T> new_p; //create a new plane wave | |
37 | 71 | |
38 | - //if kn is equal to k or -k, handle the degenerate case | |
39 | - T k_dot_kn = k_hat.dot(kn_hat); | |
72 | + T k_dot_kn = k_hat.dot(kn_hat); //if kn is equal to k or -k, handle the degenerate case | |
40 | 73 | |
41 | 74 | //if k . n < 0, then the bend is a reflection |
42 | - //flip k_hat | |
43 | - if(k_dot_kn < 0) k_hat = -k_hat; | |
75 | + if(k_dot_kn < 0) k_hat = -k_hat; //flip k_hat | |
44 | 76 | |
45 | - //std::cout<<"k dot kn: "<<k_dot_kn<<std::endl; | |
46 | - | |
47 | - //std::cout<<"k_dot_kn: "<<k_dot_kn<<std::endl; | |
48 | 77 | if(k_dot_kn == -1){ |
49 | 78 | new_p.k = -k; |
50 | 79 | new_p.E0 = E0; |
... | ... | @@ -56,28 +85,11 @@ protected: |
56 | 85 | return new_p; |
57 | 86 | } |
58 | 87 | |
59 | - vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector | |
60 | - | |
61 | - //std::cout<<"r: "<<r<<std::endl; | |
62 | - | |
63 | - T theta = asin(r.len()); //compute the angle of the rotation about r | |
64 | - | |
65 | - | |
66 | - | |
67 | - //deal with a zero vector (both k and kn point in the same direction) | |
68 | - //if(theta == (T)0) | |
69 | - //{ | |
70 | - // new_p = *this; | |
71 | - // return new_p; | |
72 | - //} | |
73 | - | |
74 | - //create a quaternion to capture the rotation | |
75 | - quaternion<T> q; | |
76 | - q.CreateRotation(theta, r.norm()); | |
77 | - | |
78 | - //apply the rotation to E0 | |
79 | - vec< complex<T> > E0n = q.toMatrix3() * E0; | |
80 | - | |
88 | + vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector | |
89 | + T theta = asin(r.len()); //compute the angle of the rotation about r | |
90 | + quaternion<T> q; //create a quaternion to capture the rotation | |
91 | + q.CreateRotation(theta, r.norm()); | |
92 | + vec< complex<T> > E0n = q.toMatrix3() * E0; //apply the rotation to E0 | |
81 | 93 | new_p.k = kn_hat * kmag(); |
82 | 94 | new_p.E0 = E0n; |
83 | 95 | |
... | ... | @@ -86,16 +98,9 @@ protected: |
86 | 98 | |
87 | 99 | public: |
88 | 100 | |
89 | - | |
90 | - ///constructor: create a plane wave propagating along z, polarized along x | |
91 | - /*planewave(T lambda = (T)1) | |
92 | - { | |
93 | - k = rts::vec<T>(0, 0, 1) * (TAU/lambda); | |
94 | - E0 = rts::vec<T>(1, 0, 0); | |
95 | - }*/ | |
96 | - ///constructor: create a plane wave propagating along k, polarized along _E0, at frequency _omega | |
97 | - CUDA_CALLABLE planewave(vec<T> kvec = rts::vec<T>(0, 0, rtsTAU), | |
98 | - vec< complex<T> > E = rts::vec<T>(1, 0, 0), T phase = 0) | |
101 | + ///constructor: create a plane wave propagating along k | |
102 | + CUDA_CALLABLE planewave(vec<T> kvec = stim::vec<T>(0, 0, stim::TAU), | |
103 | + vec< complex<T> > E = stim::vec<T>(1, 0, 0)) | |
99 | 104 | { |
100 | 105 | //phi = phase; |
101 | 106 | |
... | ... | @@ -107,27 +112,23 @@ public: |
107 | 112 | else{ |
108 | 113 | vec< complex<T> > s = (k_hat.cross(E)).norm(); //compute an orthogonal side vector |
109 | 114 | vec< complex<T> > E_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector |
110 | - E0 = E_hat * E_hat.dot(E); //compute the projection of _E0 onto E0_hat | |
115 | + E0 = E_hat;// * E_hat.dot(E); //compute the projection of _E0 onto E0_hat | |
111 | 116 | } |
112 | 117 | |
113 | 118 | E0 = E0 * exp( complex<T>(0, phase) ); |
114 | 119 | } |
115 | 120 | |
116 | 121 | ///multiplication operator: scale E0 |
117 | - CUDA_CALLABLE planewave<T> & operator* (const T & rhs) | |
118 | - { | |
119 | - | |
122 | + CUDA_CALLABLE planewave<T> & operator* (const T & rhs){ | |
120 | 123 | E0 = E0 * rhs; |
121 | 124 | return *this; |
122 | 125 | } |
123 | 126 | |
124 | - CUDA_CALLABLE T lambda() const | |
125 | - { | |
126 | - return rtsTAU / k.len(); | |
127 | + CUDA_CALLABLE T lambda() const{ | |
128 | + return stim::TAU / k.len(); | |
127 | 129 | } |
128 | 130 | |
129 | - CUDA_CALLABLE T kmag() const | |
130 | - { | |
131 | + CUDA_CALLABLE T kmag() const{ | |
131 | 132 | return k.len(); |
132 | 133 | } |
133 | 134 | |
... | ... | @@ -139,14 +140,11 @@ public: |
139 | 140 | return k; |
140 | 141 | } |
141 | 142 | |
142 | - /*CUDA_CALLABLE T phase(){ | |
143 | - return phi; | |
143 | + /// calculate the value of the field produced by the plane wave given a three-dimensional position | |
144 | + CUDA_CALLABLE vec< complex<T> > pos(T x, T y, T z){ | |
145 | + return pos( stim::vec<T>(x, y, z) ); | |
144 | 146 | } |
145 | 147 | |
146 | - CUDA_CALLABLE void phase(T p){ | |
147 | - phi = p; | |
148 | - }*/ | |
149 | - | |
150 | 148 | CUDA_CALLABLE vec< complex<T> > pos(vec<T> p = vec<T>(0, 0, 0)){ |
151 | 149 | vec< complex<T> > result; |
152 | 150 | |
... | ... | @@ -166,18 +164,32 @@ public: |
166 | 164 | return planewave<T>(k * (nt / ni), E0); |
167 | 165 | } |
168 | 166 | |
169 | - CUDA_CALLABLE planewave<T> refract(rts::vec<T> kn) const | |
170 | - { | |
167 | + CUDA_CALLABLE planewave<T> refract(stim::vec<T> kn) const{ | |
171 | 168 | return bend(kn); |
172 | 169 | } |
173 | 170 | |
174 | - void scatter(rts::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){ | |
171 | + /// Calculate the result of a plane wave hitting an interface between two refractive indices | |
172 | + | |
173 | + /// @param P is a plane representing the position and orientation of the surface | |
174 | + /// @param n0 is the refractive index outside of the surface (in the direction of the normal) | |
175 | + /// @param n1 is the refractive index inside the surface (in the direction away from the normal) | |
176 | + /// @param r is the reflected component of the plane wave | |
177 | + /// @param t is the transmitted component of the plane wave | |
178 | + void scatter(stim::plane<T> P, T n0, T n1, planewave<T> &r, planewave<T> &t){ | |
179 | + scatter(P, n1/n0, r, t); | |
180 | + } | |
181 | + | |
182 | + /// Calculate the scattering result when nr = n1/n0 | |
183 | + | |
184 | + /// @param P is a plane representing the position and orientation of the surface | |
185 | + /// @param r is the ration n1/n0 | |
186 | + /// @param n1 is the refractive index inside the surface (in the direction away from the normal) | |
187 | + /// @param r is the reflected component of the plane wave | |
188 | + /// @param t is the transmitted component of the plane wave | |
189 | + void scatter(stim::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){ | |
175 | 190 | |
176 | 191 | int facing = P.face(k); //determine which direction the plane wave is coming in |
177 | 192 | |
178 | - //if(facing == 0) //if the wave is tangent to the plane, return an identical wave | |
179 | - // return *this; | |
180 | - //else | |
181 | 193 | if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr |
182 | 194 | P = P.flip(); //flip the plane |
183 | 195 | nr = 1/nr; //invert the refractive index (now nr = n0/n1) |
... | ... | @@ -192,7 +204,7 @@ public: |
192 | 204 | bool tir = false; //flag for total internal reflection |
193 | 205 | if(theta_t != theta_t){ |
194 | 206 | tir = true; |
195 | - theta_t = rtsPI / (T)2; | |
207 | + theta_t = stim::PI / (T)2; | |
196 | 208 | } |
197 | 209 | |
198 | 210 | //handle the degenerate case where theta_i is 0 (the plane wave hits head-on) |
... | ... | @@ -205,17 +217,10 @@ public: |
205 | 217 | vec< complex<T> > Et = E0 * tp; |
206 | 218 | T phase_t = P.p().dot(k - kt); //compute the phase offset |
207 | 219 | T phase_r = P.p().dot(k - kr); |
208 | - //std::cout<<"Degeneracy: Head-On"<<std::endl; | |
209 | - //std::cout<<"rs: "<<rp<<" rp: "<<rp<<" ts: "<<tp<<" tp: "<<tp<<std::endl; | |
210 | - //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl; | |
211 | 220 | |
212 | 221 | //create the plane waves |
213 | 222 | r = planewave<T>(kr, Er, phase_r); |
214 | 223 | t = planewave<T>(kt, Et, phase_t); |
215 | - | |
216 | - //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl; | |
217 | - //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl; | |
218 | - //std::cout<<"--------------------------------"<<std::endl; | |
219 | 224 | return; |
220 | 225 | } |
221 | 226 | |
... | ... | @@ -245,11 +250,9 @@ public: |
245 | 250 | |
246 | 251 | //compute the magnitude of the p- and s-polarized components of the incident E vector |
247 | 252 | complex<T> Ei_s = E0.dot(x_hat); |
248 | - //int sgn = (0 < E0.dot(y_hat)) - (E0.dot(y_hat) < 0); | |
249 | 253 | int sgn = E0.dot(y_hat).sgn(); |
250 | 254 | vec< complex<T> > cx_hat = x_hat; |
251 | 255 | complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn; |
252 | - //T Ei_p = ( E0 - x_hat * Ei_s ).len(); | |
253 | 256 | //compute the magnitude of the p- and s-polarized components of the reflected E vector |
254 | 257 | complex<T> Er_s = Ei_s * rs; |
255 | 258 | complex<T> Er_p = Ei_p * rp; |
... | ... | @@ -257,14 +260,6 @@ public: |
257 | 260 | complex<T> Et_s = Ei_s * ts; |
258 | 261 | complex<T> Et_p = Ei_p * tp; |
259 | 262 | |
260 | - //std::cout<<"E0: "<<E0<<std::endl; | |
261 | - //std::cout<<"E0 dot y_hat: "<<E0.dot(y_hat)<<std::endl; | |
262 | - //std::cout<<"theta i: "<<theta_i<<" theta t: "<<theta_t<<std::endl; | |
263 | - //std::cout<<"x_hat: "<<x_hat<<" y_hat: "<<y_hat<<" z_hat: "<<z_hat<<std::endl; | |
264 | - //std::cout<<"Ei_s: "<<Ei_s<<" Ei_p: "<<Ei_p<<" Er_s: "<<Er_s<<" Er_p: "<<Er_p<<" Et_s: "<<Et_s<<" Et_p: "<<Et_p<<std::endl; | |
265 | - //std::cout<<"rs: "<<rs<<" rp: "<<rp<<" ts: "<<ts<<" tp: "<<tp<<std::endl; | |
266 | - | |
267 | - | |
268 | 263 | //compute the reflected E vector |
269 | 264 | vec< complex<T> > Er = vec< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s; |
270 | 265 | //compute the transmitted E vector |
... | ... | @@ -273,29 +268,12 @@ public: |
273 | 268 | T phase_t = P.p().dot(k - kt); |
274 | 269 | T phase_r = P.p().dot(k - kr); |
275 | 270 | |
276 | - //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl; | |
277 | - | |
278 | - //std::cout<<"phase: "<<phase<<std::endl; | |
279 | - | |
280 | 271 | //create the plane waves |
281 | 272 | r.k = kr; |
282 | 273 | r.E0 = Er * exp( complex<T>(0, phase_r) ); |
283 | - //r.phi = phase_r; | |
284 | - | |
285 | - //t = bend(kt); | |
286 | - //t.k = t.k * nr; | |
287 | 274 | |
288 | 275 | t.k = kt; |
289 | 276 | t.E0 = Et * exp( complex<T>(0, phase_t) ); |
290 | - //t.phi = phase_t; | |
291 | - //std::cout<<"i: "<<str()<<std::endl; | |
292 | - //std::cout<<"r: "<<r.str()<<std::endl; | |
293 | - //std::cout<<"t: "<<t.str()<<std::endl; | |
294 | - | |
295 | - //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl; | |
296 | - //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl; | |
297 | - //std::cout<<"--------------------------------"<<std::endl; | |
298 | - | |
299 | 277 | } |
300 | 278 | |
301 | 279 | std::string str() |
... | ... | @@ -305,14 +283,15 @@ public: |
305 | 283 | ss<<" "<<E0<<" e^i ( "<<k<<" . r )"; |
306 | 284 | return ss.str(); |
307 | 285 | } |
308 | -}; | |
309 | -} | |
286 | +}; //end planewave class | |
287 | +} //end namespace optics | |
288 | +} //end namespace stim | |
310 | 289 | |
311 | 290 | template <typename T> |
312 | -std::ostream& operator<<(std::ostream& os, rts::planewave<T> p) | |
291 | +std::ostream& operator<<(std::ostream& os, stim::optics::planewave<T> p) | |
313 | 292 | { |
314 | 293 | os<<p.str(); |
315 | 294 | return os; |
316 | 295 | } |
317 | 296 | |
318 | 297 | -#endif |
298 | +#endif | |
319 | 299 | \ No newline at end of file | ... | ... |
1 | +#ifndef RTS_BEAM | |
2 | +#define RTS_BEAM | |
3 | + | |
4 | +#include "../math/vec3.h" | |
5 | +#include "../optics/scalarwave.h" | |
6 | +#include "../math/bessel.h" | |
7 | +#include <vector> | |
8 | + | |
9 | +//Boost | |
10 | +//#include <boost/math/special_functions/bessel.hpp> | |
11 | +//#include <boost/math/special_functions/legendre.hpp> | |
12 | + | |
13 | +namespace stim{ | |
14 | + | |
15 | + /// Function returns the value of the scalar field produced by a beam with the specified parameters | |
16 | + | |
17 | + template<typename T> | |
18 | + std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d, T NA, T NA_in = 0){ | |
19 | + | |
20 | + std::vector< stim::vec3<T> > dirs(N); //allocate an array to store the focusing vectors | |
21 | + | |
22 | + ///compute the rotation operator to transform (0, 0, 1) to k | |
23 | + T cos_angle = d.dot(vec3<T>(0, 0, 1)); | |
24 | + stim::matrix<T, 3> rotation; | |
25 | + | |
26 | + //if the cosine of the angle is -1, the rotation is just a flip across the z axis | |
27 | + if(cos_angle == -1){ | |
28 | + rotation(2, 2) = -1; | |
29 | + } | |
30 | + else if(cos_angle != 1.0) | |
31 | + { | |
32 | + vec3<T> r_axis = vec3<T>(0, 0, 1).cross(d).norm(); //compute the axis of rotation | |
33 | + T angle = acos(cos_angle); //compute the angle of rotation | |
34 | + quaternion<T> quat; //create a quaternion describing the rotation | |
35 | + quat.CreateRotation(angle, r_axis); | |
36 | + rotation = quat.toMatrix3(); //compute the rotation matrix | |
37 | + } | |
38 | + | |
39 | + //find the phi values associated with the cassegrain ring | |
40 | + T PHI[2]; | |
41 | + PHI[0] = (T)asin(NA); | |
42 | + PHI[1] = (T)asin(NA_in); | |
43 | + | |
44 | + //calculate the z-axis cylinder coordinates associated with these angles | |
45 | + T Z[2]; | |
46 | + Z[0] = cos(PHI[0]); | |
47 | + Z[1] = cos(PHI[1]); | |
48 | + T range = Z[0] - Z[1]; | |
49 | + | |
50 | + //draw a distribution of random phi, z values | |
51 | + T z, phi, theta; | |
52 | + //T kmag = stim::TAU / lambda; | |
53 | + for(int i=0; i<N; i++){ //for each sample | |
54 | + z = (T)((double)rand() / (double)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder | |
55 | + theta = (T)(((double)rand() / (double)RAND_MAX) * stim::TAU); | |
56 | + phi = acos(z); //project onto the sphere, computing phi in spherical coordinates | |
57 | + | |
58 | + //compute and store cartesian coordinates | |
59 | + vec3<T> spherical(1, theta, phi); //convert from spherical to cartesian coordinates | |
60 | + vec3<T> cart = spherical.sph2cart(); | |
61 | + dirs[i] = rotation * cart; //create a sample vector | |
62 | + } | |
63 | + return dirs; | |
64 | + } | |
65 | + | |
66 | +/// Class stim::beam represents a beam of light focused at a point and composed of several plane waves | |
67 | +template<typename T> | |
68 | +class scalarbeam | |
69 | +{ | |
70 | +public: | |
71 | + //enum beam_type {Uniform, Bartlett, Hamming, Hanning}; | |
72 | + | |
73 | +private: | |
74 | + | |
75 | + T NA[2]; //numerical aperature of the focusing optics | |
76 | + vec3<T> f; //focal point | |
77 | + vec3<T> d; //propagation direction | |
78 | + stim::complex<T> A; //beam amplitude | |
79 | + T lambda; //beam wavelength | |
80 | +public: | |
81 | + | |
82 | + ///constructor: build a default beam (NA=1.0) | |
83 | + scalarbeam(T wavelength = 1, stim::complex<T> amplitude = 1, vec3<T> focal_point = vec3<T>(0, 0, 0), vec3<T> direction = vec3<T>(0, 0, 1), T numerical_aperture = 1, T center_obsc = 0){ | |
84 | + lambda = wavelength; | |
85 | + A = amplitude; | |
86 | + f = focal_point; | |
87 | + d = direction.norm(); //make sure that the direction vector is normalized (makes calculations more efficient later on) | |
88 | + NA[0] = numerical_aperture; | |
89 | + NA[1] = center_obsc; | |
90 | + } | |
91 | + | |
92 | + ///Numerical Aperature functions | |
93 | + void setNA(T na) | |
94 | + { | |
95 | + NA[0] = (T)0; | |
96 | + NA[1] = na; | |
97 | + } | |
98 | + void setNA(T na0, T na1) | |
99 | + { | |
100 | + NA[0] = na0; | |
101 | + NA[1] = na1; | |
102 | + } | |
103 | + | |
104 | + //Monte-Carlo decomposition into plane waves | |
105 | + std::vector< scalarwave<T> > mc(size_t N = 100000) const{ | |
106 | + | |
107 | + std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]); //generate a random set of N vectors forming a focus | |
108 | + std::vector< scalarwave<T> > samples(N); //create a vector of plane waves | |
109 | + T kmag = (T)stim::TAU / lambda; //calculate the wavenumber | |
110 | + stim::complex<T> apw; //allocate space for the amplitude at the focal point | |
111 | + stim::vec3<T> kpw; //declare the new k-vector based on the focused plane wave direction | |
112 | + for(size_t i=0; i<N; i++){ //for each sample | |
113 | + kpw = dirs[i] * kmag; //calculate the k-vector for the new plane wave | |
114 | + apw = exp(stim::complex<T>(0, kpw.dot(-f))); //calculate the amplitude for the new plane wave | |
115 | + samples[i] = scalarwave<T>(kpw, apw); //create a plane wave based on the direction | |
116 | + } | |
117 | + | |
118 | + return samples; | |
119 | + } | |
120 | + | |
121 | + /// Calculate the field at a given point | |
122 | + /// @param x is the x-coordinate of the field point | |
123 | + /// @O is the approximation accuracy | |
124 | + stim::complex<T> field(T x, T y, T z, size_t O){ | |
125 | + std::vector< scalarwave<T> > W = mc(O); | |
126 | + T result = 0; //initialize the result to zero (0) | |
127 | + for(size_t i = 0; i < O; i++){ //for each plane wave | |
128 | + result += W[i].pos(x, y, z); | |
129 | + } | |
130 | + return result; | |
131 | + } | |
132 | + | |
133 | + /// Calculate the field at a set of positions | |
134 | + /*void field(stim::complex<T>* F, T* x, T* y, T* z, size_t N, size_t O){ | |
135 | + | |
136 | + memset(F, 0, N * sizeof(stim::complex<T>)); | |
137 | + std::vector< scalarwave<T> > W = mc(O); //get a random set of plane waves representing the beam | |
138 | + size_t o, n; | |
139 | + T px, py, pz; | |
140 | + for(n = 0; n < N; n++){ //for each point in the field | |
141 | + (x == NULL) ? px = 0 : px = x[n]; // test for NULL values | |
142 | + (y == NULL) ? py = 0 : py = y[n]; | |
143 | + (z == NULL) ? pz = 0 : pz = z[n]; | |
144 | + for(o = 0; o < O; o++){ //for each plane wave | |
145 | + F[n] += W[o].pos(px, py, pz); | |
146 | + } | |
147 | + } | |
148 | + }*/ | |
149 | + | |
150 | + std::string str() | |
151 | + { | |
152 | + std::stringstream ss; | |
153 | + ss<<"Beam:"<<std::endl; | |
154 | + //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl; | |
155 | + ss<<" Beam Direction: "<<d<<std::endl; | |
156 | + if(NA[0] == 0) | |
157 | + ss<<" NA: "<<NA[1]; | |
158 | + else | |
159 | + ss<<" NA: "<<NA[0]<<" -- "<<NA[1]; | |
160 | + | |
161 | + return ss.str(); | |
162 | + } | |
163 | + | |
164 | + | |
165 | + | |
166 | +}; //end beam | |
167 | + | |
168 | +template<typename T> | |
169 | +void cpu_scalar_psf(stim::complex<T>* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, T NA, T NA_in, int Nl){ | |
170 | + | |
171 | + memset(F, 0, N * sizeof(stim::complex<T>)); | |
172 | + T k = stim::TAU / lambda; | |
173 | + T jl, Pl, C, kr, cos_phi; | |
174 | + T cos_alpha_1 = cos(asin(NA_in)); | |
175 | + T cos_alpha_2 = cos(asin(NA)); | |
176 | + stim::vec3<T> p, ps; | |
177 | + | |
178 | + /*double vm; | |
179 | + size_t table_bytes = (Nl + 1) * sizeof(double); | |
180 | + double* jv = (double*) malloc( table_bytes ); | |
181 | + double* yv = (double*) malloc( table_bytes ); | |
182 | + double* djv= (double*) malloc( table_bytes ); | |
183 | + double* dyv= (double*) malloc( table_bytes ); | |
184 | + */ | |
185 | + | |
186 | + T vm; | |
187 | + size_t table_bytes = (Nl + 1) * sizeof(T); | |
188 | + T* jv = (T*) malloc( table_bytes ); | |
189 | + T* yv = (T*) malloc( table_bytes ); | |
190 | + T* djv= (T*) malloc( table_bytes ); | |
191 | + T* dyv= (T*) malloc( table_bytes ); | |
192 | + | |
193 | + for(size_t n = 0; n < N; n++){ | |
194 | + (x == NULL) ? p[0] = 0 : p[0] = x[n]; // test for NULL values and set positions | |
195 | + (y == NULL) ? p[1] = 0 : p[1] = y[n]; | |
196 | + (z == NULL) ? p[2] = 0 : p[2] = z[n]; | |
197 | + | |
198 | + ps = p.cart2sph(); //convert this point to spherical coordinates | |
199 | + kr = k * ps[0]; | |
200 | + cos_phi = std::cos(ps[2]); | |
201 | + stim::bessjyv_sph<T>(Nl, kr, vm, jv, yv, djv, dyv); | |
202 | + | |
203 | + for(int l = 0; l <= Nl; l++){ | |
204 | + //jl = boost::math::sph_bessel<T>(l, kr); | |
205 | + //jl = stim::bessjyv(l, kr | |
206 | + jl = (T)jv[l]; | |
207 | + Pl = 1;//boost::math::legendre_p<T>(l, cos_phi); | |
208 | + C = 1;//boost::math::legendre_p<T>(l+1, cos_alpha_1) - boost::math::legendre_p<T>(l + 1, cos_alpha_2) - boost::math::legendre_p<T>(l - 1, cos_alpha_1) + boost::math::legendre_p<T>(l - 1, cos_alpha_2); | |
209 | + F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C; | |
210 | + } | |
211 | + F[n] *= A * stim::TAU; | |
212 | + } | |
213 | +} | |
214 | + | |
215 | +} //end namespace stim | |
216 | + | |
217 | +#endif | ... | ... |
1 | +#ifndef STIM_SCALARWAVE_H | |
2 | +#define STIM_SCALARWAVE_H | |
3 | + | |
4 | + | |
5 | +#include <string> | |
6 | +#include <sstream> | |
7 | +#include <cmath> | |
8 | + | |
9 | +//#include "../math/vector.h" | |
10 | +#include "../math/vec3.h" | |
11 | +#include "../math/quaternion.h" | |
12 | +#include "../math/constants.h" | |
13 | +#include "../math/plane.h" | |
14 | +#include "../math/complex.h" | |
15 | + | |
16 | +//CUDA | |
17 | +#include "../cuda/cudatools/devices.h" | |
18 | +#include "../cuda/cudatools/error.h" | |
19 | +#include "../cuda/sharedmem.cuh" | |
20 | + | |
21 | +namespace stim{ | |
22 | + | |
23 | +template<typename T> | |
24 | +class scalarwave{ | |
25 | + | |
26 | +protected: | |
27 | + | |
28 | + stim::vec3<T> k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda | |
29 | + stim::complex<T> E0; //amplitude | |
30 | + | |
31 | + /// Bend a plane wave via refraction, given that the new propagation direction is known | |
32 | + CUDA_CALLABLE scalarwave<T> bend(stim::vec3<T> kn) const{ | |
33 | + return scalarwave<T>(kn.norm() * kmag(), E0); | |
34 | + } | |
35 | + | |
36 | +public: | |
37 | + | |
38 | + ///constructor: create a plane wave propagating along k | |
39 | + CUDA_CALLABLE scalarwave(vec3<T> kvec = stim::vec3<T>(0, 0, (T)stim::TAU), complex<T> E = 1){ | |
40 | + k = kvec; | |
41 | + E0 = E; | |
42 | + } | |
43 | + | |
44 | + CUDA_CALLABLE scalarwave(T kx, T ky, T kz, complex<T> E = 1){ | |
45 | + k = vec3<T>(kx, ky, kz); | |
46 | + E0 = E; | |
47 | + } | |
48 | + | |
49 | + ///multiplication operator: scale E0 | |
50 | + CUDA_CALLABLE scalarwave<T> & operator* (const T & rhs){ | |
51 | + E0 = E0 * rhs; | |
52 | + return *this; | |
53 | + } | |
54 | + | |
55 | + CUDA_CALLABLE T lambda() const{ | |
56 | + return stim::TAU / k.len(); | |
57 | + } | |
58 | + | |
59 | + CUDA_CALLABLE T kmag() const{ | |
60 | + return k.len(); | |
61 | + } | |
62 | + | |
63 | + CUDA_CALLABLE vec3< complex<T> > E(){ | |
64 | + return E0; | |
65 | + } | |
66 | + | |
67 | + CUDA_CALLABLE vec3<T> kvec(){ | |
68 | + return k; | |
69 | + } | |
70 | + | |
71 | + /// calculate the value of the field produced by the plane wave given a three-dimensional position | |
72 | + CUDA_CALLABLE complex<T> pos(T x, T y, T z){ | |
73 | + return pos( stim::vec3<T>(x, y, z) ); | |
74 | + } | |
75 | + | |
76 | + CUDA_CALLABLE complex<T> pos(vec3<T> p = vec3<T>(0, 0, 0)){ | |
77 | + return E0 * exp(complex<T>(0, k.dot(p))); | |
78 | + } | |
79 | + | |
80 | + //scales k based on a transition from material ni to material nt | |
81 | + CUDA_CALLABLE scalarwave<T> n(T ni, T nt){ | |
82 | + return scalarwave<T>(k * (nt / ni), E0); | |
83 | + } | |
84 | + | |
85 | + CUDA_CALLABLE scalarwave<T> refract(stim::vec3<T> kn) const{ | |
86 | + return bend(kn); | |
87 | + } | |
88 | + | |
89 | + /// Calculate the result of a plane wave hitting an interface between two refractive indices | |
90 | + | |
91 | + /// @param P is a plane representing the position and orientation of the surface | |
92 | + /// @param n0 is the refractive index outside of the surface (in the direction of the normal) | |
93 | + /// @param n1 is the refractive index inside the surface (in the direction away from the normal) | |
94 | + /// @param r is the reflected component of the plane wave | |
95 | + /// @param t is the transmitted component of the plane wave | |
96 | + void scatter(stim::plane<T> P, T n0, T n1, scalarwave<T> &r, scalarwave<T> &t){ | |
97 | + scatter(P, n1/n0, r, t); | |
98 | + } | |
99 | + | |
100 | + /// Calculate the scattering result when nr = n1/n0 | |
101 | + | |
102 | + /// @param P is a plane representing the position and orientation of the surface | |
103 | + /// @param r is the ration n1/n0 | |
104 | + /// @param n1 is the refractive index inside the surface (in the direction away from the normal) | |
105 | + /// @param r is the reflected component of the plane wave | |
106 | + /// @param t is the transmitted component of the plane wave | |
107 | + void scatter(stim::plane<T> P, T nr, scalarwave<T> &r, scalarwave<T> &t){ | |
108 | + /* | |
109 | + int facing = P.face(k); //determine which direction the plane wave is coming in | |
110 | + | |
111 | + if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr | |
112 | + P = P.flip(); //flip the plane | |
113 | + nr = 1/nr; //invert the refractive index (now nr = n0/n1) | |
114 | + } | |
115 | + | |
116 | + //use Snell's Law to calculate the transmitted angle | |
117 | + T cos_theta_i = k.norm().dot(-P.norm()); //compute the cosine of theta_i | |
118 | + T theta_i = acos(cos_theta_i); //compute theta_i | |
119 | + T sin_theta_t = (1/nr) * sin(theta_i); //compute the sine of theta_t using Snell's law | |
120 | + T theta_t = asin(sin_theta_t); //compute the cosine of theta_t | |
121 | + | |
122 | + bool tir = false; //flag for total internal reflection | |
123 | + if(theta_t != theta_t){ | |
124 | + tir = true; | |
125 | + theta_t = stim::PI / (T)2; | |
126 | + } | |
127 | + | |
128 | + //handle the degenerate case where theta_i is 0 (the plane wave hits head-on) | |
129 | + if(theta_i == 0){ | |
130 | + T rp = (1 - nr) / (1 + nr); //compute the Fresnel coefficients | |
131 | + T tp = 2 / (1 + nr); | |
132 | + vec3<T> kr = -k; | |
133 | + vec3<T> kt = k * nr; //set the k vectors for theta_i = 0 | |
134 | + vec3< complex<T> > Er = E0 * rp; //compute the E vectors | |
135 | + vec3< complex<T> > Et = E0 * tp; | |
136 | + T phase_t = P.p().dot(k - kt); //compute the phase offset | |
137 | + T phase_r = P.p().dot(k - kr); | |
138 | + | |
139 | + //create the plane waves | |
140 | + r = planewave<T>(kr, Er, phase_r); | |
141 | + t = planewave<T>(kt, Et, phase_t); | |
142 | + return; | |
143 | + } | |
144 | + | |
145 | + | |
146 | + //compute the Fresnel coefficients | |
147 | + T rp, rs, tp, ts; | |
148 | + rp = tan(theta_t - theta_i) / tan(theta_t + theta_i); | |
149 | + rs = sin(theta_t - theta_i) / sin(theta_t + theta_i); | |
150 | + | |
151 | + if(tir){ | |
152 | + tp = ts = 0; | |
153 | + } | |
154 | + else{ | |
155 | + tp = ( 2 * sin(theta_t) * cos(theta_i) ) / ( sin(theta_t + theta_i) * cos(theta_t - theta_i) ); | |
156 | + ts = ( 2 * sin(theta_t) * cos(theta_i) ) / sin(theta_t + theta_i); | |
157 | + } | |
158 | + | |
159 | + //compute the coordinate space for the plane of incidence | |
160 | + vec3<T> z_hat = -P.norm(); | |
161 | + vec3<T> y_hat = P.parallel(k).norm(); | |
162 | + vec3<T> x_hat = y_hat.cross(z_hat).norm(); | |
163 | + | |
164 | + //compute the k vectors for r and t | |
165 | + vec3<T> kr, kt; | |
166 | + kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag(); | |
167 | + kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr; | |
168 | + | |
169 | + //compute the magnitude of the p- and s-polarized components of the incident E vector | |
170 | + complex<T> Ei_s = E0.dot(x_hat); | |
171 | + int sgn = E0.dot(y_hat).sgn(); | |
172 | + vec3< complex<T> > cx_hat = x_hat; | |
173 | + complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn; | |
174 | + //compute the magnitude of the p- and s-polarized components of the reflected E vector | |
175 | + complex<T> Er_s = Ei_s * rs; | |
176 | + complex<T> Er_p = Ei_p * rp; | |
177 | + //compute the magnitude of the p- and s-polarized components of the transmitted E vector | |
178 | + complex<T> Et_s = Ei_s * ts; | |
179 | + complex<T> Et_p = Ei_p * tp; | |
180 | + | |
181 | + //compute the reflected E vector | |
182 | + vec3< complex<T> > Er = vec3< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s; | |
183 | + //compute the transmitted E vector | |
184 | + vec3< complex<T> > Et = vec3< complex<T> >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s; | |
185 | + | |
186 | + T phase_t = P.p().dot(k - kt); | |
187 | + T phase_r = P.p().dot(k - kr); | |
188 | + | |
189 | + //create the plane waves | |
190 | + r.k = kr; | |
191 | + r.E0 = Er * exp( complex<T>(0, phase_r) ); | |
192 | + | |
193 | + t.k = kt; | |
194 | + t.E0 = Et * exp( complex<T>(0, phase_t) ); | |
195 | + */ | |
196 | + } | |
197 | + | |
198 | + std::string str() | |
199 | + { | |
200 | + std::stringstream ss; | |
201 | + ss<<"Plane Wave:"<<std::endl; | |
202 | + ss<<" "<<E0<<" e^i ( "<<k<<" . r )"; | |
203 | + return ss.str(); | |
204 | + } | |
205 | +}; //end planewave class | |
206 | + | |
207 | + | |
208 | +/// CUDA kernel for computing the field produced by a batch of plane waves at an array of locations | |
209 | +template<typename T> | |
210 | +__global__ void cuda_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t n_waves){ | |
211 | + extern __shared__ stim::scalarwave<T> shared_W[]; //declare the list of waves in shared memory | |
212 | + | |
213 | + stim::cuda::sharedMemcpy(shared_W, W, n_waves, threadIdx.x, blockDim.x); //copy the plane waves into shared memory for faster access | |
214 | + __syncthreads(); //synchronize threads to insure all data is copied | |
215 | + | |
216 | + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array | |
217 | + if(i >= N) return; //exit if this thread is outside the array | |
218 | + T px, py, pz; | |
219 | + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values and set positions | |
220 | + (y == NULL) ? py = 0 : py = y[i]; | |
221 | + (z == NULL) ? pz = 0 : pz = z[i]; | |
222 | + | |
223 | + stim::complex<T> f = 0; //create a register to store the result | |
224 | + for(size_t w = 0; w < n_waves; w++) | |
225 | + f += shared_W[w].pos(px, py, pz); //evaluate the plane wave | |
226 | + F[i] += f; //copy the result to device memory | |
227 | +} | |
228 | + | |
229 | +/// evaluate a scalar wave at several points, where all arrays are on the GPU | |
230 | +template<typename T> | |
231 | +void gpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){ | |
232 | + | |
233 | + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device | |
234 | + dim3 blocks(N / threads + 1); //calculate the optimal number of blocks | |
235 | + cuda_scalarwave<T><<< blocks, threads >>>(F, N, x, y, z, w); //call the kernel | |
236 | +} | |
237 | + | |
238 | +/// Sums a series of coherent plane waves at a specified point | |
239 | +/// @param field is the output array of field values corresponding to each input point | |
240 | +/// @param x is an array of x coordinates for the field point | |
241 | +/// @param y is an array of y coordinates for the field point | |
242 | +/// @param z is an array of z coordinates for the field point | |
243 | +/// @param N is the number of points in the input and output arrays | |
244 | +/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength) | |
245 | +/// @param A is the list of amplitudes for each wave | |
246 | +/// @param S is the list of propagation directions for each wave | |
247 | +template<typename T> | |
248 | +void cpu_sum_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > w_array){ | |
249 | + size_t S = w_array.size(); //store the number of waves | |
250 | +#ifdef NO_CUDA | |
251 | + memset(F, 0, N * sizeof(stim::complex<T>)); | |
252 | + T px, py, pz; | |
253 | + for(size_t i = 0; i < N; i++){ // for each element in the array | |
254 | + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values | |
255 | + (y == NULL) ? py = 0 : py = y[i]; | |
256 | + (z == NULL) ? pz = 0 : pz = z[i]; | |
257 | + | |
258 | + for(size_t s = 0; s < S; s++){ | |
259 | + F[i] += w_array[s].pos(px, py, pz); //sum all plane waves at this point | |
260 | + } | |
261 | + } | |
262 | +#else | |
263 | + stim::complex<T>* dev_F; //allocate space for the field | |
264 | + cudaMalloc(&dev_F, N * sizeof(stim::complex<T>)); | |
265 | + cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>)); //set the field to zero (necessary because a sum is used) | |
266 | + | |
267 | + T* dev_x = NULL; //allocate space and copy the X coordinate (if specified) | |
268 | + if(x != NULL){ | |
269 | + HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T))); | |
270 | + HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice)); | |
271 | + } | |
272 | + | |
273 | + T* dev_y = NULL; //allocate space and copy the Y coordinate (if specified) | |
274 | + if(y != NULL){ | |
275 | + HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T))); | |
276 | + HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice)); | |
277 | + } | |
278 | + | |
279 | + T* dev_z = NULL; //allocate space and copy the Z coordinate (if specified) | |
280 | + if(z != NULL){ | |
281 | + HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T))); | |
282 | + HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice)); | |
283 | + } | |
284 | + | |
285 | + size_t wave_bytes = sizeof(stim::scalarwave<T>); | |
286 | + size_t shared_bytes = stim::sharedMemPerBlock(); //calculate the maximum amount of shared memory available | |
287 | + size_t array_bytes = w_array.size() * wave_bytes; //calculate the maximum number of bytes required for the planewave array | |
288 | + size_t max_batch = shared_bytes / wave_bytes; //calculate number of plane waves that will fit into shared memory | |
289 | + size_t num_batches = w_array.size() / max_batch + 1; //calculate the number of batches required to process all plane waves | |
290 | + size_t batch_bytes = min(w_array.size(), max_batch) * wave_bytes; //initialize the batch size (in bytes) to the maximum batch required | |
291 | + | |
292 | + stim::scalarwave<T>* dev_w; | |
293 | + HANDLE_ERROR(cudaMalloc(&dev_w, batch_bytes)); //allocate memory for a single batch of plane waves | |
294 | + | |
295 | + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device | |
296 | + dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks | |
297 | + | |
298 | + size_t batch_size; //declare a variable to store the size of the current batch | |
299 | + size_t waves_processed = 0; //initialize the number of waves processed to zero | |
300 | + while(waves_processed < w_array.size()){ //while there are still waves to be processed | |
301 | + batch_size = min<size_t>(max_batch, w_array.size() - waves_processed); //process either a whole batch, or whatever is left | |
302 | + batch_bytes = batch_size * sizeof(stim::scalarwave<T>); | |
303 | + HANDLE_ERROR(cudaMemcpy(dev_w, &w_array[waves_processed], batch_bytes, cudaMemcpyHostToDevice)); //copy the plane waves into global memory | |
304 | + cuda_scalarwave<T><<< blocks, threads, batch_bytes >>>(dev_F, N, dev_x, dev_y, dev_z, dev_w, batch_size); //call the kernel | |
305 | + waves_processed += batch_size; //increment the counter indicating how many waves have been processed | |
306 | + } | |
307 | + | |
308 | + cudaMemcpy(F, dev_F, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost); //copy the field from device memory | |
309 | + | |
310 | + if(x != NULL) cudaFree(dev_x); //free everything | |
311 | + if(y != NULL) cudaFree(dev_y); | |
312 | + if(z != NULL) cudaFree(dev_z); | |
313 | + cudaFree(dev_F); | |
314 | + cudaFree(dev_w); | |
315 | + | |
316 | +#endif | |
317 | +} | |
318 | + | |
319 | +template<typename T> | |
320 | +void cpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){ | |
321 | + std::vector< stim::scalarwave<T> > w_array(1, w); | |
322 | + cpu_sum_scalarwaves(F, N, x, y, z, w_array); | |
323 | +} | |
324 | + | |
325 | + | |
326 | +/// Sums a series of coherent plane waves at a specified point | |
327 | +/// @param x is the x coordinate of the field point | |
328 | +/// @param y is the y coordinate of the field point | |
329 | +/// @param z is the z coordinate of the field point | |
330 | +/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength) | |
331 | +/// @param A is the list of amplitudes for each wave | |
332 | +/// @param S is the list of propagation directions for each wave | |
333 | +template<typename T> | |
334 | +CUDA_CALLABLE stim::complex<T> sum_scalarwaves(T x, T y, T z, std::vector< stim::scalarwave<T> > W){ | |
335 | + size_t N = W.size(); //get the number of plane wave samples | |
336 | + stim::complex<T> field(0, 0); //initialize the field to zero (0) | |
337 | + stim::vec3<T> k; //allocate space for the direction vector | |
338 | + for(size_t i = 0; i < N; i++){ | |
339 | + field += W[i].pos(x, y, z); | |
340 | + } | |
341 | + return field; | |
342 | +} | |
343 | + | |
344 | +} //end namespace stim | |
345 | + | |
346 | +template <typename T> | |
347 | +std::ostream& operator<<(std::ostream& os, stim::scalarwave<T> p) | |
348 | +{ | |
349 | + os<<p.str(); | |
350 | + return os; | |
351 | +} | |
352 | + | |
353 | +#endif | |
0 | 354 | \ No newline at end of file | ... | ... |
stim/optics/beam.h renamed to stim/optics_old/beam.h
1 | -#ifndef RTS_BEAM | |
2 | -#define RTS_BEAM | |
3 | - | |
4 | -#include "../math/vector.h" | |
5 | -#include "../math/function.h" | |
6 | -#include "../optics/planewave.h" | |
7 | -#include <vector> | |
8 | - | |
9 | -namespace stim{ | |
10 | - | |
11 | -template<typename P> | |
12 | -class beam : public planewave<P> | |
13 | -{ | |
14 | -public: | |
15 | - enum beam_type {Uniform, Bartlett, Hamming, Hanning}; | |
16 | - | |
17 | -private: | |
18 | - | |
19 | - P _na[2]; //numerical aperature of the focusing optics | |
20 | - vec<P> f; //focal point | |
21 | - function<P, P> apod; //apodization function | |
22 | - unsigned int apod_res; //resolution of apodization filter functions | |
23 | - | |
24 | - void apod_uniform() | |
25 | - { | |
26 | - apod = (P)1; | |
27 | - } | |
28 | - void apod_bartlett() | |
29 | - { | |
30 | - apod = (P)1; | |
31 | - apod.insert((P)1, (P)0); | |
32 | - } | |
33 | - void apod_hanning() | |
34 | - { | |
35 | - apod = (P)0; | |
36 | - P x, y; | |
37 | - for(unsigned int n=0; n<apod_res; n++) | |
38 | - { | |
39 | - x = (P)n/(P)apod_res; | |
40 | - y = pow( cos( ((P)3.14159 * x) / 2 ), 2); | |
41 | - apod.insert(x, y); | |
42 | - } | |
43 | - } | |
44 | - void apod_hamming() | |
45 | - { | |
46 | - apod = (P)0; | |
47 | - P x, y; | |
48 | - for(unsigned int n=0; n<apod_res; n++) | |
49 | - { | |
50 | - x = (P)n/(P)apod_res; | |
51 | - y = (P)27/(P)50 + ( (P)23/(P)50 ) * cos((P)3.14159 * x); | |
52 | - apod.insert(x, y); | |
53 | - } | |
54 | - } | |
55 | - | |
56 | - void set_apod(beam_type type) | |
57 | - { | |
58 | - if(type == Uniform) | |
59 | - apod_uniform(); | |
60 | - if(type == Bartlett) | |
61 | - apod_bartlett(); | |
62 | - if(type == Hanning) | |
63 | - apod_hanning(); | |
64 | - if(type == Hamming) | |
65 | - apod_hamming(); | |
66 | - } | |
67 | - | |
68 | -public: | |
69 | - | |
70 | - ///constructor: build a default beam (NA=1.0) | |
71 | - beam( | |
72 | - vec<P> k = rts::vec<P>(0, 0, rtsTAU), | |
73 | - vec<P> _E0 = rts::vec<P>(1, 0, 0), | |
74 | - beam_type _apod = Uniform) | |
75 | - : planewave<P>(k, _E0) | |
76 | - { | |
77 | - _na[0] = (P)0.0; | |
78 | - _na[1] = (P)1.0; | |
79 | - f = vec<P>( (P)0, (P)0, (P)0 ); | |
80 | - apod_res = 256; //set the default resolution for apodization filters | |
81 | - set_apod(_apod); //set the apodization function type | |
82 | - } | |
83 | - | |
84 | - beam<P> refract(rts::vec<P> kn) const{ | |
85 | - | |
86 | - beam<P> new_beam; | |
87 | - new_beam._na[0] = _na[0]; | |
88 | - new_beam._na[1] = _na[1]; | |
89 | - | |
90 | - | |
91 | - rts::planewave<P> pw = planewave<P>::bend(kn); | |
92 | - //std::cout<<pw.str()<<std::endl; | |
93 | - | |
94 | - new_beam.k = pw.kvec(); | |
95 | - new_beam.E0 = pw.E(); | |
96 | - | |
97 | - return new_beam; | |
98 | - } | |
99 | - | |
100 | - ///Numerical Aperature functions | |
101 | - void NA(P na) | |
102 | - { | |
103 | - _na[0] = (P)0; | |
104 | - _na[1] = na; | |
105 | - } | |
106 | - void NA(P na0, P na1) | |
107 | - { | |
108 | - _na[0] = na0; | |
109 | - _na[1] = na1; | |
110 | - } | |
111 | - | |
112 | - /*string str() : | |
113 | - { | |
114 | - stringstream ss; | |
115 |