Commit 8309b07a16caaadfd4bc85bfbebdf7041a63f0e0
1 parent
31262e83
fixed some vec3 errors
Showing
8 changed files
with
429 additions
and
593 deletions
Show diff stats
stim/math/complex.h
stim/math/plane_old.h deleted
1 | -#ifndef RTS_PLANE_H | ||
2 | -#define RTS_PLANE_H | ||
3 | - | ||
4 | -#include <iostream> | ||
5 | -#include <stim/math/vector.h> | ||
6 | -#include "rts/cuda/callable.h" | ||
7 | - | ||
8 | - | ||
9 | -namespace stim{ | ||
10 | -template <typename T, int D> class plane; | ||
11 | -} | ||
12 | - | ||
13 | -template <typename T, int D> | ||
14 | -CUDA_CALLABLE stim::plane<T, D> operator-(stim::plane<T, D> v); | ||
15 | - | ||
16 | -namespace stim{ | ||
17 | - | ||
18 | -template <class T, int D = 3> | ||
19 | -class plane{ | ||
20 | - | ||
21 | - //a plane is defined by a point and a normal | ||
22 | - | ||
23 | -private: | ||
24 | - | ||
25 | - vec<T, D> P; //point on the plane | ||
26 | - vec<T, D> N; //plane normal | ||
27 | - | ||
28 | - CUDA_CALLABLE void init(){ | ||
29 | - P = vec<T, D>(0, 0, 0); | ||
30 | - N = vec<T, D>(0, 0, 1); | ||
31 | - } | ||
32 | - | ||
33 | - | ||
34 | -public: | ||
35 | - | ||
36 | - //default constructor | ||
37 | - CUDA_CALLABLE plane(){ | ||
38 | - init(); | ||
39 | - } | ||
40 | - | ||
41 | - CUDA_CALLABLE plane(vec<T, D> n, vec<T, D> p = vec<T, D>(0, 0, 0)){ | ||
42 | - P = p; | ||
43 | - N = n.norm(); | ||
44 | - } | ||
45 | - | ||
46 | - CUDA_CALLABLE plane(T z_pos){ | ||
47 | - init(); | ||
48 | - P[2] = z_pos; | ||
49 | - } | ||
50 | - | ||
51 | - //create a plane from three points (a triangle) | ||
52 | - CUDA_CALLABLE plane(vec<T, D> a, vec<T, D> b, vec<T, D> c){ | ||
53 | - P = c; | ||
54 | - N = (c - a).cross(b - a); | ||
55 | - if(N.len() == 0) //handle the degenerate case when two vectors are the same, N = 0 | ||
56 | - N = 0; | ||
57 | - else | ||
58 | - N = N.norm(); | ||
59 | - } | ||
60 | - | ||
61 | - template< typename U > | ||
62 | - CUDA_CALLABLE operator plane<U, D>(){ | ||
63 | - | ||
64 | - plane<U, D> result(N, P); | ||
65 | - return result; | ||
66 | - } | ||
67 | - | ||
68 | - CUDA_CALLABLE vec<T, D> norm(){ | ||
69 | - return N; | ||
70 | - } | ||
71 | - | ||
72 | - CUDA_CALLABLE vec<T, D> p(){ | ||
73 | - return P; | ||
74 | - } | ||
75 | - | ||
76 | - //flip the plane front-to-back | ||
77 | - CUDA_CALLABLE plane<T, D> flip(){ | ||
78 | - plane<T, D> result = *this; | ||
79 | - result.N = -result.N; | ||
80 | - return result; | ||
81 | - } | ||
82 | - | ||
83 | - //determines how a vector v intersects the plane (1 = intersects front, 0 = within plane, -1 = intersects back) | ||
84 | - CUDA_CALLABLE int face(vec<T, D> v){ | ||
85 | - | ||
86 | - T dprod = v.dot(N); //get the dot product between v and N | ||
87 | - | ||
88 | - //conditional returns the appropriate value | ||
89 | - if(dprod < 0) | ||
90 | - return 1; | ||
91 | - else if(dprod > 0) | ||
92 | - return -1; | ||
93 | - else | ||
94 | - return 0; | ||
95 | - } | ||
96 | - | ||
97 | - //determine on which side of the plane a point lies (1 = front, 0 = on the plane, -1 = back) | ||
98 | - CUDA_CALLABLE int side(vec<T, D> p){ | ||
99 | - | ||
100 | - vec<T, D> v = p - P; //get the vector from P to the query point p | ||
101 | - | ||
102 | - return face(v); | ||
103 | - } | ||
104 | - | ||
105 | - //compute the component of v that is perpendicular to the plane | ||
106 | - CUDA_CALLABLE vec<T, D> perpendicular(vec<T, D> v){ | ||
107 | - return N * v.dot(N); | ||
108 | - } | ||
109 | - | ||
110 | - //compute the projection of v in the plane | ||
111 | - CUDA_CALLABLE vec<T, D> parallel(vec<T, D> v){ | ||
112 | - return v - perpendicular(v); | ||
113 | - } | ||
114 | - | ||
115 | - CUDA_CALLABLE void decompose(vec<T, D> v, vec<T, D>& para, vec<T, D>& perp){ | ||
116 | - perp = N * v.dot(N); | ||
117 | - para = v - perp; | ||
118 | - } | ||
119 | - | ||
120 | - //get both the parallel and perpendicular components of a vector v w.r.t. the plane | ||
121 | - CUDA_CALLABLE void project(vec<T, D> v, vec<T, D> &v_par, vec<T, D> &v_perp){ | ||
122 | - | ||
123 | - v_perp = v.dot(N); | ||
124 | - v_par = v - v_perp; | ||
125 | - } | ||
126 | - | ||
127 | - //compute the reflection of v off of the plane | ||
128 | - CUDA_CALLABLE vec<T, D> reflect(vec<T, D> v){ | ||
129 | - | ||
130 | - //compute the reflection using N_prime as the plane normal | ||
131 | - vec<T, D> par = parallel(v); | ||
132 | - vec<T, D> r = (-v) + par * 2; | ||
133 | - | ||
134 | - /*std::cout<<"----------------REFLECT-----------------------------"<<std::endl; | ||
135 | - std::cout<<str()<<std::endl; | ||
136 | - std::cout<<"v: "<<v<<std::endl; | ||
137 | - std::cout<<"r: "<<r<<std::endl; | ||
138 | - std::cout<<"Perpendicular: "<<perpendicular(v)<<std::endl; | ||
139 | - std::cout<<"Parallel: "<<par<<std::endl;*/ | ||
140 | - return r; | ||
141 | - | ||
142 | - } | ||
143 | - | ||
144 | - CUDA_CALLABLE rts::plane<T, D> operator-() | ||
145 | - { | ||
146 | - rts::plane<T, D> p = *this; | ||
147 | - | ||
148 | - //negate the normal vector | ||
149 | - p.N = -p.N; | ||
150 | - | ||
151 | - return p; | ||
152 | - } | ||
153 | - | ||
154 | - //output a string | ||
155 | - std::string str(){ | ||
156 | - std::stringstream ss; | ||
157 | - ss<<"P: "<<P<<std::endl; | ||
158 | - ss<<"N: "<<N; | ||
159 | - return ss.str(); | ||
160 | - } | ||
161 | - | ||
162 | - ///////Friendship | ||
163 | - //friend CUDA_CALLABLE rts::plane<T, D> operator- <> (rts::plane<T, D> v); | ||
164 | - | ||
165 | - | ||
166 | - | ||
167 | -}; | ||
168 | - | ||
169 | -} | ||
170 | - | ||
171 | -//arithmetic operators | ||
172 | - | ||
173 | -//negative operator flips the plane (front to back) | ||
174 | -//template <typename T, int D> | ||
175 | - | ||
176 | - | ||
177 | - | ||
178 | - | ||
179 | -#endif |
stim/math/quad.h deleted
1 | -#ifndef RTS_QUAD_H | ||
2 | -#define RTS_QUAD_H | ||
3 | - | ||
4 | -//enable CUDA_CALLABLE macro | ||
5 | -#include <stim/cuda/callable.h> | ||
6 | -#include <stim/math/vector.h> | ||
7 | -#include <stim/math/triangle.h> | ||
8 | -#include <stim/math/quaternion.h> | ||
9 | -#include <iostream> | ||
10 | -#include <iomanip> | ||
11 | -#include <algorithm> | ||
12 | - | ||
13 | -namespace stim{ | ||
14 | - | ||
15 | -//template for a quadangle class in ND space | ||
16 | -template <class T, int N = 3> | ||
17 | -struct quad | ||
18 | -{ | ||
19 | - /* | ||
20 | - B------------------>C | ||
21 | - ^ ^ | ||
22 | - | | | ||
23 | - Y | | ||
24 | - | | | ||
25 | - | | | ||
26 | - A---------X-------->O | ||
27 | - */ | ||
28 | - | ||
29 | - /*T A[N]; | ||
30 | - T B[N]; | ||
31 | - T C[N];*/ | ||
32 | - | ||
33 | - rts::vec<T, N> A; | ||
34 | - rts::vec<T, N> X; | ||
35 | - rts::vec<T, N> Y; | ||
36 | - | ||
37 | - | ||
38 | - CUDA_CALLABLE quad() | ||
39 | - { | ||
40 | - | ||
41 | - } | ||
42 | - | ||
43 | - CUDA_CALLABLE quad(vec<T, N> a, vec<T, N> b, vec<T, N> c) | ||
44 | - { | ||
45 | - | ||
46 | - A = a; | ||
47 | - Y = b - a; | ||
48 | - X = c - a - Y; | ||
49 | - | ||
50 | - } | ||
51 | - | ||
52 | - /******************************************************************* | ||
53 | - Constructor - create a quad from a position, normal, and rotation | ||
54 | - *******************************************************************/ | ||
55 | - CUDA_CALLABLE quad(rts::vec<T, N> c, rts::vec<T, N> normal, T width, T height, T theta) | ||
56 | - { | ||
57 | - | ||
58 | - //compute the X direction - start along world-space X | ||
59 | - Y = rts::vec<T, N>(0, 1, 0); | ||
60 | - if(Y == normal) | ||
61 | - Y = rts::vec<T, N>(0, 0, 1); | ||
62 | - | ||
63 | - X = Y.cross(normal).norm(); | ||
64 | - | ||
65 | - std::cout<<X<<std::endl; | ||
66 | - | ||
67 | - //rotate the X axis by theta radians | ||
68 | - rts::quaternion<T> q; | ||
69 | - q.CreateRotation(theta, normal); | ||
70 | - X = q.toMatrix3() * X; | ||
71 | - Y = normal.cross(X); | ||
72 | - | ||
73 | - //normalize everything | ||
74 | - X = X.norm(); | ||
75 | - Y = Y.norm(); | ||
76 | - | ||
77 | - //scale to match the quad width and height | ||
78 | - X = X * width; | ||
79 | - Y = Y * height; | ||
80 | - | ||
81 | - //set the corner of the plane | ||
82 | - A = c - X * 0.5f - Y * 0.5f; | ||
83 | - | ||
84 | - std::cout<<X<<std::endl; | ||
85 | - } | ||
86 | - | ||
87 | - //boolean comparison | ||
88 | - bool operator==(const quad<T, N> & rhs) | ||
89 | - { | ||
90 | - if(A == rhs.A && X == rhs.X && Y == rhs.Y) | ||
91 | - return true; | ||
92 | - else | ||
93 | - return false; | ||
94 | - } | ||
95 | - | ||
96 | - /******************************************* | ||
97 | - Return the normal for the quad | ||
98 | - *******************************************/ | ||
99 | - CUDA_CALLABLE rts::vec<T, N> n() | ||
100 | - { | ||
101 | - return (X.cross(Y)).norm(); | ||
102 | - } | ||
103 | - | ||
104 | - CUDA_CALLABLE rts::vec<T, N> p(T a, T b) | ||
105 | - { | ||
106 | - rts::vec<T, N> result; | ||
107 | - //given the two parameters a, b = [0 1], returns the position in world space | ||
108 | - result = A + X * a + Y * b; | ||
109 | - | ||
110 | - return result; | ||
111 | - } | ||
112 | - | ||
113 | - CUDA_CALLABLE rts::vec<T, N> operator()(T a, T b) | ||
114 | - { | ||
115 | - return p(a, b); | ||
116 | - } | ||
117 | - | ||
118 | - std::string str() | ||
119 | - { | ||
120 | - std::stringstream ss; | ||
121 | - | ||
122 | - ss<<std::left<<"B="<<setfill('-')<<setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl; | ||
123 | - ss<<setfill(' ')<<setw(23)<<"|"<<"|"<<std::endl<<setw(23)<<"|"<<"|"<<std::endl; | ||
124 | - ss<<std::left<<"A="<<setfill('-')<<setw(20)<<A<<">"<<"D="<<A + X; | ||
125 | - | ||
126 | - return ss.str(); | ||
127 | - | ||
128 | - } | ||
129 | - | ||
130 | - CUDA_CALLABLE quad<T, N> operator*(T rhs) | ||
131 | - { | ||
132 | - //scales the plane by a scalar value | ||
133 | - | ||
134 | - //compute the center point | ||
135 | - rts::vec<T, N> c = A + X*0.5f + Y*0.5f; | ||
136 | - | ||
137 | - //create the new quadangle | ||
138 | - quad<T, N> result; | ||
139 | - result.X = X * rhs; | ||
140 | - result.Y = Y * rhs; | ||
141 | - result.A = c - result.X*0.5f - result.Y*0.5f; | ||
142 | - | ||
143 | - return result; | ||
144 | - | ||
145 | - } | ||
146 | - | ||
147 | - CUDA_CALLABLE T dist(vec<T, N> p) | ||
148 | - { | ||
149 | - //compute the distance between a point and this quad | ||
150 | - | ||
151 | - //first break the quad up into two triangles | ||
152 | - triangle<T, N> T0(A, A+X, A+Y); | ||
153 | - triangle<T, N> T1(A+X+Y, A+X, A+Y); | ||
154 | - | ||
155 | - | ||
156 | - T d0 = T0.dist(p); | ||
157 | - T d1 = T1.dist(p); | ||
158 | - | ||
159 | - if(d0 < d1) | ||
160 | - return d0; | ||
161 | - else | ||
162 | - return d1; | ||
163 | - } | ||
164 | - | ||
165 | - CUDA_CALLABLE T dist_max(vec<T, N> p) | ||
166 | - { | ||
167 | - T da = (A - p).len(); | ||
168 | - T db = (A+X - p).len(); | ||
169 | - T dc = (A+Y - p).len(); | ||
170 | - T dd = (A+X+Y - p).len(); | ||
171 | - | ||
172 | - return std::max( da, std::max(db, std::max(dc, dd) ) ); | ||
173 | - } | ||
174 | -}; | ||
175 | - | ||
176 | -} //end namespace rts | ||
177 | - | ||
178 | -template <typename T, int N> | ||
179 | -std::ostream& operator<<(std::ostream& os, rts::quad<T, N> R) | ||
180 | -{ | ||
181 | - os<<R.str(); | ||
182 | - return os; | ||
183 | -} | ||
184 | - | ||
185 | - | ||
186 | -#endif |
stim/math/rect.h
@@ -28,13 +28,10 @@ class rect : plane <T> | @@ -28,13 +28,10 @@ class rect : plane <T> | ||
28 | O---------X---------> | 28 | O---------X---------> |
29 | */ | 29 | */ |
30 | 30 | ||
31 | -private: | ||
32 | - | ||
33 | - stim::vec<T> X; | ||
34 | - stim::vec<T> Y; | ||
35 | - | ||
36 | - | 31 | +protected: |
37 | 32 | ||
33 | + stim::vec3<T> X; | ||
34 | + stim::vec3<T> Y; | ||
38 | 35 | ||
39 | public: | 36 | public: |
40 | 37 | ||
@@ -65,7 +62,7 @@ public: | @@ -65,7 +62,7 @@ public: | ||
65 | ///create a rectangle from a center point, normal | 62 | ///create a rectangle from a center point, normal |
66 | ///@param c: x,y,z location of the center. | 63 | ///@param c: x,y,z location of the center. |
67 | ///@param n: x,y,z direction of the normal. | 64 | ///@param n: x,y,z direction of the normal. |
68 | - CUDA_CALLABLE rect(vec<T> c, vec<T> n = vec<T>(0, 0, 1)) | 65 | + CUDA_CALLABLE rect(vec3<T> c, vec3<T> n = vec3<T>(0, 0, 1)) |
69 | : plane<T>() | 66 | : plane<T>() |
70 | { | 67 | { |
71 | init(); //start with the default setting | 68 | init(); //start with the default setting |
@@ -76,7 +73,7 @@ public: | @@ -76,7 +73,7 @@ public: | ||
76 | ///@param c: x,y,z location of the center. | 73 | ///@param c: x,y,z location of the center. |
77 | ///@param s: size of the rectangle. | 74 | ///@param s: size of the rectangle. |
78 | ///@param n: x,y,z direction of the normal. | 75 | ///@param n: x,y,z direction of the normal. |
79 | - CUDA_CALLABLE rect(vec<T> c, T s, vec<T> n = vec<T>(0, 0, 1)) | 76 | + CUDA_CALLABLE rect(vec3<T> c, T s, vec3<T> n = vec3<T>(0, 0, 1)) |
80 | : plane<T>() | 77 | : plane<T>() |
81 | { | 78 | { |
82 | init(); //start with the default setting | 79 | init(); //start with the default setting |
@@ -89,7 +86,7 @@ public: | @@ -89,7 +86,7 @@ public: | ||
89 | ///@param center: x,y,z location of the center. | 86 | ///@param center: x,y,z location of the center. |
90 | ///@param directionX: u,v,w direction of the X vector. | 87 | ///@param directionX: u,v,w direction of the X vector. |
91 | ///@param directionY: u,v,w direction of the Y vector. | 88 | ///@param directionY: u,v,w direction of the Y vector. |
92 | - CUDA_CALLABLE rect(vec<T> center, vec<T> directionX, vec<T> directionY ) | 89 | + CUDA_CALLABLE rect(vec3<T> center, vec3<T> directionX, vec3<T> directionY ) |
93 | : plane<T>((directionX.cross(directionY)).norm(),center) | 90 | : plane<T>((directionX.cross(directionY)).norm(),center) |
94 | { | 91 | { |
95 | X = directionX; | 92 | X = directionX; |
@@ -101,7 +98,7 @@ public: | @@ -101,7 +98,7 @@ public: | ||
101 | ///@param center: x,y,z location of the center. | 98 | ///@param center: x,y,z location of the center. |
102 | ///@param directionX: u,v,w direction of the X vector. | 99 | ///@param directionX: u,v,w direction of the X vector. |
103 | ///@param directionY: u,v,w direction of the Y vector. | 100 | ///@param directionY: u,v,w direction of the Y vector. |
104 | - CUDA_CALLABLE rect(T size, vec<T> center, vec<T> directionX, vec<T> directionY ) | 101 | + CUDA_CALLABLE rect(T size, vec3<T> center, vec3<T> directionX, vec3<T> directionY ) |
105 | : plane<T>((directionX.cross(directionY)).norm(),center) | 102 | : plane<T>((directionX.cross(directionY)).norm(),center) |
106 | { | 103 | { |
107 | X = directionX; | 104 | X = directionX; |
@@ -114,7 +111,7 @@ public: | @@ -114,7 +111,7 @@ public: | ||
114 | ///@param center: x,y,z location of the center. | 111 | ///@param center: x,y,z location of the center. |
115 | ///@param directionX: u,v,w direction of the X vector. | 112 | ///@param directionX: u,v,w direction of the X vector. |
116 | ///@param directionY: u,v,w direction of the Y vector. | 113 | ///@param directionY: u,v,w direction of the Y vector. |
117 | - CUDA_CALLABLE rect(vec<T> size, vec<T> center, vec<T> directionX, vec<T> directionY) | 114 | + CUDA_CALLABLE rect(vec3<T> size, vec3<T> center, vec3<T> directionX, vec3<T> directionY) |
118 | : plane<T>((directionX.cross(directionY)).norm(), center) | 115 | : plane<T>((directionX.cross(directionY)).norm(), center) |
119 | { | 116 | { |
120 | X = directionX; | 117 | X = directionX; |
@@ -138,7 +135,7 @@ public: | @@ -138,7 +135,7 @@ public: | ||
138 | 135 | ||
139 | ///@param n; vector with the normal. | 136 | ///@param n; vector with the normal. |
140 | ///Orients the rectangle along the normal n. | 137 | ///Orients the rectangle along the normal n. |
141 | - CUDA_CALLABLE void normal(vec<T> n) | 138 | + CUDA_CALLABLE void normal(vec3<T> n) |
142 | { | 139 | { |
143 | //orient the rectangle along the specified normal | 140 | //orient the rectangle along the specified normal |
144 | rotate(n, X, Y); | 141 | rotate(n, X, Y); |
@@ -147,8 +144,8 @@ public: | @@ -147,8 +144,8 @@ public: | ||
147 | ///general init method that sets a general rectangle. | 144 | ///general init method that sets a general rectangle. |
148 | CUDA_CALLABLE void init() | 145 | CUDA_CALLABLE void init() |
149 | { | 146 | { |
150 | - X = vec<T>(1, 0, 0); | ||
151 | - Y = vec<T>(0, 1, 0); | 147 | + X = vec3<T>(1, 0, 0); |
148 | + Y = vec3<T>(0, 1, 0); | ||
152 | } | 149 | } |
153 | 150 | ||
154 | //boolean comparison | 151 | //boolean comparison |
@@ -162,18 +159,18 @@ public: | @@ -162,18 +159,18 @@ public: | ||
162 | 159 | ||
163 | 160 | ||
164 | //get the world space value given the planar coordinates a, b in [0, 1] | 161 | //get the world space value given the planar coordinates a, b in [0, 1] |
165 | - CUDA_CALLABLE stim::vec<T> p(T a, T b) | 162 | + CUDA_CALLABLE stim::vec3<T> p(T a, T b) |
166 | { | 163 | { |
167 | - stim::vec<T> result; | 164 | + stim::vec3<T> result; |
168 | //given the two parameters a, b = [0 1], returns the position in world space | 165 | //given the two parameters a, b = [0 1], returns the position in world space |
169 | - vec<T> A = this->P - X * (T)0.5 - Y * (T)0.5; | 166 | + vec3<T> A = this->P - X * (T)0.5 - Y * (T)0.5; |
170 | result = A + X * a + Y * b; | 167 | result = A + X * a + Y * b; |
171 | 168 | ||
172 | return result; | 169 | return result; |
173 | } | 170 | } |
174 | 171 | ||
175 | //parenthesis operator returns the world space given rectangular coordinates a and b in [0 1] | 172 | //parenthesis operator returns the world space given rectangular coordinates a and b in [0 1] |
176 | - CUDA_CALLABLE stim::vec<T> operator()(T a, T b) | 173 | + CUDA_CALLABLE stim::vec3<T> operator()(T a, T b) |
177 | { | 174 | { |
178 | return p(a, b); | 175 | return p(a, b); |
179 | } | 176 | } |
@@ -181,12 +178,12 @@ public: | @@ -181,12 +178,12 @@ public: | ||
181 | std::string str() | 178 | std::string str() |
182 | { | 179 | { |
183 | std::stringstream ss; | 180 | std::stringstream ss; |
184 | - vec<T> A = P - X * (T)0.5 - Y * (T)0.5; | 181 | + vec3<T> A = P - X * (T)0.5 - Y * (T)0.5; |
185 | ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl; | 182 | ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl; |
186 | ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl; | 183 | ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl; |
187 | ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X; | 184 | ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X; |
188 | 185 | ||
189 | - return ss.str(); | 186 | + return ss.str(); |
190 | 187 | ||
191 | } | 188 | } |
192 | 189 | ||
@@ -205,11 +202,11 @@ public: | @@ -205,11 +202,11 @@ public: | ||
205 | 202 | ||
206 | ///computes the distance between the specified point and this rectangle. | 203 | ///computes the distance between the specified point and this rectangle. |
207 | ///@param p: x, y, z coordinates of the point to calculate distance to. | 204 | ///@param p: x, y, z coordinates of the point to calculate distance to. |
208 | - CUDA_CALLABLE T dist(vec<T> p) | 205 | + CUDA_CALLABLE T dist(vec3<T> p) |
209 | { | 206 | { |
210 | //compute the distance between a point and this rect | 207 | //compute the distance between a point and this rect |
211 | 208 | ||
212 | - vec<T> A = P - X * (T)0.5 - Y * (T)0.5; | 209 | + vec3<T> A = P - X * (T)0.5 - Y * (T)0.5; |
213 | 210 | ||
214 | //first break the rect up into two triangles | 211 | //first break the rect up into two triangles |
215 | triangle<T> T0(A, A+X, A+Y); | 212 | triangle<T> T0(A, A+X, A+Y); |
@@ -225,16 +222,16 @@ public: | @@ -225,16 +222,16 @@ public: | ||
225 | return d1; | 222 | return d1; |
226 | } | 223 | } |
227 | 224 | ||
228 | - CUDA_CALLABLE T center(vec<T> p) | 225 | + CUDA_CALLABLE T center(vec3<T> p) |
229 | { | 226 | { |
230 | this->P = p; | 227 | this->P = p; |
231 | } | 228 | } |
232 | 229 | ||
233 | ///Returns the maximum distance of the rectangle from a point p to the sides of the rectangle. | 230 | ///Returns the maximum distance of the rectangle from a point p to the sides of the rectangle. |
234 | ///@param p: x, y, z point. | 231 | ///@param p: x, y, z point. |
235 | - CUDA_CALLABLE T dist_max(vec<T> p) | 232 | + CUDA_CALLABLE T dist_max(vec3<T> p) |
236 | { | 233 | { |
237 | - vec<T> A = P - X * (T)0.5 - Y * (T)0.5; | 234 | + vec3<T> A = P - X * (T)0.5 - Y * (T)0.5; |
238 | T da = (A - p).len(); | 235 | T da = (A - p).len(); |
239 | T db = (A+X - p).len(); | 236 | T db = (A+X - p).len(); |
240 | T dc = (A+Y - p).len(); | 237 | T dc = (A+Y - p).len(); |
stim/math/vec3.h
@@ -242,4 +242,11 @@ stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | @@ -242,4 +242,11 @@ stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | ||
242 | return rhs * lhs; | 242 | return rhs * lhs; |
243 | } | 243 | } |
244 | 244 | ||
245 | +//stream operator | ||
246 | +template<typename T> | ||
247 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | ||
248 | + os<<rhs.str(); | ||
249 | + return os; | ||
250 | +} | ||
251 | + | ||
245 | #endif | 252 | #endif |
246 | \ No newline at end of file | 253 | \ No newline at end of file |
stim/math/vector.h
@@ -5,8 +5,9 @@ | @@ -5,8 +5,9 @@ | ||
5 | #include <cmath> | 5 | #include <cmath> |
6 | #include <sstream> | 6 | #include <sstream> |
7 | #include <vector> | 7 | #include <vector> |
8 | - | 8 | + |
9 | #include <stim/cuda/cudatools/callable.h> | 9 | #include <stim/cuda/cudatools/callable.h> |
10 | +#include <stim/math/vec3.h> | ||
10 | 11 | ||
11 | namespace stim | 12 | namespace stim |
12 | { | 13 | { |
@@ -70,8 +71,8 @@ struct vec : public std::vector<T> | @@ -70,8 +71,8 @@ struct vec : public std::vector<T> | ||
70 | size_t N = other.size(); | 71 | size_t N = other.size(); |
71 | resize(N); //resize the current vector to match the copy | 72 | resize(N); //resize the current vector to match the copy |
72 | for(size_t i=0; i<N; i++){ //copy each element | 73 | for(size_t i=0; i<N; i++){ //copy each element |
73 | - at(i) = other[i]; | ||
74 | - } | 74 | + at(i) = other[i]; |
75 | + } | ||
75 | } | 76 | } |
76 | 77 | ||
77 | //I'm not sure what these were doing here. | 78 | //I'm not sure what these were doing here. |
@@ -318,8 +319,8 @@ struct vec : public std::vector<T> | @@ -318,8 +319,8 @@ struct vec : public std::vector<T> | ||
318 | } | 319 | } |
319 | 320 | ||
320 | /// Cast to a vec3 | 321 | /// Cast to a vec3 |
321 | - operator vec3<T>(){ | ||
322 | - vec3<T> r; | 322 | + operator stim::vec3<T>(){ |
323 | + stim::vec3<T> r; | ||
323 | size_t N = std::min<size_t>(size(), 3); | 324 | size_t N = std::min<size_t>(size(), 3); |
324 | for(size_t i = 0; i < N; i++) | 325 | for(size_t i = 0; i < N; i++) |
325 | r[i] = at(i); | 326 | r[i] = at(i); |
stim/optics/mie.h
1 | #ifndef STIM_MIE_H | 1 | #ifndef STIM_MIE_H |
2 | #define STIM_MIE_H | 2 | #define STIM_MIE_H |
3 | +#include <boost/math/special_functions/bessel.hpp> | ||
3 | 4 | ||
4 | #include "scalarwave.h" | 5 | #include "scalarwave.h" |
5 | #include "../math/bessel.h" | 6 | #include "../math/bessel.h" |
@@ -43,7 +44,6 @@ void B_coefficients(stim::complex<T>* B, T a, T k, stim::complex<T> n, int Nl){ | @@ -43,7 +44,6 @@ void B_coefficients(stim::complex<T>* B, T a, T k, stim::complex<T> n, int Nl){ | ||
43 | numerator = j_ka[l] * dj_kna[l] * (stim::complex<double>)n - j_kna[l] * dj_ka[l]; | 44 | numerator = j_ka[l] * dj_kna[l] * (stim::complex<double>)n - j_kna[l] * dj_ka[l]; |
44 | denominator = j_kna[l] * dh_ka - h_ka * dj_kna[l] * (stim::complex<double>)n; | 45 | denominator = j_kna[l] * dh_ka - h_ka * dj_kna[l] * (stim::complex<double>)n; |
45 | B[l] = (2 * l + 1) * pow(i, l) * numerator / denominator; | 46 | B[l] = (2 * l + 1) * pow(i, l) * numerator / denominator; |
46 | - std::cout<<B[l]<<std::endl; | ||
47 | } | 47 | } |
48 | } | 48 | } |
49 | 49 | ||
@@ -84,7 +84,7 @@ void A_coefficients(stim::complex<T>* A, T a, T k, stim::complex<T> n, int Nl){ | @@ -84,7 +84,7 @@ void A_coefficients(stim::complex<T>* A, T a, T k, stim::complex<T> n, int Nl){ | ||
84 | 84 | ||
85 | #define LOCAL_NL 16 | 85 | #define LOCAL_NL 16 |
86 | template<typename T> | 86 | template<typename T> |
87 | -__global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* hB, T kr_min, T dkr, size_t N_hB, int Nl){ | 87 | +__global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* hB, T r_min, T dr, size_t N_hB, int Nl){ |
88 | extern __shared__ stim::complex<T> shared_hB[]; //declare the list of waves in shared memory | 88 | extern __shared__ stim::complex<T> shared_hB[]; //declare the list of waves in shared memory |
89 | 89 | ||
90 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array | 90 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array |
@@ -96,14 +96,11 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* | @@ -96,14 +96,11 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* | ||
96 | 96 | ||
97 | T r = p.len(); //calculate the distance from the sphere | 97 | T r = p.len(); //calculate the distance from the sphere |
98 | if(r < a) return; //exit if the point is inside the sphere (we only calculate the internal field) | 98 | if(r < a) return; //exit if the point is inside the sphere (we only calculate the internal field) |
99 | - T k = W[0].kmag(); | ||
100 | - size_t NC = Nl + 1; //calculate the number of coefficients to be used | ||
101 | - T kr = r * k; //calculate the thread value for k*r | ||
102 | - T fij = (kr - kr_min)/dkr; //FP index into the spherical bessel LUT | 99 | + T fij = (r - r_min)/dr; //FP index into the spherical bessel LUT |
103 | size_t ij = (size_t) fij; //convert to an integral index | 100 | size_t ij = (size_t) fij; //convert to an integral index |
104 | T alpha = fij - ij; //calculate the fractional portion of the index | 101 | T alpha = fij - ij; //calculate the fractional portion of the index |
105 | - size_t n0j = ij * (NC); //start of the first entry in the LUT | ||
106 | - size_t n1j = (ij+1) * (NC); //start of the second entry in the LUT | 102 | + size_t n0j = ij * (Nl + 1); //start of the first entry in the LUT |
103 | + size_t n1j = (ij+1) * (Nl + 1); //start of the second entry in the LUT | ||
107 | 104 | ||
108 | T cos_phi; | 105 | T cos_phi; |
109 | T Pl_2, Pl_1, Pl; //declare registers to store the previous two Legendre polynomials | 106 | T Pl_2, Pl_1, Pl; //declare registers to store the previous two Legendre polynomials |
@@ -112,37 +109,36 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* | @@ -112,37 +109,36 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* | ||
112 | stim::complex<T> Ei = 0; //create a register to store the result | 109 | stim::complex<T> Ei = 0; //create a register to store the result |
113 | int l; | 110 | int l; |
114 | 111 | ||
115 | - stim::complex<T> hlBl[LOCAL_NL+1]; | ||
116 | - int shared_start = threadIdx.x * (Nl - LOCAL_NL); | 112 | + stim::complex<T> hlBl[LOCAL_NL+1]; //the first LOCAL_NL components are stored in registers for speed |
113 | + int shared_start = threadIdx.x * (Nl - LOCAL_NL); //wrap up some operations so that they aren't done in the main loops | ||
117 | 114 | ||
118 | - #pragma unroll LOCAL_NL+1 | 115 | + #pragma unroll LOCAL_NL+1 //copy the first LOCAL_NL+1 h_l * B_l components to registers |
119 | for(l = 0; l <= LOCAL_NL; l++) | 116 | for(l = 0; l <= LOCAL_NL; l++) |
120 | hlBl[l] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha ); | 117 | hlBl[l] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha ); |
121 | 118 | ||
122 | - for(l = LOCAL_NL+1; l <= Nl; l++) | 119 | + for(l = LOCAL_NL+1; l <= Nl; l++) //copy any additional h_l * B_l components to shared memory |
123 | shared_hB[shared_start + (l - (LOCAL_NL+1))] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha ); | 120 | shared_hB[shared_start + (l - (LOCAL_NL+1))] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha ); |
124 | 121 | ||
125 | - for(size_t w = 0; w < nW; w++){ | 122 | + for(size_t w = 0; w < nW; w++){ //for each plane wave |
126 | cos_phi = p.norm().dot(W[w].kvec().norm()); //calculate the cosine of the angle between the k vector and the direction from the sphere | 123 | cos_phi = p.norm().dot(W[w].kvec().norm()); //calculate the cosine of the angle between the k vector and the direction from the sphere |
127 | - Pl_2 = 1; | 124 | + Pl_2 = 1; //the Legendre polynomials will be calculated recursively, initialize the first two steps of the recursive relation |
128 | Pl_1 = cos_phi; | 125 | Pl_1 = cos_phi; |
129 | - Ei += W[w].E() * hlBl[0] * Pl_2; | 126 | + Ei += W[w].E() * hlBl[0] * Pl_2; //unroll the first two orders using the initial steps of the Legendre recursive relation |
130 | Ei += W[w].E() * hlBl[1] * Pl_1; | 127 | Ei += W[w].E() * hlBl[1] * Pl_1; |
131 | 128 | ||
132 | - #pragma unroll LOCAL_NL-1 | 129 | + #pragma unroll LOCAL_NL-1 //unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file) |
133 | for(l = 2; l <= LOCAL_NL; l++){ | 130 | for(l = 2; l <= LOCAL_NL; l++){ |
134 | - Pl = ( (2 * l + 1) * cos_phi * Pl_1 - (l) * Pl_2 ) / (l+1); | ||
135 | - Ei += W[w].E() * hlBl[l] * Pl; | 131 | + Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //calculate the next step in the Legendre polynomial recursive relation (this is where most of the computation occurs) |
132 | + Ei += W[w].E() * hlBl[l] * Pl; //calculate and sum the current field order | ||
136 | Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1 | 133 | Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1 |
137 | Pl_1 = Pl; | 134 | Pl_1 = Pl; |
138 | } | 135 | } |
139 | 136 | ||
140 | - for(l = LOCAL_NL+1; l <= Nl; l++){ | ||
141 | - Pl = ( (2 * l + 1) * cos_phi * Pl_1 - (l) * Pl_2 ) / (l+1); | ||
142 | - Ei += W[w].E() * shared_hB[shared_start + (l - (LOCAL_NL+1))] * Pl; | ||
143 | - Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1 | ||
144 | - Pl_1 = Pl; | ||
145 | - | 137 | + for(l = LOCAL_NL+1; l <= Nl; l++){ //do the same as above, except for any additional orders that are stored in shared memory (not registers) |
138 | + Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //again, this is where most computation in the kernel occurs | ||
139 | + Ei += W[w].E() * shared_hB[shared_start + l - LOCAL_NL - 1] * Pl; | ||
140 | + Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1 | ||
141 | + Pl_1 = Pl; | ||
146 | } | 142 | } |
147 | } | 143 | } |
148 | E[i] += Ei; //copy the result to device memory | 144 | E[i] += Ei; //copy the result to device memory |
@@ -152,10 +148,10 @@ template<typename T> | @@ -152,10 +148,10 @@ template<typename T> | ||
152 | void gpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* hB, T kr_min, T dkr, size_t N_hB, size_t Nl){ | 148 | void gpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* hB, T kr_min, T dkr, size_t N_hB, size_t Nl){ |
153 | 149 | ||
154 | size_t max_shared_mem = stim::sharedMemPerBlock(); | 150 | size_t max_shared_mem = stim::sharedMemPerBlock(); |
155 | - int hBl_array = sizeof(stim::complex<T>) * (Nl + 1); | 151 | + size_t hBl_array = sizeof(stim::complex<T>) * (Nl + 1); |
156 | std::cout<<"hl*Bl array size: "<<hBl_array<<std::endl; | 152 | std::cout<<"hl*Bl array size: "<<hBl_array<<std::endl; |
157 | std::cout<<"shared memory: "<<max_shared_mem<<std::endl; | 153 | std::cout<<"shared memory: "<<max_shared_mem<<std::endl; |
158 | - int threads = (max_shared_mem / hBl_array) / 32 * 32; | 154 | + int threads = (int)((max_shared_mem / hBl_array) / 32 * 32); |
159 | std::cout<<"threads per block: "<<threads<<std::endl; | 155 | std::cout<<"threads per block: "<<threads<<std::endl; |
160 | dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks | 156 | dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks |
161 | 157 | ||
@@ -164,7 +160,6 @@ void gpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, sti | @@ -164,7 +160,6 @@ void gpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, sti | ||
164 | else shared_mem = threads * sizeof(stim::complex<T>) * (Nl - LOCAL_NL); //amount of shared memory to allocate | 160 | else shared_mem = threads * sizeof(stim::complex<T>) * (Nl - LOCAL_NL); //amount of shared memory to allocate |
165 | std::cout<<"shared memory allocated: "<<shared_mem<<std::endl; | 161 | std::cout<<"shared memory allocated: "<<shared_mem<<std::endl; |
166 | cuda_scalar_mie_scatter<T><<< blocks, threads, shared_mem >>>(E, N, x, y, z, W, nW, a, n, hB, kr_min, dkr, N_hB, (int)Nl); //call the kernel | 162 | cuda_scalar_mie_scatter<T><<< blocks, threads, shared_mem >>>(E, N, x, y, z, W, nW, a, n, hB, kr_min, dkr, N_hB, (int)Nl); //call the kernel |
167 | - | ||
168 | } | 163 | } |
169 | 164 | ||
170 | template<typename T> | 165 | template<typename T> |
@@ -261,16 +256,19 @@ void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, std | @@ -261,16 +256,19 @@ void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, std | ||
261 | exit(1); | 256 | exit(1); |
262 | } | 257 | } |
263 | 258 | ||
259 | + i_min--; //cuBLAS uses 1-based indexing for Fortran compatibility | ||
260 | + i_max--; | ||
264 | T r_min, r_max; //allocate space to store the minimum and maximum values | 261 | T r_min, r_max; //allocate space to store the minimum and maximum values |
265 | HANDLE_ERROR( cudaMemcpy(&r_min, dev_r + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU | 262 | HANDLE_ERROR( cudaMemcpy(&r_min, dev_r + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU |
266 | HANDLE_ERROR( cudaMemcpy(&r_max, dev_r + i_max, sizeof(T), cudaMemcpyDeviceToHost) ); | 263 | HANDLE_ERROR( cudaMemcpy(&r_max, dev_r + i_max, sizeof(T), cudaMemcpyDeviceToHost) ); |
267 | 264 | ||
265 | + r_min = max(r_min, a); //if the radius of the sphere is larger than r_min, change r_min to a (the scattered field doesn't exist inside the sphere) | ||
268 | 266 | ||
269 | //size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1); //number of values in the look-up table based on the user-specified spacing along r | 267 | //size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1); //number of values in the look-up table based on the user-specified spacing along r |
270 | size_t N_hB_lut = (size_t)((r_max - r_min) / r_spacing + 1); | 268 | size_t N_hB_lut = (size_t)((r_max - r_min) / r_spacing + 1); |
271 | 269 | ||
272 | - T kr_min = k * r_min; | ||
273 | - T kr_max = k * r_max; | 270 | + //T kr_min = k * r_min; |
271 | + //T kr_max = k * r_max; | ||
274 | 272 | ||
275 | //temporary variables | 273 | //temporary variables |
276 | double vm; //allocate space to store the return values for the bessel function calculation | 274 | double vm; //allocate space to store the return values for the bessel function calculation |
@@ -281,27 +279,29 @@ void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, std | @@ -281,27 +279,29 @@ void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, std | ||
281 | 279 | ||
282 | size_t hB_bytes = sizeof(stim::complex<T>) * (Nl+1) * N_hB_lut; | 280 | size_t hB_bytes = sizeof(stim::complex<T>) * (Nl+1) * N_hB_lut; |
283 | stim::complex<T>* hB_lut = (stim::complex<T>*) malloc(hB_bytes); //pointer to the look-up table | 281 | stim::complex<T>* hB_lut = (stim::complex<T>*) malloc(hB_bytes); //pointer to the look-up table |
284 | - T dkr = (kr_max - kr_min) / (N_hB_lut-1); //distance between values in the LUT | 282 | + T dr = (r_max - r_min) / (N_hB_lut-1); //distance between values in the LUT |
285 | std::cout<<"LUT jl bytes: "<<hB_bytes<<std::endl; | 283 | std::cout<<"LUT jl bytes: "<<hB_bytes<<std::endl; |
286 | stim::complex<T> hl; | 284 | stim::complex<T> hl; |
287 | - for(size_t kri = 0; kri < N_hB_lut; kri++){ //for each value in the LUT | ||
288 | - stim::bessjyv_sph<double>(Nl, kr_min + kri * dkr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl] | 285 | + for(size_t ri = 0; ri < N_hB_lut; ri++){ //for each value in the LUT |
286 | + stim::bessjyv_sph<double>(Nl, k * (r_min + ri * dr), vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl] | ||
289 | for(size_t l = 0; l <= Nl; l++){ //for each order | 287 | for(size_t l = 0; l <= Nl; l++){ //for each order |
290 | hl.r = (T)jv[l]; | 288 | hl.r = (T)jv[l]; |
291 | hl.i = (T)yv[l]; | 289 | hl.i = (T)yv[l]; |
292 | 290 | ||
293 | - hB_lut[kri * (Nl + 1) + l] = hl * B[l]; //store the bessel function result | 291 | + hB_lut[ri * (Nl + 1) + l] = hl * B[l]; //store the bessel function result |
292 | + //std::cout<<hB_lut[ri * (Nl + 1) + l]<<std::endl; | ||
294 | } | 293 | } |
295 | } | 294 | } |
296 | - | ||
297 | - //stim::cpu2image<T>(hankel_lut, "hankel.bmp", Nl+1, Nlut_j, stim::cmBrewer); | 295 | + T* real_lut = (T*) malloc(hB_bytes/2); |
296 | + stim::real(real_lut, hB_lut, N_hB_lut); | ||
297 | + stim::cpu2image<T>(real_lut, "hankel_B.bmp", Nl+1, N_hB_lut, stim::cmBrewer); | ||
298 | 298 | ||
299 | //Allocate device memory and copy everything to the GPU | 299 | //Allocate device memory and copy everything to the GPU |
300 | stim::complex<T>* dev_hB_lut; | 300 | stim::complex<T>* dev_hB_lut; |
301 | HANDLE_ERROR( cudaMalloc(&dev_hB_lut, hB_bytes) ); | 301 | HANDLE_ERROR( cudaMalloc(&dev_hB_lut, hB_bytes) ); |
302 | HANDLE_ERROR( cudaMemcpy(dev_hB_lut, hB_lut, hB_bytes, cudaMemcpyHostToDevice) ); | 302 | HANDLE_ERROR( cudaMemcpy(dev_hB_lut, hB_lut, hB_bytes, cudaMemcpyHostToDevice) ); |
303 | 303 | ||
304 | - gpu_scalar_mie_scatter<T>(dev_E, N, dev_x, dev_y, dev_z, dev_W, W.size(), a, n, dev_hB_lut, kr_min, dkr, N_hB_lut, Nl); | 304 | + gpu_scalar_mie_scatter<T>(dev_E, N, dev_x, dev_y, dev_z, dev_W, W.size(), a, n, dev_hB_lut, r_min, dr, N_hB_lut, Nl); |
305 | 305 | ||
306 | cudaMemcpy(E, dev_E, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost); //copy the field from device memory | 306 | cudaMemcpy(E, dev_E, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost); //copy the field from device memory |
307 | 307 | ||
@@ -349,9 +349,90 @@ void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, std | @@ -349,9 +349,90 @@ void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, std | ||
349 | } | 349 | } |
350 | 350 | ||
351 | template<typename T> | 351 | template<typename T> |
352 | -void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w, T a, stim::complex<T> n){ | 352 | +void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w, T a, stim::complex<T> n, T r_spacing = 0.1){ |
353 | std::vector< stim::scalarwave<T> > W(1, w); | 353 | std::vector< stim::scalarwave<T> > W(1, w); |
354 | - cpu_scalar_mie_scatter(E, N, x, y, z, W, a, n); | 354 | + cpu_scalar_mie_scatter(E, N, x, y, z, W, a, n, r_spacing); |
355 | +} | ||
356 | + | ||
357 | +template<typename T> | ||
358 | +__global__ void cuda_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* jA, T r_min, T dr, size_t N_jA, int Nl){ | ||
359 | + extern __shared__ stim::complex<T> shared_jA[]; //declare the list of waves in shared memory | ||
360 | + | ||
361 | + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array | ||
362 | + if(i >= N) return; //exit if this thread is outside the array | ||
363 | + stim::vec3<T> p; | ||
364 | + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions | ||
365 | + (y == NULL) ? p[1] = 0 : p[1] = y[i]; | ||
366 | + (z == NULL) ? p[2] = 0 : p[2] = z[i]; | ||
367 | + | ||
368 | + T r = p.len(); //calculate the distance from the sphere | ||
369 | + if(r > a) return; //exit if the point is inside the sphere (we only calculate the internal field) | ||
370 | + T fij = (r - r_min)/dr; //FP index into the spherical bessel LUT | ||
371 | + size_t ij = (size_t) fij; //convert to an integral index | ||
372 | + T alpha = fij - ij; //calculate the fractional portion of the index | ||
373 | + size_t n0j = ij * (Nl + 1); //start of the first entry in the LUT | ||
374 | + size_t n1j = (ij+1) * (Nl + 1); //start of the second entry in the LUT | ||
375 | + | ||
376 | + T cos_phi; | ||
377 | + T Pl_2, Pl_1, Pl; //declare registers to store the previous two Legendre polynomials | ||
378 | + | ||
379 | + stim::complex<T> jAl; | ||
380 | + stim::complex<T> Ei = 0; //create a register to store the result | ||
381 | + int l; | ||
382 | + | ||
383 | + stim::complex<T> jlAl[LOCAL_NL+1]; //the first LOCAL_NL components are stored in registers for speed | ||
384 | + int shared_start = threadIdx.x * (Nl - LOCAL_NL); //wrap up some operations so that they aren't done in the main loops | ||
385 | + | ||
386 | + #pragma unroll LOCAL_NL+1 //copy the first LOCAL_NL+1 h_l * B_l components to registers | ||
387 | + for(l = 0; l <= LOCAL_NL; l++) | ||
388 | + jlAl[l] = clerp<T>( jA[n0j + l], jA[n1j + l], alpha ); | ||
389 | + | ||
390 | + for(l = LOCAL_NL+1; l <= Nl; l++) //copy any additional h_l * B_l components to shared memory | ||
391 | + shared_jA[shared_start + (l - (LOCAL_NL+1))] = clerp<T>( jA[n0j + l], jA[n1j + l], alpha ); | ||
392 | + | ||
393 | + for(size_t w = 0; w < nW; w++){ //for each plane wave | ||
394 | + if(r == 0) cos_phi = 0; | ||
395 | + else | ||
396 | + cos_phi = p.norm().dot(W[w].kvec().norm()); //calculate the cosine of the angle between the k vector and the direction from the sphere | ||
397 | + Pl_2 = 1; //the Legendre polynomials will be calculated recursively, initialize the first two steps of the recursive relation | ||
398 | + Pl_1 = cos_phi; | ||
399 | + Ei += W[w].E() * jlAl[0] * Pl_2; //unroll the first two orders using the initial steps of the Legendre recursive relation | ||
400 | + Ei += W[w].E() * jlAl[1] * Pl_1; | ||
401 | + | ||
402 | + #pragma unroll LOCAL_NL-1 //unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file) | ||
403 | + for(l = 2; l <= LOCAL_NL; l++){ | ||
404 | + Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //calculate the next step in the Legendre polynomial recursive relation (this is where most of the computation occurs) | ||
405 | + Ei += W[w].E() * jlAl[l] * Pl; //calculate and sum the current field order | ||
406 | + Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1 | ||
407 | + Pl_1 = Pl; | ||
408 | + } | ||
409 | + | ||
410 | + for(l = LOCAL_NL+1; l <= Nl; l++){ //do the same as above, except for any additional orders that are stored in shared memory (not registers) | ||
411 | + Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //again, this is where most computation in the kernel occurs | ||
412 | + Ei += W[w].E() * shared_jA[shared_start + l - LOCAL_NL - 1] * Pl; | ||
413 | + Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1 | ||
414 | + Pl_1 = Pl; | ||
415 | + } | ||
416 | + } | ||
417 | + E[i] = Ei; //copy the result to device memory | ||
418 | +} | ||
419 | + | ||
420 | +template<typename T> | ||
421 | +void gpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* jA, T r_min, T dr, size_t N_jA, size_t Nl){ | ||
422 | + | ||
423 | + size_t max_shared_mem = stim::sharedMemPerBlock(); | ||
424 | + size_t hBl_array = sizeof(stim::complex<T>) * (Nl + 1); | ||
425 | + std::cout<<"hl*Bl array size: "<<hBl_array<<std::endl; | ||
426 | + std::cout<<"shared memory: "<<max_shared_mem<<std::endl; | ||
427 | + int threads = (int)((max_shared_mem / hBl_array) / 32 * 32); | ||
428 | + std::cout<<"threads per block: "<<threads<<std::endl; | ||
429 | + dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks | ||
430 | + | ||
431 | + size_t shared_mem; | ||
432 | + if(Nl <= LOCAL_NL) shared_mem = 0; | ||
433 | + else shared_mem = threads * sizeof(stim::complex<T>) * (Nl - LOCAL_NL); //amount of shared memory to allocate | ||
434 | + std::cout<<"shared memory allocated: "<<shared_mem<<std::endl; | ||
435 | + cuda_scalar_mie_internal<T><<< blocks, threads, shared_mem >>>(E, N, x, y, z, W, nW, a, n, jA, r_min, dr, N_jA, (int)Nl); //call the kernel | ||
355 | } | 436 | } |
356 | 437 | ||
357 | /// Calculate the scalar Mie solution for the internal field produced by a single plane wave scattered by a sphere | 438 | /// Calculate the scalar Mie solution for the internal field produced by a single plane wave scattered by a sphere |
@@ -365,18 +446,122 @@ void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, sti | @@ -365,18 +446,122 @@ void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, sti | ||
365 | /// @param a is the radius of the sphere | 446 | /// @param a is the radius of the sphere |
366 | /// @param n is the complex refractive index of the sphere | 447 | /// @param n is the complex refractive index of the sphere |
367 | template<typename T> | 448 | template<typename T> |
368 | -void cpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > W, T a, stim::complex<T> n){ | ||
369 | - | ||
370 | - //calculate the necessary number of orders required to represent the scattered field | 449 | +void cpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > W, T a, stim::complex<T> n, T r_spacing = 0.1){ |
450 | +//calculate the necessary number of orders required to represent the scattered field | ||
371 | T k = W[0].kmag(); | 451 | T k = W[0].kmag(); |
372 | 452 | ||
373 | - size_t Nl = ceil(k*a + 4 * cbrt( k * a ) + 2); | 453 | + int Nl = (int)ceil(k*a + 4 * cbrt( k * a ) + 2); |
454 | + if(Nl < LOCAL_NL) Nl = LOCAL_NL; //always do at least the minimum number of local operations (kernel optimization) | ||
374 | std::cout<<"Nl: "<<Nl<<std::endl; | 455 | std::cout<<"Nl: "<<Nl<<std::endl; |
375 | 456 | ||
376 | //calculate the scattering coefficients for the sphere | 457 | //calculate the scattering coefficients for the sphere |
377 | stim::complex<T>* A = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * (Nl + 1) ); //allocate space for the scattering coefficients | 458 | stim::complex<T>* A = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * (Nl + 1) ); //allocate space for the scattering coefficients |
378 | A_coefficients(A, a, k, n, Nl); | 459 | A_coefficients(A, a, k, n, Nl); |
379 | 460 | ||
461 | +#ifdef CUDA_FOUND | ||
462 | + stim::complex<T>* dev_E; //allocate space for the field | ||
463 | + cudaMalloc(&dev_E, N * sizeof(stim::complex<T>)); | ||
464 | + cudaMemcpy(dev_E, E, N * sizeof(stim::complex<T>), cudaMemcpyHostToDevice); | ||
465 | + //cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>)); //set the field to zero (necessary because a sum is used) | ||
466 | + | ||
467 | + // COORDINATES | ||
468 | + T* dev_x = NULL; //allocate space and copy the X coordinate (if specified) | ||
469 | + if(x != NULL){ | ||
470 | + HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T))); | ||
471 | + HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice)); | ||
472 | + } | ||
473 | + T* dev_y = NULL; //allocate space and copy the Y coordinate (if specified) | ||
474 | + if(y != NULL){ | ||
475 | + HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T))); | ||
476 | + HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice)); | ||
477 | + } | ||
478 | + T* dev_z = NULL; //allocate space and copy the Z coordinate (if specified) | ||
479 | + if(z != NULL){ | ||
480 | + HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T))); | ||
481 | + HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice)); | ||
482 | + } | ||
483 | + | ||
484 | + // PLANE WAVES | ||
485 | + stim::scalarwave<T>* dev_W; //allocate space and copy plane waves | ||
486 | + HANDLE_ERROR( cudaMalloc(&dev_W, sizeof(stim::scalarwave<T>) * W.size()) ); | ||
487 | + HANDLE_ERROR( cudaMemcpy(dev_W, &W[0], sizeof(stim::scalarwave<T>) * W.size(), cudaMemcpyHostToDevice) ); | ||
488 | + | ||
489 | + // BESSEL FUNCTION LOOK-UP TABLE | ||
490 | + //calculate the distance from the sphere center | ||
491 | + T* dev_r; | ||
492 | + HANDLE_ERROR( cudaMalloc(&dev_r, sizeof(T) * N) ); | ||
493 | + | ||
494 | + int threads = stim::maxThreadsPerBlock(); | ||
495 | + dim3 blocks((unsigned)(N / threads + 1)); | ||
496 | + cuda_dist<T> <<< blocks, threads >>>(dev_r, dev_x, dev_y, dev_z, N); | ||
497 | + | ||
498 | + //Find the minimum and maximum values of r | ||
499 | + cublasStatus_t stat; | ||
500 | + cublasHandle_t handle; | ||
501 | + | ||
502 | + stat = cublasCreate(&handle); //create a cuBLAS handle | ||
503 | + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure | ||
504 | + printf ("CUBLAS initialization failed\n"); | ||
505 | + exit(1); | ||
506 | + } | ||
507 | + | ||
508 | + int i_min, i_max; | ||
509 | + stat = cublasIsamin(handle, (int)N, dev_r, 1, &i_min); | ||
510 | + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure | ||
511 | + printf ("CUBLAS Error: failed to calculate minimum r value.\n"); | ||
512 | + exit(1); | ||
513 | + } | ||
514 | + stat = cublasIsamax(handle, (int)N, dev_r, 1, &i_max); | ||
515 | + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure | ||
516 | + printf ("CUBLAS Error: failed to calculate maximum r value.\n"); | ||
517 | + exit(1); | ||
518 | + } | ||
519 | + | ||
520 | + i_min--; //cuBLAS uses 1-based indexing for Fortran compatibility | ||
521 | + i_max--; | ||
522 | + T r_min, r_max; //allocate space to store the minimum and maximum values | ||
523 | + HANDLE_ERROR( cudaMemcpy(&r_min, dev_r + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU | ||
524 | + HANDLE_ERROR( cudaMemcpy(&r_max, dev_r + i_max, sizeof(T), cudaMemcpyDeviceToHost) ); | ||
525 | + | ||
526 | + r_max = min(r_max, a); //the internal field doesn't exist outside of the sphere | ||
527 | + | ||
528 | + size_t N_jA_lut = (size_t)((r_max - r_min) / r_spacing + 1); | ||
529 | + | ||
530 | + //temporary variables | ||
531 | + double vm; //allocate space to store the return values for the bessel function calculation | ||
532 | + stim::complex<double>* jv = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) ); | ||
533 | + stim::complex<double>* yv = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) ); | ||
534 | + stim::complex<double>* djv= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) ); | ||
535 | + stim::complex<double>* dyv= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) ); | ||
536 | + | ||
537 | + size_t jA_bytes = sizeof(stim::complex<T>) * (Nl+1) * N_jA_lut; | ||
538 | + stim::complex<T>* jA_lut = (stim::complex<T>*) malloc(jA_bytes); //pointer to the look-up table | ||
539 | + T dr = (r_max - r_min) / (N_jA_lut-1); //distance between values in the LUT | ||
540 | + std::cout<<"LUT jl bytes: "<<jA_bytes<<std::endl; | ||
541 | + stim::complex<T> hl; | ||
542 | + stim::complex<double> nd = (stim::complex<double>)n; | ||
543 | + for(size_t ri = 0; ri < N_jA_lut; ri++){ //for each value in the LUT | ||
544 | + stim::cbessjyva_sph<double>(Nl, nd * k * (r_min + ri * dr), vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl] | ||
545 | + for(size_t l = 0; l <= Nl; l++){ //for each order | ||
546 | + jA_lut[ri * (Nl + 1) + l] = (stim::complex<T>)(jv[l] * (stim::complex<double>)A[l]); //store the bessel function result | ||
547 | + } | ||
548 | + } | ||
549 | + | ||
550 | + //Allocate device memory and copy everything to the GPU | ||
551 | + stim::complex<T>* dev_jA_lut; | ||
552 | + HANDLE_ERROR( cudaMalloc(&dev_jA_lut, jA_bytes) ); | ||
553 | + HANDLE_ERROR( cudaMemcpy(dev_jA_lut, jA_lut, jA_bytes, cudaMemcpyHostToDevice) ); | ||
554 | + | ||
555 | + gpu_scalar_mie_internal<T>(dev_E, N, dev_x, dev_y, dev_z, dev_W, W.size(), a, n, dev_jA_lut, r_min, dr, N_jA_lut, Nl); | ||
556 | + | ||
557 | + cudaMemcpy(E, dev_E, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost); //copy the field from device memory | ||
558 | + | ||
559 | + if(x != NULL) cudaFree(dev_x); //free everything | ||
560 | + if(y != NULL) cudaFree(dev_y); | ||
561 | + if(z != NULL) cudaFree(dev_z); | ||
562 | + cudaFree(dev_E); | ||
563 | +#else | ||
564 | + | ||
380 | //allocate space to store the bessel function call results | 565 | //allocate space to store the bessel function call results |
381 | double vm; | 566 | double vm; |
382 | stim::complex<double>* j_knr = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) ); | 567 | stim::complex<double>* j_knr = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) ); |
@@ -414,12 +599,13 @@ void cpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, st | @@ -414,12 +599,13 @@ void cpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, st | ||
414 | } | 599 | } |
415 | } | 600 | } |
416 | } | 601 | } |
602 | +#endif | ||
417 | } | 603 | } |
418 | 604 | ||
419 | template<typename T> | 605 | template<typename T> |
420 | -void cpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w, T a, stim::complex<T> n){ | 606 | +void cpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w, T a, stim::complex<T> n, T r_spacing = 0.1){ |
421 | std::vector< stim::scalarwave<T> > W(1, w); | 607 | std::vector< stim::scalarwave<T> > W(1, w); |
422 | - cpu_scalar_mie_internal(E, N, x, y, z, W, a, n); | 608 | + cpu_scalar_mie_internal(E, N, x, y, z, W, a, n, r_spacing); |
423 | } | 609 | } |
424 | 610 | ||
425 | } | 611 | } |
stim/optics/scalarbeam.h
1 | #ifndef RTS_BEAM | 1 | #ifndef RTS_BEAM |
2 | #define RTS_BEAM | 2 | #define RTS_BEAM |
3 | +#include <boost/math/special_functions/bessel.hpp> | ||
3 | 4 | ||
4 | #include "../math/vec3.h" | 5 | #include "../math/vec3.h" |
5 | #include "../optics/scalarwave.h" | 6 | #include "../optics/scalarwave.h" |
@@ -7,150 +8,68 @@ | @@ -7,150 +8,68 @@ | ||
7 | #include "../math/legendre.h" | 8 | #include "../math/legendre.h" |
8 | #include "../cuda/cudatools/devices.h" | 9 | #include "../cuda/cudatools/devices.h" |
9 | #include "../cuda/cudatools/timer.h" | 10 | #include "../cuda/cudatools/timer.h" |
11 | +#include "../optics/scalarfield.h" | ||
10 | #include <cublas_v2.h> | 12 | #include <cublas_v2.h> |
11 | #include <math_constants.h> | 13 | #include <math_constants.h> |
12 | #include <vector> | 14 | #include <vector> |
13 | #include <stdlib.h> | 15 | #include <stdlib.h> |
14 | 16 | ||
15 | -namespace stim{ | ||
16 | 17 | ||
17 | - /// Function returns the value of the scalar field produced by a beam with the specified parameters | ||
18 | - | ||
19 | - template<typename T> | ||
20 | - std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d, T NA, T NA_in = 0){ | ||
21 | - | ||
22 | - std::vector< stim::vec3<T> > dirs(N); //allocate an array to store the focusing vectors | ||
23 | - | ||
24 | - ///compute the rotation operator to transform (0, 0, 1) to k | ||
25 | - T cos_angle = d.dot(vec3<T>(0, 0, 1)); | ||
26 | - stim::matrix<T, 3> rotation; | ||
27 | - | ||
28 | - //if the cosine of the angle is -1, the rotation is just a flip across the z axis | ||
29 | - if(cos_angle == -1){ | ||
30 | - rotation(2, 2) = -1; | ||
31 | - } | ||
32 | - else if(cos_angle != 1.0) | ||
33 | - { | ||
34 | - vec3<T> r_axis = vec3<T>(0, 0, 1).cross(d).norm(); //compute the axis of rotation | ||
35 | - T angle = acos(cos_angle); //compute the angle of rotation | ||
36 | - quaternion<T> quat; //create a quaternion describing the rotation | ||
37 | - quat.CreateRotation(angle, r_axis); | ||
38 | - rotation = quat.toMatrix3(); //compute the rotation matrix | ||
39 | - } | ||
40 | - | ||
41 | - //find the phi values associated with the cassegrain ring | ||
42 | - T PHI[2]; | ||
43 | - PHI[0] = (T)asin(NA); | ||
44 | - PHI[1] = (T)asin(NA_in); | ||
45 | - | ||
46 | - //calculate the z-axis cylinder coordinates associated with these angles | ||
47 | - T Z[2]; | ||
48 | - Z[0] = cos(PHI[0]); | ||
49 | - Z[1] = cos(PHI[1]); | ||
50 | - T range = Z[0] - Z[1]; | ||
51 | - | ||
52 | - //draw a distribution of random phi, z values | ||
53 | - T z, phi, theta; | ||
54 | - //T kmag = stim::TAU / lambda; | ||
55 | - for(int i=0; i<N; i++){ //for each sample | ||
56 | - z = (T)((double)rand() / (double)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder | ||
57 | - theta = (T)(((double)rand() / (double)RAND_MAX) * stim::TAU); | ||
58 | - phi = acos(z); //project onto the sphere, computing phi in spherical coordinates | ||
59 | - | ||
60 | - //compute and store cartesian coordinates | ||
61 | - vec3<T> spherical(1, theta, phi); //convert from spherical to cartesian coordinates | ||
62 | - vec3<T> cart = spherical.sph2cart(); | ||
63 | - dirs[i] = rotation * cart; //create a sample vector | ||
64 | - } | ||
65 | - return dirs; | ||
66 | - } | ||
67 | - | ||
68 | -/// Class stim::beam represents a beam of light focused at a point and composed of several plane waves | ||
69 | -template<typename T> | ||
70 | -class scalarbeam | ||
71 | -{ | ||
72 | -public: | ||
73 | - //enum beam_type {Uniform, Bartlett, Hamming, Hanning}; | ||
74 | - | ||
75 | -private: | ||
76 | - | ||
77 | - T NA[2]; //numerical aperature of the focusing optics | ||
78 | - vec3<T> f; //focal point | ||
79 | - vec3<T> d; //propagation direction | ||
80 | - stim::complex<T> A; //beam amplitude | ||
81 | - T lambda; //beam wavelength | ||
82 | -public: | ||
83 | 18 | ||
84 | - ///constructor: build a default beam (NA=1.0) | ||
85 | - scalarbeam(T wavelength = 1, stim::complex<T> amplitude = 1, vec3<T> focal_point = vec3<T>(0, 0, 0), vec3<T> direction = vec3<T>(0, 0, 1), T numerical_aperture = 1, T center_obsc = 0){ | ||
86 | - lambda = wavelength; | ||
87 | - A = amplitude; | ||
88 | - f = focal_point; | ||
89 | - d = direction.norm(); //make sure that the direction vector is normalized (makes calculations more efficient later on) | ||
90 | - NA[0] = numerical_aperture; | ||
91 | - NA[1] = center_obsc; | ||
92 | - } | 19 | +namespace stim{ |
93 | 20 | ||
94 | - ///Numerical Aperature functions | ||
95 | - void setNA(T na) | ||
96 | - { | ||
97 | - NA[0] = (T)0; | ||
98 | - NA[1] = na; | ||
99 | - } | ||
100 | - void setNA(T na0, T na1) | ||
101 | - { | ||
102 | - NA[0] = na0; | ||
103 | - NA[1] = na1; | ||
104 | - } | 21 | +/// Function returns the value of the scalar field produced by a beam with the specified parameters |
105 | 22 | ||
106 | - //Monte-Carlo decomposition into plane waves | ||
107 | - std::vector< scalarwave<T> > mc(size_t N = 100000) const{ | 23 | +template<typename T> |
24 | +std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d, T NA, T NA_in = 0){ | ||
108 | 25 | ||
109 | - std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]); //generate a random set of N vectors forming a focus | ||
110 | - std::vector< scalarwave<T> > samples(N); //create a vector of plane waves | ||
111 | - T kmag = (T)stim::TAU / lambda; //calculate the wavenumber | ||
112 | - stim::complex<T> apw; //allocate space for the amplitude at the focal point | ||
113 | - T a = (T)(stim::TAU * (1 - cos(asin(NA[0]))) / (double)N); | ||
114 | - stim::vec3<T> kpw; //declare the new k-vector based on the focused plane wave direction | ||
115 | - for(size_t i=0; i<N; i++){ //for each sample | ||
116 | - kpw = dirs[i] * kmag; //calculate the k-vector for the new plane wave | ||
117 | - apw = a * exp(stim::complex<T>(0, kpw.dot(-f))); //calculate the amplitude for the new plane wave | ||
118 | - samples[i] = scalarwave<T>(kpw, apw); //create a plane wave based on the direction | ||
119 | - } | 26 | + std::vector< stim::vec3<T> > dirs(N); //allocate an array to store the focusing vectors |
120 | 27 | ||
121 | - return samples; | ||
122 | - } | 28 | + ///compute the rotation operator to transform (0, 0, 1) to k |
29 | + T cos_angle = d.dot(vec3<T>(0, 0, 1)); | ||
30 | + stim::matrix<T, 3> rotation; | ||
123 | 31 | ||
124 | - /// Calculate the field at a given point | ||
125 | - /// @param x is the x-coordinate of the field point | ||
126 | - /// @O is the approximation accuracy | ||
127 | - stim::complex<T> field(T x, T y, T z, size_t O){ | ||
128 | - std::vector< scalarwave<T> > W = mc(O); | ||
129 | - T result = 0; //initialize the result to zero (0) | ||
130 | - for(size_t i = 0; i < O; i++){ //for each plane wave | ||
131 | - result += W[i].pos(x, y, z); | ||
132 | - } | ||
133 | - return result; | 32 | + //if the cosine of the angle is -1, the rotation is just a flip across the z axis |
33 | + if(cos_angle == -1){ | ||
34 | + rotation(2, 2) = -1; | ||
134 | } | 35 | } |
135 | - | ||
136 | - std::string str() | 36 | + else if(cos_angle != 1.0) |
137 | { | 37 | { |
138 | - std::stringstream ss; | ||
139 | - ss<<"Beam:"<<std::endl; | ||
140 | - //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl; | ||
141 | - ss<<" Beam Direction: "<<d<<std::endl; | ||
142 | - if(NA[0] == 0) | ||
143 | - ss<<" NA: "<<NA[1]; | ||
144 | - else | ||
145 | - ss<<" NA: "<<NA[0]<<" -- "<<NA[1]; | ||
146 | - | ||
147 | - return ss.str(); | 38 | + vec3<T> r_axis = vec3<T>(0, 0, 1).cross(d).norm(); //compute the axis of rotation |
39 | + T angle = acos(cos_angle); //compute the angle of rotation | ||
40 | + quaternion<T> quat; //create a quaternion describing the rotation | ||
41 | + quat.CreateRotation(angle, r_axis); | ||
42 | + rotation = quat.toMatrix3(); //compute the rotation matrix | ||
148 | } | 43 | } |
149 | 44 | ||
45 | + //find the phi values associated with the cassegrain ring | ||
46 | + T PHI[2]; | ||
47 | + PHI[0] = (T)asin(NA); | ||
48 | + PHI[1] = (T)asin(NA_in); | ||
49 | + | ||
50 | + //calculate the z-axis cylinder coordinates associated with these angles | ||
51 | + T Z[2]; | ||
52 | + Z[0] = cos(PHI[0]); | ||
53 | + Z[1] = cos(PHI[1]); | ||
54 | + T range = Z[0] - Z[1]; | ||
55 | + | ||
56 | + //draw a distribution of random phi, z values | ||
57 | + T z, phi, theta; | ||
58 | + //T kmag = stim::TAU / lambda; | ||
59 | + for(int i=0; i<N; i++){ //for each sample | ||
60 | + z = (T)((double)rand() / (double)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder | ||
61 | + theta = (T)(((double)rand() / (double)RAND_MAX) * stim::TAU); | ||
62 | + phi = acos(z); //project onto the sphere, computing phi in spherical coordinates | ||
63 | + | ||
64 | + //compute and store cartesian coordinates | ||
65 | + vec3<T> spherical(1, theta, phi); //convert from spherical to cartesian coordinates | ||
66 | + vec3<T> cart = spherical.sph2cart(); | ||
67 | + dirs[i] = rotation * cart; //create a sample vector | ||
68 | + } | ||
69 | + return dirs; | ||
70 | +} | ||
150 | 71 | ||
151 | - | ||
152 | -}; //end beam | ||
153 | - | 72 | + |
154 | /// Calculate the [0 Nl] terms for the aperture integral based on the give numerical aperture and center obscuration (optional) | 73 | /// Calculate the [0 Nl] terms for the aperture integral based on the give numerical aperture and center obscuration (optional) |
155 | /// @param C is a pointer to Nl + 1 values where the terms will be stored | 74 | /// @param C is a pointer to Nl + 1 values where the terms will be stored |
156 | template<typename T> | 75 | template<typename T> |
@@ -210,20 +129,19 @@ CUDA_CALLABLE T lerp(T v0, T v1, T t) { | @@ -210,20 +129,19 @@ CUDA_CALLABLE T lerp(T v0, T v1, T t) { | ||
210 | return fma(t, v1, fma(-t, v0, v0)); | 129 | return fma(t, v1, fma(-t, v0, v0)); |
211 | } | 130 | } |
212 | 131 | ||
213 | -#ifdef __CUDACC__ | 132 | +#ifdef CUDA_FOUND |
214 | template<typename T> | 133 | template<typename T> |
215 | -__global__ void cuda_scalar_psf(stim::complex<T>* E, size_t N, T* r, T* phi, T k, T A, size_t Nl, | 134 | +__global__ void cuda_scalar_psf(stim::complex<T>* E, size_t N, T* r, T* phi, T A, size_t Nl, |
216 | T* C, | 135 | T* C, |
217 | - T* lut_j, size_t Nj, T min_kr, T dkr){ | 136 | + T* lut_j, size_t Nj, T min_r, T dr){ |
218 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array | 137 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array |
219 | if(i >= N) return; //exit if this thread is outside the array | 138 | if(i >= N) return; //exit if this thread is outside the array |
220 | 139 | ||
221 | T cos_phi = cos(phi[i]); //calculate the thread value for cos(phi) | 140 | T cos_phi = cos(phi[i]); //calculate the thread value for cos(phi) |
222 | - T kr = r[i] * k; //calculate the thread value for k*r | ||
223 | stim::complex<T> Ei = 0; //initialize the value of the field to zero | 141 | stim::complex<T> Ei = 0; //initialize the value of the field to zero |
224 | size_t NC = Nl + 1; //calculate the number of coefficients to be used | 142 | size_t NC = Nl + 1; //calculate the number of coefficients to be used |
225 | 143 | ||
226 | - T fij = (kr - min_kr)/dkr; //FP index into the spherical bessel LUT | 144 | + T fij = (r[i] - min_r)/dr; //FP index into the spherical bessel LUT |
227 | size_t ij = (size_t) fij; //convert to an integral index | 145 | size_t ij = (size_t) fij; //convert to an integral index |
228 | T a = fij - ij; //calculate the fractional portion of the index | 146 | T a = fij - ij; //calculate the fractional portion of the index |
229 | size_t n0j = ij * (NC); //start of the first entry in the LUT | 147 | size_t n0j = ij * (NC); //start of the first entry in the LUT |
@@ -276,6 +194,8 @@ void gpu_scalar_psf_local(stim::complex<T>* E, size_t N, T* r, T* phi, T lambda, | @@ -276,6 +194,8 @@ void gpu_scalar_psf_local(stim::complex<T>* E, size_t N, T* r, T* phi, T lambda, | ||
276 | exit(1); | 194 | exit(1); |
277 | } | 195 | } |
278 | 196 | ||
197 | + i_min--; //cuBLAS uses 1-based indexing for Fortran compatibility | ||
198 | + i_max--; | ||
279 | T r_min, r_max; //allocate space to store the minimum and maximum values | 199 | T r_min, r_max; //allocate space to store the minimum and maximum values |
280 | HANDLE_ERROR( cudaMemcpy(&r_min, r + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU | 200 | HANDLE_ERROR( cudaMemcpy(&r_min, r + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU |
281 | HANDLE_ERROR( cudaMemcpy(&r_max, r + i_max, sizeof(T), cudaMemcpyDeviceToHost) ); | 201 | HANDLE_ERROR( cudaMemcpy(&r_max, r + i_max, sizeof(T), cudaMemcpyDeviceToHost) ); |
@@ -287,29 +207,19 @@ void gpu_scalar_psf_local(stim::complex<T>* E, size_t N, T* r, T* phi, T lambda, | @@ -287,29 +207,19 @@ void gpu_scalar_psf_local(stim::complex<T>* E, size_t N, T* r, T* phi, T lambda, | ||
287 | 207 | ||
288 | size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1); //number of values in the look-up table based on the user-specified spacing along r | 208 | size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1); //number of values in the look-up table based on the user-specified spacing along r |
289 | 209 | ||
290 | - T kr_min = k * r_min; | ||
291 | - T kr_max = k * r_max; | ||
292 | - | ||
293 | - //temporary variables | ||
294 | - double vm; //allocate space to store the return values for the bessel function calculation | ||
295 | - double* jv = (double*) malloc( (Nl + 1) * sizeof(double) ); | ||
296 | - double* yv = (double*) malloc( (Nl + 1) * sizeof(double) ); | ||
297 | - double* djv= (double*) malloc( (Nl + 1) * sizeof(double) ); | ||
298 | - double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) ); | ||
299 | 210 | ||
300 | size_t lutj_bytes = sizeof(T) * (Nl+1) * Nlut_j; | 211 | size_t lutj_bytes = sizeof(T) * (Nl+1) * Nlut_j; |
301 | - T* bessel_lut = (T*) malloc(lutj_bytes); //pointer to the look-up table | ||
302 | - T delta_kr = (kr_max - kr_min) / (Nlut_j-1); //distance between values in the LUT | ||
303 | - std::cout<<"LUT jl bytes: "<<lutj_bytes<<std::endl; | ||
304 | - for(size_t kri = 0; kri < Nlut_j; kri++){ //for each value in the LUT | ||
305 | - stim::bessjyv_sph<double>(Nl, kr_min + kri * delta_kr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl] | 212 | + T* j_lut = (T*) malloc(lutj_bytes); //pointer to the look-up table |
213 | + T dr = (r_max - r_min) / (Nlut_j-1); //distance between values in the LUT | ||
214 | + T jl; | ||
215 | + for(size_t ri = 0; ri < Nlut_j; ri++){ //for each value in the LUT | ||
306 | for(size_t l = 0; l <= Nl; l++){ //for each order | 216 | for(size_t l = 0; l <= Nl; l++){ //for each order |
307 | - bessel_lut[kri * (Nl + 1) + l] = (T)jv[l]; //store the bessel function result | 217 | + jl = boost::math::sph_bessel<T>(l, k*(r_min + ri * dr)); //use boost to calculate the spherical bessel function |
218 | + j_lut[ri * (Nl + 1) + l] = jl; //store the bessel function result | ||
308 | } | 219 | } |
309 | } | 220 | } |
310 | 221 | ||
311 | - stim::cpu2image<T>(bessel_lut, "lut.bmp", Nl+1, Nlut_j, stim::cmBrewer); | ||
312 | - | 222 | + stim::cpu2image<T>(j_lut, "j_lut.bmp", Nl+1, Nlut_j, stim::cmBrewer); |
313 | //Allocate device memory and copy everything to the GPU | 223 | //Allocate device memory and copy everything to the GPU |
314 | 224 | ||
315 | T* gpu_C; | 225 | T* gpu_C; |
@@ -317,12 +227,12 @@ void gpu_scalar_psf_local(stim::complex<T>* E, size_t N, T* r, T* phi, T lambda, | @@ -317,12 +227,12 @@ void gpu_scalar_psf_local(stim::complex<T>* E, size_t N, T* r, T* phi, T lambda, | ||
317 | HANDLE_ERROR( cudaMemcpy(gpu_C, C, C_bytes, cudaMemcpyHostToDevice) ); | 227 | HANDLE_ERROR( cudaMemcpy(gpu_C, C, C_bytes, cudaMemcpyHostToDevice) ); |
318 | T* gpu_j_lut; | 228 | T* gpu_j_lut; |
319 | HANDLE_ERROR( cudaMalloc(&gpu_j_lut, lutj_bytes) ); | 229 | HANDLE_ERROR( cudaMalloc(&gpu_j_lut, lutj_bytes) ); |
320 | - HANDLE_ERROR( cudaMemcpy(gpu_j_lut, bessel_lut, lutj_bytes, cudaMemcpyHostToDevice) ); | 230 | + HANDLE_ERROR( cudaMemcpy(gpu_j_lut, j_lut, lutj_bytes, cudaMemcpyHostToDevice) ); |
321 | 231 | ||
322 | int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device | 232 | int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device |
323 | dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks | 233 | dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks |
324 | 234 | ||
325 | - cuda_scalar_psf<T><<< blocks, threads >>>(E, N, r, phi, (T)stim::TAU/lambda, A, Nl, gpu_C, gpu_j_lut, Nlut_j, kr_min, delta_kr); | 235 | + cuda_scalar_psf<T><<< blocks, threads >>>(E, N, r, phi, A, Nl, gpu_C, gpu_j_lut, Nlut_j, r_min, dr); |
326 | 236 | ||
327 | //free the LUT and condenser tables | 237 | //free the LUT and condenser tables |
328 | HANDLE_ERROR( cudaFree(gpu_C) ); | 238 | HANDLE_ERROR( cudaFree(gpu_C) ); |
@@ -392,7 +302,7 @@ __global__ void cuda_cart2psf(T* r, T* phi, size_t N, T* x, T* y, T* z, stim::ve | @@ -392,7 +302,7 @@ __global__ void cuda_cart2psf(T* r, T* phi, size_t N, T* x, T* y, T* z, stim::ve | ||
392 | phi[i] = ps[2]; //phi = [0 pi] | 302 | phi[i] = ps[2]; //phi = [0 pi] |
393 | } | 303 | } |
394 | 304 | ||
395 | -#ifdef __CUDACC__ | 305 | +#ifdef CUDA_FOUND |
396 | /// Calculate the analytical solution to a point spread function given a set of points in cartesian coordinates | 306 | /// Calculate the analytical solution to a point spread function given a set of points in cartesian coordinates |
397 | template<typename T> | 307 | template<typename T> |
398 | void gpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){ | 308 | void gpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){ |
@@ -419,7 +329,7 @@ template<typename T> | @@ -419,7 +329,7 @@ template<typename T> | ||
419 | void cpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){ | 329 | void cpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){ |
420 | 330 | ||
421 | // If CUDA is available, copy the cartesian points to the GPU and evaluate them in a kernel | 331 | // If CUDA is available, copy the cartesian points to the GPU and evaluate them in a kernel |
422 | -#ifdef __CUDACC__ | 332 | +#ifdef CUDA_FOUND |
423 | 333 | ||
424 | T* gpu_x = NULL; | 334 | T* gpu_x = NULL; |
425 | if(x != NULL){ | 335 | if(x != NULL){ |
@@ -470,13 +380,112 @@ void cpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lamb | @@ -470,13 +380,112 @@ void cpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lamb | ||
470 | phi[i] = ps[2]; //phi = [0 pi] | 380 | phi[i] = ps[2]; //phi = [0 pi] |
471 | } | 381 | } |
472 | 382 | ||
473 | - cpu_scalar_psf_local(F, N, r, phi, lambda, A, NA, NA_in, Nl); //call the spherical coordinate CPU function | 383 | + cpu_scalar_psf_local(E, N, r, phi, lambda, A, NA, NA_in, Nl); //call the spherical coordinate CPU function |
474 | 384 | ||
475 | free(r); | 385 | free(r); |
476 | free(phi); | 386 | free(phi); |
477 | #endif | 387 | #endif |
478 | } | 388 | } |
389 | + | ||
390 | +/// Class stim::beam represents a beam of light focused at a point and composed of several plane waves | ||
391 | +template<typename T> | ||
392 | +class scalarbeam | ||
393 | +{ | ||
394 | +public: | ||
395 | + //enum beam_type {Uniform, Bartlett, Hamming, Hanning}; | ||
396 | + | ||
397 | +private: | ||
398 | + | ||
399 | + T NA[2]; //numerical aperature of the focusing optics | ||
400 | + vec3<T> f; //focal point | ||
401 | + vec3<T> d; //propagation direction | ||
402 | + T A; //beam amplitude | ||
403 | + T lambda; //beam wavelength | ||
404 | +public: | ||
479 | 405 | ||
406 | + ///constructor: build a default beam (NA=1.0) | ||
407 | + scalarbeam(T wavelength = 1, T amplitude = 1, vec3<T> focal_point = vec3<T>(0, 0, 0), vec3<T> direction = vec3<T>(0, 0, 1), T numerical_aperture = 1, T center_obsc = 0){ | ||
408 | + lambda = wavelength; | ||
409 | + A = amplitude; | ||
410 | + f = focal_point; | ||
411 | + d = direction.norm(); //make sure that the direction vector is normalized (makes calculations more efficient later on) | ||
412 | + NA[0] = numerical_aperture; | ||
413 | + NA[1] = center_obsc; | ||
414 | + } | ||
415 | + | ||
416 | + ///Numerical Aperature functions | ||
417 | + void setNA(T na) | ||
418 | + { | ||
419 | + NA[0] = (T)0; | ||
420 | + NA[1] = na; | ||
421 | + } | ||
422 | + void setNA(T na0, T na1) | ||
423 | + { | ||
424 | + NA[0] = na0; | ||
425 | + NA[1] = na1; | ||
426 | + } | ||
427 | + | ||
428 | + //Monte-Carlo decomposition into plane waves | ||
429 | + std::vector< scalarwave<T> > mc(size_t N = 100000) const{ | ||
430 | + | ||
431 | + std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]); //generate a random set of N vectors forming a focus | ||
432 | + std::vector< scalarwave<T> > samples(N); //create a vector of plane waves | ||
433 | + T kmag = (T)stim::TAU / lambda; //calculate the wavenumber | ||
434 | + stim::complex<T> apw; //allocate space for the amplitude at the focal point | ||
435 | + T a = (T)(stim::TAU * ( (1 - cos(asin(NA[0]))) - (1 - cos(asin(NA[1])))) / (double)N); //constant value weights plane waves based on the aperture and number of samples (N) | ||
436 | + stim::vec3<T> kpw; //declare the new k-vector based on the focused plane wave direction | ||
437 | + for(size_t i=0; i<N; i++){ //for each sample | ||
438 | + kpw = dirs[i] * kmag; //calculate the k-vector for the new plane wave | ||
439 | + apw = a * exp(stim::complex<T>(0, kpw.dot(-f))); //calculate the amplitude for the new plane wave | ||
440 | + samples[i] = scalarwave<T>(kpw, apw); //create a plane wave based on the direction | ||
441 | + } | ||
442 | + return samples; | ||
443 | + } | ||
444 | + | ||
445 | + /// Evaluate the beam to a scalar field using Debye focusing | ||
446 | + void eval(stim::scalarfield<T>& E, size_t order = 500){ | ||
447 | + size_t array_size = E.grid_bytes(); | ||
448 | + T* X = (T*) malloc( array_size ); //allocate space for the coordinate meshes | ||
449 | + T* Y = (T*) malloc( array_size ); | ||
450 | + T* Z = (T*) malloc( array_size ); | ||
451 | + | ||
452 | + E.meshgrid(X, Y, Z, stim::CPUmem); //calculate the coordinate meshes | ||
453 | + cpu_scalar_psf_cart<T>(E.ptr(), E.size(), X, Y, Z, lambda, A, f, d, NA[0], NA[1], order, E.spacing()); | ||
454 | + | ||
455 | + free(X); //free the coordinate meshes | ||
456 | + free(Y); | ||
457 | + free(Z); | ||
458 | + } | ||
459 | + | ||
460 | + /// Calculate the field at a given point | ||
461 | + /// @param x is the x-coordinate of the field point | ||
462 | + /// @O is the approximation accuracy | ||
463 | + stim::complex<T> field(T x, T y, T z, size_t O){ | ||
464 | + std::vector< scalarwave<T> > W = mc(O); | ||
465 | + T result = 0; //initialize the result to zero (0) | ||
466 | + for(size_t i = 0; i < O; i++){ //for each plane wave | ||
467 | + result += W[i].pos(x, y, z); | ||
468 | + } | ||
469 | + return result; | ||
470 | + } | ||
471 | + | ||
472 | + std::string str() | ||
473 | + { | ||
474 | + std::stringstream ss; | ||
475 | + ss<<"Beam:"<<std::endl; | ||
476 | + //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl; | ||
477 | + ss<<" Beam Direction: "<<d<<std::endl; | ||
478 | + if(NA[0] == 0) | ||
479 | + ss<<" NA: "<<NA[1]; | ||
480 | + else | ||
481 | + ss<<" NA: "<<NA[0]<<" -- "<<NA[1]; | ||
482 | + | ||
483 | + return ss.str(); | ||
484 | + } | ||
485 | + | ||
486 | + | ||
487 | + | ||
488 | +}; //end beam | ||
480 | } //end namespace stim | 489 | } //end namespace stim |
481 | 490 | ||
482 | #endif | 491 | #endif |