Commit 0288346a9b7c9790d2df719321a38eee3274ca31
1 parent
361c02a1
added support for computing scalar beams analytically
Showing
3 changed files
with
151 additions
and
63 deletions
Show diff stats
stim/math/bessel.h
@@ -778,17 +778,23 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv, | @@ -778,17 +778,23 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv, | ||
778 | 778 | ||
779 | template<typename P> | 779 | template<typename P> |
780 | int bessjyv_sph(int v, P z, P &vm, P* cjv, | 780 | int bessjyv_sph(int v, P z, P &vm, P* cjv, |
781 | - P* cyv, P* cjvp, P* cyvp) | ||
782 | -{ | 781 | + P* cyv, P* cjvp, P* cyvp){ |
782 | + | ||
783 | //first, compute the bessel functions of fractional order | 783 | //first, compute the bessel functions of fractional order |
784 | bessjyv<P>(v + (P)0.5, z, vm, cjv, cyv, cjvp, cyvp); | 784 | bessjyv<P>(v + (P)0.5, z, vm, cjv, cyv, cjvp, cyvp); |
785 | 785 | ||
786 | + if(z == 0){ //handle degenerate case of z = 0 | ||
787 | + memset(cjv, 0, sizeof(P) * (v+1)); | ||
788 | + cjv[0] = 1; | ||
789 | + } | ||
790 | + | ||
786 | //iterate through each and scale | 791 | //iterate through each and scale |
787 | - for(int n = 0; n<=v; n++) | ||
788 | - { | 792 | + for(int n = 0; n<=v; n++){ |
789 | 793 | ||
790 | - cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); | ||
791 | - cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); | 794 | + if(z != 0){ //handle degenerate case of z = 0 |
795 | + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); | ||
796 | + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); | ||
797 | + } | ||
792 | 798 | ||
793 | cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); | 799 | cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); |
794 | cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); | 800 | cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); |
stim/math/legendre.h
1 | #ifndef RTS_LEGENDRE_H | 1 | #ifndef RTS_LEGENDRE_H |
2 | #define RTS_LEGENDRE_H | 2 | #define RTS_LEGENDRE_H |
3 | 3 | ||
4 | -#include "rts/cuda/callable.h" | 4 | +#include "../cuda/cudatools/callable.h" |
5 | 5 | ||
6 | namespace stim{ | 6 | namespace stim{ |
7 | 7 | ||
@@ -24,9 +24,11 @@ CUDA_CALLABLE void shift_legendre(int n, T x, T& P0, T& P1) | @@ -24,9 +24,11 @@ CUDA_CALLABLE void shift_legendre(int n, T x, T& P0, T& P1) | ||
24 | P1 = Pnew; | 24 | P1 = Pnew; |
25 | } | 25 | } |
26 | 26 | ||
27 | +/// Iteratively evaluates the Legendre polynomials for orders l = [0 n] | ||
27 | template <typename T> | 28 | template <typename T> |
28 | CUDA_CALLABLE void legendre(int n, T x, T* P) | 29 | CUDA_CALLABLE void legendre(int n, T x, T* P) |
29 | { | 30 | { |
31 | + if(n < 0) return; | ||
30 | P[0] = 1; | 32 | P[0] = 1; |
31 | 33 | ||
32 | if(n >= 1) | 34 | if(n >= 1) |
stim/optics/scalarbeam.h
@@ -4,12 +4,9 @@ | @@ -4,12 +4,9 @@ | ||
4 | #include "../math/vec3.h" | 4 | #include "../math/vec3.h" |
5 | #include "../optics/scalarwave.h" | 5 | #include "../optics/scalarwave.h" |
6 | #include "../math/bessel.h" | 6 | #include "../math/bessel.h" |
7 | +#include "../math/legendre.h" | ||
7 | #include <vector> | 8 | #include <vector> |
8 | 9 | ||
9 | -//Boost | ||
10 | -//#include <boost/math/special_functions/bessel.hpp> | ||
11 | -//#include <boost/math/special_functions/legendre.hpp> | ||
12 | - | ||
13 | namespace stim{ | 10 | namespace stim{ |
14 | 11 | ||
15 | /// Function returns the value of the scalar field produced by a beam with the specified parameters | 12 | /// Function returns the value of the scalar field produced by a beam with the specified parameters |
@@ -130,23 +127,6 @@ public: | @@ -130,23 +127,6 @@ public: | ||
130 | return result; | 127 | return result; |
131 | } | 128 | } |
132 | 129 | ||
133 | - /// Calculate the field at a set of positions | ||
134 | - /*void field(stim::complex<T>* F, T* x, T* y, T* z, size_t N, size_t O){ | ||
135 | - | ||
136 | - memset(F, 0, N * sizeof(stim::complex<T>)); | ||
137 | - std::vector< scalarwave<T> > W = mc(O); //get a random set of plane waves representing the beam | ||
138 | - size_t o, n; | ||
139 | - T px, py, pz; | ||
140 | - for(n = 0; n < N; n++){ //for each point in the field | ||
141 | - (x == NULL) ? px = 0 : px = x[n]; // test for NULL values | ||
142 | - (y == NULL) ? py = 0 : py = y[n]; | ||
143 | - (z == NULL) ? pz = 0 : pz = z[n]; | ||
144 | - for(o = 0; o < O; o++){ //for each plane wave | ||
145 | - F[n] += W[o].pos(px, py, pz); | ||
146 | - } | ||
147 | - } | ||
148 | - }*/ | ||
149 | - | ||
150 | std::string str() | 130 | std::string str() |
151 | { | 131 | { |
152 | std::stringstream ss; | 132 | std::stringstream ss; |
@@ -165,51 +145,151 @@ public: | @@ -165,51 +145,151 @@ public: | ||
165 | 145 | ||
166 | }; //end beam | 146 | }; //end beam |
167 | 147 | ||
148 | +/// Calculate the [0 Nl] terms for the aperture integral based on the give numerical aperture and center obscuration (optional) | ||
149 | +/// @param C is a pointer to Nl + 1 values where the terms will be stored | ||
168 | template<typename T> | 150 | template<typename T> |
169 | -void cpu_scalar_psf(stim::complex<T>* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, T NA, T NA_in, int Nl){ | 151 | +CUDA_CALLABLE void cpu_aperture_integral(T* C, size_t Nl, T NA, T NA_in = 0){ |
170 | 152 | ||
171 | - memset(F, 0, N * sizeof(stim::complex<T>)); | 153 | + size_t table_bytes = (Nl + 1) * sizeof(T); //calculate the number of bytes required to store the terms |
154 | + T cos_alpha_1 = cos(asin(NA_in)); //calculate the cosine of the angle subtended by the central obscuration | ||
155 | + T cos_alpha_2 = cos(asin(NA)); //calculate the cosine of the angle subtended by the aperture | ||
156 | + | ||
157 | + // the aperture integral is computed using four individual Legendre polynomials, each a function of the angles subtended | ||
158 | + // by the objective and central obscuration | ||
159 | + T* Pln_a1 = (T*) malloc(table_bytes); | ||
160 | + stim::legendre<T>(Nl-1, cos_alpha_1, &Pln_a1[1]); | ||
161 | + Pln_a1[0] = 1; | ||
162 | + | ||
163 | + T* Pln_a2 = (T*) malloc(table_bytes); | ||
164 | + stim::legendre<T>(Nl-1, cos_alpha_2, &Pln_a2[1]); | ||
165 | + Pln_a2[0] = 1; | ||
166 | + | ||
167 | + T* Plp_a1 = (T*) malloc(table_bytes+sizeof(T)); | ||
168 | + stim::legendre<T>(Nl+1, cos_alpha_1, Plp_a1); | ||
169 | + | ||
170 | + T* Plp_a2 = (T*) malloc(table_bytes+sizeof(T)); | ||
171 | + stim::legendre<T>(Nl+1, cos_alpha_2, Plp_a2); | ||
172 | + | ||
173 | + for(size_t l = 0; l <= Nl; l++){ | ||
174 | + C[l] = Plp_a1[l+1] - Plp_a2[l+1] - Pln_a1[l] + Pln_a2[l]; | ||
175 | + } | ||
176 | + | ||
177 | + free(Pln_a1); | ||
178 | + free(Pln_a2); | ||
179 | + free(Plp_a1); | ||
180 | + free(Plp_a2); | ||
181 | +} | ||
182 | + | ||
183 | +/// performs linear interpolation into a look-up table | ||
184 | +template<typename T> | ||
185 | +T lut_lookup(T* lut, T val, size_t N, T min_val, T delta, size_t stride = 0){ | ||
186 | + size_t idx = (size_t)((val - min_val) / delta); | ||
187 | + T alpha = val - idx * delta + min_val; | ||
188 | + | ||
189 | + if(alpha == 0) return lut[idx]; | ||
190 | + else return lut[idx * stride] * (1 - alpha) + lut[ (idx+1) * stride] * alpha; | ||
191 | +} | ||
192 | + | ||
193 | +template<typename T> | ||
194 | +void cpu_scalar_psf(stim::complex<T>* F, size_t N, T* r, T* phi, T lambda, T A, stim::vec3<T> f, T NA, T NA_in, int Nl){ | ||
172 | T k = stim::TAU / lambda; | 195 | T k = stim::TAU / lambda; |
173 | - T jl, Pl, C, kr, cos_phi; | ||
174 | - T cos_alpha_1 = cos(asin(NA_in)); | ||
175 | - T cos_alpha_2 = cos(asin(NA)); | ||
176 | - stim::vec3<T> p, ps; | ||
177 | 196 | ||
178 | - /*double vm; | ||
179 | - size_t table_bytes = (Nl + 1) * sizeof(double); | ||
180 | - double* jv = (double*) malloc( table_bytes ); | ||
181 | - double* yv = (double*) malloc( table_bytes ); | ||
182 | - double* djv= (double*) malloc( table_bytes ); | ||
183 | - double* dyv= (double*) malloc( table_bytes ); | ||
184 | - */ | ||
185 | - | ||
186 | - T vm; | ||
187 | - size_t table_bytes = (Nl + 1) * sizeof(T); | ||
188 | - T* jv = (T*) malloc( table_bytes ); | ||
189 | - T* yv = (T*) malloc( table_bytes ); | ||
190 | - T* djv= (T*) malloc( table_bytes ); | ||
191 | - T* dyv= (T*) malloc( table_bytes ); | ||
192 | - | ||
193 | - for(size_t n = 0; n < N; n++){ | ||
194 | - (x == NULL) ? p[0] = 0 : p[0] = x[n]; // test for NULL values and set positions | ||
195 | - (y == NULL) ? p[1] = 0 : p[1] = y[n]; | ||
196 | - (z == NULL) ? p[2] = 0 : p[2] = z[n]; | ||
197 | - | ||
198 | - ps = p.cart2sph(); //convert this point to spherical coordinates | ||
199 | - kr = k * ps[0]; | ||
200 | - cos_phi = std::cos(ps[2]); | ||
201 | - stim::bessjyv_sph<T>(Nl, kr, vm, jv, yv, djv, dyv); | 197 | + T* C = (T*) malloc( (Nl + 1) * sizeof(T) ); //allocate space for the aperture integral terms |
198 | + cpu_aperture_integral(C, Nl, NA, NA_in); //calculate the aperture integral terms | ||
199 | + memset(F, 0, N * sizeof(stim::complex<T>)); | ||
200 | +#ifdef NO_CUDA | ||
201 | + memset(F, 0, N * sizeof(stim::complex<T>)); | ||
202 | + T jl, Pl, kr, cos_phi; | ||
203 | + | ||
204 | + double vm; | ||
205 | + double* jv = (double*) malloc( (Nl + 1) * sizeof(double) ); | ||
206 | + double* yv = (double*) malloc( (Nl + 1) * sizeof(double) ); | ||
207 | + double* djv= (double*) malloc( (Nl + 1) * sizeof(double) ); | ||
208 | + double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) ); | ||
209 | + | ||
210 | + T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T)); | ||
211 | + | ||
212 | + for(size_t n = 0; n < N; n++){ //for each point in the field | ||
213 | + kr = k * r[n]; //calculate kr (the optical distance between the focal point and p) | ||
214 | + cos_phi = std::cos(phi[n]); //calculate the cosine of phi | ||
215 | + stim::bessjyv_sph<double>(Nl, kr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl] | ||
216 | + stim::legendre<T>(Nl, cos_phi, Pl_cos_phi); //calculate the [0 Nl] legendre polynomials for this point | ||
202 | 217 | ||
203 | for(int l = 0; l <= Nl; l++){ | 218 | for(int l = 0; l <= Nl; l++){ |
204 | - //jl = boost::math::sph_bessel<T>(l, kr); | ||
205 | - //jl = stim::bessjyv(l, kr | ||
206 | jl = (T)jv[l]; | 219 | jl = (T)jv[l]; |
207 | - Pl = 1;//boost::math::legendre_p<T>(l, cos_phi); | ||
208 | - C = 1;//boost::math::legendre_p<T>(l+1, cos_alpha_1) - boost::math::legendre_p<T>(l + 1, cos_alpha_2) - boost::math::legendre_p<T>(l - 1, cos_alpha_1) + boost::math::legendre_p<T>(l - 1, cos_alpha_2); | ||
209 | - F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C; | 220 | + Pl = Pl_cos_phi[l]; |
221 | + F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C[l]; | ||
222 | + } | ||
223 | + F[n] *= A * stim::TAU; | ||
224 | + } | ||
225 | + | ||
226 | + free(C); | ||
227 | + free(Pl_cos_phi); | ||
228 | +#else | ||
229 | + T min_r = r[0]; | ||
230 | + T max_r = r[0]; | ||
231 | + for(size_t i = 0; i < N; i++){ //find the minimum and maximum values of r (min and max distance from the focal point) | ||
232 | + if(r[i] < min_r) min_r = r[i]; | ||
233 | + if(r[i] > max_r) max_r = r[i]; | ||
234 | + } | ||
235 | + T min_kr = k * min_r; | ||
236 | + T max_kr = k * max_r; | ||
237 | + | ||
238 | + //temporary variables | ||
239 | + double vm; | ||
240 | + double* jv = (double*) malloc( (Nl + 1) * sizeof(double) ); | ||
241 | + double* yv = (double*) malloc( (Nl + 1) * sizeof(double) ); | ||
242 | + double* djv= (double*) malloc( (Nl + 1) * sizeof(double) ); | ||
243 | + double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) ); | ||
244 | + | ||
245 | + size_t Nlut = (size_t)sqrt(N) * 2; | ||
246 | + T* bessel_lut = (T*) malloc(sizeof(T) * (Nl+1) * Nlut); | ||
247 | + T delta_kr = (max_kr - min_kr) / (Nlut-1); | ||
248 | + for(size_t kri = 0; kri < Nlut; kri++){ | ||
249 | + stim::bessjyv_sph<double>(Nl, min_kr + kri * delta_kr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl] | ||
250 | + for(size_t l = 0; l <= Nl; l++){ | ||
251 | + bessel_lut[kri * (Nl + 1) + l] = (T)jv[l]; | ||
252 | + } | ||
253 | + } | ||
254 | + | ||
255 | + T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T)); | ||
256 | + T kr, cos_phi, jl, Pl; | ||
257 | + for(size_t n = 0; n < N; n++){ //for each point in the field | ||
258 | + kr = k * r[n]; //calculate kr (the optical distance between the focal point and p) | ||
259 | + cos_phi = std::cos(phi[n]); //calculate the cosine of phi | ||
260 | + stim::legendre<T>(Nl, cos_phi, Pl_cos_phi); //calculate the [0 Nl] legendre polynomials for this point | ||
261 | + | ||
262 | + for(int l = 0; l <= Nl; l++){ | ||
263 | + jl = lut_lookup<T>(&bessel_lut[l], kr, Nlut, min_kr, delta_kr, Nl+1); | ||
264 | + Pl = Pl_cos_phi[l]; | ||
265 | + F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C[l]; | ||
210 | } | 266 | } |
211 | F[n] *= A * stim::TAU; | 267 | F[n] *= A * stim::TAU; |
212 | } | 268 | } |
269 | +#endif | ||
270 | +} | ||
271 | + | ||
272 | + | ||
273 | +template<typename T> | ||
274 | +void cpu_scalar_psf(stim::complex<T>* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, T NA, T NA_in, int Nl){ | ||
275 | + T* r = (T*) malloc(N * sizeof(T)); //allocate space for p in spherical coordinates | ||
276 | + T* phi = (T*) malloc(N * sizeof(T)); // only r and phi are necessary (the scalar PSF is symmetric about theta) | ||
277 | + | ||
278 | + stim::vec3<T> p, ps; | ||
279 | + for(size_t i = 0; i < N; i++){ | ||
280 | + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions | ||
281 | + (y == NULL) ? p[1] = 0 : p[1] = y[i]; | ||
282 | + (z == NULL) ? p[2] = 0 : p[2] = z[i]; | ||
283 | + | ||
284 | + ps = p.cart2sph(); //convert from cartesian to spherical coordinates | ||
285 | + r[i] = ps[0]; //store r | ||
286 | + phi[i] = ps[2]; //phi = [0 pi] | ||
287 | + } | ||
288 | + | ||
289 | + cpu_scalar_psf(F, N, r, phi, lambda, A, f, NA, NA_in, Nl); //call the spherical coordinate CPU function | ||
290 | + | ||
291 | + free(r); | ||
292 | + free(phi); | ||
213 | } | 293 | } |
214 | 294 | ||
215 | } //end namespace stim | 295 | } //end namespace stim |