Commit 0288346a9b7c9790d2df719321a38eee3274ca31

Authored by David Mayerich
1 parent 361c02a1

added support for computing scalar beams analytically

stim/math/bessel.h
@@ -778,17 +778,23 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv, @@ -778,17 +778,23 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv,
778 778
779 template<typename P> 779 template<typename P>
780 int bessjyv_sph(int v, P z, P &vm, P* cjv, 780 int bessjyv_sph(int v, P z, P &vm, P* cjv,
781 - P* cyv, P* cjvp, P* cyvp)  
782 -{ 781 + P* cyv, P* cjvp, P* cyvp){
  782 +
783 //first, compute the bessel functions of fractional order 783 //first, compute the bessel functions of fractional order
784 bessjyv<P>(v + (P)0.5, z, vm, cjv, cyv, cjvp, cyvp); 784 bessjyv<P>(v + (P)0.5, z, vm, cjv, cyv, cjvp, cyvp);
785 785
  786 + if(z == 0){ //handle degenerate case of z = 0
  787 + memset(cjv, 0, sizeof(P) * (v+1));
  788 + cjv[0] = 1;
  789 + }
  790 +
786 //iterate through each and scale 791 //iterate through each and scale
787 - for(int n = 0; n<=v; n++)  
788 - { 792 + for(int n = 0; n<=v; n++){
789 793
790 - cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));  
791 - cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); 794 + if(z != 0){ //handle degenerate case of z = 0
  795 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  796 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
  797 + }
792 798
793 cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); 799 cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
794 cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); 800 cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
stim/math/legendre.h
1 #ifndef RTS_LEGENDRE_H 1 #ifndef RTS_LEGENDRE_H
2 #define RTS_LEGENDRE_H 2 #define RTS_LEGENDRE_H
3 3
4 -#include "rts/cuda/callable.h" 4 +#include "../cuda/cudatools/callable.h"
5 5
6 namespace stim{ 6 namespace stim{
7 7
@@ -24,9 +24,11 @@ CUDA_CALLABLE void shift_legendre(int n, T x, T&amp; P0, T&amp; P1) @@ -24,9 +24,11 @@ CUDA_CALLABLE void shift_legendre(int n, T x, T&amp; P0, T&amp; P1)
24 P1 = Pnew; 24 P1 = Pnew;
25 } 25 }
26 26
  27 +/// Iteratively evaluates the Legendre polynomials for orders l = [0 n]
27 template <typename T> 28 template <typename T>
28 CUDA_CALLABLE void legendre(int n, T x, T* P) 29 CUDA_CALLABLE void legendre(int n, T x, T* P)
29 { 30 {
  31 + if(n < 0) return;
30 P[0] = 1; 32 P[0] = 1;
31 33
32 if(n >= 1) 34 if(n >= 1)
stim/optics/scalarbeam.h
@@ -4,12 +4,9 @@ @@ -4,12 +4,9 @@
4 #include "../math/vec3.h" 4 #include "../math/vec3.h"
5 #include "../optics/scalarwave.h" 5 #include "../optics/scalarwave.h"
6 #include "../math/bessel.h" 6 #include "../math/bessel.h"
  7 +#include "../math/legendre.h"
7 #include <vector> 8 #include <vector>
8 9
9 -//Boost  
10 -//#include <boost/math/special_functions/bessel.hpp>  
11 -//#include <boost/math/special_functions/legendre.hpp>  
12 -  
13 namespace stim{ 10 namespace stim{
14 11
15 /// Function returns the value of the scalar field produced by a beam with the specified parameters 12 /// Function returns the value of the scalar field produced by a beam with the specified parameters
@@ -130,23 +127,6 @@ public: @@ -130,23 +127,6 @@ public:
130 return result; 127 return result;
131 } 128 }
132 129
133 - /// Calculate the field at a set of positions  
134 - /*void field(stim::complex<T>* F, T* x, T* y, T* z, size_t N, size_t O){  
135 -  
136 - memset(F, 0, N * sizeof(stim::complex<T>));  
137 - std::vector< scalarwave<T> > W = mc(O); //get a random set of plane waves representing the beam  
138 - size_t o, n;  
139 - T px, py, pz;  
140 - for(n = 0; n < N; n++){ //for each point in the field  
141 - (x == NULL) ? px = 0 : px = x[n]; // test for NULL values  
142 - (y == NULL) ? py = 0 : py = y[n];  
143 - (z == NULL) ? pz = 0 : pz = z[n];  
144 - for(o = 0; o < O; o++){ //for each plane wave  
145 - F[n] += W[o].pos(px, py, pz);  
146 - }  
147 - }  
148 - }*/  
149 -  
150 std::string str() 130 std::string str()
151 { 131 {
152 std::stringstream ss; 132 std::stringstream ss;
@@ -165,51 +145,151 @@ public: @@ -165,51 +145,151 @@ public:
165 145
166 }; //end beam 146 }; //end beam
167 147
  148 +/// Calculate the [0 Nl] terms for the aperture integral based on the give numerical aperture and center obscuration (optional)
  149 +/// @param C is a pointer to Nl + 1 values where the terms will be stored
168 template<typename T> 150 template<typename T>
169 -void cpu_scalar_psf(stim::complex<T>* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, T NA, T NA_in, int Nl){ 151 +CUDA_CALLABLE void cpu_aperture_integral(T* C, size_t Nl, T NA, T NA_in = 0){
170 152
171 - memset(F, 0, N * sizeof(stim::complex<T>)); 153 + size_t table_bytes = (Nl + 1) * sizeof(T); //calculate the number of bytes required to store the terms
  154 + T cos_alpha_1 = cos(asin(NA_in)); //calculate the cosine of the angle subtended by the central obscuration
  155 + T cos_alpha_2 = cos(asin(NA)); //calculate the cosine of the angle subtended by the aperture
  156 +
  157 + // the aperture integral is computed using four individual Legendre polynomials, each a function of the angles subtended
  158 + // by the objective and central obscuration
  159 + T* Pln_a1 = (T*) malloc(table_bytes);
  160 + stim::legendre<T>(Nl-1, cos_alpha_1, &Pln_a1[1]);
  161 + Pln_a1[0] = 1;
  162 +
  163 + T* Pln_a2 = (T*) malloc(table_bytes);
  164 + stim::legendre<T>(Nl-1, cos_alpha_2, &Pln_a2[1]);
  165 + Pln_a2[0] = 1;
  166 +
  167 + T* Plp_a1 = (T*) malloc(table_bytes+sizeof(T));
  168 + stim::legendre<T>(Nl+1, cos_alpha_1, Plp_a1);
  169 +
  170 + T* Plp_a2 = (T*) malloc(table_bytes+sizeof(T));
  171 + stim::legendre<T>(Nl+1, cos_alpha_2, Plp_a2);
  172 +
  173 + for(size_t l = 0; l <= Nl; l++){
  174 + C[l] = Plp_a1[l+1] - Plp_a2[l+1] - Pln_a1[l] + Pln_a2[l];
  175 + }
  176 +
  177 + free(Pln_a1);
  178 + free(Pln_a2);
  179 + free(Plp_a1);
  180 + free(Plp_a2);
  181 +}
  182 +
  183 +/// performs linear interpolation into a look-up table
  184 +template<typename T>
  185 +T lut_lookup(T* lut, T val, size_t N, T min_val, T delta, size_t stride = 0){
  186 + size_t idx = (size_t)((val - min_val) / delta);
  187 + T alpha = val - idx * delta + min_val;
  188 +
  189 + if(alpha == 0) return lut[idx];
  190 + else return lut[idx * stride] * (1 - alpha) + lut[ (idx+1) * stride] * alpha;
  191 +}
  192 +
  193 +template<typename T>
  194 +void cpu_scalar_psf(stim::complex<T>* F, size_t N, T* r, T* phi, T lambda, T A, stim::vec3<T> f, T NA, T NA_in, int Nl){
172 T k = stim::TAU / lambda; 195 T k = stim::TAU / lambda;
173 - T jl, Pl, C, kr, cos_phi;  
174 - T cos_alpha_1 = cos(asin(NA_in));  
175 - T cos_alpha_2 = cos(asin(NA));  
176 - stim::vec3<T> p, ps;  
177 196
178 - /*double vm;  
179 - size_t table_bytes = (Nl + 1) * sizeof(double);  
180 - double* jv = (double*) malloc( table_bytes );  
181 - double* yv = (double*) malloc( table_bytes );  
182 - double* djv= (double*) malloc( table_bytes );  
183 - double* dyv= (double*) malloc( table_bytes );  
184 - */  
185 -  
186 - T vm;  
187 - size_t table_bytes = (Nl + 1) * sizeof(T);  
188 - T* jv = (T*) malloc( table_bytes );  
189 - T* yv = (T*) malloc( table_bytes );  
190 - T* djv= (T*) malloc( table_bytes );  
191 - T* dyv= (T*) malloc( table_bytes );  
192 -  
193 - for(size_t n = 0; n < N; n++){  
194 - (x == NULL) ? p[0] = 0 : p[0] = x[n]; // test for NULL values and set positions  
195 - (y == NULL) ? p[1] = 0 : p[1] = y[n];  
196 - (z == NULL) ? p[2] = 0 : p[2] = z[n];  
197 -  
198 - ps = p.cart2sph(); //convert this point to spherical coordinates  
199 - kr = k * ps[0];  
200 - cos_phi = std::cos(ps[2]);  
201 - stim::bessjyv_sph<T>(Nl, kr, vm, jv, yv, djv, dyv); 197 + T* C = (T*) malloc( (Nl + 1) * sizeof(T) ); //allocate space for the aperture integral terms
  198 + cpu_aperture_integral(C, Nl, NA, NA_in); //calculate the aperture integral terms
  199 + memset(F, 0, N * sizeof(stim::complex<T>));
  200 +#ifdef NO_CUDA
  201 + memset(F, 0, N * sizeof(stim::complex<T>));
  202 + T jl, Pl, kr, cos_phi;
  203 +
  204 + double vm;
  205 + double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
  206 + double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
  207 + double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
  208 + double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
  209 +
  210 + T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T));
  211 +
  212 + for(size_t n = 0; n < N; n++){ //for each point in the field
  213 + kr = k * r[n]; //calculate kr (the optical distance between the focal point and p)
  214 + cos_phi = std::cos(phi[n]); //calculate the cosine of phi
  215 + stim::bessjyv_sph<double>(Nl, kr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl]
  216 + stim::legendre<T>(Nl, cos_phi, Pl_cos_phi); //calculate the [0 Nl] legendre polynomials for this point
202 217
203 for(int l = 0; l <= Nl; l++){ 218 for(int l = 0; l <= Nl; l++){
204 - //jl = boost::math::sph_bessel<T>(l, kr);  
205 - //jl = stim::bessjyv(l, kr  
206 jl = (T)jv[l]; 219 jl = (T)jv[l];
207 - Pl = 1;//boost::math::legendre_p<T>(l, cos_phi);  
208 - C = 1;//boost::math::legendre_p<T>(l+1, cos_alpha_1) - boost::math::legendre_p<T>(l + 1, cos_alpha_2) - boost::math::legendre_p<T>(l - 1, cos_alpha_1) + boost::math::legendre_p<T>(l - 1, cos_alpha_2);  
209 - F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C; 220 + Pl = Pl_cos_phi[l];
  221 + F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C[l];
  222 + }
  223 + F[n] *= A * stim::TAU;
  224 + }
  225 +
  226 + free(C);
  227 + free(Pl_cos_phi);
  228 +#else
  229 + T min_r = r[0];
  230 + T max_r = r[0];
  231 + for(size_t i = 0; i < N; i++){ //find the minimum and maximum values of r (min and max distance from the focal point)
  232 + if(r[i] < min_r) min_r = r[i];
  233 + if(r[i] > max_r) max_r = r[i];
  234 + }
  235 + T min_kr = k * min_r;
  236 + T max_kr = k * max_r;
  237 +
  238 + //temporary variables
  239 + double vm;
  240 + double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
  241 + double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
  242 + double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
  243 + double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
  244 +
  245 + size_t Nlut = (size_t)sqrt(N) * 2;
  246 + T* bessel_lut = (T*) malloc(sizeof(T) * (Nl+1) * Nlut);
  247 + T delta_kr = (max_kr - min_kr) / (Nlut-1);
  248 + for(size_t kri = 0; kri < Nlut; kri++){
  249 + stim::bessjyv_sph<double>(Nl, min_kr + kri * delta_kr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl]
  250 + for(size_t l = 0; l <= Nl; l++){
  251 + bessel_lut[kri * (Nl + 1) + l] = (T)jv[l];
  252 + }
  253 + }
  254 +
  255 + T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T));
  256 + T kr, cos_phi, jl, Pl;
  257 + for(size_t n = 0; n < N; n++){ //for each point in the field
  258 + kr = k * r[n]; //calculate kr (the optical distance between the focal point and p)
  259 + cos_phi = std::cos(phi[n]); //calculate the cosine of phi
  260 + stim::legendre<T>(Nl, cos_phi, Pl_cos_phi); //calculate the [0 Nl] legendre polynomials for this point
  261 +
  262 + for(int l = 0; l <= Nl; l++){
  263 + jl = lut_lookup<T>(&bessel_lut[l], kr, Nlut, min_kr, delta_kr, Nl+1);
  264 + Pl = Pl_cos_phi[l];
  265 + F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C[l];
210 } 266 }
211 F[n] *= A * stim::TAU; 267 F[n] *= A * stim::TAU;
212 } 268 }
  269 +#endif
  270 +}
  271 +
  272 +
  273 +template<typename T>
  274 +void cpu_scalar_psf(stim::complex<T>* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, T NA, T NA_in, int Nl){
  275 + T* r = (T*) malloc(N * sizeof(T)); //allocate space for p in spherical coordinates
  276 + T* phi = (T*) malloc(N * sizeof(T)); // only r and phi are necessary (the scalar PSF is symmetric about theta)
  277 +
  278 + stim::vec3<T> p, ps;
  279 + for(size_t i = 0; i < N; i++){
  280 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  281 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  282 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  283 +
  284 + ps = p.cart2sph(); //convert from cartesian to spherical coordinates
  285 + r[i] = ps[0]; //store r
  286 + phi[i] = ps[2]; //phi = [0 pi]
  287 + }
  288 +
  289 + cpu_scalar_psf(F, N, r, phi, lambda, A, f, NA, NA_in, Nl); //call the spherical coordinate CPU function
  290 +
  291 + free(r);
  292 + free(phi);
213 } 293 }
214 294
215 } //end namespace stim 295 } //end namespace stim