diff --git a/stim/math/bessel.h b/stim/math/bessel.h index 1be8607..6de92a2 100644 --- a/stim/math/bessel.h +++ b/stim/math/bessel.h @@ -778,17 +778,23 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv, template int bessjyv_sph(int v, P z, P &vm, P* cjv, - P* cyv, P* cjvp, P* cyvp) -{ + P* cyv, P* cjvp, P* cyvp){ + //first, compute the bessel functions of fractional order bessjyv

(v + (P)0.5, z, vm, cjv, cyv, cjvp, cyvp); + if(z == 0){ //handle degenerate case of z = 0 + memset(cjv, 0, sizeof(P) * (v+1)); + cjv[0] = 1; + } + //iterate through each and scale - for(int n = 0; n<=v; n++) - { + for(int n = 0; n<=v; n++){ - cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); - cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); + if(z != 0){ //handle degenerate case of z = 0 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); + } cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); diff --git a/stim/math/legendre.h b/stim/math/legendre.h index 4859740..1099d4e 100644 --- a/stim/math/legendre.h +++ b/stim/math/legendre.h @@ -1,7 +1,7 @@ #ifndef RTS_LEGENDRE_H #define RTS_LEGENDRE_H -#include "rts/cuda/callable.h" +#include "../cuda/cudatools/callable.h" namespace stim{ @@ -24,9 +24,11 @@ CUDA_CALLABLE void shift_legendre(int n, T x, T& P0, T& P1) P1 = Pnew; } +/// Iteratively evaluates the Legendre polynomials for orders l = [0 n] template CUDA_CALLABLE void legendre(int n, T x, T* P) { + if(n < 0) return; P[0] = 1; if(n >= 1) diff --git a/stim/optics/scalarbeam.h b/stim/optics/scalarbeam.h index 106c728..cfb817b 100644 --- a/stim/optics/scalarbeam.h +++ b/stim/optics/scalarbeam.h @@ -4,12 +4,9 @@ #include "../math/vec3.h" #include "../optics/scalarwave.h" #include "../math/bessel.h" +#include "../math/legendre.h" #include -//Boost -//#include -//#include - namespace stim{ /// Function returns the value of the scalar field produced by a beam with the specified parameters @@ -130,23 +127,6 @@ public: return result; } - /// Calculate the field at a set of positions - /*void field(stim::complex* F, T* x, T* y, T* z, size_t N, size_t O){ - - memset(F, 0, N * sizeof(stim::complex)); - std::vector< scalarwave > W = mc(O); //get a random set of plane waves representing the beam - size_t o, n; - T px, py, pz; - for(n = 0; n < N; n++){ //for each point in the field - (x == NULL) ? px = 0 : px = x[n]; // test for NULL values - (y == NULL) ? py = 0 : py = y[n]; - (z == NULL) ? pz = 0 : pz = z[n]; - for(o = 0; o < O; o++){ //for each plane wave - F[n] += W[o].pos(px, py, pz); - } - } - }*/ - std::string str() { std::stringstream ss; @@ -165,51 +145,151 @@ public: }; //end beam +/// Calculate the [0 Nl] terms for the aperture integral based on the give numerical aperture and center obscuration (optional) +/// @param C is a pointer to Nl + 1 values where the terms will be stored template -void cpu_scalar_psf(stim::complex* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3 f, T NA, T NA_in, int Nl){ +CUDA_CALLABLE void cpu_aperture_integral(T* C, size_t Nl, T NA, T NA_in = 0){ - memset(F, 0, N * sizeof(stim::complex)); + size_t table_bytes = (Nl + 1) * sizeof(T); //calculate the number of bytes required to store the terms + T cos_alpha_1 = cos(asin(NA_in)); //calculate the cosine of the angle subtended by the central obscuration + T cos_alpha_2 = cos(asin(NA)); //calculate the cosine of the angle subtended by the aperture + + // the aperture integral is computed using four individual Legendre polynomials, each a function of the angles subtended + // by the objective and central obscuration + T* Pln_a1 = (T*) malloc(table_bytes); + stim::legendre(Nl-1, cos_alpha_1, &Pln_a1[1]); + Pln_a1[0] = 1; + + T* Pln_a2 = (T*) malloc(table_bytes); + stim::legendre(Nl-1, cos_alpha_2, &Pln_a2[1]); + Pln_a2[0] = 1; + + T* Plp_a1 = (T*) malloc(table_bytes+sizeof(T)); + stim::legendre(Nl+1, cos_alpha_1, Plp_a1); + + T* Plp_a2 = (T*) malloc(table_bytes+sizeof(T)); + stim::legendre(Nl+1, cos_alpha_2, Plp_a2); + + for(size_t l = 0; l <= Nl; l++){ + C[l] = Plp_a1[l+1] - Plp_a2[l+1] - Pln_a1[l] + Pln_a2[l]; + } + + free(Pln_a1); + free(Pln_a2); + free(Plp_a1); + free(Plp_a2); +} + +/// performs linear interpolation into a look-up table +template +T lut_lookup(T* lut, T val, size_t N, T min_val, T delta, size_t stride = 0){ + size_t idx = (size_t)((val - min_val) / delta); + T alpha = val - idx * delta + min_val; + + if(alpha == 0) return lut[idx]; + else return lut[idx * stride] * (1 - alpha) + lut[ (idx+1) * stride] * alpha; +} + +template +void cpu_scalar_psf(stim::complex* F, size_t N, T* r, T* phi, T lambda, T A, stim::vec3 f, T NA, T NA_in, int Nl){ T k = stim::TAU / lambda; - T jl, Pl, C, kr, cos_phi; - T cos_alpha_1 = cos(asin(NA_in)); - T cos_alpha_2 = cos(asin(NA)); - stim::vec3 p, ps; - /*double vm; - size_t table_bytes = (Nl + 1) * sizeof(double); - double* jv = (double*) malloc( table_bytes ); - double* yv = (double*) malloc( table_bytes ); - double* djv= (double*) malloc( table_bytes ); - double* dyv= (double*) malloc( table_bytes ); - */ - - T vm; - size_t table_bytes = (Nl + 1) * sizeof(T); - T* jv = (T*) malloc( table_bytes ); - T* yv = (T*) malloc( table_bytes ); - T* djv= (T*) malloc( table_bytes ); - T* dyv= (T*) malloc( table_bytes ); - - for(size_t n = 0; n < N; n++){ - (x == NULL) ? p[0] = 0 : p[0] = x[n]; // test for NULL values and set positions - (y == NULL) ? p[1] = 0 : p[1] = y[n]; - (z == NULL) ? p[2] = 0 : p[2] = z[n]; - - ps = p.cart2sph(); //convert this point to spherical coordinates - kr = k * ps[0]; - cos_phi = std::cos(ps[2]); - stim::bessjyv_sph(Nl, kr, vm, jv, yv, djv, dyv); + T* C = (T*) malloc( (Nl + 1) * sizeof(T) ); //allocate space for the aperture integral terms + cpu_aperture_integral(C, Nl, NA, NA_in); //calculate the aperture integral terms + memset(F, 0, N * sizeof(stim::complex)); +#ifdef NO_CUDA + memset(F, 0, N * sizeof(stim::complex)); + T jl, Pl, kr, cos_phi; + + double vm; + double* jv = (double*) malloc( (Nl + 1) * sizeof(double) ); + double* yv = (double*) malloc( (Nl + 1) * sizeof(double) ); + double* djv= (double*) malloc( (Nl + 1) * sizeof(double) ); + double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) ); + + T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T)); + + for(size_t n = 0; n < N; n++){ //for each point in the field + kr = k * r[n]; //calculate kr (the optical distance between the focal point and p) + cos_phi = std::cos(phi[n]); //calculate the cosine of phi + stim::bessjyv_sph(Nl, kr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl] + stim::legendre(Nl, cos_phi, Pl_cos_phi); //calculate the [0 Nl] legendre polynomials for this point for(int l = 0; l <= Nl; l++){ - //jl = boost::math::sph_bessel(l, kr); - //jl = stim::bessjyv(l, kr jl = (T)jv[l]; - Pl = 1;//boost::math::legendre_p(l, cos_phi); - C = 1;//boost::math::legendre_p(l+1, cos_alpha_1) - boost::math::legendre_p(l + 1, cos_alpha_2) - boost::math::legendre_p(l - 1, cos_alpha_1) + boost::math::legendre_p(l - 1, cos_alpha_2); - F[n] += pow(complex(0, 1), l) * jl * Pl * C; + Pl = Pl_cos_phi[l]; + F[n] += pow(complex(0, 1), l) * jl * Pl * C[l]; + } + F[n] *= A * stim::TAU; + } + + free(C); + free(Pl_cos_phi); +#else + T min_r = r[0]; + T max_r = r[0]; + for(size_t i = 0; i < N; i++){ //find the minimum and maximum values of r (min and max distance from the focal point) + if(r[i] < min_r) min_r = r[i]; + if(r[i] > max_r) max_r = r[i]; + } + T min_kr = k * min_r; + T max_kr = k * max_r; + + //temporary variables + double vm; + double* jv = (double*) malloc( (Nl + 1) * sizeof(double) ); + double* yv = (double*) malloc( (Nl + 1) * sizeof(double) ); + double* djv= (double*) malloc( (Nl + 1) * sizeof(double) ); + double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) ); + + size_t Nlut = (size_t)sqrt(N) * 2; + T* bessel_lut = (T*) malloc(sizeof(T) * (Nl+1) * Nlut); + T delta_kr = (max_kr - min_kr) / (Nlut-1); + for(size_t kri = 0; kri < Nlut; kri++){ + stim::bessjyv_sph(Nl, min_kr + kri * delta_kr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl] + for(size_t l = 0; l <= Nl; l++){ + bessel_lut[kri * (Nl + 1) + l] = (T)jv[l]; + } + } + + T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T)); + T kr, cos_phi, jl, Pl; + for(size_t n = 0; n < N; n++){ //for each point in the field + kr = k * r[n]; //calculate kr (the optical distance between the focal point and p) + cos_phi = std::cos(phi[n]); //calculate the cosine of phi + stim::legendre(Nl, cos_phi, Pl_cos_phi); //calculate the [0 Nl] legendre polynomials for this point + + for(int l = 0; l <= Nl; l++){ + jl = lut_lookup(&bessel_lut[l], kr, Nlut, min_kr, delta_kr, Nl+1); + Pl = Pl_cos_phi[l]; + F[n] += pow(complex(0, 1), l) * jl * Pl * C[l]; } F[n] *= A * stim::TAU; } +#endif +} + + +template +void cpu_scalar_psf(stim::complex* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3 f, T NA, T NA_in, int Nl){ + T* r = (T*) malloc(N * sizeof(T)); //allocate space for p in spherical coordinates + T* phi = (T*) malloc(N * sizeof(T)); // only r and phi are necessary (the scalar PSF is symmetric about theta) + + stim::vec3 p, ps; + for(size_t i = 0; i < N; i++){ + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions + (y == NULL) ? p[1] = 0 : p[1] = y[i]; + (z == NULL) ? p[2] = 0 : p[2] = z[i]; + + ps = p.cart2sph(); //convert from cartesian to spherical coordinates + r[i] = ps[0]; //store r + phi[i] = ps[2]; //phi = [0 pi] + } + + cpu_scalar_psf(F, N, r, phi, lambda, A, f, NA, NA_in, Nl); //call the spherical coordinate CPU function + + free(r); + free(phi); } } //end namespace stim -- libgit2 0.21.4