Authored by David Mayerich
1 parent 361c02a1

### added support for computing scalar beams analytically

Showing 3 changed files with 151 additions and 63 deletions
stim/math/bessel.h

 ... ... @@ -778,17 +778,23 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv, 778 778 779 779 template 780 780 int bessjyv_sph(int v, P z, P &vm, P* cjv, 781 - P* cyv, P* cjvp, P* cyvp) 782 -{ 781 + P* cyv, P* cjvp, P* cyvp){ 782 + 783 783 //first, compute the bessel functions of fractional order 784 784 bessjyv

(v + (P)0.5, z, vm, cjv, cyv, cjvp, cyvp); 785 785 786 + if(z == 0){ //handle degenerate case of z = 0 787 + memset(cjv, 0, sizeof(P) * (v+1)); 788 + cjv[0] = 1; 789 + } 790 + 786 791 //iterate through each and scale 787 - for(int n = 0; n<=v; n++) 788 - { 792 + for(int n = 0; n<=v; n++){ 789 793 790 - cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); 791 - cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); 794 + if(z != 0){ //handle degenerate case of z = 0 795 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); 796 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); 797 + } 792 798 793 799 cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); 794 800 cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); ... ...

stim/math/legendre.h

 1 1 #ifndef RTS_LEGENDRE_H 2 2 #define RTS_LEGENDRE_H 3 3 4 -#include "rts/cuda/callable.h" 4 +#include "../cuda/cudatools/callable.h" 5 5 6 6 namespace stim{ 7 7 ... ... @@ -24,9 +24,11 @@ CUDA_CALLABLE void shift_legendre(int n, T x, T& P0, T& P1) 24 24 P1 = Pnew; 25 25 } 26 26 27 +/// Iteratively evaluates the Legendre polynomials for orders l = [0 n] 27 28 template 28 29 CUDA_CALLABLE void legendre(int n, T x, T* P) 29 30 { 31 + if(n < 0) return; 30 32 P[0] = 1; 31 33 32 34 if(n >= 1) ... ...
stim/optics/scalarbeam.h

 ... ... @@ -4,12 +4,9 @@ 4 4 #include "../math/vec3.h" 5 5 #include "../optics/scalarwave.h" 6 6 #include "../math/bessel.h" 7 +#include "../math/legendre.h" 7 8 #include 8 9 9 -//Boost 10 -//#include 11 -//#include 12 - 13 10 namespace stim{ 14 11 15 12 /// Function returns the value of the scalar field produced by a beam with the specified parameters ... ... @@ -130,23 +127,6 @@ public: 130 127 return result; 131 128 } 132 129 133 - /// Calculate the field at a set of positions 134 - /*void field(stim::complex* F, T* x, T* y, T* z, size_t N, size_t O){ 135 - 136 - memset(F, 0, N * sizeof(stim::complex)); 137 - std::vector< scalarwave > W = mc(O); //get a random set of plane waves representing the beam 138 - size_t o, n; 139 - T px, py, pz; 140 - for(n = 0; n < N; n++){ //for each point in the field 141 - (x == NULL) ? px = 0 : px = x[n]; // test for NULL values 142 - (y == NULL) ? py = 0 : py = y[n]; 143 - (z == NULL) ? pz = 0 : pz = z[n]; 144 - for(o = 0; o < O; o++){ //for each plane wave 145 - F[n] += W[o].pos(px, py, pz); 146 - } 147 - } 148 - }*/ 149 - 150 130 std::string str() 151 131 { 152 132 std::stringstream ss; ... ... @@ -165,51 +145,151 @@ public: 165 145 166 146 }; //end beam 167 147 148 +/// Calculate the [0 Nl] terms for the aperture integral based on the give numerical aperture and center obscuration (optional) 149 +/// @param C is a pointer to Nl + 1 values where the terms will be stored 168 150 template 169 -void cpu_scalar_psf(stim::complex* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3 f, T NA, T NA_in, int Nl){ 151 +CUDA_CALLABLE void cpu_aperture_integral(T* C, size_t Nl, T NA, T NA_in = 0){ 170 152 171 - memset(F, 0, N * sizeof(stim::complex)); 153 + size_t table_bytes = (Nl + 1) * sizeof(T); //calculate the number of bytes required to store the terms 154 + T cos_alpha_1 = cos(asin(NA_in)); //calculate the cosine of the angle subtended by the central obscuration 155 + T cos_alpha_2 = cos(asin(NA)); //calculate the cosine of the angle subtended by the aperture 156 + 157 + // the aperture integral is computed using four individual Legendre polynomials, each a function of the angles subtended 158 + // by the objective and central obscuration 159 + T* Pln_a1 = (T*) malloc(table_bytes); 160 + stim::legendre(Nl-1, cos_alpha_1, &Pln_a1[1]); 161 + Pln_a1[0] = 1; 162 + 163 + T* Pln_a2 = (T*) malloc(table_bytes); 164 + stim::legendre(Nl-1, cos_alpha_2, &Pln_a2[1]); 165 + Pln_a2[0] = 1; 166 + 167 + T* Plp_a1 = (T*) malloc(table_bytes+sizeof(T)); 168 + stim::legendre(Nl+1, cos_alpha_1, Plp_a1); 169 + 170 + T* Plp_a2 = (T*) malloc(table_bytes+sizeof(T)); 171 + stim::legendre(Nl+1, cos_alpha_2, Plp_a2); 172 + 173 + for(size_t l = 0; l <= Nl; l++){ 174 + C[l] = Plp_a1[l+1] - Plp_a2[l+1] - Pln_a1[l] + Pln_a2[l]; 175 + } 176 + 177 + free(Pln_a1); 178 + free(Pln_a2); 179 + free(Plp_a1); 180 + free(Plp_a2); 181 +} 182 + 183 +/// performs linear interpolation into a look-up table 184 +template 185 +T lut_lookup(T* lut, T val, size_t N, T min_val, T delta, size_t stride = 0){ 186 + size_t idx = (size_t)((val - min_val) / delta); 187 + T alpha = val - idx * delta + min_val; 188 + 189 + if(alpha == 0) return lut[idx]; 190 + else return lut[idx * stride] * (1 - alpha) + lut[ (idx+1) * stride] * alpha; 191 +} 192 + 193 +template 194 +void cpu_scalar_psf(stim::complex* F, size_t N, T* r, T* phi, T lambda, T A, stim::vec3 f, T NA, T NA_in, int Nl){ 172 195 T k = stim::TAU / lambda; 173 - T jl, Pl, C, kr, cos_phi; 174 - T cos_alpha_1 = cos(asin(NA_in)); 175 - T cos_alpha_2 = cos(asin(NA)); 176 - stim::vec3 p, ps; 177 196 178 - /*double vm; 179 - size_t table_bytes = (Nl + 1) * sizeof(double); 180 - double* jv = (double*) malloc( table_bytes ); 181 - double* yv = (double*) malloc( table_bytes ); 182 - double* djv= (double*) malloc( table_bytes ); 183 - double* dyv= (double*) malloc( table_bytes ); 184 - */ 185 - 186 - T vm; 187 - size_t table_bytes = (Nl + 1) * sizeof(T); 188 - T* jv = (T*) malloc( table_bytes ); 189 - T* yv = (T*) malloc( table_bytes ); 190 - T* djv= (T*) malloc( table_bytes ); 191 - T* dyv= (T*) malloc( table_bytes ); 192 - 193 - for(size_t n = 0; n < N; n++){ 194 - (x == NULL) ? p[0] = 0 : p[0] = x[n]; // test for NULL values and set positions 195 - (y == NULL) ? p[1] = 0 : p[1] = y[n]; 196 - (z == NULL) ? p[2] = 0 : p[2] = z[n]; 197 - 198 - ps = p.cart2sph(); //convert this point to spherical coordinates 199 - kr = k * ps[0]; 200 - cos_phi = std::cos(ps[2]); 201 - stim::bessjyv_sph(Nl, kr, vm, jv, yv, djv, dyv); 197 + T* C = (T*) malloc( (Nl + 1) * sizeof(T) ); //allocate space for the aperture integral terms 198 + cpu_aperture_integral(C, Nl, NA, NA_in); //calculate the aperture integral terms 199 + memset(F, 0, N * sizeof(stim::complex)); 200 +#ifdef NO_CUDA 201 + memset(F, 0, N * sizeof(stim::complex)); 202 + T jl, Pl, kr, cos_phi; 203 + 204 + double vm; 205 + double* jv = (double*) malloc( (Nl + 1) * sizeof(double) ); 206 + double* yv = (double*) malloc( (Nl + 1) * sizeof(double) ); 207 + double* djv= (double*) malloc( (Nl + 1) * sizeof(double) ); 208 + double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) ); 209 + 210 + T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T)); 211 + 212 + for(size_t n = 0; n < N; n++){ //for each point in the field 213 + kr = k * r[n]; //calculate kr (the optical distance between the focal point and p) 214 + cos_phi = std::cos(phi[n]); //calculate the cosine of phi 215 + stim::bessjyv_sph(Nl, kr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl] 216 + stim::legendre(Nl, cos_phi, Pl_cos_phi); //calculate the [0 Nl] legendre polynomials for this point 202 217 203 218 for(int l = 0; l <= Nl; l++){ 204 - //jl = boost::math::sph_bessel(l, kr); 205 - //jl = stim::bessjyv(l, kr 206 219 jl = (T)jv[l]; 207 - Pl = 1;//boost::math::legendre_p(l, cos_phi); 208 - C = 1;//boost::math::legendre_p(l+1, cos_alpha_1) - boost::math::legendre_p(l + 1, cos_alpha_2) - boost::math::legendre_p(l - 1, cos_alpha_1) + boost::math::legendre_p(l - 1, cos_alpha_2); 209 - F[n] += pow(complex(0, 1), l) * jl * Pl * C; 220 + Pl = Pl_cos_phi[l]; 221 + F[n] += pow(complex(0, 1), l) * jl * Pl * C[l]; 222 + } 223 + F[n] *= A * stim::TAU; 224 + } 225 + 226 + free(C); 227 + free(Pl_cos_phi); 228 +#else 229 + T min_r = r[0]; 230 + T max_r = r[0]; 231 + for(size_t i = 0; i < N; i++){ //find the minimum and maximum values of r (min and max distance from the focal point) 232 + if(r[i] < min_r) min_r = r[i]; 233 + if(r[i] > max_r) max_r = r[i]; 234 + } 235 + T min_kr = k * min_r; 236 + T max_kr = k * max_r; 237 + 238 + //temporary variables 239 + double vm; 240 + double* jv = (double*) malloc( (Nl + 1) * sizeof(double) ); 241 + double* yv = (double*) malloc( (Nl + 1) * sizeof(double) ); 242 + double* djv= (double*) malloc( (Nl + 1) * sizeof(double) ); 243 + double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) ); 244 + 245 + size_t Nlut = (size_t)sqrt(N) * 2; 246 + T* bessel_lut = (T*) malloc(sizeof(T) * (Nl+1) * Nlut); 247 + T delta_kr = (max_kr - min_kr) / (Nlut-1); 248 + for(size_t kri = 0; kri < Nlut; kri++){ 249 + stim::bessjyv_sph(Nl, min_kr + kri * delta_kr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl] 250 + for(size_t l = 0; l <= Nl; l++){ 251 + bessel_lut[kri * (Nl + 1) + l] = (T)jv[l]; 252 + } 253 + } 254 + 255 + T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T)); 256 + T kr, cos_phi, jl, Pl; 257 + for(size_t n = 0; n < N; n++){ //for each point in the field 258 + kr = k * r[n]; //calculate kr (the optical distance between the focal point and p) 259 + cos_phi = std::cos(phi[n]); //calculate the cosine of phi 260 + stim::legendre(Nl, cos_phi, Pl_cos_phi); //calculate the [0 Nl] legendre polynomials for this point 261 + 262 + for(int l = 0; l <= Nl; l++){ 263 + jl = lut_lookup(&bessel_lut[l], kr, Nlut, min_kr, delta_kr, Nl+1); 264 + Pl = Pl_cos_phi[l]; 265 + F[n] += pow(complex(0, 1), l) * jl * Pl * C[l]; 210 266 } 211 267 F[n] *= A * stim::TAU; 212 268 } 269 +#endif 270 +} 271 + 272 + 273 +template 274 +void cpu_scalar_psf(stim::complex* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3 f, T NA, T NA_in, int Nl){ 275 + T* r = (T*) malloc(N * sizeof(T)); //allocate space for p in spherical coordinates 276 + T* phi = (T*) malloc(N * sizeof(T)); // only r and phi are necessary (the scalar PSF is symmetric about theta) 277 + 278 + stim::vec3 p, ps; 279 + for(size_t i = 0; i < N; i++){ 280 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions 281 + (y == NULL) ? p[1] = 0 : p[1] = y[i]; 282 + (z == NULL) ? p[2] = 0 : p[2] = z[i]; 283 + 284 + ps = p.cart2sph(); //convert from cartesian to spherical coordinates 285 + r[i] = ps[0]; //store r 286 + phi[i] = ps[2]; //phi = [0 pi] 287 + } 288 + 289 + cpu_scalar_psf(F, N, r, phi, lambda, A, f, NA, NA_in, Nl); //call the spherical coordinate CPU function 290 + 291 + free(r); 292 + free(phi); 213 293 } 214 294 215 295 } //end namespace stim ... ...