diff --git a/stim/math/bessel.h b/stim/math/bessel.h
index 1be8607..6de92a2 100644
--- a/stim/math/bessel.h
+++ b/stim/math/bessel.h
@@ -778,17 +778,23 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv,
 
 template<typename P>
 int bessjyv_sph(int v, P z, P &vm, P* cjv,
-    P* cyv, P* cjvp, P* cyvp)
-{
+    P* cyv, P* cjvp, P* cyvp){
+	
     //first, compute the bessel functions of fractional order
     bessjyv<P>(v + (P)0.5, z, vm, cjv, cyv, cjvp, cyvp);
 
+	if(z == 0){													//handle degenerate case of z = 0
+		memset(cjv, 0, sizeof(P) * (v+1));
+		cjv[0] = 1;
+	}
+
     //iterate through each and scale
-    for(int n = 0; n<=v; n++)
-    {
+    for(int n = 0; n<=v; n++){
 
-        cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
-        cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
+		if(z != 0){												//handle degenerate case of z = 0
+			cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
+			cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
+		}
 
         cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
         cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
diff --git a/stim/math/legendre.h b/stim/math/legendre.h
index 4859740..1099d4e 100644
--- a/stim/math/legendre.h
+++ b/stim/math/legendre.h
@@ -1,7 +1,7 @@
 #ifndef RTS_LEGENDRE_H
 #define RTS_LEGENDRE_H
 
-#include "rts/cuda/callable.h"
+#include "../cuda/cudatools/callable.h"
 
 namespace stim{
 
@@ -24,9 +24,11 @@ CUDA_CALLABLE void shift_legendre(int n, T x, T& P0, T& P1)
 	P1 = Pnew;
 }
 
+/// Iteratively evaluates the Legendre polynomials for orders l = [0 n]
 template <typename T>
 CUDA_CALLABLE void legendre(int n, T x, T* P)
 {
+	if(n < 0) return;
     P[0] = 1;
 
     if(n >= 1)
diff --git a/stim/optics/scalarbeam.h b/stim/optics/scalarbeam.h
index 106c728..cfb817b 100644
--- a/stim/optics/scalarbeam.h
+++ b/stim/optics/scalarbeam.h
@@ -4,12 +4,9 @@
 #include "../math/vec3.h"
 #include "../optics/scalarwave.h"
 #include "../math/bessel.h"
+#include "../math/legendre.h"
 #include <vector>
 
-//Boost
-//#include <boost/math/special_functions/bessel.hpp>
-//#include <boost/math/special_functions/legendre.hpp>
-
 namespace stim{
 
 		/// Function returns the value of the scalar field produced by a beam with the specified parameters
@@ -130,23 +127,6 @@ public:
 		return result;
 	}
 
-	/// Calculate the field at a set of positions
-	/*void field(stim::complex<T>* F, T* x, T* y, T* z, size_t N, size_t O){
-
-		memset(F, 0, N * sizeof(stim::complex<T>));
-		std::vector< scalarwave<T> > W = mc(O);					//get a random set of plane waves representing the beam
-		size_t o, n;
-		T px, py, pz;
-		for(n = 0; n < N; n++){									//for each point in the field
-			(x == NULL) ? px = 0 : px = x[n];								// test for NULL values
-			(y == NULL) ? py = 0 : py = y[n];
-			(z == NULL) ? pz = 0 : pz = z[n];
-			for(o = 0; o < O; o++){								//for each plane wave
-				F[n] += W[o].pos(px, py, pz);
-			}
-		}
-	}*/
-
 	std::string str()
 	{
 		std::stringstream ss;
@@ -165,51 +145,151 @@ public:
 
 };			//end beam
 
+/// Calculate the [0 Nl] terms for the aperture integral based on the give numerical aperture and center obscuration (optional)
+/// @param C is a pointer to Nl + 1 values where the terms will be stored
 template<typename T>
-void cpu_scalar_psf(stim::complex<T>* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, T NA, T NA_in, int Nl){
+CUDA_CALLABLE void cpu_aperture_integral(T* C, size_t Nl, T NA, T NA_in = 0){
 
-	memset(F, 0, N * sizeof(stim::complex<T>));
+	size_t table_bytes = (Nl + 1) * sizeof(T);				//calculate the number of bytes required to store the terms
+	T cos_alpha_1 = cos(asin(NA_in));						//calculate the cosine of the angle subtended by the central obscuration
+	T cos_alpha_2 = cos(asin(NA));							//calculate the cosine of the angle subtended by the aperture
+
+	// the aperture integral is computed using four individual Legendre polynomials, each a function of the angles subtended
+	//		by the objective and central obscuration
+	T* Pln_a1 = (T*) malloc(table_bytes);
+	stim::legendre<T>(Nl-1, cos_alpha_1, &Pln_a1[1]);
+	Pln_a1[0] = 1;
+
+	T* Pln_a2 = (T*) malloc(table_bytes);
+	stim::legendre<T>(Nl-1, cos_alpha_2, &Pln_a2[1]);
+	Pln_a2[0] = 1;
+
+	T* Plp_a1 = (T*) malloc(table_bytes+sizeof(T));
+	stim::legendre<T>(Nl+1, cos_alpha_1, Plp_a1);
+
+	T* Plp_a2 = (T*) malloc(table_bytes+sizeof(T));
+	stim::legendre<T>(Nl+1, cos_alpha_2, Plp_a2);
+
+	for(size_t l = 0; l <= Nl; l++){
+		C[l] = Plp_a1[l+1] - Plp_a2[l+1] - Pln_a1[l] + Pln_a2[l];
+	}
+
+	free(Pln_a1);
+	free(Pln_a2);
+	free(Plp_a1);
+	free(Plp_a2);
+}
+
+/// performs linear interpolation into a look-up table
+template<typename T>
+T lut_lookup(T* lut, T val, size_t N, T min_val, T delta, size_t stride = 0){
+	size_t idx = (size_t)((val - min_val) / delta);
+	T alpha = val - idx * delta + min_val;
+
+	if(alpha == 0) return lut[idx];
+	else return lut[idx * stride] * (1 - alpha) + lut[ (idx+1) * stride] * alpha;
+}
+
+template<typename T>
+void cpu_scalar_psf(stim::complex<T>* F, size_t N, T* r, T* phi, T lambda, T A, stim::vec3<T> f, T NA, T NA_in, int Nl){
 	T k = stim::TAU / lambda;
-	T jl, Pl, C, kr, cos_phi;
-	T cos_alpha_1 = cos(asin(NA_in));
-	T cos_alpha_2 = cos(asin(NA));
-	stim::vec3<T> p, ps;
 
-	/*double vm;
-	size_t table_bytes = (Nl + 1) * sizeof(double);
-	double* jv = (double*) malloc( table_bytes );
-	double* yv = (double*) malloc( table_bytes );
-	double* djv= (double*) malloc( table_bytes );
-	double* dyv= (double*) malloc( table_bytes );
-	*/
-
-	T vm;
-	size_t table_bytes = (Nl + 1) * sizeof(T);
-	T* jv = (T*) malloc( table_bytes );
-	T* yv = (T*) malloc( table_bytes );
-	T* djv= (T*) malloc( table_bytes );
-	T* dyv= (T*) malloc( table_bytes );
-
-	for(size_t n = 0; n < N; n++){
-		(x == NULL) ? p[0] = 0 : p[0] = x[n];								// test for NULL values and set positions
-		(y == NULL) ? p[1] = 0 : p[1] = y[n];
-		(z == NULL) ? p[2] = 0 : p[2] = z[n];
-
-		ps = p.cart2sph();												//convert this point to spherical coordinates
-		kr = k * ps[0];
-		cos_phi = std::cos(ps[2]);
-		stim::bessjyv_sph<T>(Nl, kr, vm, jv, yv, djv, dyv);
+	T* C = (T*) malloc( (Nl + 1) * sizeof(T) );					//allocate space for the aperture integral terms
+	cpu_aperture_integral(C, Nl, NA, NA_in);			//calculate the aperture integral terms
+	memset(F, 0, N * sizeof(stim::complex<T>));
+#ifdef NO_CUDA
+	memset(F, 0, N * sizeof(stim::complex<T>));
+	T jl, Pl, kr, cos_phi;
+
+	double vm;
+	double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
+	double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
+	double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
+	double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
+
+	T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T));
+
+	for(size_t n = 0; n < N; n++){								//for each point in the field
+		kr = k * r[n];											//calculate kr (the optical distance between the focal point and p)
+		cos_phi = std::cos(phi[n]);								//calculate the cosine of phi
+		stim::bessjyv_sph<double>(Nl, kr, vm, jv, yv, djv, dyv);		//compute the list of spherical bessel functions from [0 Nl]
+		stim::legendre<T>(Nl, cos_phi, Pl_cos_phi);				//calculate the [0 Nl] legendre polynomials for this point
 
 		for(int l = 0; l <= Nl; l++){
-			//jl = boost::math::sph_bessel<T>(l, kr);
-			//jl = stim::bessjyv(l, kr
 			jl = (T)jv[l];
-			Pl = 1;//boost::math::legendre_p<T>(l, cos_phi);
-			C = 1;//boost::math::legendre_p<T>(l+1, cos_alpha_1) - boost::math::legendre_p<T>(l + 1, cos_alpha_2) - boost::math::legendre_p<T>(l - 1, cos_alpha_1) + boost::math::legendre_p<T>(l - 1, cos_alpha_2);
-			F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C;
+			Pl = Pl_cos_phi[l];
+			F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C[l];
+		}
+		F[n] *= A * stim::TAU;
+	}
+
+	free(C);
+	free(Pl_cos_phi);
+#else
+	T min_r = r[0];
+	T max_r = r[0];
+	for(size_t i = 0; i < N; i++){								//find the minimum and maximum values of r (min and max distance from the focal point)
+		if(r[i] < min_r) min_r = r[i];
+		if(r[i] > max_r) max_r = r[i];
+	}
+	T min_kr = k * min_r;
+	T max_kr = k * max_r;
+
+	//temporary variables
+	double vm;
+	double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
+	double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
+	double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
+	double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
+
+	size_t Nlut = (size_t)sqrt(N) * 2;
+	T* bessel_lut = (T*) malloc(sizeof(T) * (Nl+1) * Nlut);
+	T delta_kr = (max_kr - min_kr) / (Nlut-1);
+	for(size_t kri = 0; kri < Nlut; kri++){
+		stim::bessjyv_sph<double>(Nl, min_kr + kri * delta_kr, vm, jv, yv, djv, dyv);		//compute the list of spherical bessel functions from [0 Nl]
+		for(size_t l = 0; l <= Nl; l++){
+			bessel_lut[kri * (Nl + 1) + l] = (T)jv[l];
+		}
+	}
+
+	T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T));
+	T kr, cos_phi, jl, Pl;
+	for(size_t n = 0; n < N; n++){								//for each point in the field
+		kr = k * r[n];											//calculate kr (the optical distance between the focal point and p)
+		cos_phi = std::cos(phi[n]);								//calculate the cosine of phi
+		stim::legendre<T>(Nl, cos_phi, Pl_cos_phi);				//calculate the [0 Nl] legendre polynomials for this point
+
+		for(int l = 0; l <= Nl; l++){
+			jl = lut_lookup<T>(&bessel_lut[l], kr, Nlut, min_kr, delta_kr, Nl+1);
+			Pl = Pl_cos_phi[l];
+			F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C[l];
 		}
 		F[n] *= A * stim::TAU;
 	}
+#endif
+}
+
+
+template<typename T>
+void cpu_scalar_psf(stim::complex<T>* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, T NA, T NA_in, int Nl){
+	T* r = (T*) malloc(N * sizeof(T));					//allocate space for p in spherical coordinates
+	T* phi = (T*) malloc(N * sizeof(T));				//	only r and phi are necessary (the scalar PSF is symmetric about theta)
+
+	stim::vec3<T> p, ps;
+	for(size_t i = 0; i < N; i++){
+		(x == NULL) ? p[0] = 0 : p[0] = x[i];	// test for NULL values and set positions
+		(y == NULL) ? p[1] = 0 : p[1] = y[i];
+		(z == NULL) ? p[2] = 0 : p[2] = z[i];
+
+		ps = p.cart2sph();						//convert from cartesian to spherical coordinates
+		r[i] = ps[0];							//store r
+		phi[i] = ps[2];							//phi = [0 pi]
+	}
+
+	cpu_scalar_psf(F, N, r, phi, lambda, A, f, NA, NA_in, Nl);		//call the spherical coordinate CPU function
+
+	free(r);
+	free(phi);
 }
 
 }			//end namespace stim
--
libgit2 0.21.4