Blame view

stim/optics/scalarbeam.h 19 KB
8e4f8364   David Mayerich   started a new opt...
1
2
  #ifndef RTS_BEAM
  #define RTS_BEAM
8309b07a   David Mayerich   fixed some vec3 e...
3
  #include <boost/math/special_functions/bessel.hpp>
8e4f8364   David Mayerich   started a new opt...
4
5
6
7
  
  #include "../math/vec3.h"
  #include "../optics/scalarwave.h"
  #include "../math/bessel.h"
0288346a   David Mayerich   added support for...
8
  #include "../math/legendre.h"
9339fbad   David Mayerich   implementing mie ...
9
10
  #include "../cuda/cudatools/devices.h"
  #include "../cuda/cudatools/timer.h"
8309b07a   David Mayerich   fixed some vec3 e...
11
  #include "../optics/scalarfield.h"
9339fbad   David Mayerich   implementing mie ...
12
13
  #include <cublas_v2.h>
  #include <math_constants.h>
8e4f8364   David Mayerich   started a new opt...
14
  #include <vector>
9339fbad   David Mayerich   implementing mie ...
15
  #include <stdlib.h>
8e4f8364   David Mayerich   started a new opt...
16
  
8e4f8364   David Mayerich   started a new opt...
17
  
8e4f8364   David Mayerich   started a new opt...
18
  
8309b07a   David Mayerich   fixed some vec3 e...
19
  namespace stim{
8e4f8364   David Mayerich   started a new opt...
20
  
8309b07a   David Mayerich   fixed some vec3 e...
21
  /// Function returns the value of the scalar field produced by a beam with the specified parameters
8e4f8364   David Mayerich   started a new opt...
22
  
8309b07a   David Mayerich   fixed some vec3 e...
23
24
  template<typename T>
  std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d, T NA, T NA_in = 0){
8e4f8364   David Mayerich   started a new opt...
25
  
8309b07a   David Mayerich   fixed some vec3 e...
26
  	std::vector< stim::vec3<T> > dirs(N);					//allocate an array to store the focusing vectors
8e4f8364   David Mayerich   started a new opt...
27
  
8309b07a   David Mayerich   fixed some vec3 e...
28
29
30
  	///compute the rotation operator to transform (0, 0, 1) to k
  	T cos_angle = d.dot(vec3<T>(0, 0, 1));
  	stim::matrix<T, 3> rotation;
8e4f8364   David Mayerich   started a new opt...
31
  
8309b07a   David Mayerich   fixed some vec3 e...
32
33
34
  	//if the cosine of the angle is -1, the rotation is just a flip across the z axis
  	if(cos_angle == -1){
  		rotation(2, 2) = -1;
8e4f8364   David Mayerich   started a new opt...
35
  	}
8309b07a   David Mayerich   fixed some vec3 e...
36
  	else if(cos_angle != 1.0)
8e4f8364   David Mayerich   started a new opt...
37
  	{
8309b07a   David Mayerich   fixed some vec3 e...
38
39
40
41
42
  		vec3<T> r_axis = vec3<T>(0, 0, 1).cross(d).norm();	//compute the axis of rotation
  		T angle = acos(cos_angle);							//compute the angle of rotation
  		quaternion<T> quat;							//create a quaternion describing the rotation
  		quat.CreateRotation(angle, r_axis);
  		rotation = quat.toMatrix3();							//compute the rotation matrix
8e4f8364   David Mayerich   started a new opt...
43
44
  	}
  
8309b07a   David Mayerich   fixed some vec3 e...
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
  	//find the phi values associated with the cassegrain ring
  	T PHI[2];
  	PHI[0] = (T)asin(NA);
  	PHI[1] = (T)asin(NA_in);
  
  	//calculate the z-axis cylinder coordinates associated with these angles
  	T Z[2];
  	Z[0] = cos(PHI[0]);
  	Z[1] = cos(PHI[1]);
  	T range = Z[0] - Z[1];
  
  	//draw a distribution of random phi, z values
  	T z, phi, theta;
  	//T kmag = stim::TAU / lambda;
  	for(int i=0; i<N; i++){								//for each sample
  		z = (T)((double)rand() / (double)RAND_MAX) * range + Z[1];			//find a random position on the surface of a cylinder
  		theta = (T)(((double)rand() / (double)RAND_MAX) * stim::TAU);
  		phi = acos(z);													//project onto the sphere, computing phi in spherical coordinates
  
  		//compute and store cartesian coordinates
  		vec3<T> spherical(1, theta, phi);								//convert from spherical to cartesian coordinates
  		vec3<T> cart = spherical.sph2cart();
  		dirs[i] = rotation * cart;										//create a sample vector
  	}
  	return dirs;
  }
8e4f8364   David Mayerich   started a new opt...
71
  
8309b07a   David Mayerich   fixed some vec3 e...
72
  		
0288346a   David Mayerich   added support for...
73
74
  /// Calculate the [0 Nl] terms for the aperture integral based on the give numerical aperture and center obscuration (optional)
  /// @param C is a pointer to Nl + 1 values where the terms will be stored
8e4f8364   David Mayerich   started a new opt...
75
  template<typename T>
9339fbad   David Mayerich   implementing mie ...
76
  CUDA_CALLABLE void cpu_aperture_integral(T* C, int Nl, T NA, T NA_in = 0){
8e4f8364   David Mayerich   started a new opt...
77
  
0288346a   David Mayerich   added support for...
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
  	size_t table_bytes = (Nl + 1) * sizeof(T);				//calculate the number of bytes required to store the terms
  	T cos_alpha_1 = cos(asin(NA_in));						//calculate the cosine of the angle subtended by the central obscuration
  	T cos_alpha_2 = cos(asin(NA));							//calculate the cosine of the angle subtended by the aperture
  
  	// the aperture integral is computed using four individual Legendre polynomials, each a function of the angles subtended
  	//		by the objective and central obscuration
  	T* Pln_a1 = (T*) malloc(table_bytes);
  	stim::legendre<T>(Nl-1, cos_alpha_1, &Pln_a1[1]);
  	Pln_a1[0] = 1;
  
  	T* Pln_a2 = (T*) malloc(table_bytes);
  	stim::legendre<T>(Nl-1, cos_alpha_2, &Pln_a2[1]);
  	Pln_a2[0] = 1;
  
  	T* Plp_a1 = (T*) malloc(table_bytes+sizeof(T));
  	stim::legendre<T>(Nl+1, cos_alpha_1, Plp_a1);
  
  	T* Plp_a2 = (T*) malloc(table_bytes+sizeof(T));
  	stim::legendre<T>(Nl+1, cos_alpha_2, Plp_a2);
  
  	for(size_t l = 0; l <= Nl; l++){
  		C[l] = Plp_a1[l+1] - Plp_a2[l+1] - Pln_a1[l] + Pln_a2[l];
  	}
  
  	free(Pln_a1);
  	free(Pln_a2);
  	free(Plp_a1);
  	free(Plp_a2);
  }
  
  /// performs linear interpolation into a look-up table
  template<typename T>
9339fbad   David Mayerich   implementing mie ...
110
111
112
113
114
115
116
117
118
119
120
  CUDA_CALLABLE void lut_lookup(T* lut_values, T* lut, T val, size_t N, T min_val, T delta, size_t n_vals){
  	T idx = ((val - min_val) / delta);
  	size_t i = (size_t) idx;
  	T a1 = idx - i;
  	T a0 = 1 - a1;
  	size_t n0 = i * n_vals;
  	size_t n1 = (i+1) * n_vals;
  	for(size_t n = 0; n < n_vals; n++){
  		lut_values[n] = lut[n0 + n] * a0 + lut[n1 + n] * a1;
  	}
  }
0288346a   David Mayerich   added support for...
121
  
9339fbad   David Mayerich   implementing mie ...
122
  template <typename T>
31262e83   David Mayerich   GPU implementatio...
123
124
125
126
127
  CUDA_CALLABLE stim::complex<T> clerp(stim::complex<T> v0, stim::complex<T> v1, T t) {
      return stim::complex<T>( fma(t, v1.r, fma(-t, v0.r, v0.r)), fma(t, v1.i, fma(-t, v0.i, v0.i)) );
  }
  
  template <typename T>
9339fbad   David Mayerich   implementing mie ...
128
129
  CUDA_CALLABLE T lerp(T v0, T v1, T t) {
      return fma(t, v1, fma(-t, v0, v0));
0288346a   David Mayerich   added support for...
130
131
  }
  
8309b07a   David Mayerich   fixed some vec3 e...
132
  #ifdef CUDA_FOUND
0288346a   David Mayerich   added support for...
133
  template<typename T>
8309b07a   David Mayerich   fixed some vec3 e...
134
  __global__ void cuda_scalar_psf(stim::complex<T>* E, size_t N, T* r, T* phi, T A, size_t Nl,
9339fbad   David Mayerich   implementing mie ...
135
  								T* C, 
8309b07a   David Mayerich   fixed some vec3 e...
136
  								T* lut_j, size_t Nj, T min_r, T dr){
9339fbad   David Mayerich   implementing mie ...
137
138
139
140
  	size_t i = blockIdx.x * blockDim.x + threadIdx.x;				//get the index into the array
  	if(i >= N) return;												//exit if this thread is outside the array
  
  	T cos_phi = cos(phi[i]);									//calculate the thread value for cos(phi)
9339fbad   David Mayerich   implementing mie ...
141
142
143
  	stim::complex<T> Ei = 0;									//initialize the value of the field to zero
  	size_t NC = Nl + 1;										//calculate the number of coefficients to be used
  
8309b07a   David Mayerich   fixed some vec3 e...
144
  	T fij = (r[i] - min_r)/dr;								//FP index into the spherical bessel LUT
9339fbad   David Mayerich   implementing mie ...
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
  	size_t ij = (size_t) fij;								//convert to an integral index
  	T a = fij - ij;											//calculate the fractional portion of the index
  	size_t n0j = ij * (NC);									//start of the first entry in the LUT
  	size_t n1j = (ij+1) * (NC);								//start of the second entry in the LUT
  
  	T jl;											//declare register to store the spherical bessel function
  	T Pl_2, Pl_1;									//declare registers to store the previous two Legendre polynomials
  	T Pl = 1;										//initialize the current value for the Legendre polynomial
  	stim::complex<T> im(0, 1);						//declare i (imaginary 1)
  	stim::complex<T> i_pow(1, 0);					//i_pow stores the current value of i^l so it doesn't have to be re-computed every iteration
  	for(int l = 0; l <= Nl; l++){					//for each order
  		jl = lerp<T>( lut_j[n0j + l], lut_j[n1j + l], a );	//read jl from the LUT and interpolate the result
  		Ei += i_pow * jl * Pl * C[l];				//calculate the value for the field and sum
  		i_pow *= im;								//multiply i^l * i for the next iteration
  		Pl_2 = Pl_1;								//shift Pl_1 -> Pl_2 and Pl -> Pl_1
  		Pl_1 = Pl;
  		if(l == 0){									//computing Pl is done recursively, where the recursive relation
  			Pl = cos_phi;							//	requires the first two orders. This defines the second.
  		}
  		else{										//if this is not the first iteration, use the recursive relation to calculate Pl
  			Pl = ( (2 * (l+1) - 1) * cos_phi * Pl_1 - (l) * Pl_2 ) / (l+1);
  		}
  		
  	}
  	E[i] = Ei * A * 2 * CUDART_PI_F;						//scale the integral by the amplitude
  }
  
  template<typename T>
  void gpu_scalar_psf_local(stim::complex<T>* E, size_t N, T* r, T* phi, T lambda, T A, T NA, T NA_in, int Nl, T r_spacing){
  
  	//Find the minimum and maximum values of r
      cublasStatus_t stat;
      cublasHandle_t handle;
  
  	stat = cublasCreate(&handle);							//create a cuBLAS handle
  	if (stat != CUBLAS_STATUS_SUCCESS){						//test for failure
          printf ("CUBLAS initialization failed\n");
  		exit(1);
  	}
  
  	int i_min, i_max;
  	stat = cublasIsamin(handle, (int)N, r, 1, &i_min);
  	if (stat != CUBLAS_STATUS_SUCCESS){						//test for failure
          printf ("CUBLAS Error: failed to calculate minimum r value.\n");
  		exit(1);
  	}
  	stat = cublasIsamax(handle, (int)N, r, 1, &i_max);
  	if (stat != CUBLAS_STATUS_SUCCESS){						//test for failure
          printf ("CUBLAS Error: failed to calculate maximum r value.\n");
  		exit(1);
  	}
  
8309b07a   David Mayerich   fixed some vec3 e...
197
198
  	i_min--;												//cuBLAS uses 1-based indexing for Fortran compatibility
  	i_max--;
9339fbad   David Mayerich   implementing mie ...
199
200
201
202
203
204
205
206
207
208
209
  	T r_min, r_max;											//allocate space to store the minimum and maximum values
  	HANDLE_ERROR( cudaMemcpy(&r_min, r + i_min, sizeof(T), cudaMemcpyDeviceToHost) );		//copy the min and max values from the device to the CPU
  	HANDLE_ERROR( cudaMemcpy(&r_max, r + i_max, sizeof(T), cudaMemcpyDeviceToHost) );
  
  	T k = (T)stim::TAU / lambda;							//calculate the wavenumber from lambda
  	size_t C_bytes = (Nl + 1) * sizeof(T);
  	T* C = (T*) malloc( C_bytes );							//allocate space for the aperture integral terms
  	cpu_aperture_integral(C, Nl, NA, NA_in);				//calculate the aperture integral terms
  
  	size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1);			//number of values in the look-up table based on the user-specified spacing along r
  
9339fbad   David Mayerich   implementing mie ...
210
211
  
  	size_t lutj_bytes = sizeof(T) * (Nl+1) * Nlut_j;
8309b07a   David Mayerich   fixed some vec3 e...
212
213
214
215
  	T* j_lut = (T*) malloc(lutj_bytes);													//pointer to the look-up table
  	T dr = (r_max - r_min) / (Nlut_j-1);												//distance between values in the LUT
  	T jl;
  	for(size_t ri = 0; ri < Nlut_j; ri++){													//for each value in the LUT
9339fbad   David Mayerich   implementing mie ...
216
  		for(size_t l = 0; l <= Nl; l++){													//for each order
8309b07a   David Mayerich   fixed some vec3 e...
217
218
  			jl = boost::math::sph_bessel<T>(l, k*(r_min + ri * dr));					//use boost to calculate the spherical bessel function
  			j_lut[ri * (Nl + 1) + l] = jl;												//store the bessel function result
9339fbad   David Mayerich   implementing mie ...
219
220
221
  		}
  	}
  
8309b07a   David Mayerich   fixed some vec3 e...
222
  	stim::cpu2image<T>(j_lut, "j_lut.bmp", Nl+1, Nlut_j, stim::cmBrewer);
9339fbad   David Mayerich   implementing mie ...
223
224
225
226
227
228
229
  	//Allocate device memory and copy everything to the GPU
  
  	T* gpu_C;
  	HANDLE_ERROR( cudaMalloc(&gpu_C, C_bytes) );
  	HANDLE_ERROR( cudaMemcpy(gpu_C, C, C_bytes, cudaMemcpyHostToDevice) );
  	T* gpu_j_lut;
  	HANDLE_ERROR( cudaMalloc(&gpu_j_lut, lutj_bytes) );
8309b07a   David Mayerich   fixed some vec3 e...
230
  	HANDLE_ERROR( cudaMemcpy(gpu_j_lut, j_lut, lutj_bytes, cudaMemcpyHostToDevice) );
9339fbad   David Mayerich   implementing mie ...
231
232
233
  
  	int threads = stim::maxThreadsPerBlock();			//get the maximum number of threads per block for the CUDA device
  	dim3 blocks( (unsigned)(N / threads + 1));						//calculate the optimal number of blocks
8e4f8364   David Mayerich   started a new opt...
234
  
8309b07a   David Mayerich   fixed some vec3 e...
235
  	cuda_scalar_psf<T><<< blocks, threads >>>(E, N, r, phi, A, Nl, gpu_C, gpu_j_lut, Nlut_j, r_min, dr);
9339fbad   David Mayerich   implementing mie ...
236
237
238
239
240
241
242
243
244
245
246
247
248
  
  	//free the LUT and condenser tables
  	HANDLE_ERROR( cudaFree(gpu_C) );
  	HANDLE_ERROR( cudaFree(gpu_j_lut) );
  }
  #endif
  
  /// Calculate the analytical solution to a scalar point spread function given a set of spherical coordinates about the PSF (beam propagation along phi = theta = 0)
  template<typename T>
  void cpu_scalar_psf_local(stim::complex<T>* F, size_t N, T* r, T* phi, T lambda, T A, T NA, T NA_in, int Nl){
  	T k = (T)stim::TAU / lambda;
  	size_t C_bytes = (Nl + 1) * sizeof(T);
  	T* C = (T*) malloc( C_bytes );					//allocate space for the aperture integral terms
0288346a   David Mayerich   added support for...
249
250
  	cpu_aperture_integral(C, Nl, NA, NA_in);			//calculate the aperture integral terms
  	memset(F, 0, N * sizeof(stim::complex<T>));
0288346a   David Mayerich   added support for...
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
  	T jl, Pl, kr, cos_phi;
  
  	double vm;
  	double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
  	double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
  	double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
  	double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
  
  	T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T));
  
  	for(size_t n = 0; n < N; n++){								//for each point in the field
  		kr = k * r[n];											//calculate kr (the optical distance between the focal point and p)
  		cos_phi = std::cos(phi[n]);								//calculate the cosine of phi
  		stim::bessjyv_sph<double>(Nl, kr, vm, jv, yv, djv, dyv);		//compute the list of spherical bessel functions from [0 Nl]
  		stim::legendre<T>(Nl, cos_phi, Pl_cos_phi);				//calculate the [0 Nl] legendre polynomials for this point
8e4f8364   David Mayerich   started a new opt...
266
267
  
  		for(int l = 0; l <= Nl; l++){
8e4f8364   David Mayerich   started a new opt...
268
  			jl = (T)jv[l];
0288346a   David Mayerich   added support for...
269
270
271
272
273
274
275
276
  			Pl = Pl_cos_phi[l];
  			F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C[l];
  		}
  		F[n] *= A * stim::TAU;
  	}
  
  	free(C);
  	free(Pl_cos_phi);
9339fbad   David Mayerich   implementing mie ...
277
  }
0288346a   David Mayerich   added support for...
278
  
9339fbad   David Mayerich   implementing mie ...
279
280
281
282
283
284
285
286
  /// Converts a set of cartesian points into spherical coordinates surrounding a point spread function (PSF)
  /// @param r is the output distance from the PSF
  /// @param phi is the non-symmetric direction about the PSF
  /// @param x (x, y, z) are the cartesian coordinates in world space
  /// @f is the focal point of the PSF in cartesian coordinates
  /// @d is the propagation direction of the PSF in cartesian coordinates
  template<typename T>
  __global__ void cuda_cart2psf(T* r, T* phi, size_t N, T* x, T* y, T* z, stim::vec3<T> f, stim::quaternion<T> q){
0288346a   David Mayerich   added support for...
287
  
9339fbad   David Mayerich   implementing mie ...
288
289
  	size_t i = blockIdx.x * blockDim.x + threadIdx.x;				//get the index into the array
  	if(i >= N) return;												//exit if this thread is outside the array
0288346a   David Mayerich   added support for...
290
  
9339fbad   David Mayerich   implementing mie ...
291
292
293
294
295
  	stim::vec3<T> p;									//declare a 3D point
  	
  	(x == NULL) ? p[0] = 0 : p[0] = x[i];				// test for NULL values and set positions
  	(y == NULL) ? p[1] = 0 : p[1] = y[i];
  	(z == NULL) ? p[2] = 0 : p[2] = z[i];
0288346a   David Mayerich   added support for...
296
  
9339fbad   David Mayerich   implementing mie ...
297
298
299
300
301
302
  	p = p - f;											//shift the point to the center of the PSF (focal point)
  	p = q.toMatrix3() * p;								//rotate the point to align with the propagation direction
  
  	stim::vec3<T> ps = p.cart2sph();									//convert from cartesian to spherical coordinates
  	r[i] = ps[0];										//store r
  	phi[i] = ps[2];										//phi = [0 pi]
0288346a   David Mayerich   added support for...
303
304
  }
  
8309b07a   David Mayerich   fixed some vec3 e...
305
  #ifdef CUDA_FOUND
9339fbad   David Mayerich   implementing mie ...
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
  /// Calculate the analytical solution to a point spread function given a set of points in cartesian coordinates
  template<typename T>
  void gpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){
  	
  	T* gpu_r;															//allocate space for the coordinates in r
  	HANDLE_ERROR( cudaMalloc(&gpu_r, sizeof(T) * N) );
  	T* gpu_phi;
  	HANDLE_ERROR( cudaMalloc(&gpu_phi, sizeof(T) * N) );
  	//stim::complex<T>* gpu_E;
  	//HANDLE_ERROR( cudaMalloc(&gpu_E, sizeof(stim::complex<T>) * N) );
  
  	stim::quaternion<T> q;												//create a quaternion
  	q.CreateRotation(d, stim::vec3<T>(0, 0, 1));						//create a mapping from the propagation direction to the PSF space
  	int threads = stim::maxThreadsPerBlock();							//get the maximum number of threads per block for the CUDA device
  	dim3 blocks( (unsigned)(N / threads + 1));							//calculate the optimal number of blocks
  	cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q);	//call the CUDA kernel to move the cartesian coordinates to PSF space
  
  	gpu_scalar_psf_local(E, N, gpu_r, gpu_phi, lambda, A, NA, NA_in, Nl, r_spacing);
  
  }
  #endif
0288346a   David Mayerich   added support for...
327
328
  
  template<typename T>
9339fbad   David Mayerich   implementing mie ...
329
330
331
  void cpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){
  
  // If CUDA is available, copy the cartesian points to the GPU and evaluate them in a kernel
8309b07a   David Mayerich   fixed some vec3 e...
332
  #ifdef CUDA_FOUND
9339fbad   David Mayerich   implementing mie ...
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
  
  	T* gpu_x = NULL;
  	if(x != NULL){
  		HANDLE_ERROR( cudaMalloc(&gpu_x, sizeof(T) * N) );
  		HANDLE_ERROR( cudaMemcpy(gpu_x, x, sizeof(T) * N, cudaMemcpyHostToDevice) );
  	}
  	T* gpu_y = NULL;
  	if(y != NULL){
  		HANDLE_ERROR( cudaMalloc(&gpu_y, sizeof(T) * N) );
  		HANDLE_ERROR( cudaMemcpy(gpu_y, y, sizeof(T) * N, cudaMemcpyHostToDevice) );
  	}
  	T* gpu_z = NULL;
  	if(z != NULL){
  		HANDLE_ERROR( cudaMalloc(&gpu_z, sizeof(T) * N) );
  		HANDLE_ERROR( cudaMemcpy(gpu_z, z, sizeof(T) * N, cudaMemcpyHostToDevice) );
  	}
  
  	stim::complex<T>* gpu_E;
  	HANDLE_ERROR( cudaMalloc(&gpu_E, sizeof(stim::complex<T>) * N) );
  	HANDLE_ERROR( cudaMemcpy(gpu_E, E, sizeof(stim::complex<T>) * N, cudaMemcpyHostToDevice) );
  	gpu_scalar_psf_cart<T>(gpu_E, N, gpu_x, gpu_y, gpu_z, lambda, A, f, d, NA, NA_in, Nl, r_spacing);
  	HANDLE_ERROR( cudaMemcpy(E, gpu_E, sizeof(stim::complex<T>) * N, cudaMemcpyDeviceToHost) );
  
  	HANDLE_ERROR( cudaFree(gpu_x) );
  	HANDLE_ERROR( cudaFree(gpu_y) );
  	HANDLE_ERROR( cudaFree(gpu_z) );
  	HANDLE_ERROR( cudaFree(gpu_E) );
  
  #else
0288346a   David Mayerich   added support for...
362
363
364
  	T* r = (T*) malloc(N * sizeof(T));					//allocate space for p in spherical coordinates
  	T* phi = (T*) malloc(N * sizeof(T));				//	only r and phi are necessary (the scalar PSF is symmetric about theta)
  
9339fbad   David Mayerich   implementing mie ...
365
366
367
368
  	stim::quaternion<T> q;
  	q.CreateRotation(d, stim::vec3<T>(0, 0, 1));
  	stim::matrix<T, 3> R = q.toMatrix3();
  	stim::vec3<T> p, ps, ds;
0288346a   David Mayerich   added support for...
369
370
371
372
373
  	for(size_t i = 0; i < N; i++){
  		(x == NULL) ? p[0] = 0 : p[0] = x[i];	// test for NULL values and set positions
  		(y == NULL) ? p[1] = 0 : p[1] = y[i];
  		(z == NULL) ? p[2] = 0 : p[2] = z[i];
  
9339fbad   David Mayerich   implementing mie ...
374
375
376
377
  		p = p - f;
  
  		p = R * p;					//rotate the cartesian point
  
0288346a   David Mayerich   added support for...
378
379
380
381
382
  		ps = p.cart2sph();						//convert from cartesian to spherical coordinates
  		r[i] = ps[0];							//store r
  		phi[i] = ps[2];							//phi = [0 pi]
  	}
  
8309b07a   David Mayerich   fixed some vec3 e...
383
  	cpu_scalar_psf_local(E, N, r, phi, lambda, A, NA, NA_in, Nl);		//call the spherical coordinate CPU function
0288346a   David Mayerich   added support for...
384
385
386
  
  	free(r);
  	free(phi);
9339fbad   David Mayerich   implementing mie ...
387
  #endif
8e4f8364   David Mayerich   started a new opt...
388
  }
8309b07a   David Mayerich   fixed some vec3 e...
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
  		
  /// Class stim::beam represents a beam of light focused at a point and composed of several plane waves
  template<typename T>
  class scalarbeam
  {
  public:
  	//enum beam_type {Uniform, Bartlett, Hamming, Hanning};
  
  private:
  	
  	T NA[2];				//numerical aperature of the focusing optics	
  	vec3<T> f;				//focal point
  	vec3<T> d;				//propagation direction
  	T A;		//beam amplitude
  	T lambda;				//beam wavelength
  public:
8e4f8364   David Mayerich   started a new opt...
405
  
8309b07a   David Mayerich   fixed some vec3 e...
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
  	///constructor: build a default beam (NA=1.0)
  	scalarbeam(T wavelength = 1, T amplitude = 1, vec3<T> focal_point = vec3<T>(0, 0, 0), vec3<T> direction = vec3<T>(0, 0, 1), T numerical_aperture = 1, T center_obsc = 0){
  		lambda = wavelength;
  		A = amplitude;
  		f = focal_point;
  		d = direction.norm();					//make sure that the direction vector is normalized (makes calculations more efficient later on)
  		NA[0] = numerical_aperture;
  		NA[1] = center_obsc;
  	}
  
  	///Numerical Aperature functions
  	void setNA(T na)
  	{
  		NA[0] = (T)0;
  		NA[1] = na;
  	}
  	void setNA(T na0, T na1)
  	{
  		NA[0] = na0;
  		NA[1] = na1;
  	}
  
  	//Monte-Carlo decomposition into plane waves
  	std::vector< scalarwave<T> > mc(size_t N = 100000) const{
  
  		std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]);	//generate a random set of N vectors forming a focus
  		std::vector< scalarwave<T> > samples(N);											//create a vector of plane waves
  		T kmag = (T)stim::TAU / lambda;								//calculate the wavenumber
  		stim::complex<T> apw;										//allocate space for the amplitude at the focal point
  		T a = (T)(stim::TAU * ( (1 - cos(asin(NA[0]))) - (1 - cos(asin(NA[1])))) / (double)N);			//constant value weights plane waves based on the aperture and number of samples (N)
  		stim::vec3<T> kpw;											//declare the new k-vector based on the focused plane wave direction
  		for(size_t i=0; i<N; i++){										//for each sample
  			kpw = dirs[i] * kmag;									//calculate the k-vector for the new plane wave
  			apw = a * exp(stim::complex<T>(0, kpw.dot(-f)));				//calculate the amplitude for the new plane wave
  			samples[i] = scalarwave<T>(kpw, apw);			//create a plane wave based on the direction
  		}
  		return samples;
  	}
  
  	/// Evaluate the beam to a scalar field using Debye focusing
  	void eval(stim::scalarfield<T>& E, size_t order = 500){
  		size_t array_size = E.grid_bytes();
  		T* X = (T*) malloc( array_size );			//allocate space for the coordinate meshes
  		T* Y = (T*) malloc( array_size );
  		T* Z = (T*) malloc( array_size );
  
  		E.meshgrid(X, Y, Z, stim::CPUmem);			//calculate the coordinate meshes
  		cpu_scalar_psf_cart<T>(E.ptr(), E.size(), X, Y, Z, lambda, A, f, d, NA[0], NA[1], order, E.spacing());
  
  		free(X);									//free the coordinate meshes
  		free(Y);
  		free(Z);
  	}
  
  	/// Calculate the field at a given point
  	/// @param x is the x-coordinate of the field point
  	/// @O is the approximation accuracy
  	stim::complex<T> field(T x, T y, T z, size_t O){
  		std::vector< scalarwave<T> > W = mc(O);
  		T result = 0;											//initialize the result to zero (0)
  		for(size_t i = 0; i < O; i++){							//for each plane wave
  			result += W[i].pos(x, y, z);
  		}
  		return result;
  	}
  
  	std::string str()
  	{
  		std::stringstream ss;
  		ss<<"Beam:"<<std::endl;
  		//ss<<"	Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
  		ss<<"	Beam Direction: "<<d<<std::endl;
  		if(NA[0] == 0)
  			ss<<"	NA: "<<NA[1];
  		else
  			ss<<"	NA: "<<NA[0]<<" -- "<<NA[1];
  
  		return ss.str();
  	}
  
  
  
  };			//end beam
8e4f8364   David Mayerich   started a new opt...
489
490
491
  }			//end namespace stim
  
  #endif