Blame view

stim/optics/scalarbeam.h 19.8 KB
8e4f8364   David Mayerich   started a new opt...
1
2
  #ifndef RTS_BEAM
  #define RTS_BEAM
8309b07a   David Mayerich   fixed some vec3 e...
3
  #include <boost/math/special_functions/bessel.hpp>
8e4f8364   David Mayerich   started a new opt...
4
5
6
7
  
  #include "../math/vec3.h"
  #include "../optics/scalarwave.h"
  #include "../math/bessel.h"
0288346a   David Mayerich   added support for...
8
  #include "../math/legendre.h"
9339fbad   David Mayerich   implementing mie ...
9
10
  #include "../cuda/cudatools/devices.h"
  #include "../cuda/cudatools/timer.h"
8309b07a   David Mayerich   fixed some vec3 e...
11
  #include "../optics/scalarfield.h"
9339fbad   David Mayerich   implementing mie ...
12
13
  #include <cublas_v2.h>
  #include <math_constants.h>
8e4f8364   David Mayerich   started a new opt...
14
  #include <vector>
9339fbad   David Mayerich   implementing mie ...
15
  #include <stdlib.h>
8e4f8364   David Mayerich   started a new opt...
16
  
8e4f8364   David Mayerich   started a new opt...
17
  
8e4f8364   David Mayerich   started a new opt...
18
  
8309b07a   David Mayerich   fixed some vec3 e...
19
  namespace stim{
8e4f8364   David Mayerich   started a new opt...
20
  
8309b07a   David Mayerich   fixed some vec3 e...
21
  /// Function returns the value of the scalar field produced by a beam with the specified parameters
8e4f8364   David Mayerich   started a new opt...
22
  
8309b07a   David Mayerich   fixed some vec3 e...
23
24
  template<typename T>
  std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d, T NA, T NA_in = 0){
8e4f8364   David Mayerich   started a new opt...
25
  
8309b07a   David Mayerich   fixed some vec3 e...
26
  	std::vector< stim::vec3<T> > dirs(N);					//allocate an array to store the focusing vectors
8e4f8364   David Mayerich   started a new opt...
27
  
8309b07a   David Mayerich   fixed some vec3 e...
28
29
  	///compute the rotation operator to transform (0, 0, 1) to k
  	T cos_angle = d.dot(vec3<T>(0, 0, 1));
6c4afcac   David Mayerich   introduced a gene...
30
  	stim::matrix_sq<T, 3> rotation;
8e4f8364   David Mayerich   started a new opt...
31
  
8309b07a   David Mayerich   fixed some vec3 e...
32
33
34
  	//if the cosine of the angle is -1, the rotation is just a flip across the z axis
  	if(cos_angle == -1){
  		rotation(2, 2) = -1;
8e4f8364   David Mayerich   started a new opt...
35
  	}
8309b07a   David Mayerich   fixed some vec3 e...
36
  	else if(cos_angle != 1.0)
8e4f8364   David Mayerich   started a new opt...
37
  	{
8309b07a   David Mayerich   fixed some vec3 e...
38
39
40
41
42
  		vec3<T> r_axis = vec3<T>(0, 0, 1).cross(d).norm();	//compute the axis of rotation
  		T angle = acos(cos_angle);							//compute the angle of rotation
  		quaternion<T> quat;							//create a quaternion describing the rotation
  		quat.CreateRotation(angle, r_axis);
  		rotation = quat.toMatrix3();							//compute the rotation matrix
8e4f8364   David Mayerich   started a new opt...
43
44
  	}
  
8309b07a   David Mayerich   fixed some vec3 e...
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
  	//find the phi values associated with the cassegrain ring
  	T PHI[2];
  	PHI[0] = (T)asin(NA);
  	PHI[1] = (T)asin(NA_in);
  
  	//calculate the z-axis cylinder coordinates associated with these angles
  	T Z[2];
  	Z[0] = cos(PHI[0]);
  	Z[1] = cos(PHI[1]);
  	T range = Z[0] - Z[1];
  
  	//draw a distribution of random phi, z values
  	T z, phi, theta;
  	//T kmag = stim::TAU / lambda;
  	for(int i=0; i<N; i++){								//for each sample
  		z = (T)((double)rand() / (double)RAND_MAX) * range + Z[1];			//find a random position on the surface of a cylinder
  		theta = (T)(((double)rand() / (double)RAND_MAX) * stim::TAU);
  		phi = acos(z);													//project onto the sphere, computing phi in spherical coordinates
  
  		//compute and store cartesian coordinates
  		vec3<T> spherical(1, theta, phi);								//convert from spherical to cartesian coordinates
  		vec3<T> cart = spherical.sph2cart();
  		dirs[i] = rotation * cart;										//create a sample vector
  	}
  	return dirs;
  }
8e4f8364   David Mayerich   started a new opt...
71
  
8309b07a   David Mayerich   fixed some vec3 e...
72
  		
0288346a   David Mayerich   added support for...
73
74
  /// Calculate the [0 Nl] terms for the aperture integral based on the give numerical aperture and center obscuration (optional)
  /// @param C is a pointer to Nl + 1 values where the terms will be stored
8e4f8364   David Mayerich   started a new opt...
75
  template<typename T>
9339fbad   David Mayerich   implementing mie ...
76
  CUDA_CALLABLE void cpu_aperture_integral(T* C, int Nl, T NA, T NA_in = 0){
8e4f8364   David Mayerich   started a new opt...
77
  
0288346a   David Mayerich   added support for...
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
  	size_t table_bytes = (Nl + 1) * sizeof(T);				//calculate the number of bytes required to store the terms
  	T cos_alpha_1 = cos(asin(NA_in));						//calculate the cosine of the angle subtended by the central obscuration
  	T cos_alpha_2 = cos(asin(NA));							//calculate the cosine of the angle subtended by the aperture
  
  	// the aperture integral is computed using four individual Legendre polynomials, each a function of the angles subtended
  	//		by the objective and central obscuration
  	T* Pln_a1 = (T*) malloc(table_bytes);
  	stim::legendre<T>(Nl-1, cos_alpha_1, &Pln_a1[1]);
  	Pln_a1[0] = 1;
  
  	T* Pln_a2 = (T*) malloc(table_bytes);
  	stim::legendre<T>(Nl-1, cos_alpha_2, &Pln_a2[1]);
  	Pln_a2[0] = 1;
  
  	T* Plp_a1 = (T*) malloc(table_bytes+sizeof(T));
  	stim::legendre<T>(Nl+1, cos_alpha_1, Plp_a1);
  
  	T* Plp_a2 = (T*) malloc(table_bytes+sizeof(T));
  	stim::legendre<T>(Nl+1, cos_alpha_2, Plp_a2);
  
  	for(size_t l = 0; l <= Nl; l++){
  		C[l] = Plp_a1[l+1] - Plp_a2[l+1] - Pln_a1[l] + Pln_a2[l];
  	}
  
  	free(Pln_a1);
  	free(Pln_a2);
  	free(Plp_a1);
  	free(Plp_a2);
  }
  
  /// performs linear interpolation into a look-up table
  template<typename T>
9339fbad   David Mayerich   implementing mie ...
110
111
112
113
114
115
116
117
118
119
120
  CUDA_CALLABLE void lut_lookup(T* lut_values, T* lut, T val, size_t N, T min_val, T delta, size_t n_vals){
  	T idx = ((val - min_val) / delta);
  	size_t i = (size_t) idx;
  	T a1 = idx - i;
  	T a0 = 1 - a1;
  	size_t n0 = i * n_vals;
  	size_t n1 = (i+1) * n_vals;
  	for(size_t n = 0; n < n_vals; n++){
  		lut_values[n] = lut[n0 + n] * a0 + lut[n1 + n] * a1;
  	}
  }
0288346a   David Mayerich   added support for...
121
  
9339fbad   David Mayerich   implementing mie ...
122
  template <typename T>
31262e83   David Mayerich   GPU implementatio...
123
  CUDA_CALLABLE stim::complex<T> clerp(stim::complex<T> v0, stim::complex<T> v1, T t) {
3669ed62   David Mayerich   fixed fused multi...
124
      return stim::complex<T>( fmaf(t, v1.r, fmaf(-t, v0.r, v0.r)), fmaf(t, v1.i, fmaf(-t, v0.i, v0.i)) );
31262e83   David Mayerich   GPU implementatio...
125
126
127
  }
  
  template <typename T>
9339fbad   David Mayerich   implementing mie ...
128
  CUDA_CALLABLE T lerp(T v0, T v1, T t) {
3669ed62   David Mayerich   fixed fused multi...
129
      return fmaf(t, v1, fmaf(-t, v0, v0));
0288346a   David Mayerich   added support for...
130
131
  }
  
8309b07a   David Mayerich   fixed some vec3 e...
132
  #ifdef CUDA_FOUND
0288346a   David Mayerich   added support for...
133
  template<typename T>
8309b07a   David Mayerich   fixed some vec3 e...
134
  __global__ void cuda_scalar_psf(stim::complex<T>* E, size_t N, T* r, T* phi, T A, size_t Nl,
9339fbad   David Mayerich   implementing mie ...
135
  								T* C, 
8309b07a   David Mayerich   fixed some vec3 e...
136
  								T* lut_j, size_t Nj, T min_r, T dr){
9339fbad   David Mayerich   implementing mie ...
137
138
139
140
  	size_t i = blockIdx.x * blockDim.x + threadIdx.x;				//get the index into the array
  	if(i >= N) return;												//exit if this thread is outside the array
  
  	T cos_phi = cos(phi[i]);									//calculate the thread value for cos(phi)
9339fbad   David Mayerich   implementing mie ...
141
142
143
  	stim::complex<T> Ei = 0;									//initialize the value of the field to zero
  	size_t NC = Nl + 1;										//calculate the number of coefficients to be used
  
8309b07a   David Mayerich   fixed some vec3 e...
144
  	T fij = (r[i] - min_r)/dr;								//FP index into the spherical bessel LUT
9339fbad   David Mayerich   implementing mie ...
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
  	size_t ij = (size_t) fij;								//convert to an integral index
  	T a = fij - ij;											//calculate the fractional portion of the index
  	size_t n0j = ij * (NC);									//start of the first entry in the LUT
  	size_t n1j = (ij+1) * (NC);								//start of the second entry in the LUT
  
  	T jl;											//declare register to store the spherical bessel function
  	T Pl_2, Pl_1;									//declare registers to store the previous two Legendre polynomials
  	T Pl = 1;										//initialize the current value for the Legendre polynomial
  	stim::complex<T> im(0, 1);						//declare i (imaginary 1)
  	stim::complex<T> i_pow(1, 0);					//i_pow stores the current value of i^l so it doesn't have to be re-computed every iteration
  	for(int l = 0; l <= Nl; l++){					//for each order
  		jl = lerp<T>( lut_j[n0j + l], lut_j[n1j + l], a );	//read jl from the LUT and interpolate the result
  		Ei += i_pow * jl * Pl * C[l];				//calculate the value for the field and sum
  		i_pow *= im;								//multiply i^l * i for the next iteration
  		Pl_2 = Pl_1;								//shift Pl_1 -> Pl_2 and Pl -> Pl_1
  		Pl_1 = Pl;
  		if(l == 0){									//computing Pl is done recursively, where the recursive relation
  			Pl = cos_phi;							//	requires the first two orders. This defines the second.
  		}
  		else{										//if this is not the first iteration, use the recursive relation to calculate Pl
  			Pl = ( (2 * (l+1) - 1) * cos_phi * Pl_1 - (l) * Pl_2 ) / (l+1);
  		}
  		
  	}
  	E[i] = Ei * A * 2 * CUDART_PI_F;						//scale the integral by the amplitude
  }
  
  template<typename T>
  void gpu_scalar_psf_local(stim::complex<T>* E, size_t N, T* r, T* phi, T lambda, T A, T NA, T NA_in, int Nl, T r_spacing){
  
  	//Find the minimum and maximum values of r
      cublasStatus_t stat;
      cublasHandle_t handle;
  
  	stat = cublasCreate(&handle);							//create a cuBLAS handle
  	if (stat != CUBLAS_STATUS_SUCCESS){						//test for failure
          printf ("CUBLAS initialization failed\n");
  		exit(1);
  	}
  
  	int i_min, i_max;
  	stat = cublasIsamin(handle, (int)N, r, 1, &i_min);
  	if (stat != CUBLAS_STATUS_SUCCESS){						//test for failure
          printf ("CUBLAS Error: failed to calculate minimum r value.\n");
  		exit(1);
  	}
  	stat = cublasIsamax(handle, (int)N, r, 1, &i_max);
  	if (stat != CUBLAS_STATUS_SUCCESS){						//test for failure
          printf ("CUBLAS Error: failed to calculate maximum r value.\n");
  		exit(1);
  	}
4252d827   David Mayerich   ivote3 fixes and ...
196
  	cublasDestroy(handle);
9339fbad   David Mayerich   implementing mie ...
197
  
8309b07a   David Mayerich   fixed some vec3 e...
198
199
  	i_min--;												//cuBLAS uses 1-based indexing for Fortran compatibility
  	i_max--;
9339fbad   David Mayerich   implementing mie ...
200
201
202
203
204
205
206
207
208
209
210
  	T r_min, r_max;											//allocate space to store the minimum and maximum values
  	HANDLE_ERROR( cudaMemcpy(&r_min, r + i_min, sizeof(T), cudaMemcpyDeviceToHost) );		//copy the min and max values from the device to the CPU
  	HANDLE_ERROR( cudaMemcpy(&r_max, r + i_max, sizeof(T), cudaMemcpyDeviceToHost) );
  
  	T k = (T)stim::TAU / lambda;							//calculate the wavenumber from lambda
  	size_t C_bytes = (Nl + 1) * sizeof(T);
  	T* C = (T*) malloc( C_bytes );							//allocate space for the aperture integral terms
  	cpu_aperture_integral(C, Nl, NA, NA_in);				//calculate the aperture integral terms
  
  	size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1);			//number of values in the look-up table based on the user-specified spacing along r
  
9339fbad   David Mayerich   implementing mie ...
211
212
  
  	size_t lutj_bytes = sizeof(T) * (Nl+1) * Nlut_j;
8309b07a   David Mayerich   fixed some vec3 e...
213
214
215
  	T* j_lut = (T*) malloc(lutj_bytes);													//pointer to the look-up table
  	T dr = (r_max - r_min) / (Nlut_j-1);												//distance between values in the LUT
  	T jl;
4252d827   David Mayerich   ivote3 fixes and ...
216
  	unsigned l;
8309b07a   David Mayerich   fixed some vec3 e...
217
  	for(size_t ri = 0; ri < Nlut_j; ri++){													//for each value in the LUT
4252d827   David Mayerich   ivote3 fixes and ...
218
  		for(l = 0; l <= (unsigned)Nl; l++){													//for each order
8309b07a   David Mayerich   fixed some vec3 e...
219
220
  			jl = boost::math::sph_bessel<T>(l, k*(r_min + ri * dr));					//use boost to calculate the spherical bessel function
  			j_lut[ri * (Nl + 1) + l] = jl;												//store the bessel function result
9339fbad   David Mayerich   implementing mie ...
221
222
223
  		}
  	}
  
963d0676   David Mayerich   bug fixes related...
224
  	//stim::cpu2image<T>(j_lut, "j_lut.bmp", Nl+1, Nlut_j, stim::cmBrewer);
9339fbad   David Mayerich   implementing mie ...
225
226
227
228
229
230
231
  	//Allocate device memory and copy everything to the GPU
  
  	T* gpu_C;
  	HANDLE_ERROR( cudaMalloc(&gpu_C, C_bytes) );
  	HANDLE_ERROR( cudaMemcpy(gpu_C, C, C_bytes, cudaMemcpyHostToDevice) );
  	T* gpu_j_lut;
  	HANDLE_ERROR( cudaMalloc(&gpu_j_lut, lutj_bytes) );
8309b07a   David Mayerich   fixed some vec3 e...
232
  	HANDLE_ERROR( cudaMemcpy(gpu_j_lut, j_lut, lutj_bytes, cudaMemcpyHostToDevice) );
9339fbad   David Mayerich   implementing mie ...
233
234
235
  
  	int threads = stim::maxThreadsPerBlock();			//get the maximum number of threads per block for the CUDA device
  	dim3 blocks( (unsigned)(N / threads + 1));						//calculate the optimal number of blocks
8e4f8364   David Mayerich   started a new opt...
236
  
8309b07a   David Mayerich   fixed some vec3 e...
237
  	cuda_scalar_psf<T><<< blocks, threads >>>(E, N, r, phi, A, Nl, gpu_C, gpu_j_lut, Nlut_j, r_min, dr);
9339fbad   David Mayerich   implementing mie ...
238
239
240
241
242
243
244
245
246
247
248
249
250
  
  	//free the LUT and condenser tables
  	HANDLE_ERROR( cudaFree(gpu_C) );
  	HANDLE_ERROR( cudaFree(gpu_j_lut) );
  }
  #endif
  
  /// Calculate the analytical solution to a scalar point spread function given a set of spherical coordinates about the PSF (beam propagation along phi = theta = 0)
  template<typename T>
  void cpu_scalar_psf_local(stim::complex<T>* F, size_t N, T* r, T* phi, T lambda, T A, T NA, T NA_in, int Nl){
  	T k = (T)stim::TAU / lambda;
  	size_t C_bytes = (Nl + 1) * sizeof(T);
  	T* C = (T*) malloc( C_bytes );					//allocate space for the aperture integral terms
0288346a   David Mayerich   added support for...
251
252
  	cpu_aperture_integral(C, Nl, NA, NA_in);			//calculate the aperture integral terms
  	memset(F, 0, N * sizeof(stim::complex<T>));
0288346a   David Mayerich   added support for...
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
  	T jl, Pl, kr, cos_phi;
  
  	double vm;
  	double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
  	double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
  	double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
  	double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
  
  	T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T));
  
  	for(size_t n = 0; n < N; n++){								//for each point in the field
  		kr = k * r[n];											//calculate kr (the optical distance between the focal point and p)
  		cos_phi = std::cos(phi[n]);								//calculate the cosine of phi
  		stim::bessjyv_sph<double>(Nl, kr, vm, jv, yv, djv, dyv);		//compute the list of spherical bessel functions from [0 Nl]
  		stim::legendre<T>(Nl, cos_phi, Pl_cos_phi);				//calculate the [0 Nl] legendre polynomials for this point
8e4f8364   David Mayerich   started a new opt...
268
269
  
  		for(int l = 0; l <= Nl; l++){
8e4f8364   David Mayerich   started a new opt...
270
  			jl = (T)jv[l];
0288346a   David Mayerich   added support for...
271
272
273
274
275
276
277
278
  			Pl = Pl_cos_phi[l];
  			F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C[l];
  		}
  		F[n] *= A * stim::TAU;
  	}
  
  	free(C);
  	free(Pl_cos_phi);
9339fbad   David Mayerich   implementing mie ...
279
  }
0288346a   David Mayerich   added support for...
280
  
9339fbad   David Mayerich   implementing mie ...
281
282
283
284
285
286
287
288
  /// Converts a set of cartesian points into spherical coordinates surrounding a point spread function (PSF)
  /// @param r is the output distance from the PSF
  /// @param phi is the non-symmetric direction about the PSF
  /// @param x (x, y, z) are the cartesian coordinates in world space
  /// @f is the focal point of the PSF in cartesian coordinates
  /// @d is the propagation direction of the PSF in cartesian coordinates
  template<typename T>
  __global__ void cuda_cart2psf(T* r, T* phi, size_t N, T* x, T* y, T* z, stim::vec3<T> f, stim::quaternion<T> q){
0288346a   David Mayerich   added support for...
289
  
9339fbad   David Mayerich   implementing mie ...
290
291
  	size_t i = blockIdx.x * blockDim.x + threadIdx.x;				//get the index into the array
  	if(i >= N) return;												//exit if this thread is outside the array
0288346a   David Mayerich   added support for...
292
  
9339fbad   David Mayerich   implementing mie ...
293
294
295
296
297
  	stim::vec3<T> p;									//declare a 3D point
  	
  	(x == NULL) ? p[0] = 0 : p[0] = x[i];				// test for NULL values and set positions
  	(y == NULL) ? p[1] = 0 : p[1] = y[i];
  	(z == NULL) ? p[2] = 0 : p[2] = z[i];
0288346a   David Mayerich   added support for...
298
  
9339fbad   David Mayerich   implementing mie ...
299
300
301
302
303
304
  	p = p - f;											//shift the point to the center of the PSF (focal point)
  	p = q.toMatrix3() * p;								//rotate the point to align with the propagation direction
  
  	stim::vec3<T> ps = p.cart2sph();									//convert from cartesian to spherical coordinates
  	r[i] = ps[0];										//store r
  	phi[i] = ps[2];										//phi = [0 pi]
0288346a   David Mayerich   added support for...
305
306
  }
  
8309b07a   David Mayerich   fixed some vec3 e...
307
  #ifdef CUDA_FOUND
9339fbad   David Mayerich   implementing mie ...
308
309
310
311
312
313
314
315
316
317
318
319
320
  /// Calculate the analytical solution to a point spread function given a set of points in cartesian coordinates
  template<typename T>
  void gpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){
  	
  	T* gpu_r;															//allocate space for the coordinates in r
  	HANDLE_ERROR( cudaMalloc(&gpu_r, sizeof(T) * N) );
  	T* gpu_phi;
  	HANDLE_ERROR( cudaMalloc(&gpu_phi, sizeof(T) * N) );
  	//stim::complex<T>* gpu_E;
  	//HANDLE_ERROR( cudaMalloc(&gpu_E, sizeof(stim::complex<T>) * N) );
  
  	stim::quaternion<T> q;												//create a quaternion
  	q.CreateRotation(d, stim::vec3<T>(0, 0, 1));						//create a mapping from the propagation direction to the PSF space
6c4afcac   David Mayerich   introduced a gene...
321
  	stim::matrix_sq<T, 3> rot = q.toMatrix3();
9339fbad   David Mayerich   implementing mie ...
322
323
324
325
326
327
  	int threads = stim::maxThreadsPerBlock();							//get the maximum number of threads per block for the CUDA device
  	dim3 blocks( (unsigned)(N / threads + 1));							//calculate the optimal number of blocks
  	cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q);	//call the CUDA kernel to move the cartesian coordinates to PSF space
  
  	gpu_scalar_psf_local(E, N, gpu_r, gpu_phi, lambda, A, NA, NA_in, Nl, r_spacing);
  
4252d827   David Mayerich   ivote3 fixes and ...
328
329
330
  	HANDLE_ERROR( cudaFree(gpu_r) );
  	HANDLE_ERROR( cudaFree(gpu_phi) );
  
9339fbad   David Mayerich   implementing mie ...
331
332
  }
  #endif
0288346a   David Mayerich   added support for...
333
334
  
  template<typename T>
9339fbad   David Mayerich   implementing mie ...
335
336
337
  void cpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){
  
  // If CUDA is available, copy the cartesian points to the GPU and evaluate them in a kernel
8309b07a   David Mayerich   fixed some vec3 e...
338
  #ifdef CUDA_FOUND
9339fbad   David Mayerich   implementing mie ...
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
  
  	T* gpu_x = NULL;
  	if(x != NULL){
  		HANDLE_ERROR( cudaMalloc(&gpu_x, sizeof(T) * N) );
  		HANDLE_ERROR( cudaMemcpy(gpu_x, x, sizeof(T) * N, cudaMemcpyHostToDevice) );
  	}
  	T* gpu_y = NULL;
  	if(y != NULL){
  		HANDLE_ERROR( cudaMalloc(&gpu_y, sizeof(T) * N) );
  		HANDLE_ERROR( cudaMemcpy(gpu_y, y, sizeof(T) * N, cudaMemcpyHostToDevice) );
  	}
  	T* gpu_z = NULL;
  	if(z != NULL){
  		HANDLE_ERROR( cudaMalloc(&gpu_z, sizeof(T) * N) );
  		HANDLE_ERROR( cudaMemcpy(gpu_z, z, sizeof(T) * N, cudaMemcpyHostToDevice) );
  	}
  
  	stim::complex<T>* gpu_E;
  	HANDLE_ERROR( cudaMalloc(&gpu_E, sizeof(stim::complex<T>) * N) );
  	HANDLE_ERROR( cudaMemcpy(gpu_E, E, sizeof(stim::complex<T>) * N, cudaMemcpyHostToDevice) );
  	gpu_scalar_psf_cart<T>(gpu_E, N, gpu_x, gpu_y, gpu_z, lambda, A, f, d, NA, NA_in, Nl, r_spacing);
  	HANDLE_ERROR( cudaMemcpy(E, gpu_E, sizeof(stim::complex<T>) * N, cudaMemcpyDeviceToHost) );
  
  	HANDLE_ERROR( cudaFree(gpu_x) );
  	HANDLE_ERROR( cudaFree(gpu_y) );
  	HANDLE_ERROR( cudaFree(gpu_z) );
  	HANDLE_ERROR( cudaFree(gpu_E) );
  
  #else
0288346a   David Mayerich   added support for...
368
369
370
  	T* r = (T*) malloc(N * sizeof(T));					//allocate space for p in spherical coordinates
  	T* phi = (T*) malloc(N * sizeof(T));				//	only r and phi are necessary (the scalar PSF is symmetric about theta)
  
9339fbad   David Mayerich   implementing mie ...
371
372
373
374
  	stim::quaternion<T> q;
  	q.CreateRotation(d, stim::vec3<T>(0, 0, 1));
  	stim::matrix<T, 3> R = q.toMatrix3();
  	stim::vec3<T> p, ps, ds;
0288346a   David Mayerich   added support for...
375
376
377
378
379
  	for(size_t i = 0; i < N; i++){
  		(x == NULL) ? p[0] = 0 : p[0] = x[i];	// test for NULL values and set positions
  		(y == NULL) ? p[1] = 0 : p[1] = y[i];
  		(z == NULL) ? p[2] = 0 : p[2] = z[i];
  
9339fbad   David Mayerich   implementing mie ...
380
381
382
383
  		p = p - f;
  
  		p = R * p;					//rotate the cartesian point
  
0288346a   David Mayerich   added support for...
384
385
386
387
388
  		ps = p.cart2sph();						//convert from cartesian to spherical coordinates
  		r[i] = ps[0];							//store r
  		phi[i] = ps[2];							//phi = [0 pi]
  	}
  
8309b07a   David Mayerich   fixed some vec3 e...
389
  	cpu_scalar_psf_local(E, N, r, phi, lambda, A, NA, NA_in, Nl);		//call the spherical coordinate CPU function
0288346a   David Mayerich   added support for...
390
391
392
  
  	free(r);
  	free(phi);
9339fbad   David Mayerich   implementing mie ...
393
  #endif
8e4f8364   David Mayerich   started a new opt...
394
  }
8309b07a   David Mayerich   fixed some vec3 e...
395
396
397
398
399
400
401
402
403
404
405
406
407
  		
  /// Class stim::beam represents a beam of light focused at a point and composed of several plane waves
  template<typename T>
  class scalarbeam
  {
  public:
  	//enum beam_type {Uniform, Bartlett, Hamming, Hanning};
  
  private:
  	
  	T NA[2];				//numerical aperature of the focusing optics	
  	vec3<T> f;				//focal point
  	vec3<T> d;				//propagation direction
2a10ecf4   David Mayerich   updated files and...
408
  	T A;					//beam amplitude
8309b07a   David Mayerich   fixed some vec3 e...
409
410
  	T lambda;				//beam wavelength
  public:
8e4f8364   David Mayerich   started a new opt...
411
  
8309b07a   David Mayerich   fixed some vec3 e...
412
413
414
415
416
417
  	///constructor: build a default beam (NA=1.0)
  	scalarbeam(T wavelength = 1, T amplitude = 1, vec3<T> focal_point = vec3<T>(0, 0, 0), vec3<T> direction = vec3<T>(0, 0, 1), T numerical_aperture = 1, T center_obsc = 0){
  		lambda = wavelength;
  		A = amplitude;
  		f = focal_point;
  		d = direction.norm();					//make sure that the direction vector is normalized (makes calculations more efficient later on)
6a53ac0c   David Mayerich   updated optics fi...
418
419
  		NA[0] = center_obsc;
  		NA[1] = numerical_aperture;
8309b07a   David Mayerich   fixed some vec3 e...
420
421
422
423
424
425
426
427
  	}
  
  	///Numerical Aperature functions
  	void setNA(T na)
  	{
  		NA[0] = (T)0;
  		NA[1] = na;
  	}
6a53ac0c   David Mayerich   updated optics fi...
428
  	void setNA(T na_in, T na_out)
8309b07a   David Mayerich   fixed some vec3 e...
429
  	{
6a53ac0c   David Mayerich   updated optics fi...
430
431
  		NA[0] = na_in;
  		NA[1] = na_out;
8309b07a   David Mayerich   fixed some vec3 e...
432
433
434
435
436
  	}
  
  	//Monte-Carlo decomposition into plane waves
  	std::vector< scalarwave<T> > mc(size_t N = 100000) const{
  
8309b07a   David Mayerich   fixed some vec3 e...
437
  		T kmag = (T)stim::TAU / lambda;								//calculate the wavenumber
6a53ac0c   David Mayerich   updated optics fi...
438
  		stim::vec3<T> kpw;											//declare the new k-vector based on the focused plane wave direction
8309b07a   David Mayerich   fixed some vec3 e...
439
  		stim::complex<T> apw;										//allocate space for the amplitude at the focal point
6a53ac0c   David Mayerich   updated optics fi...
440
441
442
443
444
445
446
447
448
449
450
451
452
  
  		//deal with the degenerative case where the outer NA is 0, in which case the beam will be specified as a single plane wave
  		if (NA[1] == 0) {
  			std::vector< scalarwave<T> > samples(1);				//create a vector containing 1 sample
  			kpw = d * kmag;											//calculate the k-vector for the plane wave (beam direction scaled by wavenumber)
  			apw = A * exp(stim::complex<T>(0, kpw.dot(-f)));		//calculate the amplitude of the plane wave
  			samples[0] = scalarwave<T>(kpw, apw);					//create a plane wave based on the direction
  			return samples;
  		}
  
  		//otherwise, evaluate the system using N monte-carlo samples
  		std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]);	//generate a random set of N vectors forming a focus
  		std::vector< scalarwave<T> > samples(N);											//create a vector of plane waves		
8309b07a   David Mayerich   fixed some vec3 e...
453
  		T a = (T)(stim::TAU * ( (1 - cos(asin(NA[0]))) - (1 - cos(asin(NA[1])))) / (double)N);			//constant value weights plane waves based on the aperture and number of samples (N)
2a10ecf4   David Mayerich   updated files and...
454
  		for(size_t i=0; i<N; i++){									//for each sample
8309b07a   David Mayerich   fixed some vec3 e...
455
  			kpw = dirs[i] * kmag;									//calculate the k-vector for the new plane wave
2a10ecf4   David Mayerich   updated files and...
456
457
  			apw = a * A * exp(stim::complex<T>(0, kpw.dot(-f)));	//calculate the amplitude for the new plane wave
  			samples[i] = scalarwave<T>(kpw, apw);					//create a plane wave based on the direction
8309b07a   David Mayerich   fixed some vec3 e...
458
459
460
461
  		}
  		return samples;
  	}
  
963d0676   David Mayerich   bug fixes related...
462
  	void eval(stim::scalarfield<T>& E, T* X, T* Y, T* Z, int order = 500){
6a53ac0c   David Mayerich   updated optics fi...
463
  		cpu_scalar_psf_cart<T>(E.ptr(), E.size(), X, Y, Z, lambda, A, f, d, NA[1], NA[0], order, E.spacing());
963d0676   David Mayerich   bug fixes related...
464
465
  	}
  
8309b07a   David Mayerich   fixed some vec3 e...
466
  	/// Evaluate the beam to a scalar field using Debye focusing
963d0676   David Mayerich   bug fixes related...
467
  	void eval(stim::scalarfield<T>& E, int order = 500){
6a53ac0c   David Mayerich   updated optics fi...
468
469
  		E.meshgrid();															//calculate a meshgrid if one isn't created
  
4252d827   David Mayerich   ivote3 fixes and ...
470
  		if(E.gpu())
6a53ac0c   David Mayerich   updated optics fi...
471
  			gpu_scalar_psf_cart<T>(E.ptr(), E.size(), E.x(), E.y(), E.z(), lambda, A, f, d, NA[1], NA[0], order, E.spacing());
4252d827   David Mayerich   ivote3 fixes and ...
472
  		else
6a53ac0c   David Mayerich   updated optics fi...
473
474
  			cpu_scalar_psf_cart<T>(E.ptr(), E.size(), E.x(), E.y(), E.z(), lambda, A, f, d, NA[1], NA[0], order, E.spacing());
  		
8309b07a   David Mayerich   fixed some vec3 e...
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
  	}
  
  	/// Calculate the field at a given point
  	/// @param x is the x-coordinate of the field point
  	/// @O is the approximation accuracy
  	stim::complex<T> field(T x, T y, T z, size_t O){
  		std::vector< scalarwave<T> > W = mc(O);
  		T result = 0;											//initialize the result to zero (0)
  		for(size_t i = 0; i < O; i++){							//for each plane wave
  			result += W[i].pos(x, y, z);
  		}
  		return result;
  	}
  
  	std::string str()
  	{
  		std::stringstream ss;
  		ss<<"Beam:"<<std::endl;
  		//ss<<"	Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
  		ss<<"	Beam Direction: "<<d<<std::endl;
  		if(NA[0] == 0)
  			ss<<"	NA: "<<NA[1];
  		else
  			ss<<"	NA: "<<NA[0]<<" -- "<<NA[1];
  
  		return ss.str();
  	}
  
  
  
  };			//end beam
8e4f8364   David Mayerich   started a new opt...
506
507
508
  }			//end namespace stim
  
  #endif