Blame view

stim/optics/scalarbeam.h 19.2 KB
8e4f8364   David Mayerich   started a new opt...
1
2
  #ifndef RTS_BEAM
  #define RTS_BEAM
8309b07a   David Mayerich   fixed some vec3 e...
3
  #include <boost/math/special_functions/bessel.hpp>
8e4f8364   David Mayerich   started a new opt...
4
5
6
7
  
  #include "../math/vec3.h"
  #include "../optics/scalarwave.h"
  #include "../math/bessel.h"
0288346a   David Mayerich   added support for...
8
  #include "../math/legendre.h"
9339fbad   David Mayerich   implementing mie ...
9
10
  #include "../cuda/cudatools/devices.h"
  #include "../cuda/cudatools/timer.h"
8309b07a   David Mayerich   fixed some vec3 e...
11
  #include "../optics/scalarfield.h"
9339fbad   David Mayerich   implementing mie ...
12
13
  #include <cublas_v2.h>
  #include <math_constants.h>
8e4f8364   David Mayerich   started a new opt...
14
  #include <vector>
9339fbad   David Mayerich   implementing mie ...
15
  #include <stdlib.h>
8e4f8364   David Mayerich   started a new opt...
16
  
8e4f8364   David Mayerich   started a new opt...
17
  
8e4f8364   David Mayerich   started a new opt...
18
  
8309b07a   David Mayerich   fixed some vec3 e...
19
  namespace stim{
8e4f8364   David Mayerich   started a new opt...
20
  
8309b07a   David Mayerich   fixed some vec3 e...
21
  /// Function returns the value of the scalar field produced by a beam with the specified parameters
8e4f8364   David Mayerich   started a new opt...
22
  
8309b07a   David Mayerich   fixed some vec3 e...
23
24
  template<typename T>
  std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d, T NA, T NA_in = 0){
8e4f8364   David Mayerich   started a new opt...
25
  
8309b07a   David Mayerich   fixed some vec3 e...
26
  	std::vector< stim::vec3<T> > dirs(N);					//allocate an array to store the focusing vectors
8e4f8364   David Mayerich   started a new opt...
27
  
8309b07a   David Mayerich   fixed some vec3 e...
28
29
30
  	///compute the rotation operator to transform (0, 0, 1) to k
  	T cos_angle = d.dot(vec3<T>(0, 0, 1));
  	stim::matrix<T, 3> rotation;
8e4f8364   David Mayerich   started a new opt...
31
  
8309b07a   David Mayerich   fixed some vec3 e...
32
33
34
  	//if the cosine of the angle is -1, the rotation is just a flip across the z axis
  	if(cos_angle == -1){
  		rotation(2, 2) = -1;
8e4f8364   David Mayerich   started a new opt...
35
  	}
8309b07a   David Mayerich   fixed some vec3 e...
36
  	else if(cos_angle != 1.0)
8e4f8364   David Mayerich   started a new opt...
37
  	{
8309b07a   David Mayerich   fixed some vec3 e...
38
39
40
41
42
  		vec3<T> r_axis = vec3<T>(0, 0, 1).cross(d).norm();	//compute the axis of rotation
  		T angle = acos(cos_angle);							//compute the angle of rotation
  		quaternion<T> quat;							//create a quaternion describing the rotation
  		quat.CreateRotation(angle, r_axis);
  		rotation = quat.toMatrix3();							//compute the rotation matrix
8e4f8364   David Mayerich   started a new opt...
43
44
  	}
  
8309b07a   David Mayerich   fixed some vec3 e...
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
  	//find the phi values associated with the cassegrain ring
  	T PHI[2];
  	PHI[0] = (T)asin(NA);
  	PHI[1] = (T)asin(NA_in);
  
  	//calculate the z-axis cylinder coordinates associated with these angles
  	T Z[2];
  	Z[0] = cos(PHI[0]);
  	Z[1] = cos(PHI[1]);
  	T range = Z[0] - Z[1];
  
  	//draw a distribution of random phi, z values
  	T z, phi, theta;
  	//T kmag = stim::TAU / lambda;
  	for(int i=0; i<N; i++){								//for each sample
  		z = (T)((double)rand() / (double)RAND_MAX) * range + Z[1];			//find a random position on the surface of a cylinder
  		theta = (T)(((double)rand() / (double)RAND_MAX) * stim::TAU);
  		phi = acos(z);													//project onto the sphere, computing phi in spherical coordinates
  
  		//compute and store cartesian coordinates
  		vec3<T> spherical(1, theta, phi);								//convert from spherical to cartesian coordinates
  		vec3<T> cart = spherical.sph2cart();
  		dirs[i] = rotation * cart;										//create a sample vector
  	}
  	return dirs;
  }
8e4f8364   David Mayerich   started a new opt...
71
  
8309b07a   David Mayerich   fixed some vec3 e...
72
  		
0288346a   David Mayerich   added support for...
73
74
  /// Calculate the [0 Nl] terms for the aperture integral based on the give numerical aperture and center obscuration (optional)
  /// @param C is a pointer to Nl + 1 values where the terms will be stored
8e4f8364   David Mayerich   started a new opt...
75
  template<typename T>
9339fbad   David Mayerich   implementing mie ...
76
  CUDA_CALLABLE void cpu_aperture_integral(T* C, int Nl, T NA, T NA_in = 0){
8e4f8364   David Mayerich   started a new opt...
77
  
0288346a   David Mayerich   added support for...
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
  	size_t table_bytes = (Nl + 1) * sizeof(T);				//calculate the number of bytes required to store the terms
  	T cos_alpha_1 = cos(asin(NA_in));						//calculate the cosine of the angle subtended by the central obscuration
  	T cos_alpha_2 = cos(asin(NA));							//calculate the cosine of the angle subtended by the aperture
  
  	// the aperture integral is computed using four individual Legendre polynomials, each a function of the angles subtended
  	//		by the objective and central obscuration
  	T* Pln_a1 = (T*) malloc(table_bytes);
  	stim::legendre<T>(Nl-1, cos_alpha_1, &Pln_a1[1]);
  	Pln_a1[0] = 1;
  
  	T* Pln_a2 = (T*) malloc(table_bytes);
  	stim::legendre<T>(Nl-1, cos_alpha_2, &Pln_a2[1]);
  	Pln_a2[0] = 1;
  
  	T* Plp_a1 = (T*) malloc(table_bytes+sizeof(T));
  	stim::legendre<T>(Nl+1, cos_alpha_1, Plp_a1);
  
  	T* Plp_a2 = (T*) malloc(table_bytes+sizeof(T));
  	stim::legendre<T>(Nl+1, cos_alpha_2, Plp_a2);
  
  	for(size_t l = 0; l <= Nl; l++){
  		C[l] = Plp_a1[l+1] - Plp_a2[l+1] - Pln_a1[l] + Pln_a2[l];
  	}
  
  	free(Pln_a1);
  	free(Pln_a2);
  	free(Plp_a1);
  	free(Plp_a2);
  }
  
  /// performs linear interpolation into a look-up table
  template<typename T>
9339fbad   David Mayerich   implementing mie ...
110
111
112
113
114
115
116
117
118
119
120
  CUDA_CALLABLE void lut_lookup(T* lut_values, T* lut, T val, size_t N, T min_val, T delta, size_t n_vals){
  	T idx = ((val - min_val) / delta);
  	size_t i = (size_t) idx;
  	T a1 = idx - i;
  	T a0 = 1 - a1;
  	size_t n0 = i * n_vals;
  	size_t n1 = (i+1) * n_vals;
  	for(size_t n = 0; n < n_vals; n++){
  		lut_values[n] = lut[n0 + n] * a0 + lut[n1 + n] * a1;
  	}
  }
0288346a   David Mayerich   added support for...
121
  
9339fbad   David Mayerich   implementing mie ...
122
  template <typename T>
31262e83   David Mayerich   GPU implementatio...
123
  CUDA_CALLABLE stim::complex<T> clerp(stim::complex<T> v0, stim::complex<T> v1, T t) {
3669ed62   David Mayerich   fixed fused multi...
124
      return stim::complex<T>( fmaf(t, v1.r, fmaf(-t, v0.r, v0.r)), fmaf(t, v1.i, fmaf(-t, v0.i, v0.i)) );
31262e83   David Mayerich   GPU implementatio...
125
126
127
  }
  
  template <typename T>
9339fbad   David Mayerich   implementing mie ...
128
  CUDA_CALLABLE T lerp(T v0, T v1, T t) {
3669ed62   David Mayerich   fixed fused multi...
129
      return fmaf(t, v1, fmaf(-t, v0, v0));
0288346a   David Mayerich   added support for...
130
131
  }
  
8309b07a   David Mayerich   fixed some vec3 e...
132
  #ifdef CUDA_FOUND
0288346a   David Mayerich   added support for...
133
  template<typename T>
8309b07a   David Mayerich   fixed some vec3 e...
134
  __global__ void cuda_scalar_psf(stim::complex<T>* E, size_t N, T* r, T* phi, T A, size_t Nl,
9339fbad   David Mayerich   implementing mie ...
135
  								T* C, 
8309b07a   David Mayerich   fixed some vec3 e...
136
  								T* lut_j, size_t Nj, T min_r, T dr){
9339fbad   David Mayerich   implementing mie ...
137
138
139
140
  	size_t i = blockIdx.x * blockDim.x + threadIdx.x;				//get the index into the array
  	if(i >= N) return;												//exit if this thread is outside the array
  
  	T cos_phi = cos(phi[i]);									//calculate the thread value for cos(phi)
9339fbad   David Mayerich   implementing mie ...
141
142
143
  	stim::complex<T> Ei = 0;									//initialize the value of the field to zero
  	size_t NC = Nl + 1;										//calculate the number of coefficients to be used
  
8309b07a   David Mayerich   fixed some vec3 e...
144
  	T fij = (r[i] - min_r)/dr;								//FP index into the spherical bessel LUT
9339fbad   David Mayerich   implementing mie ...
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
  	size_t ij = (size_t) fij;								//convert to an integral index
  	T a = fij - ij;											//calculate the fractional portion of the index
  	size_t n0j = ij * (NC);									//start of the first entry in the LUT
  	size_t n1j = (ij+1) * (NC);								//start of the second entry in the LUT
  
  	T jl;											//declare register to store the spherical bessel function
  	T Pl_2, Pl_1;									//declare registers to store the previous two Legendre polynomials
  	T Pl = 1;										//initialize the current value for the Legendre polynomial
  	stim::complex<T> im(0, 1);						//declare i (imaginary 1)
  	stim::complex<T> i_pow(1, 0);					//i_pow stores the current value of i^l so it doesn't have to be re-computed every iteration
  	for(int l = 0; l <= Nl; l++){					//for each order
  		jl = lerp<T>( lut_j[n0j + l], lut_j[n1j + l], a );	//read jl from the LUT and interpolate the result
  		Ei += i_pow * jl * Pl * C[l];				//calculate the value for the field and sum
  		i_pow *= im;								//multiply i^l * i for the next iteration
  		Pl_2 = Pl_1;								//shift Pl_1 -> Pl_2 and Pl -> Pl_1
  		Pl_1 = Pl;
  		if(l == 0){									//computing Pl is done recursively, where the recursive relation
  			Pl = cos_phi;							//	requires the first two orders. This defines the second.
  		}
  		else{										//if this is not the first iteration, use the recursive relation to calculate Pl
  			Pl = ( (2 * (l+1) - 1) * cos_phi * Pl_1 - (l) * Pl_2 ) / (l+1);
  		}
  		
  	}
  	E[i] = Ei * A * 2 * CUDART_PI_F;						//scale the integral by the amplitude
  }
  
  template<typename T>
  void gpu_scalar_psf_local(stim::complex<T>* E, size_t N, T* r, T* phi, T lambda, T A, T NA, T NA_in, int Nl, T r_spacing){
  
  	//Find the minimum and maximum values of r
      cublasStatus_t stat;
      cublasHandle_t handle;
  
  	stat = cublasCreate(&handle);							//create a cuBLAS handle
  	if (stat != CUBLAS_STATUS_SUCCESS){						//test for failure
          printf ("CUBLAS initialization failed\n");
  		exit(1);
  	}
  
  	int i_min, i_max;
  	stat = cublasIsamin(handle, (int)N, r, 1, &i_min);
  	if (stat != CUBLAS_STATUS_SUCCESS){						//test for failure
          printf ("CUBLAS Error: failed to calculate minimum r value.\n");
  		exit(1);
  	}
  	stat = cublasIsamax(handle, (int)N, r, 1, &i_max);
  	if (stat != CUBLAS_STATUS_SUCCESS){						//test for failure
          printf ("CUBLAS Error: failed to calculate maximum r value.\n");
  		exit(1);
  	}
4252d827   David Mayerich   ivote3 fixes and ...
196
  	cublasDestroy(handle);
9339fbad   David Mayerich   implementing mie ...
197
  
8309b07a   David Mayerich   fixed some vec3 e...
198
199
  	i_min--;												//cuBLAS uses 1-based indexing for Fortran compatibility
  	i_max--;
9339fbad   David Mayerich   implementing mie ...
200
201
202
203
204
205
206
207
208
209
210
  	T r_min, r_max;											//allocate space to store the minimum and maximum values
  	HANDLE_ERROR( cudaMemcpy(&r_min, r + i_min, sizeof(T), cudaMemcpyDeviceToHost) );		//copy the min and max values from the device to the CPU
  	HANDLE_ERROR( cudaMemcpy(&r_max, r + i_max, sizeof(T), cudaMemcpyDeviceToHost) );
  
  	T k = (T)stim::TAU / lambda;							//calculate the wavenumber from lambda
  	size_t C_bytes = (Nl + 1) * sizeof(T);
  	T* C = (T*) malloc( C_bytes );							//allocate space for the aperture integral terms
  	cpu_aperture_integral(C, Nl, NA, NA_in);				//calculate the aperture integral terms
  
  	size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1);			//number of values in the look-up table based on the user-specified spacing along r
  
9339fbad   David Mayerich   implementing mie ...
211
212
  
  	size_t lutj_bytes = sizeof(T) * (Nl+1) * Nlut_j;
8309b07a   David Mayerich   fixed some vec3 e...
213
214
215
  	T* j_lut = (T*) malloc(lutj_bytes);													//pointer to the look-up table
  	T dr = (r_max - r_min) / (Nlut_j-1);												//distance between values in the LUT
  	T jl;
4252d827   David Mayerich   ivote3 fixes and ...
216
  	unsigned l;
8309b07a   David Mayerich   fixed some vec3 e...
217
  	for(size_t ri = 0; ri < Nlut_j; ri++){													//for each value in the LUT
4252d827   David Mayerich   ivote3 fixes and ...
218
  		for(l = 0; l <= (unsigned)Nl; l++){													//for each order
8309b07a   David Mayerich   fixed some vec3 e...
219
220
  			jl = boost::math::sph_bessel<T>(l, k*(r_min + ri * dr));					//use boost to calculate the spherical bessel function
  			j_lut[ri * (Nl + 1) + l] = jl;												//store the bessel function result
9339fbad   David Mayerich   implementing mie ...
221
222
223
  		}
  	}
  
963d0676   David Mayerich   bug fixes related...
224
  	//stim::cpu2image<T>(j_lut, "j_lut.bmp", Nl+1, Nlut_j, stim::cmBrewer);
9339fbad   David Mayerich   implementing mie ...
225
226
227
228
229
230
231
  	//Allocate device memory and copy everything to the GPU
  
  	T* gpu_C;
  	HANDLE_ERROR( cudaMalloc(&gpu_C, C_bytes) );
  	HANDLE_ERROR( cudaMemcpy(gpu_C, C, C_bytes, cudaMemcpyHostToDevice) );
  	T* gpu_j_lut;
  	HANDLE_ERROR( cudaMalloc(&gpu_j_lut, lutj_bytes) );
8309b07a   David Mayerich   fixed some vec3 e...
232
  	HANDLE_ERROR( cudaMemcpy(gpu_j_lut, j_lut, lutj_bytes, cudaMemcpyHostToDevice) );
9339fbad   David Mayerich   implementing mie ...
233
234
235
  
  	int threads = stim::maxThreadsPerBlock();			//get the maximum number of threads per block for the CUDA device
  	dim3 blocks( (unsigned)(N / threads + 1));						//calculate the optimal number of blocks
8e4f8364   David Mayerich   started a new opt...
236
  
8309b07a   David Mayerich   fixed some vec3 e...
237
  	cuda_scalar_psf<T><<< blocks, threads >>>(E, N, r, phi, A, Nl, gpu_C, gpu_j_lut, Nlut_j, r_min, dr);
9339fbad   David Mayerich   implementing mie ...
238
239
240
241
242
243
244
245
246
247
248
249
250
  
  	//free the LUT and condenser tables
  	HANDLE_ERROR( cudaFree(gpu_C) );
  	HANDLE_ERROR( cudaFree(gpu_j_lut) );
  }
  #endif
  
  /// Calculate the analytical solution to a scalar point spread function given a set of spherical coordinates about the PSF (beam propagation along phi = theta = 0)
  template<typename T>
  void cpu_scalar_psf_local(stim::complex<T>* F, size_t N, T* r, T* phi, T lambda, T A, T NA, T NA_in, int Nl){
  	T k = (T)stim::TAU / lambda;
  	size_t C_bytes = (Nl + 1) * sizeof(T);
  	T* C = (T*) malloc( C_bytes );					//allocate space for the aperture integral terms
0288346a   David Mayerich   added support for...
251
252
  	cpu_aperture_integral(C, Nl, NA, NA_in);			//calculate the aperture integral terms
  	memset(F, 0, N * sizeof(stim::complex<T>));
0288346a   David Mayerich   added support for...
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
  	T jl, Pl, kr, cos_phi;
  
  	double vm;
  	double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
  	double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
  	double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
  	double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
  
  	T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T));
  
  	for(size_t n = 0; n < N; n++){								//for each point in the field
  		kr = k * r[n];											//calculate kr (the optical distance between the focal point and p)
  		cos_phi = std::cos(phi[n]);								//calculate the cosine of phi
  		stim::bessjyv_sph<double>(Nl, kr, vm, jv, yv, djv, dyv);		//compute the list of spherical bessel functions from [0 Nl]
  		stim::legendre<T>(Nl, cos_phi, Pl_cos_phi);				//calculate the [0 Nl] legendre polynomials for this point
8e4f8364   David Mayerich   started a new opt...
268
269
  
  		for(int l = 0; l <= Nl; l++){
8e4f8364   David Mayerich   started a new opt...
270
  			jl = (T)jv[l];
0288346a   David Mayerich   added support for...
271
272
273
274
275
276
277
278
  			Pl = Pl_cos_phi[l];
  			F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C[l];
  		}
  		F[n] *= A * stim::TAU;
  	}
  
  	free(C);
  	free(Pl_cos_phi);
9339fbad   David Mayerich   implementing mie ...
279
  }
0288346a   David Mayerich   added support for...
280
  
9339fbad   David Mayerich   implementing mie ...
281
282
283
284
285
286
287
288
  /// Converts a set of cartesian points into spherical coordinates surrounding a point spread function (PSF)
  /// @param r is the output distance from the PSF
  /// @param phi is the non-symmetric direction about the PSF
  /// @param x (x, y, z) are the cartesian coordinates in world space
  /// @f is the focal point of the PSF in cartesian coordinates
  /// @d is the propagation direction of the PSF in cartesian coordinates
  template<typename T>
  __global__ void cuda_cart2psf(T* r, T* phi, size_t N, T* x, T* y, T* z, stim::vec3<T> f, stim::quaternion<T> q){
0288346a   David Mayerich   added support for...
289
  
9339fbad   David Mayerich   implementing mie ...
290
291
  	size_t i = blockIdx.x * blockDim.x + threadIdx.x;				//get the index into the array
  	if(i >= N) return;												//exit if this thread is outside the array
0288346a   David Mayerich   added support for...
292
  
9339fbad   David Mayerich   implementing mie ...
293
294
295
296
297
  	stim::vec3<T> p;									//declare a 3D point
  	
  	(x == NULL) ? p[0] = 0 : p[0] = x[i];				// test for NULL values and set positions
  	(y == NULL) ? p[1] = 0 : p[1] = y[i];
  	(z == NULL) ? p[2] = 0 : p[2] = z[i];
0288346a   David Mayerich   added support for...
298
  
9339fbad   David Mayerich   implementing mie ...
299
300
301
302
303
304
  	p = p - f;											//shift the point to the center of the PSF (focal point)
  	p = q.toMatrix3() * p;								//rotate the point to align with the propagation direction
  
  	stim::vec3<T> ps = p.cart2sph();									//convert from cartesian to spherical coordinates
  	r[i] = ps[0];										//store r
  	phi[i] = ps[2];										//phi = [0 pi]
0288346a   David Mayerich   added support for...
305
306
  }
  
8309b07a   David Mayerich   fixed some vec3 e...
307
  #ifdef CUDA_FOUND
9339fbad   David Mayerich   implementing mie ...
308
309
310
311
312
313
314
315
316
317
318
319
320
  /// Calculate the analytical solution to a point spread function given a set of points in cartesian coordinates
  template<typename T>
  void gpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){
  	
  	T* gpu_r;															//allocate space for the coordinates in r
  	HANDLE_ERROR( cudaMalloc(&gpu_r, sizeof(T) * N) );
  	T* gpu_phi;
  	HANDLE_ERROR( cudaMalloc(&gpu_phi, sizeof(T) * N) );
  	//stim::complex<T>* gpu_E;
  	//HANDLE_ERROR( cudaMalloc(&gpu_E, sizeof(stim::complex<T>) * N) );
  
  	stim::quaternion<T> q;												//create a quaternion
  	q.CreateRotation(d, stim::vec3<T>(0, 0, 1));						//create a mapping from the propagation direction to the PSF space
963d0676   David Mayerich   bug fixes related...
321
  	stim::matrix<T, 3> rot = q.toMatrix3();
9339fbad   David Mayerich   implementing mie ...
322
323
324
325
326
327
  	int threads = stim::maxThreadsPerBlock();							//get the maximum number of threads per block for the CUDA device
  	dim3 blocks( (unsigned)(N / threads + 1));							//calculate the optimal number of blocks
  	cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q);	//call the CUDA kernel to move the cartesian coordinates to PSF space
  
  	gpu_scalar_psf_local(E, N, gpu_r, gpu_phi, lambda, A, NA, NA_in, Nl, r_spacing);
  
4252d827   David Mayerich   ivote3 fixes and ...
328
329
330
  	HANDLE_ERROR( cudaFree(gpu_r) );
  	HANDLE_ERROR( cudaFree(gpu_phi) );
  
9339fbad   David Mayerich   implementing mie ...
331
332
  }
  #endif
0288346a   David Mayerich   added support for...
333
334
  
  template<typename T>
9339fbad   David Mayerich   implementing mie ...
335
336
337
  void cpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){
  
  // If CUDA is available, copy the cartesian points to the GPU and evaluate them in a kernel
8309b07a   David Mayerich   fixed some vec3 e...
338
  #ifdef CUDA_FOUND
9339fbad   David Mayerich   implementing mie ...
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
  
  	T* gpu_x = NULL;
  	if(x != NULL){
  		HANDLE_ERROR( cudaMalloc(&gpu_x, sizeof(T) * N) );
  		HANDLE_ERROR( cudaMemcpy(gpu_x, x, sizeof(T) * N, cudaMemcpyHostToDevice) );
  	}
  	T* gpu_y = NULL;
  	if(y != NULL){
  		HANDLE_ERROR( cudaMalloc(&gpu_y, sizeof(T) * N) );
  		HANDLE_ERROR( cudaMemcpy(gpu_y, y, sizeof(T) * N, cudaMemcpyHostToDevice) );
  	}
  	T* gpu_z = NULL;
  	if(z != NULL){
  		HANDLE_ERROR( cudaMalloc(&gpu_z, sizeof(T) * N) );
  		HANDLE_ERROR( cudaMemcpy(gpu_z, z, sizeof(T) * N, cudaMemcpyHostToDevice) );
  	}
  
  	stim::complex<T>* gpu_E;
  	HANDLE_ERROR( cudaMalloc(&gpu_E, sizeof(stim::complex<T>) * N) );
  	HANDLE_ERROR( cudaMemcpy(gpu_E, E, sizeof(stim::complex<T>) * N, cudaMemcpyHostToDevice) );
  	gpu_scalar_psf_cart<T>(gpu_E, N, gpu_x, gpu_y, gpu_z, lambda, A, f, d, NA, NA_in, Nl, r_spacing);
  	HANDLE_ERROR( cudaMemcpy(E, gpu_E, sizeof(stim::complex<T>) * N, cudaMemcpyDeviceToHost) );
  
  	HANDLE_ERROR( cudaFree(gpu_x) );
  	HANDLE_ERROR( cudaFree(gpu_y) );
  	HANDLE_ERROR( cudaFree(gpu_z) );
  	HANDLE_ERROR( cudaFree(gpu_E) );
  
  #else
0288346a   David Mayerich   added support for...
368
369
370
  	T* r = (T*) malloc(N * sizeof(T));					//allocate space for p in spherical coordinates
  	T* phi = (T*) malloc(N * sizeof(T));				//	only r and phi are necessary (the scalar PSF is symmetric about theta)
  
9339fbad   David Mayerich   implementing mie ...
371
372
373
374
  	stim::quaternion<T> q;
  	q.CreateRotation(d, stim::vec3<T>(0, 0, 1));
  	stim::matrix<T, 3> R = q.toMatrix3();
  	stim::vec3<T> p, ps, ds;
0288346a   David Mayerich   added support for...
375
376
377
378
379
  	for(size_t i = 0; i < N; i++){
  		(x == NULL) ? p[0] = 0 : p[0] = x[i];	// test for NULL values and set positions
  		(y == NULL) ? p[1] = 0 : p[1] = y[i];
  		(z == NULL) ? p[2] = 0 : p[2] = z[i];
  
9339fbad   David Mayerich   implementing mie ...
380
381
382
383
  		p = p - f;
  
  		p = R * p;					//rotate the cartesian point
  
0288346a   David Mayerich   added support for...
384
385
386
387
388
  		ps = p.cart2sph();						//convert from cartesian to spherical coordinates
  		r[i] = ps[0];							//store r
  		phi[i] = ps[2];							//phi = [0 pi]
  	}
  
8309b07a   David Mayerich   fixed some vec3 e...
389
  	cpu_scalar_psf_local(E, N, r, phi, lambda, A, NA, NA_in, Nl);		//call the spherical coordinate CPU function
0288346a   David Mayerich   added support for...
390
391
392
  
  	free(r);
  	free(phi);
9339fbad   David Mayerich   implementing mie ...
393
  #endif
8e4f8364   David Mayerich   started a new opt...
394
  }
8309b07a   David Mayerich   fixed some vec3 e...
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
  		
  /// Class stim::beam represents a beam of light focused at a point and composed of several plane waves
  template<typename T>
  class scalarbeam
  {
  public:
  	//enum beam_type {Uniform, Bartlett, Hamming, Hanning};
  
  private:
  	
  	T NA[2];				//numerical aperature of the focusing optics	
  	vec3<T> f;				//focal point
  	vec3<T> d;				//propagation direction
  	T A;		//beam amplitude
  	T lambda;				//beam wavelength
  public:
8e4f8364   David Mayerich   started a new opt...
411
  
8309b07a   David Mayerich   fixed some vec3 e...
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
  	///constructor: build a default beam (NA=1.0)
  	scalarbeam(T wavelength = 1, T amplitude = 1, vec3<T> focal_point = vec3<T>(0, 0, 0), vec3<T> direction = vec3<T>(0, 0, 1), T numerical_aperture = 1, T center_obsc = 0){
  		lambda = wavelength;
  		A = amplitude;
  		f = focal_point;
  		d = direction.norm();					//make sure that the direction vector is normalized (makes calculations more efficient later on)
  		NA[0] = numerical_aperture;
  		NA[1] = center_obsc;
  	}
  
  	///Numerical Aperature functions
  	void setNA(T na)
  	{
  		NA[0] = (T)0;
  		NA[1] = na;
  	}
  	void setNA(T na0, T na1)
  	{
  		NA[0] = na0;
  		NA[1] = na1;
  	}
  
  	//Monte-Carlo decomposition into plane waves
  	std::vector< scalarwave<T> > mc(size_t N = 100000) const{
  
  		std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]);	//generate a random set of N vectors forming a focus
  		std::vector< scalarwave<T> > samples(N);											//create a vector of plane waves
  		T kmag = (T)stim::TAU / lambda;								//calculate the wavenumber
  		stim::complex<T> apw;										//allocate space for the amplitude at the focal point
  		T a = (T)(stim::TAU * ( (1 - cos(asin(NA[0]))) - (1 - cos(asin(NA[1])))) / (double)N);			//constant value weights plane waves based on the aperture and number of samples (N)
  		stim::vec3<T> kpw;											//declare the new k-vector based on the focused plane wave direction
  		for(size_t i=0; i<N; i++){										//for each sample
  			kpw = dirs[i] * kmag;									//calculate the k-vector for the new plane wave
  			apw = a * exp(stim::complex<T>(0, kpw.dot(-f)));				//calculate the amplitude for the new plane wave
  			samples[i] = scalarwave<T>(kpw, apw);			//create a plane wave based on the direction
  		}
  		return samples;
  	}
  
963d0676   David Mayerich   bug fixes related...
451
452
453
454
  	void eval(stim::scalarfield<T>& E, T* X, T* Y, T* Z, int order = 500){
  		cpu_scalar_psf_cart<T>(E.ptr(), E.size(), X, Y, Z, lambda, A, f, d, NA[0], NA[1], order, E.spacing());
  	}
  
8309b07a   David Mayerich   fixed some vec3 e...
455
  	/// Evaluate the beam to a scalar field using Debye focusing
963d0676   David Mayerich   bug fixes related...
456
  	void eval(stim::scalarfield<T>& E, int order = 500){
4252d827   David Mayerich   ivote3 fixes and ...
457
458
459
460
461
462
  		E.meshgrid();								//calculate a meshgrid if one isn't created
  		if(E.gpu())
  			gpu_scalar_psf_cart<T>(E.ptr(), E.size(), E.x(), E.y(), E.z(), lambda, A, f, d, NA[0], NA[1], order, E.spacing());
  		else
  			cpu_scalar_psf_cart<T>(E.ptr(), E.size(), E.x(), E.y(), E.z(), lambda, A, f, d, NA[0], NA[1], order, E.spacing());
  		//eval(E, E.x(), E.y(), E.z(), order);
8309b07a   David Mayerich   fixed some vec3 e...
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
  	}
  
  	/// Calculate the field at a given point
  	/// @param x is the x-coordinate of the field point
  	/// @O is the approximation accuracy
  	stim::complex<T> field(T x, T y, T z, size_t O){
  		std::vector< scalarwave<T> > W = mc(O);
  		T result = 0;											//initialize the result to zero (0)
  		for(size_t i = 0; i < O; i++){							//for each plane wave
  			result += W[i].pos(x, y, z);
  		}
  		return result;
  	}
  
  	std::string str()
  	{
  		std::stringstream ss;
  		ss<<"Beam:"<<std::endl;
  		//ss<<"	Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
  		ss<<"	Beam Direction: "<<d<<std::endl;
  		if(NA[0] == 0)
  			ss<<"	NA: "<<NA[1];
  		else
  			ss<<"	NA: "<<NA[0]<<" -- "<<NA[1];
  
  		return ss.str();
  	}
  
  
  
  };			//end beam
8e4f8364   David Mayerich   started a new opt...
494
495
496
  }			//end namespace stim
  
  #endif