8e4f8364
David Mayerich
started a new opt...
|
1
2
|
#ifndef RTS_BEAM
#define RTS_BEAM
|
8309b07a
David Mayerich
fixed some vec3 e...
|
3
|
#include <boost/math/special_functions/bessel.hpp>
|
8e4f8364
David Mayerich
started a new opt...
|
4
5
6
7
|
#include "../math/vec3.h"
#include "../optics/scalarwave.h"
#include "../math/bessel.h"
|
0288346a
David Mayerich
added support for...
|
8
|
#include "../math/legendre.h"
|
9339fbad
David Mayerich
implementing mie ...
|
9
10
|
#include "../cuda/cudatools/devices.h"
#include "../cuda/cudatools/timer.h"
|
8309b07a
David Mayerich
fixed some vec3 e...
|
11
|
#include "../optics/scalarfield.h"
|
9339fbad
David Mayerich
implementing mie ...
|
12
13
|
#include <cublas_v2.h>
#include <math_constants.h>
|
8e4f8364
David Mayerich
started a new opt...
|
14
|
#include <vector>
|
9339fbad
David Mayerich
implementing mie ...
|
15
|
#include <stdlib.h>
|
8e4f8364
David Mayerich
started a new opt...
|
16
|
|
8e4f8364
David Mayerich
started a new opt...
|
17
|
|
8e4f8364
David Mayerich
started a new opt...
|
18
|
|
8309b07a
David Mayerich
fixed some vec3 e...
|
19
|
namespace stim{
|
8e4f8364
David Mayerich
started a new opt...
|
20
|
|
8309b07a
David Mayerich
fixed some vec3 e...
|
21
|
/// Function returns the value of the scalar field produced by a beam with the specified parameters
|
8e4f8364
David Mayerich
started a new opt...
|
22
|
|
8309b07a
David Mayerich
fixed some vec3 e...
|
23
24
|
template<typename T>
std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d, T NA, T NA_in = 0){
|
8e4f8364
David Mayerich
started a new opt...
|
25
|
|
8309b07a
David Mayerich
fixed some vec3 e...
|
26
|
std::vector< stim::vec3<T> > dirs(N); //allocate an array to store the focusing vectors
|
8e4f8364
David Mayerich
started a new opt...
|
27
|
|
8309b07a
David Mayerich
fixed some vec3 e...
|
28
29
|
///compute the rotation operator to transform (0, 0, 1) to k
T cos_angle = d.dot(vec3<T>(0, 0, 1));
|
6c4afcac
David Mayerich
introduced a gene...
|
30
|
stim::matrix_sq<T, 3> rotation;
|
8e4f8364
David Mayerich
started a new opt...
|
31
|
|
8309b07a
David Mayerich
fixed some vec3 e...
|
32
33
34
|
//if the cosine of the angle is -1, the rotation is just a flip across the z axis
if(cos_angle == -1){
rotation(2, 2) = -1;
|
8e4f8364
David Mayerich
started a new opt...
|
35
|
}
|
8309b07a
David Mayerich
fixed some vec3 e...
|
36
|
else if(cos_angle != 1.0)
|
8e4f8364
David Mayerich
started a new opt...
|
37
|
{
|
8309b07a
David Mayerich
fixed some vec3 e...
|
38
39
40
41
42
|
vec3<T> r_axis = vec3<T>(0, 0, 1).cross(d).norm(); //compute the axis of rotation
T angle = acos(cos_angle); //compute the angle of rotation
quaternion<T> quat; //create a quaternion describing the rotation
quat.CreateRotation(angle, r_axis);
rotation = quat.toMatrix3(); //compute the rotation matrix
|
8e4f8364
David Mayerich
started a new opt...
|
43
44
|
}
|
8309b07a
David Mayerich
fixed some vec3 e...
|
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
|
//find the phi values associated with the cassegrain ring
T PHI[2];
PHI[0] = (T)asin(NA);
PHI[1] = (T)asin(NA_in);
//calculate the z-axis cylinder coordinates associated with these angles
T Z[2];
Z[0] = cos(PHI[0]);
Z[1] = cos(PHI[1]);
T range = Z[0] - Z[1];
//draw a distribution of random phi, z values
T z, phi, theta;
//T kmag = stim::TAU / lambda;
for(int i=0; i<N; i++){ //for each sample
z = (T)((double)rand() / (double)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder
theta = (T)(((double)rand() / (double)RAND_MAX) * stim::TAU);
phi = acos(z); //project onto the sphere, computing phi in spherical coordinates
//compute and store cartesian coordinates
vec3<T> spherical(1, theta, phi); //convert from spherical to cartesian coordinates
vec3<T> cart = spherical.sph2cart();
dirs[i] = rotation * cart; //create a sample vector
}
return dirs;
}
|
8e4f8364
David Mayerich
started a new opt...
|
71
|
|
8309b07a
David Mayerich
fixed some vec3 e...
|
72
|
|
0288346a
David Mayerich
added support for...
|
73
74
|
/// Calculate the [0 Nl] terms for the aperture integral based on the give numerical aperture and center obscuration (optional)
/// @param C is a pointer to Nl + 1 values where the terms will be stored
|
8e4f8364
David Mayerich
started a new opt...
|
75
|
template<typename T>
|
9339fbad
David Mayerich
implementing mie ...
|
76
|
CUDA_CALLABLE void cpu_aperture_integral(T* C, int Nl, T NA, T NA_in = 0){
|
8e4f8364
David Mayerich
started a new opt...
|
77
|
|
0288346a
David Mayerich
added support for...
|
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
size_t table_bytes = (Nl + 1) * sizeof(T); //calculate the number of bytes required to store the terms
T cos_alpha_1 = cos(asin(NA_in)); //calculate the cosine of the angle subtended by the central obscuration
T cos_alpha_2 = cos(asin(NA)); //calculate the cosine of the angle subtended by the aperture
// the aperture integral is computed using four individual Legendre polynomials, each a function of the angles subtended
// by the objective and central obscuration
T* Pln_a1 = (T*) malloc(table_bytes);
stim::legendre<T>(Nl-1, cos_alpha_1, &Pln_a1[1]);
Pln_a1[0] = 1;
T* Pln_a2 = (T*) malloc(table_bytes);
stim::legendre<T>(Nl-1, cos_alpha_2, &Pln_a2[1]);
Pln_a2[0] = 1;
T* Plp_a1 = (T*) malloc(table_bytes+sizeof(T));
stim::legendre<T>(Nl+1, cos_alpha_1, Plp_a1);
T* Plp_a2 = (T*) malloc(table_bytes+sizeof(T));
stim::legendre<T>(Nl+1, cos_alpha_2, Plp_a2);
for(size_t l = 0; l <= Nl; l++){
C[l] = Plp_a1[l+1] - Plp_a2[l+1] - Pln_a1[l] + Pln_a2[l];
}
free(Pln_a1);
free(Pln_a2);
free(Plp_a1);
free(Plp_a2);
}
/// performs linear interpolation into a look-up table
template<typename T>
|
9339fbad
David Mayerich
implementing mie ...
|
110
111
112
113
114
115
116
117
118
119
120
|
CUDA_CALLABLE void lut_lookup(T* lut_values, T* lut, T val, size_t N, T min_val, T delta, size_t n_vals){
T idx = ((val - min_val) / delta);
size_t i = (size_t) idx;
T a1 = idx - i;
T a0 = 1 - a1;
size_t n0 = i * n_vals;
size_t n1 = (i+1) * n_vals;
for(size_t n = 0; n < n_vals; n++){
lut_values[n] = lut[n0 + n] * a0 + lut[n1 + n] * a1;
}
}
|
0288346a
David Mayerich
added support for...
|
121
|
|
9339fbad
David Mayerich
implementing mie ...
|
122
|
template <typename T>
|
31262e83
David Mayerich
GPU implementatio...
|
123
|
CUDA_CALLABLE stim::complex<T> clerp(stim::complex<T> v0, stim::complex<T> v1, T t) {
|
3669ed62
David Mayerich
fixed fused multi...
|
124
|
return stim::complex<T>( fmaf(t, v1.r, fmaf(-t, v0.r, v0.r)), fmaf(t, v1.i, fmaf(-t, v0.i, v0.i)) );
|
31262e83
David Mayerich
GPU implementatio...
|
125
126
127
|
}
template <typename T>
|
9339fbad
David Mayerich
implementing mie ...
|
128
|
CUDA_CALLABLE T lerp(T v0, T v1, T t) {
|
3669ed62
David Mayerich
fixed fused multi...
|
129
|
return fmaf(t, v1, fmaf(-t, v0, v0));
|
0288346a
David Mayerich
added support for...
|
130
131
|
}
|
8309b07a
David Mayerich
fixed some vec3 e...
|
132
|
#ifdef CUDA_FOUND
|
0288346a
David Mayerich
added support for...
|
133
|
template<typename T>
|
8309b07a
David Mayerich
fixed some vec3 e...
|
134
|
__global__ void cuda_scalar_psf(stim::complex<T>* E, size_t N, T* r, T* phi, T A, size_t Nl,
|
9339fbad
David Mayerich
implementing mie ...
|
135
|
T* C,
|
8309b07a
David Mayerich
fixed some vec3 e...
|
136
|
T* lut_j, size_t Nj, T min_r, T dr){
|
9339fbad
David Mayerich
implementing mie ...
|
137
138
139
140
|
size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
if(i >= N) return; //exit if this thread is outside the array
T cos_phi = cos(phi[i]); //calculate the thread value for cos(phi)
|
9339fbad
David Mayerich
implementing mie ...
|
141
142
143
|
stim::complex<T> Ei = 0; //initialize the value of the field to zero
size_t NC = Nl + 1; //calculate the number of coefficients to be used
|
8309b07a
David Mayerich
fixed some vec3 e...
|
144
|
T fij = (r[i] - min_r)/dr; //FP index into the spherical bessel LUT
|
9339fbad
David Mayerich
implementing mie ...
|
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
|
size_t ij = (size_t) fij; //convert to an integral index
T a = fij - ij; //calculate the fractional portion of the index
size_t n0j = ij * (NC); //start of the first entry in the LUT
size_t n1j = (ij+1) * (NC); //start of the second entry in the LUT
T jl; //declare register to store the spherical bessel function
T Pl_2, Pl_1; //declare registers to store the previous two Legendre polynomials
T Pl = 1; //initialize the current value for the Legendre polynomial
stim::complex<T> im(0, 1); //declare i (imaginary 1)
stim::complex<T> i_pow(1, 0); //i_pow stores the current value of i^l so it doesn't have to be re-computed every iteration
for(int l = 0; l <= Nl; l++){ //for each order
jl = lerp<T>( lut_j[n0j + l], lut_j[n1j + l], a ); //read jl from the LUT and interpolate the result
Ei += i_pow * jl * Pl * C[l]; //calculate the value for the field and sum
i_pow *= im; //multiply i^l * i for the next iteration
Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1
Pl_1 = Pl;
if(l == 0){ //computing Pl is done recursively, where the recursive relation
Pl = cos_phi; // requires the first two orders. This defines the second.
}
else{ //if this is not the first iteration, use the recursive relation to calculate Pl
Pl = ( (2 * (l+1) - 1) * cos_phi * Pl_1 - (l) * Pl_2 ) / (l+1);
}
}
E[i] = Ei * A * 2 * CUDART_PI_F; //scale the integral by the amplitude
}
template<typename T>
void gpu_scalar_psf_local(stim::complex<T>* E, size_t N, T* r, T* phi, T lambda, T A, T NA, T NA_in, int Nl, T r_spacing){
//Find the minimum and maximum values of r
cublasStatus_t stat;
cublasHandle_t handle;
stat = cublasCreate(&handle); //create a cuBLAS handle
if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
printf ("CUBLAS initialization failed\n");
exit(1);
}
int i_min, i_max;
stat = cublasIsamin(handle, (int)N, r, 1, &i_min);
if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
printf ("CUBLAS Error: failed to calculate minimum r value.\n");
exit(1);
}
stat = cublasIsamax(handle, (int)N, r, 1, &i_max);
if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
printf ("CUBLAS Error: failed to calculate maximum r value.\n");
exit(1);
}
|
4252d827
David Mayerich
ivote3 fixes and ...
|
196
|
cublasDestroy(handle);
|
9339fbad
David Mayerich
implementing mie ...
|
197
|
|
8309b07a
David Mayerich
fixed some vec3 e...
|
198
199
|
i_min--; //cuBLAS uses 1-based indexing for Fortran compatibility
i_max--;
|
9339fbad
David Mayerich
implementing mie ...
|
200
201
202
203
204
205
206
207
208
209
210
|
T r_min, r_max; //allocate space to store the minimum and maximum values
HANDLE_ERROR( cudaMemcpy(&r_min, r + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU
HANDLE_ERROR( cudaMemcpy(&r_max, r + i_max, sizeof(T), cudaMemcpyDeviceToHost) );
T k = (T)stim::TAU / lambda; //calculate the wavenumber from lambda
size_t C_bytes = (Nl + 1) * sizeof(T);
T* C = (T*) malloc( C_bytes ); //allocate space for the aperture integral terms
cpu_aperture_integral(C, Nl, NA, NA_in); //calculate the aperture integral terms
size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1); //number of values in the look-up table based on the user-specified spacing along r
|
9339fbad
David Mayerich
implementing mie ...
|
211
212
|
size_t lutj_bytes = sizeof(T) * (Nl+1) * Nlut_j;
|
8309b07a
David Mayerich
fixed some vec3 e...
|
213
214
215
|
T* j_lut = (T*) malloc(lutj_bytes); //pointer to the look-up table
T dr = (r_max - r_min) / (Nlut_j-1); //distance between values in the LUT
T jl;
|
4252d827
David Mayerich
ivote3 fixes and ...
|
216
|
unsigned l;
|
8309b07a
David Mayerich
fixed some vec3 e...
|
217
|
for(size_t ri = 0; ri < Nlut_j; ri++){ //for each value in the LUT
|
4252d827
David Mayerich
ivote3 fixes and ...
|
218
|
for(l = 0; l <= (unsigned)Nl; l++){ //for each order
|
8309b07a
David Mayerich
fixed some vec3 e...
|
219
220
|
jl = boost::math::sph_bessel<T>(l, k*(r_min + ri * dr)); //use boost to calculate the spherical bessel function
j_lut[ri * (Nl + 1) + l] = jl; //store the bessel function result
|
9339fbad
David Mayerich
implementing mie ...
|
221
222
223
|
}
}
|
963d0676
David Mayerich
bug fixes related...
|
224
|
//stim::cpu2image<T>(j_lut, "j_lut.bmp", Nl+1, Nlut_j, stim::cmBrewer);
|
9339fbad
David Mayerich
implementing mie ...
|
225
226
227
228
229
230
231
|
//Allocate device memory and copy everything to the GPU
T* gpu_C;
HANDLE_ERROR( cudaMalloc(&gpu_C, C_bytes) );
HANDLE_ERROR( cudaMemcpy(gpu_C, C, C_bytes, cudaMemcpyHostToDevice) );
T* gpu_j_lut;
HANDLE_ERROR( cudaMalloc(&gpu_j_lut, lutj_bytes) );
|
8309b07a
David Mayerich
fixed some vec3 e...
|
232
|
HANDLE_ERROR( cudaMemcpy(gpu_j_lut, j_lut, lutj_bytes, cudaMemcpyHostToDevice) );
|
9339fbad
David Mayerich
implementing mie ...
|
233
234
235
|
int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks
|
8e4f8364
David Mayerich
started a new opt...
|
236
|
|
8309b07a
David Mayerich
fixed some vec3 e...
|
237
|
cuda_scalar_psf<T><<< blocks, threads >>>(E, N, r, phi, A, Nl, gpu_C, gpu_j_lut, Nlut_j, r_min, dr);
|
9339fbad
David Mayerich
implementing mie ...
|
238
239
240
241
242
243
244
245
246
247
248
249
250
|
//free the LUT and condenser tables
HANDLE_ERROR( cudaFree(gpu_C) );
HANDLE_ERROR( cudaFree(gpu_j_lut) );
}
#endif
/// Calculate the analytical solution to a scalar point spread function given a set of spherical coordinates about the PSF (beam propagation along phi = theta = 0)
template<typename T>
void cpu_scalar_psf_local(stim::complex<T>* F, size_t N, T* r, T* phi, T lambda, T A, T NA, T NA_in, int Nl){
T k = (T)stim::TAU / lambda;
size_t C_bytes = (Nl + 1) * sizeof(T);
T* C = (T*) malloc( C_bytes ); //allocate space for the aperture integral terms
|
0288346a
David Mayerich
added support for...
|
251
252
|
cpu_aperture_integral(C, Nl, NA, NA_in); //calculate the aperture integral terms
memset(F, 0, N * sizeof(stim::complex<T>));
|
0288346a
David Mayerich
added support for...
|
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
|
T jl, Pl, kr, cos_phi;
double vm;
double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T));
for(size_t n = 0; n < N; n++){ //for each point in the field
kr = k * r[n]; //calculate kr (the optical distance between the focal point and p)
cos_phi = std::cos(phi[n]); //calculate the cosine of phi
stim::bessjyv_sph<double>(Nl, kr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl]
stim::legendre<T>(Nl, cos_phi, Pl_cos_phi); //calculate the [0 Nl] legendre polynomials for this point
|
8e4f8364
David Mayerich
started a new opt...
|
268
269
|
for(int l = 0; l <= Nl; l++){
|
8e4f8364
David Mayerich
started a new opt...
|
270
|
jl = (T)jv[l];
|
0288346a
David Mayerich
added support for...
|
271
272
273
274
275
276
277
278
|
Pl = Pl_cos_phi[l];
F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C[l];
}
F[n] *= A * stim::TAU;
}
free(C);
free(Pl_cos_phi);
|
9339fbad
David Mayerich
implementing mie ...
|
279
|
}
|
0288346a
David Mayerich
added support for...
|
280
|
|
9339fbad
David Mayerich
implementing mie ...
|
281
282
283
284
285
286
287
288
|
/// Converts a set of cartesian points into spherical coordinates surrounding a point spread function (PSF)
/// @param r is the output distance from the PSF
/// @param phi is the non-symmetric direction about the PSF
/// @param x (x, y, z) are the cartesian coordinates in world space
/// @f is the focal point of the PSF in cartesian coordinates
/// @d is the propagation direction of the PSF in cartesian coordinates
template<typename T>
__global__ void cuda_cart2psf(T* r, T* phi, size_t N, T* x, T* y, T* z, stim::vec3<T> f, stim::quaternion<T> q){
|
0288346a
David Mayerich
added support for...
|
289
|
|
9339fbad
David Mayerich
implementing mie ...
|
290
291
|
size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
if(i >= N) return; //exit if this thread is outside the array
|
0288346a
David Mayerich
added support for...
|
292
|
|
9339fbad
David Mayerich
implementing mie ...
|
293
294
295
296
297
|
stim::vec3<T> p; //declare a 3D point
(x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
(y == NULL) ? p[1] = 0 : p[1] = y[i];
(z == NULL) ? p[2] = 0 : p[2] = z[i];
|
0288346a
David Mayerich
added support for...
|
298
|
|
9339fbad
David Mayerich
implementing mie ...
|
299
300
301
302
303
304
|
p = p - f; //shift the point to the center of the PSF (focal point)
p = q.toMatrix3() * p; //rotate the point to align with the propagation direction
stim::vec3<T> ps = p.cart2sph(); //convert from cartesian to spherical coordinates
r[i] = ps[0]; //store r
phi[i] = ps[2]; //phi = [0 pi]
|
0288346a
David Mayerich
added support for...
|
305
306
|
}
|
8309b07a
David Mayerich
fixed some vec3 e...
|
307
|
#ifdef CUDA_FOUND
|
9339fbad
David Mayerich
implementing mie ...
|
308
309
310
311
312
313
314
315
316
317
318
319
320
|
/// Calculate the analytical solution to a point spread function given a set of points in cartesian coordinates
template<typename T>
void gpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){
T* gpu_r; //allocate space for the coordinates in r
HANDLE_ERROR( cudaMalloc(&gpu_r, sizeof(T) * N) );
T* gpu_phi;
HANDLE_ERROR( cudaMalloc(&gpu_phi, sizeof(T) * N) );
//stim::complex<T>* gpu_E;
//HANDLE_ERROR( cudaMalloc(&gpu_E, sizeof(stim::complex<T>) * N) );
stim::quaternion<T> q; //create a quaternion
q.CreateRotation(d, stim::vec3<T>(0, 0, 1)); //create a mapping from the propagation direction to the PSF space
|
6c4afcac
David Mayerich
introduced a gene...
|
321
|
stim::matrix_sq<T, 3> rot = q.toMatrix3();
|
9339fbad
David Mayerich
implementing mie ...
|
322
323
324
325
326
327
|
int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks
cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q); //call the CUDA kernel to move the cartesian coordinates to PSF space
gpu_scalar_psf_local(E, N, gpu_r, gpu_phi, lambda, A, NA, NA_in, Nl, r_spacing);
|
4252d827
David Mayerich
ivote3 fixes and ...
|
328
329
330
|
HANDLE_ERROR( cudaFree(gpu_r) );
HANDLE_ERROR( cudaFree(gpu_phi) );
|
9339fbad
David Mayerich
implementing mie ...
|
331
332
|
}
#endif
|
0288346a
David Mayerich
added support for...
|
333
334
|
template<typename T>
|
9339fbad
David Mayerich
implementing mie ...
|
335
336
337
|
void cpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){
// If CUDA is available, copy the cartesian points to the GPU and evaluate them in a kernel
|
8309b07a
David Mayerich
fixed some vec3 e...
|
338
|
#ifdef CUDA_FOUND
|
9339fbad
David Mayerich
implementing mie ...
|
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
|
T* gpu_x = NULL;
if(x != NULL){
HANDLE_ERROR( cudaMalloc(&gpu_x, sizeof(T) * N) );
HANDLE_ERROR( cudaMemcpy(gpu_x, x, sizeof(T) * N, cudaMemcpyHostToDevice) );
}
T* gpu_y = NULL;
if(y != NULL){
HANDLE_ERROR( cudaMalloc(&gpu_y, sizeof(T) * N) );
HANDLE_ERROR( cudaMemcpy(gpu_y, y, sizeof(T) * N, cudaMemcpyHostToDevice) );
}
T* gpu_z = NULL;
if(z != NULL){
HANDLE_ERROR( cudaMalloc(&gpu_z, sizeof(T) * N) );
HANDLE_ERROR( cudaMemcpy(gpu_z, z, sizeof(T) * N, cudaMemcpyHostToDevice) );
}
stim::complex<T>* gpu_E;
HANDLE_ERROR( cudaMalloc(&gpu_E, sizeof(stim::complex<T>) * N) );
HANDLE_ERROR( cudaMemcpy(gpu_E, E, sizeof(stim::complex<T>) * N, cudaMemcpyHostToDevice) );
gpu_scalar_psf_cart<T>(gpu_E, N, gpu_x, gpu_y, gpu_z, lambda, A, f, d, NA, NA_in, Nl, r_spacing);
HANDLE_ERROR( cudaMemcpy(E, gpu_E, sizeof(stim::complex<T>) * N, cudaMemcpyDeviceToHost) );
HANDLE_ERROR( cudaFree(gpu_x) );
HANDLE_ERROR( cudaFree(gpu_y) );
HANDLE_ERROR( cudaFree(gpu_z) );
HANDLE_ERROR( cudaFree(gpu_E) );
#else
|
0288346a
David Mayerich
added support for...
|
368
369
370
|
T* r = (T*) malloc(N * sizeof(T)); //allocate space for p in spherical coordinates
T* phi = (T*) malloc(N * sizeof(T)); // only r and phi are necessary (the scalar PSF is symmetric about theta)
|
9339fbad
David Mayerich
implementing mie ...
|
371
372
373
374
|
stim::quaternion<T> q;
q.CreateRotation(d, stim::vec3<T>(0, 0, 1));
stim::matrix<T, 3> R = q.toMatrix3();
stim::vec3<T> p, ps, ds;
|
0288346a
David Mayerich
added support for...
|
375
376
377
378
379
|
for(size_t i = 0; i < N; i++){
(x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
(y == NULL) ? p[1] = 0 : p[1] = y[i];
(z == NULL) ? p[2] = 0 : p[2] = z[i];
|
9339fbad
David Mayerich
implementing mie ...
|
380
381
382
383
|
p = p - f;
p = R * p; //rotate the cartesian point
|
0288346a
David Mayerich
added support for...
|
384
385
386
387
388
|
ps = p.cart2sph(); //convert from cartesian to spherical coordinates
r[i] = ps[0]; //store r
phi[i] = ps[2]; //phi = [0 pi]
}
|
8309b07a
David Mayerich
fixed some vec3 e...
|
389
|
cpu_scalar_psf_local(E, N, r, phi, lambda, A, NA, NA_in, Nl); //call the spherical coordinate CPU function
|
0288346a
David Mayerich
added support for...
|
390
391
392
|
free(r);
free(phi);
|
9339fbad
David Mayerich
implementing mie ...
|
393
|
#endif
|
8e4f8364
David Mayerich
started a new opt...
|
394
|
}
|
8309b07a
David Mayerich
fixed some vec3 e...
|
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
|
/// Class stim::beam represents a beam of light focused at a point and composed of several plane waves
template<typename T>
class scalarbeam
{
public:
//enum beam_type {Uniform, Bartlett, Hamming, Hanning};
private:
T NA[2]; //numerical aperature of the focusing optics
vec3<T> f; //focal point
vec3<T> d; //propagation direction
T A; //beam amplitude
T lambda; //beam wavelength
public:
|
8e4f8364
David Mayerich
started a new opt...
|
411
|
|
8309b07a
David Mayerich
fixed some vec3 e...
|
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
|
///constructor: build a default beam (NA=1.0)
scalarbeam(T wavelength = 1, T amplitude = 1, vec3<T> focal_point = vec3<T>(0, 0, 0), vec3<T> direction = vec3<T>(0, 0, 1), T numerical_aperture = 1, T center_obsc = 0){
lambda = wavelength;
A = amplitude;
f = focal_point;
d = direction.norm(); //make sure that the direction vector is normalized (makes calculations more efficient later on)
NA[0] = numerical_aperture;
NA[1] = center_obsc;
}
///Numerical Aperature functions
void setNA(T na)
{
NA[0] = (T)0;
NA[1] = na;
}
void setNA(T na0, T na1)
{
NA[0] = na0;
NA[1] = na1;
}
//Monte-Carlo decomposition into plane waves
std::vector< scalarwave<T> > mc(size_t N = 100000) const{
std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]); //generate a random set of N vectors forming a focus
std::vector< scalarwave<T> > samples(N); //create a vector of plane waves
T kmag = (T)stim::TAU / lambda; //calculate the wavenumber
stim::complex<T> apw; //allocate space for the amplitude at the focal point
T a = (T)(stim::TAU * ( (1 - cos(asin(NA[0]))) - (1 - cos(asin(NA[1])))) / (double)N); //constant value weights plane waves based on the aperture and number of samples (N)
stim::vec3<T> kpw; //declare the new k-vector based on the focused plane wave direction
for(size_t i=0; i<N; i++){ //for each sample
kpw = dirs[i] * kmag; //calculate the k-vector for the new plane wave
apw = a * exp(stim::complex<T>(0, kpw.dot(-f))); //calculate the amplitude for the new plane wave
samples[i] = scalarwave<T>(kpw, apw); //create a plane wave based on the direction
}
return samples;
}
|
963d0676
David Mayerich
bug fixes related...
|
451
452
453
454
|
void eval(stim::scalarfield<T>& E, T* X, T* Y, T* Z, int order = 500){
cpu_scalar_psf_cart<T>(E.ptr(), E.size(), X, Y, Z, lambda, A, f, d, NA[0], NA[1], order, E.spacing());
}
|
8309b07a
David Mayerich
fixed some vec3 e...
|
455
|
/// Evaluate the beam to a scalar field using Debye focusing
|
963d0676
David Mayerich
bug fixes related...
|
456
|
void eval(stim::scalarfield<T>& E, int order = 500){
|
4252d827
David Mayerich
ivote3 fixes and ...
|
457
458
459
460
461
462
|
E.meshgrid(); //calculate a meshgrid if one isn't created
if(E.gpu())
gpu_scalar_psf_cart<T>(E.ptr(), E.size(), E.x(), E.y(), E.z(), lambda, A, f, d, NA[0], NA[1], order, E.spacing());
else
cpu_scalar_psf_cart<T>(E.ptr(), E.size(), E.x(), E.y(), E.z(), lambda, A, f, d, NA[0], NA[1], order, E.spacing());
//eval(E, E.x(), E.y(), E.z(), order);
|
8309b07a
David Mayerich
fixed some vec3 e...
|
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
|
}
/// Calculate the field at a given point
/// @param x is the x-coordinate of the field point
/// @O is the approximation accuracy
stim::complex<T> field(T x, T y, T z, size_t O){
std::vector< scalarwave<T> > W = mc(O);
T result = 0; //initialize the result to zero (0)
for(size_t i = 0; i < O; i++){ //for each plane wave
result += W[i].pos(x, y, z);
}
return result;
}
std::string str()
{
std::stringstream ss;
ss<<"Beam:"<<std::endl;
//ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
ss<<" Beam Direction: "<<d<<std::endl;
if(NA[0] == 0)
ss<<" NA: "<<NA[1];
else
ss<<" NA: "<<NA[0]<<" -- "<<NA[1];
return ss.str();
}
}; //end beam
|
8e4f8364
David Mayerich
started a new opt...
|
494
495
496
|
} //end namespace stim
#endif
|