963d0676
David Mayerich
bug fixes related...
|
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
|
/// Perform a k-space transform of a scalar field (FFT). The given field has a width of x and the calculated momentum space has a
/// width of kx (in radians).
/// @param K is a pointer to the output array of all plane waves in the field
/// @param kx is the width of the frame in momentum space
/// @param ky is the height of the frame in momentum space
/// @param E is the field to be transformed
/// @param x is the width of the field in the spatial domain
/// @param y is the height of the field in the spatial domain
/// @param nx is the number of pixels representing the field in the x (and kx) direction
/// @param ny is the number of pixels representing the field in the y (and ky) direction
template<typename T>
void cpu_scalar_to_kspace(stim::complex<T>* K, T& kx, T& ky, stim::complex<T>* E, T x, T y, size_t nx, size_t ny){
kx = stim::TAU * nx / x; //calculate the width of the momentum space
ky = stim::TAU * ny / y;
stim::complex<T>* dev_FFT;
HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array
stim::complex<T>* dev_E;
HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field
HANDLE_ERROR( cudaMemcpy(dev_E, E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory
cufftResult result;
cufftHandle plan;
result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C);
if(result != CUFFT_SUCCESS){
std::cout<<"Error creating cuFFT plan."<<std::endl;
exit(1);
}
result = cufftExecC2C(plan, (cufftComplex*)dev_E, (cufftComplex*)dev_FFT, CUFFT_FORWARD);
if(result != CUFFT_SUCCESS){
std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl;
exit(1);
}
stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny);
HANDLE_ERROR( cudaMemcpy(fft, dev_FFT, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) );
stim::cpu_fftshift(K, fft, nx, ny);
//memcpy(K, fft, sizeof(stim::complex<T>) * nx * ny);
}
template<typename T>
void cpu_scalar_from_kspace(stim::complex<T>* E, T& x, T& y, stim::complex<T>* K, T kx, T ky, size_t nx, size_t ny){
x = stim::TAU * nx / kx; //calculate the width of the momentum space
y = stim::TAU * ny / ky;
stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny);
stim::cpu_ifftshift(fft, K, nx, ny);
//memcpy(fft, K, sizeof(stim::complex<T>) * nx * ny);
stim::complex<T>* dev_FFT;
HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array
HANDLE_ERROR( cudaMemcpy(dev_FFT, fft, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory
stim::complex<T>* dev_E;
HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field
cufftResult result;
cufftHandle plan;
result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C);
if(result != CUFFT_SUCCESS){
std::cout<<"Error creating cuFFT plan."<<std::endl;
exit(1);
}
result = cufftExecC2C(plan, (cufftComplex*)dev_FFT, (cufftComplex*)dev_E, CUFFT_INVERSE);
if(result != CUFFT_SUCCESS){
std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl;
exit(1);
}
HANDLE_ERROR( cudaMemcpy(E, dev_E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) );
}
/// Propagate a field slice along its orthogonal direction by a given distance z
/// @param Enew is the resulting propogated field
/// @param E is the field to be propogated
/// @param sx is the size of the field in the lateral x direction
/// @param sy is the size of the field in the lateral y direction
/// @param z is the distance to be propagated
/// @param k is the wavenumber 2*pi/lambda
/// @param nx is the number of samples in the field along the lateral x direction
/// @param ny is the number of samples in the field along the lateral y direction
template<typename T>
void cpu_scalar_propagate(stim::complex<T>* Enew, stim::complex<T>* E, T sx, T sy, T z, T k, size_t nx, size_t ny){
stim::complex<T>* K = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * nx * ny );
T Kx, Ky; //width and height in k space
cpu_scalar_to_kspace(K, Kx, Ky, E ,sx, sy, nx, ny);
T* mag = (T*) malloc( sizeof(T) * nx * ny );
stim::abs(mag, K, nx * ny);
stim::cpu2image<float>(mag, "kspace_pre_shift.bmp", nx, ny, stim::cmBrewer);
size_t kxi, kyi;
size_t i;
T kx, kx_sq, ky, ky_sq, k_sq;
T kz;
stim::complex<T> shift;
T min_kx = -Kx / 2;
T dkx = Kx / (nx);
T min_ky = -Ky / 2;
T dky = Ky / (ny);
for(kyi = 0; kyi < ny; kyi++){ //for each plane wave in the ky direction
for(kxi = 0; kxi < nx; kxi++){ //for each plane wave in the ky direction
i = kyi * nx + kxi;
kx = min_kx + kxi * dkx; //calculate the position of the current plane wave
ky = min_ky + kyi * dky;
kx_sq = kx * kx;
ky_sq = ky * ky;
k_sq = k*k;
if(kx_sq + ky_sq < k_sq){
kz = sqrt(k_sq - kx_sq - ky_sq); //estimate kz using the Fresnel approximation
shift = -exp(stim::complex<T>(0, kz * z));
K[i] *= shift;
K[i] /= (nx*ny); //normalize the DFT
}
else{
K[i] = 0;
}
}
}
stim::abs(mag, K, nx * ny);
stim::cpu2image<float>(mag, "kspace_post_shift.bmp", nx, ny, stim::cmBrewer);
cpu_scalar_from_kspace(Enew, sx, sy, K, Kx, Ky, nx, ny);
}
/// Apply a lowpass filter to a field slice
/// @param Enew is the resulting propogated field
/// @param E is the field to be propogated
/// @param sx is the size of the field in the lateral x direction
/// @param sy is the size of the field in the lateral y direction
/// @param highest is the highest spatial frequency that can pass through the filter
/// @param nx is the number of samples in the field along the lateral x direction
/// @param ny is the number of samples in the field along the lateral y direction
template<typename T>
void cpu_scalar_lowpass(stim::complex<T>* Enew, stim::complex<T>* E, T sx, T sy, T highest, size_t nx, size_t ny){
stim::complex<T>* K = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * nx * ny );
T Kx, Ky; //width and height in k space
cpu_scalar_to_kspace(K, Kx, Ky, E ,sx, sy, nx, ny);
T* mag = (T*) malloc( sizeof(T) * nx * ny );
stim::abs(mag, K, nx * ny);
stim::cpu2image<float>(mag, "kspace_pre_lowpass.bmp", nx, ny, stim::cmBrewer);
size_t kxi, kyi;
size_t i;
T kx, kx_sq, ky, ky_sq, k_sq;
T kz;
stim::complex<T> shift;
T min_kx = -Kx / 2;
T dkx = Kx / (nx);
T min_ky = -Ky / 2;
T dky = Ky / (ny);
T highest_sq = highest * highest;
for(kyi = 0; kyi < ny; kyi++){ //for each plane wave in the ky direction
for(kxi = 0; kxi < nx; kxi++){ //for each plane wave in the ky direction
i = kyi * nx + kxi;
kx = min_kx + kxi * dkx; //calculate the position of the current plane wave
ky = min_ky + kyi * dky;
kx_sq = kx * kx;
ky_sq = ky * ky;
if(kx_sq + ky_sq > highest_sq){
K[i] = 0;
}
else
K[i] /= nx * ny; //normalize the DFT
}
}
stim::abs(mag, K, nx * ny);
stim::cpu2image<float>(mag, "kspace_post_lowpass.bmp", nx, ny, stim::cmBrewer);
cpu_scalar_from_kspace(Enew, sx, sy, K, Kx, Ky, nx, ny);
}
|