Commit ad2123e6f9eb1be476fccb05626ce7aaa24d502d
Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib
Showing
3 changed files
with
336 additions
and
0 deletions
Show diff stats
1 | +#ifndef STIM_FFT_H | ||
2 | +#define STIM_FFT_H | ||
3 | + | ||
4 | +namespace stim{ | ||
5 | + | ||
6 | + template<class T> | ||
7 | + void circshift(T *out, const T *in, size_t xdim, size_t ydim, size_t xshift, size_t yshift){ | ||
8 | + size_t i, j, ii, jj; | ||
9 | + for (i =0; i < xdim; i++) { | ||
10 | + ii = (i + xshift) % xdim; | ||
11 | + for (j = 0; j < ydim; j++) { | ||
12 | + jj = (j + yshift) % ydim; | ||
13 | + out[ii * ydim + jj] = in[i * ydim + j]; | ||
14 | + } | ||
15 | + } | ||
16 | + } | ||
17 | + | ||
18 | + template<typename T> | ||
19 | + void cpu_fftshift(T* out, T* in, size_t xdim, size_t ydim){ | ||
20 | + circshift(out, in, xdim, ydim, xdim/2, ydim/2); | ||
21 | + } | ||
22 | + | ||
23 | + template<typename T> | ||
24 | + void cpu_ifftshift(T* out, T* in, size_t xdim, size_t ydim){ | ||
25 | + circshift(out, in, xdim, ydim, xdim/2, ydim/2); | ||
26 | + } | ||
27 | + | ||
28 | + | ||
29 | +} | ||
30 | + | ||
31 | +#endif | ||
0 | \ No newline at end of file | 32 | \ No newline at end of file |
1 | +#ifndef STIM_LENS_H | ||
2 | +#define STIM_LENS_H | ||
3 | + | ||
4 | +#include "scalarwave.h" | ||
5 | +#include "../math/bessel.h" | ||
6 | +#include "../cuda/cudatools/devices.h" | ||
7 | +#include "../visualization/colormap.h" | ||
8 | +#include "../math/fft.h" | ||
9 | + | ||
10 | +#include "cufft.h" | ||
11 | + | ||
12 | +#include <cmath> | ||
13 | + | ||
14 | +namespace stim{ | ||
15 | + | ||
16 | + /// Perform a k-space transform of a scalar field (FFT). The given field has a width of x and the calculated momentum space has a | ||
17 | + /// width of kx (in radians). | ||
18 | + /// @param K is a pointer to the output array of all plane waves in the field | ||
19 | + /// @param kx is the width of the frame in momentum space | ||
20 | + /// @param ky is the height of the frame in momentum space | ||
21 | + /// @param E is the field to be transformed | ||
22 | + /// @param x is the width of the field in the spatial domain | ||
23 | + /// @param y is the height of the field in the spatial domain | ||
24 | + /// @param nx is the number of pixels representing the field in the x (and kx) direction | ||
25 | + /// @param ny is the number of pixels representing the field in the y (and ky) direction | ||
26 | + template<typename T> | ||
27 | + void cpu_scalar_to_kspace(stim::complex<T>* K, T& kx, T& ky, stim::complex<T>* E, T x, T y, size_t nx, size_t ny){ | ||
28 | + | ||
29 | + kx = stim::TAU * nx / x; //calculate the width of the momentum space | ||
30 | + ky = stim::TAU * ny / y; | ||
31 | + | ||
32 | + stim::complex<T>* dev_FFT; | ||
33 | + HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array | ||
34 | + | ||
35 | + stim::complex<T>* dev_E; | ||
36 | + HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field | ||
37 | + HANDLE_ERROR( cudaMemcpy(dev_E, E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory | ||
38 | + | ||
39 | + cufftResult result; | ||
40 | + cufftHandle plan; | ||
41 | + result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C); | ||
42 | + if(result != CUFFT_SUCCESS){ | ||
43 | + std::cout<<"Error creating cuFFT plan."<<std::endl; | ||
44 | + exit(1); | ||
45 | + } | ||
46 | + | ||
47 | + result = cufftExecC2C(plan, (cufftComplex*)dev_E, (cufftComplex*)dev_FFT, CUFFT_FORWARD); | ||
48 | + if(result != CUFFT_SUCCESS){ | ||
49 | + std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl; | ||
50 | + exit(1); | ||
51 | + } | ||
52 | + | ||
53 | + stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny); | ||
54 | + HANDLE_ERROR( cudaMemcpy(fft, dev_FFT, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) ); | ||
55 | + | ||
56 | + stim::cpu_fftshift(K, fft, nx, ny); | ||
57 | + } | ||
58 | + | ||
59 | + template<typename T> | ||
60 | + void cpu_scalar_from_kspace(stim::complex<T>* E, T& x, T& y, stim::complex<T>* K, T kx, T ky, size_t nx, size_t ny){ | ||
61 | + | ||
62 | + x = stim::TAU * nx / kx; //calculate the width of the momentum space | ||
63 | + y = stim::TAU * ny / ky; | ||
64 | + | ||
65 | + stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny); | ||
66 | + stim::cpu_ifftshift(fft, K, nx, ny); | ||
67 | + | ||
68 | + stim::complex<T>* dev_FFT; | ||
69 | + HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array | ||
70 | + HANDLE_ERROR( cudaMemcpy(dev_FFT, fft, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory | ||
71 | + | ||
72 | + stim::complex<T>* dev_E; | ||
73 | + HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field | ||
74 | + | ||
75 | + cufftResult result; | ||
76 | + cufftHandle plan; | ||
77 | + result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C); | ||
78 | + if(result != CUFFT_SUCCESS){ | ||
79 | + std::cout<<"Error creating cuFFT plan."<<std::endl; | ||
80 | + exit(1); | ||
81 | + } | ||
82 | + | ||
83 | + result = cufftExecC2C(plan, (cufftComplex*)dev_FFT, (cufftComplex*)dev_E, CUFFT_FORWARD); | ||
84 | + if(result != CUFFT_SUCCESS){ | ||
85 | + std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl; | ||
86 | + exit(1); | ||
87 | + } | ||
88 | + | ||
89 | + HANDLE_ERROR( cudaMemcpy(E, dev_E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) ); | ||
90 | + | ||
91 | + | ||
92 | + } | ||
93 | + | ||
94 | + /// Propagate a field slice along its orthogonal direction by a given distance z | ||
95 | + /// @param Enew is the resulting propogated field | ||
96 | + template<typename T> | ||
97 | + void cpu_scalar_propagate(stim::complex<T>* Enew, stim::complex<T>* E, T sx, T sy, T z, T k, size_t nx, size_t ny){ | ||
98 | + | ||
99 | + stim::complex<T>* K = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * nx * ny ); | ||
100 | + | ||
101 | + T Kx, Ky; //width and height in k space | ||
102 | + cpu_scalar_to_kspace(K, Kx, Ky, E ,sx, sy, nx, ny); | ||
103 | + | ||
104 | + T* mag = (T*) malloc( sizeof(T) * nx * ny ); | ||
105 | + stim::abs(mag, K, nx * ny); | ||
106 | + stim::cpu2image<float>(mag, "kspace_pre_shift.bmp", nx, ny, stim::cmBrewer); | ||
107 | + | ||
108 | + size_t kxi, kyi; | ||
109 | + size_t i; | ||
110 | + T kx, kx_sq, ky, ky_sq, k_sq; | ||
111 | + T kz; | ||
112 | + stim::complex<T> shift; | ||
113 | + T min_kx = -Kx / 2; | ||
114 | + T dkx = Kx / (nx); | ||
115 | + T min_ky = -Ky / 2; | ||
116 | + T dky = Ky / (ny); | ||
117 | + for(kyi = 0; kyi < ny; kyi++){ //for each plane wave in the ky direction | ||
118 | + for(kxi = 0; kxi < nx; kxi++){ //for each plane wave in the ky direction | ||
119 | + i = kyi * nx + kxi; | ||
120 | + | ||
121 | + kx = min_kx + kxi * dkx; //calculate the position of the current plane wave | ||
122 | + ky = min_ky + kyi * dky; | ||
123 | + | ||
124 | + kx_sq = kx * kx; | ||
125 | + ky_sq = ky * ky; | ||
126 | + k_sq = k*k; | ||
127 | + | ||
128 | + if(kx_sq + ky_sq < k_sq){ | ||
129 | + kz = sqrt(k*k - kx * kx - ky * ky); //estimate kz using the Fresnel approximation | ||
130 | + shift = -exp(stim::complex<T>(0, kz * z)); | ||
131 | + K[i] *= shift; | ||
132 | + } | ||
133 | + else{ | ||
134 | + K[i] = 0; | ||
135 | + } | ||
136 | + } | ||
137 | + } | ||
138 | + | ||
139 | + stim::abs(mag, K, nx * ny); | ||
140 | + stim::cpu2image<float>(mag, "kspace_post_shift.bmp", nx, ny, stim::cmBrewer); | ||
141 | + | ||
142 | + cpu_scalar_from_kspace(Enew, sx, sy, K, Kx, Ky, nx, ny); | ||
143 | + } | ||
144 | + | ||
145 | +} | ||
146 | + | ||
147 | + | ||
148 | +#endif | ||
0 | \ No newline at end of file | 149 | \ No newline at end of file |
1 | +#ifndef STIM_SCALARFIELD_H | ||
2 | +#define STIM_SCALARFIELD_H | ||
3 | + | ||
4 | +#include "../math/rect.h" | ||
5 | +#include "../math/complex.h" | ||
6 | + | ||
7 | +namespace stim{ | ||
8 | + | ||
9 | + enum locationType {CPUmem, GPUmem}; | ||
10 | + | ||
11 | + /// Class represents a scalar optical field. | ||
12 | + | ||
13 | + /// In general, this class is designed to operate between the CPU and GPU. So, make sure all functions have an option to create the output on either. | ||
14 | + /// The field is stored *either* on the GPU or host memory, but not both. This enforces that there can't be different copies of the same field. | ||
15 | + /// This class is designed to be included in all of the other scalar optics classes, allowing them to render output data so make sure to keep it general and compatible. | ||
16 | + | ||
17 | +template<typename T> | ||
18 | +class scalarfield : public rect<T>{ | ||
19 | + | ||
20 | +protected: | ||
21 | + stim::complex<T>* E; | ||
22 | + size_t R[2]; | ||
23 | + locationType loc; | ||
24 | + | ||
25 | + | ||
26 | + | ||
27 | +public: | ||
28 | + | ||
29 | + CUDA_CALLABLE scalarfield(size_t X, size_t Y, T size = 1, T z_pos = 0) : rect<T>::rect(size, z_pos){ | ||
30 | + R[0] = X; //set the field resolution | ||
31 | + R[1] = Y; | ||
32 | + | ||
33 | + E = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * R[0] * R[1]); //allocate in CPU memory | ||
34 | + loc = CPUmem; | ||
35 | + } | ||
36 | + | ||
37 | + CUDA_CALLABLE ~scalarfield(){ | ||
38 | + if(loc == CPUmem) free(E); | ||
39 | + else cudaFree(E); | ||
40 | + } | ||
41 | + | ||
42 | + /// Returns the number of values in the field | ||
43 | + CUDA_CALLABLE size_t size(){ | ||
44 | + return R[0] * R[1]; | ||
45 | + } | ||
46 | + | ||
47 | + CUDA_CALLABLE size_t grid_bytes(){ | ||
48 | + return sizeof(stim::complex<T>) * R[0] * R[1]; | ||
49 | + } | ||
50 | + | ||
51 | + /// Calculates the distance between points on the grid | ||
52 | + T spacing(){ | ||
53 | + T du = rect<T>::X.len() / R[0]; | ||
54 | + T dv = rect<T>::Y.len() / R[1]; | ||
55 | + return min<T>(du, dv); | ||
56 | + } | ||
57 | + | ||
58 | + /// Copy the field array to the GPU, if it isn't already there | ||
59 | + void to_gpu(){ | ||
60 | + if(loc == GPUmem) return; | ||
61 | + else{ | ||
62 | + stim::complex<T>* dev_E; | ||
63 | + HANDLE_ERROR( cudaMalloc(&dev_E, e_bytes()) ); //allocate GPU memory | ||
64 | + HANDLE_ERROR( cudaMemcpy(dev_E, E, e_bytes(), cudaMemcpyHostToDevice) ); //copy the field to the GPU | ||
65 | + free(E); //free the CPU memory | ||
66 | + E = dev_E; //swap pointers | ||
67 | + } | ||
68 | + } | ||
69 | + | ||
70 | + /// Copy the field array to the CPU, if it isn't already there | ||
71 | + void to_cpu(){ | ||
72 | + if(loc == CPUmem) return; | ||
73 | + else{ | ||
74 | + stim::complex<T>* host_E = (stim::complex<T>*) malloc(e_bytes()); //allocate space in main memory | ||
75 | + HANDLE_ERROR( cudaMemcpy(host_E, E, e_bytes(), cudaMemcpyDeviceToHost) ); //copy from GPU to CPU | ||
76 | + HANDLE_ERROR( cudaFree(E) ); //free device memory | ||
77 | + E = host_E; //swap pointers | ||
78 | + } | ||
79 | + } | ||
80 | + | ||
81 | + std::string str(){ | ||
82 | + std::stringstream ss; | ||
83 | + ss<<rect<T>::str()<<std::endl; | ||
84 | + ss<<"[ "<<R[0]<<" x "<<R[1]<<" ]"<<std::endl; | ||
85 | + ss<<"location: "; | ||
86 | + if(loc == CPUmem) ss<<"CPU"; | ||
87 | + else ss<<"GPU"; | ||
88 | + | ||
89 | + ss<<endl; | ||
90 | + return ss.str(); | ||
91 | + } | ||
92 | + | ||
93 | + stim::complex<T>* ptr(){ | ||
94 | + return E; | ||
95 | + } | ||
96 | + | ||
97 | + /// Evaluate the cartesian coordinates of each point in the field. The resulting arrays are allocated in the same memory where the field is stored. | ||
98 | + void meshgrid(T* X, T* Y, T* Z, locationType location){ | ||
99 | + size_t array_size = sizeof(T) * R[0] * R[1]; | ||
100 | + if(location == CPUmem){ | ||
101 | + | ||
102 | + T du = 1.0 / (R[0] - 1); //calculate the spacing between points in the grid | ||
103 | + T dv = 1.0 / (R[1] - 1); | ||
104 | + | ||
105 | + size_t ui, vi, i; | ||
106 | + stim::vec3<T> p; | ||
107 | + for(vi = 0; vi < R[1]; vi++){ | ||
108 | + i = vi * R[0]; | ||
109 | + for(ui = 0; ui < R[0]; ui++){ | ||
110 | + p = rect<T>::p(ui * du, vi * dv); | ||
111 | + X[i] = p[0]; | ||
112 | + Y[i] = p[1]; | ||
113 | + Z[i] = p[2]; | ||
114 | + i++; | ||
115 | + } | ||
116 | + } | ||
117 | + stim::cpu2image(X, "X.bmp", R[0], R[1], stim::cmBrewer); | ||
118 | + stim::cpu2image(Y, "Y.bmp", R[0], R[1], stim::cmBrewer); | ||
119 | + stim::cpu2image(Z, "Z.bmp", R[0], R[1], stim::cmBrewer); | ||
120 | + } | ||
121 | + else{ | ||
122 | + std::cout<<"GPU allocation of a meshgrid isn't supported yet. You'll have to write kernels to do the calculation."; | ||
123 | + exit(1); | ||
124 | + } | ||
125 | + } | ||
126 | + | ||
127 | + void image(std::string filename, stim::complexComponentType type = complexMag, stim::colormapType cmap = stim::cmBrewer){ | ||
128 | + | ||
129 | + if(loc == GPUmem) to_cpu(); //if the field is in the GPU, move it to the CPU | ||
130 | + T* image = (T*) malloc( sizeof(T) * size() ); //allocate space for the real image | ||
131 | + | ||
132 | + switch(type){ //get the specified component from the complex value | ||
133 | + case complexMag: | ||
134 | + stim::abs(image, E, size()); | ||
135 | + break; | ||
136 | + case complexReal: | ||
137 | + stim::real(image, E, size()); | ||
138 | + break; | ||
139 | + case complexImaginary: | ||
140 | + stim::imag(image, E, size()); | ||
141 | + } | ||
142 | + stim::cpu2image(image, filename, R[0], R[1], cmap); //save the resulting image | ||
143 | + free(image); //free the real image | ||
144 | + } | ||
145 | + | ||
146 | +}; //end class scalarfield | ||
147 | +} | ||
148 | + | ||
149 | +//stream insertion operator | ||
150 | +template<typename T> | ||
151 | +std::ostream& operator<<(std::ostream& os, stim::scalarfield<T>& rhs){ | ||
152 | + os<<rhs.str(); | ||
153 | + return os; | ||
154 | +} | ||
155 | + | ||
156 | + | ||
157 | +#endif | ||
0 | \ No newline at end of file | 158 | \ No newline at end of file |