Commit 3a74ec6d78ca82b863e8027db6c071fc79ebd79a
1 parent
8309b07a
added a new scalarfield class for rendering and storing samples of EM fields
Showing
3 changed files
with
336 additions
and
0 deletions
Show diff stats
1 | +#ifndef STIM_FFT_H | |
2 | +#define STIM_FFT_H | |
3 | + | |
4 | +namespace stim{ | |
5 | + | |
6 | + template<class T> | |
7 | + void circshift(T *out, const T *in, size_t xdim, size_t ydim, size_t xshift, size_t yshift){ | |
8 | + size_t i, j, ii, jj; | |
9 | + for (i =0; i < xdim; i++) { | |
10 | + ii = (i + xshift) % xdim; | |
11 | + for (j = 0; j < ydim; j++) { | |
12 | + jj = (j + yshift) % ydim; | |
13 | + out[ii * ydim + jj] = in[i * ydim + j]; | |
14 | + } | |
15 | + } | |
16 | + } | |
17 | + | |
18 | + template<typename T> | |
19 | + void cpu_fftshift(T* out, T* in, size_t xdim, size_t ydim){ | |
20 | + circshift(out, in, xdim, ydim, xdim/2, ydim/2); | |
21 | + } | |
22 | + | |
23 | + template<typename T> | |
24 | + void cpu_ifftshift(T* out, T* in, size_t xdim, size_t ydim){ | |
25 | + circshift(out, in, xdim, ydim, xdim/2, ydim/2); | |
26 | + } | |
27 | + | |
28 | + | |
29 | +} | |
30 | + | |
31 | +#endif | |
0 | 32 | \ No newline at end of file | ... | ... |
1 | +#ifndef STIM_LENS_H | |
2 | +#define STIM_LENS_H | |
3 | + | |
4 | +#include "scalarwave.h" | |
5 | +#include "../math/bessel.h" | |
6 | +#include "../cuda/cudatools/devices.h" | |
7 | +#include "../visualization/colormap.h" | |
8 | +#include "../math/fft.h" | |
9 | + | |
10 | +#include "cufft.h" | |
11 | + | |
12 | +#include <cmath> | |
13 | + | |
14 | +namespace stim{ | |
15 | + | |
16 | + /// Perform a k-space transform of a scalar field (FFT). The given field has a width of x and the calculated momentum space has a | |
17 | + /// width of kx (in radians). | |
18 | + /// @param K is a pointer to the output array of all plane waves in the field | |
19 | + /// @param kx is the width of the frame in momentum space | |
20 | + /// @param ky is the height of the frame in momentum space | |
21 | + /// @param E is the field to be transformed | |
22 | + /// @param x is the width of the field in the spatial domain | |
23 | + /// @param y is the height of the field in the spatial domain | |
24 | + /// @param nx is the number of pixels representing the field in the x (and kx) direction | |
25 | + /// @param ny is the number of pixels representing the field in the y (and ky) direction | |
26 | + template<typename T> | |
27 | + void cpu_scalar_to_kspace(stim::complex<T>* K, T& kx, T& ky, stim::complex<T>* E, T x, T y, size_t nx, size_t ny){ | |
28 | + | |
29 | + kx = stim::TAU * nx / x; //calculate the width of the momentum space | |
30 | + ky = stim::TAU * ny / y; | |
31 | + | |
32 | + stim::complex<T>* dev_FFT; | |
33 | + HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array | |
34 | + | |
35 | + stim::complex<T>* dev_E; | |
36 | + HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field | |
37 | + HANDLE_ERROR( cudaMemcpy(dev_E, E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory | |
38 | + | |
39 | + cufftResult result; | |
40 | + cufftHandle plan; | |
41 | + result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C); | |
42 | + if(result != CUFFT_SUCCESS){ | |
43 | + std::cout<<"Error creating cuFFT plan."<<std::endl; | |
44 | + exit(1); | |
45 | + } | |
46 | + | |
47 | + result = cufftExecC2C(plan, (cufftComplex*)dev_E, (cufftComplex*)dev_FFT, CUFFT_FORWARD); | |
48 | + if(result != CUFFT_SUCCESS){ | |
49 | + std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl; | |
50 | + exit(1); | |
51 | + } | |
52 | + | |
53 | + stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny); | |
54 | + HANDLE_ERROR( cudaMemcpy(fft, dev_FFT, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) ); | |
55 | + | |
56 | + stim::cpu_fftshift(K, fft, nx, ny); | |
57 | + } | |
58 | + | |
59 | + template<typename T> | |
60 | + void cpu_scalar_from_kspace(stim::complex<T>* E, T& x, T& y, stim::complex<T>* K, T kx, T ky, size_t nx, size_t ny){ | |
61 | + | |
62 | + x = stim::TAU * nx / kx; //calculate the width of the momentum space | |
63 | + y = stim::TAU * ny / ky; | |
64 | + | |
65 | + stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny); | |
66 | + stim::cpu_ifftshift(fft, K, nx, ny); | |
67 | + | |
68 | + stim::complex<T>* dev_FFT; | |
69 | + HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array | |
70 | + HANDLE_ERROR( cudaMemcpy(dev_FFT, fft, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory | |
71 | + | |
72 | + stim::complex<T>* dev_E; | |
73 | + HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field | |
74 | + | |
75 | + cufftResult result; | |
76 | + cufftHandle plan; | |
77 | + result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C); | |
78 | + if(result != CUFFT_SUCCESS){ | |
79 | + std::cout<<"Error creating cuFFT plan."<<std::endl; | |
80 | + exit(1); | |
81 | + } | |
82 | + | |
83 | + result = cufftExecC2C(plan, (cufftComplex*)dev_FFT, (cufftComplex*)dev_E, CUFFT_FORWARD); | |
84 | + if(result != CUFFT_SUCCESS){ | |
85 | + std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl; | |
86 | + exit(1); | |
87 | + } | |
88 | + | |
89 | + HANDLE_ERROR( cudaMemcpy(E, dev_E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) ); | |
90 | + | |
91 | + | |
92 | + } | |
93 | + | |
94 | + /// Propagate a field slice along its orthogonal direction by a given distance z | |
95 | + /// @param Enew is the resulting propogated field | |
96 | + template<typename T> | |
97 | + void cpu_scalar_propagate(stim::complex<T>* Enew, stim::complex<T>* E, T sx, T sy, T z, T k, size_t nx, size_t ny){ | |
98 | + | |
99 | + stim::complex<T>* K = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * nx * ny ); | |
100 | + | |
101 | + T Kx, Ky; //width and height in k space | |
102 | + cpu_scalar_to_kspace(K, Kx, Ky, E ,sx, sy, nx, ny); | |
103 | + | |
104 | + T* mag = (T*) malloc( sizeof(T) * nx * ny ); | |
105 | + stim::abs(mag, K, nx * ny); | |
106 | + stim::cpu2image<float>(mag, "kspace_pre_shift.bmp", nx, ny, stim::cmBrewer); | |
107 | + | |
108 | + size_t kxi, kyi; | |
109 | + size_t i; | |
110 | + T kx, kx_sq, ky, ky_sq, k_sq; | |
111 | + T kz; | |
112 | + stim::complex<T> shift; | |
113 | + T min_kx = -Kx / 2; | |
114 | + T dkx = Kx / (nx); | |
115 | + T min_ky = -Ky / 2; | |
116 | + T dky = Ky / (ny); | |
117 | + for(kyi = 0; kyi < ny; kyi++){ //for each plane wave in the ky direction | |
118 | + for(kxi = 0; kxi < nx; kxi++){ //for each plane wave in the ky direction | |
119 | + i = kyi * nx + kxi; | |
120 | + | |
121 | + kx = min_kx + kxi * dkx; //calculate the position of the current plane wave | |
122 | + ky = min_ky + kyi * dky; | |
123 | + | |
124 | + kx_sq = kx * kx; | |
125 | + ky_sq = ky * ky; | |
126 | + k_sq = k*k; | |
127 | + | |
128 | + if(kx_sq + ky_sq < k_sq){ | |
129 | + kz = sqrt(k*k - kx * kx - ky * ky); //estimate kz using the Fresnel approximation | |
130 | + shift = -exp(stim::complex<T>(0, kz * z)); | |
131 | + K[i] *= shift; | |
132 | + } | |
133 | + else{ | |
134 | + K[i] = 0; | |
135 | + } | |
136 | + } | |
137 | + } | |
138 | + | |
139 | + stim::abs(mag, K, nx * ny); | |
140 | + stim::cpu2image<float>(mag, "kspace_post_shift.bmp", nx, ny, stim::cmBrewer); | |
141 | + | |
142 | + cpu_scalar_from_kspace(Enew, sx, sy, K, Kx, Ky, nx, ny); | |
143 | + } | |
144 | + | |
145 | +} | |
146 | + | |
147 | + | |
148 | +#endif | |
0 | 149 | \ No newline at end of file | ... | ... |
1 | +#ifndef STIM_SCALARFIELD_H | |
2 | +#define STIM_SCALARFIELD_H | |
3 | + | |
4 | +#include "../math/rect.h" | |
5 | +#include "../math/complex.h" | |
6 | + | |
7 | +namespace stim{ | |
8 | + | |
9 | + enum locationType {CPUmem, GPUmem}; | |
10 | + | |
11 | + /// Class represents a scalar optical field. | |
12 | + | |
13 | + /// In general, this class is designed to operate between the CPU and GPU. So, make sure all functions have an option to create the output on either. | |
14 | + /// The field is stored *either* on the GPU or host memory, but not both. This enforces that there can't be different copies of the same field. | |
15 | + /// This class is designed to be included in all of the other scalar optics classes, allowing them to render output data so make sure to keep it general and compatible. | |
16 | + | |
17 | +template<typename T> | |
18 | +class scalarfield : public rect<T>{ | |
19 | + | |
20 | +protected: | |
21 | + stim::complex<T>* E; | |
22 | + size_t R[2]; | |
23 | + locationType loc; | |
24 | + | |
25 | + | |
26 | + | |
27 | +public: | |
28 | + | |
29 | + CUDA_CALLABLE scalarfield(size_t X, size_t Y, T size = 1, T z_pos = 0) : rect<T>::rect(size, z_pos){ | |
30 | + R[0] = X; //set the field resolution | |
31 | + R[1] = Y; | |
32 | + | |
33 | + E = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * R[0] * R[1]); //allocate in CPU memory | |
34 | + loc = CPUmem; | |
35 | + } | |
36 | + | |
37 | + CUDA_CALLABLE ~scalarfield(){ | |
38 | + if(loc == CPUmem) free(E); | |
39 | + else cudaFree(E); | |
40 | + } | |
41 | + | |
42 | + /// Returns the number of values in the field | |
43 | + CUDA_CALLABLE size_t size(){ | |
44 | + return R[0] * R[1]; | |
45 | + } | |
46 | + | |
47 | + CUDA_CALLABLE size_t grid_bytes(){ | |
48 | + return sizeof(stim::complex<T>) * R[0] * R[1]; | |
49 | + } | |
50 | + | |
51 | + /// Calculates the distance between points on the grid | |
52 | + T spacing(){ | |
53 | + T du = rect<T>::X.len() / R[0]; | |
54 | + T dv = rect<T>::Y.len() / R[1]; | |
55 | + return min<T>(du, dv); | |
56 | + } | |
57 | + | |
58 | + /// Copy the field array to the GPU, if it isn't already there | |
59 | + void to_gpu(){ | |
60 | + if(loc == GPUmem) return; | |
61 | + else{ | |
62 | + stim::complex<T>* dev_E; | |
63 | + HANDLE_ERROR( cudaMalloc(&dev_E, e_bytes()) ); //allocate GPU memory | |
64 | + HANDLE_ERROR( cudaMemcpy(dev_E, E, e_bytes(), cudaMemcpyHostToDevice) ); //copy the field to the GPU | |
65 | + free(E); //free the CPU memory | |
66 | + E = dev_E; //swap pointers | |
67 | + } | |
68 | + } | |
69 | + | |
70 | + /// Copy the field array to the CPU, if it isn't already there | |
71 | + void to_cpu(){ | |
72 | + if(loc == CPUmem) return; | |
73 | + else{ | |
74 | + stim::complex<T>* host_E = (stim::complex<T>*) malloc(e_bytes()); //allocate space in main memory | |
75 | + HANDLE_ERROR( cudaMemcpy(host_E, E, e_bytes(), cudaMemcpyDeviceToHost) ); //copy from GPU to CPU | |
76 | + HANDLE_ERROR( cudaFree(E) ); //free device memory | |
77 | + E = host_E; //swap pointers | |
78 | + } | |
79 | + } | |
80 | + | |
81 | + std::string str(){ | |
82 | + std::stringstream ss; | |
83 | + ss<<rect<T>::str()<<std::endl; | |
84 | + ss<<"[ "<<R[0]<<" x "<<R[1]<<" ]"<<std::endl; | |
85 | + ss<<"location: "; | |
86 | + if(loc == CPUmem) ss<<"CPU"; | |
87 | + else ss<<"GPU"; | |
88 | + | |
89 | + ss<<endl; | |
90 | + return ss.str(); | |
91 | + } | |
92 | + | |
93 | + stim::complex<T>* ptr(){ | |
94 | + return E; | |
95 | + } | |
96 | + | |
97 | + /// Evaluate the cartesian coordinates of each point in the field. The resulting arrays are allocated in the same memory where the field is stored. | |
98 | + void meshgrid(T* X, T* Y, T* Z, locationType location){ | |
99 | + size_t array_size = sizeof(T) * R[0] * R[1]; | |
100 | + if(location == CPUmem){ | |
101 | + | |
102 | + T du = 1.0 / (R[0] - 1); //calculate the spacing between points in the grid | |
103 | + T dv = 1.0 / (R[1] - 1); | |
104 | + | |
105 | + size_t ui, vi, i; | |
106 | + stim::vec3<T> p; | |
107 | + for(vi = 0; vi < R[1]; vi++){ | |
108 | + i = vi * R[0]; | |
109 | + for(ui = 0; ui < R[0]; ui++){ | |
110 | + p = rect<T>::p(ui * du, vi * dv); | |
111 | + X[i] = p[0]; | |
112 | + Y[i] = p[1]; | |
113 | + Z[i] = p[2]; | |
114 | + i++; | |
115 | + } | |
116 | + } | |
117 | + stim::cpu2image(X, "X.bmp", R[0], R[1], stim::cmBrewer); | |
118 | + stim::cpu2image(Y, "Y.bmp", R[0], R[1], stim::cmBrewer); | |
119 | + stim::cpu2image(Z, "Z.bmp", R[0], R[1], stim::cmBrewer); | |
120 | + } | |
121 | + else{ | |
122 | + std::cout<<"GPU allocation of a meshgrid isn't supported yet. You'll have to write kernels to do the calculation."; | |
123 | + exit(1); | |
124 | + } | |
125 | + } | |
126 | + | |
127 | + void image(std::string filename, stim::complexComponentType type = complexMag, stim::colormapType cmap = stim::cmBrewer){ | |
128 | + | |
129 | + if(loc == GPUmem) to_cpu(); //if the field is in the GPU, move it to the CPU | |
130 | + T* image = (T*) malloc( sizeof(T) * size() ); //allocate space for the real image | |
131 | + | |
132 | + switch(type){ //get the specified component from the complex value | |
133 | + case complexMag: | |
134 | + stim::abs(image, E, size()); | |
135 | + break; | |
136 | + case complexReal: | |
137 | + stim::real(image, E, size()); | |
138 | + break; | |
139 | + case complexImaginary: | |
140 | + stim::imag(image, E, size()); | |
141 | + } | |
142 | + stim::cpu2image(image, filename, R[0], R[1], cmap); //save the resulting image | |
143 | + free(image); //free the real image | |
144 | + } | |
145 | + | |
146 | +}; //end class scalarfield | |
147 | +} | |
148 | + | |
149 | +//stream insertion operator | |
150 | +template<typename T> | |
151 | +std::ostream& operator<<(std::ostream& os, stim::scalarfield<T>& rhs){ | |
152 | + os<<rhs.str(); | |
153 | + return os; | |
154 | +} | |
155 | + | |
156 | + | |
157 | +#endif | |
0 | 158 | \ No newline at end of file | ... | ... |