Commit ad2123e6f9eb1be476fccb05626ce7aaa24d502d

Authored by David Mayerich
2 parents 0c9d37c7 3a74ec6d

Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib

stim/math/fft.h 0 → 100644
  1 +#ifndef STIM_FFT_H
  2 +#define STIM_FFT_H
  3 +
  4 +namespace stim{
  5 +
  6 + template<class T>
  7 + void circshift(T *out, const T *in, size_t xdim, size_t ydim, size_t xshift, size_t yshift){
  8 + size_t i, j, ii, jj;
  9 + for (i =0; i < xdim; i++) {
  10 + ii = (i + xshift) % xdim;
  11 + for (j = 0; j < ydim; j++) {
  12 + jj = (j + yshift) % ydim;
  13 + out[ii * ydim + jj] = in[i * ydim + j];
  14 + }
  15 + }
  16 + }
  17 +
  18 + template<typename T>
  19 + void cpu_fftshift(T* out, T* in, size_t xdim, size_t ydim){
  20 + circshift(out, in, xdim, ydim, xdim/2, ydim/2);
  21 + }
  22 +
  23 + template<typename T>
  24 + void cpu_ifftshift(T* out, T* in, size_t xdim, size_t ydim){
  25 + circshift(out, in, xdim, ydim, xdim/2, ydim/2);
  26 + }
  27 +
  28 +
  29 +}
  30 +
  31 +#endif
0 \ No newline at end of file 32 \ No newline at end of file
stim/optics/lens.h 0 → 100644
  1 +#ifndef STIM_LENS_H
  2 +#define STIM_LENS_H
  3 +
  4 +#include "scalarwave.h"
  5 +#include "../math/bessel.h"
  6 +#include "../cuda/cudatools/devices.h"
  7 +#include "../visualization/colormap.h"
  8 +#include "../math/fft.h"
  9 +
  10 +#include "cufft.h"
  11 +
  12 +#include <cmath>
  13 +
  14 +namespace stim{
  15 +
  16 + /// Perform a k-space transform of a scalar field (FFT). The given field has a width of x and the calculated momentum space has a
  17 + /// width of kx (in radians).
  18 + /// @param K is a pointer to the output array of all plane waves in the field
  19 + /// @param kx is the width of the frame in momentum space
  20 + /// @param ky is the height of the frame in momentum space
  21 + /// @param E is the field to be transformed
  22 + /// @param x is the width of the field in the spatial domain
  23 + /// @param y is the height of the field in the spatial domain
  24 + /// @param nx is the number of pixels representing the field in the x (and kx) direction
  25 + /// @param ny is the number of pixels representing the field in the y (and ky) direction
  26 + template<typename T>
  27 + void cpu_scalar_to_kspace(stim::complex<T>* K, T& kx, T& ky, stim::complex<T>* E, T x, T y, size_t nx, size_t ny){
  28 +
  29 + kx = stim::TAU * nx / x; //calculate the width of the momentum space
  30 + ky = stim::TAU * ny / y;
  31 +
  32 + stim::complex<T>* dev_FFT;
  33 + HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array
  34 +
  35 + stim::complex<T>* dev_E;
  36 + HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field
  37 + HANDLE_ERROR( cudaMemcpy(dev_E, E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory
  38 +
  39 + cufftResult result;
  40 + cufftHandle plan;
  41 + result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C);
  42 + if(result != CUFFT_SUCCESS){
  43 + std::cout<<"Error creating cuFFT plan."<<std::endl;
  44 + exit(1);
  45 + }
  46 +
  47 + result = cufftExecC2C(plan, (cufftComplex*)dev_E, (cufftComplex*)dev_FFT, CUFFT_FORWARD);
  48 + if(result != CUFFT_SUCCESS){
  49 + std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl;
  50 + exit(1);
  51 + }
  52 +
  53 + stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny);
  54 + HANDLE_ERROR( cudaMemcpy(fft, dev_FFT, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) );
  55 +
  56 + stim::cpu_fftshift(K, fft, nx, ny);
  57 + }
  58 +
  59 + template<typename T>
  60 + void cpu_scalar_from_kspace(stim::complex<T>* E, T& x, T& y, stim::complex<T>* K, T kx, T ky, size_t nx, size_t ny){
  61 +
  62 + x = stim::TAU * nx / kx; //calculate the width of the momentum space
  63 + y = stim::TAU * ny / ky;
  64 +
  65 + stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny);
  66 + stim::cpu_ifftshift(fft, K, nx, ny);
  67 +
  68 + stim::complex<T>* dev_FFT;
  69 + HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array
  70 + HANDLE_ERROR( cudaMemcpy(dev_FFT, fft, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory
  71 +
  72 + stim::complex<T>* dev_E;
  73 + HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field
  74 +
  75 + cufftResult result;
  76 + cufftHandle plan;
  77 + result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C);
  78 + if(result != CUFFT_SUCCESS){
  79 + std::cout<<"Error creating cuFFT plan."<<std::endl;
  80 + exit(1);
  81 + }
  82 +
  83 + result = cufftExecC2C(plan, (cufftComplex*)dev_FFT, (cufftComplex*)dev_E, CUFFT_FORWARD);
  84 + if(result != CUFFT_SUCCESS){
  85 + std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl;
  86 + exit(1);
  87 + }
  88 +
  89 + HANDLE_ERROR( cudaMemcpy(E, dev_E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) );
  90 +
  91 +
  92 + }
  93 +
  94 + /// Propagate a field slice along its orthogonal direction by a given distance z
  95 + /// @param Enew is the resulting propogated field
  96 + template<typename T>
  97 + void cpu_scalar_propagate(stim::complex<T>* Enew, stim::complex<T>* E, T sx, T sy, T z, T k, size_t nx, size_t ny){
  98 +
  99 + stim::complex<T>* K = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * nx * ny );
  100 +
  101 + T Kx, Ky; //width and height in k space
  102 + cpu_scalar_to_kspace(K, Kx, Ky, E ,sx, sy, nx, ny);
  103 +
  104 + T* mag = (T*) malloc( sizeof(T) * nx * ny );
  105 + stim::abs(mag, K, nx * ny);
  106 + stim::cpu2image<float>(mag, "kspace_pre_shift.bmp", nx, ny, stim::cmBrewer);
  107 +
  108 + size_t kxi, kyi;
  109 + size_t i;
  110 + T kx, kx_sq, ky, ky_sq, k_sq;
  111 + T kz;
  112 + stim::complex<T> shift;
  113 + T min_kx = -Kx / 2;
  114 + T dkx = Kx / (nx);
  115 + T min_ky = -Ky / 2;
  116 + T dky = Ky / (ny);
  117 + for(kyi = 0; kyi < ny; kyi++){ //for each plane wave in the ky direction
  118 + for(kxi = 0; kxi < nx; kxi++){ //for each plane wave in the ky direction
  119 + i = kyi * nx + kxi;
  120 +
  121 + kx = min_kx + kxi * dkx; //calculate the position of the current plane wave
  122 + ky = min_ky + kyi * dky;
  123 +
  124 + kx_sq = kx * kx;
  125 + ky_sq = ky * ky;
  126 + k_sq = k*k;
  127 +
  128 + if(kx_sq + ky_sq < k_sq){
  129 + kz = sqrt(k*k - kx * kx - ky * ky); //estimate kz using the Fresnel approximation
  130 + shift = -exp(stim::complex<T>(0, kz * z));
  131 + K[i] *= shift;
  132 + }
  133 + else{
  134 + K[i] = 0;
  135 + }
  136 + }
  137 + }
  138 +
  139 + stim::abs(mag, K, nx * ny);
  140 + stim::cpu2image<float>(mag, "kspace_post_shift.bmp", nx, ny, stim::cmBrewer);
  141 +
  142 + cpu_scalar_from_kspace(Enew, sx, sy, K, Kx, Ky, nx, ny);
  143 + }
  144 +
  145 +}
  146 +
  147 +
  148 +#endif
0 \ No newline at end of file 149 \ No newline at end of file
stim/optics/scalarfield.h 0 → 100644
  1 +#ifndef STIM_SCALARFIELD_H
  2 +#define STIM_SCALARFIELD_H
  3 +
  4 +#include "../math/rect.h"
  5 +#include "../math/complex.h"
  6 +
  7 +namespace stim{
  8 +
  9 + enum locationType {CPUmem, GPUmem};
  10 +
  11 + /// Class represents a scalar optical field.
  12 +
  13 + /// In general, this class is designed to operate between the CPU and GPU. So, make sure all functions have an option to create the output on either.
  14 + /// The field is stored *either* on the GPU or host memory, but not both. This enforces that there can't be different copies of the same field.
  15 + /// This class is designed to be included in all of the other scalar optics classes, allowing them to render output data so make sure to keep it general and compatible.
  16 +
  17 +template<typename T>
  18 +class scalarfield : public rect<T>{
  19 +
  20 +protected:
  21 + stim::complex<T>* E;
  22 + size_t R[2];
  23 + locationType loc;
  24 +
  25 +
  26 +
  27 +public:
  28 +
  29 + CUDA_CALLABLE scalarfield(size_t X, size_t Y, T size = 1, T z_pos = 0) : rect<T>::rect(size, z_pos){
  30 + R[0] = X; //set the field resolution
  31 + R[1] = Y;
  32 +
  33 + E = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * R[0] * R[1]); //allocate in CPU memory
  34 + loc = CPUmem;
  35 + }
  36 +
  37 + CUDA_CALLABLE ~scalarfield(){
  38 + if(loc == CPUmem) free(E);
  39 + else cudaFree(E);
  40 + }
  41 +
  42 + /// Returns the number of values in the field
  43 + CUDA_CALLABLE size_t size(){
  44 + return R[0] * R[1];
  45 + }
  46 +
  47 + CUDA_CALLABLE size_t grid_bytes(){
  48 + return sizeof(stim::complex<T>) * R[0] * R[1];
  49 + }
  50 +
  51 + /// Calculates the distance between points on the grid
  52 + T spacing(){
  53 + T du = rect<T>::X.len() / R[0];
  54 + T dv = rect<T>::Y.len() / R[1];
  55 + return min<T>(du, dv);
  56 + }
  57 +
  58 + /// Copy the field array to the GPU, if it isn't already there
  59 + void to_gpu(){
  60 + if(loc == GPUmem) return;
  61 + else{
  62 + stim::complex<T>* dev_E;
  63 + HANDLE_ERROR( cudaMalloc(&dev_E, e_bytes()) ); //allocate GPU memory
  64 + HANDLE_ERROR( cudaMemcpy(dev_E, E, e_bytes(), cudaMemcpyHostToDevice) ); //copy the field to the GPU
  65 + free(E); //free the CPU memory
  66 + E = dev_E; //swap pointers
  67 + }
  68 + }
  69 +
  70 + /// Copy the field array to the CPU, if it isn't already there
  71 + void to_cpu(){
  72 + if(loc == CPUmem) return;
  73 + else{
  74 + stim::complex<T>* host_E = (stim::complex<T>*) malloc(e_bytes()); //allocate space in main memory
  75 + HANDLE_ERROR( cudaMemcpy(host_E, E, e_bytes(), cudaMemcpyDeviceToHost) ); //copy from GPU to CPU
  76 + HANDLE_ERROR( cudaFree(E) ); //free device memory
  77 + E = host_E; //swap pointers
  78 + }
  79 + }
  80 +
  81 + std::string str(){
  82 + std::stringstream ss;
  83 + ss<<rect<T>::str()<<std::endl;
  84 + ss<<"[ "<<R[0]<<" x "<<R[1]<<" ]"<<std::endl;
  85 + ss<<"location: ";
  86 + if(loc == CPUmem) ss<<"CPU";
  87 + else ss<<"GPU";
  88 +
  89 + ss<<endl;
  90 + return ss.str();
  91 + }
  92 +
  93 + stim::complex<T>* ptr(){
  94 + return E;
  95 + }
  96 +
  97 + /// Evaluate the cartesian coordinates of each point in the field. The resulting arrays are allocated in the same memory where the field is stored.
  98 + void meshgrid(T* X, T* Y, T* Z, locationType location){
  99 + size_t array_size = sizeof(T) * R[0] * R[1];
  100 + if(location == CPUmem){
  101 +
  102 + T du = 1.0 / (R[0] - 1); //calculate the spacing between points in the grid
  103 + T dv = 1.0 / (R[1] - 1);
  104 +
  105 + size_t ui, vi, i;
  106 + stim::vec3<T> p;
  107 + for(vi = 0; vi < R[1]; vi++){
  108 + i = vi * R[0];
  109 + for(ui = 0; ui < R[0]; ui++){
  110 + p = rect<T>::p(ui * du, vi * dv);
  111 + X[i] = p[0];
  112 + Y[i] = p[1];
  113 + Z[i] = p[2];
  114 + i++;
  115 + }
  116 + }
  117 + stim::cpu2image(X, "X.bmp", R[0], R[1], stim::cmBrewer);
  118 + stim::cpu2image(Y, "Y.bmp", R[0], R[1], stim::cmBrewer);
  119 + stim::cpu2image(Z, "Z.bmp", R[0], R[1], stim::cmBrewer);
  120 + }
  121 + else{
  122 + std::cout<<"GPU allocation of a meshgrid isn't supported yet. You'll have to write kernels to do the calculation.";
  123 + exit(1);
  124 + }
  125 + }
  126 +
  127 + void image(std::string filename, stim::complexComponentType type = complexMag, stim::colormapType cmap = stim::cmBrewer){
  128 +
  129 + if(loc == GPUmem) to_cpu(); //if the field is in the GPU, move it to the CPU
  130 + T* image = (T*) malloc( sizeof(T) * size() ); //allocate space for the real image
  131 +
  132 + switch(type){ //get the specified component from the complex value
  133 + case complexMag:
  134 + stim::abs(image, E, size());
  135 + break;
  136 + case complexReal:
  137 + stim::real(image, E, size());
  138 + break;
  139 + case complexImaginary:
  140 + stim::imag(image, E, size());
  141 + }
  142 + stim::cpu2image(image, filename, R[0], R[1], cmap); //save the resulting image
  143 + free(image); //free the real image
  144 + }
  145 +
  146 +}; //end class scalarfield
  147 +}
  148 +
  149 +//stream insertion operator
  150 +template<typename T>
  151 +std::ostream& operator<<(std::ostream& os, stim::scalarfield<T>& rhs){
  152 + os<<rhs.str();
  153 + return os;
  154 +}
  155 +
  156 +
  157 +#endif
0 \ No newline at end of file 158 \ No newline at end of file