Commit 8823488b5fd29c98d502eaf626b4bae0fbc99a78

Authored by Pavel Govyadinov
1 parent bb7275b6

Added missing files

stim/math/quad.h 0 → 100644
  1 +#ifndef RTS_QUAD_H
  2 +#define RTS_QUAD_H
  3 +
  4 +//enable CUDA_CALLABLE macro
  5 +#include <stim/cuda/callable.h>
  6 +#include <stim/math/vector.h>
  7 +#include <stim/math/triangle.h>
  8 +#include <stim/math/quaternion.h>
  9 +#include <iostream>
  10 +#include <iomanip>
  11 +#include <algorithm>
  12 +
  13 +namespace stim{
  14 +
  15 +//template for a quadangle class in ND space
  16 +template <class T, int N = 3>
  17 +struct quad
  18 +{
  19 + /*
  20 + B------------------>C
  21 + ^ ^
  22 + | |
  23 + Y |
  24 + | |
  25 + | |
  26 + A---------X-------->O
  27 + */
  28 +
  29 + /*T A[N];
  30 + T B[N];
  31 + T C[N];*/
  32 +
  33 + rts::vec<T, N> A;
  34 + rts::vec<T, N> X;
  35 + rts::vec<T, N> Y;
  36 +
  37 +
  38 + CUDA_CALLABLE quad()
  39 + {
  40 +
  41 + }
  42 +
  43 + CUDA_CALLABLE quad(vec<T, N> a, vec<T, N> b, vec<T, N> c)
  44 + {
  45 +
  46 + A = a;
  47 + Y = b - a;
  48 + X = c - a - Y;
  49 +
  50 + }
  51 +
  52 + /*******************************************************************
  53 + Constructor - create a quad from a position, normal, and rotation
  54 + *******************************************************************/
  55 + CUDA_CALLABLE quad(rts::vec<T, N> c, rts::vec<T, N> normal, T width, T height, T theta)
  56 + {
  57 +
  58 + //compute the X direction - start along world-space X
  59 + Y = rts::vec<T, N>(0, 1, 0);
  60 + if(Y == normal)
  61 + Y = rts::vec<T, N>(0, 0, 1);
  62 +
  63 + X = Y.cross(normal).norm();
  64 +
  65 + std::cout<<X<<std::endl;
  66 +
  67 + //rotate the X axis by theta radians
  68 + rts::quaternion<T> q;
  69 + q.CreateRotation(theta, normal);
  70 + X = q.toMatrix3() * X;
  71 + Y = normal.cross(X);
  72 +
  73 + //normalize everything
  74 + X = X.norm();
  75 + Y = Y.norm();
  76 +
  77 + //scale to match the quad width and height
  78 + X = X * width;
  79 + Y = Y * height;
  80 +
  81 + //set the corner of the plane
  82 + A = c - X * 0.5f - Y * 0.5f;
  83 +
  84 + std::cout<<X<<std::endl;
  85 + }
  86 +
  87 + //boolean comparison
  88 + bool operator==(const quad<T, N> & rhs)
  89 + {
  90 + if(A == rhs.A && X == rhs.X && Y == rhs.Y)
  91 + return true;
  92 + else
  93 + return false;
  94 + }
  95 +
  96 + /*******************************************
  97 + Return the normal for the quad
  98 + *******************************************/
  99 + CUDA_CALLABLE rts::vec<T, N> n()
  100 + {
  101 + return (X.cross(Y)).norm();
  102 + }
  103 +
  104 + CUDA_CALLABLE rts::vec<T, N> p(T a, T b)
  105 + {
  106 + rts::vec<T, N> result;
  107 + //given the two parameters a, b = [0 1], returns the position in world space
  108 + result = A + X * a + Y * b;
  109 +
  110 + return result;
  111 + }
  112 +
  113 + CUDA_CALLABLE rts::vec<T, N> operator()(T a, T b)
  114 + {
  115 + return p(a, b);
  116 + }
  117 +
  118 + std::string str()
  119 + {
  120 + std::stringstream ss;
  121 +
  122 + ss<<std::left<<"B="<<setfill('-')<<setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
  123 + ss<<setfill(' ')<<setw(23)<<"|"<<"|"<<std::endl<<setw(23)<<"|"<<"|"<<std::endl;
  124 + ss<<std::left<<"A="<<setfill('-')<<setw(20)<<A<<">"<<"D="<<A + X;
  125 +
  126 + return ss.str();
  127 +
  128 + }
  129 +
  130 + CUDA_CALLABLE quad<T, N> operator*(T rhs)
  131 + {
  132 + //scales the plane by a scalar value
  133 +
  134 + //compute the center point
  135 + rts::vec<T, N> c = A + X*0.5f + Y*0.5f;
  136 +
  137 + //create the new quadangle
  138 + quad<T, N> result;
  139 + result.X = X * rhs;
  140 + result.Y = Y * rhs;
  141 + result.A = c - result.X*0.5f - result.Y*0.5f;
  142 +
  143 + return result;
  144 +
  145 + }
  146 +
  147 + CUDA_CALLABLE T dist(vec<T, N> p)
  148 + {
  149 + //compute the distance between a point and this quad
  150 +
  151 + //first break the quad up into two triangles
  152 + triangle<T, N> T0(A, A+X, A+Y);
  153 + triangle<T, N> T1(A+X+Y, A+X, A+Y);
  154 +
  155 +
  156 + T d0 = T0.dist(p);
  157 + T d1 = T1.dist(p);
  158 +
  159 + if(d0 < d1)
  160 + return d0;
  161 + else
  162 + return d1;
  163 + }
  164 +
  165 + CUDA_CALLABLE T dist_max(vec<T, N> p)
  166 + {
  167 + T da = (A - p).len();
  168 + T db = (A+X - p).len();
  169 + T dc = (A+Y - p).len();
  170 + T dd = (A+X+Y - p).len();
  171 +
  172 + return std::max( da, std::max(db, std::max(dc, dd) ) );
  173 + }
  174 +};
  175 +
  176 +} //end namespace rts
  177 +
  178 +template <typename T, int N>
  179 +std::ostream& operator<<(std::ostream& os, rts::quad<T, N> R)
  180 +{
  181 + os<<R.str();
  182 + return os;
  183 +}
  184 +
  185 +
  186 +#endif
... ...
stim/optics/beam.h 0 → 100644
  1 +#ifndef RTS_BEAM
  2 +#define RTS_BEAM
  3 +
  4 +#include "../math/vector.h"
  5 +#include "../math/function.h"
  6 +#include "../optics/planewave.h"
  7 +#include <vector>
  8 +
  9 +namespace stim{
  10 +
  11 +template<typename P>
  12 +class beam : public planewave<P>
  13 +{
  14 +public:
  15 + enum beam_type {Uniform, Bartlett, Hamming, Hanning};
  16 +
  17 +private:
  18 +
  19 + P _na[2]; //numerical aperature of the focusing optics
  20 + vec<P> f; //focal point
  21 + function<P, P> apod; //apodization function
  22 + unsigned int apod_res; //resolution of apodization filter functions
  23 +
  24 + void apod_uniform()
  25 + {
  26 + apod = (P)1;
  27 + }
  28 + void apod_bartlett()
  29 + {
  30 + apod = (P)1;
  31 + apod.insert((P)1, (P)0);
  32 + }
  33 + void apod_hanning()
  34 + {
  35 + apod = (P)0;
  36 + P x, y;
  37 + for(unsigned int n=0; n<apod_res; n++)
  38 + {
  39 + x = (P)n/(P)apod_res;
  40 + y = pow( cos( ((P)3.14159 * x) / 2 ), 2);
  41 + apod.insert(x, y);
  42 + }
  43 + }
  44 + void apod_hamming()
  45 + {
  46 + apod = (P)0;
  47 + P x, y;
  48 + for(unsigned int n=0; n<apod_res; n++)
  49 + {
  50 + x = (P)n/(P)apod_res;
  51 + y = (P)27/(P)50 + ( (P)23/(P)50 ) * cos((P)3.14159 * x);
  52 + apod.insert(x, y);
  53 + }
  54 + }
  55 +
  56 + void set_apod(beam_type type)
  57 + {
  58 + if(type == Uniform)
  59 + apod_uniform();
  60 + if(type == Bartlett)
  61 + apod_bartlett();
  62 + if(type == Hanning)
  63 + apod_hanning();
  64 + if(type == Hamming)
  65 + apod_hamming();
  66 + }
  67 +
  68 +public:
  69 +
  70 + ///constructor: build a default beam (NA=1.0)
  71 + beam(
  72 + vec<P> k = rts::vec<P>(0, 0, rtsTAU),
  73 + vec<P> _E0 = rts::vec<P>(1, 0, 0),
  74 + beam_type _apod = Uniform)
  75 + : planewave<P>(k, _E0)
  76 + {
  77 + _na[0] = (P)0.0;
  78 + _na[1] = (P)1.0;
  79 + f = vec<P>( (P)0, (P)0, (P)0 );
  80 + apod_res = 256; //set the default resolution for apodization filters
  81 + set_apod(_apod); //set the apodization function type
  82 + }
  83 +
  84 + beam<P> refract(rts::vec<P> kn) const{
  85 +
  86 + beam<P> new_beam;
  87 + new_beam._na[0] = _na[0];
  88 + new_beam._na[1] = _na[1];
  89 +
  90 +
  91 + rts::planewave<P> pw = planewave<P>::bend(kn);
  92 + //std::cout<<pw.str()<<std::endl;
  93 +
  94 + new_beam.k = pw.kvec();
  95 + new_beam.E0 = pw.E();
  96 +
  97 + return new_beam;
  98 + }
  99 +
  100 + ///Numerical Aperature functions
  101 + void NA(P na)
  102 + {
  103 + _na[0] = (P)0;
  104 + _na[1] = na;
  105 + }
  106 + void NA(P na0, P na1)
  107 + {
  108 + _na[0] = na0;
  109 + _na[1] = na1;
  110 + }
  111 +
  112 + /*string str() :
  113 + {
  114 + stringstream ss;
  115 + ss<<"Beam Center: "<<k<<std::endl;
  116 +
  117 + return ss.str();
  118 + }*/
  119 +
  120 + //Monte-Carlo decomposition into plane waves
  121 + std::vector< planewave<P> > mc(unsigned int N = 100000, unsigned int seed = 0) const
  122 + {
  123 + /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling
  124 + of a sphere and projecting these samples onto an inscribed sphere.
  125 +
  126 + seed = seed for the random number generator
  127 + */
  128 + srand(seed); //seed the random number generator
  129 +
  130 + vec<P> k_hat = beam::k.norm();
  131 +
  132 + ///compute the rotation operator to transform (0, 0, 1) to k
  133 + P cos_angle = k_hat.dot(rts::vec<P>(0, 0, 1));
  134 + rts::matrix<P, 3> rotation;
  135 +
  136 + //if the cosine of the angle is -1, the rotation is just a flip across the z axis
  137 + if(cos_angle == -1){
  138 + rotation(2, 2) = -1;
  139 + }
  140 + else if(cos_angle != 1.0)
  141 + {
  142 + rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k_hat).norm(); //compute the axis of rotation
  143 + P angle = acos(cos_angle); //compute the angle of rotation
  144 + rts::quaternion<P> quat; //create a quaternion describing the rotation
  145 + quat.CreateRotation(angle, r_axis);
  146 + rotation = quat.toMatrix3(); //compute the rotation matrix
  147 + }
  148 +
  149 + //find the phi values associated with the cassegrain ring
  150 + P PHI[2];
  151 + PHI[0] = (P)asin(_na[0]);
  152 + PHI[1] = (P)asin(_na[1]);
  153 +
  154 + //calculate the z-axis cylinder coordinates associated with these angles
  155 + P Z[2];
  156 + Z[0] = cos(PHI[0]);
  157 + Z[1] = cos(PHI[1]);
  158 + P range = Z[0] - Z[1];
  159 +
  160 + std::vector< planewave<P> > samples; //create a vector of plane waves
  161 +
  162 + //draw a distribution of random phi, z values
  163 + P z, phi, theta;
  164 + for(int i=0; i<N; i++) //for each sample
  165 + {
  166 + z = ((P)rand() / (P)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder
  167 + theta = ((P)rand() / (P)RAND_MAX) * 2 * (P)3.14159;
  168 + phi = acos(z); //project onto the sphere, computing phi in spherical coordinates
  169 +
  170 + //compute and store cartesian coordinates
  171 + rts::vec<P> spherical(1, theta, phi); //convert from spherical to cartesian coordinates
  172 + rts::vec<P> cart = spherical.sph2cart();
  173 + vec<P> k_prime = rotation * cart; //create a sample vector
  174 +
  175 + //store a wave refracted along the given direction
  176 + //std::cout<<"k prime: "<<rotation<<std::endl;
  177 + samples.push_back(planewave<P>::refract(k_prime) * apod(phi/PHI[1]));
  178 + }
  179 +
  180 + return samples;
  181 + }
  182 +
  183 + std::string str()
  184 + {
  185 + std::stringstream ss;
  186 + ss<<"Beam:"<<std::endl;
  187 + //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
  188 + ss<<" Central Plane Wave: "<<beam::k<<std::endl;
  189 + if(_na[0] == 0)
  190 + ss<<" NA: "<<_na[1];
  191 + else
  192 + ss<<" NA: "<<_na[0]<<" -- "<<_na[1];
  193 +
  194 + return ss.str();
  195 + }
  196 +
  197 +
  198 +
  199 +};
  200 +
  201 +}
  202 +
  203 +#endif
... ...
stim/optics/efield.cuh 0 → 100644
  1 +#ifndef RTS_EFIELD
  2 +#define RTS_EFIELD
  3 +
  4 +#include "../math/complex.h"
  5 +#include "../math/realfield.cuh"
  6 +#include "../visualization/colormap.h"
  7 +#include "../optics/planewave.h"
  8 +#include "../cuda/devices.h"
  9 +#include "../optics/beam.h"
  10 +#include "../math/rect.h"
  11 +
  12 +
  13 +namespace stim{
  14 +
  15 +template<typename T>
  16 +__global__ void gpu_planewave2efield(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1,
  17 + planewave<T> w, rect<T> q)
  18 +{
  19 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  20 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  21 +
  22 + //make sure that the thread indices are in-bounds
  23 + if(iu >= r0 || iv >= r1) return;
  24 +
  25 + //compute the index into the field
  26 + int i = iv*r0 + iu;
  27 +
  28 + //get the current position
  29 + vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 );
  30 + vec<T> r(p[0], p[1], p[2]);
  31 +
  32 + complex<T> x( 0.0f, w.k.dot(r) );
  33 +
  34 + if(Y == NULL) //if this is a scalar simulation
  35 + X[i] += w.E0.len() * exp(x); //use the vector magnitude as the plane wave amplitude
  36 + else
  37 + {
  38 + X[i] += w.E0[0] * exp(x);
  39 + Y[i] += w.E0[1] * exp(x);
  40 + Z[i] += w.E0[2] * exp(x);
  41 + }
  42 +}
  43 +
  44 +template<typename T>
  45 +__global__ void gpu_efield_magnitude(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, T* M)
  46 +{
  47 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  48 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  49 +
  50 + //make sure that the thread indices are in-bounds
  51 + if(iu >= r0 || iv >= r1) return;
  52 +
  53 + //compute the index into the field
  54 + int i = iv*r0 + iu;
  55 +
  56 + if(Y == NULL) //if this is a scalar simulation
  57 + M[i] = X[i].abs(); //compute the magnitude of the X component
  58 + else
  59 + {
  60 + M[i] = rts::vec<T>(X[i].abs(), Y[i].abs(), Z[i].abs()).len();
  61 + //M[i] = Z[i].abs();
  62 + }
  63 +}
  64 +
  65 +template<typename T>
  66 +__global__ void gpu_efield_real_magnitude(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, T* M)
  67 +{
  68 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  69 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  70 +
  71 + //make sure that the thread indices are in-bounds
  72 + if(iu >= r0 || iv >= r1) return;
  73 +
  74 + //compute the index into the field
  75 + int i = iv*r0 + iu;
  76 +
  77 + if(Y == NULL) //if this is a scalar simulation
  78 + M[i] = X[i].abs(); //compute the magnitude of the X component
  79 + else
  80 + {
  81 + M[i] = rts::vec<T>(X[i].real(), Y[i].real(), Z[i].real()).len();
  82 + //M[i] = Z[i].abs();
  83 + }
  84 +}
  85 +
  86 +template<typename T>
  87 +__global__ void gpu_efield_polarization(complex<T>* X, complex<T>* Y, complex<T>* Z,
  88 + unsigned int r0, unsigned int r1,
  89 + T* Px, T* Py, T* Pz)
  90 +{
  91 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  92 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  93 +
  94 + //make sure that the thread indices are in-bounds
  95 + if(iu >= r0 || iv >= r1) return;
  96 +
  97 + //compute the index into the field
  98 + int i = iv*r0 + iu;
  99 +
  100 + //compute the field polarization
  101 + Px[i] = X[i].abs();
  102 + Py[i] = Y[i].abs();
  103 + Pz[i] = Z[i].abs();
  104 +
  105 +}
  106 +
  107 +/* This function computes the sum of two complex fields and stores the result in *dest
  108 +*/
  109 +template<typename T>
  110 +__global__ void gpu_efield_sum(complex<T>* dest, complex<T>* src, unsigned int r0, unsigned int r1)
  111 +{
  112 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  113 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  114 +
  115 + //make sure that the thread indices are in-bounds
  116 + if(iu >= r0 || iv >= r1) return;
  117 +
  118 + //compute the index into the field
  119 + int i = iv*r0 + iu;
  120 +
  121 + //sum the fields
  122 + dest[i] += src[i];
  123 +}
  124 +
  125 +/* This class implements a discrete representation of an electromagnetic field
  126 + in 2D. The majority of this representation is done on the GPU.
  127 +*/
  128 +template<typename T>
  129 +class efield
  130 +{
  131 +protected:
  132 +
  133 + bool vector;
  134 +
  135 + //gpu pointer to the field data
  136 + rts::complex<T>* X;
  137 + rts::complex<T>* Y;
  138 + rts::complex<T>* Z;
  139 +
  140 + //resolution of the discrete field
  141 + unsigned int R[2];
  142 +
  143 + //shape of the 2D field
  144 + rect<T> pos;
  145 +
  146 + void from_planewave(planewave<T> p)
  147 + {
  148 + unsigned int SQRT_BLOCK = 16;
  149 + //create one thread for each detector pixel
  150 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  151 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  152 +
  153 +
  154 + gpu_planewave2efield<T> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], p, pos);
  155 + }
  156 +
  157 + void init(unsigned int res0, unsigned int res1, bool _vector)
  158 + {
  159 + vector = _vector; //initialize field type
  160 +
  161 + X = Y = Z = NULL; //initialize all pointers to NULL
  162 + R[0] = res0;
  163 + R[1] = res1;
  164 +
  165 + //allocate memory on the gpu
  166 + cudaMalloc(&X, sizeof(rts::complex<T>) * R[0] * R[1]);
  167 + cudaMemset(X, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
  168 +
  169 + if(vector)
  170 + {
  171 + cudaMalloc(&Y, sizeof(rts::complex<T>) * R[0] * R[1]);
  172 + cudaMemset(Y, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
  173 +
  174 + cudaMalloc(&Z, sizeof(rts::complex<T>) * R[0] * R[1]);
  175 + cudaMemset(Z, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
  176 + }
  177 + }
  178 +
  179 + void destroy()
  180 + {
  181 + if(X != NULL) cudaFree(X);
  182 + if(Y != NULL) cudaFree(Y);
  183 + if(Z != NULL) cudaFree(Z);
  184 + }
  185 +
  186 + void shallowcpy(const rts::efield<T> & src)
  187 + {
  188 + vector = src.vector;
  189 + R[0] = src.R[0];
  190 + R[1] = src.R[1];
  191 + }
  192 +
  193 + void deepcpy(const rts::efield<T> & src)
  194 + {
  195 + //perform a shallow copy
  196 + shallowcpy(src);
  197 +
  198 + //allocate memory on the gpu
  199 + if(src.X != NULL)
  200 + {
  201 + cudaMalloc(&X, sizeof(rts::complex<T>) * R[0] * R[1]);
  202 + cudaMemcpy(X, src.X, sizeof(rts::complex<T>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
  203 + }
  204 + if(src.Y != NULL)
  205 + {
  206 + cudaMalloc(&Y, sizeof(rts::complex<T>) * R[0] * R[1]);
  207 + cudaMemcpy(Y, src.Y, sizeof(rts::complex<T>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
  208 + }
  209 + if(src.Z != NULL)
  210 + {
  211 + cudaMalloc(&Z, sizeof(rts::complex<T>) * R[0] * R[1]);
  212 + cudaMemcpy(Z, src.Z, sizeof(rts::complex<T>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
  213 + }
  214 + }
  215 +
  216 +public:
  217 + efield(unsigned int res0, unsigned int res1, bool _vector = true)
  218 + {
  219 + init(res0, res1, _vector);
  220 + //pos = rts::rect<T>(rts::vec<T>(-10, 0, -10), rts::vec<T>(-10, 0, 10), rts::vec<T>(10, 0, 10));
  221 + }
  222 +
  223 + //destructor
  224 + ~efield()
  225 + {
  226 + destroy();
  227 + }
  228 +
  229 + ///Clear the field - set all points to zero
  230 + void clear()
  231 + {
  232 + cudaMemset(X, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
  233 +
  234 + if(vector)
  235 + {
  236 + cudaMemset(Y, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
  237 + cudaMemset(Z, 0, sizeof(rts::complex<T>) * R[0] * R[1]);
  238 + }
  239 + }
  240 +
  241 + void position(rect<T> _p)
  242 + {
  243 + pos = _p;
  244 + }
  245 +
  246 + //access functions
  247 + complex<T>* x(){
  248 + return X;
  249 + }
  250 + complex<T>* y(){
  251 + return Y;
  252 + }
  253 + complex<T>* z(){
  254 + return Z;
  255 + }
  256 + unsigned int Ru(){
  257 + return R[0];
  258 + }
  259 + unsigned int Rv(){
  260 + return R[1];
  261 + }
  262 + rect<T> p(){
  263 + return pos;
  264 + }
  265 +
  266 + std::string str()
  267 + {
  268 + stringstream ss;
  269 + ss<<pos<<std::endl;
  270 + ss<<"X: "<<X<<std::endl;
  271 + ss<<"Y: "<<Y<<std::endl;
  272 + ss<<"Z: "<<Z;
  273 +
  274 + return ss.str();
  275 + }
  276 +
  277 + //assignment operator: assignment from another electric field
  278 + efield<T> & operator= (const efield<T> & rhs)
  279 + {
  280 + destroy(); //destroy any previous information about this field
  281 + deepcpy(rhs); //create a deep copy
  282 + return *this; //return the current object
  283 + }
  284 +
  285 + //assignment operator: build an electric field from a plane wave
  286 + efield<T> & operator= (const planewave<T> & rhs)
  287 + {
  288 +
  289 + clear(); //clear any previous field data
  290 + from_planewave(rhs); //create a field from the planewave
  291 + return *this; //return the current object
  292 + }
  293 +
  294 + //assignment operator: add an existing electric field
  295 + efield<T> & operator+= (const efield<T> & rhs)
  296 + {
  297 + //if this field and the source field represent the same regions in space
  298 + if(R[0] == rhs.R[0] && R[1] == rhs.R[1] && pos == rhs.pos)
  299 + {
  300 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  301 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  302 +
  303 + //create one thread for each detector pixel
  304 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  305 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  306 +
  307 + //sum the fields
  308 + gpu_efield_sum <<<dimGrid, dimBlock>>> (X, rhs.X, R[0], R[1]);
  309 + if(Y != NULL)
  310 + {
  311 + gpu_efield_sum <<<dimGrid, dimBlock>>> (Y, rhs.Y, R[0], R[1]);
  312 + gpu_efield_sum <<<dimGrid, dimBlock>>> (Z, rhs.Z, R[0], R[1]);
  313 + }
  314 + }
  315 + else
  316 + {
  317 + std::cout<<"ERROR in efield: The two summed fields do not represent the same geometry."<<std::endl;
  318 + exit(1);
  319 + }
  320 +
  321 + return *this; //return this object
  322 + }
  323 +
  324 + //assignment operator: build an electric field from a beam
  325 + efield<T> & operator= (const rts::beam<T> & rhs)
  326 + {
  327 + //get a vector of monte-carlo samples
  328 + std::vector< rts::planewave<T> > p_list = rhs.mc();
  329 +
  330 + clear(); //clear any previous field data
  331 + for(unsigned int i = 0; i < p_list.size(); i++)
  332 + from_planewave(p_list[i]);
  333 + return *this;
  334 + }
  335 +
  336 +
  337 + //return a scalar field representing field magnitude
  338 + realfield<T, 1, true> mag()
  339 + {
  340 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  341 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  342 +
  343 + //create a scalar field to store the result
  344 + realfield<T, 1, true> M(R[0], R[1]);
  345 +
  346 + //create one thread for each detector pixel
  347 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  348 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  349 +
  350 + //compute the magnitude and store it in a scalar field
  351 + gpu_efield_magnitude<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.ptr(0));
  352 +
  353 + return M;
  354 + }
  355 +
  356 + //return a sacalar field representing the field magnitude at an infinitely small point in time
  357 + realfield<T, 1, true> real_mag()
  358 + {
  359 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  360 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  361 +
  362 + //create a scalar field to store the result
  363 + realfield<T, 1, true> M(R[0], R[1]);
  364 +
  365 + //create one thread for each detector pixel
  366 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  367 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  368 +
  369 + //compute the magnitude and store it in a scalar field
  370 + gpu_efield_real_magnitude<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.ptr(0));
  371 +
  372 + return M;
  373 + }
  374 +
  375 + //return a vector field representing field polarization
  376 + realfield<T, 3, true> polarization()
  377 + {
  378 + if(!vector)
  379 + {
  380 + std::cout<<"ERROR: Cannot compute polarization of a scalar field."<<std::endl;
  381 + exit(1);
  382 + }
  383 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  384 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  385 +
  386 + //create one thread for each detector pixel
  387 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  388 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  389 +
  390 +
  391 + realfield<T, 3, true> Pol(R[0], R[1]); //create a vector field to store the result
  392 +
  393 + //compute the polarization and store it in the vector field
  394 + gpu_efield_polarization<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], Pol.ptr(0), Pol.ptr(1), Pol.ptr(2));
  395 +
  396 + return Pol; //return the vector field
  397 + }
  398 +
  399 + ////////FRIENDSHIP
  400 + //friend void operator<< <T>(rts::efield<T> &ef, rts::halfspace<T> hs);
  401 +
  402 +
  403 +};
  404 +
  405 +
  406 +
  407 +} //end namespace rts
  408 +
  409 +#endif
... ...
stim/optics/esphere.cuh 0 → 100644
  1 +#ifndef RTS_ESPHERE
  2 +#define RTS_ESPHERE
  3 +
  4 +#include "../math/complex.h"
  5 +#include "../math/bessel.h"
  6 +#include "../visualization/colormap.h"
  7 +#include "../optics/planewave.h"
  8 +#include "../cuda/devices.h"
  9 +#include "../optics/efield.cuh"
  10 +
  11 +namespace stim{
  12 +
  13 +/* This class implements a discrete representation of an electromagnetic field
  14 + in 2D scattered by a sphere. This class implements Mie scattering.
  15 +*/
  16 +template<typename P>
  17 +class esphere : public efield<P>
  18 +{
  19 +private:
  20 + stim::complex<P> n; //sphere refractive index
  21 + P a; //sphere radius
  22 +
  23 + //parameters dependent on wavelength
  24 + unsigned int Nl; //number of orders for the calculation
  25 + rts::complex<P>* A; //internal scattering coefficients
  26 + rts::complex<P>* B; //external scattering coefficients
  27 +
  28 + void calcNl(P kmag)
  29 + {
  30 + //return ceil( ((P)6.282 * a) / lambda + 4 * pow( ((P)6.282 * a) / lambda, ((P)1/(P)3)) + 2);
  31 + Nl = ceil( kmag*a + 4 * pow(kmag * a, (P)1/(P)3) + 2);
  32 + }
  33 +
  34 + void calcAB(P k, unsigned int Nl, rts::complex<P>* A, rts::complex<P>* B)
  35 + {
  36 + /* These calculations require double precision, so they are computed
  37 + using doubles and converted to P at the end.
  38 +
  39 + Input:
  40 +
  41 + k = magnitude of the k vector (tau/lambda)
  42 + Nl = highest order coefficient ([0 Nl] are computed)
  43 + */
  44 +
  45 + //clear the previous coefficients
  46 + rts::complex<P>* cpuA = (rts::complex<P>*)malloc(sizeof(rts::complex<P>) * (Nl+1));
  47 + rts::complex<P>* cpuB = (rts::complex<P>*)malloc(sizeof(rts::complex<P>) * (Nl+1));
  48 +
  49 + //convert to an std complex value
  50 + complex<double> nc = (rts::complex<double>)n;
  51 +
  52 + //compute the magnitude of the k vector
  53 + complex<double> kna = nc * k * (double)a;
  54 +
  55 + //compute the arguments k*a and k*n*a
  56 + complex<double> ka(k * a, 0.0);
  57 +
  58 + //allocate space for the Bessel functions of the first and second kind (and derivatives)
  59 + unsigned int bytes = sizeof(complex<double>) * (Nl + 1);
  60 + complex<double>* cjv_ka = (complex<double>*)malloc(bytes);
  61 + complex<double>* cyv_ka = (complex<double>*)malloc(bytes);
  62 + complex<double>* cjvp_ka = (complex<double>*)malloc(bytes);
  63 + complex<double>* cyvp_ka = (complex<double>*)malloc(bytes);
  64 + complex<double>* cjv_kna = (complex<double>*)malloc(bytes);
  65 + complex<double>* cyv_kna = (complex<double>*)malloc(bytes);
  66 + complex<double>* cjvp_kna = (complex<double>*)malloc(bytes);
  67 + complex<double>* cyvp_kna = (complex<double>*)malloc(bytes);
  68 +
  69 + //allocate space for the spherical Hankel functions and derivative
  70 + complex<double>* chv_ka = (complex<double>*)malloc(bytes);
  71 + complex<double>* chvp_ka = (complex<double>*)malloc(bytes);
  72 +
  73 + //compute the bessel functions using the CPU-based algorithm
  74 + double vm;
  75 + cbessjyva_sph(Nl, ka, vm, cjv_ka, cyv_ka, cjvp_ka, cyvp_ka);
  76 + cbessjyva_sph(Nl, kna, vm, cjv_kna, cyv_kna, cjvp_kna, cyvp_kna);
  77 +
  78 + //compute A for each order
  79 + complex<double> i(0, 1);
  80 + complex<double> a, b, c, d;
  81 + complex<double> An, Bn;
  82 + for(int l=0; l<=Nl; l++)
  83 + {
  84 + //compute the Hankel functions from j and y
  85 + chv_ka[l] = cjv_ka[l] + i * cyv_ka[l];
  86 + chvp_ka[l] = cjvp_ka[l] + i * cyvp_ka[l];
  87 +
  88 + //Compute A (internal scattering coefficient)
  89 + //compute the numerator and denominator for A
  90 + a = cjv_ka[l] * chvp_ka[l] - cjvp_ka[l] * chv_ka[l];
  91 + b = cjv_kna[l] * chvp_ka[l] - chv_ka[l] * cjvp_kna[l] * nc;
  92 +
  93 + //calculate A and add it to the list
  94 + rts::complex<double> An = (2.0 * l + 1.0) * pow(i, l) * (a / b);
  95 + cpuA[l] = (rts::complex<P>)An;
  96 +
  97 + //Compute B (external scattering coefficient)
  98 + c = cjv_ka[l] * cjvp_kna[l] * nc - cjv_kna[l] * cjvp_ka[l];
  99 + d = cjv_kna[l] * chvp_ka[l] - chv_ka[l] * cjvp_kna[l] * nc;
  100 +
  101 + //calculate B and add it to the list
  102 + rts::complex<double> Bn = (2.0 * l + 1.0) * pow(i, l) * (c / d);
  103 + cpuB[l] = (rts::complex<P>)Bn;
  104 +
  105 + std::cout<<"A: "<<cpuA[l]<<" B: "<<cpuB[l]<<std::endl;
  106 + }
  107 +
  108 +
  109 + if(A != NULL) cudaFree(A); //free any previous coefficients
  110 + if(B != NULL) cudaFree(B);
  111 + cudaMalloc(&A, sizeof(rts::complex<P>) * (Nl+1)); //allocate memory for new coefficients
  112 + cudaMalloc(&B, sizeof(rts::complex<P>) * (Nl+1));
  113 +
  114 + //copy the calculations from the CPU to the GPU
  115 + cudaMemcpy(A, cpuA, sizeof(rts::complex<P>) * (Nl+1), cudaMemcpyDeviceToHost);
  116 + cudaMemcpy(B, cpuB, sizeof(rts::complex<P>) * (Nl+1), cudaMemcpyDeviceToHost);
  117 + }
  118 +
  119 +public:
  120 +
  121 + esphere(unsigned int res0, unsigned int res1, P _a, rts::complex<P> _n, bool _scalar = false) :
  122 + efield<P>(res0, res1, _scalar)
  123 + {
  124 + std::cout<<"Sphere scattered field created."<<std::endl;
  125 + n = _n; //save the refractive index
  126 + a = _a; //save the radius
  127 + }
  128 +
  129 + //assignment operator: build an electric field from a plane wave
  130 + efield<P> & operator= (const planewave<P> & rhs)
  131 + {
  132 + calcNl(rhs.kmag()); //compute the number of scattering coefficients
  133 + std::cout<<"Nl: "<<Nl<<std::endl;
  134 + calcAB(rhs.kmag(), Nl, A, B); //compute the scattering coefficients
  135 +
  136 + //determine important parameters for the scattering domain
  137 + unsigned int sR = ceil(sqrt( (P)(pow((P)esphere::R[0],2) + pow((P)esphere::R[1],2))) );
  138 + //unsigned int thetaR = 256;
  139 +
  140 + /////////////////////continue scattering code here/////////////////////////
  141 +
  142 + esphere::clear(); //clear any previous field data
  143 + from_planewave(rhs); //create a field from the planewave
  144 + return *this; //return the current object
  145 + }
  146 +
  147 + string str()
  148 + {
  149 + stringstream ss;
  150 + ss<<"Mie Scattered Field"<<std::endl;
  151 + ss<<(*this).efield<P>::str()<<std::endl;
  152 + ss<<"a = "<<a<<std::endl;
  153 + ss<<"n = "<<n;
  154 +
  155 + return ss.str();
  156 + }
  157 +
  158 +};
  159 +
  160 +} //end namespace rts
  161 +
  162 +#endif
... ...
stim/optics/halfspace.cuh 0 → 100644
  1 +#ifndef RTS_HALFSPACE_H
  2 +#define RTS_HALFSPACE_H
  3 +
  4 +#include "../math/plane.h"
  5 +
  6 +
  7 +namespace stim{
  8 +
  9 +//GPU kernel to compute the electric field
  10 +template<typename T>
  11 +__global__ void gpu_halfspace_pw2ef(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1,
  12 + plane<T> P, planewave<T> w, rect<T> q, bool at_surface = false)
  13 +{
  14 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  15 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  16 +
  17 + //make sure that the thread indices are in-bounds
  18 + if(iu >= r0 || iv >= r1) return;
  19 +
  20 + //compute the index into the field
  21 + int i = iv*r0 + iu;
  22 +
  23 + //get the current position
  24 + vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 );
  25 +
  26 + if(at_surface){
  27 + if(P.side(p) > 0)
  28 + return;
  29 + }
  30 + else{
  31 + if(P.side(p) >= 0)
  32 + return;
  33 + }
  34 +
  35 + //if the current position is on the wrong side of the plane
  36 +
  37 + //vec<T> r(p[0], p[1], p[2]);
  38 +
  39 + complex<T> x( 0.0f, w.kvec().dot(p) );
  40 + //complex<T> phase( 0.0f, w.phase());
  41 +
  42 + if(Y == NULL) //if this is a scalar simulation
  43 + X[i] += w.E().len() * exp(x); //use the vector magnitude as the plane wave amplitude
  44 + else
  45 + {
  46 + //X[i] = Y[i] = Z[i] = 1;
  47 + X[i] += w.E()[0] * exp(x);// * exp(phase);
  48 + Y[i] += w.E()[1] * exp(x);// * exp(phase);
  49 + Z[i] += w.E()[2] * exp(x);// * exp(phase);
  50 + }
  51 +}
  52 +
  53 +//GPU kernel to compute the electric displacement field
  54 +template<typename T>
  55 +__global__ void gpu_halfspace_pw2df(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1,
  56 + plane<T> P, planewave<T> w, rect<T> q, T n, bool at_surface = false)
  57 +{
  58 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  59 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  60 +
  61 + //make sure that the thread indices are in-bounds
  62 + if(iu >= r0 || iv >= r1) return;
  63 +
  64 + //compute the index into the field
  65 + int i = iv*r0 + iu;
  66 +
  67 + //get the current position
  68 + vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 );
  69 +
  70 + if(at_surface){
  71 + if(P.side(p) > 0)
  72 + return;
  73 + }
  74 + else{
  75 + if(P.side(p) >= 0)
  76 + return;
  77 + }
  78 +
  79 + //if the current position is on the wrong side of the plane
  80 +
  81 + //vec<T> r(p[0], p[1], p[2]);
  82 +
  83 + complex<T> x( 0.0f, w.kvec().dot(p) );
  84 + //complex<T> phase( 0.0f, w.phase());
  85 +
  86 + //vec< complex<T> > testE(1, 0, 0);
  87 +
  88 +
  89 +
  90 + if(Y == NULL) //if this is a scalar simulation
  91 + X[i] += w.E().len() * exp(x); //use the vector magnitude as the plane wave amplitude
  92 + else
  93 + {
  94 + plane< complex<T> > cplane = plane< complex<T>, 3>(P);
  95 + vec< complex<T> > E_para;// = cplane.parallel(w.E());
  96 + vec< complex<T> > E_perp;// = cplane.perpendicular(w.E()) * (n*n);
  97 + cplane.decompose(w.E(), E_para, E_perp);
  98 + T epsilon = n*n;
  99 +
  100 + X[i] += (E_para[0] + E_perp[0] * epsilon) * exp(x);
  101 + Y[i] += (E_para[1] + E_perp[1] * epsilon) * exp(x);
  102 + Z[i] += (E_para[2] + E_perp[2] * epsilon) * exp(x);
  103 + }
  104 +}
  105 +
  106 +//computes a scalar field containing the refractive index of the half-space at each point
  107 +template<typename T>
  108 +__global__ void gpu_halfspace_n(T* n, unsigned int r0, unsigned int r1, rect<T> q, plane<T> P, T n0, T n1){
  109 +
  110 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  111 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  112 +
  113 + //make sure that the thread indices are in-bounds
  114 + if(iu >= r0 || iv >= r1) return;
  115 +
  116 + //compute the index into the field
  117 + int i = iv*r0 + iu;
  118 +
  119 + //get the current position
  120 + vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 );
  121 +
  122 + //set the appropriate refractive index
  123 + if(P.side(p) < 0) n[i] = n0;
  124 + else n[i] = n1;
  125 +}
  126 +
  127 +template<class T>
  128 +class halfspace
  129 +{
  130 +private:
  131 + rts::plane<T> S; //surface plane splitting the half space
  132 + rts::complex<T> n0; //refractive index at the front of the plane
  133 + rts::complex<T> n1; //refractive index at the back of the plane
  134 +
  135 + //lists of waves in front (pw0) and behind (pw1) the plane
  136 + std::vector< rts::planewave<T> > w0;
  137 + std::vector< rts::planewave<T> > w1;
  138 +
  139 + //rts::planewave<T> pi; //incident plane wave
  140 + //rts::planewave<T> pr; //plane wave reflected from the surface
  141 + //rts::planewave<T> pt; //plane wave transmitted through the surface
  142 +
  143 + void init(){
  144 + n0 = 1.0;
  145 + n1 = 1.5;
  146 + }
  147 +
  148 + //compute which side of the interface is hit by the incoming plane wave (0 = front, 1 = back)
  149 + bool facing(planewave<T> p){
  150 + if(p.kvec().dot(S.norm()) > 0)
  151 + return 1;
  152 + else
  153 + return 0;
  154 + }
  155 +
  156 + T calc_theta_i(vec<T> v){
  157 +
  158 + vec<T> v_hat = v.norm();
  159 +
  160 + //compute the cosine of the angle between k_hat and the plane normal
  161 + T cos_theta_i = v_hat.dot(S.norm());
  162 +
  163 + return acos(abs(cos_theta_i));
  164 + }
  165 +
  166 + T calc_theta_t(T ni_nt, T theta_i){
  167 +
  168 + T sin_theta_t = ni_nt * sin(theta_i);
  169 + return asin(sin_theta_t);
  170 + }
  171 +
  172 +
  173 +public:
  174 +
  175 + //constructors
  176 + halfspace(){
  177 + init();
  178 + }
  179 +
  180 + halfspace(T na, T nb){
  181 + init();
  182 + n0 = na;
  183 + n1 = nb;
  184 + }
  185 +
  186 + halfspace(T na, T nb, plane<T> p){
  187 + n0 = na;
  188 + n1 = nb;
  189 + S = p;
  190 + }
  191 +
  192 + //compute the transmitted and reflective waves given the incident (vacuum) plane wave p
  193 + void incident(rts::planewave<T> p){
  194 +
  195 + planewave<T> r, t;
  196 + p.scatter(S, n1.real()/n0.real(), r, t);
  197 +
  198 + //std::cout<<"i+r: "<<p.r()[0] + r.r()[0]<<std::endl;
  199 + //std::cout<<"t: "<<t.r()[0]<<std::endl;
  200 +
  201 + if(facing(p)){
  202 + w1.push_back(p);
  203 +
  204 + if(r.E().len() != 0)
  205 + w1.push_back(r);
  206 + if(t.E().len() != 0)
  207 + w0.push_back(t);
  208 + }
  209 + else{
  210 + w0.push_back(p);
  211 +
  212 + if(r.E().len() != 0)
  213 + w0.push_back(r);
  214 + if(t.E().len() != 0)
  215 + w1.push_back(t);
  216 + }
  217 + }
  218 +
  219 + void incident(rts::beam<T> b, unsigned int N = 10000){
  220 +
  221 + //generate a plane wave decomposition for the beam
  222 + std::vector< planewave<T> > pw_list = b.mc(N);
  223 +
  224 + //calculate the reflected and refracted waves for each incident wave
  225 + for(unsigned int w = 0; w < pw_list.size(); w++){
  226 + incident(pw_list[w]);
  227 + }
  228 + }
  229 +
  230 + //return the electric field at the specified resolution and position
  231 + rts::efield<T> E(unsigned int r0, unsigned int r1, rect<T> R){
  232 +
  233 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  234 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  235 +
  236 + //create one thread for each detector pixel
  237 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  238 + dim3 dimGrid((r0 + SQRT_BLOCK -1)/SQRT_BLOCK, (r1 + SQRT_BLOCK - 1)/SQRT_BLOCK);
  239 +
  240 + //create an electric field
  241 + rts::efield<T> ef(r0, r1);
  242 + ef.position(R);
  243 +
  244 + //render each plane wave
  245 +
  246 + //plane waves at the surface front
  247 + for(unsigned int w = 0; w < w0.size(); w++){
  248 + //std::cout<<"w0 "<<w<<": "<<hs.w0[w].str()<<std::endl;
  249 + rts::gpu_halfspace_pw2ef<T> <<<dimGrid, dimBlock>>> (ef.x(), ef.y(), ef.z(), r0, r1, S, w0[w], ef.p());
  250 + }
  251 +
  252 + //plane waves at the surface back
  253 + for(unsigned int w = 0; w < w1.size(); w++){
  254 + //std::cout<<"w1 "<<w<<": "<<hs.w1[w].str()<<std::endl;
  255 + rts::gpu_halfspace_pw2ef<T> <<<dimGrid, dimBlock>>> (ef.x(), ef.y(), ef.z(), r0, r1, -S, w1[w], ef.p(), true);
  256 + }
  257 +
  258 + return ef;
  259 + }
  260 +
  261 + //return the electric displacement at the specified resolution and position
  262 + rts::efield<T> D(unsigned int r0, unsigned int r1, rect<T> R){
  263 +
  264 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  265 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  266 +
  267 + //create one thread for each detector pixel
  268 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  269 + dim3 dimGrid((r0 + SQRT_BLOCK -1)/SQRT_BLOCK, (r1 + SQRT_BLOCK - 1)/SQRT_BLOCK);
  270 +
  271 + //create a complex vector field
  272 + rts::efield<T> df(r0, r1);
  273 + df.position(R);
  274 +
  275 + //render each plane wave
  276 +
  277 + //plane waves at the surface front
  278 + for(unsigned int w = 0; w < w0.size(); w++){
  279 + //std::cout<<"w0 "<<w<<": "<<hs.w0[w].str()<<std::endl;
  280 + rts::gpu_halfspace_pw2df<T> <<<dimGrid, dimBlock>>> (df.x(), df.y(), df.z(), r0, r1, S, w0[w], df.p(), n0.real());
  281 + }
  282 +
  283 + //plane waves at the surface back
  284 + for(unsigned int w = 0; w < w1.size(); w++){
  285 + //std::cout<<"w1 "<<w<<": "<<hs.w1[w].str()<<std::endl;
  286 + rts::gpu_halfspace_pw2df<T> <<<dimGrid, dimBlock>>> (df.x(), df.y(), df.z(), r0, r1, -S, w1[w], df.p(), n1.real(), true);
  287 + }
  288 +
  289 + return df;
  290 + }
  291 +
  292 + realfield<T, 1, true> nfield(unsigned int Ru, unsigned int Rv, rect<T> p){
  293 +
  294 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  295 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  296 +
  297 + //create one thread for each detector pixel
  298 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  299 + dim3 dimGrid((Ru + SQRT_BLOCK -1)/SQRT_BLOCK, (Rv + SQRT_BLOCK - 1)/SQRT_BLOCK);
  300 +
  301 + realfield<T, 1, true> n(Ru, Rv); //create a scalar field to store the result of n
  302 +
  303 + rts::gpu_halfspace_n<T> <<<dimGrid, dimBlock>>> (n.ptr(), Ru, Rv, p, S, n0.real(), n1.real());
  304 +
  305 + return n;
  306 + }
  307 +
  308 + std::string str(){
  309 + std::stringstream ss;
  310 + ss<<"Half Space-------------"<<std::endl;
  311 + ss<<"n0: "<<n0<<std::endl;
  312 + ss<<"n1: "<<n1<<std::endl;
  313 + ss<<S.str();
  314 +
  315 +
  316 + if(w0.size() != 0 || w1.size() != 0){
  317 + ss<<std::endl<<"Plane Waves:"<<std::endl;
  318 + for(unsigned int w = 0; w < w0.size(); w++){
  319 + ss<<"w0 = "<<w<<": "<<w0[w]<<std::endl;
  320 + }
  321 + for(unsigned int w = 0; w < w1.size(); w++){
  322 + ss<<"w1 = "<<w<<": "<<w1[w]<<std::endl;
  323 + }
  324 + }
  325 +
  326 + return ss.str();
  327 + }
  328 + ////////FRIENDSHIP
  329 + //friend void operator<< <> (rts::efield<T> &ef, rts::halfspace<T> hs);
  330 +
  331 +};
  332 +
  333 +
  334 +
  335 +
  336 +}
  337 +
  338 +
  339 +#endif
... ...
stim/optics/material.h 0 → 100644
  1 +#ifndef RTS_MATERIAL_H
  2 +#define RTS_MATERIAL_H
  3 +
  4 +#include <vector>
  5 +#include <ostream>
  6 +#include <iostream>
  7 +#include <fstream>
  8 +#include <complex>
  9 +#include <algorithm>
  10 +#include <sstream>
  11 +#include "../math/complex.h"
  12 +#include "../math/constants.h"
  13 +#include "../math/function.h"
  14 +
  15 +namespace stim{
  16 +
  17 +//Material class - default representation for the material property is the refractive index (RI)
  18 +template<typename T>
  19 +class material : public function< T, complex<T> >{
  20 +
  21 +public:
  22 + enum wave_property{microns, inverse_cm};
  23 + enum material_property{ri, absorbance};
  24 +
  25 +private:
  26 +
  27 + using function< T, complex<T> >::X;
  28 + using function< T, complex<T> >::Y;
  29 + using function< T, complex<T> >::insert;
  30 + using function< T, complex<T> >::bounding;
  31 +
  32 + std::string name; //name for the material (defaults to file name)
  33 +
  34 + void process_header(std::string str, wave_property& wp, material_property& mp){
  35 +
  36 + std::stringstream ss(str); //create a stream from the data string
  37 + std::string line;
  38 + std::getline(ss, line); //get the first line as a string
  39 + while(line[0] == '#'){ //continue looping while the line is a comment
  40 +
  41 + std::stringstream lstream(line); //create a stream from the line
  42 + lstream.ignore(); //ignore the first character ('#')
  43 +
  44 + std::string prop; //get the property name
  45 + lstream>>prop;
  46 +
  47 + if(prop == "X"){
  48 + std::string wp_name;
  49 + lstream>>wp_name;
  50 + if(wp_name == "microns") wp = microns;
  51 + else if(wp_name == "inverse_cm") wp = inverse_cm;
  52 + }
  53 + else if(prop == "Y"){
  54 + std::string mp_name;
  55 + lstream>>mp_name;
  56 + if(mp_name == "ri") mp = ri;
  57 + else if(mp_name == "absorbance") mp = absorbance;
  58 + }
  59 +
  60 + std::getline(ss, line); //get the next line
  61 + }
  62 +
  63 + function< T, stim::complex<T> >::process_string(str);
  64 + }
  65 +
  66 + void from_inverse_cm(){
  67 + //convert inverse centimeters to wavelength (in microns)
  68 + for(unsigned int i=0; i<X.size(); i++)
  69 + X[i] = 10000 / X[i];
  70 +
  71 + //reverse the function array
  72 + std::reverse(X.begin(), X.end());
  73 + std::reverse(Y.begin(), Y.end());
  74 +
  75 + }
  76 +
  77 + void init(){
  78 + bounding[0] = bounding[1] = stim::complex<T>(1, 0);
  79 + }
  80 +
  81 +
  82 +public:
  83 +
  84 + material(std::string filename, wave_property wp, material_property mp){
  85 + name = filename;
  86 + load(filename, wp, mp);
  87 + }
  88 +
  89 + material(std::string filename){
  90 + name = filename;
  91 + load(filename);
  92 + }
  93 +
  94 + material(){
  95 + init();
  96 + }
  97 +
  98 + complex<T> getN(T lambda){
  99 + return function< T, complex<T> >::linear(lambda);
  100 + }
  101 +
  102 + void load(std::string filename, wave_property wp, material_property mp){
  103 +
  104 + //load the file as a function
  105 + function< T, complex<T> >::load(filename);
  106 + }
  107 +
  108 + void load(std::string filename){
  109 +
  110 + wave_property wp = inverse_cm;
  111 + material_property mp = ri;
  112 + //turn the file into a string
  113 + std::ifstream t(filename.c_str()); //open the file as a stream
  114 +
  115 + if(!t){
  116 + std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl;
  117 + exit(1);
  118 + }
  119 + std::string str((std::istreambuf_iterator<char>(t)),
  120 + std::istreambuf_iterator<char>());
  121 +
  122 + //process the header information
  123 + process_header(str, wp, mp);
  124 +
  125 + //convert units
  126 + if(wp == inverse_cm)
  127 + from_inverse_cm();
  128 + //set the bounding values
  129 + bounding[0] = Y[0];
  130 + bounding[1] = Y.back();
  131 + }
  132 + std::string str(){
  133 + std::stringstream ss;
  134 + ss<<name<<std::endl;
  135 + ss<<function< T, complex<T> >::str();
  136 + return ss.str();
  137 + }
  138 + std::string get_name(){
  139 + return name;
  140 + }
  141 +
  142 + void set_name(std::string str){
  143 + name = str;
  144 + }
  145 +
  146 +};
  147 +
  148 +}
  149 +
  150 +
  151 +
  152 +
  153 +#endif
... ...
stim/optics/mirst-1d.cuh 0 → 100644
  1 +#include "../optics/material.h"
  2 +#include "../math/complexfield.cuh"
  3 +#include "../math/constants.h"
  4 +//#include "../envi/bil.h"
  5 +
  6 +#include "cufft.h"
  7 +
  8 +#include <vector>
  9 +#include <sstream>
  10 +
  11 +namespace stim{
  12 +
  13 +//this function writes a sinc function to "dest" such that an iFFT produces a slab
  14 +template<typename T>
  15 +__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri,
  16 + T* src, T* zf,
  17 + T w, unsigned int zR, unsigned int nuR){
  18 + //dest = complex field representing the sample
  19 + //ri = refractive indices for each wavelength
  20 + //src = intensity of the light source for each wavelength
  21 + //zf = z position of the slab interface for each wavelength (accounting for optical path length)
  22 + //w = width of the slab (in pixels)
  23 + //zR = number of z-axis samples
  24 + //nuR = number of wavelengths
  25 +
  26 + //get the current coordinate in the plane slice
  27 + int ifz = blockIdx.x * blockDim.x + threadIdx.x;
  28 + int inu = blockIdx.y * blockDim.y + threadIdx.y;
  29 +
  30 + //make sure that the thread indices are in-bounds
  31 + if(inu >= nuR || ifz >= zR) return;
  32 +
  33 + int i = inu * zR + ifz;
  34 +
  35 + T fz;
  36 + if(ifz < zR/2)
  37 + fz = ifz / (T)zR;
  38 + else
  39 + fz = -(zR - ifz) / (T)zR;
  40 +
  41 + //if the slab starts outside of the simulation domain, just return
  42 + if(zf[inu] >= zR) return;
  43 +
  44 + //fill the array along z with a sinc function representing the Fourier transform of the layer
  45 +
  46 + T opl = w * ri[inu].real(); //optical path length
  47 +
  48 + //handle the case where the slab goes outside the simulation domain
  49 + if(zf[inu] + opl >= zR)
  50 + opl = zR - zf[inu];
  51 +
  52 + if(opl == 0) return;
  53 +
  54 + //T l = w * ri[inu].real();
  55 + //complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0));
  56 + complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));
  57 +
  58 + complex<T> eta = ri[inu] * ri[inu] - 1;
  59 +
  60 + //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz);
  61 + if(ifz == 0)
  62 + dest[i] += opl * exp(e) * eta * src[inu];
  63 + else
  64 + dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
  65 +}
  66 +
  67 +template<typename T>
  68 +__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){
  69 + //zf = current z depth (optical path length) in pixels
  70 + //ri = refractive index of the material
  71 + //w = actual width of the layer (in pixels)
  72 +
  73 +
  74 + //compute the index for this thread
  75 + int i = blockIdx.x * blockDim.x + threadIdx.x;
  76 + if(i >= S) return;
  77 +
  78 + if(ri == NULL)
  79 + zf[i] += w;
  80 + else
  81 + zf[i] += ri[i].real() * w;
  82 +}
  83 +
  84 +//apply the 1D MIRST filter to an existing sample (overwriting the sample)
  85 +template<typename T>
  86 +__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda,
  87 + T dFz,
  88 + T inNA, T outNA,
  89 + unsigned int lambdaR, unsigned int zR,
  90 + T sigma = 0){
  91 + //sampleFFT = the sample in the Fourier domain (will be overwritten)
  92 + //lambda = list of wavelengths
  93 + //dFz = delta along the Fz axis in the frequency domain
  94 + //inNA = NA of the internal obscuration
  95 + //outNA = NA of the objective
  96 + //zR = number of pixels along the Fz axis (same as the z-axis)
  97 + //lambdaR = number of wavelengths
  98 + //sigma = width of the Gaussian source
  99 + int ifz = blockIdx.x * blockDim.x + threadIdx.x;
  100 + int inu = blockIdx.y * blockDim.y + threadIdx.y;
  101 +
  102 + if(inu >= lambdaR || ifz >= zR) return;
  103 +
  104 + //calculate the index into the sample FT
  105 + int i = inu * zR + ifz;
  106 +
  107 + //compute the frequency (and set all negative spatial frequencies to zero)
  108 + T fz;
  109 + if(ifz < zR / 2)
  110 + fz = ifz * dFz;
  111 + //if the spatial frequency is negative, set it to zero and exit
  112 + else{
  113 + sampleFFT[i] = 0;
  114 + return;
  115 + }
  116 +
  117 + //compute the frequency in inverse microns
  118 + T nu = 1/lambda[inu];
  119 +
  120 + //determine the radius of the integration circle
  121 + T nu_sq = nu * nu;
  122 + T fz_sq = (fz * fz) / 4;
  123 +
  124 + //cut off frequencies above the diffraction limit
  125 + T r;
  126 + if(fz_sq < nu_sq)
  127 + r = sqrt(nu_sq - fz_sq);
  128 + else
  129 + r = 0;
  130 +
  131 + //account for the optics
  132 + T Q = 0;
  133 + if(r > nu * inNA && r < nu * outNA)
  134 + Q = 1;
  135 +
  136 + //account for the source
  137 + //T sigma = 30.0;
  138 + T s = exp( - (r*r * sigma*sigma) / 2 );
  139 + //T s=1;
  140 +
  141 + //compute the final filter
  142 + T mirst = 0;
  143 + if(fz != 0)
  144 + mirst = 2 * stimPI * r * s * Q * (1/fz);
  145 +
  146 + sampleFFT[i] *= mirst;
  147 +
  148 +}
  149 +
  150 +/*This object performs a 1-dimensional (layered) MIRST simulation
  151 +*/
  152 +template<typename T>
  153 +class mirst1d{
  154 +
  155 +private:
  156 + unsigned int Z; //z-axis resolution
  157 + unsigned int pad; //pixel padding on either side of the sample
  158 +
  159 + std::vector< material<T> > matlist; //list of materials
  160 + std::vector< T > layers; //list of layer thicknesses
  161 +
  162 + std::vector< T > lambdas; //list of wavelengths that are being simulated
  163 + unsigned int S; //number of wavelengths (size of "lambdas")
  164 +
  165 + T NA[2]; //numerical aperature (central obscuration and outer diameter)
  166 +
  167 + function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters)
  168 +
  169 + complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc.
  170 +
  171 + void fft(int direction = CUFFT_FORWARD){
  172 +
  173 + unsigned padZ = Z + pad;
  174 +
  175 + //create cuFFT handles
  176 + cufftHandle plan;
  177 + cufftResult result;
  178 +
  179 + if(sizeof(T) == 4)
  180 + result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision
  181 + else
  182 + result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision
  183 +
  184 + //check for Plan 1D errors
  185 + if(result != CUFFT_SUCCESS){
  186 + std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;
  187 + CufftError(result);
  188 + exit(1);
  189 + }
  190 +
  191 + if(sizeof(T) == 4)
  192 + result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);
  193 + else
  194 + result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);
  195 +
  196 + //check for FFT errors
  197 + if(result != CUFFT_SUCCESS){
  198 + std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;
  199 + CufftError(result);
  200 + exit(1);
  201 + }
  202 +
  203 + cufftDestroy(plan);
  204 + }
  205 +
  206 +
  207 + //initialize the scratch memory
  208 + void init_scratch(){
  209 + scratch = complexfield<T, 1>(Z + pad , lambdas.size());
  210 + scratch = 0;
  211 + }
  212 +
  213 + //get the list of scattering efficiency (eta) values for a specified layer
  214 + std::vector< complex<T> > layer_etas(unsigned int l){
  215 +
  216 + std::vector< complex<T> > etas;
  217 +
  218 + //fill the list of etas
  219 + for(unsigned int i=0; i<lambdas.size(); i++)
  220 + etas.push_back( matlist[l].eta(lambdas[i]) );
  221 + return etas;
  222 + }
  223 +
  224 + //calculates the optimal block and grid sizes using information from the GPU
  225 + void cuda_params(dim3& grids, dim3& blocks){
  226 + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
  227 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  228 +
  229 + //create one thread for each detector pixel
  230 + blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
  231 + grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);
  232 + }
  233 +
  234 + //add the fourier transform of layer n to the scratch space
  235 + void build_layer_fft(unsigned int n, T* zf){
  236 + unsigned int paddedZ = Z + pad;
  237 +
  238 + T wpx = layers[n] / dz(); //calculate the width of the layer in pixels
  239 +
  240 + //allocate memory for the refractive index
  241 + complex<T>* gpuRi;
  242 + HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));
  243 +
  244 + //allocate memory for the source profile
  245 + T* gpuSrc;
  246 + HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));
  247 +
  248 + complex<T> ri;
  249 + T source;
  250 + //store the refractive index and source profile in a CPU array
  251 + for(int inu=0; inu<S; inu++){
  252 + //save the refractive index to the GPU
  253 + ri = matlist[n].getN(lambdas[inu]);
  254 + HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice ));
  255 +
  256 + //save the source profile to the GPU
  257 + source = source_profile(10000 / lambdas[inu]);
  258 + HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice ));
  259 +
  260 + }
  261 +
  262 + //create one thread for each pixel of the field slice
  263 + dim3 gridDim, blockDim;
  264 + cuda_params(gridDim, blockDim);
  265 + stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);
  266 +
  267 + int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size
  268 + int linGrid = S / linBlock + 1;
  269 + stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);
  270 +
  271 + //free memory
  272 + HANDLE_ERROR(cudaFree(gpuRi));
  273 + HANDLE_ERROR(cudaFree(gpuSrc));
  274 + }
  275 +
  276 + void build_sample(){
  277 + init_scratch(); //initialize the GPU scratch space
  278 + //build_layer(1);
  279 +
  280 + T* zf;
  281 + HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));
  282 + HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));
  283 +
  284 + //render each layer of the sample
  285 + for(unsigned int l=0; l<layers.size(); l++){
  286 + build_layer_fft(l, zf);
  287 + }
  288 +
  289 + HANDLE_ERROR(cudaFree(zf));
  290 + }
  291 +
  292 + void apply_filter(){
  293 + dim3 gridDim, blockDim;
  294 + cuda_params(gridDim, blockDim);
  295 +
  296 + unsigned int Zpad = Z + pad;
  297 +
  298 + T sim_range = dz() * Zpad;
  299 + T dFz = 1 / sim_range;
  300 +
  301 + //copy the array of wavelengths to the GPU
  302 + T* gpuLambdas;
  303 + HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad));
  304 + HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice));
  305 + stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas,
  306 + dFz,
  307 + NA[0], NA[1],
  308 + S, Zpad);
  309 + }
  310 +
  311 + //crop the image to the sample thickness - keep in mind that sample thickness != optical path length
  312 + void crop(){
  313 +
  314 + scratch = scratch.crop(Z, S);