Commit 8823488b5fd29c98d502eaf626b4bae0fbc99a78
1 parent
bb7275b6
Added missing files
Showing
8 changed files
with
2076 additions
and
0 deletions
Show diff stats
1 | +#ifndef RTS_QUAD_H | |
2 | +#define RTS_QUAD_H | |
3 | + | |
4 | +//enable CUDA_CALLABLE macro | |
5 | +#include <stim/cuda/callable.h> | |
6 | +#include <stim/math/vector.h> | |
7 | +#include <stim/math/triangle.h> | |
8 | +#include <stim/math/quaternion.h> | |
9 | +#include <iostream> | |
10 | +#include <iomanip> | |
11 | +#include <algorithm> | |
12 | + | |
13 | +namespace stim{ | |
14 | + | |
15 | +//template for a quadangle class in ND space | |
16 | +template <class T, int N = 3> | |
17 | +struct quad | |
18 | +{ | |
19 | + /* | |
20 | + B------------------>C | |
21 | + ^ ^ | |
22 | + | | | |
23 | + Y | | |
24 | + | | | |
25 | + | | | |
26 | + A---------X-------->O | |
27 | + */ | |
28 | + | |
29 | + /*T A[N]; | |
30 | + T B[N]; | |
31 | + T C[N];*/ | |
32 | + | |
33 | + rts::vec<T, N> A; | |
34 | + rts::vec<T, N> X; | |
35 | + rts::vec<T, N> Y; | |
36 | + | |
37 | + | |
38 | + CUDA_CALLABLE quad() | |
39 | + { | |
40 | + | |
41 | + } | |
42 | + | |
43 | + CUDA_CALLABLE quad(vec<T, N> a, vec<T, N> b, vec<T, N> c) | |
44 | + { | |
45 | + | |
46 | + A = a; | |
47 | + Y = b - a; | |
48 | + X = c - a - Y; | |
49 | + | |
50 | + } | |
51 | + | |
52 | + /******************************************************************* | |
53 | + Constructor - create a quad from a position, normal, and rotation | |
54 | + *******************************************************************/ | |
55 | + CUDA_CALLABLE quad(rts::vec<T, N> c, rts::vec<T, N> normal, T width, T height, T theta) | |
56 | + { | |
57 | + | |
58 | + //compute the X direction - start along world-space X | |
59 | + Y = rts::vec<T, N>(0, 1, 0); | |
60 | + if(Y == normal) | |
61 | + Y = rts::vec<T, N>(0, 0, 1); | |
62 | + | |
63 | + X = Y.cross(normal).norm(); | |
64 | + | |
65 | + std::cout<<X<<std::endl; | |
66 | + | |
67 | + //rotate the X axis by theta radians | |
68 | + rts::quaternion<T> q; | |
69 | + q.CreateRotation(theta, normal); | |
70 | + X = q.toMatrix3() * X; | |
71 | + Y = normal.cross(X); | |
72 | + | |
73 | + //normalize everything | |
74 | + X = X.norm(); | |
75 | + Y = Y.norm(); | |
76 | + | |
77 | + //scale to match the quad width and height | |
78 | + X = X * width; | |
79 | + Y = Y * height; | |
80 | + | |
81 | + //set the corner of the plane | |
82 | + A = c - X * 0.5f - Y * 0.5f; | |
83 | + | |
84 | + std::cout<<X<<std::endl; | |
85 | + } | |
86 | + | |
87 | + //boolean comparison | |
88 | + bool operator==(const quad<T, N> & rhs) | |
89 | + { | |
90 | + if(A == rhs.A && X == rhs.X && Y == rhs.Y) | |
91 | + return true; | |
92 | + else | |
93 | + return false; | |
94 | + } | |
95 | + | |
96 | + /******************************************* | |
97 | + Return the normal for the quad | |
98 | + *******************************************/ | |
99 | + CUDA_CALLABLE rts::vec<T, N> n() | |
100 | + { | |
101 | + return (X.cross(Y)).norm(); | |
102 | + } | |
103 | + | |
104 | + CUDA_CALLABLE rts::vec<T, N> p(T a, T b) | |
105 | + { | |
106 | + rts::vec<T, N> result; | |
107 | + //given the two parameters a, b = [0 1], returns the position in world space | |
108 | + result = A + X * a + Y * b; | |
109 | + | |
110 | + return result; | |
111 | + } | |
112 | + | |
113 | + CUDA_CALLABLE rts::vec<T, N> operator()(T a, T b) | |
114 | + { | |
115 | + return p(a, b); | |
116 | + } | |
117 | + | |
118 | + std::string str() | |
119 | + { | |
120 | + std::stringstream ss; | |
121 | + | |
122 | + ss<<std::left<<"B="<<setfill('-')<<setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl; | |
123 | + ss<<setfill(' ')<<setw(23)<<"|"<<"|"<<std::endl<<setw(23)<<"|"<<"|"<<std::endl; | |
124 | + ss<<std::left<<"A="<<setfill('-')<<setw(20)<<A<<">"<<"D="<<A + X; | |
125 | + | |
126 | + return ss.str(); | |
127 | + | |
128 | + } | |
129 | + | |
130 | + CUDA_CALLABLE quad<T, N> operator*(T rhs) | |
131 | + { | |
132 | + //scales the plane by a scalar value | |
133 | + | |
134 | + //compute the center point | |
135 | + rts::vec<T, N> c = A + X*0.5f + Y*0.5f; | |
136 | + | |
137 | + //create the new quadangle | |
138 | + quad<T, N> result; | |
139 | + result.X = X * rhs; | |
140 | + result.Y = Y * rhs; | |
141 | + result.A = c - result.X*0.5f - result.Y*0.5f; | |
142 | + | |
143 | + return result; | |
144 | + | |
145 | + } | |
146 | + | |
147 | + CUDA_CALLABLE T dist(vec<T, N> p) | |
148 | + { | |
149 | + //compute the distance between a point and this quad | |
150 | + | |
151 | + //first break the quad up into two triangles | |
152 | + triangle<T, N> T0(A, A+X, A+Y); | |
153 | + triangle<T, N> T1(A+X+Y, A+X, A+Y); | |
154 | + | |
155 | + | |
156 | + T d0 = T0.dist(p); | |
157 | + T d1 = T1.dist(p); | |
158 | + | |
159 | + if(d0 < d1) | |
160 | + return d0; | |
161 | + else | |
162 | + return d1; | |
163 | + } | |
164 | + | |
165 | + CUDA_CALLABLE T dist_max(vec<T, N> p) | |
166 | + { | |
167 | + T da = (A - p).len(); | |
168 | + T db = (A+X - p).len(); | |
169 | + T dc = (A+Y - p).len(); | |
170 | + T dd = (A+X+Y - p).len(); | |
171 | + | |
172 | + return std::max( da, std::max(db, std::max(dc, dd) ) ); | |
173 | + } | |
174 | +}; | |
175 | + | |
176 | +} //end namespace rts | |
177 | + | |
178 | +template <typename T, int N> | |
179 | +std::ostream& operator<<(std::ostream& os, rts::quad<T, N> R) | |
180 | +{ | |
181 | + os<<R.str(); | |
182 | + return os; | |
183 | +} | |
184 | + | |
185 | + | |
186 | +#endif | ... | ... |
1 | +#ifndef RTS_BEAM | |
2 | +#define RTS_BEAM | |
3 | + | |
4 | +#include "../math/vector.h" | |
5 | +#include "../math/function.h" | |
6 | +#include "../optics/planewave.h" | |
7 | +#include <vector> | |
8 | + | |
9 | +namespace stim{ | |
10 | + | |
11 | +template<typename P> | |
12 | +class beam : public planewave<P> | |
13 | +{ | |
14 | +public: | |
15 | + enum beam_type {Uniform, Bartlett, Hamming, Hanning}; | |
16 | + | |
17 | +private: | |
18 | + | |
19 | + P _na[2]; //numerical aperature of the focusing optics | |
20 | + vec<P> f; //focal point | |
21 | + function<P, P> apod; //apodization function | |
22 | + unsigned int apod_res; //resolution of apodization filter functions | |
23 | + | |
24 | + void apod_uniform() | |
25 | + { | |
26 | + apod = (P)1; | |
27 | + } | |
28 | + void apod_bartlett() | |
29 | + { | |
30 | + apod = (P)1; | |
31 | + apod.insert((P)1, (P)0); | |
32 | + } | |
33 | + void apod_hanning() | |
34 | + { | |
35 | + apod = (P)0; | |
36 | + P x, y; | |
37 | + for(unsigned int n=0; n<apod_res; n++) | |
38 | + { | |
39 | + x = (P)n/(P)apod_res; | |
40 | + y = pow( cos( ((P)3.14159 * x) / 2 ), 2); | |
41 | + apod.insert(x, y); | |
42 | + } | |
43 | + } | |
44 | + void apod_hamming() | |
45 | + { | |
46 | + apod = (P)0; | |
47 | + P x, y; | |
48 | + for(unsigned int n=0; n<apod_res; n++) | |
49 | + { | |
50 | + x = (P)n/(P)apod_res; | |
51 | + y = (P)27/(P)50 + ( (P)23/(P)50 ) * cos((P)3.14159 * x); | |
52 | + apod.insert(x, y); | |
53 | + } | |
54 | + } | |
55 | + | |
56 | + void set_apod(beam_type type) | |
57 | + { | |
58 | + if(type == Uniform) | |
59 | + apod_uniform(); | |
60 | + if(type == Bartlett) | |
61 | + apod_bartlett(); | |
62 | + if(type == Hanning) | |
63 | + apod_hanning(); | |
64 | + if(type == Hamming) | |
65 | + apod_hamming(); | |
66 | + } | |
67 | + | |
68 | +public: | |
69 | + | |
70 | + ///constructor: build a default beam (NA=1.0) | |
71 | + beam( | |
72 | + vec<P> k = rts::vec<P>(0, 0, rtsTAU), | |
73 | + vec<P> _E0 = rts::vec<P>(1, 0, 0), | |
74 | + beam_type _apod = Uniform) | |
75 | + : planewave<P>(k, _E0) | |
76 | + { | |
77 | + _na[0] = (P)0.0; | |
78 | + _na[1] = (P)1.0; | |
79 | + f = vec<P>( (P)0, (P)0, (P)0 ); | |
80 | + apod_res = 256; //set the default resolution for apodization filters | |
81 | + set_apod(_apod); //set the apodization function type | |
82 | + } | |
83 | + | |
84 | + beam<P> refract(rts::vec<P> kn) const{ | |
85 | + | |
86 | + beam<P> new_beam; | |
87 | + new_beam._na[0] = _na[0]; | |
88 | + new_beam._na[1] = _na[1]; | |
89 | + | |
90 | + | |
91 | + rts::planewave<P> pw = planewave<P>::bend(kn); | |
92 | + //std::cout<<pw.str()<<std::endl; | |
93 | + | |
94 | + new_beam.k = pw.kvec(); | |
95 | + new_beam.E0 = pw.E(); | |
96 | + | |
97 | + return new_beam; | |
98 | + } | |
99 | + | |
100 | + ///Numerical Aperature functions | |
101 | + void NA(P na) | |
102 | + { | |
103 | + _na[0] = (P)0; | |
104 | + _na[1] = na; | |
105 | + } | |
106 | + void NA(P na0, P na1) | |
107 | + { | |
108 | + _na[0] = na0; | |
109 | + _na[1] = na1; | |
110 | + } | |
111 | + | |
112 | + /*string str() : | |
113 | + { | |
114 | + stringstream ss; | |
115 | + ss<<"Beam Center: "<<k<<std::endl; | |
116 | + | |
117 | + return ss.str(); | |
118 | + }*/ | |
119 | + | |
120 | + //Monte-Carlo decomposition into plane waves | |
121 | + std::vector< planewave<P> > mc(unsigned int N = 100000, unsigned int seed = 0) const | |
122 | + { | |
123 | + /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling | |
124 | + of a sphere and projecting these samples onto an inscribed sphere. | |
125 | + | |
126 | + seed = seed for the random number generator | |
127 | + */ | |
128 | + srand(seed); //seed the random number generator | |
129 | + | |
130 | + vec<P> k_hat = beam::k.norm(); | |
131 | + | |
132 | + ///compute the rotation operator to transform (0, 0, 1) to k | |
133 | + P cos_angle = k_hat.dot(rts::vec<P>(0, 0, 1)); | |
134 | + rts::matrix<P, 3> rotation; | |
135 | + | |
136 | + //if the cosine of the angle is -1, the rotation is just a flip across the z axis | |
137 | + if(cos_angle == -1){ | |
138 | + rotation(2, 2) = -1; | |
139 | + } | |
140 | + else if(cos_angle != 1.0) | |
141 | + { | |
142 | + rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k_hat).norm(); //compute the axis of rotation | |
143 | + P angle = acos(cos_angle); //compute the angle of rotation | |
144 | + rts::quaternion<P> quat; //create a quaternion describing the rotation | |
145 | + quat.CreateRotation(angle, r_axis); | |
146 | + rotation = quat.toMatrix3(); //compute the rotation matrix | |
147 | + } | |
148 | + | |
149 | + //find the phi values associated with the cassegrain ring | |
150 | + P PHI[2]; | |
151 | + PHI[0] = (P)asin(_na[0]); | |
152 | + PHI[1] = (P)asin(_na[1]); | |
153 | + | |
154 | + //calculate the z-axis cylinder coordinates associated with these angles | |
155 | + P Z[2]; | |
156 | + Z[0] = cos(PHI[0]); | |
157 | + Z[1] = cos(PHI[1]); | |
158 | + P range = Z[0] - Z[1]; | |
159 | + | |
160 | + std::vector< planewave<P> > samples; //create a vector of plane waves | |
161 | + | |
162 | + //draw a distribution of random phi, z values | |
163 | + P z, phi, theta; | |
164 | + for(int i=0; i<N; i++) //for each sample | |
165 | + { | |
166 | + z = ((P)rand() / (P)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder | |
167 | + theta = ((P)rand() / (P)RAND_MAX) * 2 * (P)3.14159; | |
168 | + phi = acos(z); //project onto the sphere, computing phi in spherical coordinates | |
169 | + | |
170 | + //compute and store cartesian coordinates | |
171 | + rts::vec<P> spherical(1, theta, phi); //convert from spherical to cartesian coordinates | |
172 | + rts::vec<P> cart = spherical.sph2cart(); | |
173 | + vec<P> k_prime = rotation * cart; //create a sample vector | |
174 | + | |
175 | + //store a wave refracted along the given direction | |
176 | + //std::cout<<"k prime: "<<rotation<<std::endl; | |
177 | + samples.push_back(planewave<P>::refract(k_prime) * apod(phi/PHI[1])); | |
178 | + } | |
179 | + | |
180 | + return samples; | |
181 | + } | |
182 | + | |
183 | + std::string str() | |
184 | + { | |
185 | + std::stringstream ss; | |
186 | + ss<<"Beam:"<<std::endl; | |
187 | + //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl; | |
188 | + ss<<" Central Plane Wave: "<<beam::k<<std::endl; | |
189 | + if(_na[0] == 0) | |
190 | + ss<<" NA: "<<_na[1]; | |
191 | + else | |
192 | + ss<<" NA: "<<_na[0]<<" -- "<<_na[1]; | |
193 | + | |
194 | + return ss.str(); | |
195 | + } | |
196 | + | |
197 | + | |
198 | + | |
199 | +}; | |
200 | + | |
201 | +} | |
202 | + | |
203 | +#endif | ... | ... |
1 | +#ifndef RTS_EFIELD | |
2 | +#define RTS_EFIELD | |
3 | + | |
4 | +#include "../math/complex.h" | |
5 | +#include "../math/realfield.cuh" | |
6 | +#include "../visualization/colormap.h" | |
7 | +#include "../optics/planewave.h" | |
8 | +#include "../cuda/devices.h" | |
9 | +#include "../optics/beam.h" | |
10 | +#include "../math/rect.h" | |
11 | + | |
12 | + | |
13 | +namespace stim{ | |
14 | + | |
15 | +template<typename T> | |
16 | +__global__ void gpu_planewave2efield(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, | |
17 | + planewave<T> w, rect<T> q) | |
18 | +{ | |
19 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
20 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
21 | + | |
22 | + //make sure that the thread indices are in-bounds | |
23 | + if(iu >= r0 || iv >= r1) return; | |
24 | + | |
25 | + //compute the index into the field | |
26 | + int i = iv*r0 + iu; | |
27 | + | |
28 | + //get the current position | |
29 | + vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); | |
30 | + vec<T> r(p[0], p[1], p[2]); | |
31 | + | |
32 | + complex<T> x( 0.0f, w.k.dot(r) ); | |
33 | + | |
34 | + if(Y == NULL) //if this is a scalar simulation | |
35 | + X[i] += w.E0.len() * exp(x); //use the vector magnitude as the plane wave amplitude | |
36 | + else | |
37 | + { | |
38 | + X[i] += w.E0[0] * exp(x); | |
39 | + Y[i] += w.E0[1] * exp(x); | |
40 | + Z[i] += w.E0[2] * exp(x); | |
41 | + } | |
42 | +} | |
43 | + | |
44 | +template<typename T> | |
45 | +__global__ void gpu_efield_magnitude(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, T* M) | |
46 | +{ | |
47 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
48 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
49 | + | |
50 | + //make sure that the thread indices are in-bounds | |
51 | + if(iu >= r0 || iv >= r1) return; | |
52 | + | |
53 | + //compute the index into the field | |
54 | + int i = iv*r0 + iu; | |
55 | + | |
56 | + if(Y == NULL) //if this is a scalar simulation | |
57 | + M[i] = X[i].abs(); //compute the magnitude of the X component | |
58 | + else | |
59 | + { | |
60 | + M[i] = rts::vec<T>(X[i].abs(), Y[i].abs(), Z[i].abs()).len(); | |
61 | + //M[i] = Z[i].abs(); | |
62 | + } | |
63 | +} | |
64 | + | |
65 | +template<typename T> | |
66 | +__global__ void gpu_efield_real_magnitude(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, T* M) | |
67 | +{ | |
68 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
69 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
70 | + | |
71 | + //make sure that the thread indices are in-bounds | |
72 | + if(iu >= r0 || iv >= r1) return; | |
73 | + | |
74 | + //compute the index into the field | |
75 | + int i = iv*r0 + iu; | |
76 | + | |
77 | + if(Y == NULL) //if this is a scalar simulation | |
78 | + M[i] = X[i].abs(); //compute the magnitude of the X component | |
79 | + else | |
80 | + { | |
81 | + M[i] = rts::vec<T>(X[i].real(), Y[i].real(), Z[i].real()).len(); | |
82 | + //M[i] = Z[i].abs(); | |
83 | + } | |
84 | +} | |
85 | + | |
86 | +template<typename T> | |
87 | +__global__ void gpu_efield_polarization(complex<T>* X, complex<T>* Y, complex<T>* Z, | |
88 | + unsigned int r0, unsigned int r1, | |
89 | + T* Px, T* Py, T* Pz) | |
90 | +{ | |
91 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
92 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
93 | + | |
94 | + //make sure that the thread indices are in-bounds | |
95 | + if(iu >= r0 || iv >= r1) return; | |
96 | + | |
97 | + //compute the index into the field | |
98 | + int i = iv*r0 + iu; | |
99 | + | |
100 | + //compute the field polarization | |
101 | + Px[i] = X[i].abs(); | |
102 | + Py[i] = Y[i].abs(); | |
103 | + Pz[i] = Z[i].abs(); | |
104 | + | |
105 | +} | |
106 | + | |
107 | +/* This function computes the sum of two complex fields and stores the result in *dest | |
108 | +*/ | |
109 | +template<typename T> | |
110 | +__global__ void gpu_efield_sum(complex<T>* dest, complex<T>* src, unsigned int r0, unsigned int r1) | |
111 | +{ | |
112 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
113 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
114 | + | |
115 | + //make sure that the thread indices are in-bounds | |
116 | + if(iu >= r0 || iv >= r1) return; | |
117 | + | |
118 | + //compute the index into the field | |
119 | + int i = iv*r0 + iu; | |
120 | + | |
121 | + //sum the fields | |
122 | + dest[i] += src[i]; | |
123 | +} | |
124 | + | |
125 | +/* This class implements a discrete representation of an electromagnetic field | |
126 | + in 2D. The majority of this representation is done on the GPU. | |
127 | +*/ | |
128 | +template<typename T> | |
129 | +class efield | |
130 | +{ | |
131 | +protected: | |
132 | + | |
133 | + bool vector; | |
134 | + | |
135 | + //gpu pointer to the field data | |
136 | + rts::complex<T>* X; | |
137 | + rts::complex<T>* Y; | |
138 | + rts::complex<T>* Z; | |
139 | + | |
140 | + //resolution of the discrete field | |
141 | + unsigned int R[2]; | |
142 | + | |
143 | + //shape of the 2D field | |
144 | + rect<T> pos; | |
145 | + | |
146 | + void from_planewave(planewave<T> p) | |
147 | + { | |
148 | + unsigned int SQRT_BLOCK = 16; | |
149 | + //create one thread for each detector pixel | |
150 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
151 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
152 | + | |
153 | + | |
154 | + gpu_planewave2efield<T> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], p, pos); | |
155 | + } | |
156 | + | |
157 | + void init(unsigned int res0, unsigned int res1, bool _vector) | |
158 | + { | |
159 | + vector = _vector; //initialize field type | |
160 | + | |
161 | + X = Y = Z = NULL; //initialize all pointers to NULL | |
162 | + R[0] = res0; | |
163 | + R[1] = res1; | |
164 | + | |
165 | + //allocate memory on the gpu | |
166 | + cudaMalloc(&X, sizeof(rts::complex<T>) * R[0] * R[1]); | |
167 | + cudaMemset(X, 0, sizeof(rts::complex<T>) * R[0] * R[1]); | |
168 | + | |
169 | + if(vector) | |
170 | + { | |
171 | + cudaMalloc(&Y, sizeof(rts::complex<T>) * R[0] * R[1]); | |
172 | + cudaMemset(Y, 0, sizeof(rts::complex<T>) * R[0] * R[1]); | |
173 | + | |
174 | + cudaMalloc(&Z, sizeof(rts::complex<T>) * R[0] * R[1]); | |
175 | + cudaMemset(Z, 0, sizeof(rts::complex<T>) * R[0] * R[1]); | |
176 | + } | |
177 | + } | |
178 | + | |
179 | + void destroy() | |
180 | + { | |
181 | + if(X != NULL) cudaFree(X); | |
182 | + if(Y != NULL) cudaFree(Y); | |
183 | + if(Z != NULL) cudaFree(Z); | |
184 | + } | |
185 | + | |
186 | + void shallowcpy(const rts::efield<T> & src) | |
187 | + { | |
188 | + vector = src.vector; | |
189 | + R[0] = src.R[0]; | |
190 | + R[1] = src.R[1]; | |
191 | + } | |
192 | + | |
193 | + void deepcpy(const rts::efield<T> & src) | |
194 | + { | |
195 | + //perform a shallow copy | |
196 | + shallowcpy(src); | |
197 | + | |
198 | + //allocate memory on the gpu | |
199 | + if(src.X != NULL) | |
200 | + { | |
201 | + cudaMalloc(&X, sizeof(rts::complex<T>) * R[0] * R[1]); | |
202 | + cudaMemcpy(X, src.X, sizeof(rts::complex<T>) * R[0] * R[1], cudaMemcpyDeviceToDevice); | |
203 | + } | |
204 | + if(src.Y != NULL) | |
205 | + { | |
206 | + cudaMalloc(&Y, sizeof(rts::complex<T>) * R[0] * R[1]); | |
207 | + cudaMemcpy(Y, src.Y, sizeof(rts::complex<T>) * R[0] * R[1], cudaMemcpyDeviceToDevice); | |
208 | + } | |
209 | + if(src.Z != NULL) | |
210 | + { | |
211 | + cudaMalloc(&Z, sizeof(rts::complex<T>) * R[0] * R[1]); | |
212 | + cudaMemcpy(Z, src.Z, sizeof(rts::complex<T>) * R[0] * R[1], cudaMemcpyDeviceToDevice); | |
213 | + } | |
214 | + } | |
215 | + | |
216 | +public: | |
217 | + efield(unsigned int res0, unsigned int res1, bool _vector = true) | |
218 | + { | |
219 | + init(res0, res1, _vector); | |
220 | + //pos = rts::rect<T>(rts::vec<T>(-10, 0, -10), rts::vec<T>(-10, 0, 10), rts::vec<T>(10, 0, 10)); | |
221 | + } | |
222 | + | |
223 | + //destructor | |
224 | + ~efield() | |
225 | + { | |
226 | + destroy(); | |
227 | + } | |
228 | + | |
229 | + ///Clear the field - set all points to zero | |
230 | + void clear() | |
231 | + { | |
232 | + cudaMemset(X, 0, sizeof(rts::complex<T>) * R[0] * R[1]); | |
233 | + | |
234 | + if(vector) | |
235 | + { | |
236 | + cudaMemset(Y, 0, sizeof(rts::complex<T>) * R[0] * R[1]); | |
237 | + cudaMemset(Z, 0, sizeof(rts::complex<T>) * R[0] * R[1]); | |
238 | + } | |
239 | + } | |
240 | + | |
241 | + void position(rect<T> _p) | |
242 | + { | |
243 | + pos = _p; | |
244 | + } | |
245 | + | |
246 | + //access functions | |
247 | + complex<T>* x(){ | |
248 | + return X; | |
249 | + } | |
250 | + complex<T>* y(){ | |
251 | + return Y; | |
252 | + } | |
253 | + complex<T>* z(){ | |
254 | + return Z; | |
255 | + } | |
256 | + unsigned int Ru(){ | |
257 | + return R[0]; | |
258 | + } | |
259 | + unsigned int Rv(){ | |
260 | + return R[1]; | |
261 | + } | |
262 | + rect<T> p(){ | |
263 | + return pos; | |
264 | + } | |
265 | + | |
266 | + std::string str() | |
267 | + { | |
268 | + stringstream ss; | |
269 | + ss<<pos<<std::endl; | |
270 | + ss<<"X: "<<X<<std::endl; | |
271 | + ss<<"Y: "<<Y<<std::endl; | |
272 | + ss<<"Z: "<<Z; | |
273 | + | |
274 | + return ss.str(); | |
275 | + } | |
276 | + | |
277 | + //assignment operator: assignment from another electric field | |
278 | + efield<T> & operator= (const efield<T> & rhs) | |
279 | + { | |
280 | + destroy(); //destroy any previous information about this field | |
281 | + deepcpy(rhs); //create a deep copy | |
282 | + return *this; //return the current object | |
283 | + } | |
284 | + | |
285 | + //assignment operator: build an electric field from a plane wave | |
286 | + efield<T> & operator= (const planewave<T> & rhs) | |
287 | + { | |
288 | + | |
289 | + clear(); //clear any previous field data | |
290 | + from_planewave(rhs); //create a field from the planewave | |
291 | + return *this; //return the current object | |
292 | + } | |
293 | + | |
294 | + //assignment operator: add an existing electric field | |
295 | + efield<T> & operator+= (const efield<T> & rhs) | |
296 | + { | |
297 | + //if this field and the source field represent the same regions in space | |
298 | + if(R[0] == rhs.R[0] && R[1] == rhs.R[1] && pos == rhs.pos) | |
299 | + { | |
300 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
301 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
302 | + | |
303 | + //create one thread for each detector pixel | |
304 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
305 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
306 | + | |
307 | + //sum the fields | |
308 | + gpu_efield_sum <<<dimGrid, dimBlock>>> (X, rhs.X, R[0], R[1]); | |
309 | + if(Y != NULL) | |
310 | + { | |
311 | + gpu_efield_sum <<<dimGrid, dimBlock>>> (Y, rhs.Y, R[0], R[1]); | |
312 | + gpu_efield_sum <<<dimGrid, dimBlock>>> (Z, rhs.Z, R[0], R[1]); | |
313 | + } | |
314 | + } | |
315 | + else | |
316 | + { | |
317 | + std::cout<<"ERROR in efield: The two summed fields do not represent the same geometry."<<std::endl; | |
318 | + exit(1); | |
319 | + } | |
320 | + | |
321 | + return *this; //return this object | |
322 | + } | |
323 | + | |
324 | + //assignment operator: build an electric field from a beam | |
325 | + efield<T> & operator= (const rts::beam<T> & rhs) | |
326 | + { | |
327 | + //get a vector of monte-carlo samples | |
328 | + std::vector< rts::planewave<T> > p_list = rhs.mc(); | |
329 | + | |
330 | + clear(); //clear any previous field data | |
331 | + for(unsigned int i = 0; i < p_list.size(); i++) | |
332 | + from_planewave(p_list[i]); | |
333 | + return *this; | |
334 | + } | |
335 | + | |
336 | + | |
337 | + //return a scalar field representing field magnitude | |
338 | + realfield<T, 1, true> mag() | |
339 | + { | |
340 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
341 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
342 | + | |
343 | + //create a scalar field to store the result | |
344 | + realfield<T, 1, true> M(R[0], R[1]); | |
345 | + | |
346 | + //create one thread for each detector pixel | |
347 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
348 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
349 | + | |
350 | + //compute the magnitude and store it in a scalar field | |
351 | + gpu_efield_magnitude<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.ptr(0)); | |
352 | + | |
353 | + return M; | |
354 | + } | |
355 | + | |
356 | + //return a sacalar field representing the field magnitude at an infinitely small point in time | |
357 | + realfield<T, 1, true> real_mag() | |
358 | + { | |
359 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
360 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
361 | + | |
362 | + //create a scalar field to store the result | |
363 | + realfield<T, 1, true> M(R[0], R[1]); | |
364 | + | |
365 | + //create one thread for each detector pixel | |
366 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
367 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
368 | + | |
369 | + //compute the magnitude and store it in a scalar field | |
370 | + gpu_efield_real_magnitude<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.ptr(0)); | |
371 | + | |
372 | + return M; | |
373 | + } | |
374 | + | |
375 | + //return a vector field representing field polarization | |
376 | + realfield<T, 3, true> polarization() | |
377 | + { | |
378 | + if(!vector) | |
379 | + { | |
380 | + std::cout<<"ERROR: Cannot compute polarization of a scalar field."<<std::endl; | |
381 | + exit(1); | |
382 | + } | |
383 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
384 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
385 | + | |
386 | + //create one thread for each detector pixel | |
387 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
388 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
389 | + | |
390 | + | |
391 | + realfield<T, 3, true> Pol(R[0], R[1]); //create a vector field to store the result | |
392 | + | |
393 | + //compute the polarization and store it in the vector field | |
394 | + gpu_efield_polarization<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], Pol.ptr(0), Pol.ptr(1), Pol.ptr(2)); | |
395 | + | |
396 | + return Pol; //return the vector field | |
397 | + } | |
398 | + | |
399 | + ////////FRIENDSHIP | |
400 | + //friend void operator<< <T>(rts::efield<T> &ef, rts::halfspace<T> hs); | |
401 | + | |
402 | + | |
403 | +}; | |
404 | + | |
405 | + | |
406 | + | |
407 | +} //end namespace rts | |
408 | + | |
409 | +#endif | ... | ... |
1 | +#ifndef RTS_ESPHERE | |
2 | +#define RTS_ESPHERE | |
3 | + | |
4 | +#include "../math/complex.h" | |
5 | +#include "../math/bessel.h" | |
6 | +#include "../visualization/colormap.h" | |
7 | +#include "../optics/planewave.h" | |
8 | +#include "../cuda/devices.h" | |
9 | +#include "../optics/efield.cuh" | |
10 | + | |
11 | +namespace stim{ | |
12 | + | |
13 | +/* This class implements a discrete representation of an electromagnetic field | |
14 | + in 2D scattered by a sphere. This class implements Mie scattering. | |
15 | +*/ | |
16 | +template<typename P> | |
17 | +class esphere : public efield<P> | |
18 | +{ | |
19 | +private: | |
20 | + stim::complex<P> n; //sphere refractive index | |
21 | + P a; //sphere radius | |
22 | + | |
23 | + //parameters dependent on wavelength | |
24 | + unsigned int Nl; //number of orders for the calculation | |
25 | + rts::complex<P>* A; //internal scattering coefficients | |
26 | + rts::complex<P>* B; //external scattering coefficients | |
27 | + | |
28 | + void calcNl(P kmag) | |
29 | + { | |
30 | + //return ceil( ((P)6.282 * a) / lambda + 4 * pow( ((P)6.282 * a) / lambda, ((P)1/(P)3)) + 2); | |
31 | + Nl = ceil( kmag*a + 4 * pow(kmag * a, (P)1/(P)3) + 2); | |
32 | + } | |
33 | + | |
34 | + void calcAB(P k, unsigned int Nl, rts::complex<P>* A, rts::complex<P>* B) | |
35 | + { | |
36 | + /* These calculations require double precision, so they are computed | |
37 | + using doubles and converted to P at the end. | |
38 | + | |
39 | + Input: | |
40 | + | |
41 | + k = magnitude of the k vector (tau/lambda) | |
42 | + Nl = highest order coefficient ([0 Nl] are computed) | |
43 | + */ | |
44 | + | |
45 | + //clear the previous coefficients | |
46 | + rts::complex<P>* cpuA = (rts::complex<P>*)malloc(sizeof(rts::complex<P>) * (Nl+1)); | |
47 | + rts::complex<P>* cpuB = (rts::complex<P>*)malloc(sizeof(rts::complex<P>) * (Nl+1)); | |
48 | + | |
49 | + //convert to an std complex value | |
50 | + complex<double> nc = (rts::complex<double>)n; | |
51 | + | |
52 | + //compute the magnitude of the k vector | |
53 | + complex<double> kna = nc * k * (double)a; | |
54 | + | |
55 | + //compute the arguments k*a and k*n*a | |
56 | + complex<double> ka(k * a, 0.0); | |
57 | + | |
58 | + //allocate space for the Bessel functions of the first and second kind (and derivatives) | |
59 | + unsigned int bytes = sizeof(complex<double>) * (Nl + 1); | |
60 | + complex<double>* cjv_ka = (complex<double>*)malloc(bytes); | |
61 | + complex<double>* cyv_ka = (complex<double>*)malloc(bytes); | |
62 | + complex<double>* cjvp_ka = (complex<double>*)malloc(bytes); | |
63 | + complex<double>* cyvp_ka = (complex<double>*)malloc(bytes); | |
64 | + complex<double>* cjv_kna = (complex<double>*)malloc(bytes); | |
65 | + complex<double>* cyv_kna = (complex<double>*)malloc(bytes); | |
66 | + complex<double>* cjvp_kna = (complex<double>*)malloc(bytes); | |
67 | + complex<double>* cyvp_kna = (complex<double>*)malloc(bytes); | |
68 | + | |
69 | + //allocate space for the spherical Hankel functions and derivative | |
70 | + complex<double>* chv_ka = (complex<double>*)malloc(bytes); | |
71 | + complex<double>* chvp_ka = (complex<double>*)malloc(bytes); | |
72 | + | |
73 | + //compute the bessel functions using the CPU-based algorithm | |
74 | + double vm; | |
75 | + cbessjyva_sph(Nl, ka, vm, cjv_ka, cyv_ka, cjvp_ka, cyvp_ka); | |
76 | + cbessjyva_sph(Nl, kna, vm, cjv_kna, cyv_kna, cjvp_kna, cyvp_kna); | |
77 | + | |
78 | + //compute A for each order | |
79 | + complex<double> i(0, 1); | |
80 | + complex<double> a, b, c, d; | |
81 | + complex<double> An, Bn; | |
82 | + for(int l=0; l<=Nl; l++) | |
83 | + { | |
84 | + //compute the Hankel functions from j and y | |
85 | + chv_ka[l] = cjv_ka[l] + i * cyv_ka[l]; | |
86 | + chvp_ka[l] = cjvp_ka[l] + i * cyvp_ka[l]; | |
87 | + | |
88 | + //Compute A (internal scattering coefficient) | |
89 | + //compute the numerator and denominator for A | |
90 | + a = cjv_ka[l] * chvp_ka[l] - cjvp_ka[l] * chv_ka[l]; | |
91 | + b = cjv_kna[l] * chvp_ka[l] - chv_ka[l] * cjvp_kna[l] * nc; | |
92 | + | |
93 | + //calculate A and add it to the list | |
94 | + rts::complex<double> An = (2.0 * l + 1.0) * pow(i, l) * (a / b); | |
95 | + cpuA[l] = (rts::complex<P>)An; | |
96 | + | |
97 | + //Compute B (external scattering coefficient) | |
98 | + c = cjv_ka[l] * cjvp_kna[l] * nc - cjv_kna[l] * cjvp_ka[l]; | |
99 | + d = cjv_kna[l] * chvp_ka[l] - chv_ka[l] * cjvp_kna[l] * nc; | |
100 | + | |
101 | + //calculate B and add it to the list | |
102 | + rts::complex<double> Bn = (2.0 * l + 1.0) * pow(i, l) * (c / d); | |
103 | + cpuB[l] = (rts::complex<P>)Bn; | |
104 | + | |
105 | + std::cout<<"A: "<<cpuA[l]<<" B: "<<cpuB[l]<<std::endl; | |
106 | + } | |
107 | + | |
108 | + | |
109 | + if(A != NULL) cudaFree(A); //free any previous coefficients | |
110 | + if(B != NULL) cudaFree(B); | |
111 | + cudaMalloc(&A, sizeof(rts::complex<P>) * (Nl+1)); //allocate memory for new coefficients | |
112 | + cudaMalloc(&B, sizeof(rts::complex<P>) * (Nl+1)); | |
113 | + | |
114 | + //copy the calculations from the CPU to the GPU | |
115 | + cudaMemcpy(A, cpuA, sizeof(rts::complex<P>) * (Nl+1), cudaMemcpyDeviceToHost); | |
116 | + cudaMemcpy(B, cpuB, sizeof(rts::complex<P>) * (Nl+1), cudaMemcpyDeviceToHost); | |
117 | + } | |
118 | + | |
119 | +public: | |
120 | + | |
121 | + esphere(unsigned int res0, unsigned int res1, P _a, rts::complex<P> _n, bool _scalar = false) : | |
122 | + efield<P>(res0, res1, _scalar) | |
123 | + { | |
124 | + std::cout<<"Sphere scattered field created."<<std::endl; | |
125 | + n = _n; //save the refractive index | |
126 | + a = _a; //save the radius | |
127 | + } | |
128 | + | |
129 | + //assignment operator: build an electric field from a plane wave | |
130 | + efield<P> & operator= (const planewave<P> & rhs) | |
131 | + { | |
132 | + calcNl(rhs.kmag()); //compute the number of scattering coefficients | |
133 | + std::cout<<"Nl: "<<Nl<<std::endl; | |
134 | + calcAB(rhs.kmag(), Nl, A, B); //compute the scattering coefficients | |
135 | + | |
136 | + //determine important parameters for the scattering domain | |
137 | + unsigned int sR = ceil(sqrt( (P)(pow((P)esphere::R[0],2) + pow((P)esphere::R[1],2))) ); | |
138 | + //unsigned int thetaR = 256; | |
139 | + | |
140 | + /////////////////////continue scattering code here///////////////////////// | |
141 | + | |
142 | + esphere::clear(); //clear any previous field data | |
143 | + from_planewave(rhs); //create a field from the planewave | |
144 | + return *this; //return the current object | |
145 | + } | |
146 | + | |
147 | + string str() | |
148 | + { | |
149 | + stringstream ss; | |
150 | + ss<<"Mie Scattered Field"<<std::endl; | |
151 | + ss<<(*this).efield<P>::str()<<std::endl; | |
152 | + ss<<"a = "<<a<<std::endl; | |
153 | + ss<<"n = "<<n; | |
154 | + | |
155 | + return ss.str(); | |
156 | + } | |
157 | + | |
158 | +}; | |
159 | + | |
160 | +} //end namespace rts | |
161 | + | |
162 | +#endif | ... | ... |
1 | +#ifndef RTS_HALFSPACE_H | |
2 | +#define RTS_HALFSPACE_H | |
3 | + | |
4 | +#include "../math/plane.h" | |
5 | + | |
6 | + | |
7 | +namespace stim{ | |
8 | + | |
9 | +//GPU kernel to compute the electric field | |
10 | +template<typename T> | |
11 | +__global__ void gpu_halfspace_pw2ef(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, | |
12 | + plane<T> P, planewave<T> w, rect<T> q, bool at_surface = false) | |
13 | +{ | |
14 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
15 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
16 | + | |
17 | + //make sure that the thread indices are in-bounds | |
18 | + if(iu >= r0 || iv >= r1) return; | |
19 | + | |
20 | + //compute the index into the field | |
21 | + int i = iv*r0 + iu; | |
22 | + | |
23 | + //get the current position | |
24 | + vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); | |
25 | + | |
26 | + if(at_surface){ | |
27 | + if(P.side(p) > 0) | |
28 | + return; | |
29 | + } | |
30 | + else{ | |
31 | + if(P.side(p) >= 0) | |
32 | + return; | |
33 | + } | |
34 | + | |
35 | + //if the current position is on the wrong side of the plane | |
36 | + | |
37 | + //vec<T> r(p[0], p[1], p[2]); | |
38 | + | |
39 | + complex<T> x( 0.0f, w.kvec().dot(p) ); | |
40 | + //complex<T> phase( 0.0f, w.phase()); | |
41 | + | |
42 | + if(Y == NULL) //if this is a scalar simulation | |
43 | + X[i] += w.E().len() * exp(x); //use the vector magnitude as the plane wave amplitude | |
44 | + else | |
45 | + { | |
46 | + //X[i] = Y[i] = Z[i] = 1; | |
47 | + X[i] += w.E()[0] * exp(x);// * exp(phase); | |
48 | + Y[i] += w.E()[1] * exp(x);// * exp(phase); | |
49 | + Z[i] += w.E()[2] * exp(x);// * exp(phase); | |
50 | + } | |
51 | +} | |
52 | + | |
53 | +//GPU kernel to compute the electric displacement field | |
54 | +template<typename T> | |
55 | +__global__ void gpu_halfspace_pw2df(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, | |
56 | + plane<T> P, planewave<T> w, rect<T> q, T n, bool at_surface = false) | |
57 | +{ | |
58 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
59 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
60 | + | |
61 | + //make sure that the thread indices are in-bounds | |
62 | + if(iu >= r0 || iv >= r1) return; | |
63 | + | |
64 | + //compute the index into the field | |
65 | + int i = iv*r0 + iu; | |
66 | + | |
67 | + //get the current position | |
68 | + vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); | |
69 | + | |
70 | + if(at_surface){ | |
71 | + if(P.side(p) > 0) | |
72 | + return; | |
73 | + } | |
74 | + else{ | |
75 | + if(P.side(p) >= 0) | |
76 | + return; | |
77 | + } | |
78 | + | |
79 | + //if the current position is on the wrong side of the plane | |
80 | + | |
81 | + //vec<T> r(p[0], p[1], p[2]); | |
82 | + | |
83 | + complex<T> x( 0.0f, w.kvec().dot(p) ); | |
84 | + //complex<T> phase( 0.0f, w.phase()); | |
85 | + | |
86 | + //vec< complex<T> > testE(1, 0, 0); | |
87 | + | |
88 | + | |
89 | + | |
90 | + if(Y == NULL) //if this is a scalar simulation | |
91 | + X[i] += w.E().len() * exp(x); //use the vector magnitude as the plane wave amplitude | |
92 | + else | |
93 | + { | |
94 | + plane< complex<T> > cplane = plane< complex<T>, 3>(P); | |
95 | + vec< complex<T> > E_para;// = cplane.parallel(w.E()); | |
96 | + vec< complex<T> > E_perp;// = cplane.perpendicular(w.E()) * (n*n); | |
97 | + cplane.decompose(w.E(), E_para, E_perp); | |
98 | + T epsilon = n*n; | |
99 | + | |
100 | + X[i] += (E_para[0] + E_perp[0] * epsilon) * exp(x); | |
101 | + Y[i] += (E_para[1] + E_perp[1] * epsilon) * exp(x); | |
102 | + Z[i] += (E_para[2] + E_perp[2] * epsilon) * exp(x); | |
103 | + } | |
104 | +} | |
105 | + | |
106 | +//computes a scalar field containing the refractive index of the half-space at each point | |
107 | +template<typename T> | |
108 | +__global__ void gpu_halfspace_n(T* n, unsigned int r0, unsigned int r1, rect<T> q, plane<T> P, T n0, T n1){ | |
109 | + | |
110 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
111 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
112 | + | |
113 | + //make sure that the thread indices are in-bounds | |
114 | + if(iu >= r0 || iv >= r1) return; | |
115 | + | |
116 | + //compute the index into the field | |
117 | + int i = iv*r0 + iu; | |
118 | + | |
119 | + //get the current position | |
120 | + vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); | |
121 | + | |
122 | + //set the appropriate refractive index | |
123 | + if(P.side(p) < 0) n[i] = n0; | |
124 | + else n[i] = n1; | |
125 | +} | |
126 | + | |
127 | +template<class T> | |
128 | +class halfspace | |
129 | +{ | |
130 | +private: | |
131 | + rts::plane<T> S; //surface plane splitting the half space | |
132 | + rts::complex<T> n0; //refractive index at the front of the plane | |
133 | + rts::complex<T> n1; //refractive index at the back of the plane | |
134 | + | |
135 | + //lists of waves in front (pw0) and behind (pw1) the plane | |
136 | + std::vector< rts::planewave<T> > w0; | |
137 | + std::vector< rts::planewave<T> > w1; | |
138 | + | |
139 | + //rts::planewave<T> pi; //incident plane wave | |
140 | + //rts::planewave<T> pr; //plane wave reflected from the surface | |
141 | + //rts::planewave<T> pt; //plane wave transmitted through the surface | |
142 | + | |
143 | + void init(){ | |
144 | + n0 = 1.0; | |
145 | + n1 = 1.5; | |
146 | + } | |
147 | + | |
148 | + //compute which side of the interface is hit by the incoming plane wave (0 = front, 1 = back) | |
149 | + bool facing(planewave<T> p){ | |
150 | + if(p.kvec().dot(S.norm()) > 0) | |
151 | + return 1; | |
152 | + else | |
153 | + return 0; | |
154 | + } | |
155 | + | |
156 | + T calc_theta_i(vec<T> v){ | |
157 | + | |
158 | + vec<T> v_hat = v.norm(); | |
159 | + | |
160 | + //compute the cosine of the angle between k_hat and the plane normal | |
161 | + T cos_theta_i = v_hat.dot(S.norm()); | |
162 | + | |
163 | + return acos(abs(cos_theta_i)); | |
164 | + } | |
165 | + | |
166 | + T calc_theta_t(T ni_nt, T theta_i){ | |
167 | + | |
168 | + T sin_theta_t = ni_nt * sin(theta_i); | |
169 | + return asin(sin_theta_t); | |
170 | + } | |
171 | + | |
172 | + | |
173 | +public: | |
174 | + | |
175 | + //constructors | |
176 | + halfspace(){ | |
177 | + init(); | |
178 | + } | |
179 | + | |
180 | + halfspace(T na, T nb){ | |
181 | + init(); | |
182 | + n0 = na; | |
183 | + n1 = nb; | |
184 | + } | |
185 | + | |
186 | + halfspace(T na, T nb, plane<T> p){ | |
187 | + n0 = na; | |
188 | + n1 = nb; | |
189 | + S = p; | |
190 | + } | |
191 | + | |
192 | + //compute the transmitted and reflective waves given the incident (vacuum) plane wave p | |
193 | + void incident(rts::planewave<T> p){ | |
194 | + | |
195 | + planewave<T> r, t; | |
196 | + p.scatter(S, n1.real()/n0.real(), r, t); | |
197 | + | |
198 | + //std::cout<<"i+r: "<<p.r()[0] + r.r()[0]<<std::endl; | |
199 | + //std::cout<<"t: "<<t.r()[0]<<std::endl; | |
200 | + | |
201 | + if(facing(p)){ | |
202 | + w1.push_back(p); | |
203 | + | |
204 | + if(r.E().len() != 0) | |
205 | + w1.push_back(r); | |
206 | + if(t.E().len() != 0) | |
207 | + w0.push_back(t); | |
208 | + } | |
209 | + else{ | |
210 | + w0.push_back(p); | |
211 | + | |
212 | + if(r.E().len() != 0) | |
213 | + w0.push_back(r); | |
214 | + if(t.E().len() != 0) | |
215 | + w1.push_back(t); | |
216 | + } | |
217 | + } | |
218 | + | |
219 | + void incident(rts::beam<T> b, unsigned int N = 10000){ | |
220 | + | |
221 | + //generate a plane wave decomposition for the beam | |
222 | + std::vector< planewave<T> > pw_list = b.mc(N); | |
223 | + | |
224 | + //calculate the reflected and refracted waves for each incident wave | |
225 | + for(unsigned int w = 0; w < pw_list.size(); w++){ | |
226 | + incident(pw_list[w]); | |
227 | + } | |
228 | + } | |
229 | + | |
230 | + //return the electric field at the specified resolution and position | |
231 | + rts::efield<T> E(unsigned int r0, unsigned int r1, rect<T> R){ | |
232 | + | |
233 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
234 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
235 | + | |
236 | + //create one thread for each detector pixel | |
237 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
238 | + dim3 dimGrid((r0 + SQRT_BLOCK -1)/SQRT_BLOCK, (r1 + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
239 | + | |
240 | + //create an electric field | |
241 | + rts::efield<T> ef(r0, r1); | |
242 | + ef.position(R); | |
243 | + | |
244 | + //render each plane wave | |
245 | + | |
246 | + //plane waves at the surface front | |
247 | + for(unsigned int w = 0; w < w0.size(); w++){ | |
248 | + //std::cout<<"w0 "<<w<<": "<<hs.w0[w].str()<<std::endl; | |
249 | + rts::gpu_halfspace_pw2ef<T> <<<dimGrid, dimBlock>>> (ef.x(), ef.y(), ef.z(), r0, r1, S, w0[w], ef.p()); | |
250 | + } | |
251 | + | |
252 | + //plane waves at the surface back | |
253 | + for(unsigned int w = 0; w < w1.size(); w++){ | |
254 | + //std::cout<<"w1 "<<w<<": "<<hs.w1[w].str()<<std::endl; | |
255 | + rts::gpu_halfspace_pw2ef<T> <<<dimGrid, dimBlock>>> (ef.x(), ef.y(), ef.z(), r0, r1, -S, w1[w], ef.p(), true); | |
256 | + } | |
257 | + | |
258 | + return ef; | |
259 | + } | |
260 | + | |
261 | + //return the electric displacement at the specified resolution and position | |
262 | + rts::efield<T> D(unsigned int r0, unsigned int r1, rect<T> R){ | |
263 | + | |
264 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
265 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
266 | + | |
267 | + //create one thread for each detector pixel | |
268 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
269 | + dim3 dimGrid((r0 + SQRT_BLOCK -1)/SQRT_BLOCK, (r1 + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
270 | + | |
271 | + //create a complex vector field | |
272 | + rts::efield<T> df(r0, r1); | |
273 | + df.position(R); | |
274 | + | |
275 | + //render each plane wave | |
276 | + | |
277 | + //plane waves at the surface front | |
278 | + for(unsigned int w = 0; w < w0.size(); w++){ | |
279 | + //std::cout<<"w0 "<<w<<": "<<hs.w0[w].str()<<std::endl; | |
280 | + rts::gpu_halfspace_pw2df<T> <<<dimGrid, dimBlock>>> (df.x(), df.y(), df.z(), r0, r1, S, w0[w], df.p(), n0.real()); | |
281 | + } | |
282 | + | |
283 | + //plane waves at the surface back | |
284 | + for(unsigned int w = 0; w < w1.size(); w++){ | |
285 | + //std::cout<<"w1 "<<w<<": "<<hs.w1[w].str()<<std::endl; | |
286 | + rts::gpu_halfspace_pw2df<T> <<<dimGrid, dimBlock>>> (df.x(), df.y(), df.z(), r0, r1, -S, w1[w], df.p(), n1.real(), true); | |
287 | + } | |
288 | + | |
289 | + return df; | |
290 | + } | |
291 | + | |
292 | + realfield<T, 1, true> nfield(unsigned int Ru, unsigned int Rv, rect<T> p){ | |
293 | + | |
294 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
295 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
296 | + | |
297 | + //create one thread for each detector pixel | |
298 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
299 | + dim3 dimGrid((Ru + SQRT_BLOCK -1)/SQRT_BLOCK, (Rv + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
300 | + | |
301 | + realfield<T, 1, true> n(Ru, Rv); //create a scalar field to store the result of n | |
302 | + | |
303 | + rts::gpu_halfspace_n<T> <<<dimGrid, dimBlock>>> (n.ptr(), Ru, Rv, p, S, n0.real(), n1.real()); | |
304 | + | |
305 | + return n; | |
306 | + } | |
307 | + | |
308 | + std::string str(){ | |
309 | + std::stringstream ss; | |
310 | + ss<<"Half Space-------------"<<std::endl; | |
311 | + ss<<"n0: "<<n0<<std::endl; | |
312 | + ss<<"n1: "<<n1<<std::endl; | |
313 | + ss<<S.str(); | |
314 | + | |
315 | + | |
316 | + if(w0.size() != 0 || w1.size() != 0){ | |
317 | + ss<<std::endl<<"Plane Waves:"<<std::endl; | |
318 | + for(unsigned int w = 0; w < w0.size(); w++){ | |
319 | + ss<<"w0 = "<<w<<": "<<w0[w]<<std::endl; | |
320 | + } | |
321 | + for(unsigned int w = 0; w < w1.size(); w++){ | |
322 | + ss<<"w1 = "<<w<<": "<<w1[w]<<std::endl; | |
323 | + } | |
324 | + } | |
325 | + | |
326 | + return ss.str(); | |
327 | + } | |
328 | + ////////FRIENDSHIP | |
329 | + //friend void operator<< <> (rts::efield<T> &ef, rts::halfspace<T> hs); | |
330 | + | |
331 | +}; | |
332 | + | |
333 | + | |
334 | + | |
335 | + | |
336 | +} | |
337 | + | |
338 | + | |
339 | +#endif | ... | ... |
1 | +#ifndef RTS_MATERIAL_H | |
2 | +#define RTS_MATERIAL_H | |
3 | + | |
4 | +#include <vector> | |
5 | +#include <ostream> | |
6 | +#include <iostream> | |
7 | +#include <fstream> | |
8 | +#include <complex> | |
9 | +#include <algorithm> | |
10 | +#include <sstream> | |
11 | +#include "../math/complex.h" | |
12 | +#include "../math/constants.h" | |
13 | +#include "../math/function.h" | |
14 | + | |
15 | +namespace stim{ | |
16 | + | |
17 | +//Material class - default representation for the material property is the refractive index (RI) | |
18 | +template<typename T> | |
19 | +class material : public function< T, complex<T> >{ | |
20 | + | |
21 | +public: | |
22 | + enum wave_property{microns, inverse_cm}; | |
23 | + enum material_property{ri, absorbance}; | |
24 | + | |
25 | +private: | |
26 | + | |
27 | + using function< T, complex<T> >::X; | |
28 | + using function< T, complex<T> >::Y; | |
29 | + using function< T, complex<T> >::insert; | |
30 | + using function< T, complex<T> >::bounding; | |
31 | + | |
32 | + std::string name; //name for the material (defaults to file name) | |
33 | + | |
34 | + void process_header(std::string str, wave_property& wp, material_property& mp){ | |
35 | + | |
36 | + std::stringstream ss(str); //create a stream from the data string | |
37 | + std::string line; | |
38 | + std::getline(ss, line); //get the first line as a string | |
39 | + while(line[0] == '#'){ //continue looping while the line is a comment | |
40 | + | |
41 | + std::stringstream lstream(line); //create a stream from the line | |
42 | + lstream.ignore(); //ignore the first character ('#') | |
43 | + | |
44 | + std::string prop; //get the property name | |
45 | + lstream>>prop; | |
46 | + | |
47 | + if(prop == "X"){ | |
48 | + std::string wp_name; | |
49 | + lstream>>wp_name; | |
50 | + if(wp_name == "microns") wp = microns; | |
51 | + else if(wp_name == "inverse_cm") wp = inverse_cm; | |
52 | + } | |
53 | + else if(prop == "Y"){ | |
54 | + std::string mp_name; | |
55 | + lstream>>mp_name; | |
56 | + if(mp_name == "ri") mp = ri; | |
57 | + else if(mp_name == "absorbance") mp = absorbance; | |
58 | + } | |
59 | + | |
60 | + std::getline(ss, line); //get the next line | |
61 | + } | |
62 | + | |
63 | + function< T, stim::complex<T> >::process_string(str); | |
64 | + } | |
65 | + | |
66 | + void from_inverse_cm(){ | |
67 | + //convert inverse centimeters to wavelength (in microns) | |
68 | + for(unsigned int i=0; i<X.size(); i++) | |
69 | + X[i] = 10000 / X[i]; | |
70 | + | |
71 | + //reverse the function array | |
72 | + std::reverse(X.begin(), X.end()); | |
73 | + std::reverse(Y.begin(), Y.end()); | |
74 | + | |
75 | + } | |
76 | + | |
77 | + void init(){ | |
78 | + bounding[0] = bounding[1] = stim::complex<T>(1, 0); | |
79 | + } | |
80 | + | |
81 | + | |
82 | +public: | |
83 | + | |
84 | + material(std::string filename, wave_property wp, material_property mp){ | |
85 | + name = filename; | |
86 | + load(filename, wp, mp); | |
87 | + } | |
88 | + | |
89 | + material(std::string filename){ | |
90 | + name = filename; | |
91 | + load(filename); | |
92 | + } | |
93 | + | |
94 | + material(){ | |
95 | + init(); | |
96 | + } | |
97 | + | |
98 | + complex<T> getN(T lambda){ | |
99 | + return function< T, complex<T> >::linear(lambda); | |
100 | + } | |
101 | + | |
102 | + void load(std::string filename, wave_property wp, material_property mp){ | |
103 | + | |
104 | + //load the file as a function | |
105 | + function< T, complex<T> >::load(filename); | |
106 | + } | |
107 | + | |
108 | + void load(std::string filename){ | |
109 | + | |
110 | + wave_property wp = inverse_cm; | |
111 | + material_property mp = ri; | |
112 | + //turn the file into a string | |
113 | + std::ifstream t(filename.c_str()); //open the file as a stream | |
114 | + | |
115 | + if(!t){ | |
116 | + std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl; | |
117 | + exit(1); | |
118 | + } | |
119 | + std::string str((std::istreambuf_iterator<char>(t)), | |
120 | + std::istreambuf_iterator<char>()); | |
121 | + | |
122 | + //process the header information | |
123 | + process_header(str, wp, mp); | |
124 | + | |
125 | + //convert units | |
126 | + if(wp == inverse_cm) | |
127 | + from_inverse_cm(); | |
128 | + //set the bounding values | |
129 | + bounding[0] = Y[0]; | |
130 | + bounding[1] = Y.back(); | |
131 | + } | |
132 | + std::string str(){ | |
133 | + std::stringstream ss; | |
134 | + ss<<name<<std::endl; | |
135 | + ss<<function< T, complex<T> >::str(); | |
136 | + return ss.str(); | |
137 | + } | |
138 | + std::string get_name(){ | |
139 | + return name; | |
140 | + } | |
141 | + | |
142 | + void set_name(std::string str){ | |
143 | + name = str; | |
144 | + } | |
145 | + | |
146 | +}; | |
147 | + | |
148 | +} | |
149 | + | |
150 | + | |
151 | + | |
152 | + | |
153 | +#endif | ... | ... |
1 | +#include "../optics/material.h" | |
2 | +#include "../math/complexfield.cuh" | |
3 | +#include "../math/constants.h" | |
4 | +//#include "../envi/bil.h" | |
5 | + | |
6 | +#include "cufft.h" | |
7 | + | |
8 | +#include <vector> | |
9 | +#include <sstream> | |
10 | + | |
11 | +namespace stim{ | |
12 | + | |
13 | +//this function writes a sinc function to "dest" such that an iFFT produces a slab | |
14 | +template<typename T> | |
15 | +__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri, | |
16 | + T* src, T* zf, | |
17 | + T w, unsigned int zR, unsigned int nuR){ | |
18 | + //dest = complex field representing the sample | |
19 | + //ri = refractive indices for each wavelength | |
20 | + //src = intensity of the light source for each wavelength | |
21 | + //zf = z position of the slab interface for each wavelength (accounting for optical path length) | |
22 | + //w = width of the slab (in pixels) | |
23 | + //zR = number of z-axis samples | |
24 | + //nuR = number of wavelengths | |
25 | + | |
26 | + //get the current coordinate in the plane slice | |
27 | + int ifz = blockIdx.x * blockDim.x + threadIdx.x; | |
28 | + int inu = blockIdx.y * blockDim.y + threadIdx.y; | |
29 | + | |
30 | + //make sure that the thread indices are in-bounds | |
31 | + if(inu >= nuR || ifz >= zR) return; | |
32 | + | |
33 | + int i = inu * zR + ifz; | |
34 | + | |
35 | + T fz; | |
36 | + if(ifz < zR/2) | |
37 | + fz = ifz / (T)zR; | |
38 | + else | |
39 | + fz = -(zR - ifz) / (T)zR; | |
40 | + | |
41 | + //if the slab starts outside of the simulation domain, just return | |
42 | + if(zf[inu] >= zR) return; | |
43 | + | |
44 | + //fill the array along z with a sinc function representing the Fourier transform of the layer | |
45 | + | |
46 | + T opl = w * ri[inu].real(); //optical path length | |
47 | + | |
48 | + //handle the case where the slab goes outside the simulation domain | |
49 | + if(zf[inu] + opl >= zR) | |
50 | + opl = zR - zf[inu]; | |
51 | + | |
52 | + if(opl == 0) return; | |
53 | + | |
54 | + //T l = w * ri[inu].real(); | |
55 | + //complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0)); | |
56 | + complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2)); | |
57 | + | |
58 | + complex<T> eta = ri[inu] * ri[inu] - 1; | |
59 | + | |
60 | + //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz); | |
61 | + if(ifz == 0) | |
62 | + dest[i] += opl * exp(e) * eta * src[inu]; | |
63 | + else | |
64 | + dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl); | |
65 | +} | |
66 | + | |
67 | +template<typename T> | |
68 | +__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){ | |
69 | + //zf = current z depth (optical path length) in pixels | |
70 | + //ri = refractive index of the material | |
71 | + //w = actual width of the layer (in pixels) | |
72 | + | |
73 | + | |
74 | + //compute the index for this thread | |
75 | + int i = blockIdx.x * blockDim.x + threadIdx.x; | |
76 | + if(i >= S) return; | |
77 | + | |
78 | + if(ri == NULL) | |
79 | + zf[i] += w; | |
80 | + else | |
81 | + zf[i] += ri[i].real() * w; | |
82 | +} | |
83 | + | |
84 | +//apply the 1D MIRST filter to an existing sample (overwriting the sample) | |
85 | +template<typename T> | |
86 | +__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda, | |
87 | + T dFz, | |
88 | + T inNA, T outNA, | |
89 | + unsigned int lambdaR, unsigned int zR, | |
90 | + T sigma = 0){ | |
91 | + //sampleFFT = the sample in the Fourier domain (will be overwritten) | |
92 | + //lambda = list of wavelengths | |
93 | + //dFz = delta along the Fz axis in the frequency domain | |
94 | + //inNA = NA of the internal obscuration | |
95 | + //outNA = NA of the objective | |
96 | + //zR = number of pixels along the Fz axis (same as the z-axis) | |
97 | + //lambdaR = number of wavelengths | |
98 | + //sigma = width of the Gaussian source | |
99 | + int ifz = blockIdx.x * blockDim.x + threadIdx.x; | |
100 | + int inu = blockIdx.y * blockDim.y + threadIdx.y; | |
101 | + | |
102 | + if(inu >= lambdaR || ifz >= zR) return; | |
103 | + | |
104 | + //calculate the index into the sample FT | |
105 | + int i = inu * zR + ifz; | |
106 | + | |
107 | + //compute the frequency (and set all negative spatial frequencies to zero) | |
108 | + T fz; | |
109 | + if(ifz < zR / 2) | |
110 | + fz = ifz * dFz; | |
111 | + //if the spatial frequency is negative, set it to zero and exit | |
112 | + else{ | |
113 | + sampleFFT[i] = 0; | |
114 | + return; | |
115 | + } | |
116 | + | |
117 | + //compute the frequency in inverse microns | |
118 | + T nu = 1/lambda[inu]; | |
119 | + | |
120 | + //determine the radius of the integration circle | |
121 | + T nu_sq = nu * nu; | |
122 | + T fz_sq = (fz * fz) / 4; | |
123 | + | |
124 | + //cut off frequencies above the diffraction limit | |
125 | + T r; | |
126 | + if(fz_sq < nu_sq) | |
127 | + r = sqrt(nu_sq - fz_sq); | |
128 | + else | |
129 | + r = 0; | |
130 | + | |
131 | + //account for the optics | |
132 | + T Q = 0; | |
133 | + if(r > nu * inNA && r < nu * outNA) | |
134 | + Q = 1; | |
135 | + | |
136 | + //account for the source | |
137 | + //T sigma = 30.0; | |
138 | + T s = exp( - (r*r * sigma*sigma) / 2 ); | |
139 | + //T s=1; | |
140 | + | |
141 | + //compute the final filter | |
142 | + T mirst = 0; | |
143 | + if(fz != 0) | |
144 | + mirst = 2 * stimPI * r * s * Q * (1/fz); | |
145 | + | |
146 | + sampleFFT[i] *= mirst; | |
147 | + | |
148 | +} | |
149 | + | |
150 | +/*This object performs a 1-dimensional (layered) MIRST simulation | |
151 | +*/ | |
152 | +template<typename T> | |
153 | +class mirst1d{ | |
154 | + | |
155 | +private: | |
156 | + unsigned int Z; //z-axis resolution | |
157 | + unsigned int pad; //pixel padding on either side of the sample | |
158 | + | |
159 | + std::vector< material<T> > matlist; //list of materials | |
160 | + std::vector< T > layers; //list of layer thicknesses | |
161 | + | |
162 | + std::vector< T > lambdas; //list of wavelengths that are being simulated | |
163 | + unsigned int S; //number of wavelengths (size of "lambdas") | |
164 | + | |
165 | + T NA[2]; //numerical aperature (central obscuration and outer diameter) | |
166 | + | |
167 | + function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters) | |
168 | + | |
169 | + complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc. | |
170 | + | |
171 | + void fft(int direction = CUFFT_FORWARD){ | |
172 | + | |
173 | + unsigned padZ = Z + pad; | |
174 | + | |
175 | + //create cuFFT handles | |
176 | + cufftHandle plan; | |
177 | + cufftResult result; | |
178 | + | |
179 | + if(sizeof(T) == 4) | |
180 | + result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision | |
181 | + else | |
182 | + result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision | |
183 | + | |
184 | + //check for Plan 1D errors | |
185 | + if(result != CUFFT_SUCCESS){ | |
186 | + std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl; | |
187 | + CufftError(result); | |
188 | + exit(1); | |
189 | + } | |
190 | + | |
191 | + if(sizeof(T) == 4) | |
192 | + result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction); | |
193 | + else | |
194 | + result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction); | |
195 | + | |
196 | + //check for FFT errors | |
197 | + if(result != CUFFT_SUCCESS){ | |
198 | + std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl; | |
199 | + CufftError(result); | |
200 | + exit(1); | |
201 | + } | |
202 | + | |
203 | + cufftDestroy(plan); | |
204 | + } | |
205 | + | |
206 | + | |
207 | + //initialize the scratch memory | |
208 | + void init_scratch(){ | |
209 | + scratch = complexfield<T, 1>(Z + pad , lambdas.size()); | |
210 | + scratch = 0; | |
211 | + } | |
212 | + | |
213 | + //get the list of scattering efficiency (eta) values for a specified layer | |
214 | + std::vector< complex<T> > layer_etas(unsigned int l){ | |
215 | + | |
216 | + std::vector< complex<T> > etas; | |
217 | + | |
218 | + //fill the list of etas | |
219 | + for(unsigned int i=0; i<lambdas.size(); i++) | |
220 | + etas.push_back( matlist[l].eta(lambdas[i]) ); | |
221 | + return etas; | |
222 | + } | |
223 | + | |
224 | + //calculates the optimal block and grid sizes using information from the GPU | |
225 | + void cuda_params(dim3& grids, dim3& blocks){ | |
226 | + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
227 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
228 | + | |
229 | + //create one thread for each detector pixel | |
230 | + blocks = dim3(SQRT_BLOCK, SQRT_BLOCK); | |
231 | + grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
232 | + } | |
233 | + | |
234 | + //add the fourier transform of layer n to the scratch space | |
235 | + void build_layer_fft(unsigned int n, T* zf){ | |
236 | + unsigned int paddedZ = Z + pad; | |
237 | + | |
238 | + T wpx = layers[n] / dz(); //calculate the width of the layer in pixels | |
239 | + | |
240 | + //allocate memory for the refractive index | |
241 | + complex<T>* gpuRi; | |
242 | + HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S)); | |
243 | + | |
244 | + //allocate memory for the source profile | |
245 | + T* gpuSrc; | |
246 | + HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S)); | |
247 | + | |
248 | + complex<T> ri; | |
249 | + T source; | |
250 | + //store the refractive index and source profile in a CPU array | |
251 | + for(int inu=0; inu<S; inu++){ | |
252 | + //save the refractive index to the GPU | |
253 | + ri = matlist[n].getN(lambdas[inu]); | |
254 | + HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice )); | |
255 | + | |
256 | + //save the source profile to the GPU | |
257 | + source = source_profile(10000 / lambdas[inu]); | |
258 | + HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice )); | |
259 | + | |
260 | + } | |
261 | + | |
262 | + //create one thread for each pixel of the field slice | |
263 | + dim3 gridDim, blockDim; | |
264 | + cuda_params(gridDim, blockDim); | |
265 | + stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S); | |
266 | + | |
267 | + int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size | |
268 | + int linGrid = S / linBlock + 1; | |
269 | + stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S); | |
270 | + | |
271 | + //free memory | |
272 | + HANDLE_ERROR(cudaFree(gpuRi)); | |
273 | + HANDLE_ERROR(cudaFree(gpuSrc)); | |
274 | + } | |
275 | + | |
276 | + void build_sample(){ | |
277 | + init_scratch(); //initialize the GPU scratch space | |
278 | + //build_layer(1); | |
279 | + | |
280 | + T* zf; | |
281 | + HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S)); | |
282 | + HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S)); | |
283 | + | |
284 | + //render each layer of the sample | |
285 | + for(unsigned int l=0; l<layers.size(); l++){ | |
286 | + build_layer_fft(l, zf); | |
287 | + } | |
288 | + | |
289 | + HANDLE_ERROR(cudaFree(zf)); | |
290 | + } | |
291 | + | |
292 | + void apply_filter(){ | |
293 | + dim3 gridDim, blockDim; | |
294 | + cuda_params(gridDim, blockDim); | |
295 | + | |
296 | + unsigned int Zpad = Z + pad; | |
297 | + | |
298 | + T sim_range = dz() * Zpad; | |
299 | + T dFz = 1 / sim_range; | |
300 | + | |
301 | + //copy the array of wavelengths to the GPU | |
302 | + T* gpuLambdas; | |
303 | + HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad)); | |
304 | + HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice)); | |
305 | + stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas, | |
306 | + dFz, | |
307 | + NA[0], NA[1], | |
308 | + S, Zpad); | |
309 | + } | |
310 | + | |
311 | + //crop the image to the sample thickness - keep in mind that sample thickness != optical path length | |
312 | + void crop(){ | |
313 | + | |
314 | + scratch = scratch.crop(Z, S); | |
315 | + } | |
316 | + | |
317 | + //save the scratch field as a binary file | |
318 | + void to_binary(std::string filename){ | |
319 | + | |
320 | + } | |
321 | + | |
322 | + | |
323 | +public: | |
324 | + | |
325 | + //constructor | |
326 | + mirst1d(unsigned int rZ = 100, | |
327 | + unsigned int padding = 0){ | |
328 | + Z = rZ; | |
329 | + pad = padding; | |
330 | + NA[0] = 0; | |
331 | + NA[1] = 0.8; | |
332 | + S = 0; | |
333 | + source_profile = 1; | |
334 | + } | |
335 | + | |
336 | + //add a layer, thickness = microns | |
337 | + void add_layer(material<T> mat, T thickness){ | |
338 | + matlist.push_back(mat); | |
339 | + layers.push_back(thickness); | |
340 | + } | |
341 | + | |
342 | + void add_layer(std::string filename, T thickness){ | |
343 | + add_layer(material<T>(filename), thickness); | |
344 | + } | |
345 | + | |
346 | + //adds a profile spectrum for the light source | |
347 | + void set_source(std::string filename){ | |
348 | + source_profile.load(filename); | |
349 | + } | |
350 | + | |
351 | + //adds a block of wavenumbers (cm^-1) to the simulation parameters | |
352 | + void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){ | |
353 | + unsigned int nu = start; | |
354 | + while(nu <= stop){ | |
355 | + lambdas.push_back((T)10000 / nu); | |
356 | + nu += step; | |
357 | + } | |
358 | + S = lambdas.size(); //increment the number of wavelengths (shorthand for later) | |
359 | + } | |
360 | + | |
361 | + T thickness(){ | |
362 | + T t = 0; | |
363 | + for(unsigned int l=0; l<layers.size(); l++) | |
364 | + t += layers[l]; | |
365 | + return t; | |
366 | + } | |
367 | + | |
368 | + void padding(unsigned int padding = 0){ | |
369 | + pad = padding; | |
370 | + } | |
371 | + | |
372 | + T dz(){ | |
373 | + return thickness() / Z; //calculate the z-axis step size | |
374 | + } | |
375 | + | |
376 | + void na(T in, T out){ | |
377 | + NA[0] = in; | |
378 | + NA[1] = out; | |
379 | + } | |
380 | + | |
381 | + void na(T out){ | |
382 | + na(0, out); | |
383 | + } | |
384 | + | |
385 | + stim::function<T, T> get_source(){ | |
386 | + return source_profile; | |
387 | + } | |
388 | + | |
389 | + void save_sample(std::string filename){ | |
390 | + //create a sample and save the magnitude as an image | |
391 | + build_sample(); | |
392 | + fft(CUFFT_INVERSE); | |
393 | + scratch.toImage(filename, stim::complexfield<T, 1>::magnitude); | |
394 | + } | |
395 | + | |
396 | + void save_mirst(std::string filename, bool binary = true){ | |
397 | + //apply the MIRST filter to a sample and save the image | |
398 | + | |
399 | + //build the sample in the Fourier domain | |
400 | + build_sample(); | |
401 | + | |
402 | + //apply the MIRST filter | |
403 | + apply_filter(); | |
404 | + | |
405 | + //apply an inverse FFT to bring the results back into the spatial domain | |
406 | + fft(CUFFT_INVERSE); | |
407 | + | |
408 | + crop(); | |
409 | + | |
410 | + //save the image | |
411 | + if(binary) | |
412 | + to_binary(filename); | |
413 | + else | |
414 | + scratch.toImage(filename, stim::complexfield<T, 1>::magnitude); | |
415 | + } | |
416 | + | |
417 | + | |
418 | + | |
419 | + | |
420 | + std::string str(){ | |
421 | + | |
422 | + stringstream ss; | |
423 | + ss<<"1D MIRST Simulation========================="<<std::endl; | |
424 | + ss<<"z-axis resolution: "<<Z<<std::endl; | |
425 | + ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl; | |
426 | + ss<<"number of wavelengths: "<<lambdas.size()<<std::endl; | |
427 | + ss<<"padding: "<<pad<<std::endl; | |
428 | + ss<<"sample thickness: "<<thickness()<<" um"<<std::endl; | |
429 | + ss<<"dz: "<<dz()<<" um"<<std::endl; | |
430 | + ss<<std::endl; | |
431 | + ss<<layers.size()<<" layers-------------"<<std::endl; | |
432 | + for(unsigned int l=0; l<layers.size(); l++) | |
433 | + ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl; | |
434 | + | |
435 | + ss<<"source profile-----------"<<std::endl; | |
436 | + ss<<get_source().str()<<std::endl; | |
437 | + | |
438 | + return ss.str(); | |
439 | + | |
440 | + | |
441 | + } | |
442 | + | |
443 | + | |
444 | + | |
445 | +}; | |
446 | + | |
447 | +} | ... | ... |
1 | +#ifndef STIM_GLNETWORK_H | |
2 | +#define STIM_GLNETWORK_H | |
3 | + | |
4 | +#include <GL/glew.h> | |
5 | +#include <GL/glut.h> | |
6 | +#include "network.h" | |
7 | +#include <stim/visualization/cylinder.h> | |
8 | +#include <stim/math/vector.h> | |
9 | +#include <list> | |
10 | +#include <ANN/ANN.h> | |
11 | +#include "fiber.h" | |
12 | + | |
13 | + | |
14 | +namespace stim | |
15 | +{ | |
16 | +template <typename T> | |
17 | +class glnetwork : public virtual network<T> | |
18 | +{ | |
19 | +private: | |
20 | + using stim::network<T>::E; | |
21 | + | |
22 | + GLuint dList; ///displaylist for the lines. | |
23 | + GLuint cList; ///displaylist for the cylinders. | |
24 | + | |
25 | + void init() | |
26 | + { | |
27 | + ///clear lists if there is data in them. | |
28 | + ///adding points may create errors or uncessary duplicate points. | |
29 | + if(glIsList(dList)) | |
30 | + glDeleteLists(dList, 1); | |
31 | + if(glIsList(cList)) | |
32 | + glDeleteLists(cList, 1); | |
33 | + dList = glGenLists(1); ///create the lists | |
34 | + cList = glGenLists(1); | |
35 | + | |
36 | + ///set up the Line list. | |
37 | + glListBase(dList); | |
38 | + glMatrixMode(GL_PROJECTION); | |
39 | + glLoadIdentity; | |
40 | + glMatrixMode(GL_MODELVIEW); | |
41 | + glLoadIdentity; | |
42 | + | |
43 | + ///set up the cylinder List. | |
44 | + glListBase(cList); | |
45 | + glMatrixMode(GL_PROJECTION); | |
46 | + glLoadIdentity; | |
47 | + glMatrixMode(GL_MODELVIEW); | |
48 | + glLoadIdentity; | |
49 | + } | |
50 | + | |
51 | + void | |
52 | + Create(GLenum mode, int sides = 8) | |
53 | + { | |
54 | + glListBase(dList); | |
55 | + glNewList(dList, GL_COMPILE); | |
56 | + glLineWidth(3.5); | |
57 | + for(int i = 0; i < E.size(); i++) | |
58 | + { | |
59 | + if(mode == GL_SELECT) | |
60 | + { | |
61 | +// glLineWidth(3.5); | |
62 | + glLoadName(i); | |
63 | + } | |
64 | + else{ | |
65 | +// glLineWidth(1.0+1.0*i); | |
66 | + } | |
67 | + glColor3f(0.0, 1.0-0.05*i, i*0.05); | |
68 | + std::vector<stim::vec<T> > line = getEdgeCenterLine(i); | |
69 | + glBegin(GL_LINE_STRIP); | |
70 | + for(int j = 0; j < line.size(); j++) | |
71 | + { | |
72 | + glVertex3f(line[j][0], | |
73 | + line[j][1], | |
74 | + line[j][2]); | |
75 | + } | |
76 | + glEnd(); | |
77 | + } | |
78 | + glEndList(); | |
79 | + | |
80 | + glListBase(cList); | |
81 | + glNewList(cList, GL_COMPILE); | |
82 | + | |
83 | + for(int i = 0; i < E.size(); i++) | |
84 | + { | |
85 | + if(mode == GL_SELECT) | |
86 | + { | |
87 | + glLoadName(i); | |
88 | + } | |
89 | + glColor3f(1.0, 1.0, 0.0); | |
90 | + std::vector<stim::vec<T> > line = getEdgeCenterLine(i); | |
91 | + std::vector<stim::vec<T> > linemag = getEdgeCenterLineMag(i); | |
92 | + stim::cylinder<T> cyl(line, linemag); | |
93 | + std::vector<std::vector<stim::vec<T > > > p = cyl.getPoints(sides); | |
94 | + for(int i = 0; i < p.size()-1; i++) | |
95 | + { | |
96 | + for(int j = 0; j < p[0].size()-1; j++) | |
97 | + { | |
98 | + glColor4f(1.0, 1.0, 0.0, 0.5); | |
99 | + glEnable(GL_BLEND); | |
100 | + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); | |
101 | + glBegin(GL_QUADS); | |
102 | + glVertex3f(p[i][j][0], p[i][j][1], p[i][j][2]); | |
103 | + glVertex3f(p[i][j+1][0], p[i][j+1][1], p[i][j+1][2]); | |
104 | + glVertex3f(p[i+1][j+1][0], p[i+1][j+1][1], p[i+1][j+1][2] ); | |
105 | + glVertex3f(p[i+1][j][0], p[i+1][j][1], p[i+1][j][2]); | |
106 | + glEnd(); | |
107 | + glDisable(GL_BLEND); | |
108 | + | |
109 | + glColor4f(1.0, 0.0, 1.0, 1.0); | |
110 | + glLineWidth(2.0); | |
111 | + glBegin(GL_LINES); | |
112 | + glVertex3f(p[i][j][0], p[i][j][1], p[i][j][2]); | |
113 | + glVertex3f(p[i][j+1][0], p[i][j+1][1], p[i][j+1][2]); | |
114 | + glVertex3f(p[i][j][0], p[i][j][1], p[i][j][2]); | |
115 | + glVertex3f(p[i+1][j][0], p[i+1][j][1], p[i+1][j][2] ); | |
116 | + glEnd(); | |
117 | + } | |
118 | + | |
119 | + } | |
120 | + | |
121 | + | |
122 | + | |
123 | + } | |
124 | + glEndList(); | |
125 | +// CHECK_OPENGL_ERROR | |
126 | + } | |
127 | + | |
128 | +public: | |
129 | + using stim::network<T>::sizeE; | |
130 | + using stim::network<T>::getEdgeCenterLine; | |
131 | + using stim::network<T>::getEdgeCenterLineMag; | |
132 | + glnetwork() | |
133 | + { | |
134 | + | |
135 | + } | |
136 | + | |
137 | + void | |
138 | + createFromSelf(GLenum mode = GL_RENDER, int sides = 8) | |
139 | + { | |
140 | + init(); | |
141 | + Create(mode, sides); | |
142 | + } | |
143 | + | |
144 | + void | |
145 | + Render() | |
146 | + { | |
147 | + glCallList(dList); | |
148 | +// CHECK_OPENGL_ERROR | |
149 | + } | |
150 | + | |
151 | + void | |
152 | + RenderCylinders() | |
153 | + { | |
154 | + glCallList(cList); | |
155 | +// CHECK_OPENGL_ERROR | |
156 | + } | |
157 | + | |
158 | + void | |
159 | + RenderLine(std::vector<stim::vec<T> > l) | |
160 | + { | |
161 | + glColor3f(0.5, 1.0, 0.5); | |
162 | + glLineWidth(3.0); | |
163 | + glBegin(GL_LINE_STRIP); | |
164 | + for(int j = 0; j < l.size(); j++){ | |
165 | + glVertex3f( | |
166 | + l[j][0], | |
167 | + l[j][1], | |
168 | + l[j][2] | |
169 | + ); | |
170 | + } | |
171 | + glEnd(); | |
172 | + } | |
173 | + | |
174 | +}; | |
175 | +} | |
176 | + | |
177 | +#endif | ... | ... |