Commit 8d4f09409f53f0f8545a8d6aa3ba027f916ecdaf
1 parent
a9275be5
ERROR in plane wave refraction.
Showing
12 changed files
with
373 additions
and
862 deletions
Show diff stats
math/complex.h
... | ... | @@ -161,6 +161,17 @@ struct complex |
161 | 161 | return *this; |
162 | 162 | } |
163 | 163 | |
164 | + CUDA_CALLABLE complex<T> operator-=(const complex<T> &rhs) | |
165 | + { | |
166 | + *this = *this - rhs; | |
167 | + return *this; | |
168 | + } | |
169 | + CUDA_CALLABLE complex<T> operator-=(const T &rhs) | |
170 | + { | |
171 | + *this = *this - rhs; | |
172 | + return *this; | |
173 | + } | |
174 | + | |
164 | 175 | CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs) |
165 | 176 | { |
166 | 177 | *this = *this * rhs; |
... | ... | @@ -220,7 +231,7 @@ struct complex |
220 | 231 | { |
221 | 232 | complex<T> result; |
222 | 233 | |
223 | - result = (T)log() * y; | |
234 | + result = log() * y; | |
224 | 235 | |
225 | 236 | return result.exp(); |
226 | 237 | } |
... | ... | @@ -267,6 +278,27 @@ struct complex |
267 | 278 | return false; |
268 | 279 | } |
269 | 280 | |
281 | + //CASTING operators | |
282 | + template < typename otherT > | |
283 | + operator complex<otherT>() | |
284 | + { | |
285 | + complex<otherT> result((otherT)r, (otherT)i); | |
286 | + return result; | |
287 | + } | |
288 | + template< typename otherT > | |
289 | + complex( const complex<otherT> &rhs) | |
290 | + { | |
291 | + r = (T)rhs.r; | |
292 | + i = (T)rhs.i; | |
293 | + } | |
294 | + template< typename otherT > | |
295 | + complex& operator=(const complex<otherT> &rhs) | |
296 | + { | |
297 | + r = (T)rhs.r; | |
298 | + i = (T)rhs.i; | |
299 | + return *this; | |
300 | + } | |
301 | + | |
270 | 302 | }; |
271 | 303 | |
272 | 304 | } //end RTS namespace |
... | ... | @@ -319,6 +351,11 @@ CUDA_CALLABLE static rts::complex<T> pow(rts::complex<T> x, T y) |
319 | 351 | { |
320 | 352 | return x.pow(y); |
321 | 353 | } |
354 | +template<typename T> | |
355 | +CUDA_CALLABLE static rts::complex<T> pow(rts::complex<T> x, int y) | |
356 | +{ | |
357 | + return x.pow(y); | |
358 | +} | |
322 | 359 | |
323 | 360 | //log function |
324 | 361 | template<typename T> | ... | ... |
math/function.h
... | ... | @@ -31,33 +31,40 @@ public: |
31 | 31 | //insert(0, 0); |
32 | 32 | } |
33 | 33 | |
34 | - Y linear(X x) | |
34 | + Y linear(X x) const | |
35 | 35 | { |
36 | 36 | if(f.size() == 0) return (Y)0; //return zero if the function is empty |
37 | 37 | //declare an iterator |
38 | - typename std::vector< dataPoint >::iterator it; | |
38 | + typedef typename std::vector< dataPoint >::iterator f_iter; | |
39 | + f_iter it; | |
39 | 40 | |
40 | - dataPoint s; | |
41 | - s.x = x; | |
41 | + //dataPoint s; | |
42 | + //s.x = x; | |
42 | 43 | |
43 | - it = search(f.begin(), f.end(), &s, &s + 1, &function<X, Y>::findCeiling); | |
44 | + //it = search(f.begin(), f.end(), &s, &s + 1, &function<X, Y>::findCeiling); | |
45 | + unsigned int i; | |
46 | + for(i = 0; i<f.size(); i++) | |
47 | + { | |
48 | + if(f[i].x > x) | |
49 | + break; | |
50 | + } | |
44 | 51 | |
45 | 52 | //if the wavelength is past the end of the list, return the back |
46 | - if(it == f.end()) | |
53 | + if(i == f.size()) | |
47 | 54 | return f.back().y; |
48 | 55 | //if the wavelength is before the beginning of the list, return the front |
49 | - else if(it == f.begin()) | |
56 | + else if(i == 0) | |
50 | 57 | return f.front().y; |
51 | 58 | //otherwise interpolate |
52 | 59 | else |
53 | 60 | { |
54 | - X xMax = (*it).x; | |
55 | - X xMin = (*(it - 1)).x; | |
61 | + X xMax = f[i].x; | |
62 | + X xMin = f[i - 1].x; | |
56 | 63 | //std::cout<<lMin<<"----------"<<lMax<<std::endl; |
57 | 64 | |
58 | 65 | X a = (x - xMin) / (xMax - xMin); |
59 | - Y riMin = (*(it - 1)).y; | |
60 | - Y riMax = (*it).y; | |
66 | + Y riMin = f[i - 1].y; | |
67 | + Y riMax = f[i].y; | |
61 | 68 | Y interp; |
62 | 69 | interp = riMax * a + riMin * (1.0 - a); |
63 | 70 | return interp; |
... | ... | @@ -92,35 +99,35 @@ public: |
92 | 99 | |
93 | 100 | } |
94 | 101 | |
95 | - X getX(unsigned int i) | |
102 | + X getX(unsigned int i) const | |
96 | 103 | { |
97 | 104 | return f[i].x; |
98 | 105 | } |
99 | 106 | |
100 | - Y getY(unsigned int i) | |
107 | + Y getY(unsigned int i) const | |
101 | 108 | { |
102 | 109 | return f[i].y; |
103 | 110 | } |
104 | 111 | |
105 | 112 | ///get the number of data points in the function |
106 | - unsigned int getN() | |
113 | + unsigned int getN() const | |
107 | 114 | { |
108 | 115 | return f.size(); |
109 | 116 | } |
110 | 117 | |
111 | 118 | //look up an indexed component |
112 | - dataPoint operator[](int i) | |
119 | + dataPoint operator[](int i) const | |
113 | 120 | { |
114 | 121 | return f[i]; |
115 | 122 | } |
116 | 123 | |
117 | 124 | ///linear interpolation |
118 | - Y operator()(X x) | |
125 | + Y operator()(X x) const | |
119 | 126 | { |
120 | 127 | return linear(x); |
121 | 128 | } |
122 | 129 | |
123 | - function<X, Y> operator+(Y r) | |
130 | + function<X, Y> operator+(Y r) const | |
124 | 131 | { |
125 | 132 | function<X, Y> result; |
126 | 133 | |
... | ... | @@ -134,13 +141,13 @@ public: |
134 | 141 | return result; |
135 | 142 | } |
136 | 143 | |
137 | - function<X, Y> & operator= (const Y & rhs) | |
138 | - { | |
139 | - f.clear(); | |
140 | - if(rhs != 0) //if the RHS is zero, just clear, otherwise add one value of RHS | |
141 | - insert(0, rhs); | |
142 | - | |
143 | - return *this; | |
144 | + function<X, Y> & operator= (const Y & rhs) | |
145 | + { | |
146 | + f.clear(); | |
147 | + if(rhs != 0) //if the RHS is zero, just clear, otherwise add one value of RHS | |
148 | + insert(0, rhs); | |
149 | + | |
150 | + return *this; | |
144 | 151 | } |
145 | 152 | |
146 | 153 | ... | ... |
math/quad.h
... | ... | @@ -7,6 +7,7 @@ |
7 | 7 | #include "../math/triangle.h" |
8 | 8 | #include "../math/quaternion.h" |
9 | 9 | #include <iostream> |
10 | +#include <iomanip> | |
10 | 11 | #include <algorithm> |
11 | 12 | |
12 | 13 | namespace rts{ |
... | ... | @@ -48,37 +49,6 @@ struct quad |
48 | 49 | |
49 | 50 | } |
50 | 51 | |
51 | - /**************************************************************** | |
52 | - Constructor - create a quad from two points and a normal | |
53 | - ****************************************************************/ | |
54 | - /*CUDA_CALLABLE quad(rts::vec<T, N> pMin, rts::vec<T, N> pMax, rts::vec<T, N> normal) | |
55 | - { | |
56 | - | |
57 | - //assign the corner point | |
58 | - A = pMin; | |
59 | - | |
60 | - //compute the vector from pMin to pMax | |
61 | - rts::vec<T, 3> v0; | |
62 | - v0 = pMax - pMin; | |
63 | - | |
64 | - //compute the cross product of A and the plane normal | |
65 | - rts::vec<T, 3> v1; | |
66 | - v1 = v0.cross(normal); | |
67 | - | |
68 | - | |
69 | - //calculate point B | |
70 | - rts::vec<T, 3> B; | |
71 | - B = A + v0 * 0.5f + v1 * 0.5f; | |
72 | - | |
73 | - //calculate rtsPoint C | |
74 | - rts::vec<T, 3> C; | |
75 | - C = A + v0 * 0.5f - v1 * 0.5f; | |
76 | - | |
77 | - //calculate X and Y | |
78 | - X = B - A; | |
79 | - Y = C - A; | |
80 | - }*/ | |
81 | - | |
82 | 52 | /******************************************************************* |
83 | 53 | Constructor - create a quad from a position, normal, and rotation |
84 | 54 | *******************************************************************/ |
... | ... | @@ -114,6 +84,15 @@ struct quad |
114 | 84 | std::cout<<X<<std::endl; |
115 | 85 | } |
116 | 86 | |
87 | + //boolean comparison | |
88 | + bool operator==(const quad<T, N> & rhs) | |
89 | + { | |
90 | + if(A == rhs.A && X == rhs.X && Y == rhs.Y) | |
91 | + return true; | |
92 | + else | |
93 | + return false; | |
94 | + } | |
95 | + | |
117 | 96 | /******************************************* |
118 | 97 | Return the normal for the quad |
119 | 98 | *******************************************/ |
... | ... | @@ -140,10 +119,9 @@ struct quad |
140 | 119 | { |
141 | 120 | std::stringstream ss; |
142 | 121 | |
143 | - ss<<"A = "<<A<<std::endl; | |
144 | - ss<<"B = "<<A + Y<<std::endl; | |
145 | - ss<<"C = "<<A + Y + X<<std::endl; | |
146 | - ss<<"D = "<<A + X<<std::endl; | |
122 | + ss<<std::left<<"B="<<setfill('-')<<setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl; | |
123 | + ss<<setfill(' ')<<setw(23)<<"|"<<"|"<<std::endl<<setw(23)<<"|"<<"|"<<std::endl; | |
124 | + ss<<std::left<<"A="<<setfill('-')<<setw(20)<<A<<">"<<"D="<<A + X; | |
147 | 125 | |
148 | 126 | return ss.str(); |
149 | 127 | ... | ... |
math/quaternion.h
... | ... | @@ -38,19 +38,24 @@ void quaternion<T>::normalize() |
38 | 38 | } |
39 | 39 | |
40 | 40 | template<typename T> |
41 | -void quaternion<T>::CreateRotation(T theta, T axis_x, T axis_y, T axis_z) | |
41 | +void quaternion<T>::CreateRotation(T theta, T ux, T uy, T uz) | |
42 | 42 | { |
43 | - //assign the given Euler rotation to this quaternion | |
44 | - w = (T)cos(theta/2); | |
45 | - x = axis_x*(T)sin(theta/2); | |
46 | - y = axis_y*(T)sin(theta/2); | |
47 | - z = axis_z*(T)sin(theta/2); | |
43 | + vec<T, 3> u(ux, uy, uz); | |
44 | + | |
45 | + CreateRotation(theta, u); | |
46 | + | |
48 | 47 | } |
49 | 48 | |
50 | 49 | template<typename T> |
51 | -void quaternion<T>::CreateRotation(T theta, vec<T, 3> axis) | |
50 | +void quaternion<T>::CreateRotation(T theta, vec<T, 3> u) | |
52 | 51 | { |
53 | - CreateRotation(theta, axis[0], axis[1], axis[2]); | |
52 | + vec<T, 3> u_hat = u.norm(); | |
53 | + | |
54 | + //assign the given Euler rotation to this quaternion | |
55 | + w = (T)cos(theta/2); | |
56 | + x = u_hat[0]*(T)sin(theta/2); | |
57 | + y = u_hat[1]*(T)sin(theta/2); | |
58 | + z = u_hat[2]*(T)sin(theta/2); | |
54 | 59 | } |
55 | 60 | |
56 | 61 | template<typename T> | ... | ... |
math/spherical_bessel.h deleted
1 | -#ifndef RTS_SBESSEL_H | |
2 | -#define RTS_SBESSEL_H | |
3 | -#include <math.h> | |
4 | - | |
5 | - | |
6 | -namespace rts{ | |
7 | - | |
8 | -#define RTS_BESSEL_CONVERGENCE_MIN 0.0145f | |
9 | -#define RTS_BESSEL_CONVERGENCE_MAX 0.4f | |
10 | -#define RTS_BESSEL_MAXIMUM_FLOAT -1e33f | |
11 | - | |
12 | -template <typename T> | |
13 | -CUDA_CALLABLE void sbesselj(int n, complex<T> x, complex<T>* j) | |
14 | -{ | |
15 | - //compute the first bessel function | |
16 | - if(n >= 0) | |
17 | - j[0] = (T)sin(x) / x; | |
18 | - | |
19 | - //compute the second bessel function | |
20 | - if(n >= 1) | |
21 | - j[1] = j[0] / x - (T)cos(x) / x; | |
22 | - | |
23 | - //use the recurrence relation to compute the rest | |
24 | - for(int i = 2; i <= n; i++) | |
25 | - { | |
26 | - j[i] = ( (2 * i - 1) / x ) * j[i-1] - j[i-2]; | |
27 | - } | |
28 | - | |
29 | - //if x = 0, deal with the degenerate case | |
30 | - /*if( isnan(j[0].r) ) | |
31 | - { | |
32 | - j[0] = (T)1.0; | |
33 | - for(int i = 1; i<=n; i++) | |
34 | - j[i] = (T)0.0; | |
35 | - }*/ | |
36 | -} | |
37 | - | |
38 | -template <typename T> | |
39 | -CUDA_CALLABLE void sbessely(int n, complex<T> x, complex<T>* y) | |
40 | -{ | |
41 | - //compute the first bessel function | |
42 | - if(n >= 0) | |
43 | - y[0] = -(T)cos(x) / x; | |
44 | - | |
45 | - //compute the second bessel function | |
46 | - if(n >= 1) | |
47 | - y[1] = y[0] / x - (T)sin(x) / x; | |
48 | - | |
49 | - //use the recurrence relation to compute the rest | |
50 | - for(int i = 2; i <= n; i++) | |
51 | - { | |
52 | - y[i] = ( (2 * i - 1) / x ) * y[i-1] - y[i-2]; | |
53 | - } | |
54 | - | |
55 | -} | |
56 | - | |
57 | -//spherical Hankel functions of the first kind | |
58 | -template <typename T> | |
59 | -CUDA_CALLABLE void sbesselh1(int n, complex<T> x, complex<T>* h) | |
60 | -{ | |
61 | - //compute j_0 and j_1 | |
62 | - complex<T> j[2]; | |
63 | - sbesselj(1, x, j); | |
64 | - | |
65 | - //compute y_0 and y_1 | |
66 | - complex<T> y[2]; | |
67 | - sbessely(1, x, y); | |
68 | - | |
69 | - //compute the first-order Hhankel function | |
70 | - if(n >= 0) | |
71 | - h[0] = j[0] + y[0].imul(); | |
72 | - | |
73 | - //compute the second bessel function | |
74 | - if(n >= 1) | |
75 | - h[1] = j[1] + y[1].imul(); | |
76 | - | |
77 | - //use the recurrence relation to compute the rest | |
78 | - for(int i = 2; i <= n; i++) | |
79 | - { | |
80 | - h[i] = ( (2 * i - 1) / x ) * h[i-1] - h[i-2]; | |
81 | - } | |
82 | -} | |
83 | - | |
84 | -template <typename T> | |
85 | -CUDA_CALLABLE void init_sbesselj(T x, T* j) | |
86 | -{ | |
87 | - //compute the first 2 bessel functions | |
88 | - j[0] = (T)sin(x) / x; | |
89 | - | |
90 | - j[1] = j[0] / x - (T)cos(x) / x; | |
91 | -} | |
92 | - | |
93 | -template <typename T> | |
94 | -CUDA_CALLABLE void init_sbessely(T x, T* y) | |
95 | -{ | |
96 | - //compute the first 2 bessel functions | |
97 | - y[0] = -(T)cos(x) / x; | |
98 | - | |
99 | - y[1] = y[0] / x - (T)sin(x) / x; | |
100 | -} | |
101 | - | |
102 | -template <typename T> | |
103 | -CUDA_CALLABLE void shift_sbesselj(int n, T x, T* b)//, T stability = 1.4) | |
104 | -{ | |
105 | - | |
106 | - T bnew; | |
107 | - | |
108 | - //compute the next (order n) Bessel function | |
109 | - bnew = ((2 * n - 1) * b[1])/x - b[0]; | |
110 | - | |
111 | - //if(n > stability*x) | |
112 | - if(n > real(x)) | |
113 | - if(real(bnew) < RTS_BESSEL_CONVERGENCE_MIN || real(bnew) > RTS_BESSEL_CONVERGENCE_MAX) | |
114 | - bnew = 0; | |
115 | - | |
116 | - //shift and add the new value to the array | |
117 | - b[0] = b[1]; | |
118 | - b[1] = bnew; | |
119 | -} | |
120 | - | |
121 | -template <typename T> | |
122 | -CUDA_CALLABLE void shift_sbessely(int n, T x, T* b)//, T stability = 1.4) | |
123 | -{ | |
124 | - | |
125 | - T bnew; | |
126 | - | |
127 | - //compute the next (order n) Bessel function | |
128 | - bnew = ((2 * n - 1) * b[1])/x - b[0]; | |
129 | - | |
130 | - if(bnew < RTS_BESSEL_MAXIMUM_FLOAT || | |
131 | - (n > x && bnew > 0)) | |
132 | - { | |
133 | - bnew = 0; | |
134 | - b[1] = 0; | |
135 | - } | |
136 | - | |
137 | - | |
138 | - //shift and add the new value to the array | |
139 | - b[0] = b[1]; | |
140 | - b[1] = bnew; | |
141 | -} | |
142 | - | |
143 | - | |
144 | - | |
145 | -} //end namespace rts | |
146 | - | |
147 | - | |
148 | - | |
149 | -#endif |
math/vector.h
... | ... | @@ -54,7 +54,7 @@ struct vec |
54 | 54 | memcpy(v, data, sizeof(T) * N); |
55 | 55 | } |
56 | 56 | |
57 | - CUDA_CALLABLE T len() | |
57 | + CUDA_CALLABLE T len() const | |
58 | 58 | { |
59 | 59 | //compute and return the vector length |
60 | 60 | T sum_sq = (T)0; |
... | ... | @@ -66,7 +66,7 @@ struct vec |
66 | 66 | |
67 | 67 | } |
68 | 68 | |
69 | - CUDA_CALLABLE vec<T, N> cart2sph() | |
69 | + CUDA_CALLABLE vec<T, N> cart2sph() const | |
70 | 70 | { |
71 | 71 | //convert the vector from cartesian to spherical coordinates |
72 | 72 | //x, y, z -> r, theta, phi (where theta = 0 to 2*pi) |
... | ... | @@ -79,7 +79,7 @@ struct vec |
79 | 79 | return sph; |
80 | 80 | } |
81 | 81 | |
82 | - CUDA_CALLABLE vec<T, N> sph2cart() | |
82 | + CUDA_CALLABLE vec<T, N> sph2cart() const | |
83 | 83 | { |
84 | 84 | //convert the vector from cartesian to spherical coordinates |
85 | 85 | //r, theta, phi -> x, y, z (where theta = 0 to 2*pi) |
... | ... | @@ -92,7 +92,7 @@ struct vec |
92 | 92 | return cart; |
93 | 93 | } |
94 | 94 | |
95 | - CUDA_CALLABLE vec<T, N> norm() | |
95 | + CUDA_CALLABLE vec<T, N> norm() const | |
96 | 96 | { |
97 | 97 | //compute and return the vector norm |
98 | 98 | vec<T, N> result; |
... | ... | @@ -109,19 +109,19 @@ struct vec |
109 | 109 | return result; |
110 | 110 | } |
111 | 111 | |
112 | - CUDA_CALLABLE vec<T, 3> cross(vec<T, 3> rhs) | |
112 | + CUDA_CALLABLE vec<T, 3> cross(const vec<T, 3> rhs) const | |
113 | 113 | { |
114 | 114 | vec<T, 3> result; |
115 | 115 | |
116 | 116 | //compute the cross product (only valid for 3D vectors) |
117 | - result[0] = v[1] * rhs[2] - v[2] * rhs[1]; | |
118 | - result[1] = v[2] * rhs[0] - v[0] * rhs[2]; | |
119 | - result[2] = v[0] * rhs[1] - v[1] * rhs[0]; | |
117 | + result[0] = v[1] * rhs.v[2] - v[2] * rhs.v[1]; | |
118 | + result[1] = v[2] * rhs.v[0] - v[0] * rhs.v[2]; | |
119 | + result[2] = v[0] * rhs.v[1] - v[1] * rhs.v[0]; | |
120 | 120 | |
121 | 121 | return result; |
122 | 122 | } |
123 | 123 | |
124 | - CUDA_CALLABLE T dot(vec<T, N> rhs) | |
124 | + CUDA_CALLABLE T dot(vec<T, N> rhs) const | |
125 | 125 | { |
126 | 126 | T result = (T)0; |
127 | 127 | |
... | ... | @@ -133,7 +133,7 @@ struct vec |
133 | 133 | } |
134 | 134 | |
135 | 135 | //arithmetic |
136 | - CUDA_CALLABLE vec<T, N> operator+(vec<T, N> rhs) | |
136 | + CUDA_CALLABLE vec<T, N> operator+(vec<T, N> rhs) const | |
137 | 137 | { |
138 | 138 | vec<T, N> result; |
139 | 139 | |
... | ... | @@ -142,7 +142,7 @@ struct vec |
142 | 142 | |
143 | 143 | return result; |
144 | 144 | } |
145 | - CUDA_CALLABLE vec<T, N> operator-(vec<T, N> rhs) | |
145 | + CUDA_CALLABLE vec<T, N> operator-(vec<T, N> rhs) const | |
146 | 146 | { |
147 | 147 | vec<T, N> result; |
148 | 148 | |
... | ... | @@ -151,7 +151,7 @@ struct vec |
151 | 151 | |
152 | 152 | return result; |
153 | 153 | } |
154 | - CUDA_CALLABLE vec<T, N> operator*(T rhs) | |
154 | + CUDA_CALLABLE vec<T, N> operator*(T rhs) const | |
155 | 155 | { |
156 | 156 | vec<T, N> result; |
157 | 157 | |
... | ... | @@ -160,7 +160,7 @@ struct vec |
160 | 160 | |
161 | 161 | return result; |
162 | 162 | } |
163 | - CUDA_CALLABLE vec<T, N> operator/(T rhs) | |
163 | + CUDA_CALLABLE vec<T, N> operator/(T rhs) const | |
164 | 164 | { |
165 | 165 | vec<T, N> result; |
166 | 166 | |
... | ... | @@ -179,7 +179,7 @@ struct vec |
179 | 179 | return *this; |
180 | 180 | }*/ |
181 | 181 | |
182 | - CUDA_CALLABLE bool operator==(vec<T, N> rhs) | |
182 | + CUDA_CALLABLE bool operator==(vec<T, N> rhs) const | |
183 | 183 | { |
184 | 184 | if ( (rhs.v[0] == v[0]) && (rhs.v[1] == v[1]) && (rhs.v[2] == v[2]) ) |
185 | 185 | return true; |
... | ... | @@ -187,7 +187,7 @@ struct vec |
187 | 187 | return false; |
188 | 188 | } |
189 | 189 | |
190 | - std::string toStr() | |
190 | + std::string toStr() const | |
191 | 191 | { |
192 | 192 | std::stringstream ss; |
193 | 193 | |
... | ... | @@ -203,8 +203,8 @@ struct vec |
203 | 203 | return ss.str(); |
204 | 204 | } |
205 | 205 | |
206 | - //bracket operator | |
207 | - CUDA_CALLABLE T& operator[](int i) | |
206 | + //bracket operator - allows assignment to the vector | |
207 | + CUDA_CALLABLE T& operator[](const unsigned int i) | |
208 | 208 | { |
209 | 209 | return v[i]; |
210 | 210 | } | ... | ... |
optics/beam.h
... | ... | @@ -3,12 +3,13 @@ |
3 | 3 | |
4 | 4 | #include "../math/vector.h" |
5 | 5 | #include "../math/function.h" |
6 | +#include "../optics/planewave.h" | |
6 | 7 | #include <vector> |
7 | 8 | |
8 | 9 | namespace rts{ |
9 | 10 | |
10 | 11 | template<typename P> |
11 | -class beam | |
12 | +class beam : public planewave<P> | |
12 | 13 | { |
13 | 14 | public: |
14 | 15 | enum beam_type {Uniform, Bartlett, Hamming, Hanning}; |
... | ... | @@ -17,10 +18,6 @@ private: |
17 | 18 | |
18 | 19 | P na[2]; //numerical aperature of the focusing optics |
19 | 20 | vec<P> f; //focal point |
20 | - vec<P> k; //direction vector | |
21 | - vec<P> E0; //polarization direction | |
22 | - P omega; //frequency | |
23 | - | |
24 | 21 | function<P, P> apod; //apodization function |
25 | 22 | unsigned int apod_res; //resolution of complex apodization filters |
26 | 23 | |
... | ... | @@ -71,17 +68,17 @@ private: |
71 | 68 | public: |
72 | 69 | |
73 | 70 | ///constructor: build a default beam (NA=1.0) |
74 | - beam(beam_type _apod = Uniform) | |
71 | + beam( | |
72 | + vec<P> _k = rts::vec<P>(0, 0, TAU), | |
73 | + vec<P> _E0 = rts::vec<P>(1, 0, 0), | |
74 | + beam_type _apod = Uniform) | |
75 | + : planewave<P>(_k, _E0) | |
75 | 76 | { |
76 | 77 | na[0] = (P)0.0; |
77 | 78 | na[1] = (P)1.0; |
78 | - f = vec<P>( (P)0.0, (P)0.0, (P)0.0 ); | |
79 | - k = vec<P>( (P)0.0, (P)0.0, (P)1.0 ); | |
80 | - E0 = vec<P>( (P)1.0, (P)0.0, (P)0.0 ); | |
81 | - omega = (P)2 * (P)3.14159; | |
79 | + f = vec<P>( (P)0, (P)0, (P)0 ); | |
82 | 80 | apod_res = 256; //set the default resolution for apodization filters |
83 | 81 | set_apod(_apod); //set the apodization function type |
84 | - | |
85 | 82 | } |
86 | 83 | |
87 | 84 | ///Numerical Aperature functions |
... | ... | @@ -96,28 +93,32 @@ public: |
96 | 93 | na[1] = _na1; |
97 | 94 | } |
98 | 95 | |
96 | + /*string str() : | |
97 | + { | |
98 | + stringstream ss; | |
99 | + ss<<"Beam Center: "<<k<<std::endl; | |
100 | + | |
101 | + return ss.str(); | |
102 | + }*/ | |
99 | 103 | |
100 | 104 | //Monte-Carlo decomposition into plane waves |
101 | - std::vector< planewave<P> > mc(unsigned int N, unsigned int seed = 0) | |
105 | + std::vector< planewave<P> > mc(unsigned int N = 100000, unsigned int seed = 0) const | |
102 | 106 | { |
103 | 107 | /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling |
104 | 108 | of a sphere and projecting these samples onto an inscribed sphere. |
105 | 109 | |
106 | - samples = rtsPointer to sample vectors specified as normalized cartesian coordinates | |
107 | - N = number of samples | |
108 | - kSph = incident light direction in spherical coordinates | |
109 | - NAin = internal obscuration NA | |
110 | - NAout = outer cassegrain NA | |
110 | + seed = seed for the random number generator | |
111 | 111 | */ |
112 | - | |
113 | 112 | srand(seed); //seed the random number generator |
114 | 113 | |
114 | + vec<P> k_hat = beam::k.norm(); | |
115 | + | |
115 | 116 | ///compute the rotation operator to transform (0, 0, 1) to k |
116 | - P cos_angle = k.dot(rts::vec<P>(0, 0, 1)); | |
117 | + P cos_angle = k_hat.dot(rts::vec<P>(0, 0, 1)); | |
117 | 118 | rts::matrix<P, 3> rotation; |
118 | 119 | if(cos_angle != 1.0) |
119 | 120 | { |
120 | - rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k).norm(); //compute the axis of rotation | |
121 | + rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k_hat).norm(); //compute the axis of rotation | |
121 | 122 | P angle = acos(cos_angle); //compute the angle of rotation |
122 | 123 | rts::quaternion<P> quat; //create a quaternion describing the rotation |
123 | 124 | quat.CreateRotation(angle, r_axis); |
... | ... | @@ -137,8 +138,6 @@ public: |
137 | 138 | |
138 | 139 | std::vector< planewave<P> > samples; //create a vector of plane waves |
139 | 140 | |
140 | - planewave<P> beam_center(k, E0, omega); //create a plane wave representing the beam center | |
141 | - | |
142 | 141 | //draw a distribution of random phi, z values |
143 | 142 | P z, phi, theta; |
144 | 143 | for(int i=0; i<N; i++) //for each sample |
... | ... | @@ -153,7 +152,7 @@ public: |
153 | 152 | vec<P> k_prime = rotation * cart; //create a sample vector |
154 | 153 | |
155 | 154 | //store a wave refracted along the given direction |
156 | - samples.push_back(beam_center.refract(k_prime) * apod(phi/PHI[1])); | |
155 | + samples.push_back(beam::refract(k_prime) * apod(phi/PHI[1])); | |
157 | 156 | } |
158 | 157 | |
159 | 158 | return samples; | ... | ... |
optics/efield.cuh
1 | +#ifndef RTS_EFIELD | |
2 | +#define RTS_EFIELD | |
3 | + | |
1 | 4 | #include "../math/complex.h" |
5 | +#include "../math/realfield.cuh" | |
2 | 6 | #include "../visualization/colormap.h" |
3 | -#include "../visualization/scalarfield.cuh" | |
4 | -#include "../visualization/vectorfield.cuh" | |
5 | 7 | #include "../optics/planewave.h" |
6 | 8 | #include "../cuda/devices.h" |
9 | +#include "../optics/beam.h" | |
7 | 10 | |
8 | 11 | |
9 | 12 | |
... | ... | @@ -26,7 +29,7 @@ __global__ void gpu_planewave2efield(complex<T>* X, complex<T>* Y, complex<T>* Z |
26 | 29 | vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); |
27 | 30 | vec<T> r(p[0], p[1], p[2]); |
28 | 31 | |
29 | - complex<T> x( 0.0f, w.omega * (w.k_hat.dot(r)) ); | |
32 | + complex<T> x( 0.0f, w.k.dot(r) ); | |
30 | 33 | |
31 | 34 | if(Y == NULL) //if this is a scalar simulation |
32 | 35 | X[i] += w.E0.len() * exp(x); //use the vector magnitude as the plane wave amplitude |
... | ... | @@ -77,7 +80,24 @@ __global__ void gpu_efield_polarization(complex<T>* X, complex<T>* Y, complex<T> |
77 | 80 | Px[i] = X[i].abs(); |
78 | 81 | Py[i] = Y[i].abs(); |
79 | 82 | Pz[i] = Z[i].abs(); |
83 | +} | |
84 | + | |
85 | +/* This function computes the sum of two complex fields and stores the result in *dest | |
86 | +*/ | |
87 | +template<typename T> | |
88 | +__global__ void gpu_efield_sum(complex<T>* dest, complex<T>* src, unsigned int r0, unsigned int r1) | |
89 | +{ | |
90 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
91 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
80 | 92 | |
93 | + //make sure that the thread indices are in-bounds | |
94 | + if(iu >= r0 || iv >= r1) return; | |
95 | + | |
96 | + //compute the index into the field | |
97 | + int i = iv*r0 + iu; | |
98 | + | |
99 | + //sum the fields | |
100 | + dest[i] += src[i]; | |
81 | 101 | } |
82 | 102 | |
83 | 103 | /* This class implements a discrete representation of an electromagnetic field |
... | ... | @@ -86,9 +106,9 @@ __global__ void gpu_efield_polarization(complex<T>* X, complex<T>* Y, complex<T> |
86 | 106 | template<typename P> |
87 | 107 | class efield |
88 | 108 | { |
89 | -private: | |
109 | +protected: | |
90 | 110 | |
91 | - bool scalar; | |
111 | + bool vector; | |
92 | 112 | |
93 | 113 | //gpu pointer to the field data |
94 | 114 | rts::complex<P>* X; |
... | ... | @@ -107,27 +127,14 @@ private: |
107 | 127 | //create one thread for each detector pixel |
108 | 128 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); |
109 | 129 | dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); |
130 | + | |
110 | 131 | |
111 | - gpu_planewave2efield<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], p, pos); | |
132 | + gpu_planewave2efield<P> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], p, pos); | |
112 | 133 | } |
113 | 134 | |
114 | - void clear() | |
115 | - { | |
116 | - cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]); | |
117 | - | |
118 | - if(!scalar) | |
119 | - { | |
120 | - cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]); | |
121 | - cudaMemset(Z, 0, sizeof(rts::complex<P>) * R[0] * R[1]); | |
122 | - } | |
123 | - } | |
124 | - | |
125 | - | |
126 | -public: | |
127 | - | |
128 | - efield(unsigned int res0, unsigned int res1, bool _scalar = false) | |
135 | + void init(unsigned int res0, unsigned int res1, bool _vector) | |
129 | 136 | { |
130 | - scalar = _scalar; //initialize field type | |
137 | + vector = _vector; //initialize field type | |
131 | 138 | |
132 | 139 | X = Y = Z = NULL; //initialize all pointers to NULL |
133 | 140 | R[0] = res0; |
... | ... | @@ -137,9 +144,9 @@ public: |
137 | 144 | cudaMalloc(&X, sizeof(rts::complex<P>) * R[0] * R[1]); |
138 | 145 | cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]); |
139 | 146 | |
140 | - if(!scalar) | |
147 | + if(vector) | |
141 | 148 | { |
142 | - std::cout<<"scalar:"; | |
149 | + std::cout<<"vector:"; | |
143 | 150 | cudaMalloc(&Y, sizeof(rts::complex<P>) * R[0] * R[1]); |
144 | 151 | cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]); |
145 | 152 | |
... | ... | @@ -148,12 +155,66 @@ public: |
148 | 155 | } |
149 | 156 | } |
150 | 157 | |
158 | + void destroy() | |
159 | + { | |
160 | + if(X != NULL) cudaFree(X); | |
161 | + if(Y != NULL) cudaFree(Y); | |
162 | + if(Z != NULL) cudaFree(Z); | |
163 | + } | |
164 | + | |
165 | + void shallowcpy(const rts::efield<P> & src) | |
166 | + { | |
167 | + vector = src.vector; | |
168 | + R[0] = src.R[0]; | |
169 | + R[1] = src.R[1]; | |
170 | + } | |
171 | + | |
172 | + void deepcpy(const rts::efield<P> & src) | |
173 | + { | |
174 | + //perform a shallow copy | |
175 | + shallowcpy(src); | |
176 | + | |
177 | + //allocate memory on the gpu | |
178 | + if(src.X != NULL) | |
179 | + { | |
180 | + cudaMalloc(&X, sizeof(rts::complex<P>) * R[0] * R[1]); | |
181 | + cudaMemcpy(X, src.X, sizeof(rts::complex<P>) * R[0] * R[1], cudaMemcpyDeviceToDevice); | |
182 | + } | |
183 | + if(src.Y != NULL) | |
184 | + { | |
185 | + cudaMalloc(&Y, sizeof(rts::complex<P>) * R[0] * R[1]); | |
186 | + cudaMemcpy(Y, src.Y, sizeof(rts::complex<P>) * R[0] * R[1], cudaMemcpyDeviceToDevice); | |
187 | + } | |
188 | + if(src.Z != NULL) | |
189 | + { | |
190 | + cudaMalloc(&Z, sizeof(rts::complex<P>) * R[0] * R[1]); | |
191 | + cudaMemcpy(Z, src.Z, sizeof(rts::complex<P>) * R[0] * R[1], cudaMemcpyDeviceToDevice); | |
192 | + } | |
193 | + } | |
194 | + | |
195 | +public: | |
196 | + efield(unsigned int res0, unsigned int res1, bool _vector = true) | |
197 | + { | |
198 | + init(res0, res1, _vector); | |
199 | + pos = rts::quad<P>(rts::vec<P>(-10, 0, -10), rts::vec<P>(-10, 0, 10), rts::vec<P>(10, 0, 10)); | |
200 | + } | |
201 | + | |
151 | 202 | //destructor |
152 | 203 | ~efield() |
153 | 204 | { |
154 | - if(X != NULL) cudaFree(X); | |
155 | - if(Y != NULL) cudaFree(Y); | |
156 | - if(Z != NULL) cudaFree(Z); | |
205 | + destroy(); | |
206 | + } | |
207 | + | |
208 | + ///Clear the field - set all points to zero | |
209 | + void clear() | |
210 | + { | |
211 | + cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]); | |
212 | + | |
213 | + if(vector) | |
214 | + { | |
215 | + cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]); | |
216 | + cudaMemset(Z, 0, sizeof(rts::complex<P>) * R[0] * R[1]); | |
217 | + } | |
157 | 218 | } |
158 | 219 | |
159 | 220 | void position(quad<P> _p) |
... | ... | @@ -172,56 +233,88 @@ public: |
172 | 233 | return ss.str(); |
173 | 234 | } |
174 | 235 | |
236 | + //assignment operator: assignment from another electric field | |
237 | + efield<P> & operator= (const efield<P> & rhs) | |
238 | + { | |
239 | + destroy(); //destroy any previous information about this field | |
240 | + deepcpy(rhs); //create a deep copy | |
241 | + return *this; //return the current object | |
242 | + } | |
243 | + | |
175 | 244 | //assignment operator: build an electric field from a plane wave |
176 | 245 | efield<P> & operator= (const planewave<P> & rhs) |
177 | 246 | { |
178 | 247 | |
179 | 248 | clear(); //clear any previous field data |
180 | 249 | from_planewave(rhs); //create a field from the planewave |
181 | - return *this; | |
250 | + return *this; //return the current object | |
182 | 251 | } |
183 | 252 | |
184 | - //assignment operator: add the electric field from a plane wave | |
185 | - efield<P> & operator+= (const planewave<P> & rhs) | |
253 | + //assignment operator: add an existing electric field | |
254 | + efield<P> & operator+= (const efield<P> & rhs) | |
186 | 255 | { |
187 | - //create a field from the planewave | |
188 | - from_planewave(rhs); | |
256 | + //if this field and the source field represent the same regions in space | |
257 | + if(R[0] == rhs.R[0] && R[1] == rhs.R[1] && pos == rhs.pos) | |
258 | + { | |
259 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
260 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
261 | + | |
262 | + //create one thread for each detector pixel | |
263 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
264 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
265 | + | |
266 | + //sum the fields | |
267 | + gpu_efield_sum <<<dimGrid, dimBlock>>> (X, rhs.X, R[0], R[1]); | |
268 | + if(Y != NULL) | |
269 | + { | |
270 | + gpu_efield_sum <<<dimGrid, dimBlock>>> (Y, rhs.Y, R[0], R[1]); | |
271 | + gpu_efield_sum <<<dimGrid, dimBlock>>> (Z, rhs.Z, R[0], R[1]); | |
272 | + } | |
273 | + } | |
274 | + else | |
275 | + { | |
276 | + std::cout<<"ERROR in efield: The two summed fields do not represent the same geometry."<<std::endl; | |
277 | + exit(1); | |
278 | + } | |
189 | 279 | |
190 | - return *this; | |
280 | + return *this; //return this object | |
191 | 281 | } |
192 | 282 | |
193 | - //assignment operator: build an electric field from a list of plane waves | |
194 | - efield<P> & operator= (const std::vector< planewave<P> > & rhs) | |
195 | - { | |
196 | - clear(); //clear any previous field data | |
197 | - for(unsigned int i = 0; i < rhs.size(); i++) | |
198 | - from_planewave(rhs[i]); | |
199 | - return *this; | |
200 | - } | |
283 | + efield<P> & operator= (const rts::beam<P> & rhs) | |
284 | + { | |
285 | + //get a vector of monte-carlo samples | |
286 | + std::vector< rts::planewave<P> > p_list = rhs.mc(); | |
287 | + | |
288 | + clear(); //clear any previous field data | |
289 | + for(unsigned int i = 0; i < p_list.size(); i++) | |
290 | + from_planewave(p_list[i]); | |
291 | + return *this; | |
292 | + } | |
293 | + | |
201 | 294 | |
202 | 295 | //return a scalar field representing field magnitude |
203 | - scalarfield<P> mag() | |
296 | + realfield<P, 1, true> mag() | |
204 | 297 | { |
205 | - int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
206 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
298 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
299 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
207 | 300 | |
208 | 301 | //create a scalar field to store the result |
209 | - scalarfield<P> M(R[0], R[1]); | |
302 | + realfield<P, 1, true> M(R[0], R[1]); | |
210 | 303 | |
211 | - //create one thread for each detector pixel | |
212 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
213 | - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
304 | + //create one thread for each detector pixel | |
305 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
306 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
214 | 307 | |
215 | - //compute the magnitude and store it in a scalar field | |
216 | - gpu_efield_magnitude<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.S); | |
308 | + //compute the magnitude and store it in a scalar field | |
309 | + gpu_efield_magnitude<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.ptr(0)); | |
217 | 310 | |
218 | 311 | return M; |
219 | 312 | } |
220 | 313 | |
221 | 314 | //return a vector field representing field polarization |
222 | - vectorfield<P> polarization() | |
315 | + realfield<P, 3, true> polarization() | |
223 | 316 | { |
224 | - if(scalar) | |
317 | + if(!vector) | |
225 | 318 | { |
226 | 319 | std::cout<<"ERROR: Cannot compute polarization of a scalar field."<<std::endl; |
227 | 320 | exit(1); |
... | ... | @@ -234,10 +327,10 @@ public: |
234 | 327 | dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); |
235 | 328 | |
236 | 329 | |
237 | - vectorfield<P> Pol(R[0], R[1]); //create a vector field to store the result | |
330 | + realfield<P, 3, true> Pol(R[0], R[1]); //create a vector field to store the result | |
238 | 331 | |
239 | 332 | //compute the polarization and store it in the vector field |
240 | - gpu_efield_polarization<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], Pol.S[0], Pol.S[1], Pol.S[2]); | |
333 | + gpu_efield_polarization<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], Pol.ptr(0), Pol.ptr(1), Pol.ptr(2)); | |
241 | 334 | |
242 | 335 | return Pol; //return the vector field |
243 | 336 | } |
... | ... | @@ -248,4 +341,6 @@ public: |
248 | 341 | |
249 | 342 | |
250 | 343 | |
251 | -} //end namespace rts | |
252 | 344 | \ No newline at end of file |
345 | +} //end namespace rts | |
346 | + | |
347 | +#endif | |
253 | 348 | \ No newline at end of file | ... | ... |
optics/planewave.h
... | ... | @@ -4,8 +4,13 @@ |
4 | 4 | #include <string> |
5 | 5 | #include <sstream> |
6 | 6 | |
7 | -#include "rts/math/vector.h" | |
8 | -#include "rts/math/quaternion.h" | |
7 | +#include "../math/vector.h" | |
8 | +#include "../math/quaternion.h" | |
9 | +#include "../math/constants.h" | |
10 | + | |
11 | +/*Basic conversions used here (assuming a vacuum) | |
12 | + lambda = | |
13 | +*/ | |
9 | 14 | |
10 | 15 | namespace rts{ |
11 | 16 | |
... | ... | @@ -13,37 +18,28 @@ template<typename P> |
13 | 18 | class planewave{ |
14 | 19 | |
15 | 20 | public: |
16 | - rts::vec<P> k_hat; //normalized planewave direction | |
17 | - P omega; //frequency | |
18 | - | |
19 | - rts::vec<P> E0; //amplitude | |
21 | + vec<P> k; //k = tau / lambda | |
22 | + vec<P> E0; //amplitude | |
20 | 23 | |
21 | 24 | |
22 | - | |
23 | - planewave() | |
24 | - { | |
25 | - omega = (P)2 * (P)3.14159; | |
26 | - k_hat = rts::vec<P>(0, 0, 1); | |
27 | - E0 = rts::vec<P>(1, 0, 0); | |
28 | - } | |
29 | 25 | ///constructor: create a plane wave propagating along z, polarized along x |
30 | - planewave(P _omega) | |
26 | + /*planewave(P lambda = (P)1) | |
31 | 27 | { |
32 | - omega = _omega; | |
33 | - k_hat = rts::vec<P>(0, 0, 1); | |
28 | + k = rts::vec<P>(0, 0, 1) * (TAU/lambda); | |
34 | 29 | E0 = rts::vec<P>(1, 0, 0); |
35 | - } | |
30 | + }*/ | |
36 | 31 | ///constructor: create a plane wave propagating along _k, polarized along _E0, at frequency _omega |
37 | - planewave(vec<P> _k, vec<P> _E0, P _omega) | |
32 | + planewave(vec<P> _k = rts::vec<P>(0, 0, TAU), vec<P> _E0 = rts::vec<P>(1, 0, 0)) | |
38 | 33 | { |
39 | - k_hat = _k.norm(); | |
40 | - vec<P> s = k_hat.cross(_E0); //re-orthogonalize | |
41 | - E0 = s.cross(k_hat); | |
42 | - omega = _omega; | |
34 | + k = _k; | |
35 | + vec<P> k_hat = _k.norm(); | |
36 | + | |
37 | + vec<P> s = (k.cross(_E0)).norm(); //compute an orthogonal side vector | |
38 | + vec<P> E0_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector | |
39 | + E0 = E0_hat * E0_hat.dot(_E0); //compute the projection of _E0 onto E0_hat | |
43 | 40 | } |
44 | 41 | |
45 | 42 | ///multiplication operator: scale E0 |
46 | - //assignment operator: build an electric field from a plane wave | |
47 | 43 | planewave<P> & operator* (const P & rhs) |
48 | 44 | { |
49 | 45 | |
... | ... | @@ -51,19 +47,34 @@ public: |
51 | 47 | return *this; |
52 | 48 | } |
53 | 49 | |
50 | + P lambda() const | |
51 | + { | |
52 | + return TAU / k.len(); | |
53 | + } | |
54 | + | |
55 | + P kmag() const | |
56 | + { | |
57 | + return k.len(); | |
58 | + } | |
59 | + | |
54 | 60 | |
55 | - planewave<P> refract(rts::vec<P> new_k) | |
61 | + planewave<P> refract(rts::vec<P> kn) const | |
56 | 62 | { |
57 | - new_k = new_k.norm(); //normalize new_k | |
63 | + vec<P> kn_hat = kn.norm(); //normalize new_k | |
64 | + vec<P> k_hat = k.norm(); | |
58 | 65 | |
59 | 66 | //compute the side vector (around which we will be rotating) |
60 | - rts::vec<P> s = k_hat.cross(E0.norm()); | |
67 | + vec<P> E0_hat = E0.norm(); | |
68 | + rts::vec<P> s = k_hat.cross(E0_hat); | |
61 | 69 | |
62 | 70 | //compute the projection of k' onto the k-E plane |
63 | - rts::vec<P> s_prime = s * (new_k.dot(s)); | |
71 | + rts::vec<P> s_prime = s * (kn_hat.dot(s)); | |
64 | 72 | |
65 | 73 | //compute the angle between |
66 | - P theta = acos(k_hat.dot( (new_k - s_prime).norm() )); | |
74 | + vec<P> kp_hat = (kn_hat - s_prime).norm(); | |
75 | + P theta = acos(k_hat.dot( kp_hat )); | |
76 | + if(kn_hat.dot(E0_hat) < 0) | |
77 | + theta = -theta; | |
67 | 78 | |
68 | 79 | //rotate E0 around s by theta |
69 | 80 | quaternion<P> q; |
... | ... | @@ -71,17 +82,60 @@ public: |
71 | 82 | rts::vec<P> E0_prime = q.toMatrix3() * E0; |
72 | 83 | |
73 | 84 | //create the refracted plane wave |
74 | - planewave<P> new_p(omega); | |
85 | + planewave<P> new_p; | |
75 | 86 | new_p.E0 = E0_prime; |
76 | - new_p.k_hat = new_k; | |
87 | + new_p.k = kn_hat * k.len(); | |
77 | 88 | |
78 | 89 | return new_p; |
90 | + /*vec<P> kn_hat = kn.norm(); //normalize kn | |
91 | + vec<P> k_hat = k.norm(); | |
92 | + vec<P> E0_hat = E0.norm(); | |
93 | + | |
94 | + vec<P> B = k_hat.cross(E0_hat); | |
95 | + planewave<P> newp; | |
96 | + newp.k = kn_hat * k.len(); | |
97 | + newp.E0 = B.cross(kn_hat).norm(); | |
98 | + std::cout<<newp.k.norm().dot(newp.E0.norm())<<std::endl; | |
99 | + return newp;*/ | |
100 | +/* | |
101 | + //compute the side vector (around which we will be rotating) | |
102 | + rts::vec<P> s_hat = k_hat.cross(E0_hat); | |
103 | + //std::cout<<s.len()<<std::endl; | |
104 | + | |
105 | + //project kn_hat into the k-E0 plane | |
106 | + rts::vec<P> sp = s_hat * (kn_hat.dot(s_hat)); //project k_new onto s | |
107 | + rts::vec<P> kp = (kn_hat - sp); //correct k_new so it lies on the E0-k plane | |
108 | + rts::vec<P> kp_hat = kp.norm(); | |
109 | + | |
110 | + //compute the angle and direction between k_prime and k | |
111 | + P theta = acos(k_hat.dot(kp_hat)); | |
112 | + if(kp_hat.dot(E0_hat) < 0) | |
113 | + theta = -theta; | |
114 | + | |
115 | + //rotate E0 around s by theta | |
116 | + quaternion<P> q; | |
117 | + q.CreateRotation(theta, s_hat); | |
118 | + rts::vec<P> E0n = q.toMatrix3() * E0; | |
119 | + rts::vec<P> E0n_hat = E0n.norm(); | |
120 | + | |
121 | + //std::cout<<s_hat.dot(kp_hat)<<" "<<s_hat.dot(E0n_hat)<<" "<<s_hat.dot(E0_hat)<<" "<<s_hat.dot(k_hat)<<" "<< | |
122 | + // E0_hat.dot(k_hat)<<" "<<k_hat.dot(kp_hat)<<" "<<E0_hat.dot(E0n_hat)<<" "<<E0n_hat.dot(kp_hat)<<std::endl; | |
123 | + | |
124 | + //create the refracted plane wave | |
125 | + //std::cout<<"cos: "<<cos(theta)<<" k*kp: "<<k_hat.dot(kp_hat)<<" E0*E0p: "<<E0_hat.dot(E0n_hat)<<" E0p*kp: "<<E0n_hat.dot(kp_hat)<<std::endl; | |
126 | + | |
127 | + //std::cout<<"kp*s: "<<kp.dot(s_hat)<<" E0n*s: "<<E0n.dot(s_hat)<<" |E0n|: "<<E0n.len()<<" E0n*kp: "<<E0n.dot(kp_hat)<<" E0n*kn: "<<E0n.dot(kn_hat)<<std::endl; | |
128 | + | |
129 | + planewave<P> new_p(kn_hat * k.len(), E0n); //create the plane wave | |
130 | + std::cout<<"|E0n|: "<<new_p.E0.len()<<" E0n*kn: "<<(new_p.E0.norm()).dot(new_p.k.norm())<<std::endl; | |
131 | + | |
132 | + return new_p;*/ | |
79 | 133 | } |
80 | 134 | |
81 | 135 | std::string str() |
82 | 136 | { |
83 | 137 | std::stringstream ss; |
84 | - ss<<E0<<" e^i ( "<<omega<<"t - "<<omega<<" "<<k_hat * omega<<" . r )"; | |
138 | + ss<<E0<<" e^i ( "<<k<<" . r )"; | |
85 | 139 | return ss.str(); |
86 | 140 | } |
87 | 141 | }; | ... | ... |
visualization/colormap.h
... | ... | @@ -86,7 +86,6 @@ static void initBrewer() |
86 | 86 | static void destroyBrewer() |
87 | 87 | { |
88 | 88 | HANDLE_ERROR(cudaFreeArray(gpuBrewer)); |
89 | - | |
90 | 89 | } |
91 | 90 | |
92 | 91 | template<class T> |
... | ... | @@ -99,8 +98,11 @@ __global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigne |
99 | 98 | //compute the normalized value on [minVal maxVal] |
100 | 99 | float a = (gpuSource[i] - minVal) / (maxVal - minVal); |
101 | 100 | |
101 | + //compensate for the additional space at the edges | |
102 | + a *= (T)(BREWER_CTRL_PTS - 1)/(T)(BREWER_CTRL_PTS); | |
103 | + | |
102 | 104 | //lookup the color |
103 | - float shift = 1.0/(2*BREWER_CTRL_PTS); | |
105 | + float shift = (T)1/(2*BREWER_CTRL_PTS); | |
104 | 106 | float4 color = tex1D(cudaTexBrewer, a+shift); |
105 | 107 | //float4 color = tex1D(cudaTexBrewer, a); |
106 | 108 | ... | ... |
visualization/scalarfield.cuh deleted
1 | -#ifndef RTS_SCALAR_SLICE | |
2 | -#define RTS_SCALAR_SLICE | |
3 | - | |
4 | -#include "../visualization/colormap.h" | |
5 | -#include "../envi/envi.h" | |
6 | -#include "../math/quad.h" | |
7 | -#include "../cuda/devices.h" | |
8 | -#include "cublas_v2.h" | |
9 | -#include <cuda_runtime.h> | |
10 | - | |
11 | -///Compute a Gaussian function in 3D (mostly for testing) | |
12 | -template<typename T> | |
13 | -__global__ void gpu_gaussian(T* dest, unsigned int r0, unsigned int r1, T mean, T std, rts::quad<T> shape) | |
14 | -{ | |
15 | - int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
16 | - int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
17 | - | |
18 | - //make sure that the thread indices are in-bounds | |
19 | - if(iu >= r0 || iv >= r1) return; | |
20 | - | |
21 | - //compute the index into the field | |
22 | - int i = iv*r0 + iu; | |
23 | - | |
24 | - T u = (T)iu / (T)r0; | |
25 | - T v = (T)iv / (T)r1; | |
26 | - | |
27 | - rts::vec<T> p = shape(u, v); | |
28 | - | |
29 | - T fx = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[0] - mean, 2) / (2 * std*std) ); | |
30 | - T fy = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[1] - mean, 2) / (2 * std*std) ); | |
31 | - T fz = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[2] - mean, 2) / (2 * std*std) ); | |
32 | - | |
33 | - dest[i] = fx * fy * fz; | |
34 | -} | |
35 | - | |
36 | -namespace rts { | |
37 | -template<typename P> | |
38 | -struct scalarfield | |
39 | -{ | |
40 | - //gpu pointer to the scalar slice | |
41 | - P* S; | |
42 | - | |
43 | - //resolution of the slice | |
44 | - int R[2]; | |
45 | - | |
46 | - quad<P> shape; | |
47 | - | |
48 | - scalarfield() | |
49 | - { | |
50 | - R[0] = R[1] = 0; | |
51 | - shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0)); | |
52 | - S = NULL; | |
53 | - | |
54 | - std::cout<<"scalarfield CONSTRUCTOR"<<std::endl; | |
55 | - } | |
56 | - | |
57 | - scalarfield(int x, int y) | |
58 | - { | |
59 | - //set the resolution | |
60 | - R[0] = x; | |
61 | - R[1] = y; | |
62 | - | |
63 | - shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0)); | |
64 | - | |
65 | - //allocate memory on the GPU | |
66 | - HANDLE_ERROR(cudaMalloc( (void**)&S, sizeof(P) * x * y )); | |
67 | - | |
68 | - std::cout<<"scalarfield CONSTRUCTOR"<<std::endl; | |
69 | - } | |
70 | - | |
71 | - ~scalarfield() | |
72 | - { | |
73 | - if(S != NULL) | |
74 | - HANDLE_ERROR(cudaFree(S)); | |
75 | - S = NULL; | |
76 | - R[0] = R[1] = 0; | |
77 | - | |
78 | - std::cout<<"scalarfield DESTRUCTOR"<<std::endl; | |
79 | - } | |
80 | - | |
81 | - void clear() | |
82 | - { | |
83 | - //this function sets the slice to zero | |
84 | - if(S != NULL) | |
85 | - HANDLE_ERROR(cudaMemset(S, 0, sizeof(P) * R[0] * R[1])); | |
86 | - } | |
87 | - | |
88 | - void toImage(std::string filename, P vmin, P vmax, rts::colormapType cmap = rts::cmBrewer) | |
89 | - { | |
90 | - rts::gpu2image<P>(S, filename, R[0], R[1], vmin, vmax, cmap); | |
91 | - } | |
92 | - | |
93 | - void toImage(std::string filename, bool positive = true, rts::colormapType cmap = rts::cmBrewer) | |
94 | - { | |
95 | - cublasStatus_t stat; | |
96 | - cublasHandle_t handle; | |
97 | - | |
98 | - //create a CUBLAS handle | |
99 | - stat = cublasCreate(&handle); | |
100 | - if(stat != CUBLAS_STATUS_SUCCESS) | |
101 | - { | |
102 | - std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | |
103 | - exit(1); | |
104 | - } | |
105 | - | |
106 | - //find the index of the value with maximum magnitude | |
107 | - int N = R[0] * R[1]; | |
108 | - int result; | |
109 | - | |
110 | - if(sizeof(P) == 4) | |
111 | - stat = cublasIsamax(handle, N, (float*)S, 1, &result); | |
112 | - else | |
113 | - stat = cublasIdamax(handle, N, (double*)S, 1, &result); | |
114 | - | |
115 | - //adjust for 1-based indexing | |
116 | - result -= 1; | |
117 | - | |
118 | - if(stat != CUBLAS_STATUS_SUCCESS) | |
119 | - { | |
120 | - std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | |
121 | - exit(1); | |
122 | - } | |
123 | - | |
124 | - | |
125 | - | |
126 | - //retrieve the maximum value | |
127 | - P maxVal; | |
128 | - HANDLE_ERROR(cudaMemcpy(&maxVal, S + result, sizeof(P), cudaMemcpyDeviceToHost)); | |
129 | - | |
130 | - //destroy the CUBLAS handle | |
131 | - cublasDestroy(handle); | |
132 | - | |
133 | - //output the image | |
134 | - if(positive) | |
135 | - toImage(filename, 0, maxVal, cmap); | |
136 | - else | |
137 | - toImage(filename, -abs(maxVal), abs(maxVal), cmap); | |
138 | - } | |
139 | - | |
140 | - | |
141 | - void toEnvi(std::string filename, P wavelength = 0, bool append = false) | |
142 | - { | |
143 | - std::string mode; | |
144 | - if(append) mode = "a"; | |
145 | - else mode = "w"; | |
146 | - | |
147 | - //open the ENVI file | |
148 | - EnviFile outfile(filename, mode); | |
149 | - | |
150 | - //get the scalar slice from the GPU to the CPU | |
151 | - int memsize = sizeof(P) * R[0] * R[1]; | |
152 | - P* cpuData = (P*) malloc( memsize ); | |
153 | - HANDLE_ERROR(cudaMemcpy( cpuData, S, memsize, cudaMemcpyDeviceToHost)); | |
154 | - | |
155 | - //add a band to the ENVI file | |
156 | - outfile.addBand(cpuData, R[0], R[1], wavelength); | |
157 | - | |
158 | - outfile.close(); | |
159 | - } | |
160 | - | |
161 | - //assignment operator | |
162 | - scalarfield & operator= (const scalarfield & rhs) | |
163 | - { | |
164 | - //de-allocate any existing GPU memory | |
165 | - if(S != NULL) | |
166 | - HANDLE_ERROR(cudaFree(S)); | |
167 | - | |
168 | - //copy the slice resolution | |
169 | - R[0] = rhs.R[0]; | |
170 | - R[1] = rhs.R[1]; | |
171 | - | |
172 | - //allocate the necessary memory | |
173 | - HANDLE_ERROR(cudaMalloc(&S, sizeof(P) * R[0] * R[1])); | |
174 | - | |
175 | - //copy the slice | |
176 | - HANDLE_ERROR(cudaMemcpy(S, rhs.S, sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
177 | - | |
178 | - | |
179 | - std::cout<<"Assignment operator."<<std::endl; | |
180 | - | |
181 | - return *this; | |
182 | - } | |
183 | - | |
184 | - ///copy constructor | |
185 | - scalarfield(const scalarfield &rhs) | |
186 | - { | |
187 | - //first make a shallow copy | |
188 | - R[0] = rhs.R[0]; | |
189 | - R[1] = rhs.R[1]; | |
190 | - | |
191 | - //do we have to make a deep copy? | |
192 | - if(rhs.S == NULL) | |
193 | - S = NULL; //no | |
194 | - else | |
195 | - { | |
196 | - //allocate the necessary memory | |
197 | - HANDLE_ERROR(cudaMalloc(&S, sizeof(P) * R[0] * R[1])); | |
198 | - | |
199 | - //copy the slice | |
200 | - HANDLE_ERROR(cudaMemcpy(S, rhs.S, sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
201 | - } | |
202 | - | |
203 | - std::cout<<"scalarfield COPY CONSTRUCTOR"<<std::endl; | |
204 | - } | |
205 | - | |
206 | - void gaussian(P mean, P std) | |
207 | - { | |
208 | - int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
209 | - int SQRT_BLOCK = (int)sqrt((float)maxThreads); | |
210 | - //create one thread for each detector pixel | |
211 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
212 | - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
213 | - | |
214 | - gpu_gaussian<float> <<<dimGrid, dimBlock>>> (S, R[0], R[1], mean, std, shape); | |
215 | - } | |
216 | - | |
217 | -}; | |
218 | - | |
219 | -} //end namespace rts | |
220 | - | |
221 | - | |
222 | - | |
223 | -#endif |
visualization/vectorfield.cuh deleted
1 | -#ifndef RTS_VECTORFIELD | |
2 | -#define RTS_VECTORFIELD | |
3 | - | |
4 | -#include "../visualization/colormap.h" | |
5 | -#include "../envi/envi.h" | |
6 | -#include "../math/quad.h" | |
7 | -#include "../cuda/devices.h" | |
8 | -#include "cublas_v2.h" | |
9 | -#include <cuda_runtime.h> | |
10 | -#include <iomanip> | |
11 | - | |
12 | -#include <qimage.h> | |
13 | -#include <qcolor.h> | |
14 | - | |
15 | - | |
16 | -namespace rts { | |
17 | -template<typename P, unsigned int N = 3> | |
18 | -struct vectorfield | |
19 | -{ | |
20 | -private: | |
21 | - void process_filename(std::string name, std::string &prefix, std::string &postfix, | |
22 | - std::string &ext, unsigned int &digits) | |
23 | - { | |
24 | - std::stringstream ss(name); | |
25 | - std::string item; | |
26 | - std::vector<std::string> elems; | |
27 | - while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension) | |
28 | - { | |
29 | - elems.push_back(item); | |
30 | - } | |
31 | - | |
32 | - prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters) | |
33 | - ext = elems[1]; //file extension (ex. .bmp, .png) | |
34 | - ext = std::string(".") + ext; //add a period back into the extension | |
35 | - | |
36 | - size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'') | |
37 | - size_t i1 = prefix.find_last_of("?"); | |
38 | - | |
39 | - postfix = prefix.substr(i1+1); | |
40 | - prefix = prefix.substr(0, i0); | |
41 | - | |
42 | - digits = i1 - i0 + 1; //compute the number of wildcards | |
43 | - | |
44 | - } | |
45 | - | |
46 | -public: | |
47 | - //gpu pointers to scalar slices | |
48 | - P* S[N]; | |
49 | - | |
50 | - //resolution of the slice | |
51 | - int R[2]; | |
52 | - | |
53 | - quad<P> shape; | |
54 | - | |
55 | - vectorfield() | |
56 | - { | |
57 | - R[0] = R[1] = 0; //set the initial resolution to 0 | |
58 | - shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0)); | |
59 | - for(int n=0; n<N; n++) //set each vector component to NULL | |
60 | - S[n]=NULL; | |
61 | - | |
62 | - std::cout<<"vectorfield CONSTRUCTOR"<<std::endl; | |
63 | - } | |
64 | - | |
65 | - vectorfield(int x, int y) | |
66 | - { | |
67 | - //set the resolution | |
68 | - R[0] = x; | |
69 | - R[1] = y; | |
70 | - | |
71 | - shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0)); | |
72 | - | |
73 | - //allocate memory on the GPU | |
74 | - for(int n=0; n<N; n++) | |
75 | - HANDLE_ERROR(cudaMalloc( (void**)&S[n], sizeof(P) * x * y )); | |
76 | - | |
77 | - std::cout<<"vectorfield CONSTRUCTOR"<<std::endl; | |
78 | - } | |
79 | - | |
80 | - ~vectorfield() | |
81 | - { | |
82 | - for(int n=0; n<N; n++) | |
83 | - if(S[n] != NULL) | |
84 | - { | |
85 | - HANDLE_ERROR(cudaFree(S[n])); | |
86 | - S[n] = NULL; | |
87 | - } | |
88 | - R[0] = R[1] = 0; | |
89 | - | |
90 | - std::cout<<"vectorfield DESTRUCTOR"<<std::endl; | |
91 | - } | |
92 | - | |
93 | - void clear() | |
94 | - { | |
95 | - //this function sets the slice to zero | |
96 | - for(int n=0; n<N; n++) | |
97 | - if(S[n] != NULL) | |
98 | - HANDLE_ERROR(cudaMemset(S[n], 0, sizeof(P) * R[0] * R[1])); | |
99 | - } | |
100 | - | |
101 | - void toImage(std::string filename, unsigned int n, P vmin, P vmax, rts::colormapType cmap = rts::cmBrewer) | |
102 | - { | |
103 | - rts::gpu2image<P>(S[n], filename, R[0], R[1], vmin, vmax, cmap); | |
104 | - } | |
105 | - | |
106 | - void toImage3(std::string filename, P vmin, P vmax) | |
107 | - { | |
108 | - std::cout<<"Implementing a 3-component rainbow colormap: "<<filename.c_str()<<std::endl; | |
109 | - //create a buffer for each RGB component | |
110 | - unsigned char* red = (unsigned char*)malloc(sizeof(unsigned char) * 3 * R[0] * R[1]); | |
111 | - unsigned char* green = (unsigned char*)malloc(sizeof(unsigned char) * 3 * R[0] * R[1]); | |
112 | - unsigned char* blue = (unsigned char*)malloc(sizeof(unsigned char) * 3 * R[0] * R[1]); | |
113 | - | |
114 | - //retrieve the buffered images for each component | |
115 | - rts::gpu2cpu<P>(S[0], red, R[0] * R[1], vmin, vmax); | |
116 | - rts::gpu2cpu<P>(S[1], green, R[0] * R[1], vmin, vmax); | |
117 | - rts::gpu2cpu<P>(S[2], blue, R[0] * R[1], vmin, vmax); | |
118 | - | |
119 | - QImage image(R[0], R[1], QImage::Format_RGB32); //create a QImage object | |
120 | - if(image.isNull()) //if it didn't work, throw an error | |
121 | - { | |
122 | - std::cout<<"Error creating QImage."<<std::endl; | |
123 | - return; | |
124 | - } | |
125 | - | |
126 | - int i; | |
127 | - unsigned char r, g, b; | |
128 | - unsigned int x, y; | |
129 | - for(y=0; y<R[1]; y++) | |
130 | - for(x=0; x<R[0]; x++) | |
131 | - { | |
132 | - //calculate the 1D index | |
133 | - i = y * R[0] + x; | |
134 | - | |
135 | - r = red[i * 3 + 0]; | |
136 | - g = green[i * 3 + 1]; | |
137 | - b = blue[i * 3 + 2]; | |
138 | - | |
139 | - //set the image pixel | |
140 | - QColor color(r, g, b); | |
141 | - image.setPixel(x, y, color.rgb()); | |
142 | - } | |
143 | - | |
144 | - if(!image.save(filename.c_str())) //if the image didn't save correctly, | |
145 | - std::cout<<"Error saving QImage."<<std::endl; // throw an error | |
146 | - } | |
147 | - | |
148 | - void toImages(std::string filename, bool positive = true, rts::colormapType cmap = rts::cmBrewer, bool globalmax = true) | |
149 | - { | |
150 | - std::string prefix, postfix, extension; | |
151 | - unsigned int digits; | |
152 | - process_filename(filename, prefix, postfix, extension, digits); //process the filename for wild cards | |
153 | - | |
154 | - cublasStatus_t stat; | |
155 | - cublasHandle_t handle; | |
156 | - | |
157 | - //create a CUBLAS handle | |
158 | - stat = cublasCreate(&handle); | |
159 | - if(stat != CUBLAS_STATUS_SUCCESS) | |
160 | - { | |
161 | - std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | |
162 | - exit(1); | |
163 | - } | |
164 | - | |
165 | - int L = R[0] * R[1]; //compute the number of discrete points in a slice | |
166 | - int result; //result of the max operation | |
167 | - | |
168 | - P maxVal[N]; //array stores minimum and maximum values | |
169 | - P maxAll = 0; //largest value in the data set | |
170 | - | |
171 | - //compute the maximum value for each vector component | |
172 | - for(int n=0; n<N; n++) | |
173 | - { | |
174 | - if(sizeof(P) == 4) | |
175 | - stat = cublasIsamax(handle, L, (const float*)S[n], 1, &result); | |
176 | - else | |
177 | - stat = cublasIdamax(handle, L, (const double*)S[n], 1, &result); | |
178 | - | |
179 | - result -= 1; //adjust for 1-based indexing | |
180 | - | |
181 | - if(stat != CUBLAS_STATUS_SUCCESS) //if there was a GPU error, terminate | |
182 | - { | |
183 | - std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | |
184 | - exit(1); | |
185 | - } | |
186 | - | |
187 | - //retrieve the maximum value for this slice and store it in the maxVal array | |
188 | - HANDLE_ERROR(cudaMemcpy(&maxVal[n], S[n] + result, sizeof(P), cudaMemcpyDeviceToHost)); | |
189 | - if(maxVal[n] > maxAll) //if maxVal is larger, update the maxAll variable | |
190 | - maxAll = maxVal[n]; | |
191 | - | |
192 | - } | |
193 | - | |
194 | - cublasDestroy(handle); //destroy the CUBLAS handle | |
195 | - | |
196 | - if(cmap == rts::cmRainbow && N == 3) //if the user specifies a rainbow colormap, and the vector has 3 elements | |
197 | - { | |
198 | - //implement a single image with RGB = XYZ | |
199 | - if(positive) | |
200 | - toImage3(prefix+postfix+extension, 0, maxAll); | |
201 | - else | |
202 | - toImage3(prefix+postfix+extension, 0, maxAll); | |
203 | - } | |
204 | - else | |
205 | - { | |
206 | - for(int n=0; n<N; n++) //for each image | |
207 | - { | |
208 | - stringstream ss; //assemble the file name | |
209 | - ss<<prefix<<std::setfill('0')<<std::setw(digits)<<n<<postfix<<extension; | |
210 | - std::cout<<ss.str()<<std::endl; | |
211 | - if(positive) //if the image is positive | |
212 | - { | |
213 | - std::cout<<"Positive: "<<n<<std::endl; | |
214 | - if(globalmax) //if the global maximum is used | |
215 | - toImage(ss.str(), n, 0, maxAll, cmap); //save the image using the global maximum | |
216 | - else | |
217 | - toImage(ss.str(), n, 0, maxVal[n], cmap); //save the image using the local maximum | |
218 | - } | |
219 | - else | |
220 | - { | |
221 | - std::cout<<"Negative: "<<n<<std::endl; | |
222 | - if(globalmax) //if the global maximum is used | |
223 | - toImage(ss.str(), n, -abs(maxVal[n]), abs(maxVal[n]), cmap); //save the image using the global maximum | |
224 | - else | |
225 | - toImage(ss.str(), n, -abs(maxVal[n]), abs(maxVal[n]), cmap); //save the image using the local maximum | |
226 | - } | |
227 | - } | |
228 | - } | |
229 | - } | |
230 | - | |
231 | - //assignment operator | |
232 | - vectorfield & operator= (const vectorfield & rhs) | |
233 | - { | |
234 | - //de-allocate any existing GPU memory | |
235 | - for(int n=0; n<N; n++) | |
236 | - if(S[n] != NULL) | |
237 | - HANDLE_ERROR(cudaFree(S[n])); | |
238 | - | |
239 | - //copy the slice resolution | |
240 | - R[0] = rhs.R[0]; | |
241 | - R[1] = rhs.R[1]; | |
242 | - | |
243 | - for(int n=0; n<N; n++) | |
244 | - { | |
245 | - //allocate the necessary memory | |
246 | - HANDLE_ERROR(cudaMalloc(&S[n], sizeof(P) * R[0] * R[1])); | |
247 | - | |
248 | - //copy the slice | |
249 | - HANDLE_ERROR(cudaMemcpy(S[n], rhs.S[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
250 | - } | |
251 | - | |
252 | - | |
253 | - std::cout<<"Assignment operator."<<std::endl; | |
254 | - | |
255 | - return *this; | |
256 | - } | |
257 | - | |
258 | - ///copy constructor | |
259 | - vectorfield(const vectorfield &rhs) | |
260 | - { | |
261 | - //first make a shallow copy | |
262 | - R[0] = rhs.R[0]; | |
263 | - R[1] = rhs.R[1]; | |
264 | - | |
265 | - //do we have to make a deep copy? | |
266 | - if(rhs.S[0] == NULL) //no? | |
267 | - { | |
268 | - for(int n=0; n<N; n++) //set all components to NULL | |
269 | - { | |
270 | - S[n] = NULL; | |
271 | - } | |
272 | - } | |
273 | - else | |
274 | - { | |
275 | - for(int n=0; n<N; n++) | |
276 | - { | |
277 | - //allocate the necessary memory | |
278 | - HANDLE_ERROR(cudaMalloc(&S[n], sizeof(P) * R[0] * R[1])); | |
279 | - | |
280 | - //copy the slice | |
281 | - HANDLE_ERROR(cudaMemcpy(S[n], rhs.S[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
282 | - } | |
283 | - } | |
284 | - | |
285 | - std::cout<<"vectorfield COPY CONSTRUCTOR"<<std::endl; | |
286 | - } | |
287 | - | |
288 | -}; | |
289 | - | |
290 | -} //end namespace rts | |
291 | - | |
292 | - | |
293 | - | |
294 | -#endif |