Commit a9275be58aaa6e83acfdb092f1221ec3d2ac9eb8
1 parent
4d67ff4e
added vector field visualization tools
Showing
15 changed files
with
1088 additions
and
357 deletions
Show diff stats
1 | +#ifndef RTS_CUDA_DEVICES | |
2 | +#define RTS_CUDA_DEVICES | |
3 | + | |
4 | +#include <cuda.h> | |
5 | + | |
6 | +namespace rts{ | |
7 | + | |
8 | +int maxThreadsPerBlock() | |
9 | +{ | |
10 | + int device; | |
11 | + cudaGetDevice(&device); //get the id of the current device | |
12 | + cudaDeviceProp props; //device property structure | |
13 | + cudaGetDeviceProperties(&props, device); | |
14 | + return props.maxThreadsPerBlock; | |
15 | +} | |
16 | +} //end namespace rts | |
17 | + | |
18 | +#endif | |
0 | 19 | \ No newline at end of file | ... | ... |
cuda/memory.h deleted
math/function.h
... | ... | @@ -18,62 +18,74 @@ class function |
18 | 18 | //function data |
19 | 19 | std::vector<dataPoint> f; |
20 | 20 | |
21 | - //comparison function for searching lambda | |
22 | - static bool findCeiling(dataPoint a, dataPoint b) | |
23 | - { | |
24 | - return (a.x > b.x); | |
21 | + //comparison function for searching lambda | |
22 | + static bool findCeiling(dataPoint a, dataPoint b) | |
23 | + { | |
24 | + return (a.x > b.x); | |
25 | 25 | } |
26 | 26 | |
27 | 27 | |
28 | 28 | public: |
29 | + function() | |
30 | + { | |
31 | + //insert(0, 0); | |
32 | + } | |
33 | + | |
29 | 34 | Y linear(X x) |
30 | 35 | { |
31 | - //declare an iterator | |
32 | - typename std::vector< dataPoint >::iterator it; | |
33 | - | |
34 | - dataPoint s; | |
35 | - s.x = x; | |
36 | - | |
37 | - it = search(f.begin(), f.end(), &s, &s + 1, &function<X, Y>::findCeiling); | |
38 | - | |
39 | - //if the wavelength is past the end of the list, return the back | |
40 | - if(it == f.end()) | |
41 | - return f.back().y; | |
42 | - //if the wavelength is before the beginning of the list, return the front | |
43 | - else if(it == f.begin()) | |
44 | - return f.front().y; | |
45 | - //otherwise interpolate | |
46 | - else | |
47 | - { | |
48 | - X xMax = (*it).x; | |
49 | - X xMin = (*(it - 1)).x; | |
50 | - //std::cout<<lMin<<"----------"<<lMax<<std::endl; | |
51 | - | |
52 | - X a = (x - xMin) / (xMax - xMin); | |
53 | - Y riMin = (*(it - 1)).y; | |
54 | - Y riMax = (*it).y; | |
55 | - Y interp; | |
56 | - interp = riMin * a + riMax * (1.0 - a); | |
57 | - return interp; | |
36 | + if(f.size() == 0) return (Y)0; //return zero if the function is empty | |
37 | + //declare an iterator | |
38 | + typename std::vector< dataPoint >::iterator it; | |
39 | + | |
40 | + dataPoint s; | |
41 | + s.x = x; | |
42 | + | |
43 | + it = search(f.begin(), f.end(), &s, &s + 1, &function<X, Y>::findCeiling); | |
44 | + | |
45 | + //if the wavelength is past the end of the list, return the back | |
46 | + if(it == f.end()) | |
47 | + return f.back().y; | |
48 | + //if the wavelength is before the beginning of the list, return the front | |
49 | + else if(it == f.begin()) | |
50 | + return f.front().y; | |
51 | + //otherwise interpolate | |
52 | + else | |
53 | + { | |
54 | + X xMax = (*it).x; | |
55 | + X xMin = (*(it - 1)).x; | |
56 | + //std::cout<<lMin<<"----------"<<lMax<<std::endl; | |
57 | + | |
58 | + X a = (x - xMin) / (xMax - xMin); | |
59 | + Y riMin = (*(it - 1)).y; | |
60 | + Y riMax = (*it).y; | |
61 | + Y interp; | |
62 | + interp = riMax * a + riMin * (1.0 - a); | |
63 | + return interp; | |
58 | 64 | } |
59 | 65 | } |
60 | 66 | |
67 | + ///add a data point to a function | |
61 | 68 | void insert(X x, Y y) |
62 | 69 | { |
63 | - //declare an iterator | |
64 | - typename std::vector< dataPoint >::iterator it; | |
65 | - | |
66 | - dataPoint s; | |
67 | - s.x = x; | |
68 | - s.y = y; | |
69 | - | |
70 | - it = search(f.begin(), f.end(), &s, &s + 1, &function<X, Y>::findCeiling); | |
71 | - | |
72 | - //if the function value is past the end of the vector, add it to the back | |
73 | - if(it == f.end()) | |
74 | - return f.push_back(s); | |
75 | - //otherwise add the value at the iterator position | |
76 | - else | |
70 | + dataPoint s; | |
71 | + s.x = x; | |
72 | + s.y = y; | |
73 | + | |
74 | + if(f.size() == 0 || f.back().x < x) | |
75 | + return f.push_back(s); | |
76 | + | |
77 | + //declare an iterator | |
78 | + typename std::vector< dataPoint >::iterator it; | |
79 | + | |
80 | + | |
81 | + | |
82 | + it = search(f.begin(), f.end(), &s, &s + 1, &function<X, Y>::findCeiling); | |
83 | + | |
84 | + //if the function value is past the end of the vector, add it to the back | |
85 | + if(it == f.end()) | |
86 | + return f.push_back(s); | |
87 | + //otherwise add the value at the iterator position | |
88 | + else | |
77 | 89 | { |
78 | 90 | f.insert(it, s); |
79 | 91 | } |
... | ... | @@ -90,16 +102,24 @@ public: |
90 | 102 | return f[i].y; |
91 | 103 | } |
92 | 104 | |
105 | + ///get the number of data points in the function | |
93 | 106 | unsigned int getN() |
94 | 107 | { |
95 | 108 | return f.size(); |
96 | 109 | } |
97 | 110 | |
111 | + //look up an indexed component | |
98 | 112 | dataPoint operator[](int i) |
99 | 113 | { |
100 | 114 | return f[i]; |
101 | 115 | } |
102 | 116 | |
117 | + ///linear interpolation | |
118 | + Y operator()(X x) | |
119 | + { | |
120 | + return linear(x); | |
121 | + } | |
122 | + | |
103 | 123 | function<X, Y> operator+(Y r) |
104 | 124 | { |
105 | 125 | function<X, Y> result; |
... | ... | @@ -114,10 +134,19 @@ public: |
114 | 134 | return result; |
115 | 135 | } |
116 | 136 | |
137 | + function<X, Y> & operator= (const Y & rhs) | |
138 | + { | |
139 | + f.clear(); | |
140 | + if(rhs != 0) //if the RHS is zero, just clear, otherwise add one value of RHS | |
141 | + insert(0, rhs); | |
142 | + | |
143 | + return *this; | |
144 | + } | |
145 | + | |
117 | 146 | |
118 | 147 | }; |
119 | 148 | |
120 | 149 | } //end namespace rts |
121 | 150 | |
122 | 151 | |
123 | -#endif | |
152 | +#endif | ... | ... |
math/matrix.h
... | ... | @@ -4,6 +4,7 @@ |
4 | 4 | //#include "rts/vector.h" |
5 | 5 | #include <string.h> |
6 | 6 | #include <iostream> |
7 | +#include "vector.h" | |
7 | 8 | |
8 | 9 | namespace rts |
9 | 10 | { |
... | ... | @@ -46,9 +47,9 @@ struct matrix |
46 | 47 | return *this; |
47 | 48 | }*/ |
48 | 49 | |
49 | - vector<T, N> operator*(vector<T, N> rhs) | |
50 | + vec<T, N> operator*(vec<T, N> rhs) | |
50 | 51 | { |
51 | - vector<T, N> result; | |
52 | + vec<T, N> result; | |
52 | 53 | |
53 | 54 | for(int r=0; r<N; r++) |
54 | 55 | for(int c=0; c<N; c++) | ... | ... |
math/point.h deleted
1 | -#ifndef RTS_rtsPoint_H | |
2 | -#define RTS_rtsPoint_H | |
3 | - | |
4 | -#include "rts/math/vector.h" | |
5 | -#include <string.h> | |
6 | -#include "rts/cuda/callable.h" | |
7 | - | |
8 | -namespace rts | |
9 | -{ | |
10 | - | |
11 | -template <class T, int N> | |
12 | -struct point | |
13 | -{ | |
14 | - T p[N]; | |
15 | - | |
16 | - CUDA_CALLABLE point() | |
17 | - { | |
18 | - | |
19 | - } | |
20 | - | |
21 | - //efficiency constructor, makes construction easier for 1D-4D vectors | |
22 | - CUDA_CALLABLE point(T x) | |
23 | - { | |
24 | - p[0] = x; | |
25 | - } | |
26 | - CUDA_CALLABLE point(T x, T y) | |
27 | - { | |
28 | - p[0] = x; | |
29 | - p[1] = y; | |
30 | - } | |
31 | - CUDA_CALLABLE point(T x, T y, T z) | |
32 | - { | |
33 | - p[0] = x; | |
34 | - p[1] = y; | |
35 | - p[2] = z; | |
36 | - } | |
37 | - CUDA_CALLABLE point(T x, T y, T z, T w) | |
38 | - { | |
39 | - p[0] = x; | |
40 | - p[1] = y; | |
41 | - p[2] = z; | |
42 | - p[3] = w; | |
43 | - } | |
44 | - | |
45 | - //arithmetic operators | |
46 | - CUDA_CALLABLE rts::point<T, N> operator+(vector<T, N> v) | |
47 | - { | |
48 | - rts::point<T, N> r; | |
49 | - | |
50 | - //calculate the position of the resulting point | |
51 | - for(int i=0; i<N; i++) | |
52 | - r.p[i] = p[i] + v.v[i]; | |
53 | - | |
54 | - return r; | |
55 | - } | |
56 | - CUDA_CALLABLE rts::point<T, N> operator-(vector<T, N> v) | |
57 | - { | |
58 | - rts::point<T, N> r; | |
59 | - | |
60 | - //calculate the position of the resulting point | |
61 | - for(int i=0; i<N; i++) | |
62 | - r.p[i] = p[i] - v.v[i]; | |
63 | - | |
64 | - return r; | |
65 | - } | |
66 | - CUDA_CALLABLE vector<T, N> operator-(point<T, N> rhs) | |
67 | - { | |
68 | - vector<T, N> r; | |
69 | - | |
70 | - //calculate the position of the resulting point | |
71 | - for(int i=0; i<N; i++) | |
72 | - r.v[i] = p[i] - rhs.p[i]; | |
73 | - | |
74 | - return r; | |
75 | - } | |
76 | - CUDA_CALLABLE rts::point<T, N> operator*(T rhs) | |
77 | - { | |
78 | - rts::point<T, N> r; | |
79 | - | |
80 | - //calculate the position of the resulting point | |
81 | - for(int i=0; i<N; i++) | |
82 | - r.p[i] = p[i] * rhs; | |
83 | - | |
84 | - return r; | |
85 | - } | |
86 | - | |
87 | - CUDA_CALLABLE point(const T(&data)[N]) | |
88 | - { | |
89 | - memcpy(p, data, sizeof(T) * N); | |
90 | - } | |
91 | - | |
92 | - std::string toStr() | |
93 | - { | |
94 | - std::stringstream ss; | |
95 | - | |
96 | - ss<<"("; | |
97 | - for(int i=0; i<N; i++) | |
98 | - { | |
99 | - ss<<p[i]; | |
100 | - if(i != N-1) | |
101 | - ss<<", "; | |
102 | - } | |
103 | - ss<<")"; | |
104 | - | |
105 | - return ss.str(); | |
106 | - } | |
107 | - | |
108 | - //bracket operator | |
109 | - CUDA_CALLABLE T& operator[](int i) | |
110 | - { | |
111 | - return p[i]; | |
112 | - } | |
113 | - | |
114 | -}; | |
115 | - | |
116 | -} //end namespace rts | |
117 | - | |
118 | -template <typename T, int N> | |
119 | -std::ostream& operator<<(std::ostream& os, rts::point<T, N> p) | |
120 | -{ | |
121 | - os<<p.toStr(); | |
122 | - return os; | |
123 | -} | |
124 | - | |
125 | -//arithmetic | |
126 | -template <typename T, int N> | |
127 | -CUDA_CALLABLE rts::point<T, N> operator*(T lhs, rts::point<T, N> rhs) | |
128 | -{ | |
129 | - rts::point<T, N> r; | |
130 | - | |
131 | - return rhs * lhs; | |
132 | -} | |
133 | - | |
134 | -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7 | |
135 | -//template<class T, int N> using rtsPoint = rts::point<T, N>; | |
136 | -//#endif | |
137 | - | |
138 | -#endif |
math/quad.h
... | ... | @@ -2,37 +2,36 @@ |
2 | 2 | #define RTS_RECT_H |
3 | 3 | |
4 | 4 | //enable CUDA_CALLABLE macro |
5 | -#include "rts/cuda/callable.h" | |
6 | -#include "rts/math/vector.h" | |
7 | -#include "rts/math/point.h" | |
8 | -#include "rts/math/triangle.h" | |
9 | -#include "rts/math/quaternion.h" | |
5 | +#include "../cuda/callable.h" | |
6 | +#include "../math/vector.h" | |
7 | +#include "../math/triangle.h" | |
8 | +#include "../math/quaternion.h" | |
10 | 9 | #include <iostream> |
11 | 10 | #include <algorithm> |
12 | 11 | |
13 | 12 | namespace rts{ |
14 | 13 | |
15 | 14 | //template for a quadangle class in ND space |
16 | -template <class T, int N> | |
15 | +template <class T, int N = 3> | |
17 | 16 | struct quad |
18 | 17 | { |
19 | 18 | /* |
20 | - C------------------>O | |
19 | + B------------------>C | |
21 | 20 | ^ ^ |
22 | 21 | | | |
23 | 22 | Y | |
24 | 23 | | | |
25 | 24 | | | |
26 | - A---------X-------->B | |
25 | + A---------X-------->O | |
27 | 26 | */ |
28 | 27 | |
29 | 28 | /*T A[N]; |
30 | 29 | T B[N]; |
31 | 30 | T C[N];*/ |
32 | 31 | |
33 | - rts::point<T, N> A; | |
34 | - rts::vector<T, N> X; | |
35 | - rts::vector<T, N> Y; | |
32 | + rts::vec<T, N> A; | |
33 | + rts::vec<T, N> X; | |
34 | + rts::vec<T, N> Y; | |
36 | 35 | |
37 | 36 | |
38 | 37 | CUDA_CALLABLE quad() |
... | ... | @@ -40,56 +39,56 @@ struct quad |
40 | 39 | |
41 | 40 | } |
42 | 41 | |
43 | - CUDA_CALLABLE quad(point<T, N> a, point<T, N> b, point<T, N> c) | |
42 | + CUDA_CALLABLE quad(vec<T, N> a, vec<T, N> b, vec<T, N> c) | |
44 | 43 | { |
45 | 44 | |
46 | - A = a; | |
47 | - X = b - a; | |
48 | - Y = c - a; | |
45 | + A = a; | |
46 | + Y = b - a; | |
47 | + X = c - a - Y; | |
49 | 48 | |
50 | 49 | } |
51 | 50 | |
52 | 51 | /**************************************************************** |
53 | 52 | Constructor - create a quad from two points and a normal |
54 | 53 | ****************************************************************/ |
55 | - CUDA_CALLABLE quad(rts::point<T, N> pMin, rts::point<T, N> pMax, rts::vector<T, N> normal) | |
54 | + /*CUDA_CALLABLE quad(rts::vec<T, N> pMin, rts::vec<T, N> pMax, rts::vec<T, N> normal) | |
56 | 55 | { |
57 | 56 | |
58 | 57 | //assign the corner point |
59 | 58 | A = pMin; |
60 | 59 | |
61 | 60 | //compute the vector from pMin to pMax |
62 | - rts::vector<T, 3> v0; | |
61 | + rts::vec<T, 3> v0; | |
63 | 62 | v0 = pMax - pMin; |
64 | 63 | |
65 | 64 | //compute the cross product of A and the plane normal |
66 | - rts::vector<T, 3> v1; | |
65 | + rts::vec<T, 3> v1; | |
67 | 66 | v1 = v0.cross(normal); |
68 | 67 | |
69 | 68 | |
70 | 69 | //calculate point B |
71 | - rts::point<T, 3> B; | |
70 | + rts::vec<T, 3> B; | |
72 | 71 | B = A + v0 * 0.5f + v1 * 0.5f; |
73 | 72 | |
74 | 73 | //calculate rtsPoint C |
75 | - rts::point<T, 3> C; | |
74 | + rts::vec<T, 3> C; | |
76 | 75 | C = A + v0 * 0.5f - v1 * 0.5f; |
77 | 76 | |
78 | 77 | //calculate X and Y |
79 | 78 | X = B - A; |
80 | 79 | Y = C - A; |
81 | - } | |
80 | + }*/ | |
82 | 81 | |
83 | 82 | /******************************************************************* |
84 | 83 | Constructor - create a quad from a position, normal, and rotation |
85 | 84 | *******************************************************************/ |
86 | - CUDA_CALLABLE quad(rts::point<T, N> c, rts::vector<T, N> normal, T width, T height, T theta) | |
85 | + CUDA_CALLABLE quad(rts::vec<T, N> c, rts::vec<T, N> normal, T width, T height, T theta) | |
87 | 86 | { |
88 | 87 | |
89 | 88 | //compute the X direction - start along world-space X |
90 | - Y = rts::vector<T, N>(0, 1, 0); | |
89 | + Y = rts::vec<T, N>(0, 1, 0); | |
91 | 90 | if(Y == normal) |
92 | - Y = rts::vector<T, N>(0, 0, 1); | |
91 | + Y = rts::vec<T, N>(0, 0, 1); | |
93 | 92 | |
94 | 93 | X = Y.cross(normal).norm(); |
95 | 94 | |
... | ... | @@ -118,33 +117,33 @@ struct quad |
118 | 117 | /******************************************* |
119 | 118 | Return the normal for the quad |
120 | 119 | *******************************************/ |
121 | - CUDA_CALLABLE rts::vector<T, N> n() | |
120 | + CUDA_CALLABLE rts::vec<T, N> n() | |
122 | 121 | { |
123 | 122 | return (X.cross(Y)).norm(); |
124 | 123 | } |
125 | 124 | |
126 | - CUDA_CALLABLE rts::point<T, N> p(T a, T b) | |
125 | + CUDA_CALLABLE rts::vec<T, N> p(T a, T b) | |
127 | 126 | { |
128 | - rts::point<T, N> result; | |
127 | + rts::vec<T, N> result; | |
129 | 128 | //given the two parameters a, b = [0 1], returns the position in world space |
130 | 129 | result = A + X * a + Y * b; |
131 | 130 | |
132 | 131 | return result; |
133 | 132 | } |
134 | 133 | |
135 | - CUDA_CALLABLE rts::point<T, N> operator()(T a, T b) | |
134 | + CUDA_CALLABLE rts::vec<T, N> operator()(T a, T b) | |
136 | 135 | { |
137 | 136 | return p(a, b); |
138 | 137 | } |
139 | 138 | |
140 | - std::string toStr() | |
139 | + std::string str() | |
141 | 140 | { |
142 | 141 | std::stringstream ss; |
143 | 142 | |
144 | 143 | ss<<"A = "<<A<<std::endl; |
145 | - ss<<"B = "<<A + X<<std::endl; | |
146 | - ss<<"C = "<<A + X + Y<<std::endl; | |
147 | - ss<<"D = "<<A + Y<<std::endl; | |
144 | + ss<<"B = "<<A + Y<<std::endl; | |
145 | + ss<<"C = "<<A + Y + X<<std::endl; | |
146 | + ss<<"D = "<<A + X<<std::endl; | |
148 | 147 | |
149 | 148 | return ss.str(); |
150 | 149 | |
... | ... | @@ -155,7 +154,7 @@ struct quad |
155 | 154 | //scales the plane by a scalar value |
156 | 155 | |
157 | 156 | //compute the center point |
158 | - rts::point<T, N> c = A + X*0.5f + Y*0.5f; | |
157 | + rts::vec<T, N> c = A + X*0.5f + Y*0.5f; | |
159 | 158 | |
160 | 159 | //create the new quadangle |
161 | 160 | quad<T, N> result; |
... | ... | @@ -167,7 +166,7 @@ struct quad |
167 | 166 | |
168 | 167 | } |
169 | 168 | |
170 | - CUDA_CALLABLE T dist(point<T, N> p) | |
169 | + CUDA_CALLABLE T dist(vec<T, N> p) | |
171 | 170 | { |
172 | 171 | //compute the distance between a point and this quad |
173 | 172 | |
... | ... | @@ -176,8 +175,8 @@ struct quad |
176 | 175 | triangle<T, N> T1(A+X+Y, A+X, A+Y); |
177 | 176 | |
178 | 177 | |
179 | - ptype d0 = T0.dist(p); | |
180 | - ptype d1 = T1.dist(p); | |
178 | + T d0 = T0.dist(p); | |
179 | + T d1 = T1.dist(p); | |
181 | 180 | |
182 | 181 | if(d0 < d1) |
183 | 182 | return d0; |
... | ... | @@ -185,7 +184,7 @@ struct quad |
185 | 184 | return d1; |
186 | 185 | } |
187 | 186 | |
188 | - CUDA_CALLABLE T dist_max(point<T, N> p) | |
187 | + CUDA_CALLABLE T dist_max(vec<T, N> p) | |
189 | 188 | { |
190 | 189 | T da = (A - p).len(); |
191 | 190 | T db = (A+X - p).len(); |
... | ... | @@ -201,7 +200,7 @@ struct quad |
201 | 200 | template <typename T, int N> |
202 | 201 | std::ostream& operator<<(std::ostream& os, rts::quad<T, N> R) |
203 | 202 | { |
204 | - os<<R.toStr(); | |
203 | + os<<R.str(); | |
205 | 204 | return os; |
206 | 205 | } |
207 | 206 | ... | ... |
math/quaternion.h
... | ... | @@ -16,7 +16,7 @@ public: |
16 | 16 | |
17 | 17 | void normalize(); |
18 | 18 | void CreateRotation(T theta, T axis_x, T axis_y, T axis_z); |
19 | - void CreateRotation(T theta, vector<T, 3> axis); | |
19 | + void CreateRotation(T theta, vec<T, 3> axis); | |
20 | 20 | quaternion<T> operator*(quaternion<T> &rhs); |
21 | 21 | matrix<T, 3> toMatrix3(); |
22 | 22 | matrix<T, 4> toMatrix4(); |
... | ... | @@ -48,7 +48,7 @@ void quaternion<T>::CreateRotation(T theta, T axis_x, T axis_y, T axis_z) |
48 | 48 | } |
49 | 49 | |
50 | 50 | template<typename T> |
51 | -void quaternion<T>::CreateRotation(T theta, vector<T, 3> axis) | |
51 | +void quaternion<T>::CreateRotation(T theta, vec<T, 3> axis) | |
52 | 52 | { |
53 | 53 | CreateRotation(theta, axis[0], axis[1], axis[2]); |
54 | 54 | } | ... | ... |
math/triangle.h
... | ... | @@ -4,12 +4,11 @@ |
4 | 4 | //enable CUDA_CALLABLE macro |
5 | 5 | #include "rts/cuda/callable.h" |
6 | 6 | #include "rts/math/vector.h" |
7 | -#include "rts/math/point.h" | |
8 | 7 | #include <iostream> |
9 | 8 | |
10 | 9 | namespace rts{ |
11 | 10 | |
12 | -template <class T, int N> | |
11 | +template <class T, int N=3> | |
13 | 12 | struct triangle |
14 | 13 | { |
15 | 14 | /* |
... | ... | @@ -24,15 +23,15 @@ struct triangle |
24 | 23 | */ |
25 | 24 | private: |
26 | 25 | |
27 | - point<T, N> A; | |
28 | - point<T, N> B; | |
29 | - point<T, N> C; | |
26 | + vec<T, N> A; | |
27 | + vec<T, N> B; | |
28 | + vec<T, N> C; | |
30 | 29 | |
31 | - CUDA_CALLABLE point<T, N> _p(T s, T t) | |
30 | + CUDA_CALLABLE vec<T, N> _p(T s, T t) | |
32 | 31 | { |
33 | 32 | //This function returns the point specified by p = A + s(B-A) + t(C-A) |
34 | - vector<T, N> E0 = B-A; | |
35 | - vector<T, N> E1 = C-A; | |
33 | + vec<T, N> E0 = B-A; | |
34 | + vec<T, N> E1 = C-A; | |
36 | 35 | |
37 | 36 | return A + s*E0 + t*E1; |
38 | 37 | } |
... | ... | @@ -47,26 +46,26 @@ struct triangle |
47 | 46 | |
48 | 47 | } |
49 | 48 | |
50 | - CUDA_CALLABLE triangle(point<T, N> a, point<T, N> b, point<T, N> c) | |
49 | + CUDA_CALLABLE triangle(vec<T, N> a, vec<T, N> b, vec<T, N> c) | |
51 | 50 | { |
52 | 51 | A = a; |
53 | 52 | B = b; |
54 | 53 | C = c; |
55 | 54 | } |
56 | 55 | |
57 | - CUDA_CALLABLE rts::point<T, N> operator()(T s, T t) | |
56 | + CUDA_CALLABLE rts::vec<T, N> operator()(T s, T t) | |
58 | 57 | { |
59 | 58 | return _p(s, t); |
60 | 59 | } |
61 | 60 | |
62 | - CUDA_CALLABLE point<T, N> nearest(point<T, N> p) | |
61 | + CUDA_CALLABLE vec<T, N> nearest(vec<T, N> p) | |
63 | 62 | { |
64 | 63 | //comptue the distance between a point and this triangle |
65 | 64 | // This code is adapted from: http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf |
66 | 65 | |
67 | - vector<T, N> E0 = B-A; | |
68 | - vector<T, N> E1 = C-A; | |
69 | - vector<T, N> D = A - p; | |
66 | + vec<T, N> E0 = B-A; | |
67 | + vec<T, N> E1 = C-A; | |
68 | + vec<T, N> D = A - p; | |
70 | 69 | |
71 | 70 | T a = E0.dot(E0); |
72 | 71 | T b = E0.dot(E1); |
... | ... | @@ -125,7 +124,7 @@ struct triangle |
125 | 124 | { |
126 | 125 | //region 0 |
127 | 126 | //std::cout<<"Region 0"<<std::endl; |
128 | - T invDet = (ptype)1.0/det; | |
127 | + T invDet = (T)1.0/det; | |
129 | 128 | s *= invDet; |
130 | 129 | t *= invDet; |
131 | 130 | //done |
... | ... | @@ -176,9 +175,9 @@ struct triangle |
176 | 175 | |
177 | 176 | } |
178 | 177 | |
179 | - CUDA_CALLABLE T dist(point<T, N> p) | |
178 | + CUDA_CALLABLE T dist(vec<T, N> p) | |
180 | 179 | { |
181 | - point<T, N> n = nearest(p); | |
180 | + vec<T, N> n = nearest(p); | |
182 | 181 | |
183 | 182 | return (p - n).len(); |
184 | 183 | } | ... | ... |
math/vector.h
... | ... | @@ -4,20 +4,21 @@ |
4 | 4 | #include <iostream> |
5 | 5 | #include <cmath> |
6 | 6 | #include <sstream> |
7 | -//#include "rts/point.h" | |
7 | +//#include "rts/math/point.h" | |
8 | 8 | #include "rts/cuda/callable.h" |
9 | 9 | |
10 | + | |
10 | 11 | namespace rts |
11 | 12 | { |
12 | 13 | |
13 | 14 | |
14 | 15 | |
15 | -template <class T, int N> | |
16 | -struct vector | |
16 | +template <class T, int N=3> | |
17 | +struct vec | |
17 | 18 | { |
18 | 19 | T v[N]; |
19 | 20 | |
20 | - CUDA_CALLABLE vector() | |
21 | + CUDA_CALLABLE vec() | |
21 | 22 | { |
22 | 23 | //memset(v, 0, sizeof(T) * N); |
23 | 24 | for(int i=0; i<N; i++) |
... | ... | @@ -25,19 +26,30 @@ struct vector |
25 | 26 | } |
26 | 27 | |
27 | 28 | //efficiency constructor, makes construction easier for 1D-4D vectors |
28 | - CUDA_CALLABLE vector(T x, T y = (T)0.0, T z = (T)0.0, T w = (T)0.0) | |
29 | + CUDA_CALLABLE vec(T x) | |
30 | + { | |
31 | + v[0] = x; | |
32 | + } | |
33 | + CUDA_CALLABLE vec(T x, T y) | |
34 | + { | |
35 | + v[0] = x; | |
36 | + v[1] = y; | |
37 | + } | |
38 | + CUDA_CALLABLE vec(T x, T y, T z) | |
29 | 39 | { |
30 | - if(N >= 1) | |
31 | - v[0] = x; | |
32 | - if(N >= 2) | |
33 | - v[1] = y; | |
34 | - if(N >= 3) | |
35 | - v[2] = z; | |
36 | - if(N >= 4) | |
37 | - v[3] = w; | |
40 | + v[0] = x; | |
41 | + v[1] = y; | |
42 | + v[2] = z; | |
43 | + } | |
44 | + CUDA_CALLABLE vec(T x, T y, T z, T w) | |
45 | + { | |
46 | + v[0] = x; | |
47 | + v[1] = y; | |
48 | + v[2] = z; | |
49 | + v[3] = w; | |
38 | 50 | } |
39 | 51 | |
40 | - CUDA_CALLABLE vector(const T(&data)[N]) | |
52 | + CUDA_CALLABLE vec(const T(&data)[N]) | |
41 | 53 | { |
42 | 54 | memcpy(v, data, sizeof(T) * N); |
43 | 55 | } |
... | ... | @@ -54,12 +66,12 @@ struct vector |
54 | 66 | |
55 | 67 | } |
56 | 68 | |
57 | - CUDA_CALLABLE vector<T, N> cart2sph() | |
69 | + CUDA_CALLABLE vec<T, N> cart2sph() | |
58 | 70 | { |
59 | 71 | //convert the vector from cartesian to spherical coordinates |
60 | 72 | //x, y, z -> r, theta, phi (where theta = 0 to 2*pi) |
61 | 73 | |
62 | - vector<T, N> sph; | |
74 | + vec<T, N> sph; | |
63 | 75 | sph[0] = std::sqrt(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]); |
64 | 76 | sph[1] = std::atan2(v[1], v[0]); |
65 | 77 | sph[2] = std::acos(v[2] / sph[0]); |
... | ... | @@ -67,12 +79,12 @@ struct vector |
67 | 79 | return sph; |
68 | 80 | } |
69 | 81 | |
70 | - CUDA_CALLABLE vector<T, N> sph2cart() | |
82 | + CUDA_CALLABLE vec<T, N> sph2cart() | |
71 | 83 | { |
72 | 84 | //convert the vector from cartesian to spherical coordinates |
73 | 85 | //r, theta, phi -> x, y, z (where theta = 0 to 2*pi) |
74 | 86 | |
75 | - vector<T, N> cart; | |
87 | + vec<T, N> cart; | |
76 | 88 | cart[0] = v[0] * std::cos(v[1]) * std::sin(v[2]); |
77 | 89 | cart[1] = v[0] * std::sin(v[1]) * std::sin(v[2]); |
78 | 90 | cart[2] = v[0] * std::cos(v[2]); |
... | ... | @@ -80,10 +92,10 @@ struct vector |
80 | 92 | return cart; |
81 | 93 | } |
82 | 94 | |
83 | - CUDA_CALLABLE vector<T, N> norm() | |
95 | + CUDA_CALLABLE vec<T, N> norm() | |
84 | 96 | { |
85 | 97 | //compute and return the vector norm |
86 | - vector<T, N> result; | |
98 | + vec<T, N> result; | |
87 | 99 | |
88 | 100 | //compute the vector length |
89 | 101 | T l = len(); |
... | ... | @@ -97,9 +109,9 @@ struct vector |
97 | 109 | return result; |
98 | 110 | } |
99 | 111 | |
100 | - CUDA_CALLABLE vector<T, 3> cross(vector<T, 3> rhs) | |
112 | + CUDA_CALLABLE vec<T, 3> cross(vec<T, 3> rhs) | |
101 | 113 | { |
102 | - vector<T, 3> result; | |
114 | + vec<T, 3> result; | |
103 | 115 | |
104 | 116 | //compute the cross product (only valid for 3D vectors) |
105 | 117 | result[0] = v[1] * rhs[2] - v[2] * rhs[1]; |
... | ... | @@ -109,7 +121,7 @@ struct vector |
109 | 121 | return result; |
110 | 122 | } |
111 | 123 | |
112 | - CUDA_CALLABLE T dot(vector<T, N> rhs) | |
124 | + CUDA_CALLABLE T dot(vec<T, N> rhs) | |
113 | 125 | { |
114 | 126 | T result = (T)0; |
115 | 127 | |
... | ... | @@ -121,36 +133,36 @@ struct vector |
121 | 133 | } |
122 | 134 | |
123 | 135 | //arithmetic |
124 | - CUDA_CALLABLE vector<T, N> operator+(vector<T, N> rhs) | |
136 | + CUDA_CALLABLE vec<T, N> operator+(vec<T, N> rhs) | |
125 | 137 | { |
126 | - vector<T, N> result; | |
138 | + vec<T, N> result; | |
127 | 139 | |
128 | 140 | for(int i=0; i<N; i++) |
129 | 141 | result.v[i] = v[i] + rhs.v[i]; |
130 | 142 | |
131 | 143 | return result; |
132 | 144 | } |
133 | - CUDA_CALLABLE vector<T, N> operator-(vector<T, N> rhs) | |
145 | + CUDA_CALLABLE vec<T, N> operator-(vec<T, N> rhs) | |
134 | 146 | { |
135 | - vector<T, N> result; | |
147 | + vec<T, N> result; | |
136 | 148 | |
137 | 149 | for(int i=0; i<N; i++) |
138 | 150 | result.v[i] = v[i] - rhs.v[i]; |
139 | 151 | |
140 | 152 | return result; |
141 | 153 | } |
142 | - CUDA_CALLABLE vector<T, N> operator*(T rhs) | |
154 | + CUDA_CALLABLE vec<T, N> operator*(T rhs) | |
143 | 155 | { |
144 | - vector<T, N> result; | |
156 | + vec<T, N> result; | |
145 | 157 | |
146 | 158 | for(int i=0; i<N; i++) |
147 | 159 | result.v[i] = v[i] * rhs; |
148 | 160 | |
149 | 161 | return result; |
150 | 162 | } |
151 | - CUDA_CALLABLE vector<T, N> operator/(T rhs) | |
163 | + CUDA_CALLABLE vec<T, N> operator/(T rhs) | |
152 | 164 | { |
153 | - vector<T, N> result; | |
165 | + vec<T, N> result; | |
154 | 166 | |
155 | 167 | for(int i=0; i<N; i++) |
156 | 168 | result.v[i] = v[i] / rhs; |
... | ... | @@ -158,7 +170,16 @@ struct vector |
158 | 170 | return result; |
159 | 171 | } |
160 | 172 | |
161 | - CUDA_CALLABLE bool operator==(vector<T, N> rhs) | |
173 | + //conversion from a point | |
174 | + /*CUDA_CALLABLE vector<T, N> & operator=(point<T, N> rhs) | |
175 | + { | |
176 | + for(int n=0; n<N; n++) | |
177 | + v[n] = rhs.p[n]; | |
178 | + | |
179 | + return *this; | |
180 | + }*/ | |
181 | + | |
182 | + CUDA_CALLABLE bool operator==(vec<T, N> rhs) | |
162 | 183 | { |
163 | 184 | if ( (rhs.v[0] == v[0]) && (rhs.v[1] == v[1]) && (rhs.v[2] == v[2]) ) |
164 | 185 | return true; |
... | ... | @@ -194,7 +215,7 @@ struct vector |
194 | 215 | } //end namespace rts |
195 | 216 | |
196 | 217 | template <typename T, int N> |
197 | -std::ostream& operator<<(std::ostream& os, rts::vector<T, N> v) | |
218 | +std::ostream& operator<<(std::ostream& os, rts::vec<T, N> v) | |
198 | 219 | { |
199 | 220 | os<<v.toStr(); |
200 | 221 | return os; |
... | ... | @@ -202,9 +223,9 @@ std::ostream& operator<<(std::ostream& os, rts::vector<T, N> v) |
202 | 223 | |
203 | 224 | //arithmetic operators |
204 | 225 | template <typename T, int N> |
205 | -CUDA_CALLABLE rts::vector<T, N> operator-(rts::vector<T, N> v) | |
226 | +CUDA_CALLABLE rts::vec<T, N> operator-(rts::vec<T, N> v) | |
206 | 227 | { |
207 | - rts::vector<T, N> r; | |
228 | + rts::vec<T, N> r; | |
208 | 229 | |
209 | 230 | //negate the vector |
210 | 231 | for(int i=0; i<N; i++) |
... | ... | @@ -214,9 +235,9 @@ CUDA_CALLABLE rts::vector<T, N> operator-(rts::vector<T, N> v) |
214 | 235 | } |
215 | 236 | |
216 | 237 | template <typename T, int N> |
217 | -CUDA_CALLABLE rts::vector<T, N> operator*(T lhs, rts::vector<T, N> rhs) | |
238 | +CUDA_CALLABLE rts::vec<T, N> operator*(T lhs, rts::vec<T, N> rhs) | |
218 | 239 | { |
219 | - rts::vector<T, N> r; | |
240 | + rts::vec<T, N> r; | |
220 | 241 | |
221 | 242 | return rhs * lhs; |
222 | 243 | } | ... | ... |
1 | +#ifndef RTS_BEAM | |
2 | +#define RTS_BEAM | |
3 | + | |
4 | +#include "../math/vector.h" | |
5 | +#include "../math/function.h" | |
6 | +#include <vector> | |
7 | + | |
8 | +namespace rts{ | |
9 | + | |
10 | +template<typename P> | |
11 | +class beam | |
12 | +{ | |
13 | +public: | |
14 | + enum beam_type {Uniform, Bartlett, Hamming, Hanning}; | |
15 | + | |
16 | +private: | |
17 | + | |
18 | + P na[2]; //numerical aperature of the focusing optics | |
19 | + vec<P> f; //focal point | |
20 | + vec<P> k; //direction vector | |
21 | + vec<P> E0; //polarization direction | |
22 | + P omega; //frequency | |
23 | + | |
24 | + function<P, P> apod; //apodization function | |
25 | + unsigned int apod_res; //resolution of complex apodization filters | |
26 | + | |
27 | + void apod_uniform() | |
28 | + { | |
29 | + apod = (P)1; | |
30 | + } | |
31 | + void apod_bartlett() | |
32 | + { | |
33 | + apod = (P)1; | |
34 | + apod.insert((P)1, (P)0); | |
35 | + } | |
36 | + void apod_hanning() | |
37 | + { | |
38 | + apod = (P)0; | |
39 | + P x, y; | |
40 | + for(unsigned int n=0; n<apod_res; n++) | |
41 | + { | |
42 | + x = (P)n/(P)apod_res; | |
43 | + y = pow( cos( ((P)3.14159 * x) / 2 ), 2); | |
44 | + apod.insert(x, y); | |
45 | + } | |
46 | + } | |
47 | + void apod_hamming() | |
48 | + { | |
49 | + apod = (P)0; | |
50 | + P x, y; | |
51 | + for(unsigned int n=0; n<apod_res; n++) | |
52 | + { | |
53 | + x = (P)n/(P)apod_res; | |
54 | + y = (P)27/(P)50 + ( (P)23/(P)50 ) * cos((P)3.14159 * x); | |
55 | + apod.insert(x, y); | |
56 | + } | |
57 | + } | |
58 | + | |
59 | + void set_apod(beam_type type) | |
60 | + { | |
61 | + if(type == Uniform) | |
62 | + apod_uniform(); | |
63 | + if(type == Bartlett) | |
64 | + apod_bartlett(); | |
65 | + if(type == Hanning) | |
66 | + apod_hanning(); | |
67 | + if(type == Hamming) | |
68 | + apod_hamming(); | |
69 | + } | |
70 | + | |
71 | +public: | |
72 | + | |
73 | + ///constructor: build a default beam (NA=1.0) | |
74 | + beam(beam_type _apod = Uniform) | |
75 | + { | |
76 | + na[0] = (P)0.0; | |
77 | + na[1] = (P)1.0; | |
78 | + f = vec<P>( (P)0.0, (P)0.0, (P)0.0 ); | |
79 | + k = vec<P>( (P)0.0, (P)0.0, (P)1.0 ); | |
80 | + E0 = vec<P>( (P)1.0, (P)0.0, (P)0.0 ); | |
81 | + omega = (P)2 * (P)3.14159; | |
82 | + apod_res = 256; //set the default resolution for apodization filters | |
83 | + set_apod(_apod); //set the apodization function type | |
84 | + | |
85 | + } | |
86 | + | |
87 | + ///Numerical Aperature functions | |
88 | + void NA(P _na) | |
89 | + { | |
90 | + na[0] = (P)0; | |
91 | + na[1] = _na; | |
92 | + } | |
93 | + void NA(P _na0, P _na1) | |
94 | + { | |
95 | + na[0] = _na0; | |
96 | + na[1] = _na1; | |
97 | + } | |
98 | + | |
99 | + | |
100 | + //Monte-Carlo decomposition into plane waves | |
101 | + std::vector< planewave<P> > mc(unsigned int N, unsigned int seed = 0) | |
102 | + { | |
103 | + /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling | |
104 | + of a sphere and projecting these samples onto an inscribed sphere. | |
105 | + | |
106 | + samples = rtsPointer to sample vectors specified as normalized cartesian coordinates | |
107 | + N = number of samples | |
108 | + kSph = incident light direction in spherical coordinates | |
109 | + NAin = internal obscuration NA | |
110 | + NAout = outer cassegrain NA | |
111 | + */ | |
112 | + | |
113 | + srand(seed); //seed the random number generator | |
114 | + | |
115 | + ///compute the rotation operator to transform (0, 0, 1) to k | |
116 | + P cos_angle = k.dot(rts::vec<P>(0, 0, 1)); | |
117 | + rts::matrix<P, 3> rotation; | |
118 | + if(cos_angle != 1.0) | |
119 | + { | |
120 | + rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k).norm(); //compute the axis of rotation | |
121 | + P angle = acos(cos_angle); //compute the angle of rotation | |
122 | + rts::quaternion<P> quat; //create a quaternion describing the rotation | |
123 | + quat.CreateRotation(angle, r_axis); | |
124 | + rotation = quat.toMatrix3(); //compute the rotation matrix | |
125 | + } | |
126 | + | |
127 | + //find the phi values associated with the cassegrain ring | |
128 | + P PHI[2]; | |
129 | + PHI[0] = (P)asin(na[0]); | |
130 | + PHI[1] = (P)asin(na[1]); | |
131 | + | |
132 | + //calculate the z-axis cylinder coordinates associated with these angles | |
133 | + P Z[2]; | |
134 | + Z[0] = cos(PHI[0]); | |
135 | + Z[1] = cos(PHI[1]); | |
136 | + P range = Z[0] - Z[1]; | |
137 | + | |
138 | + std::vector< planewave<P> > samples; //create a vector of plane waves | |
139 | + | |
140 | + planewave<P> beam_center(k, E0, omega); //create a plane wave representing the beam center | |
141 | + | |
142 | + //draw a distribution of random phi, z values | |
143 | + P z, phi, theta; | |
144 | + for(int i=0; i<N; i++) //for each sample | |
145 | + { | |
146 | + z = ((P)rand() / (P)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder | |
147 | + theta = ((P)rand() / (P)RAND_MAX) * 2 * (P)3.14159; | |
148 | + phi = acos(z); //project onto the sphere, computing phi in spherical coordinates | |
149 | + | |
150 | + //compute and store cartesian coordinates | |
151 | + rts::vec<P> spherical(1, theta, phi); //convert from spherical to cartesian coordinates | |
152 | + rts::vec<P> cart = spherical.sph2cart(); | |
153 | + vec<P> k_prime = rotation * cart; //create a sample vector | |
154 | + | |
155 | + //store a wave refracted along the given direction | |
156 | + samples.push_back(beam_center.refract(k_prime) * apod(phi/PHI[1])); | |
157 | + } | |
158 | + | |
159 | + return samples; | |
160 | + } | |
161 | + | |
162 | +}; | |
163 | + | |
164 | +} | |
165 | + | |
166 | +#endif | |
0 | 167 | \ No newline at end of file | ... | ... |
optics/efield.cuh
1 | -#include "rts/math/complex.h" | |
2 | -#include "rts/visualization/colormap.h" | |
1 | +#include "../math/complex.h" | |
2 | +#include "../visualization/colormap.h" | |
3 | +#include "../visualization/scalarfield.cuh" | |
4 | +#include "../visualization/vectorfield.cuh" | |
5 | +#include "../optics/planewave.h" | |
6 | +#include "../cuda/devices.h" | |
7 | + | |
8 | + | |
3 | 9 | |
4 | 10 | namespace rts{ |
5 | 11 | |
12 | +template<typename T> | |
13 | +__global__ void gpu_planewave2efield(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, | |
14 | + planewave<T> w, quad<T> q) | |
15 | +{ | |
16 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
17 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
18 | + | |
19 | + //make sure that the thread indices are in-bounds | |
20 | + if(iu >= r0 || iv >= r1) return; | |
21 | + | |
22 | + //compute the index into the field | |
23 | + int i = iv*r0 + iu; | |
24 | + | |
25 | + //get the current position | |
26 | + vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); | |
27 | + vec<T> r(p[0], p[1], p[2]); | |
28 | + | |
29 | + complex<T> x( 0.0f, w.omega * (w.k_hat.dot(r)) ); | |
30 | + | |
31 | + if(Y == NULL) //if this is a scalar simulation | |
32 | + X[i] += w.E0.len() * exp(x); //use the vector magnitude as the plane wave amplitude | |
33 | + else | |
34 | + { | |
35 | + X[i] += w.E0[0] * exp(x); | |
36 | + Y[i] += w.E0[1] * exp(x); | |
37 | + Z[i] += w.E0[2] * exp(x); | |
38 | + } | |
39 | +} | |
40 | + | |
41 | +template<typename T> | |
42 | +__global__ void gpu_efield_magnitude(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, T* M) | |
43 | +{ | |
44 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
45 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
46 | + | |
47 | + //make sure that the thread indices are in-bounds | |
48 | + if(iu >= r0 || iv >= r1) return; | |
49 | + | |
50 | + //compute the index into the field | |
51 | + int i = iv*r0 + iu; | |
52 | + | |
53 | + if(Y == NULL) //if this is a scalar simulation | |
54 | + M[i] = X[i].abs(); //compute the magnitude of the X component | |
55 | + else | |
56 | + { | |
57 | + M[i] = rts::vec<T>(X[i].abs(), Y[i].abs(), Z[i].abs()).len(); | |
58 | + //M[i] = Z[i].abs(); | |
59 | + } | |
60 | +} | |
61 | + | |
62 | +template<typename T> | |
63 | +__global__ void gpu_efield_polarization(complex<T>* X, complex<T>* Y, complex<T>* Z, | |
64 | + unsigned int r0, unsigned int r1, | |
65 | + T* Px, T* Py, T* Pz) | |
66 | +{ | |
67 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
68 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
69 | + | |
70 | + //make sure that the thread indices are in-bounds | |
71 | + if(iu >= r0 || iv >= r1) return; | |
72 | + | |
73 | + //compute the index into the field | |
74 | + int i = iv*r0 + iu; | |
75 | + | |
76 | + //compute the field polarization | |
77 | + Px[i] = X[i].abs(); | |
78 | + Py[i] = Y[i].abs(); | |
79 | + Pz[i] = Z[i].abs(); | |
80 | + | |
81 | +} | |
82 | + | |
6 | 83 | /* This class implements a discrete representation of an electromagnetic field |
7 | 84 | in 2D. The majority of this representation is done on the GPU. |
8 | 85 | */ |
... | ... | @@ -21,23 +98,53 @@ private: |
21 | 98 | //resolution of the discrete field |
22 | 99 | unsigned int R[2]; |
23 | 100 | |
101 | + //shape of the 2D field | |
102 | + quad<P> pos; | |
103 | + | |
104 | + void from_planewave(planewave<P> p) | |
105 | + { | |
106 | + unsigned int SQRT_BLOCK = 16; | |
107 | + //create one thread for each detector pixel | |
108 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
109 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
110 | + | |
111 | + gpu_planewave2efield<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], p, pos); | |
112 | + } | |
113 | + | |
114 | + void clear() | |
115 | + { | |
116 | + cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]); | |
117 | + | |
118 | + if(!scalar) | |
119 | + { | |
120 | + cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]); | |
121 | + cudaMemset(Z, 0, sizeof(rts::complex<P>) * R[0] * R[1]); | |
122 | + } | |
123 | + } | |
124 | + | |
24 | 125 | |
25 | 126 | public: |
26 | 127 | |
27 | 128 | efield(unsigned int res0, unsigned int res1, bool _scalar = false) |
28 | 129 | { |
29 | - //initialize all pointers to NULL | |
30 | - X = Y = Z = NULL; | |
130 | + scalar = _scalar; //initialize field type | |
131 | + | |
132 | + X = Y = Z = NULL; //initialize all pointers to NULL | |
31 | 133 | R[0] = res0; |
32 | 134 | R[1] = res1; |
33 | 135 | |
34 | 136 | //allocate memory on the gpu |
35 | 137 | cudaMalloc(&X, sizeof(rts::complex<P>) * R[0] * R[1]); |
138 | + cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]); | |
36 | 139 | |
37 | 140 | if(!scalar) |
38 | 141 | { |
142 | + std::cout<<"scalar:"; | |
39 | 143 | cudaMalloc(&Y, sizeof(rts::complex<P>) * R[0] * R[1]); |
144 | + cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]); | |
145 | + | |
40 | 146 | cudaMalloc(&Z, sizeof(rts::complex<P>) * R[0] * R[1]); |
147 | + cudaMemset(Z, 0, sizeof(rts::complex<P>) * R[0] * R[1]); | |
41 | 148 | } |
42 | 149 | } |
43 | 150 | |
... | ... | @@ -49,14 +156,96 @@ public: |
49 | 156 | if(Z != NULL) cudaFree(Z); |
50 | 157 | } |
51 | 158 | |
52 | - void render(std::string) | |
159 | + void position(quad<P> _p) | |
160 | + { | |
161 | + pos = _p; | |
162 | + } | |
163 | + | |
164 | + std::string str() | |
165 | + { | |
166 | + stringstream ss; | |
167 | + ss<<pos<<std::endl; | |
168 | + ss<<"X: "<<X<<std::endl; | |
169 | + ss<<"Y: "<<Y<<std::endl; | |
170 | + ss<<"Z: "<<Z; | |
171 | + | |
172 | + return ss.str(); | |
173 | + } | |
174 | + | |
175 | + //assignment operator: build an electric field from a plane wave | |
176 | + efield<P> & operator= (const planewave<P> & rhs) | |
177 | + { | |
178 | + | |
179 | + clear(); //clear any previous field data | |
180 | + from_planewave(rhs); //create a field from the planewave | |
181 | + return *this; | |
182 | + } | |
183 | + | |
184 | + //assignment operator: add the electric field from a plane wave | |
185 | + efield<P> & operator+= (const planewave<P> & rhs) | |
186 | + { | |
187 | + //create a field from the planewave | |
188 | + from_planewave(rhs); | |
189 | + | |
190 | + return *this; | |
191 | + } | |
192 | + | |
193 | + //assignment operator: build an electric field from a list of plane waves | |
194 | + efield<P> & operator= (const std::vector< planewave<P> > & rhs) | |
195 | + { | |
196 | + clear(); //clear any previous field data | |
197 | + for(unsigned int i = 0; i < rhs.size(); i++) | |
198 | + from_planewave(rhs[i]); | |
199 | + return *this; | |
200 | + } | |
201 | + | |
202 | + //return a scalar field representing field magnitude | |
203 | + scalarfield<P> mag() | |
53 | 204 | { |
205 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
206 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
54 | 207 | |
208 | + //create a scalar field to store the result | |
209 | + scalarfield<P> M(R[0], R[1]); | |
210 | + | |
211 | + //create one thread for each detector pixel | |
212 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
213 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
214 | + | |
215 | + //compute the magnitude and store it in a scalar field | |
216 | + gpu_efield_magnitude<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.S); | |
217 | + | |
218 | + return M; | |
55 | 219 | } |
56 | 220 | |
221 | + //return a vector field representing field polarization | |
222 | + vectorfield<P> polarization() | |
223 | + { | |
224 | + if(scalar) | |
225 | + { | |
226 | + std::cout<<"ERROR: Cannot compute polarization of a scalar field."<<std::endl; | |
227 | + exit(1); | |
228 | + } | |
229 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
230 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
231 | + | |
232 | + //create one thread for each detector pixel | |
233 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
234 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
235 | + | |
236 | + | |
237 | + vectorfield<P> Pol(R[0], R[1]); //create a vector field to store the result | |
238 | + | |
239 | + //compute the polarization and store it in the vector field | |
240 | + gpu_efield_polarization<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], Pol.S[0], Pol.S[1], Pol.S[2]); | |
241 | + | |
242 | + return Pol; //return the vector field | |
243 | + } | |
244 | + | |
245 | + | |
57 | 246 | |
58 | 247 | }; |
59 | 248 | |
60 | 249 | |
61 | 250 | |
62 | 251 | -} //end namespace rts |
252 | +} //end namespace rts | |
63 | 253 | \ No newline at end of file | ... | ... |
1 | +#ifndef RTS_PLANEWAVE | |
2 | +#define RTS_PLANEWAVE | |
3 | + | |
4 | +#include <string> | |
5 | +#include <sstream> | |
6 | + | |
7 | +#include "rts/math/vector.h" | |
8 | +#include "rts/math/quaternion.h" | |
9 | + | |
10 | +namespace rts{ | |
11 | + | |
12 | +template<typename P> | |
13 | +class planewave{ | |
14 | + | |
15 | +public: | |
16 | + rts::vec<P> k_hat; //normalized planewave direction | |
17 | + P omega; //frequency | |
18 | + | |
19 | + rts::vec<P> E0; //amplitude | |
20 | + | |
21 | + | |
22 | + | |
23 | + planewave() | |
24 | + { | |
25 | + omega = (P)2 * (P)3.14159; | |
26 | + k_hat = rts::vec<P>(0, 0, 1); | |
27 | + E0 = rts::vec<P>(1, 0, 0); | |
28 | + } | |
29 | + ///constructor: create a plane wave propagating along z, polarized along x | |
30 | + planewave(P _omega) | |
31 | + { | |
32 | + omega = _omega; | |
33 | + k_hat = rts::vec<P>(0, 0, 1); | |
34 | + E0 = rts::vec<P>(1, 0, 0); | |
35 | + } | |
36 | + ///constructor: create a plane wave propagating along _k, polarized along _E0, at frequency _omega | |
37 | + planewave(vec<P> _k, vec<P> _E0, P _omega) | |
38 | + { | |
39 | + k_hat = _k.norm(); | |
40 | + vec<P> s = k_hat.cross(_E0); //re-orthogonalize | |
41 | + E0 = s.cross(k_hat); | |
42 | + omega = _omega; | |
43 | + } | |
44 | + | |
45 | + ///multiplication operator: scale E0 | |
46 | + //assignment operator: build an electric field from a plane wave | |
47 | + planewave<P> & operator* (const P & rhs) | |
48 | + { | |
49 | + | |
50 | + E0 = E0 * rhs; | |
51 | + return *this; | |
52 | + } | |
53 | + | |
54 | + | |
55 | + planewave<P> refract(rts::vec<P> new_k) | |
56 | + { | |
57 | + new_k = new_k.norm(); //normalize new_k | |
58 | + | |
59 | + //compute the side vector (around which we will be rotating) | |
60 | + rts::vec<P> s = k_hat.cross(E0.norm()); | |
61 | + | |
62 | + //compute the projection of k' onto the k-E plane | |
63 | + rts::vec<P> s_prime = s * (new_k.dot(s)); | |
64 | + | |
65 | + //compute the angle between | |
66 | + P theta = acos(k_hat.dot( (new_k - s_prime).norm() )); | |
67 | + | |
68 | + //rotate E0 around s by theta | |
69 | + quaternion<P> q; | |
70 | + q.CreateRotation(theta, s); | |
71 | + rts::vec<P> E0_prime = q.toMatrix3() * E0; | |
72 | + | |
73 | + //create the refracted plane wave | |
74 | + planewave<P> new_p(omega); | |
75 | + new_p.E0 = E0_prime; | |
76 | + new_p.k_hat = new_k; | |
77 | + | |
78 | + return new_p; | |
79 | + } | |
80 | + | |
81 | + std::string str() | |
82 | + { | |
83 | + std::stringstream ss; | |
84 | + ss<<E0<<" e^i ( "<<omega<<"t - "<<omega<<" "<<k_hat * omega<<" . r )"; | |
85 | + return ss.str(); | |
86 | + } | |
87 | +}; | |
88 | +} | |
89 | + | |
90 | +#endif | |
0 | 91 | \ No newline at end of file | ... | ... |
visualization/colormap.h
... | ... | @@ -32,7 +32,7 @@ static cudaArray* gpuBrewer; |
32 | 32 | |
33 | 33 | namespace rts{ |
34 | 34 | |
35 | -enum colormapType {cmBrewer, cmGrayscale}; | |
35 | +enum colormapType {cmBrewer, cmGrayscale, cmRainbow}; | |
36 | 36 | |
37 | 37 | static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size) |
38 | 38 | { |
... | ... | @@ -119,10 +119,10 @@ __global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsi |
119 | 119 | float a = (gpuSource[i] - minVal) / (maxVal - minVal); |
120 | 120 | |
121 | 121 | //threshold |
122 | - if(a > 1.0) | |
123 | - a = 1.0; | |
124 | - if(a < 0.0) | |
125 | - a = 0.0; | |
122 | + if(a > 1) | |
123 | + a = 1; | |
124 | + if(a < 0) | |
125 | + a = 0; | |
126 | 126 | |
127 | 127 | gpuDest[i * 3 + 0] = 255 * a; |
128 | 128 | gpuDest[i * 3 + 1] = 255 * a; |
... | ... | @@ -222,9 +222,9 @@ static void cpuApplyBrewer(T* cpuSource, unsigned char* cpuDest, unsigned int N, |
222 | 222 | } |
223 | 223 | else |
224 | 224 | { |
225 | - r = BREWERCP[ptLow * 4 + 0] * (1.0-m) + BREWERCP[ (ptLow+1) * 4 + 0] * m; | |
226 | - g = BREWERCP[ptLow * 4 + 1] * (1.0-m) + BREWERCP[ (ptLow+1) * 4 + 1] * m; | |
227 | - b = BREWERCP[ptLow * 4 + 2] * (1.0-m) + BREWERCP[ (ptLow+1) * 4 + 2] * m; | |
225 | + r = BREWERCP[ptLow * 4 + 0] * (1-m) + BREWERCP[ (ptLow+1) * 4 + 0] * m; | |
226 | + g = BREWERCP[ptLow * 4 + 1] * (1-m) + BREWERCP[ (ptLow+1) * 4 + 1] * m; | |
227 | + b = BREWERCP[ptLow * 4 + 2] * (1-m) + BREWERCP[ (ptLow+1) * 4 + 2] * m; | |
228 | 228 | } |
229 | 229 | |
230 | 230 | |
... | ... | @@ -251,8 +251,8 @@ static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T |
251 | 251 | //normalize to the range [valMin valMax] |
252 | 252 | a = (cpuSource[i] - valMin) / range; |
253 | 253 | |
254 | - if(a < 0) a = 0.0; | |
255 | - if(a > 1) a = 1.0; | |
254 | + if(a < 0) a = 0; | |
255 | + if(a > 1) a = 1; | |
256 | 256 | |
257 | 257 | cpuDest[i * 3 + 0] = 255 * a; |
258 | 258 | cpuDest[i * 3 + 1] = 255 * a; |
... | ... | @@ -275,7 +275,7 @@ static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, co |
275 | 275 | } |
276 | 276 | |
277 | 277 | if(positive) |
278 | - cpu2cpu(cpuSource, cpuDest, nVals, (T)0.0, maxVal, cm); | |
278 | + cpu2cpu(cpuSource, cpuDest, nVals, (T)0, maxVal, cm); | |
279 | 279 | else |
280 | 280 | cpu2cpu(cpuSource, cpuDest, nVals, -maxVal, maxVal, cm); |
281 | 281 | ... | ... |
visualization/scalarfield.cuh
1 | -#ifndef RTS_SCALAR_SLICE | |
2 | -#define RTS_SCALAR_SLICE | |
3 | - | |
4 | -#include "rts/visualization/colormap.h" | |
5 | -#include "rts/envi/envi.h" | |
1 | +#ifndef RTS_SCALAR_SLICE | |
2 | +#define RTS_SCALAR_SLICE | |
3 | + | |
4 | +#include "../visualization/colormap.h" | |
5 | +#include "../envi/envi.h" | |
6 | +#include "../math/quad.h" | |
7 | +#include "../cuda/devices.h" | |
6 | 8 | #include "cublas_v2.h" |
7 | -#include <cuda_runtime.h> | |
9 | +#include <cuda_runtime.h> | |
10 | + | |
11 | +///Compute a Gaussian function in 3D (mostly for testing) | |
12 | +template<typename T> | |
13 | +__global__ void gpu_gaussian(T* dest, unsigned int r0, unsigned int r1, T mean, T std, rts::quad<T> shape) | |
14 | +{ | |
15 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
16 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
17 | + | |
18 | + //make sure that the thread indices are in-bounds | |
19 | + if(iu >= r0 || iv >= r1) return; | |
20 | + | |
21 | + //compute the index into the field | |
22 | + int i = iv*r0 + iu; | |
23 | + | |
24 | + T u = (T)iu / (T)r0; | |
25 | + T v = (T)iv / (T)r1; | |
26 | + | |
27 | + rts::vec<T> p = shape(u, v); | |
28 | + | |
29 | + T fx = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[0] - mean, 2) / (2 * std*std) ); | |
30 | + T fy = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[1] - mean, 2) / (2 * std*std) ); | |
31 | + T fz = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[2] - mean, 2) / (2 * std*std) ); | |
32 | + | |
33 | + dest[i] = fx * fy * fz; | |
34 | +} | |
35 | + | |
36 | +namespace rts { | |
37 | +template<typename P> | |
38 | +struct scalarfield | |
39 | +{ | |
40 | + //gpu pointer to the scalar slice | |
41 | + P* S; | |
8 | 42 | |
9 | -template<typename P> | |
10 | -struct scalarfield | |
11 | -{ | |
12 | - //gpu pointer to the scalar slice | |
13 | - P* S; | |
14 | - | |
15 | - //resolution of the slice | |
16 | - int R[2]; | |
43 | + //resolution of the slice | |
44 | + int R[2]; | |
45 | + | |
46 | + quad<P> shape; | |
17 | 47 | |
18 | 48 | scalarfield() |
19 | 49 | { |
20 | 50 | R[0] = R[1] = 0; |
51 | + shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0)); | |
21 | 52 | S = NULL; |
22 | 53 | |
23 | - //std::cout<<"Scalerslice created (default)."<<std::endl; | |
54 | + std::cout<<"scalarfield CONSTRUCTOR"<<std::endl; | |
24 | 55 | } |
25 | - | |
56 | + | |
26 | 57 | scalarfield(int x, int y) |
27 | - { | |
28 | - //set the resolution | |
29 | - R[0] = x; | |
30 | - R[1] = y; | |
31 | - | |
32 | - //allocate memory on the GPU | |
58 | + { | |
59 | + //set the resolution | |
60 | + R[0] = x; | |
61 | + R[1] = y; | |
62 | + | |
63 | + shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0)); | |
64 | + | |
65 | + //allocate memory on the GPU | |
33 | 66 | HANDLE_ERROR(cudaMalloc( (void**)&S, sizeof(P) * x * y )); |
34 | - //std::cout<<"Scalerslice created."<<std::endl; | |
67 | + | |
68 | + std::cout<<"scalarfield CONSTRUCTOR"<<std::endl; | |
35 | 69 | } |
36 | - | |
70 | + | |
37 | 71 | ~scalarfield() |
38 | - { | |
39 | - if(S != NULL) | |
40 | - HANDLE_ERROR(cudaFree(S)); | |
72 | + { | |
73 | + if(S != NULL) | |
74 | + HANDLE_ERROR(cudaFree(S)); | |
41 | 75 | S = NULL; |
42 | 76 | R[0] = R[1] = 0; |
43 | 77 | |
44 | - //std::cout<<"Scalerslice destroyed."<<std::endl; | |
78 | + std::cout<<"scalarfield DESTRUCTOR"<<std::endl; | |
45 | 79 | } |
46 | - | |
80 | + | |
47 | 81 | void clear() |
48 | - { | |
49 | - //this function sets the slice to zero | |
50 | - if(S != NULL) | |
51 | - //HANDLE_ERROR(cudaMalloc( (void**)&S, sizeof(P) * R[0] * R[1] )); | |
52 | - HANDLE_ERROR(cudaMemset(S, 0, sizeof(P) * R[0] * R[1])); | |
53 | - } | |
54 | - | |
82 | + { | |
83 | + //this function sets the slice to zero | |
84 | + if(S != NULL) | |
85 | + HANDLE_ERROR(cudaMemset(S, 0, sizeof(P) * R[0] * R[1])); | |
86 | + } | |
87 | + | |
55 | 88 | void toImage(std::string filename, P vmin, P vmax, rts::colormapType cmap = rts::cmBrewer) |
56 | - { | |
57 | - rts::gpu2image<ptype>(S, filename, R[0], R[1], vmin, vmax, cmap); | |
89 | + { | |
90 | + rts::gpu2image<P>(S, filename, R[0], R[1], vmin, vmax, cmap); | |
58 | 91 | } |
59 | 92 | |
60 | 93 | void toImage(std::string filename, bool positive = true, rts::colormapType cmap = rts::cmBrewer) |
... | ... | @@ -73,11 +106,11 @@ struct scalarfield |
73 | 106 | //find the index of the value with maximum magnitude |
74 | 107 | int N = R[0] * R[1]; |
75 | 108 | int result; |
76 | - #ifdef PRECISION_SINGLE | |
77 | - stat = cublasIsamax(handle, N, S, 1, &result); | |
78 | - #elif defined PRECISION_DOUBLE | |
79 | - stat = cublasIdamax(handle, N, S, 1, &result); | |
80 | - #endif | |
109 | + | |
110 | + if(sizeof(P) == 4) | |
111 | + stat = cublasIsamax(handle, N, (float*)S, 1, &result); | |
112 | + else | |
113 | + stat = cublasIdamax(handle, N, (double*)S, 1, &result); | |
81 | 114 | |
82 | 115 | //adjust for 1-based indexing |
83 | 116 | result -= 1; |
... | ... | @@ -92,7 +125,7 @@ struct scalarfield |
92 | 125 | |
93 | 126 | //retrieve the maximum value |
94 | 127 | P maxVal; |
95 | - HANDLE_ERROR(cudaMemcpy(&maxVal, S + result, sizeof(ptype), cudaMemcpyDeviceToHost)); | |
128 | + HANDLE_ERROR(cudaMemcpy(&maxVal, S + result, sizeof(P), cudaMemcpyDeviceToHost)); | |
96 | 129 | |
97 | 130 | //destroy the CUBLAS handle |
98 | 131 | cublasDestroy(handle); |
... | ... | @@ -105,7 +138,7 @@ struct scalarfield |
105 | 138 | } |
106 | 139 | |
107 | 140 | |
108 | - void toEnvi(std::string filename, ptype wavelength = 0, bool append = false) | |
141 | + void toEnvi(std::string filename, P wavelength = 0, bool append = false) | |
109 | 142 | { |
110 | 143 | std::string mode; |
111 | 144 | if(append) mode = "a"; |
... | ... | @@ -115,23 +148,21 @@ struct scalarfield |
115 | 148 | EnviFile outfile(filename, mode); |
116 | 149 | |
117 | 150 | //get the scalar slice from the GPU to the CPU |
118 | - int memsize = sizeof(ptype) * R[0] * R[1]; | |
119 | - ptype* cpuData = (ptype*) malloc( memsize ); | |
151 | + int memsize = sizeof(P) * R[0] * R[1]; | |
152 | + P* cpuData = (P*) malloc( memsize ); | |
120 | 153 | HANDLE_ERROR(cudaMemcpy( cpuData, S, memsize, cudaMemcpyDeviceToHost)); |
121 | 154 | |
122 | 155 | //add a band to the ENVI file |
123 | 156 | outfile.addBand(cpuData, R[0], R[1], wavelength); |
124 | 157 | |
125 | 158 | outfile.close(); |
126 | - | |
127 | - | |
128 | 159 | } |
129 | 160 | |
130 | 161 | //assignment operator |
131 | 162 | scalarfield & operator= (const scalarfield & rhs) |
132 | 163 | { |
133 | 164 | //de-allocate any existing GPU memory |
134 | - if(S != NULL) | |
165 | + if(S != NULL) | |
135 | 166 | HANDLE_ERROR(cudaFree(S)); |
136 | 167 | |
137 | 168 | //copy the slice resolution |
... | ... | @@ -139,20 +170,54 @@ struct scalarfield |
139 | 170 | R[1] = rhs.R[1]; |
140 | 171 | |
141 | 172 | //allocate the necessary memory |
142 | - HANDLE_ERROR(cudaMalloc(&S, sizeof(ptype) * R[0] * R[1])); | |
173 | + HANDLE_ERROR(cudaMalloc(&S, sizeof(P) * R[0] * R[1])); | |
143 | 174 | |
144 | 175 | //copy the slice |
145 | - HANDLE_ERROR(cudaMemcpy(S, rhs.S, sizeof(ptype) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
176 | + HANDLE_ERROR(cudaMemcpy(S, rhs.S, sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
146 | 177 | |
147 | 178 | |
148 | 179 | std::cout<<"Assignment operator."<<std::endl; |
149 | 180 | |
150 | 181 | return *this; |
182 | + } | |
183 | + | |
184 | + ///copy constructor | |
185 | + scalarfield(const scalarfield &rhs) | |
186 | + { | |
187 | + //first make a shallow copy | |
188 | + R[0] = rhs.R[0]; | |
189 | + R[1] = rhs.R[1]; | |
190 | + | |
191 | + //do we have to make a deep copy? | |
192 | + if(rhs.S == NULL) | |
193 | + S = NULL; //no | |
194 | + else | |
195 | + { | |
196 | + //allocate the necessary memory | |
197 | + HANDLE_ERROR(cudaMalloc(&S, sizeof(P) * R[0] * R[1])); | |
198 | + | |
199 | + //copy the slice | |
200 | + HANDLE_ERROR(cudaMemcpy(S, rhs.S, sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
201 | + } | |
202 | + | |
203 | + std::cout<<"scalarfield COPY CONSTRUCTOR"<<std::endl; | |
204 | + } | |
205 | + | |
206 | + void gaussian(P mean, P std) | |
207 | + { | |
208 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
209 | + int SQRT_BLOCK = (int)sqrt((float)maxThreads); | |
210 | + //create one thread for each detector pixel | |
211 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
212 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
213 | + | |
214 | + gpu_gaussian<float> <<<dimGrid, dimBlock>>> (S, R[0], R[1], mean, std, shape); | |
215 | + } | |
216 | + | |
217 | +}; | |
218 | + | |
219 | +} //end namespace rts | |
220 | + | |
221 | + | |
151 | 222 | |
152 | - } | |
153 | - | |
154 | -}; | |
155 | - | |
156 | - | |
157 | - | |
158 | 223 | #endif | ... | ... |
1 | +#ifndef RTS_VECTORFIELD | |
2 | +#define RTS_VECTORFIELD | |
3 | + | |
4 | +#include "../visualization/colormap.h" | |
5 | +#include "../envi/envi.h" | |
6 | +#include "../math/quad.h" | |
7 | +#include "../cuda/devices.h" | |
8 | +#include "cublas_v2.h" | |
9 | +#include <cuda_runtime.h> | |
10 | +#include <iomanip> | |
11 | + | |
12 | +#include <qimage.h> | |
13 | +#include <qcolor.h> | |
14 | + | |
15 | + | |
16 | +namespace rts { | |
17 | +template<typename P, unsigned int N = 3> | |
18 | +struct vectorfield | |
19 | +{ | |
20 | +private: | |
21 | + void process_filename(std::string name, std::string &prefix, std::string &postfix, | |
22 | + std::string &ext, unsigned int &digits) | |
23 | + { | |
24 | + std::stringstream ss(name); | |
25 | + std::string item; | |
26 | + std::vector<std::string> elems; | |
27 | + while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension) | |
28 | + { | |
29 | + elems.push_back(item); | |
30 | + } | |
31 | + | |
32 | + prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters) | |
33 | + ext = elems[1]; //file extension (ex. .bmp, .png) | |
34 | + ext = std::string(".") + ext; //add a period back into the extension | |
35 | + | |
36 | + size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'') | |
37 | + size_t i1 = prefix.find_last_of("?"); | |
38 | + | |
39 | + postfix = prefix.substr(i1+1); | |
40 | + prefix = prefix.substr(0, i0); | |
41 | + | |
42 | + digits = i1 - i0 + 1; //compute the number of wildcards | |
43 | + | |
44 | + } | |
45 | + | |
46 | +public: | |
47 | + //gpu pointers to scalar slices | |
48 | + P* S[N]; | |
49 | + | |
50 | + //resolution of the slice | |
51 | + int R[2]; | |
52 | + | |
53 | + quad<P> shape; | |
54 | + | |
55 | + vectorfield() | |
56 | + { | |
57 | + R[0] = R[1] = 0; //set the initial resolution to 0 | |
58 | + shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0)); | |
59 | + for(int n=0; n<N; n++) //set each vector component to NULL | |
60 | + S[n]=NULL; | |
61 | + | |
62 | + std::cout<<"vectorfield CONSTRUCTOR"<<std::endl; | |
63 | + } | |
64 | + | |
65 | + vectorfield(int x, int y) | |
66 | + { | |
67 | + //set the resolution | |
68 | + R[0] = x; | |
69 | + R[1] = y; | |
70 | + | |
71 | + shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0)); | |
72 | + | |
73 | + //allocate memory on the GPU | |
74 | + for(int n=0; n<N; n++) | |
75 | + HANDLE_ERROR(cudaMalloc( (void**)&S[n], sizeof(P) * x * y )); | |
76 | + | |
77 | + std::cout<<"vectorfield CONSTRUCTOR"<<std::endl; | |
78 | + } | |
79 | + | |
80 | + ~vectorfield() | |
81 | + { | |
82 | + for(int n=0; n<N; n++) | |
83 | + if(S[n] != NULL) | |
84 | + { | |
85 | + HANDLE_ERROR(cudaFree(S[n])); | |
86 | + S[n] = NULL; | |
87 | + } | |
88 | + R[0] = R[1] = 0; | |
89 | + | |
90 | + std::cout<<"vectorfield DESTRUCTOR"<<std::endl; | |
91 | + } | |
92 | + | |
93 | + void clear() | |
94 | + { | |
95 | + //this function sets the slice to zero | |
96 | + for(int n=0; n<N; n++) | |
97 | + if(S[n] != NULL) | |
98 | + HANDLE_ERROR(cudaMemset(S[n], 0, sizeof(P) * R[0] * R[1])); | |
99 | + } | |
100 | + | |
101 | + void toImage(std::string filename, unsigned int n, P vmin, P vmax, rts::colormapType cmap = rts::cmBrewer) | |
102 | + { | |
103 | + rts::gpu2image<P>(S[n], filename, R[0], R[1], vmin, vmax, cmap); | |
104 | + } | |
105 | + | |
106 | + void toImage3(std::string filename, P vmin, P vmax) | |
107 | + { | |
108 | + std::cout<<"Implementing a 3-component rainbow colormap: "<<filename.c_str()<<std::endl; | |
109 | + //create a buffer for each RGB component | |
110 | + unsigned char* red = (unsigned char*)malloc(sizeof(unsigned char) * 3 * R[0] * R[1]); | |
111 | + unsigned char* green = (unsigned char*)malloc(sizeof(unsigned char) * 3 * R[0] * R[1]); | |
112 | + unsigned char* blue = (unsigned char*)malloc(sizeof(unsigned char) * 3 * R[0] * R[1]); | |
113 | + | |
114 | + //retrieve the buffered images for each component | |
115 | + rts::gpu2cpu<P>(S[0], red, R[0] * R[1], vmin, vmax); | |
116 | + rts::gpu2cpu<P>(S[1], green, R[0] * R[1], vmin, vmax); | |
117 | + rts::gpu2cpu<P>(S[2], blue, R[0] * R[1], vmin, vmax); | |
118 | + | |
119 | + QImage image(R[0], R[1], QImage::Format_RGB32); //create a QImage object | |
120 | + if(image.isNull()) //if it didn't work, throw an error | |
121 | + { | |
122 | + std::cout<<"Error creating QImage."<<std::endl; | |
123 | + return; | |
124 | + } | |
125 | + | |
126 | + int i; | |
127 | + unsigned char r, g, b; | |
128 | + unsigned int x, y; | |
129 | + for(y=0; y<R[1]; y++) | |
130 | + for(x=0; x<R[0]; x++) | |
131 | + { | |
132 | + //calculate the 1D index | |
133 | + i = y * R[0] + x; | |
134 | + | |
135 | + r = red[i * 3 + 0]; | |
136 | + g = green[i * 3 + 1]; | |
137 | + b = blue[i * 3 + 2]; | |
138 | + | |
139 | + //set the image pixel | |
140 | + QColor color(r, g, b); | |
141 | + image.setPixel(x, y, color.rgb()); | |
142 | + } | |
143 | + | |
144 | + if(!image.save(filename.c_str())) //if the image didn't save correctly, | |
145 | + std::cout<<"Error saving QImage."<<std::endl; // throw an error | |
146 | + } | |
147 | + | |
148 | + void toImages(std::string filename, bool positive = true, rts::colormapType cmap = rts::cmBrewer, bool globalmax = true) | |
149 | + { | |
150 | + std::string prefix, postfix, extension; | |
151 | + unsigned int digits; | |
152 | + process_filename(filename, prefix, postfix, extension, digits); //process the filename for wild cards | |
153 | + | |
154 | + cublasStatus_t stat; | |
155 | + cublasHandle_t handle; | |
156 | + | |
157 | + //create a CUBLAS handle | |
158 | + stat = cublasCreate(&handle); | |
159 | + if(stat != CUBLAS_STATUS_SUCCESS) | |
160 | + { | |
161 | + std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | |
162 | + exit(1); | |
163 | + } | |
164 | + | |
165 | + int L = R[0] * R[1]; //compute the number of discrete points in a slice | |
166 | + int result; //result of the max operation | |
167 | + | |
168 | + P maxVal[N]; //array stores minimum and maximum values | |
169 | + P maxAll = 0; //largest value in the data set | |
170 | + | |
171 | + //compute the maximum value for each vector component | |
172 | + for(int n=0; n<N; n++) | |
173 | + { | |
174 | + if(sizeof(P) == 4) | |
175 | + stat = cublasIsamax(handle, L, (const float*)S[n], 1, &result); | |
176 | + else | |
177 | + stat = cublasIdamax(handle, L, (const double*)S[n], 1, &result); | |
178 | + | |
179 | + result -= 1; //adjust for 1-based indexing | |
180 | + | |
181 | + if(stat != CUBLAS_STATUS_SUCCESS) //if there was a GPU error, terminate | |
182 | + { | |
183 | + std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | |
184 | + exit(1); | |
185 | + } | |
186 | + | |
187 | + //retrieve the maximum value for this slice and store it in the maxVal array | |
188 | + HANDLE_ERROR(cudaMemcpy(&maxVal[n], S[n] + result, sizeof(P), cudaMemcpyDeviceToHost)); | |
189 | + if(maxVal[n] > maxAll) //if maxVal is larger, update the maxAll variable | |
190 | + maxAll = maxVal[n]; | |
191 | + | |
192 | + } | |
193 | + | |
194 | + cublasDestroy(handle); //destroy the CUBLAS handle | |
195 | + | |
196 | + if(cmap == rts::cmRainbow && N == 3) //if the user specifies a rainbow colormap, and the vector has 3 elements | |
197 | + { | |
198 | + //implement a single image with RGB = XYZ | |
199 | + if(positive) | |
200 | + toImage3(prefix+postfix+extension, 0, maxAll); | |
201 | + else | |
202 | + toImage3(prefix+postfix+extension, 0, maxAll); | |
203 | + } | |
204 | + else | |
205 | + { | |
206 | + for(int n=0; n<N; n++) //for each image | |
207 | + { | |
208 | + stringstream ss; //assemble the file name | |
209 | + ss<<prefix<<std::setfill('0')<<std::setw(digits)<<n<<postfix<<extension; | |
210 | + std::cout<<ss.str()<<std::endl; | |
211 | + if(positive) //if the image is positive | |
212 | + { | |
213 | + std::cout<<"Positive: "<<n<<std::endl; | |
214 | + if(globalmax) //if the global maximum is used | |
215 | + toImage(ss.str(), n, 0, maxAll, cmap); //save the image using the global maximum | |
216 | + else | |
217 | + toImage(ss.str(), n, 0, maxVal[n], cmap); //save the image using the local maximum | |
218 | + } | |
219 | + else | |
220 | + { | |
221 | + std::cout<<"Negative: "<<n<<std::endl; | |
222 | + if(globalmax) //if the global maximum is used | |
223 | + toImage(ss.str(), n, -abs(maxVal[n]), abs(maxVal[n]), cmap); //save the image using the global maximum | |
224 | + else | |
225 | + toImage(ss.str(), n, -abs(maxVal[n]), abs(maxVal[n]), cmap); //save the image using the local maximum | |
226 | + } | |
227 | + } | |
228 | + } | |
229 | + } | |
230 | + | |
231 | + //assignment operator | |
232 | + vectorfield & operator= (const vectorfield & rhs) | |
233 | + { | |
234 | + //de-allocate any existing GPU memory | |
235 | + for(int n=0; n<N; n++) | |
236 | + if(S[n] != NULL) | |
237 | + HANDLE_ERROR(cudaFree(S[n])); | |
238 | + | |
239 | + //copy the slice resolution | |
240 | + R[0] = rhs.R[0]; | |
241 | + R[1] = rhs.R[1]; | |
242 | + | |
243 | + for(int n=0; n<N; n++) | |
244 | + { | |
245 | + //allocate the necessary memory | |
246 | + HANDLE_ERROR(cudaMalloc(&S[n], sizeof(P) * R[0] * R[1])); | |
247 | + | |
248 | + //copy the slice | |
249 | + HANDLE_ERROR(cudaMemcpy(S[n], rhs.S[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
250 | + } | |
251 | + | |
252 | + | |
253 | + std::cout<<"Assignment operator."<<std::endl; | |
254 | + | |
255 | + return *this; | |
256 | + } | |
257 | + | |
258 | + ///copy constructor | |
259 | + vectorfield(const vectorfield &rhs) | |
260 | + { | |
261 | + //first make a shallow copy | |
262 | + R[0] = rhs.R[0]; | |
263 | + R[1] = rhs.R[1]; | |
264 | + | |
265 | + //do we have to make a deep copy? | |
266 | + if(rhs.S[0] == NULL) //no? | |
267 | + { | |
268 | + for(int n=0; n<N; n++) //set all components to NULL | |
269 | + { | |
270 | + S[n] = NULL; | |
271 | + } | |
272 | + } | |
273 | + else | |
274 | + { | |
275 | + for(int n=0; n<N; n++) | |
276 | + { | |
277 | + //allocate the necessary memory | |
278 | + HANDLE_ERROR(cudaMalloc(&S[n], sizeof(P) * R[0] * R[1])); | |
279 | + | |
280 | + //copy the slice | |
281 | + HANDLE_ERROR(cudaMemcpy(S[n], rhs.S[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
282 | + } | |
283 | + } | |
284 | + | |
285 | + std::cout<<"vectorfield COPY CONSTRUCTOR"<<std::endl; | |
286 | + } | |
287 | + | |
288 | +}; | |
289 | + | |
290 | +} //end namespace rts | |
291 | + | |
292 | + | |
293 | + | |
294 | +#endif | ... | ... |