Commit a9275be58aaa6e83acfdb092f1221ec3d2ac9eb8

Authored by David Mayerich
1 parent 4d67ff4e

added vector field visualization tools

cuda/devices.h 0 → 100644
  1 +#ifndef RTS_CUDA_DEVICES
  2 +#define RTS_CUDA_DEVICES
  3 +
  4 +#include <cuda.h>
  5 +
  6 +namespace rts{
  7 +
  8 +int maxThreadsPerBlock()
  9 +{
  10 + int device;
  11 + cudaGetDevice(&device); //get the id of the current device
  12 + cudaDeviceProp props; //device property structure
  13 + cudaGetDeviceProperties(&props, device);
  14 + return props.maxThreadsPerBlock;
  15 +}
  16 +} //end namespace rts
  17 +
  18 +#endif
0 19 \ No newline at end of file
... ...
cuda/memory.h deleted
1   -#include <cuda.h>
2   -
math/function.h
... ... @@ -18,62 +18,74 @@ class function
18 18 //function data
19 19 std::vector<dataPoint> f;
20 20  
21   - //comparison function for searching lambda
22   - static bool findCeiling(dataPoint a, dataPoint b)
23   - {
24   - return (a.x > b.x);
  21 + //comparison function for searching lambda
  22 + static bool findCeiling(dataPoint a, dataPoint b)
  23 + {
  24 + return (a.x > b.x);
25 25 }
26 26  
27 27  
28 28 public:
  29 + function()
  30 + {
  31 + //insert(0, 0);
  32 + }
  33 +
29 34 Y linear(X x)
30 35 {
31   - //declare an iterator
32   - typename std::vector< dataPoint >::iterator it;
33   -
34   - dataPoint s;
35   - s.x = x;
36   -
37   - it = search(f.begin(), f.end(), &s, &s + 1, &function<X, Y>::findCeiling);
38   -
39   - //if the wavelength is past the end of the list, return the back
40   - if(it == f.end())
41   - return f.back().y;
42   - //if the wavelength is before the beginning of the list, return the front
43   - else if(it == f.begin())
44   - return f.front().y;
45   - //otherwise interpolate
46   - else
47   - {
48   - X xMax = (*it).x;
49   - X xMin = (*(it - 1)).x;
50   - //std::cout<<lMin<<"----------"<<lMax<<std::endl;
51   -
52   - X a = (x - xMin) / (xMax - xMin);
53   - Y riMin = (*(it - 1)).y;
54   - Y riMax = (*it).y;
55   - Y interp;
56   - interp = riMin * a + riMax * (1.0 - a);
57   - return interp;
  36 + if(f.size() == 0) return (Y)0; //return zero if the function is empty
  37 + //declare an iterator
  38 + typename std::vector< dataPoint >::iterator it;
  39 +
  40 + dataPoint s;
  41 + s.x = x;
  42 +
  43 + it = search(f.begin(), f.end(), &s, &s + 1, &function<X, Y>::findCeiling);
  44 +
  45 + //if the wavelength is past the end of the list, return the back
  46 + if(it == f.end())
  47 + return f.back().y;
  48 + //if the wavelength is before the beginning of the list, return the front
  49 + else if(it == f.begin())
  50 + return f.front().y;
  51 + //otherwise interpolate
  52 + else
  53 + {
  54 + X xMax = (*it).x;
  55 + X xMin = (*(it - 1)).x;
  56 + //std::cout<<lMin<<"----------"<<lMax<<std::endl;
  57 +
  58 + X a = (x - xMin) / (xMax - xMin);
  59 + Y riMin = (*(it - 1)).y;
  60 + Y riMax = (*it).y;
  61 + Y interp;
  62 + interp = riMax * a + riMin * (1.0 - a);
  63 + return interp;
58 64 }
59 65 }
60 66  
  67 + ///add a data point to a function
61 68 void insert(X x, Y y)
62 69 {
63   - //declare an iterator
64   - typename std::vector< dataPoint >::iterator it;
65   -
66   - dataPoint s;
67   - s.x = x;
68   - s.y = y;
69   -
70   - it = search(f.begin(), f.end(), &s, &s + 1, &function<X, Y>::findCeiling);
71   -
72   - //if the function value is past the end of the vector, add it to the back
73   - if(it == f.end())
74   - return f.push_back(s);
75   - //otherwise add the value at the iterator position
76   - else
  70 + dataPoint s;
  71 + s.x = x;
  72 + s.y = y;
  73 +
  74 + if(f.size() == 0 || f.back().x < x)
  75 + return f.push_back(s);
  76 +
  77 + //declare an iterator
  78 + typename std::vector< dataPoint >::iterator it;
  79 +
  80 +
  81 +
  82 + it = search(f.begin(), f.end(), &s, &s + 1, &function<X, Y>::findCeiling);
  83 +
  84 + //if the function value is past the end of the vector, add it to the back
  85 + if(it == f.end())
  86 + return f.push_back(s);
  87 + //otherwise add the value at the iterator position
  88 + else
77 89 {
78 90 f.insert(it, s);
79 91 }
... ... @@ -90,16 +102,24 @@ public:
90 102 return f[i].y;
91 103 }
92 104  
  105 + ///get the number of data points in the function
93 106 unsigned int getN()
94 107 {
95 108 return f.size();
96 109 }
97 110  
  111 + //look up an indexed component
98 112 dataPoint operator[](int i)
99 113 {
100 114 return f[i];
101 115 }
102 116  
  117 + ///linear interpolation
  118 + Y operator()(X x)
  119 + {
  120 + return linear(x);
  121 + }
  122 +
103 123 function<X, Y> operator+(Y r)
104 124 {
105 125 function<X, Y> result;
... ... @@ -114,10 +134,19 @@ public:
114 134 return result;
115 135 }
116 136  
  137 + function<X, Y> & operator= (const Y & rhs)
  138 + {
  139 + f.clear();
  140 + if(rhs != 0) //if the RHS is zero, just clear, otherwise add one value of RHS
  141 + insert(0, rhs);
  142 +
  143 + return *this;
  144 + }
  145 +
117 146  
118 147 };
119 148  
120 149 } //end namespace rts
121 150  
122 151  
123   -#endif
  152 +#endif
... ...
math/matrix.h
... ... @@ -4,6 +4,7 @@
4 4 //#include "rts/vector.h"
5 5 #include <string.h>
6 6 #include <iostream>
  7 +#include "vector.h"
7 8  
8 9 namespace rts
9 10 {
... ... @@ -46,9 +47,9 @@ struct matrix
46 47 return *this;
47 48 }*/
48 49  
49   - vector<T, N> operator*(vector<T, N> rhs)
  50 + vec<T, N> operator*(vec<T, N> rhs)
50 51 {
51   - vector<T, N> result;
  52 + vec<T, N> result;
52 53  
53 54 for(int r=0; r<N; r++)
54 55 for(int c=0; c<N; c++)
... ...
math/point.h deleted
1   -#ifndef RTS_rtsPoint_H
2   -#define RTS_rtsPoint_H
3   -
4   -#include "rts/math/vector.h"
5   -#include <string.h>
6   -#include "rts/cuda/callable.h"
7   -
8   -namespace rts
9   -{
10   -
11   -template <class T, int N>
12   -struct point
13   -{
14   - T p[N];
15   -
16   - CUDA_CALLABLE point()
17   - {
18   -
19   - }
20   -
21   - //efficiency constructor, makes construction easier for 1D-4D vectors
22   - CUDA_CALLABLE point(T x)
23   - {
24   - p[0] = x;
25   - }
26   - CUDA_CALLABLE point(T x, T y)
27   - {
28   - p[0] = x;
29   - p[1] = y;
30   - }
31   - CUDA_CALLABLE point(T x, T y, T z)
32   - {
33   - p[0] = x;
34   - p[1] = y;
35   - p[2] = z;
36   - }
37   - CUDA_CALLABLE point(T x, T y, T z, T w)
38   - {
39   - p[0] = x;
40   - p[1] = y;
41   - p[2] = z;
42   - p[3] = w;
43   - }
44   -
45   - //arithmetic operators
46   - CUDA_CALLABLE rts::point<T, N> operator+(vector<T, N> v)
47   - {
48   - rts::point<T, N> r;
49   -
50   - //calculate the position of the resulting point
51   - for(int i=0; i<N; i++)
52   - r.p[i] = p[i] + v.v[i];
53   -
54   - return r;
55   - }
56   - CUDA_CALLABLE rts::point<T, N> operator-(vector<T, N> v)
57   - {
58   - rts::point<T, N> r;
59   -
60   - //calculate the position of the resulting point
61   - for(int i=0; i<N; i++)
62   - r.p[i] = p[i] - v.v[i];
63   -
64   - return r;
65   - }
66   - CUDA_CALLABLE vector<T, N> operator-(point<T, N> rhs)
67   - {
68   - vector<T, N> r;
69   -
70   - //calculate the position of the resulting point
71   - for(int i=0; i<N; i++)
72   - r.v[i] = p[i] - rhs.p[i];
73   -
74   - return r;
75   - }
76   - CUDA_CALLABLE rts::point<T, N> operator*(T rhs)
77   - {
78   - rts::point<T, N> r;
79   -
80   - //calculate the position of the resulting point
81   - for(int i=0; i<N; i++)
82   - r.p[i] = p[i] * rhs;
83   -
84   - return r;
85   - }
86   -
87   - CUDA_CALLABLE point(const T(&data)[N])
88   - {
89   - memcpy(p, data, sizeof(T) * N);
90   - }
91   -
92   - std::string toStr()
93   - {
94   - std::stringstream ss;
95   -
96   - ss<<"(";
97   - for(int i=0; i<N; i++)
98   - {
99   - ss<<p[i];
100   - if(i != N-1)
101   - ss<<", ";
102   - }
103   - ss<<")";
104   -
105   - return ss.str();
106   - }
107   -
108   - //bracket operator
109   - CUDA_CALLABLE T& operator[](int i)
110   - {
111   - return p[i];
112   - }
113   -
114   -};
115   -
116   -} //end namespace rts
117   -
118   -template <typename T, int N>
119   -std::ostream& operator<<(std::ostream& os, rts::point<T, N> p)
120   -{
121   - os<<p.toStr();
122   - return os;
123   -}
124   -
125   -//arithmetic
126   -template <typename T, int N>
127   -CUDA_CALLABLE rts::point<T, N> operator*(T lhs, rts::point<T, N> rhs)
128   -{
129   - rts::point<T, N> r;
130   -
131   - return rhs * lhs;
132   -}
133   -
134   -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
135   -//template<class T, int N> using rtsPoint = rts::point<T, N>;
136   -//#endif
137   -
138   -#endif
math/quad.h
... ... @@ -2,37 +2,36 @@
2 2 #define RTS_RECT_H
3 3  
4 4 //enable CUDA_CALLABLE macro
5   -#include "rts/cuda/callable.h"
6   -#include "rts/math/vector.h"
7   -#include "rts/math/point.h"
8   -#include "rts/math/triangle.h"
9   -#include "rts/math/quaternion.h"
  5 +#include "../cuda/callable.h"
  6 +#include "../math/vector.h"
  7 +#include "../math/triangle.h"
  8 +#include "../math/quaternion.h"
10 9 #include <iostream>
11 10 #include <algorithm>
12 11  
13 12 namespace rts{
14 13  
15 14 //template for a quadangle class in ND space
16   -template <class T, int N>
  15 +template <class T, int N = 3>
17 16 struct quad
18 17 {
19 18 /*
20   - C------------------>O
  19 + B------------------>C
21 20 ^ ^
22 21 | |
23 22 Y |
24 23 | |
25 24 | |
26   - A---------X-------->B
  25 + A---------X-------->O
27 26 */
28 27  
29 28 /*T A[N];
30 29 T B[N];
31 30 T C[N];*/
32 31  
33   - rts::point<T, N> A;
34   - rts::vector<T, N> X;
35   - rts::vector<T, N> Y;
  32 + rts::vec<T, N> A;
  33 + rts::vec<T, N> X;
  34 + rts::vec<T, N> Y;
36 35  
37 36  
38 37 CUDA_CALLABLE quad()
... ... @@ -40,56 +39,56 @@ struct quad
40 39  
41 40 }
42 41  
43   - CUDA_CALLABLE quad(point<T, N> a, point<T, N> b, point<T, N> c)
  42 + CUDA_CALLABLE quad(vec<T, N> a, vec<T, N> b, vec<T, N> c)
44 43 {
45 44  
46   - A = a;
47   - X = b - a;
48   - Y = c - a;
  45 + A = a;
  46 + Y = b - a;
  47 + X = c - a - Y;
49 48  
50 49 }
51 50  
52 51 /****************************************************************
53 52 Constructor - create a quad from two points and a normal
54 53 ****************************************************************/
55   - CUDA_CALLABLE quad(rts::point<T, N> pMin, rts::point<T, N> pMax, rts::vector<T, N> normal)
  54 + /*CUDA_CALLABLE quad(rts::vec<T, N> pMin, rts::vec<T, N> pMax, rts::vec<T, N> normal)
56 55 {
57 56  
58 57 //assign the corner point
59 58 A = pMin;
60 59  
61 60 //compute the vector from pMin to pMax
62   - rts::vector<T, 3> v0;
  61 + rts::vec<T, 3> v0;
63 62 v0 = pMax - pMin;
64 63  
65 64 //compute the cross product of A and the plane normal
66   - rts::vector<T, 3> v1;
  65 + rts::vec<T, 3> v1;
67 66 v1 = v0.cross(normal);
68 67  
69 68  
70 69 //calculate point B
71   - rts::point<T, 3> B;
  70 + rts::vec<T, 3> B;
72 71 B = A + v0 * 0.5f + v1 * 0.5f;
73 72  
74 73 //calculate rtsPoint C
75   - rts::point<T, 3> C;
  74 + rts::vec<T, 3> C;
76 75 C = A + v0 * 0.5f - v1 * 0.5f;
77 76  
78 77 //calculate X and Y
79 78 X = B - A;
80 79 Y = C - A;
81   - }
  80 + }*/
82 81  
83 82 /*******************************************************************
84 83 Constructor - create a quad from a position, normal, and rotation
85 84 *******************************************************************/
86   - CUDA_CALLABLE quad(rts::point<T, N> c, rts::vector<T, N> normal, T width, T height, T theta)
  85 + CUDA_CALLABLE quad(rts::vec<T, N> c, rts::vec<T, N> normal, T width, T height, T theta)
87 86 {
88 87  
89 88 //compute the X direction - start along world-space X
90   - Y = rts::vector<T, N>(0, 1, 0);
  89 + Y = rts::vec<T, N>(0, 1, 0);
91 90 if(Y == normal)
92   - Y = rts::vector<T, N>(0, 0, 1);
  91 + Y = rts::vec<T, N>(0, 0, 1);
93 92  
94 93 X = Y.cross(normal).norm();
95 94  
... ... @@ -118,33 +117,33 @@ struct quad
118 117 /*******************************************
119 118 Return the normal for the quad
120 119 *******************************************/
121   - CUDA_CALLABLE rts::vector<T, N> n()
  120 + CUDA_CALLABLE rts::vec<T, N> n()
122 121 {
123 122 return (X.cross(Y)).norm();
124 123 }
125 124  
126   - CUDA_CALLABLE rts::point<T, N> p(T a, T b)
  125 + CUDA_CALLABLE rts::vec<T, N> p(T a, T b)
127 126 {
128   - rts::point<T, N> result;
  127 + rts::vec<T, N> result;
129 128 //given the two parameters a, b = [0 1], returns the position in world space
130 129 result = A + X * a + Y * b;
131 130  
132 131 return result;
133 132 }
134 133  
135   - CUDA_CALLABLE rts::point<T, N> operator()(T a, T b)
  134 + CUDA_CALLABLE rts::vec<T, N> operator()(T a, T b)
136 135 {
137 136 return p(a, b);
138 137 }
139 138  
140   - std::string toStr()
  139 + std::string str()
141 140 {
142 141 std::stringstream ss;
143 142  
144 143 ss<<"A = "<<A<<std::endl;
145   - ss<<"B = "<<A + X<<std::endl;
146   - ss<<"C = "<<A + X + Y<<std::endl;
147   - ss<<"D = "<<A + Y<<std::endl;
  144 + ss<<"B = "<<A + Y<<std::endl;
  145 + ss<<"C = "<<A + Y + X<<std::endl;
  146 + ss<<"D = "<<A + X<<std::endl;
148 147  
149 148 return ss.str();
150 149  
... ... @@ -155,7 +154,7 @@ struct quad
155 154 //scales the plane by a scalar value
156 155  
157 156 //compute the center point
158   - rts::point<T, N> c = A + X*0.5f + Y*0.5f;
  157 + rts::vec<T, N> c = A + X*0.5f + Y*0.5f;
159 158  
160 159 //create the new quadangle
161 160 quad<T, N> result;
... ... @@ -167,7 +166,7 @@ struct quad
167 166  
168 167 }
169 168  
170   - CUDA_CALLABLE T dist(point<T, N> p)
  169 + CUDA_CALLABLE T dist(vec<T, N> p)
171 170 {
172 171 //compute the distance between a point and this quad
173 172  
... ... @@ -176,8 +175,8 @@ struct quad
176 175 triangle<T, N> T1(A+X+Y, A+X, A+Y);
177 176  
178 177  
179   - ptype d0 = T0.dist(p);
180   - ptype d1 = T1.dist(p);
  178 + T d0 = T0.dist(p);
  179 + T d1 = T1.dist(p);
181 180  
182 181 if(d0 < d1)
183 182 return d0;
... ... @@ -185,7 +184,7 @@ struct quad
185 184 return d1;
186 185 }
187 186  
188   - CUDA_CALLABLE T dist_max(point<T, N> p)
  187 + CUDA_CALLABLE T dist_max(vec<T, N> p)
189 188 {
190 189 T da = (A - p).len();
191 190 T db = (A+X - p).len();
... ... @@ -201,7 +200,7 @@ struct quad
201 200 template <typename T, int N>
202 201 std::ostream& operator<<(std::ostream& os, rts::quad<T, N> R)
203 202 {
204   - os<<R.toStr();
  203 + os<<R.str();
205 204 return os;
206 205 }
207 206  
... ...
math/quaternion.h
... ... @@ -16,7 +16,7 @@ public:
16 16  
17 17 void normalize();
18 18 void CreateRotation(T theta, T axis_x, T axis_y, T axis_z);
19   - void CreateRotation(T theta, vector<T, 3> axis);
  19 + void CreateRotation(T theta, vec<T, 3> axis);
20 20 quaternion<T> operator*(quaternion<T> &rhs);
21 21 matrix<T, 3> toMatrix3();
22 22 matrix<T, 4> toMatrix4();
... ... @@ -48,7 +48,7 @@ void quaternion&lt;T&gt;::CreateRotation(T theta, T axis_x, T axis_y, T axis_z)
48 48 }
49 49  
50 50 template<typename T>
51   -void quaternion<T>::CreateRotation(T theta, vector<T, 3> axis)
  51 +void quaternion<T>::CreateRotation(T theta, vec<T, 3> axis)
52 52 {
53 53 CreateRotation(theta, axis[0], axis[1], axis[2]);
54 54 }
... ...
math/triangle.h
... ... @@ -4,12 +4,11 @@
4 4 //enable CUDA_CALLABLE macro
5 5 #include "rts/cuda/callable.h"
6 6 #include "rts/math/vector.h"
7   -#include "rts/math/point.h"
8 7 #include <iostream>
9 8  
10 9 namespace rts{
11 10  
12   -template <class T, int N>
  11 +template <class T, int N=3>
13 12 struct triangle
14 13 {
15 14 /*
... ... @@ -24,15 +23,15 @@ struct triangle
24 23 */
25 24 private:
26 25  
27   - point<T, N> A;
28   - point<T, N> B;
29   - point<T, N> C;
  26 + vec<T, N> A;
  27 + vec<T, N> B;
  28 + vec<T, N> C;
30 29  
31   - CUDA_CALLABLE point<T, N> _p(T s, T t)
  30 + CUDA_CALLABLE vec<T, N> _p(T s, T t)
32 31 {
33 32 //This function returns the point specified by p = A + s(B-A) + t(C-A)
34   - vector<T, N> E0 = B-A;
35   - vector<T, N> E1 = C-A;
  33 + vec<T, N> E0 = B-A;
  34 + vec<T, N> E1 = C-A;
36 35  
37 36 return A + s*E0 + t*E1;
38 37 }
... ... @@ -47,26 +46,26 @@ struct triangle
47 46  
48 47 }
49 48  
50   - CUDA_CALLABLE triangle(point<T, N> a, point<T, N> b, point<T, N> c)
  49 + CUDA_CALLABLE triangle(vec<T, N> a, vec<T, N> b, vec<T, N> c)
51 50 {
52 51 A = a;
53 52 B = b;
54 53 C = c;
55 54 }
56 55  
57   - CUDA_CALLABLE rts::point<T, N> operator()(T s, T t)
  56 + CUDA_CALLABLE rts::vec<T, N> operator()(T s, T t)
58 57 {
59 58 return _p(s, t);
60 59 }
61 60  
62   - CUDA_CALLABLE point<T, N> nearest(point<T, N> p)
  61 + CUDA_CALLABLE vec<T, N> nearest(vec<T, N> p)
63 62 {
64 63 //comptue the distance between a point and this triangle
65 64 // This code is adapted from: http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf
66 65  
67   - vector<T, N> E0 = B-A;
68   - vector<T, N> E1 = C-A;
69   - vector<T, N> D = A - p;
  66 + vec<T, N> E0 = B-A;
  67 + vec<T, N> E1 = C-A;
  68 + vec<T, N> D = A - p;
70 69  
71 70 T a = E0.dot(E0);
72 71 T b = E0.dot(E1);
... ... @@ -125,7 +124,7 @@ struct triangle
125 124 {
126 125 //region 0
127 126 //std::cout<<"Region 0"<<std::endl;
128   - T invDet = (ptype)1.0/det;
  127 + T invDet = (T)1.0/det;
129 128 s *= invDet;
130 129 t *= invDet;
131 130 //done
... ... @@ -176,9 +175,9 @@ struct triangle
176 175  
177 176 }
178 177  
179   - CUDA_CALLABLE T dist(point<T, N> p)
  178 + CUDA_CALLABLE T dist(vec<T, N> p)
180 179 {
181   - point<T, N> n = nearest(p);
  180 + vec<T, N> n = nearest(p);
182 181  
183 182 return (p - n).len();
184 183 }
... ...
math/vector.h
... ... @@ -4,20 +4,21 @@
4 4 #include <iostream>
5 5 #include <cmath>
6 6 #include <sstream>
7   -//#include "rts/point.h"
  7 +//#include "rts/math/point.h"
8 8 #include "rts/cuda/callable.h"
9 9  
  10 +
10 11 namespace rts
11 12 {
12 13  
13 14  
14 15  
15   -template <class T, int N>
16   -struct vector
  16 +template <class T, int N=3>
  17 +struct vec
17 18 {
18 19 T v[N];
19 20  
20   - CUDA_CALLABLE vector()
  21 + CUDA_CALLABLE vec()
21 22 {
22 23 //memset(v, 0, sizeof(T) * N);
23 24 for(int i=0; i<N; i++)
... ... @@ -25,19 +26,30 @@ struct vector
25 26 }
26 27  
27 28 //efficiency constructor, makes construction easier for 1D-4D vectors
28   - CUDA_CALLABLE vector(T x, T y = (T)0.0, T z = (T)0.0, T w = (T)0.0)
  29 + CUDA_CALLABLE vec(T x)
  30 + {
  31 + v[0] = x;
  32 + }
  33 + CUDA_CALLABLE vec(T x, T y)
  34 + {
  35 + v[0] = x;
  36 + v[1] = y;
  37 + }
  38 + CUDA_CALLABLE vec(T x, T y, T z)
29 39 {
30   - if(N >= 1)
31   - v[0] = x;
32   - if(N >= 2)
33   - v[1] = y;
34   - if(N >= 3)
35   - v[2] = z;
36   - if(N >= 4)
37   - v[3] = w;
  40 + v[0] = x;
  41 + v[1] = y;
  42 + v[2] = z;
  43 + }
  44 + CUDA_CALLABLE vec(T x, T y, T z, T w)
  45 + {
  46 + v[0] = x;
  47 + v[1] = y;
  48 + v[2] = z;
  49 + v[3] = w;
38 50 }
39 51  
40   - CUDA_CALLABLE vector(const T(&data)[N])
  52 + CUDA_CALLABLE vec(const T(&data)[N])
41 53 {
42 54 memcpy(v, data, sizeof(T) * N);
43 55 }
... ... @@ -54,12 +66,12 @@ struct vector
54 66  
55 67 }
56 68  
57   - CUDA_CALLABLE vector<T, N> cart2sph()
  69 + CUDA_CALLABLE vec<T, N> cart2sph()
58 70 {
59 71 //convert the vector from cartesian to spherical coordinates
60 72 //x, y, z -> r, theta, phi (where theta = 0 to 2*pi)
61 73  
62   - vector<T, N> sph;
  74 + vec<T, N> sph;
63 75 sph[0] = std::sqrt(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
64 76 sph[1] = std::atan2(v[1], v[0]);
65 77 sph[2] = std::acos(v[2] / sph[0]);
... ... @@ -67,12 +79,12 @@ struct vector
67 79 return sph;
68 80 }
69 81  
70   - CUDA_CALLABLE vector<T, N> sph2cart()
  82 + CUDA_CALLABLE vec<T, N> sph2cart()
71 83 {
72 84 //convert the vector from cartesian to spherical coordinates
73 85 //r, theta, phi -> x, y, z (where theta = 0 to 2*pi)
74 86  
75   - vector<T, N> cart;
  87 + vec<T, N> cart;
76 88 cart[0] = v[0] * std::cos(v[1]) * std::sin(v[2]);
77 89 cart[1] = v[0] * std::sin(v[1]) * std::sin(v[2]);
78 90 cart[2] = v[0] * std::cos(v[2]);
... ... @@ -80,10 +92,10 @@ struct vector
80 92 return cart;
81 93 }
82 94  
83   - CUDA_CALLABLE vector<T, N> norm()
  95 + CUDA_CALLABLE vec<T, N> norm()
84 96 {
85 97 //compute and return the vector norm
86   - vector<T, N> result;
  98 + vec<T, N> result;
87 99  
88 100 //compute the vector length
89 101 T l = len();
... ... @@ -97,9 +109,9 @@ struct vector
97 109 return result;
98 110 }
99 111  
100   - CUDA_CALLABLE vector<T, 3> cross(vector<T, 3> rhs)
  112 + CUDA_CALLABLE vec<T, 3> cross(vec<T, 3> rhs)
101 113 {
102   - vector<T, 3> result;
  114 + vec<T, 3> result;
103 115  
104 116 //compute the cross product (only valid for 3D vectors)
105 117 result[0] = v[1] * rhs[2] - v[2] * rhs[1];
... ... @@ -109,7 +121,7 @@ struct vector
109 121 return result;
110 122 }
111 123  
112   - CUDA_CALLABLE T dot(vector<T, N> rhs)
  124 + CUDA_CALLABLE T dot(vec<T, N> rhs)
113 125 {
114 126 T result = (T)0;
115 127  
... ... @@ -121,36 +133,36 @@ struct vector
121 133 }
122 134  
123 135 //arithmetic
124   - CUDA_CALLABLE vector<T, N> operator+(vector<T, N> rhs)
  136 + CUDA_CALLABLE vec<T, N> operator+(vec<T, N> rhs)
125 137 {
126   - vector<T, N> result;
  138 + vec<T, N> result;
127 139  
128 140 for(int i=0; i<N; i++)
129 141 result.v[i] = v[i] + rhs.v[i];
130 142  
131 143 return result;
132 144 }
133   - CUDA_CALLABLE vector<T, N> operator-(vector<T, N> rhs)
  145 + CUDA_CALLABLE vec<T, N> operator-(vec<T, N> rhs)
134 146 {
135   - vector<T, N> result;
  147 + vec<T, N> result;
136 148  
137 149 for(int i=0; i<N; i++)
138 150 result.v[i] = v[i] - rhs.v[i];
139 151  
140 152 return result;
141 153 }
142   - CUDA_CALLABLE vector<T, N> operator*(T rhs)
  154 + CUDA_CALLABLE vec<T, N> operator*(T rhs)
143 155 {
144   - vector<T, N> result;
  156 + vec<T, N> result;
145 157  
146 158 for(int i=0; i<N; i++)
147 159 result.v[i] = v[i] * rhs;
148 160  
149 161 return result;
150 162 }
151   - CUDA_CALLABLE vector<T, N> operator/(T rhs)
  163 + CUDA_CALLABLE vec<T, N> operator/(T rhs)
152 164 {
153   - vector<T, N> result;
  165 + vec<T, N> result;
154 166  
155 167 for(int i=0; i<N; i++)
156 168 result.v[i] = v[i] / rhs;
... ... @@ -158,7 +170,16 @@ struct vector
158 170 return result;
159 171 }
160 172  
161   - CUDA_CALLABLE bool operator==(vector<T, N> rhs)
  173 + //conversion from a point
  174 + /*CUDA_CALLABLE vector<T, N> & operator=(point<T, N> rhs)
  175 + {
  176 + for(int n=0; n<N; n++)
  177 + v[n] = rhs.p[n];
  178 +
  179 + return *this;
  180 + }*/
  181 +
  182 + CUDA_CALLABLE bool operator==(vec<T, N> rhs)
162 183 {
163 184 if ( (rhs.v[0] == v[0]) && (rhs.v[1] == v[1]) && (rhs.v[2] == v[2]) )
164 185 return true;
... ... @@ -194,7 +215,7 @@ struct vector
194 215 } //end namespace rts
195 216  
196 217 template <typename T, int N>
197   -std::ostream& operator<<(std::ostream& os, rts::vector<T, N> v)
  218 +std::ostream& operator<<(std::ostream& os, rts::vec<T, N> v)
198 219 {
199 220 os<<v.toStr();
200 221 return os;
... ... @@ -202,9 +223,9 @@ std::ostream&amp; operator&lt;&lt;(std::ostream&amp; os, rts::vector&lt;T, N&gt; v)
202 223  
203 224 //arithmetic operators
204 225 template <typename T, int N>
205   -CUDA_CALLABLE rts::vector<T, N> operator-(rts::vector<T, N> v)
  226 +CUDA_CALLABLE rts::vec<T, N> operator-(rts::vec<T, N> v)
206 227 {
207   - rts::vector<T, N> r;
  228 + rts::vec<T, N> r;
208 229  
209 230 //negate the vector
210 231 for(int i=0; i<N; i++)
... ... @@ -214,9 +235,9 @@ CUDA_CALLABLE rts::vector&lt;T, N&gt; operator-(rts::vector&lt;T, N&gt; v)
214 235 }
215 236  
216 237 template <typename T, int N>
217   -CUDA_CALLABLE rts::vector<T, N> operator*(T lhs, rts::vector<T, N> rhs)
  238 +CUDA_CALLABLE rts::vec<T, N> operator*(T lhs, rts::vec<T, N> rhs)
218 239 {
219   - rts::vector<T, N> r;
  240 + rts::vec<T, N> r;
220 241  
221 242 return rhs * lhs;
222 243 }
... ...
optics/beam.h 0 → 100644
  1 +#ifndef RTS_BEAM
  2 +#define RTS_BEAM
  3 +
  4 +#include "../math/vector.h"
  5 +#include "../math/function.h"
  6 +#include <vector>
  7 +
  8 +namespace rts{
  9 +
  10 +template<typename P>
  11 +class beam
  12 +{
  13 +public:
  14 + enum beam_type {Uniform, Bartlett, Hamming, Hanning};
  15 +
  16 +private:
  17 +
  18 + P na[2]; //numerical aperature of the focusing optics
  19 + vec<P> f; //focal point
  20 + vec<P> k; //direction vector
  21 + vec<P> E0; //polarization direction
  22 + P omega; //frequency
  23 +
  24 + function<P, P> apod; //apodization function
  25 + unsigned int apod_res; //resolution of complex apodization filters
  26 +
  27 + void apod_uniform()
  28 + {
  29 + apod = (P)1;
  30 + }
  31 + void apod_bartlett()
  32 + {
  33 + apod = (P)1;
  34 + apod.insert((P)1, (P)0);
  35 + }
  36 + void apod_hanning()
  37 + {
  38 + apod = (P)0;
  39 + P x, y;
  40 + for(unsigned int n=0; n<apod_res; n++)
  41 + {
  42 + x = (P)n/(P)apod_res;
  43 + y = pow( cos( ((P)3.14159 * x) / 2 ), 2);
  44 + apod.insert(x, y);
  45 + }
  46 + }
  47 + void apod_hamming()
  48 + {
  49 + apod = (P)0;
  50 + P x, y;
  51 + for(unsigned int n=0; n<apod_res; n++)
  52 + {
  53 + x = (P)n/(P)apod_res;
  54 + y = (P)27/(P)50 + ( (P)23/(P)50 ) * cos((P)3.14159 * x);
  55 + apod.insert(x, y);
  56 + }
  57 + }
  58 +
  59 + void set_apod(beam_type type)
  60 + {
  61 + if(type == Uniform)
  62 + apod_uniform();
  63 + if(type == Bartlett)
  64 + apod_bartlett();
  65 + if(type == Hanning)
  66 + apod_hanning();
  67 + if(type == Hamming)
  68 + apod_hamming();
  69 + }
  70 +
  71 +public:
  72 +
  73 + ///constructor: build a default beam (NA=1.0)
  74 + beam(beam_type _apod = Uniform)
  75 + {
  76 + na[0] = (P)0.0;
  77 + na[1] = (P)1.0;
  78 + f = vec<P>( (P)0.0, (P)0.0, (P)0.0 );
  79 + k = vec<P>( (P)0.0, (P)0.0, (P)1.0 );
  80 + E0 = vec<P>( (P)1.0, (P)0.0, (P)0.0 );
  81 + omega = (P)2 * (P)3.14159;
  82 + apod_res = 256; //set the default resolution for apodization filters
  83 + set_apod(_apod); //set the apodization function type
  84 +
  85 + }
  86 +
  87 + ///Numerical Aperature functions
  88 + void NA(P _na)
  89 + {
  90 + na[0] = (P)0;
  91 + na[1] = _na;
  92 + }
  93 + void NA(P _na0, P _na1)
  94 + {
  95 + na[0] = _na0;
  96 + na[1] = _na1;
  97 + }
  98 +
  99 +
  100 + //Monte-Carlo decomposition into plane waves
  101 + std::vector< planewave<P> > mc(unsigned int N, unsigned int seed = 0)
  102 + {
  103 + /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling
  104 + of a sphere and projecting these samples onto an inscribed sphere.
  105 +
  106 + samples = rtsPointer to sample vectors specified as normalized cartesian coordinates
  107 + N = number of samples
  108 + kSph = incident light direction in spherical coordinates
  109 + NAin = internal obscuration NA
  110 + NAout = outer cassegrain NA
  111 + */
  112 +
  113 + srand(seed); //seed the random number generator
  114 +
  115 + ///compute the rotation operator to transform (0, 0, 1) to k
  116 + P cos_angle = k.dot(rts::vec<P>(0, 0, 1));
  117 + rts::matrix<P, 3> rotation;
  118 + if(cos_angle != 1.0)
  119 + {
  120 + rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k).norm(); //compute the axis of rotation
  121 + P angle = acos(cos_angle); //compute the angle of rotation
  122 + rts::quaternion<P> quat; //create a quaternion describing the rotation
  123 + quat.CreateRotation(angle, r_axis);
  124 + rotation = quat.toMatrix3(); //compute the rotation matrix
  125 + }
  126 +
  127 + //find the phi values associated with the cassegrain ring
  128 + P PHI[2];
  129 + PHI[0] = (P)asin(na[0]);
  130 + PHI[1] = (P)asin(na[1]);
  131 +
  132 + //calculate the z-axis cylinder coordinates associated with these angles
  133 + P Z[2];
  134 + Z[0] = cos(PHI[0]);
  135 + Z[1] = cos(PHI[1]);
  136 + P range = Z[0] - Z[1];
  137 +
  138 + std::vector< planewave<P> > samples; //create a vector of plane waves
  139 +
  140 + planewave<P> beam_center(k, E0, omega); //create a plane wave representing the beam center
  141 +
  142 + //draw a distribution of random phi, z values
  143 + P z, phi, theta;
  144 + for(int i=0; i<N; i++) //for each sample
  145 + {
  146 + z = ((P)rand() / (P)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder
  147 + theta = ((P)rand() / (P)RAND_MAX) * 2 * (P)3.14159;
  148 + phi = acos(z); //project onto the sphere, computing phi in spherical coordinates
  149 +
  150 + //compute and store cartesian coordinates
  151 + rts::vec<P> spherical(1, theta, phi); //convert from spherical to cartesian coordinates
  152 + rts::vec<P> cart = spherical.sph2cart();
  153 + vec<P> k_prime = rotation * cart; //create a sample vector
  154 +
  155 + //store a wave refracted along the given direction
  156 + samples.push_back(beam_center.refract(k_prime) * apod(phi/PHI[1]));
  157 + }
  158 +
  159 + return samples;
  160 + }
  161 +
  162 +};
  163 +
  164 +}
  165 +
  166 +#endif
0 167 \ No newline at end of file
... ...
optics/efield.cuh
1   -#include "rts/math/complex.h"
2   -#include "rts/visualization/colormap.h"
  1 +#include "../math/complex.h"
  2 +#include "../visualization/colormap.h"
  3 +#include "../visualization/scalarfield.cuh"
  4 +#include "../visualization/vectorfield.cuh"
  5 +#include "../optics/planewave.h"
  6 +#include "../cuda/devices.h"
  7 +
  8 +
3 9  
4 10 namespace rts{
5 11  
  12 +template<typename T>
  13 +__global__ void gpu_planewave2efield(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1,
  14 + planewave<T> w, quad<T> q)
  15 +{
  16 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  17 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  18 +
  19 + //make sure that the thread indices are in-bounds
  20 + if(iu >= r0 || iv >= r1) return;
  21 +
  22 + //compute the index into the field
  23 + int i = iv*r0 + iu;
  24 +
  25 + //get the current position
  26 + vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 );
  27 + vec<T> r(p[0], p[1], p[2]);
  28 +
  29 + complex<T> x( 0.0f, w.omega * (w.k_hat.dot(r)) );
  30 +
  31 + if(Y == NULL) //if this is a scalar simulation
  32 + X[i] += w.E0.len() * exp(x); //use the vector magnitude as the plane wave amplitude
  33 + else
  34 + {
  35 + X[i] += w.E0[0] * exp(x);
  36 + Y[i] += w.E0[1] * exp(x);
  37 + Z[i] += w.E0[2] * exp(x);
  38 + }
  39 +}
  40 +
  41 +template<typename T>
  42 +__global__ void gpu_efield_magnitude(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, T* M)
  43 +{
  44 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  45 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  46 +
  47 + //make sure that the thread indices are in-bounds
  48 + if(iu >= r0 || iv >= r1) return;
  49 +
  50 + //compute the index into the field
  51 + int i = iv*r0 + iu;
  52 +
  53 + if(Y == NULL) //if this is a scalar simulation
  54 + M[i] = X[i].abs(); //compute the magnitude of the X component
  55 + else
  56 + {
  57 + M[i] = rts::vec<T>(X[i].abs(), Y[i].abs(), Z[i].abs()).len();
  58 + //M[i] = Z[i].abs();
  59 + }
  60 +}
  61 +
  62 +template<typename T>
  63 +__global__ void gpu_efield_polarization(complex<T>* X, complex<T>* Y, complex<T>* Z,
  64 + unsigned int r0, unsigned int r1,
  65 + T* Px, T* Py, T* Pz)
  66 +{
  67 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  68 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  69 +
  70 + //make sure that the thread indices are in-bounds
  71 + if(iu >= r0 || iv >= r1) return;
  72 +
  73 + //compute the index into the field
  74 + int i = iv*r0 + iu;
  75 +
  76 + //compute the field polarization
  77 + Px[i] = X[i].abs();
  78 + Py[i] = Y[i].abs();
  79 + Pz[i] = Z[i].abs();
  80 +
  81 +}
  82 +
6 83 /* This class implements a discrete representation of an electromagnetic field
7 84 in 2D. The majority of this representation is done on the GPU.
8 85 */
... ... @@ -21,23 +98,53 @@ private:
21 98 //resolution of the discrete field
22 99 unsigned int R[2];
23 100  
  101 + //shape of the 2D field
  102 + quad<P> pos;
  103 +
  104 + void from_planewave(planewave<P> p)
  105 + {
  106 + unsigned int SQRT_BLOCK = 16;
  107 + //create one thread for each detector pixel
  108 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  109 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  110 +
  111 + gpu_planewave2efield<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], p, pos);
  112 + }
  113 +
  114 + void clear()
  115 + {
  116 + cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
  117 +
  118 + if(!scalar)
  119 + {
  120 + cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
  121 + cudaMemset(Z, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
  122 + }
  123 + }
  124 +
24 125  
25 126 public:
26 127  
27 128 efield(unsigned int res0, unsigned int res1, bool _scalar = false)
28 129 {
29   - //initialize all pointers to NULL
30   - X = Y = Z = NULL;
  130 + scalar = _scalar; //initialize field type
  131 +
  132 + X = Y = Z = NULL; //initialize all pointers to NULL
31 133 R[0] = res0;
32 134 R[1] = res1;
33 135  
34 136 //allocate memory on the gpu
35 137 cudaMalloc(&X, sizeof(rts::complex<P>) * R[0] * R[1]);
  138 + cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
36 139  
37 140 if(!scalar)
38 141 {
  142 + std::cout<<"scalar:";
39 143 cudaMalloc(&Y, sizeof(rts::complex<P>) * R[0] * R[1]);
  144 + cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
  145 +
40 146 cudaMalloc(&Z, sizeof(rts::complex<P>) * R[0] * R[1]);
  147 + cudaMemset(Z, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
41 148 }
42 149 }
43 150  
... ... @@ -49,14 +156,96 @@ public:
49 156 if(Z != NULL) cudaFree(Z);
50 157 }
51 158  
52   - void render(std::string)
  159 + void position(quad<P> _p)
  160 + {
  161 + pos = _p;
  162 + }
  163 +
  164 + std::string str()
  165 + {
  166 + stringstream ss;
  167 + ss<<pos<<std::endl;
  168 + ss<<"X: "<<X<<std::endl;
  169 + ss<<"Y: "<<Y<<std::endl;
  170 + ss<<"Z: "<<Z;
  171 +
  172 + return ss.str();
  173 + }
  174 +
  175 + //assignment operator: build an electric field from a plane wave
  176 + efield<P> & operator= (const planewave<P> & rhs)
  177 + {
  178 +
  179 + clear(); //clear any previous field data
  180 + from_planewave(rhs); //create a field from the planewave
  181 + return *this;
  182 + }
  183 +
  184 + //assignment operator: add the electric field from a plane wave
  185 + efield<P> & operator+= (const planewave<P> & rhs)
  186 + {
  187 + //create a field from the planewave
  188 + from_planewave(rhs);
  189 +
  190 + return *this;
  191 + }
  192 +
  193 + //assignment operator: build an electric field from a list of plane waves
  194 + efield<P> & operator= (const std::vector< planewave<P> > & rhs)
  195 + {
  196 + clear(); //clear any previous field data
  197 + for(unsigned int i = 0; i < rhs.size(); i++)
  198 + from_planewave(rhs[i]);
  199 + return *this;
  200 + }
  201 +
  202 + //return a scalar field representing field magnitude
  203 + scalarfield<P> mag()
53 204 {
  205 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  206 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
54 207  
  208 + //create a scalar field to store the result
  209 + scalarfield<P> M(R[0], R[1]);
  210 +
  211 + //create one thread for each detector pixel
  212 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  213 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  214 +
  215 + //compute the magnitude and store it in a scalar field
  216 + gpu_efield_magnitude<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.S);
  217 +
  218 + return M;
55 219 }
56 220  
  221 + //return a vector field representing field polarization
  222 + vectorfield<P> polarization()
  223 + {
  224 + if(scalar)
  225 + {
  226 + std::cout<<"ERROR: Cannot compute polarization of a scalar field."<<std::endl;
  227 + exit(1);
  228 + }
  229 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  230 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  231 +
  232 + //create one thread for each detector pixel
  233 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  234 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  235 +
  236 +
  237 + vectorfield<P> Pol(R[0], R[1]); //create a vector field to store the result
  238 +
  239 + //compute the polarization and store it in the vector field
  240 + gpu_efield_polarization<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], Pol.S[0], Pol.S[1], Pol.S[2]);
  241 +
  242 + return Pol; //return the vector field
  243 + }
  244 +
  245 +
57 246  
58 247 };
59 248  
60 249  
61 250  
62 251 -} //end namespace rts
  252 +} //end namespace rts
63 253 \ No newline at end of file
... ...
optics/planewave.h 0 → 100644
  1 +#ifndef RTS_PLANEWAVE
  2 +#define RTS_PLANEWAVE
  3 +
  4 +#include <string>
  5 +#include <sstream>
  6 +
  7 +#include "rts/math/vector.h"
  8 +#include "rts/math/quaternion.h"
  9 +
  10 +namespace rts{
  11 +
  12 +template<typename P>
  13 +class planewave{
  14 +
  15 +public:
  16 + rts::vec<P> k_hat; //normalized planewave direction
  17 + P omega; //frequency
  18 +
  19 + rts::vec<P> E0; //amplitude
  20 +
  21 +
  22 +
  23 + planewave()
  24 + {
  25 + omega = (P)2 * (P)3.14159;
  26 + k_hat = rts::vec<P>(0, 0, 1);
  27 + E0 = rts::vec<P>(1, 0, 0);
  28 + }
  29 + ///constructor: create a plane wave propagating along z, polarized along x
  30 + planewave(P _omega)
  31 + {
  32 + omega = _omega;
  33 + k_hat = rts::vec<P>(0, 0, 1);
  34 + E0 = rts::vec<P>(1, 0, 0);
  35 + }
  36 + ///constructor: create a plane wave propagating along _k, polarized along _E0, at frequency _omega
  37 + planewave(vec<P> _k, vec<P> _E0, P _omega)
  38 + {
  39 + k_hat = _k.norm();
  40 + vec<P> s = k_hat.cross(_E0); //re-orthogonalize
  41 + E0 = s.cross(k_hat);
  42 + omega = _omega;
  43 + }
  44 +
  45 + ///multiplication operator: scale E0
  46 + //assignment operator: build an electric field from a plane wave
  47 + planewave<P> & operator* (const P & rhs)
  48 + {
  49 +
  50 + E0 = E0 * rhs;
  51 + return *this;
  52 + }
  53 +
  54 +
  55 + planewave<P> refract(rts::vec<P> new_k)
  56 + {
  57 + new_k = new_k.norm(); //normalize new_k
  58 +
  59 + //compute the side vector (around which we will be rotating)
  60 + rts::vec<P> s = k_hat.cross(E0.norm());
  61 +
  62 + //compute the projection of k' onto the k-E plane
  63 + rts::vec<P> s_prime = s * (new_k.dot(s));
  64 +
  65 + //compute the angle between
  66 + P theta = acos(k_hat.dot( (new_k - s_prime).norm() ));
  67 +
  68 + //rotate E0 around s by theta
  69 + quaternion<P> q;
  70 + q.CreateRotation(theta, s);
  71 + rts::vec<P> E0_prime = q.toMatrix3() * E0;
  72 +
  73 + //create the refracted plane wave
  74 + planewave<P> new_p(omega);
  75 + new_p.E0 = E0_prime;
  76 + new_p.k_hat = new_k;
  77 +
  78 + return new_p;
  79 + }
  80 +
  81 + std::string str()
  82 + {
  83 + std::stringstream ss;
  84 + ss<<E0<<" e^i ( "<<omega<<"t - "<<omega<<" "<<k_hat * omega<<" . r )";
  85 + return ss.str();
  86 + }
  87 +};
  88 +}
  89 +
  90 +#endif
0 91 \ No newline at end of file
... ...
visualization/colormap.h
... ... @@ -32,7 +32,7 @@ static cudaArray* gpuBrewer;
32 32  
33 33 namespace rts{
34 34  
35   -enum colormapType {cmBrewer, cmGrayscale};
  35 +enum colormapType {cmBrewer, cmGrayscale, cmRainbow};
36 36  
37 37 static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size)
38 38 {
... ... @@ -119,10 +119,10 @@ __global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsi
119 119 float a = (gpuSource[i] - minVal) / (maxVal - minVal);
120 120  
121 121 //threshold
122   - if(a > 1.0)
123   - a = 1.0;
124   - if(a < 0.0)
125   - a = 0.0;
  122 + if(a > 1)
  123 + a = 1;
  124 + if(a < 0)
  125 + a = 0;
126 126  
127 127 gpuDest[i * 3 + 0] = 255 * a;
128 128 gpuDest[i * 3 + 1] = 255 * a;
... ... @@ -222,9 +222,9 @@ static void cpuApplyBrewer(T* cpuSource, unsigned char* cpuDest, unsigned int N,
222 222 }
223 223 else
224 224 {
225   - r = BREWERCP[ptLow * 4 + 0] * (1.0-m) + BREWERCP[ (ptLow+1) * 4 + 0] * m;
226   - g = BREWERCP[ptLow * 4 + 1] * (1.0-m) + BREWERCP[ (ptLow+1) * 4 + 1] * m;
227   - b = BREWERCP[ptLow * 4 + 2] * (1.0-m) + BREWERCP[ (ptLow+1) * 4 + 2] * m;
  225 + r = BREWERCP[ptLow * 4 + 0] * (1-m) + BREWERCP[ (ptLow+1) * 4 + 0] * m;
  226 + g = BREWERCP[ptLow * 4 + 1] * (1-m) + BREWERCP[ (ptLow+1) * 4 + 1] * m;
  227 + b = BREWERCP[ptLow * 4 + 2] * (1-m) + BREWERCP[ (ptLow+1) * 4 + 2] * m;
228 228 }
229 229  
230 230  
... ... @@ -251,8 +251,8 @@ static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T
251 251 //normalize to the range [valMin valMax]
252 252 a = (cpuSource[i] - valMin) / range;
253 253  
254   - if(a < 0) a = 0.0;
255   - if(a > 1) a = 1.0;
  254 + if(a < 0) a = 0;
  255 + if(a > 1) a = 1;
256 256  
257 257 cpuDest[i * 3 + 0] = 255 * a;
258 258 cpuDest[i * 3 + 1] = 255 * a;
... ... @@ -275,7 +275,7 @@ static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, co
275 275 }
276 276  
277 277 if(positive)
278   - cpu2cpu(cpuSource, cpuDest, nVals, (T)0.0, maxVal, cm);
  278 + cpu2cpu(cpuSource, cpuDest, nVals, (T)0, maxVal, cm);
279 279 else
280 280 cpu2cpu(cpuSource, cpuDest, nVals, -maxVal, maxVal, cm);
281 281  
... ...
visualization/scalarfield.cuh
1   -#ifndef RTS_SCALAR_SLICE
2   -#define RTS_SCALAR_SLICE
3   -
4   -#include "rts/visualization/colormap.h"
5   -#include "rts/envi/envi.h"
  1 +#ifndef RTS_SCALAR_SLICE
  2 +#define RTS_SCALAR_SLICE
  3 +
  4 +#include "../visualization/colormap.h"
  5 +#include "../envi/envi.h"
  6 +#include "../math/quad.h"
  7 +#include "../cuda/devices.h"
6 8 #include "cublas_v2.h"
7   -#include <cuda_runtime.h>
  9 +#include <cuda_runtime.h>
  10 +
  11 +///Compute a Gaussian function in 3D (mostly for testing)
  12 +template<typename T>
  13 +__global__ void gpu_gaussian(T* dest, unsigned int r0, unsigned int r1, T mean, T std, rts::quad<T> shape)
  14 +{
  15 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  16 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  17 +
  18 + //make sure that the thread indices are in-bounds
  19 + if(iu >= r0 || iv >= r1) return;
  20 +
  21 + //compute the index into the field
  22 + int i = iv*r0 + iu;
  23 +
  24 + T u = (T)iu / (T)r0;
  25 + T v = (T)iv / (T)r1;
  26 +
  27 + rts::vec<T> p = shape(u, v);
  28 +
  29 + T fx = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[0] - mean, 2) / (2 * std*std) );
  30 + T fy = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[1] - mean, 2) / (2 * std*std) );
  31 + T fz = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[2] - mean, 2) / (2 * std*std) );
  32 +
  33 + dest[i] = fx * fy * fz;
  34 +}
  35 +
  36 +namespace rts {
  37 +template<typename P>
  38 +struct scalarfield
  39 +{
  40 + //gpu pointer to the scalar slice
  41 + P* S;
8 42  
9   -template<typename P>
10   -struct scalarfield
11   -{
12   - //gpu pointer to the scalar slice
13   - P* S;
14   -
15   - //resolution of the slice
16   - int R[2];
  43 + //resolution of the slice
  44 + int R[2];
  45 +
  46 + quad<P> shape;
17 47  
18 48 scalarfield()
19 49 {
20 50 R[0] = R[1] = 0;
  51 + shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0));
21 52 S = NULL;
22 53  
23   - //std::cout<<"Scalerslice created (default)."<<std::endl;
  54 + std::cout<<"scalarfield CONSTRUCTOR"<<std::endl;
24 55 }
25   -
  56 +
26 57 scalarfield(int x, int y)
27   - {
28   - //set the resolution
29   - R[0] = x;
30   - R[1] = y;
31   -
32   - //allocate memory on the GPU
  58 + {
  59 + //set the resolution
  60 + R[0] = x;
  61 + R[1] = y;
  62 +
  63 + shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0));
  64 +
  65 + //allocate memory on the GPU
33 66 HANDLE_ERROR(cudaMalloc( (void**)&S, sizeof(P) * x * y ));
34   - //std::cout<<"Scalerslice created."<<std::endl;
  67 +
  68 + std::cout<<"scalarfield CONSTRUCTOR"<<std::endl;
35 69 }
36   -
  70 +
37 71 ~scalarfield()
38   - {
39   - if(S != NULL)
40   - HANDLE_ERROR(cudaFree(S));
  72 + {
  73 + if(S != NULL)
  74 + HANDLE_ERROR(cudaFree(S));
41 75 S = NULL;
42 76 R[0] = R[1] = 0;
43 77  
44   - //std::cout<<"Scalerslice destroyed."<<std::endl;
  78 + std::cout<<"scalarfield DESTRUCTOR"<<std::endl;
45 79 }
46   -
  80 +
47 81 void clear()
48   - {
49   - //this function sets the slice to zero
50   - if(S != NULL)
51   - //HANDLE_ERROR(cudaMalloc( (void**)&S, sizeof(P) * R[0] * R[1] ));
52   - HANDLE_ERROR(cudaMemset(S, 0, sizeof(P) * R[0] * R[1]));
53   - }
54   -
  82 + {
  83 + //this function sets the slice to zero
  84 + if(S != NULL)
  85 + HANDLE_ERROR(cudaMemset(S, 0, sizeof(P) * R[0] * R[1]));
  86 + }
  87 +
55 88 void toImage(std::string filename, P vmin, P vmax, rts::colormapType cmap = rts::cmBrewer)
56   - {
57   - rts::gpu2image<ptype>(S, filename, R[0], R[1], vmin, vmax, cmap);
  89 + {
  90 + rts::gpu2image<P>(S, filename, R[0], R[1], vmin, vmax, cmap);
58 91 }
59 92  
60 93 void toImage(std::string filename, bool positive = true, rts::colormapType cmap = rts::cmBrewer)
... ... @@ -73,11 +106,11 @@ struct scalarfield
73 106 //find the index of the value with maximum magnitude
74 107 int N = R[0] * R[1];
75 108 int result;
76   - #ifdef PRECISION_SINGLE
77   - stat = cublasIsamax(handle, N, S, 1, &result);
78   - #elif defined PRECISION_DOUBLE
79   - stat = cublasIdamax(handle, N, S, 1, &result);
80   - #endif
  109 +
  110 + if(sizeof(P) == 4)
  111 + stat = cublasIsamax(handle, N, (float*)S, 1, &result);
  112 + else
  113 + stat = cublasIdamax(handle, N, (double*)S, 1, &result);
81 114  
82 115 //adjust for 1-based indexing
83 116 result -= 1;
... ... @@ -92,7 +125,7 @@ struct scalarfield
92 125  
93 126 //retrieve the maximum value
94 127 P maxVal;
95   - HANDLE_ERROR(cudaMemcpy(&maxVal, S + result, sizeof(ptype), cudaMemcpyDeviceToHost));
  128 + HANDLE_ERROR(cudaMemcpy(&maxVal, S + result, sizeof(P), cudaMemcpyDeviceToHost));
96 129  
97 130 //destroy the CUBLAS handle
98 131 cublasDestroy(handle);
... ... @@ -105,7 +138,7 @@ struct scalarfield
105 138 }
106 139  
107 140  
108   - void toEnvi(std::string filename, ptype wavelength = 0, bool append = false)
  141 + void toEnvi(std::string filename, P wavelength = 0, bool append = false)
109 142 {
110 143 std::string mode;
111 144 if(append) mode = "a";
... ... @@ -115,23 +148,21 @@ struct scalarfield
115 148 EnviFile outfile(filename, mode);
116 149  
117 150 //get the scalar slice from the GPU to the CPU
118   - int memsize = sizeof(ptype) * R[0] * R[1];
119   - ptype* cpuData = (ptype*) malloc( memsize );
  151 + int memsize = sizeof(P) * R[0] * R[1];
  152 + P* cpuData = (P*) malloc( memsize );
120 153 HANDLE_ERROR(cudaMemcpy( cpuData, S, memsize, cudaMemcpyDeviceToHost));
121 154  
122 155 //add a band to the ENVI file
123 156 outfile.addBand(cpuData, R[0], R[1], wavelength);
124 157  
125 158 outfile.close();
126   -
127   -
128 159 }
129 160  
130 161 //assignment operator
131 162 scalarfield & operator= (const scalarfield & rhs)
132 163 {
133 164 //de-allocate any existing GPU memory
134   - if(S != NULL)
  165 + if(S != NULL)
135 166 HANDLE_ERROR(cudaFree(S));
136 167  
137 168 //copy the slice resolution
... ... @@ -139,20 +170,54 @@ struct scalarfield
139 170 R[1] = rhs.R[1];
140 171  
141 172 //allocate the necessary memory
142   - HANDLE_ERROR(cudaMalloc(&S, sizeof(ptype) * R[0] * R[1]));
  173 + HANDLE_ERROR(cudaMalloc(&S, sizeof(P) * R[0] * R[1]));
143 174  
144 175 //copy the slice
145   - HANDLE_ERROR(cudaMemcpy(S, rhs.S, sizeof(ptype) * R[0] * R[1], cudaMemcpyDeviceToDevice));
  176 + HANDLE_ERROR(cudaMemcpy(S, rhs.S, sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
146 177  
147 178  
148 179 std::cout<<"Assignment operator."<<std::endl;
149 180  
150 181 return *this;
  182 + }
  183 +
  184 + ///copy constructor
  185 + scalarfield(const scalarfield &rhs)
  186 + {
  187 + //first make a shallow copy
  188 + R[0] = rhs.R[0];
  189 + R[1] = rhs.R[1];
  190 +
  191 + //do we have to make a deep copy?
  192 + if(rhs.S == NULL)
  193 + S = NULL; //no
  194 + else
  195 + {
  196 + //allocate the necessary memory
  197 + HANDLE_ERROR(cudaMalloc(&S, sizeof(P) * R[0] * R[1]));
  198 +
  199 + //copy the slice
  200 + HANDLE_ERROR(cudaMemcpy(S, rhs.S, sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
  201 + }
  202 +
  203 + std::cout<<"scalarfield COPY CONSTRUCTOR"<<std::endl;
  204 + }
  205 +
  206 + void gaussian(P mean, P std)
  207 + {
  208 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  209 + int SQRT_BLOCK = (int)sqrt((float)maxThreads);
  210 + //create one thread for each detector pixel
  211 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  212 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  213 +
  214 + gpu_gaussian<float> <<<dimGrid, dimBlock>>> (S, R[0], R[1], mean, std, shape);
  215 + }
  216 +
  217 +};
  218 +
  219 +} //end namespace rts
  220 +
  221 +
151 222  
152   - }
153   -
154   -};
155   -
156   -
157   -
158 223 #endif
... ...
visualization/vectorfield.cuh 0 → 100644
  1 +#ifndef RTS_VECTORFIELD
  2 +#define RTS_VECTORFIELD
  3 +
  4 +#include "../visualization/colormap.h"
  5 +#include "../envi/envi.h"
  6 +#include "../math/quad.h"
  7 +#include "../cuda/devices.h"
  8 +#include "cublas_v2.h"
  9 +#include <cuda_runtime.h>
  10 +#include <iomanip>
  11 +
  12 +#include <qimage.h>
  13 +#include <qcolor.h>
  14 +
  15 +
  16 +namespace rts {
  17 +template<typename P, unsigned int N = 3>
  18 +struct vectorfield
  19 +{
  20 +private:
  21 + void process_filename(std::string name, std::string &prefix, std::string &postfix,
  22 + std::string &ext, unsigned int &digits)
  23 + {
  24 + std::stringstream ss(name);
  25 + std::string item;
  26 + std::vector<std::string> elems;
  27 + while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension)
  28 + {
  29 + elems.push_back(item);
  30 + }
  31 +
  32 + prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters)
  33 + ext = elems[1]; //file extension (ex. .bmp, .png)
  34 + ext = std::string(".") + ext; //add a period back into the extension
  35 +
  36 + size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'')
  37 + size_t i1 = prefix.find_last_of("?");
  38 +
  39 + postfix = prefix.substr(i1+1);
  40 + prefix = prefix.substr(0, i0);
  41 +
  42 + digits = i1 - i0 + 1; //compute the number of wildcards
  43 +
  44 + }
  45 +
  46 +public:
  47 + //gpu pointers to scalar slices
  48 + P* S[N];
  49 +
  50 + //resolution of the slice
  51 + int R[2];
  52 +
  53 + quad<P> shape;
  54 +
  55 + vectorfield()
  56 + {
  57 + R[0] = R[1] = 0; //set the initial resolution to 0
  58 + shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0));
  59 + for(int n=0; n<N; n++) //set each vector component to NULL
  60 + S[n]=NULL;
  61 +
  62 + std::cout<<"vectorfield CONSTRUCTOR"<<std::endl;
  63 + }
  64 +
  65 + vectorfield(int x, int y)
  66 + {
  67 + //set the resolution
  68 + R[0] = x;
  69 + R[1] = y;
  70 +
  71 + shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0));
  72 +
  73 + //allocate memory on the GPU
  74 + for(int n=0; n<N; n++)
  75 + HANDLE_ERROR(cudaMalloc( (void**)&S[n], sizeof(P) * x * y ));
  76 +
  77 + std::cout<<"vectorfield CONSTRUCTOR"<<std::endl;
  78 + }
  79 +
  80 + ~vectorfield()
  81 + {
  82 + for(int n=0; n<N; n++)
  83 + if(S[n] != NULL)
  84 + {
  85 + HANDLE_ERROR(cudaFree(S[n]));
  86 + S[n] = NULL;
  87 + }
  88 + R[0] = R[1] = 0;
  89 +
  90 + std::cout<<"vectorfield DESTRUCTOR"<<std::endl;
  91 + }
  92 +
  93 + void clear()
  94 + {
  95 + //this function sets the slice to zero
  96 + for(int n=0; n<N; n++)
  97 + if(S[n] != NULL)
  98 + HANDLE_ERROR(cudaMemset(S[n], 0, sizeof(P) * R[0] * R[1]));
  99 + }
  100 +
  101 + void toImage(std::string filename, unsigned int n, P vmin, P vmax, rts::colormapType cmap = rts::cmBrewer)
  102 + {
  103 + rts::gpu2image<P>(S[n], filename, R[0], R[1], vmin, vmax, cmap);
  104 + }
  105 +
  106 + void toImage3(std::string filename, P vmin, P vmax)
  107 + {
  108 + std::cout<<"Implementing a 3-component rainbow colormap: "<<filename.c_str()<<std::endl;
  109 + //create a buffer for each RGB component
  110 + unsigned char* red = (unsigned char*)malloc(sizeof(unsigned char) * 3 * R[0] * R[1]);
  111 + unsigned char* green = (unsigned char*)malloc(sizeof(unsigned char) * 3 * R[0] * R[1]);
  112 + unsigned char* blue = (unsigned char*)malloc(sizeof(unsigned char) * 3 * R[0] * R[1]);
  113 +
  114 + //retrieve the buffered images for each component
  115 + rts::gpu2cpu<P>(S[0], red, R[0] * R[1], vmin, vmax);
  116 + rts::gpu2cpu<P>(S[1], green, R[0] * R[1], vmin, vmax);
  117 + rts::gpu2cpu<P>(S[2], blue, R[0] * R[1], vmin, vmax);
  118 +
  119 + QImage image(R[0], R[1], QImage::Format_RGB32); //create a QImage object
  120 + if(image.isNull()) //if it didn't work, throw an error
  121 + {
  122 + std::cout<<"Error creating QImage."<<std::endl;
  123 + return;
  124 + }
  125 +
  126 + int i;
  127 + unsigned char r, g, b;
  128 + unsigned int x, y;
  129 + for(y=0; y<R[1]; y++)
  130 + for(x=0; x<R[0]; x++)
  131 + {
  132 + //calculate the 1D index
  133 + i = y * R[0] + x;
  134 +
  135 + r = red[i * 3 + 0];
  136 + g = green[i * 3 + 1];
  137 + b = blue[i * 3 + 2];
  138 +
  139 + //set the image pixel
  140 + QColor color(r, g, b);
  141 + image.setPixel(x, y, color.rgb());
  142 + }
  143 +
  144 + if(!image.save(filename.c_str())) //if the image didn't save correctly,
  145 + std::cout<<"Error saving QImage."<<std::endl; // throw an error
  146 + }
  147 +
  148 + void toImages(std::string filename, bool positive = true, rts::colormapType cmap = rts::cmBrewer, bool globalmax = true)
  149 + {
  150 + std::string prefix, postfix, extension;
  151 + unsigned int digits;
  152 + process_filename(filename, prefix, postfix, extension, digits); //process the filename for wild cards
  153 +
  154 + cublasStatus_t stat;
  155 + cublasHandle_t handle;
  156 +
  157 + //create a CUBLAS handle
  158 + stat = cublasCreate(&handle);
  159 + if(stat != CUBLAS_STATUS_SUCCESS)
  160 + {
  161 + std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
  162 + exit(1);
  163 + }
  164 +
  165 + int L = R[0] * R[1]; //compute the number of discrete points in a slice
  166 + int result; //result of the max operation
  167 +
  168 + P maxVal[N]; //array stores minimum and maximum values
  169 + P maxAll = 0; //largest value in the data set
  170 +
  171 + //compute the maximum value for each vector component
  172 + for(int n=0; n<N; n++)
  173 + {
  174 + if(sizeof(P) == 4)
  175 + stat = cublasIsamax(handle, L, (const float*)S[n], 1, &result);
  176 + else
  177 + stat = cublasIdamax(handle, L, (const double*)S[n], 1, &result);
  178 +
  179 + result -= 1; //adjust for 1-based indexing
  180 +
  181 + if(stat != CUBLAS_STATUS_SUCCESS) //if there was a GPU error, terminate
  182 + {
  183 + std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
  184 + exit(1);
  185 + }
  186 +
  187 + //retrieve the maximum value for this slice and store it in the maxVal array
  188 + HANDLE_ERROR(cudaMemcpy(&maxVal[n], S[n] + result, sizeof(P), cudaMemcpyDeviceToHost));
  189 + if(maxVal[n] > maxAll) //if maxVal is larger, update the maxAll variable
  190 + maxAll = maxVal[n];
  191 +
  192 + }
  193 +
  194 + cublasDestroy(handle); //destroy the CUBLAS handle
  195 +
  196 + if(cmap == rts::cmRainbow && N == 3) //if the user specifies a rainbow colormap, and the vector has 3 elements
  197 + {
  198 + //implement a single image with RGB = XYZ
  199 + if(positive)
  200 + toImage3(prefix+postfix+extension, 0, maxAll);
  201 + else
  202 + toImage3(prefix+postfix+extension, 0, maxAll);
  203 + }
  204 + else
  205 + {
  206 + for(int n=0; n<N; n++) //for each image
  207 + {
  208 + stringstream ss; //assemble the file name
  209 + ss<<prefix<<std::setfill('0')<<std::setw(digits)<<n<<postfix<<extension;
  210 + std::cout<<ss.str()<<std::endl;
  211 + if(positive) //if the image is positive
  212 + {
  213 + std::cout<<"Positive: "<<n<<std::endl;
  214 + if(globalmax) //if the global maximum is used
  215 + toImage(ss.str(), n, 0, maxAll, cmap); //save the image using the global maximum
  216 + else
  217 + toImage(ss.str(), n, 0, maxVal[n], cmap); //save the image using the local maximum
  218 + }
  219 + else
  220 + {
  221 + std::cout<<"Negative: "<<n<<std::endl;
  222 + if(globalmax) //if the global maximum is used
  223 + toImage(ss.str(), n, -abs(maxVal[n]), abs(maxVal[n]), cmap); //save the image using the global maximum
  224 + else
  225 + toImage(ss.str(), n, -abs(maxVal[n]), abs(maxVal[n]), cmap); //save the image using the local maximum
  226 + }
  227 + }
  228 + }
  229 + }
  230 +
  231 + //assignment operator
  232 + vectorfield & operator= (const vectorfield & rhs)
  233 + {
  234 + //de-allocate any existing GPU memory
  235 + for(int n=0; n<N; n++)
  236 + if(S[n] != NULL)
  237 + HANDLE_ERROR(cudaFree(S[n]));
  238 +
  239 + //copy the slice resolution
  240 + R[0] = rhs.R[0];
  241 + R[1] = rhs.R[1];
  242 +
  243 + for(int n=0; n<N; n++)
  244 + {
  245 + //allocate the necessary memory
  246 + HANDLE_ERROR(cudaMalloc(&S[n], sizeof(P) * R[0] * R[1]));
  247 +
  248 + //copy the slice
  249 + HANDLE_ERROR(cudaMemcpy(S[n], rhs.S[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
  250 + }
  251 +
  252 +
  253 + std::cout<<"Assignment operator."<<std::endl;
  254 +
  255 + return *this;
  256 + }
  257 +
  258 + ///copy constructor
  259 + vectorfield(const vectorfield &rhs)
  260 + {
  261 + //first make a shallow copy
  262 + R[0] = rhs.R[0];
  263 + R[1] = rhs.R[1];
  264 +
  265 + //do we have to make a deep copy?
  266 + if(rhs.S[0] == NULL) //no?
  267 + {
  268 + for(int n=0; n<N; n++) //set all components to NULL
  269 + {
  270 + S[n] = NULL;
  271 + }
  272 + }
  273 + else
  274 + {
  275 + for(int n=0; n<N; n++)
  276 + {
  277 + //allocate the necessary memory
  278 + HANDLE_ERROR(cudaMalloc(&S[n], sizeof(P) * R[0] * R[1]));
  279 +
  280 + //copy the slice
  281 + HANDLE_ERROR(cudaMemcpy(S[n], rhs.S[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
  282 + }
  283 + }
  284 +
  285 + std::cout<<"vectorfield COPY CONSTRUCTOR"<<std::endl;
  286 + }
  287 +
  288 +};
  289 +
  290 +} //end namespace rts
  291 +
  292 +
  293 +
  294 +#endif
... ...