Commit 8d4f09409f53f0f8545a8d6aa3ba027f916ecdaf

Authored by David Mayerich
1 parent a9275be5

ERROR in plane wave refraction.

math/complex.h
... ... @@ -161,6 +161,17 @@ struct complex
161 161 return *this;
162 162 }
163 163  
  164 + CUDA_CALLABLE complex<T> operator-=(const complex<T> &rhs)
  165 + {
  166 + *this = *this - rhs;
  167 + return *this;
  168 + }
  169 + CUDA_CALLABLE complex<T> operator-=(const T &rhs)
  170 + {
  171 + *this = *this - rhs;
  172 + return *this;
  173 + }
  174 +
164 175 CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs)
165 176 {
166 177 *this = *this * rhs;
... ... @@ -220,7 +231,7 @@ struct complex
220 231 {
221 232 complex<T> result;
222 233  
223   - result = (T)log() * y;
  234 + result = log() * y;
224 235  
225 236 return result.exp();
226 237 }
... ... @@ -267,6 +278,27 @@ struct complex
267 278 return false;
268 279 }
269 280  
  281 + //CASTING operators
  282 + template < typename otherT >
  283 + operator complex<otherT>()
  284 + {
  285 + complex<otherT> result((otherT)r, (otherT)i);
  286 + return result;
  287 + }
  288 + template< typename otherT >
  289 + complex( const complex<otherT> &rhs)
  290 + {
  291 + r = (T)rhs.r;
  292 + i = (T)rhs.i;
  293 + }
  294 + template< typename otherT >
  295 + complex& operator=(const complex<otherT> &rhs)
  296 + {
  297 + r = (T)rhs.r;
  298 + i = (T)rhs.i;
  299 + return *this;
  300 + }
  301 +
270 302 };
271 303  
272 304 } //end RTS namespace
... ... @@ -319,6 +351,11 @@ CUDA_CALLABLE static rts::complex&lt;T&gt; pow(rts::complex&lt;T&gt; x, T y)
319 351 {
320 352 return x.pow(y);
321 353 }
  354 +template<typename T>
  355 +CUDA_CALLABLE static rts::complex<T> pow(rts::complex<T> x, int y)
  356 +{
  357 + return x.pow(y);
  358 +}
322 359  
323 360 //log function
324 361 template<typename T>
... ...
math/function.h
... ... @@ -31,33 +31,40 @@ public:
31 31 //insert(0, 0);
32 32 }
33 33  
34   - Y linear(X x)
  34 + Y linear(X x) const
35 35 {
36 36 if(f.size() == 0) return (Y)0; //return zero if the function is empty
37 37 //declare an iterator
38   - typename std::vector< dataPoint >::iterator it;
  38 + typedef typename std::vector< dataPoint >::iterator f_iter;
  39 + f_iter it;
39 40  
40   - dataPoint s;
41   - s.x = x;
  41 + //dataPoint s;
  42 + //s.x = x;
42 43  
43   - it = search(f.begin(), f.end(), &s, &s + 1, &function<X, Y>::findCeiling);
  44 + //it = search(f.begin(), f.end(), &s, &s + 1, &function<X, Y>::findCeiling);
  45 + unsigned int i;
  46 + for(i = 0; i<f.size(); i++)
  47 + {
  48 + if(f[i].x > x)
  49 + break;
  50 + }
44 51  
45 52 //if the wavelength is past the end of the list, return the back
46   - if(it == f.end())
  53 + if(i == f.size())
47 54 return f.back().y;
48 55 //if the wavelength is before the beginning of the list, return the front
49   - else if(it == f.begin())
  56 + else if(i == 0)
50 57 return f.front().y;
51 58 //otherwise interpolate
52 59 else
53 60 {
54   - X xMax = (*it).x;
55   - X xMin = (*(it - 1)).x;
  61 + X xMax = f[i].x;
  62 + X xMin = f[i - 1].x;
56 63 //std::cout<<lMin<<"----------"<<lMax<<std::endl;
57 64  
58 65 X a = (x - xMin) / (xMax - xMin);
59   - Y riMin = (*(it - 1)).y;
60   - Y riMax = (*it).y;
  66 + Y riMin = f[i - 1].y;
  67 + Y riMax = f[i].y;
61 68 Y interp;
62 69 interp = riMax * a + riMin * (1.0 - a);
63 70 return interp;
... ... @@ -92,35 +99,35 @@ public:
92 99  
93 100 }
94 101  
95   - X getX(unsigned int i)
  102 + X getX(unsigned int i) const
96 103 {
97 104 return f[i].x;
98 105 }
99 106  
100   - Y getY(unsigned int i)
  107 + Y getY(unsigned int i) const
101 108 {
102 109 return f[i].y;
103 110 }
104 111  
105 112 ///get the number of data points in the function
106   - unsigned int getN()
  113 + unsigned int getN() const
107 114 {
108 115 return f.size();
109 116 }
110 117  
111 118 //look up an indexed component
112   - dataPoint operator[](int i)
  119 + dataPoint operator[](int i) const
113 120 {
114 121 return f[i];
115 122 }
116 123  
117 124 ///linear interpolation
118   - Y operator()(X x)
  125 + Y operator()(X x) const
119 126 {
120 127 return linear(x);
121 128 }
122 129  
123   - function<X, Y> operator+(Y r)
  130 + function<X, Y> operator+(Y r) const
124 131 {
125 132 function<X, Y> result;
126 133  
... ... @@ -134,13 +141,13 @@ public:
134 141 return result;
135 142 }
136 143  
137   - function<X, Y> & operator= (const Y & rhs)
138   - {
139   - f.clear();
140   - if(rhs != 0) //if the RHS is zero, just clear, otherwise add one value of RHS
141   - insert(0, rhs);
142   -
143   - return *this;
  144 + function<X, Y> & operator= (const Y & rhs)
  145 + {
  146 + f.clear();
  147 + if(rhs != 0) //if the RHS is zero, just clear, otherwise add one value of RHS
  148 + insert(0, rhs);
  149 +
  150 + return *this;
144 151 }
145 152  
146 153  
... ...
math/quad.h
... ... @@ -7,6 +7,7 @@
7 7 #include "../math/triangle.h"
8 8 #include "../math/quaternion.h"
9 9 #include <iostream>
  10 +#include <iomanip>
10 11 #include <algorithm>
11 12  
12 13 namespace rts{
... ... @@ -48,37 +49,6 @@ struct quad
48 49  
49 50 }
50 51  
51   - /****************************************************************
52   - Constructor - create a quad from two points and a normal
53   - ****************************************************************/
54   - /*CUDA_CALLABLE quad(rts::vec<T, N> pMin, rts::vec<T, N> pMax, rts::vec<T, N> normal)
55   - {
56   -
57   - //assign the corner point
58   - A = pMin;
59   -
60   - //compute the vector from pMin to pMax
61   - rts::vec<T, 3> v0;
62   - v0 = pMax - pMin;
63   -
64   - //compute the cross product of A and the plane normal
65   - rts::vec<T, 3> v1;
66   - v1 = v0.cross(normal);
67   -
68   -
69   - //calculate point B
70   - rts::vec<T, 3> B;
71   - B = A + v0 * 0.5f + v1 * 0.5f;
72   -
73   - //calculate rtsPoint C
74   - rts::vec<T, 3> C;
75   - C = A + v0 * 0.5f - v1 * 0.5f;
76   -
77   - //calculate X and Y
78   - X = B - A;
79   - Y = C - A;
80   - }*/
81   -
82 52 /*******************************************************************
83 53 Constructor - create a quad from a position, normal, and rotation
84 54 *******************************************************************/
... ... @@ -114,6 +84,15 @@ struct quad
114 84 std::cout<<X<<std::endl;
115 85 }
116 86  
  87 + //boolean comparison
  88 + bool operator==(const quad<T, N> & rhs)
  89 + {
  90 + if(A == rhs.A && X == rhs.X && Y == rhs.Y)
  91 + return true;
  92 + else
  93 + return false;
  94 + }
  95 +
117 96 /*******************************************
118 97 Return the normal for the quad
119 98 *******************************************/
... ... @@ -140,10 +119,9 @@ struct quad
140 119 {
141 120 std::stringstream ss;
142 121  
143   - ss<<"A = "<<A<<std::endl;
144   - ss<<"B = "<<A + Y<<std::endl;
145   - ss<<"C = "<<A + Y + X<<std::endl;
146   - ss<<"D = "<<A + X<<std::endl;
  122 + ss<<std::left<<"B="<<setfill('-')<<setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
  123 + ss<<setfill(' ')<<setw(23)<<"|"<<"|"<<std::endl<<setw(23)<<"|"<<"|"<<std::endl;
  124 + ss<<std::left<<"A="<<setfill('-')<<setw(20)<<A<<">"<<"D="<<A + X;
147 125  
148 126 return ss.str();
149 127  
... ...
math/quaternion.h
... ... @@ -38,19 +38,24 @@ void quaternion&lt;T&gt;::normalize()
38 38 }
39 39  
40 40 template<typename T>
41   -void quaternion<T>::CreateRotation(T theta, T axis_x, T axis_y, T axis_z)
  41 +void quaternion<T>::CreateRotation(T theta, T ux, T uy, T uz)
42 42 {
43   - //assign the given Euler rotation to this quaternion
44   - w = (T)cos(theta/2);
45   - x = axis_x*(T)sin(theta/2);
46   - y = axis_y*(T)sin(theta/2);
47   - z = axis_z*(T)sin(theta/2);
  43 + vec<T, 3> u(ux, uy, uz);
  44 +
  45 + CreateRotation(theta, u);
  46 +
48 47 }
49 48  
50 49 template<typename T>
51   -void quaternion<T>::CreateRotation(T theta, vec<T, 3> axis)
  50 +void quaternion<T>::CreateRotation(T theta, vec<T, 3> u)
52 51 {
53   - CreateRotation(theta, axis[0], axis[1], axis[2]);
  52 + vec<T, 3> u_hat = u.norm();
  53 +
  54 + //assign the given Euler rotation to this quaternion
  55 + w = (T)cos(theta/2);
  56 + x = u_hat[0]*(T)sin(theta/2);
  57 + y = u_hat[1]*(T)sin(theta/2);
  58 + z = u_hat[2]*(T)sin(theta/2);
54 59 }
55 60  
56 61 template<typename T>
... ...
math/spherical_bessel.h deleted
1   -#ifndef RTS_SBESSEL_H
2   -#define RTS_SBESSEL_H
3   -#include <math.h>
4   -
5   -
6   -namespace rts{
7   -
8   -#define RTS_BESSEL_CONVERGENCE_MIN 0.0145f
9   -#define RTS_BESSEL_CONVERGENCE_MAX 0.4f
10   -#define RTS_BESSEL_MAXIMUM_FLOAT -1e33f
11   -
12   -template <typename T>
13   -CUDA_CALLABLE void sbesselj(int n, complex<T> x, complex<T>* j)
14   -{
15   - //compute the first bessel function
16   - if(n >= 0)
17   - j[0] = (T)sin(x) / x;
18   -
19   - //compute the second bessel function
20   - if(n >= 1)
21   - j[1] = j[0] / x - (T)cos(x) / x;
22   -
23   - //use the recurrence relation to compute the rest
24   - for(int i = 2; i <= n; i++)
25   - {
26   - j[i] = ( (2 * i - 1) / x ) * j[i-1] - j[i-2];
27   - }
28   -
29   - //if x = 0, deal with the degenerate case
30   - /*if( isnan(j[0].r) )
31   - {
32   - j[0] = (T)1.0;
33   - for(int i = 1; i<=n; i++)
34   - j[i] = (T)0.0;
35   - }*/
36   -}
37   -
38   -template <typename T>
39   -CUDA_CALLABLE void sbessely(int n, complex<T> x, complex<T>* y)
40   -{
41   - //compute the first bessel function
42   - if(n >= 0)
43   - y[0] = -(T)cos(x) / x;
44   -
45   - //compute the second bessel function
46   - if(n >= 1)
47   - y[1] = y[0] / x - (T)sin(x) / x;
48   -
49   - //use the recurrence relation to compute the rest
50   - for(int i = 2; i <= n; i++)
51   - {
52   - y[i] = ( (2 * i - 1) / x ) * y[i-1] - y[i-2];
53   - }
54   -
55   -}
56   -
57   -//spherical Hankel functions of the first kind
58   -template <typename T>
59   -CUDA_CALLABLE void sbesselh1(int n, complex<T> x, complex<T>* h)
60   -{
61   - //compute j_0 and j_1
62   - complex<T> j[2];
63   - sbesselj(1, x, j);
64   -
65   - //compute y_0 and y_1
66   - complex<T> y[2];
67   - sbessely(1, x, y);
68   -
69   - //compute the first-order Hhankel function
70   - if(n >= 0)
71   - h[0] = j[0] + y[0].imul();
72   -
73   - //compute the second bessel function
74   - if(n >= 1)
75   - h[1] = j[1] + y[1].imul();
76   -
77   - //use the recurrence relation to compute the rest
78   - for(int i = 2; i <= n; i++)
79   - {
80   - h[i] = ( (2 * i - 1) / x ) * h[i-1] - h[i-2];
81   - }
82   -}
83   -
84   -template <typename T>
85   -CUDA_CALLABLE void init_sbesselj(T x, T* j)
86   -{
87   - //compute the first 2 bessel functions
88   - j[0] = (T)sin(x) / x;
89   -
90   - j[1] = j[0] / x - (T)cos(x) / x;
91   -}
92   -
93   -template <typename T>
94   -CUDA_CALLABLE void init_sbessely(T x, T* y)
95   -{
96   - //compute the first 2 bessel functions
97   - y[0] = -(T)cos(x) / x;
98   -
99   - y[1] = y[0] / x - (T)sin(x) / x;
100   -}
101   -
102   -template <typename T>
103   -CUDA_CALLABLE void shift_sbesselj(int n, T x, T* b)//, T stability = 1.4)
104   -{
105   -
106   - T bnew;
107   -
108   - //compute the next (order n) Bessel function
109   - bnew = ((2 * n - 1) * b[1])/x - b[0];
110   -
111   - //if(n > stability*x)
112   - if(n > real(x))
113   - if(real(bnew) < RTS_BESSEL_CONVERGENCE_MIN || real(bnew) > RTS_BESSEL_CONVERGENCE_MAX)
114   - bnew = 0;
115   -
116   - //shift and add the new value to the array
117   - b[0] = b[1];
118   - b[1] = bnew;
119   -}
120   -
121   -template <typename T>
122   -CUDA_CALLABLE void shift_sbessely(int n, T x, T* b)//, T stability = 1.4)
123   -{
124   -
125   - T bnew;
126   -
127   - //compute the next (order n) Bessel function
128   - bnew = ((2 * n - 1) * b[1])/x - b[0];
129   -
130   - if(bnew < RTS_BESSEL_MAXIMUM_FLOAT ||
131   - (n > x && bnew > 0))
132   - {
133   - bnew = 0;
134   - b[1] = 0;
135   - }
136   -
137   -
138   - //shift and add the new value to the array
139   - b[0] = b[1];
140   - b[1] = bnew;
141   -}
142   -
143   -
144   -
145   -} //end namespace rts
146   -
147   -
148   -
149   -#endif
math/vector.h
... ... @@ -54,7 +54,7 @@ struct vec
54 54 memcpy(v, data, sizeof(T) * N);
55 55 }
56 56  
57   - CUDA_CALLABLE T len()
  57 + CUDA_CALLABLE T len() const
58 58 {
59 59 //compute and return the vector length
60 60 T sum_sq = (T)0;
... ... @@ -66,7 +66,7 @@ struct vec
66 66  
67 67 }
68 68  
69   - CUDA_CALLABLE vec<T, N> cart2sph()
  69 + CUDA_CALLABLE vec<T, N> cart2sph() const
70 70 {
71 71 //convert the vector from cartesian to spherical coordinates
72 72 //x, y, z -> r, theta, phi (where theta = 0 to 2*pi)
... ... @@ -79,7 +79,7 @@ struct vec
79 79 return sph;
80 80 }
81 81  
82   - CUDA_CALLABLE vec<T, N> sph2cart()
  82 + CUDA_CALLABLE vec<T, N> sph2cart() const
83 83 {
84 84 //convert the vector from cartesian to spherical coordinates
85 85 //r, theta, phi -> x, y, z (where theta = 0 to 2*pi)
... ... @@ -92,7 +92,7 @@ struct vec
92 92 return cart;
93 93 }
94 94  
95   - CUDA_CALLABLE vec<T, N> norm()
  95 + CUDA_CALLABLE vec<T, N> norm() const
96 96 {
97 97 //compute and return the vector norm
98 98 vec<T, N> result;
... ... @@ -109,19 +109,19 @@ struct vec
109 109 return result;
110 110 }
111 111  
112   - CUDA_CALLABLE vec<T, 3> cross(vec<T, 3> rhs)
  112 + CUDA_CALLABLE vec<T, 3> cross(const vec<T, 3> rhs) const
113 113 {
114 114 vec<T, 3> result;
115 115  
116 116 //compute the cross product (only valid for 3D vectors)
117   - result[0] = v[1] * rhs[2] - v[2] * rhs[1];
118   - result[1] = v[2] * rhs[0] - v[0] * rhs[2];
119   - result[2] = v[0] * rhs[1] - v[1] * rhs[0];
  117 + result[0] = v[1] * rhs.v[2] - v[2] * rhs.v[1];
  118 + result[1] = v[2] * rhs.v[0] - v[0] * rhs.v[2];
  119 + result[2] = v[0] * rhs.v[1] - v[1] * rhs.v[0];
120 120  
121 121 return result;
122 122 }
123 123  
124   - CUDA_CALLABLE T dot(vec<T, N> rhs)
  124 + CUDA_CALLABLE T dot(vec<T, N> rhs) const
125 125 {
126 126 T result = (T)0;
127 127  
... ... @@ -133,7 +133,7 @@ struct vec
133 133 }
134 134  
135 135 //arithmetic
136   - CUDA_CALLABLE vec<T, N> operator+(vec<T, N> rhs)
  136 + CUDA_CALLABLE vec<T, N> operator+(vec<T, N> rhs) const
137 137 {
138 138 vec<T, N> result;
139 139  
... ... @@ -142,7 +142,7 @@ struct vec
142 142  
143 143 return result;
144 144 }
145   - CUDA_CALLABLE vec<T, N> operator-(vec<T, N> rhs)
  145 + CUDA_CALLABLE vec<T, N> operator-(vec<T, N> rhs) const
146 146 {
147 147 vec<T, N> result;
148 148  
... ... @@ -151,7 +151,7 @@ struct vec
151 151  
152 152 return result;
153 153 }
154   - CUDA_CALLABLE vec<T, N> operator*(T rhs)
  154 + CUDA_CALLABLE vec<T, N> operator*(T rhs) const
155 155 {
156 156 vec<T, N> result;
157 157  
... ... @@ -160,7 +160,7 @@ struct vec
160 160  
161 161 return result;
162 162 }
163   - CUDA_CALLABLE vec<T, N> operator/(T rhs)
  163 + CUDA_CALLABLE vec<T, N> operator/(T rhs) const
164 164 {
165 165 vec<T, N> result;
166 166  
... ... @@ -179,7 +179,7 @@ struct vec
179 179 return *this;
180 180 }*/
181 181  
182   - CUDA_CALLABLE bool operator==(vec<T, N> rhs)
  182 + CUDA_CALLABLE bool operator==(vec<T, N> rhs) const
183 183 {
184 184 if ( (rhs.v[0] == v[0]) && (rhs.v[1] == v[1]) && (rhs.v[2] == v[2]) )
185 185 return true;
... ... @@ -187,7 +187,7 @@ struct vec
187 187 return false;
188 188 }
189 189  
190   - std::string toStr()
  190 + std::string toStr() const
191 191 {
192 192 std::stringstream ss;
193 193  
... ... @@ -203,8 +203,8 @@ struct vec
203 203 return ss.str();
204 204 }
205 205  
206   - //bracket operator
207   - CUDA_CALLABLE T& operator[](int i)
  206 + //bracket operator - allows assignment to the vector
  207 + CUDA_CALLABLE T& operator[](const unsigned int i)
208 208 {
209 209 return v[i];
210 210 }
... ...
optics/beam.h
... ... @@ -3,12 +3,13 @@
3 3  
4 4 #include "../math/vector.h"
5 5 #include "../math/function.h"
  6 +#include "../optics/planewave.h"
6 7 #include <vector>
7 8  
8 9 namespace rts{
9 10  
10 11 template<typename P>
11   -class beam
  12 +class beam : public planewave<P>
12 13 {
13 14 public:
14 15 enum beam_type {Uniform, Bartlett, Hamming, Hanning};
... ... @@ -17,10 +18,6 @@ private:
17 18  
18 19 P na[2]; //numerical aperature of the focusing optics
19 20 vec<P> f; //focal point
20   - vec<P> k; //direction vector
21   - vec<P> E0; //polarization direction
22   - P omega; //frequency
23   -
24 21 function<P, P> apod; //apodization function
25 22 unsigned int apod_res; //resolution of complex apodization filters
26 23  
... ... @@ -71,17 +68,17 @@ private:
71 68 public:
72 69  
73 70 ///constructor: build a default beam (NA=1.0)
74   - beam(beam_type _apod = Uniform)
  71 + beam(
  72 + vec<P> _k = rts::vec<P>(0, 0, TAU),
  73 + vec<P> _E0 = rts::vec<P>(1, 0, 0),
  74 + beam_type _apod = Uniform)
  75 + : planewave<P>(_k, _E0)
75 76 {
76 77 na[0] = (P)0.0;
77 78 na[1] = (P)1.0;
78   - f = vec<P>( (P)0.0, (P)0.0, (P)0.0 );
79   - k = vec<P>( (P)0.0, (P)0.0, (P)1.0 );
80   - E0 = vec<P>( (P)1.0, (P)0.0, (P)0.0 );
81   - omega = (P)2 * (P)3.14159;
  79 + f = vec<P>( (P)0, (P)0, (P)0 );
82 80 apod_res = 256; //set the default resolution for apodization filters
83 81 set_apod(_apod); //set the apodization function type
84   -
85 82 }
86 83  
87 84 ///Numerical Aperature functions
... ... @@ -96,28 +93,32 @@ public:
96 93 na[1] = _na1;
97 94 }
98 95  
  96 + /*string str() :
  97 + {
  98 + stringstream ss;
  99 + ss<<"Beam Center: "<<k<<std::endl;
  100 +
  101 + return ss.str();
  102 + }*/
99 103  
100 104 //Monte-Carlo decomposition into plane waves
101   - std::vector< planewave<P> > mc(unsigned int N, unsigned int seed = 0)
  105 + std::vector< planewave<P> > mc(unsigned int N = 100000, unsigned int seed = 0) const
102 106 {
103 107 /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling
104 108 of a sphere and projecting these samples onto an inscribed sphere.
105 109  
106   - samples = rtsPointer to sample vectors specified as normalized cartesian coordinates
107   - N = number of samples
108   - kSph = incident light direction in spherical coordinates
109   - NAin = internal obscuration NA
110   - NAout = outer cassegrain NA
  110 + seed = seed for the random number generator
111 111 */
112   -
113 112 srand(seed); //seed the random number generator
114 113  
  114 + vec<P> k_hat = beam::k.norm();
  115 +
115 116 ///compute the rotation operator to transform (0, 0, 1) to k
116   - P cos_angle = k.dot(rts::vec<P>(0, 0, 1));
  117 + P cos_angle = k_hat.dot(rts::vec<P>(0, 0, 1));
117 118 rts::matrix<P, 3> rotation;
118 119 if(cos_angle != 1.0)
119 120 {
120   - rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k).norm(); //compute the axis of rotation
  121 + rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k_hat).norm(); //compute the axis of rotation
121 122 P angle = acos(cos_angle); //compute the angle of rotation
122 123 rts::quaternion<P> quat; //create a quaternion describing the rotation
123 124 quat.CreateRotation(angle, r_axis);
... ... @@ -137,8 +138,6 @@ public:
137 138  
138 139 std::vector< planewave<P> > samples; //create a vector of plane waves
139 140  
140   - planewave<P> beam_center(k, E0, omega); //create a plane wave representing the beam center
141   -
142 141 //draw a distribution of random phi, z values
143 142 P z, phi, theta;
144 143 for(int i=0; i<N; i++) //for each sample
... ... @@ -153,7 +152,7 @@ public:
153 152 vec<P> k_prime = rotation * cart; //create a sample vector
154 153  
155 154 //store a wave refracted along the given direction
156   - samples.push_back(beam_center.refract(k_prime) * apod(phi/PHI[1]));
  155 + samples.push_back(beam::refract(k_prime) * apod(phi/PHI[1]));
157 156 }
158 157  
159 158 return samples;
... ...
optics/efield.cuh
  1 +#ifndef RTS_EFIELD
  2 +#define RTS_EFIELD
  3 +
1 4 #include "../math/complex.h"
  5 +#include "../math/realfield.cuh"
2 6 #include "../visualization/colormap.h"
3   -#include "../visualization/scalarfield.cuh"
4   -#include "../visualization/vectorfield.cuh"
5 7 #include "../optics/planewave.h"
6 8 #include "../cuda/devices.h"
  9 +#include "../optics/beam.h"
7 10  
8 11  
9 12  
... ... @@ -26,7 +29,7 @@ __global__ void gpu_planewave2efield(complex&lt;T&gt;* X, complex&lt;T&gt;* Y, complex&lt;T&gt;* Z
26 29 vec<T> p = q( (T)iu/(T)r0, (T)iv/(T)r1 );
27 30 vec<T> r(p[0], p[1], p[2]);
28 31  
29   - complex<T> x( 0.0f, w.omega * (w.k_hat.dot(r)) );
  32 + complex<T> x( 0.0f, w.k.dot(r) );
30 33  
31 34 if(Y == NULL) //if this is a scalar simulation
32 35 X[i] += w.E0.len() * exp(x); //use the vector magnitude as the plane wave amplitude
... ... @@ -77,7 +80,24 @@ __global__ void gpu_efield_polarization(complex&lt;T&gt;* X, complex&lt;T&gt;* Y, complex&lt;T&gt;
77 80 Px[i] = X[i].abs();
78 81 Py[i] = Y[i].abs();
79 82 Pz[i] = Z[i].abs();
  83 +}
  84 +
  85 +/* This function computes the sum of two complex fields and stores the result in *dest
  86 +*/
  87 +template<typename T>
  88 +__global__ void gpu_efield_sum(complex<T>* dest, complex<T>* src, unsigned int r0, unsigned int r1)
  89 +{
  90 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  91 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
80 92  
  93 + //make sure that the thread indices are in-bounds
  94 + if(iu >= r0 || iv >= r1) return;
  95 +
  96 + //compute the index into the field
  97 + int i = iv*r0 + iu;
  98 +
  99 + //sum the fields
  100 + dest[i] += src[i];
81 101 }
82 102  
83 103 /* This class implements a discrete representation of an electromagnetic field
... ... @@ -86,9 +106,9 @@ __global__ void gpu_efield_polarization(complex&lt;T&gt;* X, complex&lt;T&gt;* Y, complex&lt;T&gt;
86 106 template<typename P>
87 107 class efield
88 108 {
89   -private:
  109 +protected:
90 110  
91   - bool scalar;
  111 + bool vector;
92 112  
93 113 //gpu pointer to the field data
94 114 rts::complex<P>* X;
... ... @@ -107,27 +127,14 @@ private:
107 127 //create one thread for each detector pixel
108 128 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
109 129 dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  130 +
110 131  
111   - gpu_planewave2efield<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], p, pos);
  132 + gpu_planewave2efield<P> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], p, pos);
112 133 }
113 134  
114   - void clear()
115   - {
116   - cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
117   -
118   - if(!scalar)
119   - {
120   - cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
121   - cudaMemset(Z, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
122   - }
123   - }
124   -
125   -
126   -public:
127   -
128   - efield(unsigned int res0, unsigned int res1, bool _scalar = false)
  135 + void init(unsigned int res0, unsigned int res1, bool _vector)
129 136 {
130   - scalar = _scalar; //initialize field type
  137 + vector = _vector; //initialize field type
131 138  
132 139 X = Y = Z = NULL; //initialize all pointers to NULL
133 140 R[0] = res0;
... ... @@ -137,9 +144,9 @@ public:
137 144 cudaMalloc(&X, sizeof(rts::complex<P>) * R[0] * R[1]);
138 145 cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
139 146  
140   - if(!scalar)
  147 + if(vector)
141 148 {
142   - std::cout<<"scalar:";
  149 + std::cout<<"vector:";
143 150 cudaMalloc(&Y, sizeof(rts::complex<P>) * R[0] * R[1]);
144 151 cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
145 152  
... ... @@ -148,12 +155,66 @@ public:
148 155 }
149 156 }
150 157  
  158 + void destroy()
  159 + {
  160 + if(X != NULL) cudaFree(X);
  161 + if(Y != NULL) cudaFree(Y);
  162 + if(Z != NULL) cudaFree(Z);
  163 + }
  164 +
  165 + void shallowcpy(const rts::efield<P> & src)
  166 + {
  167 + vector = src.vector;
  168 + R[0] = src.R[0];
  169 + R[1] = src.R[1];
  170 + }
  171 +
  172 + void deepcpy(const rts::efield<P> & src)
  173 + {
  174 + //perform a shallow copy
  175 + shallowcpy(src);
  176 +
  177 + //allocate memory on the gpu
  178 + if(src.X != NULL)
  179 + {
  180 + cudaMalloc(&X, sizeof(rts::complex<P>) * R[0] * R[1]);
  181 + cudaMemcpy(X, src.X, sizeof(rts::complex<P>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
  182 + }
  183 + if(src.Y != NULL)
  184 + {
  185 + cudaMalloc(&Y, sizeof(rts::complex<P>) * R[0] * R[1]);
  186 + cudaMemcpy(Y, src.Y, sizeof(rts::complex<P>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
  187 + }
  188 + if(src.Z != NULL)
  189 + {
  190 + cudaMalloc(&Z, sizeof(rts::complex<P>) * R[0] * R[1]);
  191 + cudaMemcpy(Z, src.Z, sizeof(rts::complex<P>) * R[0] * R[1], cudaMemcpyDeviceToDevice);
  192 + }
  193 + }
  194 +
  195 +public:
  196 + efield(unsigned int res0, unsigned int res1, bool _vector = true)
  197 + {
  198 + init(res0, res1, _vector);
  199 + pos = rts::quad<P>(rts::vec<P>(-10, 0, -10), rts::vec<P>(-10, 0, 10), rts::vec<P>(10, 0, 10));
  200 + }
  201 +
151 202 //destructor
152 203 ~efield()
153 204 {
154   - if(X != NULL) cudaFree(X);
155   - if(Y != NULL) cudaFree(Y);
156   - if(Z != NULL) cudaFree(Z);
  205 + destroy();
  206 + }
  207 +
  208 + ///Clear the field - set all points to zero
  209 + void clear()
  210 + {
  211 + cudaMemset(X, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
  212 +
  213 + if(vector)
  214 + {
  215 + cudaMemset(Y, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
  216 + cudaMemset(Z, 0, sizeof(rts::complex<P>) * R[0] * R[1]);
  217 + }
157 218 }
158 219  
159 220 void position(quad<P> _p)
... ... @@ -172,56 +233,88 @@ public:
172 233 return ss.str();
173 234 }
174 235  
  236 + //assignment operator: assignment from another electric field
  237 + efield<P> & operator= (const efield<P> & rhs)
  238 + {
  239 + destroy(); //destroy any previous information about this field
  240 + deepcpy(rhs); //create a deep copy
  241 + return *this; //return the current object
  242 + }
  243 +
175 244 //assignment operator: build an electric field from a plane wave
176 245 efield<P> & operator= (const planewave<P> & rhs)
177 246 {
178 247  
179 248 clear(); //clear any previous field data
180 249 from_planewave(rhs); //create a field from the planewave
181   - return *this;
  250 + return *this; //return the current object
182 251 }
183 252  
184   - //assignment operator: add the electric field from a plane wave
185   - efield<P> & operator+= (const planewave<P> & rhs)
  253 + //assignment operator: add an existing electric field
  254 + efield<P> & operator+= (const efield<P> & rhs)
186 255 {
187   - //create a field from the planewave
188   - from_planewave(rhs);
  256 + //if this field and the source field represent the same regions in space
  257 + if(R[0] == rhs.R[0] && R[1] == rhs.R[1] && pos == rhs.pos)
  258 + {
  259 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  260 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  261 +
  262 + //create one thread for each detector pixel
  263 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  264 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  265 +
  266 + //sum the fields
  267 + gpu_efield_sum <<<dimGrid, dimBlock>>> (X, rhs.X, R[0], R[1]);
  268 + if(Y != NULL)
  269 + {
  270 + gpu_efield_sum <<<dimGrid, dimBlock>>> (Y, rhs.Y, R[0], R[1]);
  271 + gpu_efield_sum <<<dimGrid, dimBlock>>> (Z, rhs.Z, R[0], R[1]);
  272 + }
  273 + }
  274 + else
  275 + {
  276 + std::cout<<"ERROR in efield: The two summed fields do not represent the same geometry."<<std::endl;
  277 + exit(1);
  278 + }
189 279  
190   - return *this;
  280 + return *this; //return this object
191 281 }
192 282  
193   - //assignment operator: build an electric field from a list of plane waves
194   - efield<P> & operator= (const std::vector< planewave<P> > & rhs)
195   - {
196   - clear(); //clear any previous field data
197   - for(unsigned int i = 0; i < rhs.size(); i++)
198   - from_planewave(rhs[i]);
199   - return *this;
200   - }
  283 + efield<P> & operator= (const rts::beam<P> & rhs)
  284 + {
  285 + //get a vector of monte-carlo samples
  286 + std::vector< rts::planewave<P> > p_list = rhs.mc();
  287 +
  288 + clear(); //clear any previous field data
  289 + for(unsigned int i = 0; i < p_list.size(); i++)
  290 + from_planewave(p_list[i]);
  291 + return *this;
  292 + }
  293 +
201 294  
202 295 //return a scalar field representing field magnitude
203   - scalarfield<P> mag()
  296 + realfield<P, 1, true> mag()
204 297 {
205   - int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
206   - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  298 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  299 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
207 300  
208 301 //create a scalar field to store the result
209   - scalarfield<P> M(R[0], R[1]);
  302 + realfield<P, 1, true> M(R[0], R[1]);
210 303  
211   - //create one thread for each detector pixel
212   - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
213   - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  304 + //create one thread for each detector pixel
  305 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  306 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
214 307  
215   - //compute the magnitude and store it in a scalar field
216   - gpu_efield_magnitude<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.S);
  308 + //compute the magnitude and store it in a scalar field
  309 + gpu_efield_magnitude<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], M.ptr(0));
217 310  
218 311 return M;
219 312 }
220 313  
221 314 //return a vector field representing field polarization
222   - vectorfield<P> polarization()
  315 + realfield<P, 3, true> polarization()
223 316 {
224   - if(scalar)
  317 + if(!vector)
225 318 {
226 319 std::cout<<"ERROR: Cannot compute polarization of a scalar field."<<std::endl;
227 320 exit(1);
... ... @@ -234,10 +327,10 @@ public:
234 327 dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
235 328  
236 329  
237   - vectorfield<P> Pol(R[0], R[1]); //create a vector field to store the result
  330 + realfield<P, 3, true> Pol(R[0], R[1]); //create a vector field to store the result
238 331  
239 332 //compute the polarization and store it in the vector field
240   - gpu_efield_polarization<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], Pol.S[0], Pol.S[1], Pol.S[2]);
  333 + gpu_efield_polarization<float> <<<dimGrid, dimBlock>>> (X, Y, Z, R[0], R[1], Pol.ptr(0), Pol.ptr(1), Pol.ptr(2));
241 334  
242 335 return Pol; //return the vector field
243 336 }
... ... @@ -248,4 +341,6 @@ public:
248 341  
249 342  
250 343  
251   -} //end namespace rts
252 344 \ No newline at end of file
  345 +} //end namespace rts
  346 +
  347 +#endif
253 348 \ No newline at end of file
... ...
optics/planewave.h
... ... @@ -4,8 +4,13 @@
4 4 #include <string>
5 5 #include <sstream>
6 6  
7   -#include "rts/math/vector.h"
8   -#include "rts/math/quaternion.h"
  7 +#include "../math/vector.h"
  8 +#include "../math/quaternion.h"
  9 +#include "../math/constants.h"
  10 +
  11 +/*Basic conversions used here (assuming a vacuum)
  12 + lambda =
  13 +*/
9 14  
10 15 namespace rts{
11 16  
... ... @@ -13,37 +18,28 @@ template&lt;typename P&gt;
13 18 class planewave{
14 19  
15 20 public:
16   - rts::vec<P> k_hat; //normalized planewave direction
17   - P omega; //frequency
18   -
19   - rts::vec<P> E0; //amplitude
  21 + vec<P> k; //k = tau / lambda
  22 + vec<P> E0; //amplitude
20 23  
21 24  
22   -
23   - planewave()
24   - {
25   - omega = (P)2 * (P)3.14159;
26   - k_hat = rts::vec<P>(0, 0, 1);
27   - E0 = rts::vec<P>(1, 0, 0);
28   - }
29 25 ///constructor: create a plane wave propagating along z, polarized along x
30   - planewave(P _omega)
  26 + /*planewave(P lambda = (P)1)
31 27 {
32   - omega = _omega;
33   - k_hat = rts::vec<P>(0, 0, 1);
  28 + k = rts::vec<P>(0, 0, 1) * (TAU/lambda);
34 29 E0 = rts::vec<P>(1, 0, 0);
35   - }
  30 + }*/
36 31 ///constructor: create a plane wave propagating along _k, polarized along _E0, at frequency _omega
37   - planewave(vec<P> _k, vec<P> _E0, P _omega)
  32 + planewave(vec<P> _k = rts::vec<P>(0, 0, TAU), vec<P> _E0 = rts::vec<P>(1, 0, 0))
38 33 {
39   - k_hat = _k.norm();
40   - vec<P> s = k_hat.cross(_E0); //re-orthogonalize
41   - E0 = s.cross(k_hat);
42   - omega = _omega;
  34 + k = _k;
  35 + vec<P> k_hat = _k.norm();
  36 +
  37 + vec<P> s = (k.cross(_E0)).norm(); //compute an orthogonal side vector
  38 + vec<P> E0_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector
  39 + E0 = E0_hat * E0_hat.dot(_E0); //compute the projection of _E0 onto E0_hat
43 40 }
44 41  
45 42 ///multiplication operator: scale E0
46   - //assignment operator: build an electric field from a plane wave
47 43 planewave<P> & operator* (const P & rhs)
48 44 {
49 45  
... ... @@ -51,19 +47,34 @@ public:
51 47 return *this;
52 48 }
53 49  
  50 + P lambda() const
  51 + {
  52 + return TAU / k.len();
  53 + }
  54 +
  55 + P kmag() const
  56 + {
  57 + return k.len();
  58 + }
  59 +
54 60  
55   - planewave<P> refract(rts::vec<P> new_k)
  61 + planewave<P> refract(rts::vec<P> kn) const
56 62 {
57   - new_k = new_k.norm(); //normalize new_k
  63 + vec<P> kn_hat = kn.norm(); //normalize new_k
  64 + vec<P> k_hat = k.norm();
58 65  
59 66 //compute the side vector (around which we will be rotating)
60   - rts::vec<P> s = k_hat.cross(E0.norm());
  67 + vec<P> E0_hat = E0.norm();
  68 + rts::vec<P> s = k_hat.cross(E0_hat);
61 69  
62 70 //compute the projection of k' onto the k-E plane
63   - rts::vec<P> s_prime = s * (new_k.dot(s));
  71 + rts::vec<P> s_prime = s * (kn_hat.dot(s));
64 72  
65 73 //compute the angle between
66   - P theta = acos(k_hat.dot( (new_k - s_prime).norm() ));
  74 + vec<P> kp_hat = (kn_hat - s_prime).norm();
  75 + P theta = acos(k_hat.dot( kp_hat ));
  76 + if(kn_hat.dot(E0_hat) < 0)
  77 + theta = -theta;
67 78  
68 79 //rotate E0 around s by theta
69 80 quaternion<P> q;
... ... @@ -71,17 +82,60 @@ public:
71 82 rts::vec<P> E0_prime = q.toMatrix3() * E0;
72 83  
73 84 //create the refracted plane wave
74   - planewave<P> new_p(omega);
  85 + planewave<P> new_p;
75 86 new_p.E0 = E0_prime;
76   - new_p.k_hat = new_k;
  87 + new_p.k = kn_hat * k.len();
77 88  
78 89 return new_p;
  90 + /*vec<P> kn_hat = kn.norm(); //normalize kn
  91 + vec<P> k_hat = k.norm();
  92 + vec<P> E0_hat = E0.norm();
  93 +
  94 + vec<P> B = k_hat.cross(E0_hat);
  95 + planewave<P> newp;
  96 + newp.k = kn_hat * k.len();
  97 + newp.E0 = B.cross(kn_hat).norm();
  98 + std::cout<<newp.k.norm().dot(newp.E0.norm())<<std::endl;
  99 + return newp;*/
  100 +/*
  101 + //compute the side vector (around which we will be rotating)
  102 + rts::vec<P> s_hat = k_hat.cross(E0_hat);
  103 + //std::cout<<s.len()<<std::endl;
  104 +
  105 + //project kn_hat into the k-E0 plane
  106 + rts::vec<P> sp = s_hat * (kn_hat.dot(s_hat)); //project k_new onto s
  107 + rts::vec<P> kp = (kn_hat - sp); //correct k_new so it lies on the E0-k plane
  108 + rts::vec<P> kp_hat = kp.norm();
  109 +
  110 + //compute the angle and direction between k_prime and k
  111 + P theta = acos(k_hat.dot(kp_hat));
  112 + if(kp_hat.dot(E0_hat) < 0)
  113 + theta = -theta;
  114 +
  115 + //rotate E0 around s by theta
  116 + quaternion<P> q;
  117 + q.CreateRotation(theta, s_hat);
  118 + rts::vec<P> E0n = q.toMatrix3() * E0;
  119 + rts::vec<P> E0n_hat = E0n.norm();
  120 +
  121 + //std::cout<<s_hat.dot(kp_hat)<<" "<<s_hat.dot(E0n_hat)<<" "<<s_hat.dot(E0_hat)<<" "<<s_hat.dot(k_hat)<<" "<<
  122 + // E0_hat.dot(k_hat)<<" "<<k_hat.dot(kp_hat)<<" "<<E0_hat.dot(E0n_hat)<<" "<<E0n_hat.dot(kp_hat)<<std::endl;
  123 +
  124 + //create the refracted plane wave
  125 + //std::cout<<"cos: "<<cos(theta)<<" k*kp: "<<k_hat.dot(kp_hat)<<" E0*E0p: "<<E0_hat.dot(E0n_hat)<<" E0p*kp: "<<E0n_hat.dot(kp_hat)<<std::endl;
  126 +
  127 + //std::cout<<"kp*s: "<<kp.dot(s_hat)<<" E0n*s: "<<E0n.dot(s_hat)<<" |E0n|: "<<E0n.len()<<" E0n*kp: "<<E0n.dot(kp_hat)<<" E0n*kn: "<<E0n.dot(kn_hat)<<std::endl;
  128 +
  129 + planewave<P> new_p(kn_hat * k.len(), E0n); //create the plane wave
  130 + std::cout<<"|E0n|: "<<new_p.E0.len()<<" E0n*kn: "<<(new_p.E0.norm()).dot(new_p.k.norm())<<std::endl;
  131 +
  132 + return new_p;*/
79 133 }
80 134  
81 135 std::string str()
82 136 {
83 137 std::stringstream ss;
84   - ss<<E0<<" e^i ( "<<omega<<"t - "<<omega<<" "<<k_hat * omega<<" . r )";
  138 + ss<<E0<<" e^i ( "<<k<<" . r )";
85 139 return ss.str();
86 140 }
87 141 };
... ...
visualization/colormap.h
... ... @@ -86,7 +86,6 @@ static void initBrewer()
86 86 static void destroyBrewer()
87 87 {
88 88 HANDLE_ERROR(cudaFreeArray(gpuBrewer));
89   -
90 89 }
91 90  
92 91 template<class T>
... ... @@ -99,8 +98,11 @@ __global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigne
99 98 //compute the normalized value on [minVal maxVal]
100 99 float a = (gpuSource[i] - minVal) / (maxVal - minVal);
101 100  
  101 + //compensate for the additional space at the edges
  102 + a *= (T)(BREWER_CTRL_PTS - 1)/(T)(BREWER_CTRL_PTS);
  103 +
102 104 //lookup the color
103   - float shift = 1.0/(2*BREWER_CTRL_PTS);
  105 + float shift = (T)1/(2*BREWER_CTRL_PTS);
104 106 float4 color = tex1D(cudaTexBrewer, a+shift);
105 107 //float4 color = tex1D(cudaTexBrewer, a);
106 108  
... ...
visualization/scalarfield.cuh deleted
1   -#ifndef RTS_SCALAR_SLICE
2   -#define RTS_SCALAR_SLICE
3   -
4   -#include "../visualization/colormap.h"
5   -#include "../envi/envi.h"
6   -#include "../math/quad.h"
7   -#include "../cuda/devices.h"
8   -#include "cublas_v2.h"
9   -#include <cuda_runtime.h>
10   -
11   -///Compute a Gaussian function in 3D (mostly for testing)
12   -template<typename T>
13   -__global__ void gpu_gaussian(T* dest, unsigned int r0, unsigned int r1, T mean, T std, rts::quad<T> shape)
14   -{
15   - int iu = blockIdx.x * blockDim.x + threadIdx.x;
16   - int iv = blockIdx.y * blockDim.y + threadIdx.y;
17   -
18   - //make sure that the thread indices are in-bounds
19   - if(iu >= r0 || iv >= r1) return;
20   -
21   - //compute the index into the field
22   - int i = iv*r0 + iu;
23   -
24   - T u = (T)iu / (T)r0;
25   - T v = (T)iv / (T)r1;
26   -
27   - rts::vec<T> p = shape(u, v);
28   -
29   - T fx = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[0] - mean, 2) / (2 * std*std) );
30   - T fy = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[1] - mean, 2) / (2 * std*std) );
31   - T fz = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[2] - mean, 2) / (2 * std*std) );
32   -
33   - dest[i] = fx * fy * fz;
34   -}
35   -
36   -namespace rts {
37   -template<typename P>
38   -struct scalarfield
39   -{
40   - //gpu pointer to the scalar slice
41   - P* S;
42   -
43   - //resolution of the slice
44   - int R[2];
45   -
46   - quad<P> shape;
47   -
48   - scalarfield()
49   - {
50   - R[0] = R[1] = 0;
51   - shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0));
52   - S = NULL;
53   -
54   - std::cout<<"scalarfield CONSTRUCTOR"<<std::endl;
55   - }
56   -
57   - scalarfield(int x, int y)
58   - {
59   - //set the resolution
60   - R[0] = x;
61   - R[1] = y;
62   -
63   - shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0));
64   -
65   - //allocate memory on the GPU
66   - HANDLE_ERROR(cudaMalloc( (void**)&S, sizeof(P) * x * y ));
67   -
68   - std::cout<<"scalarfield CONSTRUCTOR"<<std::endl;
69   - }
70   -
71   - ~scalarfield()
72   - {
73   - if(S != NULL)
74   - HANDLE_ERROR(cudaFree(S));
75   - S = NULL;
76   - R[0] = R[1] = 0;
77   -
78   - std::cout<<"scalarfield DESTRUCTOR"<<std::endl;
79   - }
80   -
81   - void clear()
82   - {
83   - //this function sets the slice to zero
84   - if(S != NULL)
85   - HANDLE_ERROR(cudaMemset(S, 0, sizeof(P) * R[0] * R[1]));
86   - }
87   -
88   - void toImage(std::string filename, P vmin, P vmax, rts::colormapType cmap = rts::cmBrewer)
89   - {
90   - rts::gpu2image<P>(S, filename, R[0], R[1], vmin, vmax, cmap);
91   - }
92   -
93   - void toImage(std::string filename, bool positive = true, rts::colormapType cmap = rts::cmBrewer)
94   - {
95   - cublasStatus_t stat;
96   - cublasHandle_t handle;
97   -
98   - //create a CUBLAS handle
99   - stat = cublasCreate(&handle);
100   - if(stat != CUBLAS_STATUS_SUCCESS)
101   - {
102   - std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
103   - exit(1);
104   - }
105   -
106   - //find the index of the value with maximum magnitude
107   - int N = R[0] * R[1];
108   - int result;
109   -
110   - if(sizeof(P) == 4)
111   - stat = cublasIsamax(handle, N, (float*)S, 1, &result);
112   - else
113   - stat = cublasIdamax(handle, N, (double*)S, 1, &result);
114   -
115   - //adjust for 1-based indexing
116   - result -= 1;
117   -
118   - if(stat != CUBLAS_STATUS_SUCCESS)
119   - {
120   - std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
121   - exit(1);
122   - }
123   -
124   -
125   -
126   - //retrieve the maximum value
127   - P maxVal;
128   - HANDLE_ERROR(cudaMemcpy(&maxVal, S + result, sizeof(P), cudaMemcpyDeviceToHost));
129   -
130   - //destroy the CUBLAS handle
131   - cublasDestroy(handle);
132   -
133   - //output the image
134   - if(positive)
135   - toImage(filename, 0, maxVal, cmap);
136   - else
137   - toImage(filename, -abs(maxVal), abs(maxVal), cmap);
138   - }
139   -
140   -
141   - void toEnvi(std::string filename, P wavelength = 0, bool append = false)
142   - {
143   - std::string mode;
144   - if(append) mode = "a";
145   - else mode = "w";
146   -
147   - //open the ENVI file
148   - EnviFile outfile(filename, mode);
149   -
150   - //get the scalar slice from the GPU to the CPU
151   - int memsize = sizeof(P) * R[0] * R[1];
152   - P* cpuData = (P*) malloc( memsize );
153   - HANDLE_ERROR(cudaMemcpy( cpuData, S, memsize, cudaMemcpyDeviceToHost));
154   -
155   - //add a band to the ENVI file
156   - outfile.addBand(cpuData, R[0], R[1], wavelength);
157   -
158   - outfile.close();
159   - }
160   -
161   - //assignment operator
162   - scalarfield & operator= (const scalarfield & rhs)
163   - {
164   - //de-allocate any existing GPU memory
165   - if(S != NULL)
166   - HANDLE_ERROR(cudaFree(S));
167   -
168   - //copy the slice resolution
169   - R[0] = rhs.R[0];
170   - R[1] = rhs.R[1];
171   -
172   - //allocate the necessary memory
173   - HANDLE_ERROR(cudaMalloc(&S, sizeof(P) * R[0] * R[1]));
174   -
175   - //copy the slice
176   - HANDLE_ERROR(cudaMemcpy(S, rhs.S, sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
177   -
178   -
179   - std::cout<<"Assignment operator."<<std::endl;
180   -
181   - return *this;
182   - }
183   -
184   - ///copy constructor
185   - scalarfield(const scalarfield &rhs)
186   - {
187   - //first make a shallow copy
188   - R[0] = rhs.R[0];
189   - R[1] = rhs.R[1];
190   -
191   - //do we have to make a deep copy?
192   - if(rhs.S == NULL)
193   - S = NULL; //no
194   - else
195   - {
196   - //allocate the necessary memory
197   - HANDLE_ERROR(cudaMalloc(&S, sizeof(P) * R[0] * R[1]));
198   -
199   - //copy the slice
200   - HANDLE_ERROR(cudaMemcpy(S, rhs.S, sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
201   - }
202   -
203   - std::cout<<"scalarfield COPY CONSTRUCTOR"<<std::endl;
204   - }
205   -
206   - void gaussian(P mean, P std)
207   - {
208   - int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
209   - int SQRT_BLOCK = (int)sqrt((float)maxThreads);
210   - //create one thread for each detector pixel
211   - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
212   - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
213   -
214   - gpu_gaussian<float> <<<dimGrid, dimBlock>>> (S, R[0], R[1], mean, std, shape);
215   - }
216   -
217   -};
218   -
219   -} //end namespace rts
220   -
221   -
222   -
223   -#endif
visualization/vectorfield.cuh deleted
1   -#ifndef RTS_VECTORFIELD
2   -#define RTS_VECTORFIELD
3   -
4   -#include "../visualization/colormap.h"
5   -#include "../envi/envi.h"
6   -#include "../math/quad.h"
7   -#include "../cuda/devices.h"
8   -#include "cublas_v2.h"
9   -#include <cuda_runtime.h>
10   -#include <iomanip>
11   -
12   -#include <qimage.h>
13   -#include <qcolor.h>
14   -
15   -
16   -namespace rts {
17   -template<typename P, unsigned int N = 3>
18   -struct vectorfield
19   -{
20   -private:
21   - void process_filename(std::string name, std::string &prefix, std::string &postfix,
22   - std::string &ext, unsigned int &digits)
23   - {
24   - std::stringstream ss(name);
25   - std::string item;
26   - std::vector<std::string> elems;
27   - while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension)
28   - {
29   - elems.push_back(item);
30   - }
31   -
32   - prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters)
33   - ext = elems[1]; //file extension (ex. .bmp, .png)
34   - ext = std::string(".") + ext; //add a period back into the extension
35   -
36   - size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'')
37   - size_t i1 = prefix.find_last_of("?");
38   -
39   - postfix = prefix.substr(i1+1);
40   - prefix = prefix.substr(0, i0);
41   -
42   - digits = i1 - i0 + 1; //compute the number of wildcards
43   -
44   - }
45   -
46   -public:
47   - //gpu pointers to scalar slices
48   - P* S[N];
49   -
50   - //resolution of the slice
51   - int R[2];
52   -
53   - quad<P> shape;
54   -
55   - vectorfield()
56   - {
57   - R[0] = R[1] = 0; //set the initial resolution to 0
58   - shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0));
59   - for(int n=0; n<N; n++) //set each vector component to NULL
60   - S[n]=NULL;
61   -
62   - std::cout<<"vectorfield CONSTRUCTOR"<<std::endl;
63   - }
64   -
65   - vectorfield(int x, int y)
66   - {
67   - //set the resolution
68   - R[0] = x;
69   - R[1] = y;
70   -
71   - shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0));
72   -
73   - //allocate memory on the GPU
74   - for(int n=0; n<N; n++)
75   - HANDLE_ERROR(cudaMalloc( (void**)&S[n], sizeof(P) * x * y ));
76   -
77   - std::cout<<"vectorfield CONSTRUCTOR"<<std::endl;
78   - }
79   -
80   - ~vectorfield()
81   - {
82   - for(int n=0; n<N; n++)
83   - if(S[n] != NULL)
84   - {
85   - HANDLE_ERROR(cudaFree(S[n]));
86   - S[n] = NULL;
87   - }
88   - R[0] = R[1] = 0;
89   -
90   - std::cout<<"vectorfield DESTRUCTOR"<<std::endl;
91   - }
92   -
93   - void clear()
94   - {
95   - //this function sets the slice to zero
96   - for(int n=0; n<N; n++)
97   - if(S[n] != NULL)
98   - HANDLE_ERROR(cudaMemset(S[n], 0, sizeof(P) * R[0] * R[1]));
99   - }
100   -
101   - void toImage(std::string filename, unsigned int n, P vmin, P vmax, rts::colormapType cmap = rts::cmBrewer)
102   - {
103   - rts::gpu2image<P>(S[n], filename, R[0], R[1], vmin, vmax, cmap);
104   - }
105   -
106   - void toImage3(std::string filename, P vmin, P vmax)
107   - {
108   - std::cout<<"Implementing a 3-component rainbow colormap: "<<filename.c_str()<<std::endl;
109   - //create a buffer for each RGB component
110   - unsigned char* red = (unsigned char*)malloc(sizeof(unsigned char) * 3 * R[0] * R[1]);
111   - unsigned char* green = (unsigned char*)malloc(sizeof(unsigned char) * 3 * R[0] * R[1]);
112   - unsigned char* blue = (unsigned char*)malloc(sizeof(unsigned char) * 3 * R[0] * R[1]);
113   -
114   - //retrieve the buffered images for each component
115   - rts::gpu2cpu<P>(S[0], red, R[0] * R[1], vmin, vmax);
116   - rts::gpu2cpu<P>(S[1], green, R[0] * R[1], vmin, vmax);
117   - rts::gpu2cpu<P>(S[2], blue, R[0] * R[1], vmin, vmax);
118   -
119   - QImage image(R[0], R[1], QImage::Format_RGB32); //create a QImage object
120   - if(image.isNull()) //if it didn't work, throw an error
121   - {
122   - std::cout<<"Error creating QImage."<<std::endl;
123   - return;
124   - }
125   -
126   - int i;
127   - unsigned char r, g, b;
128   - unsigned int x, y;
129   - for(y=0; y<R[1]; y++)
130   - for(x=0; x<R[0]; x++)
131   - {
132   - //calculate the 1D index
133   - i = y * R[0] + x;
134   -
135   - r = red[i * 3 + 0];
136   - g = green[i * 3 + 1];
137   - b = blue[i * 3 + 2];
138   -
139   - //set the image pixel
140   - QColor color(r, g, b);
141   - image.setPixel(x, y, color.rgb());
142   - }
143   -
144   - if(!image.save(filename.c_str())) //if the image didn't save correctly,
145   - std::cout<<"Error saving QImage."<<std::endl; // throw an error
146   - }
147   -
148   - void toImages(std::string filename, bool positive = true, rts::colormapType cmap = rts::cmBrewer, bool globalmax = true)
149   - {
150   - std::string prefix, postfix, extension;
151   - unsigned int digits;
152   - process_filename(filename, prefix, postfix, extension, digits); //process the filename for wild cards
153   -
154   - cublasStatus_t stat;
155   - cublasHandle_t handle;
156   -
157   - //create a CUBLAS handle
158   - stat = cublasCreate(&handle);
159   - if(stat != CUBLAS_STATUS_SUCCESS)
160   - {
161   - std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
162   - exit(1);
163   - }
164   -
165   - int L = R[0] * R[1]; //compute the number of discrete points in a slice
166   - int result; //result of the max operation
167   -
168   - P maxVal[N]; //array stores minimum and maximum values
169   - P maxAll = 0; //largest value in the data set
170   -
171   - //compute the maximum value for each vector component
172   - for(int n=0; n<N; n++)
173   - {
174   - if(sizeof(P) == 4)
175   - stat = cublasIsamax(handle, L, (const float*)S[n], 1, &result);
176   - else
177   - stat = cublasIdamax(handle, L, (const double*)S[n], 1, &result);
178   -
179   - result -= 1; //adjust for 1-based indexing
180   -
181   - if(stat != CUBLAS_STATUS_SUCCESS) //if there was a GPU error, terminate
182   - {
183   - std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
184   - exit(1);
185   - }
186   -
187   - //retrieve the maximum value for this slice and store it in the maxVal array
188   - HANDLE_ERROR(cudaMemcpy(&maxVal[n], S[n] + result, sizeof(P), cudaMemcpyDeviceToHost));
189   - if(maxVal[n] > maxAll) //if maxVal is larger, update the maxAll variable
190   - maxAll = maxVal[n];
191   -
192   - }
193   -
194   - cublasDestroy(handle); //destroy the CUBLAS handle
195   -
196   - if(cmap == rts::cmRainbow && N == 3) //if the user specifies a rainbow colormap, and the vector has 3 elements
197   - {
198   - //implement a single image with RGB = XYZ
199   - if(positive)
200   - toImage3(prefix+postfix+extension, 0, maxAll);
201   - else
202   - toImage3(prefix+postfix+extension, 0, maxAll);
203   - }
204   - else
205   - {
206   - for(int n=0; n<N; n++) //for each image
207   - {
208   - stringstream ss; //assemble the file name
209   - ss<<prefix<<std::setfill('0')<<std::setw(digits)<<n<<postfix<<extension;
210   - std::cout<<ss.str()<<std::endl;
211   - if(positive) //if the image is positive
212   - {
213   - std::cout<<"Positive: "<<n<<std::endl;
214   - if(globalmax) //if the global maximum is used
215   - toImage(ss.str(), n, 0, maxAll, cmap); //save the image using the global maximum
216   - else
217   - toImage(ss.str(), n, 0, maxVal[n], cmap); //save the image using the local maximum
218   - }
219   - else
220   - {
221   - std::cout<<"Negative: "<<n<<std::endl;
222   - if(globalmax) //if the global maximum is used
223   - toImage(ss.str(), n, -abs(maxVal[n]), abs(maxVal[n]), cmap); //save the image using the global maximum
224   - else
225   - toImage(ss.str(), n, -abs(maxVal[n]), abs(maxVal[n]), cmap); //save the image using the local maximum
226   - }
227   - }
228   - }
229   - }
230   -
231   - //assignment operator
232   - vectorfield & operator= (const vectorfield & rhs)
233   - {
234   - //de-allocate any existing GPU memory
235   - for(int n=0; n<N; n++)
236   - if(S[n] != NULL)
237   - HANDLE_ERROR(cudaFree(S[n]));
238   -
239   - //copy the slice resolution
240   - R[0] = rhs.R[0];
241   - R[1] = rhs.R[1];
242   -
243   - for(int n=0; n<N; n++)
244   - {
245   - //allocate the necessary memory
246   - HANDLE_ERROR(cudaMalloc(&S[n], sizeof(P) * R[0] * R[1]));
247   -
248   - //copy the slice
249   - HANDLE_ERROR(cudaMemcpy(S[n], rhs.S[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
250   - }
251   -
252   -
253   - std::cout<<"Assignment operator."<<std::endl;
254   -
255   - return *this;
256   - }
257   -
258   - ///copy constructor
259   - vectorfield(const vectorfield &rhs)
260   - {
261   - //first make a shallow copy
262   - R[0] = rhs.R[0];
263   - R[1] = rhs.R[1];
264   -
265   - //do we have to make a deep copy?
266   - if(rhs.S[0] == NULL) //no?
267   - {
268   - for(int n=0; n<N; n++) //set all components to NULL
269   - {
270   - S[n] = NULL;
271   - }
272   - }
273   - else
274   - {
275   - for(int n=0; n<N; n++)
276   - {
277   - //allocate the necessary memory
278   - HANDLE_ERROR(cudaMalloc(&S[n], sizeof(P) * R[0] * R[1]));
279   -
280   - //copy the slice
281   - HANDLE_ERROR(cudaMemcpy(S[n], rhs.S[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
282   - }
283   - }
284   -
285   - std::cout<<"vectorfield COPY CONSTRUCTOR"<<std::endl;
286   - }
287   -
288   -};
289   -
290   -} //end namespace rts
291   -
292   -
293   -
294   -#endif