Commit d609550ea30e9c322da07e09c7fe88735abc3492
1 parent
821513c8
fixed bug in plane wave refraction
Showing
8 changed files
with
483 additions
and
247 deletions
Show diff stats
math/matrix.h
... | ... | @@ -5,6 +5,7 @@ |
5 | 5 | #include <string.h> |
6 | 6 | #include <iostream> |
7 | 7 | #include "vector.h" |
8 | +#include "../cuda/callable.h" | |
8 | 9 | |
9 | 10 | namespace rts |
10 | 11 | { |
... | ... | @@ -15,7 +16,7 @@ struct matrix |
15 | 16 | //the matrix will be stored in column-major order (compatible with OpenGL) |
16 | 17 | T M[N*N]; |
17 | 18 | |
18 | - matrix() | |
19 | + CUDA_CALLABLE matrix() | |
19 | 20 | { |
20 | 21 | for(int r=0; r<N; r++) |
21 | 22 | for(int c=0; c<N; c++) |
... | ... | @@ -25,12 +26,12 @@ struct matrix |
25 | 26 | (*this)(r, c) = 0; |
26 | 27 | } |
27 | 28 | |
28 | - T& operator()(int row, int col) | |
29 | + CUDA_CALLABLE T& operator()(int row, int col) | |
29 | 30 | { |
30 | 31 | return M[col * N + row]; |
31 | 32 | } |
32 | 33 | |
33 | - matrix<T, N> operator=(T rhs) | |
34 | + CUDA_CALLABLE matrix<T, N> operator=(T rhs) | |
34 | 35 | { |
35 | 36 | int Nsq = N*N; |
36 | 37 | for(int i=0; i<Nsq; i++) |
... | ... | @@ -39,15 +40,7 @@ struct matrix |
39 | 40 | return *this; |
40 | 41 | } |
41 | 42 | |
42 | - /*matrix<T, N> operator=(matrix<T, N> rhs) | |
43 | - { | |
44 | - for(int i=0; i<N; i++) | |
45 | - M[i] = rhs.M[i]; | |
46 | - | |
47 | - return *this; | |
48 | - }*/ | |
49 | - | |
50 | - vec<T, N> operator*(vec<T, N> rhs) | |
43 | + CUDA_CALLABLE vec<T, N> operator*(vec<T, N> rhs) | |
51 | 44 | { |
52 | 45 | vec<T, N> result; |
53 | 46 | |
... | ... | @@ -58,7 +51,7 @@ struct matrix |
58 | 51 | return result; |
59 | 52 | } |
60 | 53 | |
61 | - std::string toStr() | |
54 | + CUDA_CALLABLE std::string toStr() | |
62 | 55 | { |
63 | 56 | std::stringstream ss; |
64 | 57 | ... | ... |
math/quad.h
math/quaternion.h
1 | -#include "rts/math/matrix.h" | |
2 | - | |
3 | 1 | #ifndef RTS_QUATERNION_H |
4 | 2 | #define RTS_QUATERNION_H |
5 | 3 | |
4 | +#include "rts/math/matrix.h" | |
5 | +#include "../cuda/callable.h" | |
6 | + | |
6 | 7 | namespace rts{ |
7 | 8 | |
8 | 9 | template<typename T> |
... | ... | @@ -14,160 +15,146 @@ public: |
14 | 15 | T y; |
15 | 16 | T z; |
16 | 17 | |
17 | - void normalize(); | |
18 | - void CreateRotation(T theta, T axis_x, T axis_y, T axis_z); | |
19 | - void CreateRotation(T theta, vec<T, 3> axis); | |
20 | - quaternion<T> operator*(quaternion<T> &rhs); | |
21 | - matrix<T, 3> toMatrix3(); | |
22 | - matrix<T, 4> toMatrix4(); | |
18 | + CUDA_CALLABLE void normalize(){ | |
23 | 19 | |
20 | + double length=sqrt(w*w + x*x + y*y + z*z); | |
21 | + w=w/length; | |
22 | + x=x/length; | |
23 | + y=y/length; | |
24 | + z=z/length; | |
25 | + } | |
24 | 26 | |
25 | - quaternion(); | |
26 | - quaternion(T w, T x, T y, T z); | |
27 | + CUDA_CALLABLE void CreateRotation(T theta, T ux, T uy, T uz){ | |
27 | 28 | |
28 | -}; | |
29 | + vec<T, 3> u(ux, uy, uz); | |
30 | + CreateRotation(theta, u); | |
31 | + } | |
29 | 32 | |
30 | -template<typename T> | |
31 | -void quaternion<T>::normalize() | |
32 | -{ | |
33 | - double length=sqrt(w*w + x*x + y*y + z*z); | |
34 | - w=w/length; | |
35 | - x=x/length; | |
36 | - y=y/length; | |
37 | - z=z/length; | |
38 | -} | |
33 | + CUDA_CALLABLE void CreateRotation(T theta, vec<T, 3> u){ | |
39 | 34 | |
40 | -template<typename T> | |
41 | -void quaternion<T>::CreateRotation(T theta, T ux, T uy, T uz) | |
42 | -{ | |
43 | - vec<T, 3> u(ux, uy, uz); | |
35 | + vec<T, 3> u_hat = u.norm(); | |
44 | 36 | |
45 | - CreateRotation(theta, u); | |
46 | - | |
47 | -} | |
37 | + //assign the given Euler rotation to this quaternion | |
38 | + w = (T)cos(theta/2); | |
39 | + x = u_hat[0]*(T)sin(theta/2); | |
40 | + y = u_hat[1]*(T)sin(theta/2); | |
41 | + z = u_hat[2]*(T)sin(theta/2); | |
42 | + } | |
48 | 43 | |
49 | -template<typename T> | |
50 | -void quaternion<T>::CreateRotation(T theta, vec<T, 3> u) | |
51 | -{ | |
52 | - vec<T, 3> u_hat = u.norm(); | |
44 | + CUDA_CALLABLE void CreateRotation(vec<T, 3> from, vec<T, 3> to){ | |
53 | 45 | |
54 | - //assign the given Euler rotation to this quaternion | |
55 | - w = (T)cos(theta/2); | |
56 | - x = u_hat[0]*(T)sin(theta/2); | |
57 | - y = u_hat[1]*(T)sin(theta/2); | |
58 | - z = u_hat[2]*(T)sin(theta/2); | |
59 | -} | |
46 | + vec<T> r = from.cross(to); //compute the rotation vector | |
47 | + T theta = asin(r.len()); //compute the angle of the rotation about r | |
48 | + //deal with a zero vector (both k and kn point in the same direction) | |
49 | + if(theta == (T)0) | |
50 | + return; | |
60 | 51 | |
61 | -template<typename T> | |
62 | -quaternion<T> quaternion<T>::operator *(quaternion<T> ¶m) | |
63 | -{ | |
64 | - float A, B, C, D, E, F, G, H; | |
52 | + //create a quaternion to capture the rotation | |
53 | + CreateRotation(theta, r.norm()); | |
54 | + } | |
65 | 55 | |
66 | 56 | |
67 | - A = (w + x)*(param.w + param.x); | |
68 | - B = (z - y)*(param.y - param.z); | |
69 | - C = (w - x)*(param.y + param.z); | |
70 | - D = (y + z)*(param.w - param.x); | |
71 | - E = (x + z)*(param.x + param.y); | |
72 | - F = (x - z)*(param.x - param.y); | |
73 | - G = (w + y)*(param.w - param.z); | |
74 | - H = (w - y)*(param.w + param.z); | |
75 | 57 | |
76 | - quaternion<T> result; | |
77 | - result.w = B + (-E - F + G + H) /2; | |
78 | - result.x = A - (E + F + G + H)/2; | |
79 | - result.y = C + (E - F + G - H)/2; | |
80 | - result.z = D + (E - F - G + H)/2; | |
58 | + CUDA_CALLABLE quaternion<T> operator *(quaternion<T> ¶m){ | |
81 | 59 | |
82 | - return result; | |
83 | -} | |
60 | + float A, B, C, D, E, F, G, H; | |
84 | 61 | |
85 | -template<typename T> | |
86 | -matrix<T, 3> quaternion<T>::toMatrix3() | |
87 | -{ | |
88 | - matrix<T, 3> result; | |
89 | 62 | |
63 | + A = (w + x)*(param.w + param.x); | |
64 | + B = (z - y)*(param.y - param.z); | |
65 | + C = (w - x)*(param.y + param.z); | |
66 | + D = (y + z)*(param.w - param.x); | |
67 | + E = (x + z)*(param.x + param.y); | |
68 | + F = (x - z)*(param.x - param.y); | |
69 | + G = (w + y)*(param.w - param.z); | |
70 | + H = (w - y)*(param.w + param.z); | |
90 | 71 | |
91 | - T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2; | |
72 | + quaternion<T> result; | |
73 | + result.w = B + (-E - F + G + H) /2; | |
74 | + result.x = A - (E + F + G + H)/2; | |
75 | + result.y = C + (E - F + G - H)/2; | |
76 | + result.z = D + (E - F - G + H)/2; | |
92 | 77 | |
78 | + return result; | |
79 | + } | |
80 | + | |
81 | + CUDA_CALLABLE matrix<T, 3> toMatrix3(){ | |
93 | 82 | |
94 | - // calculate coefficients | |
95 | - x2 = x + x; y2 = y + y; | |
96 | - z2 = z + z; | |
97 | - xx = x * x2; xy = x * y2; xz = x * z2; | |
98 | - yy = y * y2; yz = y * z2; zz = z * z2; | |
99 | - wx = w * x2; wy = w * y2; wz = w * z2; | |
83 | + matrix<T, 3> result; | |
100 | 84 | |
101 | - result(0, 0) = 1 - (yy + zz); | |
102 | - result(0, 1) = xy - wz; | |
103 | 85 | |
104 | - result(0, 2) = xz + wy; | |
86 | + T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2; | |
105 | 87 | |
106 | - result(1, 0) = xy + wz; | |
107 | - result(1, 1) = 1 - (xx + zz); | |
108 | 88 | |
109 | - result(1, 2) = yz - wx; | |
89 | + // calculate coefficients | |
90 | + x2 = x + x; y2 = y + y; | |
91 | + z2 = z + z; | |
92 | + xx = x * x2; xy = x * y2; xz = x * z2; | |
93 | + yy = y * y2; yz = y * z2; zz = z * z2; | |
94 | + wx = w * x2; wy = w * y2; wz = w * z2; | |
110 | 95 | |
111 | - result(2, 0) = xz - wy; | |
112 | - result(2, 1) = yz + wx; | |
96 | + result(0, 0) = 1 - (yy + zz); | |
97 | + result(0, 1) = xy - wz; | |
113 | 98 | |
114 | - result(2, 2) = 1 - (xx + yy); | |
99 | + result(0, 2) = xz + wy; | |
115 | 100 | |
116 | - return result; | |
117 | -} | |
101 | + result(1, 0) = xy + wz; | |
102 | + result(1, 1) = 1 - (xx + zz); | |
118 | 103 | |
119 | -template<typename T> | |
120 | -matrix<T, 4> quaternion<T>::toMatrix4() | |
121 | -{ | |
122 | - matrix<T, 4> result; | |
104 | + result(1, 2) = yz - wx; | |
123 | 105 | |
106 | + result(2, 0) = xz - wy; | |
107 | + result(2, 1) = yz + wx; | |
124 | 108 | |
125 | - T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2; | |
109 | + result(2, 2) = 1 - (xx + yy); | |
126 | 110 | |
111 | + return result; | |
112 | + } | |
127 | 113 | |
128 | - // calculate coefficients | |
129 | - x2 = x + x; y2 = y + y; | |
130 | - z2 = z + z; | |
131 | - xx = x * x2; xy = x * y2; xz = x * z2; | |
132 | - yy = y * y2; yz = y * z2; zz = z * z2; | |
133 | - wx = w * x2; wy = w * y2; wz = w * z2; | |
114 | + CUDA_CALLABLE matrix<T, 4> toMatrix4(){ | |
134 | 115 | |
135 | - result(0, 0) = 1 - (yy + zz); | |
136 | - result(0, 1) = xy - wz; | |
116 | + matrix<T, 4> result; | |
117 | + T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2; | |
137 | 118 | |
138 | - result(0, 2) = xz + wy; | |
119 | + // calculate coefficients | |
120 | + x2 = x + x; y2 = y + y; | |
121 | + z2 = z + z; | |
122 | + xx = x * x2; xy = x * y2; xz = x * z2; | |
123 | + yy = y * y2; yz = y * z2; zz = z * z2; | |
124 | + wx = w * x2; wy = w * y2; wz = w * z2; | |
139 | 125 | |
140 | - result(1, 0) = xy + wz; | |
141 | - result(1, 1) = 1 - (xx + zz); | |
126 | + result(0, 0) = 1 - (yy + zz); | |
127 | + result(0, 1) = xy - wz; | |
142 | 128 | |
143 | - result(1, 2) = yz - wx; | |
129 | + result(0, 2) = xz + wy; | |
144 | 130 | |
145 | - result(2, 0) = xz - wy; | |
146 | - result(2, 1) = yz + wx; | |
131 | + result(1, 0) = xy + wz; | |
132 | + result(1, 1) = 1 - (xx + zz); | |
147 | 133 | |
148 | - result(2, 2) = 1 - (xx + yy); | |
134 | + result(1, 2) = yz - wx; | |
149 | 135 | |
150 | - result(3, 3) = 1; | |
136 | + result(2, 0) = xz - wy; | |
137 | + result(2, 1) = yz + wx; | |
151 | 138 | |
152 | - return result; | |
153 | -} | |
139 | + result(2, 2) = 1 - (xx + yy); | |
154 | 140 | |
155 | -template<typename T> | |
156 | -quaternion<T>::quaternion() | |
157 | -{ | |
158 | - w=0; x=0; y=0; z=0; | |
159 | -} | |
141 | + result(3, 3) = 1; | |
160 | 142 | |
161 | -template<typename T> | |
162 | -quaternion<T>::quaternion(T c, T i, T j, T k) | |
163 | -{ | |
164 | - w=c; x=i; y=j; z=k; | |
165 | -} | |
143 | + return result; | |
144 | + } | |
145 | + | |
146 | + | |
147 | + CUDA_CALLABLE quaternion(){ | |
148 | + w=0; x=0; y=0; z=0; | |
149 | + } | |
150 | + | |
151 | + CUDA_CALLABLE quaternion(T c, T i, T j, T k){ | |
152 | + w=c; x=i; y=j; z=k; | |
153 | + } | |
154 | + | |
155 | +}; | |
166 | 156 | |
167 | 157 | } //end rts namespace |
168 | 158 | |
169 | -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7 | |
170 | -//template<class T> using rtsQuaternion = rts::quaternion<T>; | |
171 | -//#endif | |
172 | 159 | |
173 | 160 | #endif | ... | ... |
math/realfield.cuh
1 | 1 | #ifndef RTS_REALFIELD_H |
2 | 2 | #define RTS_REALFIELD_H |
3 | 3 | |
4 | -#include "../visualization/colormap.h" | |
5 | -#include "../envi/envi.h" | |
6 | -#include "../math/quad.h" | |
7 | -#include "../cuda/devices.h" | |
8 | -#include "cublas_v2.h" | |
4 | +#include "../visualization/colormap.h" | |
5 | +#include "../envi/envi.h" | |
6 | +#include "../math/rect.h" | |
7 | +#include "../cuda/devices.h" | |
8 | +#include "cublas_v2.h" | |
9 | 9 | #include <cuda_runtime.h> |
10 | 10 | |
11 | -///Compute a Gaussian function in 3D (mostly for testing) | |
12 | -/*template<typename T> | |
13 | -__global__ void gpu_gaussian(T* dest, unsigned int r0, unsigned int r1, T mean, T std, rts::quad<T> shape) | |
14 | -{ | |
15 | - int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
16 | - int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
17 | - | |
18 | - //make sure that the thread indices are in-bounds | |
19 | - if(iu >= r0 || iv >= r1) return; | |
20 | - | |
21 | - //compute the index into the field | |
22 | - int i = iv*r0 + iu; | |
23 | - | |
24 | - T u = (T)iu / (T)r0; | |
25 | - T v = (T)iv / (T)r1; | |
26 | - | |
27 | - rts::vec<T> p = shape(u, v); | |
28 | - | |
29 | - T fx = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[0] - mean, 2) / (2 * std*std) ); | |
30 | - T fy = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[1] - mean, 2) / (2 * std*std) ); | |
31 | - T fz = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[2] - mean, 2) / (2 * std*std) ); | |
32 | - | |
33 | - dest[i] = fx * fy * fz; | |
11 | +///Compute a Gaussian function in 3D (mostly for testing) | |
12 | +/*template<typename T> | |
13 | +__global__ void gpu_gaussian(T* dest, unsigned int r0, unsigned int r1, T mean, T std, rts::rect<T> shape) | |
14 | +{ | |
15 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
16 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
17 | + | |
18 | + //make sure that the thread indices are in-bounds | |
19 | + if(iu >= r0 || iv >= r1) return; | |
20 | + | |
21 | + //compute the index into the field | |
22 | + int i = iv*r0 + iu; | |
23 | + | |
24 | + T u = (T)iu / (T)r0; | |
25 | + T v = (T)iv / (T)r1; | |
26 | + | |
27 | + rts::vec<T> p = shape(u, v); | |
28 | + | |
29 | + T fx = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[0] - mean, 2) / (2 * std*std) ); | |
30 | + T fy = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[1] - mean, 2) / (2 * std*std) ); | |
31 | + T fz = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[2] - mean, 2) / (2 * std*std) ); | |
32 | + | |
33 | + dest[i] = fx * fy * fz; | |
34 | 34 | }*/ |
35 | 35 | |
36 | 36 | namespace rts{ |
... | ... | @@ -38,9 +38,9 @@ namespace rts{ |
38 | 38 | template<typename P, unsigned int N = 1, bool positive = false> |
39 | 39 | class realfield{ |
40 | 40 | |
41 | - P* X[N]; //an array of N gpu pointers for each field component | |
42 | - int R[2]; //resolution of the slice | |
43 | - quad<P> shape; | |
41 | + P* X[N]; //an array of N gpu pointers for each field component | |
42 | + int R[2]; //resolution of the slice | |
43 | + rect<P> shape; | |
44 | 44 | |
45 | 45 | void process_filename(std::string name, std::string &prefix, std::string &postfix, |
46 | 46 | std::string &ext, unsigned int &digits) |
... | ... | @@ -68,13 +68,13 @@ class realfield{ |
68 | 68 | |
69 | 69 | void init() |
70 | 70 | { |
71 | - for(unsigned int n=0; n<N; n++) | |
71 | + for(unsigned int n=0; n<N; n++) | |
72 | 72 | X[n] = NULL; |
73 | 73 | } |
74 | 74 | void destroy() |
75 | 75 | { |
76 | - for(unsigned int n=0; n<N; n++) | |
77 | - if(X[n] != NULL) | |
76 | + for(unsigned int n=0; n<N; n++) | |
77 | + if(X[n] != NULL) | |
78 | 78 | HANDLE_ERROR(cudaFree(X[n])); |
79 | 79 | } |
80 | 80 | |
... | ... | @@ -86,25 +86,25 @@ public: |
86 | 86 | init(); |
87 | 87 | std::cout<<"realfield CONSTRUCTOR"<<std::endl; |
88 | 88 | } |
89 | - realfield(unsigned int x, unsigned int y) | |
90 | - { | |
91 | - //set the resolution | |
92 | - R[0] = x; | |
93 | - R[1] = y; | |
94 | - //allocate memory on the GPU | |
95 | - for(unsigned int n=0; n<N; n++) | |
96 | - { | |
89 | + realfield(unsigned int x, unsigned int y) | |
90 | + { | |
91 | + //set the resolution | |
92 | + R[0] = x; | |
93 | + R[1] = y; | |
94 | + //allocate memory on the GPU | |
95 | + for(unsigned int n=0; n<N; n++) | |
96 | + { | |
97 | 97 | HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(P) * R[0] * R[1] )); |
98 | - } | |
99 | - shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0)); //default geometry | |
100 | - clear(); //zero the field | |
101 | - std::cout<<"realfield CONSTRUCTOR"<<std::endl; | |
98 | + } | |
99 | + //shape = rect<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0)); //default geometry | |
100 | + clear(); //zero the field | |
101 | + std::cout<<"realfield CONSTRUCTOR"<<std::endl; | |
102 | 102 | } |
103 | 103 | |
104 | - ~realfield() | |
105 | - { | |
106 | - destroy(); | |
107 | - std::cout<<"realfield DESTRUCTOR"<<std::endl; | |
104 | + ~realfield() | |
105 | + { | |
106 | + destroy(); | |
107 | + std::cout<<"realfield DESTRUCTOR"<<std::endl; | |
108 | 108 | } |
109 | 109 | |
110 | 110 | P* ptr(unsigned int n) |
... | ... | @@ -115,11 +115,11 @@ public: |
115 | 115 | } |
116 | 116 | |
117 | 117 | //set all components of the field to zero |
118 | - void clear() | |
119 | - { | |
120 | - for(unsigned int n=0; n<N; n++) | |
121 | - if(X[n] != NULL) | |
122 | - HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(P) * R[0] * R[1])); | |
118 | + void clear() | |
119 | + { | |
120 | + for(unsigned int n=0; n<N; n++) | |
121 | + if(X[n] != NULL) | |
122 | + HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(P) * R[0] * R[1])); | |
123 | 123 | } |
124 | 124 | |
125 | 125 | void toImage(std::string filename, unsigned int n, P vmin, P vmax, rts::colormapType cmap = rts::cmBrewer) |
... | ... | @@ -127,7 +127,7 @@ public: |
127 | 127 | rts::gpu2image<P>(X[n], filename, R[0], R[1], vmin, vmax, cmap); |
128 | 128 | } |
129 | 129 | |
130 | - void toImages(std::string filename, rts::colormapType cmap = rts::cmBrewer) | |
130 | + void toImages(std::string filename, bool global_max = true, rts::colormapType cmap = rts::cmBrewer) | |
131 | 131 | { |
132 | 132 | std::string prefix, postfix, extension; |
133 | 133 | unsigned int digits; |
... | ... | @@ -175,74 +175,77 @@ public: |
175 | 175 | |
176 | 176 | cublasDestroy(handle); //destroy the CUBLAS handle |
177 | 177 | |
178 | + P outputMax = abs(maxAll); //maximum value used for each output image | |
178 | 179 | for(int n=0; n<N; n++) //for each image |
179 | 180 | { |
181 | + if(!global_max) outputMax = maxVal[n]; //calculate the maximum value for this image | |
182 | + | |
180 | 183 | stringstream ss; //assemble the file name |
181 | 184 | ss<<prefix<<std::setfill('0')<<std::setw(digits)<<n<<postfix<<extension; |
182 | 185 | std::cout<<ss.str()<<std::endl; |
183 | 186 | if(positive) //if the image is positive |
184 | - toImage(ss.str(), n, 0, maxAll, cmap); //save the image using the global maximum | |
187 | + toImage(ss.str(), n, 0, abs(outputMax), cmap); //save the image using the global maximum | |
185 | 188 | else |
186 | - toImage(ss.str(), n, -abs(maxVal[n]), abs(maxVal[n]), cmap); //save the image using the global maximum | |
189 | + toImage(ss.str(), n, -abs(outputMax), abs(outputMax), cmap); //save the image using the global maximum | |
187 | 190 | } |
188 | 191 | } |
189 | 192 | |
190 | - //assignment operator | |
191 | - realfield & operator= (const realfield & rhs) | |
192 | - { | |
193 | - //de-allocate any existing GPU memory | |
194 | - destroy(); | |
195 | - | |
196 | - //copy the slice resolution | |
197 | - R[0] = rhs.R[0]; | |
198 | - R[1] = rhs.R[1]; | |
199 | - | |
200 | - for(unsigned int n=0; n<N; n++) | |
201 | - { | |
202 | - //allocate the necessary memory | |
203 | - HANDLE_ERROR(cudaMalloc(&X[n], sizeof(P) * R[0] * R[1])); | |
204 | - //copy the slice | |
205 | - HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
206 | - } | |
207 | - std::cout<<"Assignment operator."<<std::endl; | |
208 | - | |
209 | - return *this; | |
210 | - } | |
211 | - | |
212 | - ///copy constructor | |
213 | - realfield(const realfield &rhs) | |
214 | - { | |
215 | - //first make a shallow copy | |
216 | - R[0] = rhs.R[0]; | |
217 | - R[1] = rhs.R[1]; | |
218 | - | |
219 | - for(unsigned int n=0; n<N; n++) | |
220 | - { | |
221 | - //do we have to make a deep copy? | |
222 | - if(rhs.X[n] == NULL) | |
223 | - X[n] = NULL; //no | |
224 | - else | |
225 | - { | |
226 | - //allocate the necessary memory | |
227 | - HANDLE_ERROR(cudaMalloc(&X[n], sizeof(P) * R[0] * R[1])); | |
228 | - | |
229 | - //copy the slice | |
230 | - HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
231 | - } | |
232 | - } | |
233 | - | |
234 | - std::cout<<"realfield COPY CONSTRUCTOR"<<std::endl; | |
193 | + //assignment operator | |
194 | + realfield & operator= (const realfield & rhs) | |
195 | + { | |
196 | + //de-allocate any existing GPU memory | |
197 | + destroy(); | |
198 | + | |
199 | + //copy the slice resolution | |
200 | + R[0] = rhs.R[0]; | |
201 | + R[1] = rhs.R[1]; | |
202 | + | |
203 | + for(unsigned int n=0; n<N; n++) | |
204 | + { | |
205 | + //allocate the necessary memory | |
206 | + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(P) * R[0] * R[1])); | |
207 | + //copy the slice | |
208 | + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
209 | + } | |
210 | + std::cout<<"Assignment operator."<<std::endl; | |
211 | + | |
212 | + return *this; | |
213 | + } | |
214 | + | |
215 | + ///copy constructor | |
216 | + realfield(const realfield &rhs) | |
217 | + { | |
218 | + //first make a shallow copy | |
219 | + R[0] = rhs.R[0]; | |
220 | + R[1] = rhs.R[1]; | |
221 | + | |
222 | + for(unsigned int n=0; n<N; n++) | |
223 | + { | |
224 | + //do we have to make a deep copy? | |
225 | + if(rhs.X[n] == NULL) | |
226 | + X[n] = NULL; //no | |
227 | + else | |
228 | + { | |
229 | + //allocate the necessary memory | |
230 | + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(P) * R[0] * R[1])); | |
231 | + | |
232 | + //copy the slice | |
233 | + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
234 | + } | |
235 | + } | |
236 | + | |
237 | + std::cout<<"realfield COPY CONSTRUCTOR"<<std::endl; | |
235 | 238 | } |
236 | 239 | |
237 | - /*void gaussian(P mean, P std, unsigned int n=0) //creates a 3D gaussian using component n | |
238 | - { | |
239 | - int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
240 | - int SQRT_BLOCK = (int)sqrt((float)maxThreads); | |
241 | - //create one thread for each detector pixel | |
242 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
243 | - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
244 | - | |
245 | - gpu_gaussian<float> <<<dimGrid, dimBlock>>> (X[n], R[0], R[1], mean, std, shape); | |
240 | + /*void gaussian(P mean, P std, unsigned int n=0) //creates a 3D gaussian using component n | |
241 | + { | |
242 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
243 | + int SQRT_BLOCK = (int)sqrt((float)maxThreads); | |
244 | + //create one thread for each detector pixel | |
245 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
246 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
247 | + | |
248 | + gpu_gaussian<float> <<<dimGrid, dimBlock>>> (X[n], R[0], R[1], mean, std, shape); | |
246 | 249 | }*/ |
247 | 250 | |
248 | 251 | ... | ... |
1 | +#ifndef RTS_RECT_H | |
2 | +#define RTS_RECT_H | |
3 | + | |
4 | +//enable CUDA_CALLABLE macro | |
5 | +#include "../cuda/callable.h" | |
6 | +#include "../math/vector.h" | |
7 | +#include "../math/triangle.h" | |
8 | +#include "../math/quaternion.h" | |
9 | +#include <iostream> | |
10 | +#include <iomanip> | |
11 | +#include <algorithm> | |
12 | + | |
13 | +namespace rts{ | |
14 | + | |
15 | +//template for a rectangle class in ND space | |
16 | +template <class T, int N = 3> | |
17 | +struct rect | |
18 | +{ | |
19 | + /* | |
20 | + ^ O | |
21 | + | | |
22 | + | | |
23 | + Y C | |
24 | + | | |
25 | + | | |
26 | + O---------X---------> | |
27 | + */ | |
28 | + | |
29 | +private: | |
30 | + | |
31 | + rts::vec<T, N> C; | |
32 | + rts::vec<T, N> X; | |
33 | + rts::vec<T, N> Y; | |
34 | + | |
35 | + CUDA_CALLABLE void scale(T factor){ | |
36 | + X *= factor; | |
37 | + Y *= factor; | |
38 | + } | |
39 | + | |
40 | + CUDA_CALLABLE void normal(vec<T, N> n){ //orient the rectangle along the specified normal | |
41 | + | |
42 | + n = n.norm(); //normalize, just in case | |
43 | + vec<T, N> n_current = X.cross(Y).norm(); //compute the current normal | |
44 | + quaternion<T> q; //create a quaternion | |
45 | + q.CreateRotation(n_current, n); //initialize a rotation from n_current to n | |
46 | + | |
47 | + //apply the quaternion to the vectors and position | |
48 | + X = q.toMatrix3() * X; | |
49 | + Y = q.toMatrix3() * Y; | |
50 | + } | |
51 | + | |
52 | + CUDA_CALLABLE void init(){ | |
53 | + C = vec<T, N>(0, 0, 0); | |
54 | + X = vec<T, N>(1, 0, 0); | |
55 | + Y = vec<T, N>(0, 1, 0); | |
56 | + } | |
57 | + | |
58 | +public: | |
59 | + | |
60 | + CUDA_CALLABLE rect(){ | |
61 | + init(); | |
62 | + } | |
63 | + | |
64 | + CUDA_CALLABLE rect(T size, T z_pos = (T)0){ | |
65 | + init(); //use the default setup | |
66 | + scale(size); //scale the rectangle | |
67 | + C[2] = z_pos; | |
68 | + } | |
69 | + | |
70 | + CUDA_CALLABLE rect(T size, vec<T, N> c, vec<T, N> n = vec<T, N>(0, 0, 1)){ | |
71 | + init(); //start with the default setting | |
72 | + C = c; | |
73 | + scale(size); //scale the rectangle | |
74 | + normal(n); //orient | |
75 | + | |
76 | + } | |
77 | + | |
78 | + /*CUDA_CALLABLE rect(vec<T, N> a, vec<T, N> b, vec<T, N> c) | |
79 | + { | |
80 | + A = a; | |
81 | + Y = b - a; | |
82 | + X = c - a - Y; | |
83 | + | |
84 | + }*/ | |
85 | + | |
86 | + /******************************************************************* | |
87 | + Constructor - create a rect from a position, normal, and rotation | |
88 | + *******************************************************************/ | |
89 | + /*CUDA_CALLABLE rect(rts::vec<T, N> c, rts::vec<T, N> normal, T width, T height, T theta) | |
90 | + { | |
91 | + | |
92 | + //compute the X direction - start along world-space X | |
93 | + Y = rts::vec<T, N>(0, 1, 0); | |
94 | + if(Y == normal) | |
95 | + Y = rts::vec<T, N>(0, 0, 1); | |
96 | + | |
97 | + X = Y.cross(normal).norm(); | |
98 | + | |
99 | + std::cout<<X<<std::endl; | |
100 | + | |
101 | + //rotate the X axis by theta radians | |
102 | + rts::quaternion<T> q; | |
103 | + q.CreateRotation(theta, normal); | |
104 | + X = q.toMatrix3() * X; | |
105 | + Y = normal.cross(X); | |
106 | + | |
107 | + //normalize everything | |
108 | + X = X.norm(); | |
109 | + Y = Y.norm(); | |
110 | + | |
111 | + //scale to match the rect width and height | |
112 | + X = X * width; | |
113 | + Y = Y * height; | |
114 | + | |
115 | + //set the corner of the plane | |
116 | + A = c - X * 0.5f - Y * 0.5f; | |
117 | + | |
118 | + std::cout<<X<<std::endl; | |
119 | + }*/ | |
120 | + | |
121 | + //boolean comparison | |
122 | + bool operator==(const rect<T, N> & rhs) | |
123 | + { | |
124 | + if(C == rhs.C && X == rhs.X && Y == rhs.Y) | |
125 | + return true; | |
126 | + else | |
127 | + return false; | |
128 | + } | |
129 | + | |
130 | + /******************************************* | |
131 | + Return the normal for the rect | |
132 | + *******************************************/ | |
133 | + CUDA_CALLABLE rts::vec<T, N> n() | |
134 | + { | |
135 | + return (X.cross(Y)).norm(); | |
136 | + } | |
137 | + | |
138 | + CUDA_CALLABLE rts::vec<T, N> p(T a, T b) | |
139 | + { | |
140 | + rts::vec<T, N> result; | |
141 | + //given the two parameters a, b = [0 1], returns the position in world space | |
142 | + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | |
143 | + result = A + X * a + Y * b; | |
144 | + | |
145 | + return result; | |
146 | + } | |
147 | + | |
148 | + CUDA_CALLABLE rts::vec<T, N> operator()(T a, T b) | |
149 | + { | |
150 | + return p(a, b); | |
151 | + } | |
152 | + | |
153 | + std::string str() | |
154 | + { | |
155 | + std::stringstream ss; | |
156 | + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | |
157 | + ss<<std::left<<"B="<<setfill('-')<<setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl; | |
158 | + ss<<setfill(' ')<<setw(23)<<"|"<<"|"<<std::endl<<setw(23)<<"|"<<"|"<<std::endl; | |
159 | + ss<<std::left<<"A="<<setfill('-')<<setw(20)<<A<<">"<<"D="<<A + X; | |
160 | + | |
161 | + return ss.str(); | |
162 | + | |
163 | + } | |
164 | + | |
165 | + CUDA_CALLABLE rect<T, N> operator*(T rhs) | |
166 | + { | |
167 | + //scales the plane by a scalar value | |
168 | + | |
169 | + //create the new rectangle | |
170 | + rect<T, N> result = *this; | |
171 | + result.scale(rhs); | |
172 | + | |
173 | + return result; | |
174 | + | |
175 | + } | |
176 | + | |
177 | + CUDA_CALLABLE T dist(vec<T, N> p) | |
178 | + { | |
179 | + //compute the distance between a point and this rect | |
180 | + | |
181 | + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | |
182 | + | |
183 | + //first break the rect up into two triangles | |
184 | + triangle<T, N> T0(A, A+X, A+Y); | |
185 | + triangle<T, N> T1(A+X+Y, A+X, A+Y); | |
186 | + | |
187 | + | |
188 | + T d0 = T0.dist(p); | |
189 | + T d1 = T1.dist(p); | |
190 | + | |
191 | + if(d0 < d1) | |
192 | + return d0; | |
193 | + else | |
194 | + return d1; | |
195 | + } | |
196 | + | |
197 | + CUDA_CALLABLE T dist_max(vec<T, N> p) | |
198 | + { | |
199 | + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | |
200 | + T da = (A - p).len(); | |
201 | + T db = (A+X - p).len(); | |
202 | + T dc = (A+Y - p).len(); | |
203 | + T dd = (A+X+Y - p).len(); | |
204 | + | |
205 | + return std::max( da, std::max(db, std::max(dc, dd) ) ); | |
206 | + } | |
207 | +}; | |
208 | + | |
209 | +} //end namespace rts | |
210 | + | |
211 | +template <typename T, int N> | |
212 | +std::ostream& operator<<(std::ostream& os, rts::rect<T, N> R) | |
213 | +{ | |
214 | + os<<R.str(); | |
215 | + return os; | |
216 | +} | |
217 | + | |
218 | + | |
219 | +#endif | ... | ... |
math/vector.h
... | ... | @@ -169,6 +169,11 @@ struct vec |
169 | 169 | |
170 | 170 | return result; |
171 | 171 | } |
172 | + CUDA_CALLABLE vec<T, N> operator*=(T rhs){ | |
173 | + for(int i=0; i<N; i++) | |
174 | + v[i] = v[i] * rhs; | |
175 | + return *this; | |
176 | + } | |
172 | 177 | |
173 | 178 | //conversion from a point |
174 | 179 | /*CUDA_CALLABLE vector<T, N> & operator=(point<T, N> rhs) | ... | ... |
optics/efield.cuh
... | ... | @@ -14,7 +14,7 @@ namespace rts{ |
14 | 14 | |
15 | 15 | template<typename T> |
16 | 16 | __global__ void gpu_planewave2efield(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1, |
17 | - planewave<T> w, quad<T> q) | |
17 | + planewave<T> w, rect<T> q) | |
18 | 18 | { |
19 | 19 | int iu = blockIdx.x * blockDim.x + threadIdx.x; |
20 | 20 | int iv = blockIdx.y * blockDim.y + threadIdx.y; |
... | ... | @@ -119,7 +119,7 @@ protected: |
119 | 119 | unsigned int R[2]; |
120 | 120 | |
121 | 121 | //shape of the 2D field |
122 | - quad<P> pos; | |
122 | + rect<P> pos; | |
123 | 123 | |
124 | 124 | void from_planewave(planewave<P> p) |
125 | 125 | { |
... | ... | @@ -196,7 +196,7 @@ public: |
196 | 196 | efield(unsigned int res0, unsigned int res1, bool _vector = true) |
197 | 197 | { |
198 | 198 | init(res0, res1, _vector); |
199 | - pos = rts::quad<P>(rts::vec<P>(-10, 0, -10), rts::vec<P>(-10, 0, 10), rts::vec<P>(10, 0, 10)); | |
199 | + //pos = rts::rect<P>(rts::vec<P>(-10, 0, -10), rts::vec<P>(-10, 0, 10), rts::vec<P>(10, 0, 10)); | |
200 | 200 | } |
201 | 201 | |
202 | 202 | //destructor |
... | ... | @@ -217,7 +217,7 @@ public: |
217 | 217 | } |
218 | 218 | } |
219 | 219 | |
220 | - void position(quad<P> _p) | |
220 | + void position(rect<P> _p) | |
221 | 221 | { |
222 | 222 | pos = _p; |
223 | 223 | } | ... | ... |
optics/planewave.h
... | ... | @@ -60,7 +60,35 @@ public: |
60 | 60 | |
61 | 61 | planewave<P> refract(rts::vec<P> kn) const |
62 | 62 | { |
63 | - vec<P> kn_hat = kn.norm(); //normalize new_k | |
63 | + vec<P> kn_hat = kn.norm(); //normalize the new k | |
64 | + vec<P> k_hat = k.norm(); //normalize the current k | |
65 | + | |
66 | + vec<P> r = k_hat.cross(kn_hat); //compute the rotation vector | |
67 | + | |
68 | + P theta = asin(r.len()); //compute the angle of the rotation about r | |
69 | + | |
70 | + planewave<P> new_p; //create a new plane wave | |
71 | + | |
72 | + //deal with a zero vector (both k and kn point in the same direction) | |
73 | + if(theta == (P)0) | |
74 | + { | |
75 | + new_p = *this; | |
76 | + return new_p; | |
77 | + } | |
78 | + | |
79 | + //create a quaternion to capture the rotation | |
80 | + quaternion<P> q; | |
81 | + q.CreateRotation(theta, r.norm()); | |
82 | + | |
83 | + //apply the rotation to E0 | |
84 | + vec<P> E0n = q.toMatrix3() * E0; | |
85 | + | |
86 | + new_p.k = kn_hat * k.len(); | |
87 | + new_p.E0 = E0n; | |
88 | + | |
89 | + return new_p; | |
90 | + | |
91 | + /*vec<P> kn_hat = kn.norm(); //normalize new_k | |
64 | 92 | vec<P> k_hat = k.norm(); |
65 | 93 | |
66 | 94 | //compute the side vector (around which we will be rotating) |
... | ... | @@ -86,7 +114,8 @@ public: |
86 | 114 | new_p.E0 = E0_prime; |
87 | 115 | new_p.k = kn_hat * k.len(); |
88 | 116 | |
89 | - return new_p; | |
117 | + return new_p;*/ | |
118 | + | |
90 | 119 | /*vec<P> kn_hat = kn.norm(); //normalize kn |
91 | 120 | vec<P> k_hat = k.norm(); |
92 | 121 | vec<P> E0_hat = E0.norm(); | ... | ... |