Commit d609550ea30e9c322da07e09c7fe88735abc3492

Authored by David Mayerich
1 parent 821513c8

fixed bug in plane wave refraction

math/matrix.h
... ... @@ -5,6 +5,7 @@
5 5 #include <string.h>
6 6 #include <iostream>
7 7 #include "vector.h"
  8 +#include "../cuda/callable.h"
8 9  
9 10 namespace rts
10 11 {
... ... @@ -15,7 +16,7 @@ struct matrix
15 16 //the matrix will be stored in column-major order (compatible with OpenGL)
16 17 T M[N*N];
17 18  
18   - matrix()
  19 + CUDA_CALLABLE matrix()
19 20 {
20 21 for(int r=0; r<N; r++)
21 22 for(int c=0; c<N; c++)
... ... @@ -25,12 +26,12 @@ struct matrix
25 26 (*this)(r, c) = 0;
26 27 }
27 28  
28   - T& operator()(int row, int col)
  29 + CUDA_CALLABLE T& operator()(int row, int col)
29 30 {
30 31 return M[col * N + row];
31 32 }
32 33  
33   - matrix<T, N> operator=(T rhs)
  34 + CUDA_CALLABLE matrix<T, N> operator=(T rhs)
34 35 {
35 36 int Nsq = N*N;
36 37 for(int i=0; i<Nsq; i++)
... ... @@ -39,15 +40,7 @@ struct matrix
39 40 return *this;
40 41 }
41 42  
42   - /*matrix<T, N> operator=(matrix<T, N> rhs)
43   - {
44   - for(int i=0; i<N; i++)
45   - M[i] = rhs.M[i];
46   -
47   - return *this;
48   - }*/
49   -
50   - vec<T, N> operator*(vec<T, N> rhs)
  43 + CUDA_CALLABLE vec<T, N> operator*(vec<T, N> rhs)
51 44 {
52 45 vec<T, N> result;
53 46  
... ... @@ -58,7 +51,7 @@ struct matrix
58 51 return result;
59 52 }
60 53  
61   - std::string toStr()
  54 + CUDA_CALLABLE std::string toStr()
62 55 {
63 56 std::stringstream ss;
64 57  
... ...
math/quad.h
1   -#ifndef RTS_RECT_H
2   -#define RTS_RECT_H
  1 +#ifndef RTS_QUAD_H
  2 +#define RTS_QUAD_H
3 3  
4 4 //enable CUDA_CALLABLE macro
5 5 #include "../cuda/callable.h"
... ...
math/quaternion.h
1   -#include "rts/math/matrix.h"
2   -
3 1 #ifndef RTS_QUATERNION_H
4 2 #define RTS_QUATERNION_H
5 3  
  4 +#include "rts/math/matrix.h"
  5 +#include "../cuda/callable.h"
  6 +
6 7 namespace rts{
7 8  
8 9 template<typename T>
... ... @@ -14,160 +15,146 @@ public:
14 15 T y;
15 16 T z;
16 17  
17   - void normalize();
18   - void CreateRotation(T theta, T axis_x, T axis_y, T axis_z);
19   - void CreateRotation(T theta, vec<T, 3> axis);
20   - quaternion<T> operator*(quaternion<T> &rhs);
21   - matrix<T, 3> toMatrix3();
22   - matrix<T, 4> toMatrix4();
  18 + CUDA_CALLABLE void normalize(){
23 19  
  20 + double length=sqrt(w*w + x*x + y*y + z*z);
  21 + w=w/length;
  22 + x=x/length;
  23 + y=y/length;
  24 + z=z/length;
  25 + }
24 26  
25   - quaternion();
26   - quaternion(T w, T x, T y, T z);
  27 + CUDA_CALLABLE void CreateRotation(T theta, T ux, T uy, T uz){
27 28  
28   -};
  29 + vec<T, 3> u(ux, uy, uz);
  30 + CreateRotation(theta, u);
  31 + }
29 32  
30   -template<typename T>
31   -void quaternion<T>::normalize()
32   -{
33   - double length=sqrt(w*w + x*x + y*y + z*z);
34   - w=w/length;
35   - x=x/length;
36   - y=y/length;
37   - z=z/length;
38   -}
  33 + CUDA_CALLABLE void CreateRotation(T theta, vec<T, 3> u){
39 34  
40   -template<typename T>
41   -void quaternion<T>::CreateRotation(T theta, T ux, T uy, T uz)
42   -{
43   - vec<T, 3> u(ux, uy, uz);
  35 + vec<T, 3> u_hat = u.norm();
44 36  
45   - CreateRotation(theta, u);
46   -
47   -}
  37 + //assign the given Euler rotation to this quaternion
  38 + w = (T)cos(theta/2);
  39 + x = u_hat[0]*(T)sin(theta/2);
  40 + y = u_hat[1]*(T)sin(theta/2);
  41 + z = u_hat[2]*(T)sin(theta/2);
  42 + }
48 43  
49   -template<typename T>
50   -void quaternion<T>::CreateRotation(T theta, vec<T, 3> u)
51   -{
52   - vec<T, 3> u_hat = u.norm();
  44 + CUDA_CALLABLE void CreateRotation(vec<T, 3> from, vec<T, 3> to){
53 45  
54   - //assign the given Euler rotation to this quaternion
55   - w = (T)cos(theta/2);
56   - x = u_hat[0]*(T)sin(theta/2);
57   - y = u_hat[1]*(T)sin(theta/2);
58   - z = u_hat[2]*(T)sin(theta/2);
59   -}
  46 + vec<T> r = from.cross(to); //compute the rotation vector
  47 + T theta = asin(r.len()); //compute the angle of the rotation about r
  48 + //deal with a zero vector (both k and kn point in the same direction)
  49 + if(theta == (T)0)
  50 + return;
60 51  
61   -template<typename T>
62   -quaternion<T> quaternion<T>::operator *(quaternion<T> &param)
63   -{
64   - float A, B, C, D, E, F, G, H;
  52 + //create a quaternion to capture the rotation
  53 + CreateRotation(theta, r.norm());
  54 + }
65 55  
66 56  
67   - A = (w + x)*(param.w + param.x);
68   - B = (z - y)*(param.y - param.z);
69   - C = (w - x)*(param.y + param.z);
70   - D = (y + z)*(param.w - param.x);
71   - E = (x + z)*(param.x + param.y);
72   - F = (x - z)*(param.x - param.y);
73   - G = (w + y)*(param.w - param.z);
74   - H = (w - y)*(param.w + param.z);
75 57  
76   - quaternion<T> result;
77   - result.w = B + (-E - F + G + H) /2;
78   - result.x = A - (E + F + G + H)/2;
79   - result.y = C + (E - F + G - H)/2;
80   - result.z = D + (E - F - G + H)/2;
  58 + CUDA_CALLABLE quaternion<T> operator *(quaternion<T> &param){
81 59  
82   - return result;
83   -}
  60 + float A, B, C, D, E, F, G, H;
84 61  
85   -template<typename T>
86   -matrix<T, 3> quaternion<T>::toMatrix3()
87   -{
88   - matrix<T, 3> result;
89 62  
  63 + A = (w + x)*(param.w + param.x);
  64 + B = (z - y)*(param.y - param.z);
  65 + C = (w - x)*(param.y + param.z);
  66 + D = (y + z)*(param.w - param.x);
  67 + E = (x + z)*(param.x + param.y);
  68 + F = (x - z)*(param.x - param.y);
  69 + G = (w + y)*(param.w - param.z);
  70 + H = (w - y)*(param.w + param.z);
90 71  
91   - T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
  72 + quaternion<T> result;
  73 + result.w = B + (-E - F + G + H) /2;
  74 + result.x = A - (E + F + G + H)/2;
  75 + result.y = C + (E - F + G - H)/2;
  76 + result.z = D + (E - F - G + H)/2;
92 77  
  78 + return result;
  79 + }
  80 +
  81 + CUDA_CALLABLE matrix<T, 3> toMatrix3(){
93 82  
94   - // calculate coefficients
95   - x2 = x + x; y2 = y + y;
96   - z2 = z + z;
97   - xx = x * x2; xy = x * y2; xz = x * z2;
98   - yy = y * y2; yz = y * z2; zz = z * z2;
99   - wx = w * x2; wy = w * y2; wz = w * z2;
  83 + matrix<T, 3> result;
100 84  
101   - result(0, 0) = 1 - (yy + zz);
102   - result(0, 1) = xy - wz;
103 85  
104   - result(0, 2) = xz + wy;
  86 + T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
105 87  
106   - result(1, 0) = xy + wz;
107   - result(1, 1) = 1 - (xx + zz);
108 88  
109   - result(1, 2) = yz - wx;
  89 + // calculate coefficients
  90 + x2 = x + x; y2 = y + y;
  91 + z2 = z + z;
  92 + xx = x * x2; xy = x * y2; xz = x * z2;
  93 + yy = y * y2; yz = y * z2; zz = z * z2;
  94 + wx = w * x2; wy = w * y2; wz = w * z2;
110 95  
111   - result(2, 0) = xz - wy;
112   - result(2, 1) = yz + wx;
  96 + result(0, 0) = 1 - (yy + zz);
  97 + result(0, 1) = xy - wz;
113 98  
114   - result(2, 2) = 1 - (xx + yy);
  99 + result(0, 2) = xz + wy;
115 100  
116   - return result;
117   -}
  101 + result(1, 0) = xy + wz;
  102 + result(1, 1) = 1 - (xx + zz);
118 103  
119   -template<typename T>
120   -matrix<T, 4> quaternion<T>::toMatrix4()
121   -{
122   - matrix<T, 4> result;
  104 + result(1, 2) = yz - wx;
123 105  
  106 + result(2, 0) = xz - wy;
  107 + result(2, 1) = yz + wx;
124 108  
125   - T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
  109 + result(2, 2) = 1 - (xx + yy);
126 110  
  111 + return result;
  112 + }
127 113  
128   - // calculate coefficients
129   - x2 = x + x; y2 = y + y;
130   - z2 = z + z;
131   - xx = x * x2; xy = x * y2; xz = x * z2;
132   - yy = y * y2; yz = y * z2; zz = z * z2;
133   - wx = w * x2; wy = w * y2; wz = w * z2;
  114 + CUDA_CALLABLE matrix<T, 4> toMatrix4(){
134 115  
135   - result(0, 0) = 1 - (yy + zz);
136   - result(0, 1) = xy - wz;
  116 + matrix<T, 4> result;
  117 + T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
137 118  
138   - result(0, 2) = xz + wy;
  119 + // calculate coefficients
  120 + x2 = x + x; y2 = y + y;
  121 + z2 = z + z;
  122 + xx = x * x2; xy = x * y2; xz = x * z2;
  123 + yy = y * y2; yz = y * z2; zz = z * z2;
  124 + wx = w * x2; wy = w * y2; wz = w * z2;
139 125  
140   - result(1, 0) = xy + wz;
141   - result(1, 1) = 1 - (xx + zz);
  126 + result(0, 0) = 1 - (yy + zz);
  127 + result(0, 1) = xy - wz;
142 128  
143   - result(1, 2) = yz - wx;
  129 + result(0, 2) = xz + wy;
144 130  
145   - result(2, 0) = xz - wy;
146   - result(2, 1) = yz + wx;
  131 + result(1, 0) = xy + wz;
  132 + result(1, 1) = 1 - (xx + zz);
147 133  
148   - result(2, 2) = 1 - (xx + yy);
  134 + result(1, 2) = yz - wx;
149 135  
150   - result(3, 3) = 1;
  136 + result(2, 0) = xz - wy;
  137 + result(2, 1) = yz + wx;
151 138  
152   - return result;
153   -}
  139 + result(2, 2) = 1 - (xx + yy);
154 140  
155   -template<typename T>
156   -quaternion<T>::quaternion()
157   -{
158   - w=0; x=0; y=0; z=0;
159   -}
  141 + result(3, 3) = 1;
160 142  
161   -template<typename T>
162   -quaternion<T>::quaternion(T c, T i, T j, T k)
163   -{
164   - w=c; x=i; y=j; z=k;
165   -}
  143 + return result;
  144 + }
  145 +
  146 +
  147 + CUDA_CALLABLE quaternion(){
  148 + w=0; x=0; y=0; z=0;
  149 + }
  150 +
  151 + CUDA_CALLABLE quaternion(T c, T i, T j, T k){
  152 + w=c; x=i; y=j; z=k;
  153 + }
  154 +
  155 +};
166 156  
167 157 } //end rts namespace
168 158  
169   -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
170   -//template<class T> using rtsQuaternion = rts::quaternion<T>;
171   -//#endif
172 159  
173 160 #endif
... ...
math/realfield.cuh
1 1 #ifndef RTS_REALFIELD_H
2 2 #define RTS_REALFIELD_H
3 3  
4   -#include "../visualization/colormap.h"
5   -#include "../envi/envi.h"
6   -#include "../math/quad.h"
7   -#include "../cuda/devices.h"
8   -#include "cublas_v2.h"
  4 +#include "../visualization/colormap.h"
  5 +#include "../envi/envi.h"
  6 +#include "../math/rect.h"
  7 +#include "../cuda/devices.h"
  8 +#include "cublas_v2.h"
9 9 #include <cuda_runtime.h>
10 10  
11   -///Compute a Gaussian function in 3D (mostly for testing)
12   -/*template<typename T>
13   -__global__ void gpu_gaussian(T* dest, unsigned int r0, unsigned int r1, T mean, T std, rts::quad<T> shape)
14   -{
15   - int iu = blockIdx.x * blockDim.x + threadIdx.x;
16   - int iv = blockIdx.y * blockDim.y + threadIdx.y;
17   -
18   - //make sure that the thread indices are in-bounds
19   - if(iu >= r0 || iv >= r1) return;
20   -
21   - //compute the index into the field
22   - int i = iv*r0 + iu;
23   -
24   - T u = (T)iu / (T)r0;
25   - T v = (T)iv / (T)r1;
26   -
27   - rts::vec<T> p = shape(u, v);
28   -
29   - T fx = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[0] - mean, 2) / (2 * std*std) );
30   - T fy = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[1] - mean, 2) / (2 * std*std) );
31   - T fz = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[2] - mean, 2) / (2 * std*std) );
32   -
33   - dest[i] = fx * fy * fz;
  11 +///Compute a Gaussian function in 3D (mostly for testing)
  12 +/*template<typename T>
  13 +__global__ void gpu_gaussian(T* dest, unsigned int r0, unsigned int r1, T mean, T std, rts::rect<T> shape)
  14 +{
  15 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  16 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  17 +
  18 + //make sure that the thread indices are in-bounds
  19 + if(iu >= r0 || iv >= r1) return;
  20 +
  21 + //compute the index into the field
  22 + int i = iv*r0 + iu;
  23 +
  24 + T u = (T)iu / (T)r0;
  25 + T v = (T)iv / (T)r1;
  26 +
  27 + rts::vec<T> p = shape(u, v);
  28 +
  29 + T fx = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[0] - mean, 2) / (2 * std*std) );
  30 + T fy = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[1] - mean, 2) / (2 * std*std) );
  31 + T fz = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[2] - mean, 2) / (2 * std*std) );
  32 +
  33 + dest[i] = fx * fy * fz;
34 34 }*/
35 35  
36 36 namespace rts{
... ... @@ -38,9 +38,9 @@ namespace rts{
38 38 template<typename P, unsigned int N = 1, bool positive = false>
39 39 class realfield{
40 40  
41   - P* X[N]; //an array of N gpu pointers for each field component
42   - int R[2]; //resolution of the slice
43   - quad<P> shape;
  41 + P* X[N]; //an array of N gpu pointers for each field component
  42 + int R[2]; //resolution of the slice
  43 + rect<P> shape;
44 44  
45 45 void process_filename(std::string name, std::string &prefix, std::string &postfix,
46 46 std::string &ext, unsigned int &digits)
... ... @@ -68,13 +68,13 @@ class realfield{
68 68  
69 69 void init()
70 70 {
71   - for(unsigned int n=0; n<N; n++)
  71 + for(unsigned int n=0; n<N; n++)
72 72 X[n] = NULL;
73 73 }
74 74 void destroy()
75 75 {
76   - for(unsigned int n=0; n<N; n++)
77   - if(X[n] != NULL)
  76 + for(unsigned int n=0; n<N; n++)
  77 + if(X[n] != NULL)
78 78 HANDLE_ERROR(cudaFree(X[n]));
79 79 }
80 80  
... ... @@ -86,25 +86,25 @@ public:
86 86 init();
87 87 std::cout<<"realfield CONSTRUCTOR"<<std::endl;
88 88 }
89   - realfield(unsigned int x, unsigned int y)
90   - {
91   - //set the resolution
92   - R[0] = x;
93   - R[1] = y;
94   - //allocate memory on the GPU
95   - for(unsigned int n=0; n<N; n++)
96   - {
  89 + realfield(unsigned int x, unsigned int y)
  90 + {
  91 + //set the resolution
  92 + R[0] = x;
  93 + R[1] = y;
  94 + //allocate memory on the GPU
  95 + for(unsigned int n=0; n<N; n++)
  96 + {
97 97 HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(P) * R[0] * R[1] ));
98   - }
99   - shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0)); //default geometry
100   - clear(); //zero the field
101   - std::cout<<"realfield CONSTRUCTOR"<<std::endl;
  98 + }
  99 + //shape = rect<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0)); //default geometry
  100 + clear(); //zero the field
  101 + std::cout<<"realfield CONSTRUCTOR"<<std::endl;
102 102 }
103 103  
104   - ~realfield()
105   - {
106   - destroy();
107   - std::cout<<"realfield DESTRUCTOR"<<std::endl;
  104 + ~realfield()
  105 + {
  106 + destroy();
  107 + std::cout<<"realfield DESTRUCTOR"<<std::endl;
108 108 }
109 109  
110 110 P* ptr(unsigned int n)
... ... @@ -115,11 +115,11 @@ public:
115 115 }
116 116  
117 117 //set all components of the field to zero
118   - void clear()
119   - {
120   - for(unsigned int n=0; n<N; n++)
121   - if(X[n] != NULL)
122   - HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(P) * R[0] * R[1]));
  118 + void clear()
  119 + {
  120 + for(unsigned int n=0; n<N; n++)
  121 + if(X[n] != NULL)
  122 + HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(P) * R[0] * R[1]));
123 123 }
124 124  
125 125 void toImage(std::string filename, unsigned int n, P vmin, P vmax, rts::colormapType cmap = rts::cmBrewer)
... ... @@ -127,7 +127,7 @@ public:
127 127 rts::gpu2image<P>(X[n], filename, R[0], R[1], vmin, vmax, cmap);
128 128 }
129 129  
130   - void toImages(std::string filename, rts::colormapType cmap = rts::cmBrewer)
  130 + void toImages(std::string filename, bool global_max = true, rts::colormapType cmap = rts::cmBrewer)
131 131 {
132 132 std::string prefix, postfix, extension;
133 133 unsigned int digits;
... ... @@ -175,74 +175,77 @@ public:
175 175  
176 176 cublasDestroy(handle); //destroy the CUBLAS handle
177 177  
  178 + P outputMax = abs(maxAll); //maximum value used for each output image
178 179 for(int n=0; n<N; n++) //for each image
179 180 {
  181 + if(!global_max) outputMax = maxVal[n]; //calculate the maximum value for this image
  182 +
180 183 stringstream ss; //assemble the file name
181 184 ss<<prefix<<std::setfill('0')<<std::setw(digits)<<n<<postfix<<extension;
182 185 std::cout<<ss.str()<<std::endl;
183 186 if(positive) //if the image is positive
184   - toImage(ss.str(), n, 0, maxAll, cmap); //save the image using the global maximum
  187 + toImage(ss.str(), n, 0, abs(outputMax), cmap); //save the image using the global maximum
185 188 else
186   - toImage(ss.str(), n, -abs(maxVal[n]), abs(maxVal[n]), cmap); //save the image using the global maximum
  189 + toImage(ss.str(), n, -abs(outputMax), abs(outputMax), cmap); //save the image using the global maximum
187 190 }
188 191 }
189 192  
190   - //assignment operator
191   - realfield & operator= (const realfield & rhs)
192   - {
193   - //de-allocate any existing GPU memory
194   - destroy();
195   -
196   - //copy the slice resolution
197   - R[0] = rhs.R[0];
198   - R[1] = rhs.R[1];
199   -
200   - for(unsigned int n=0; n<N; n++)
201   - {
202   - //allocate the necessary memory
203   - HANDLE_ERROR(cudaMalloc(&X[n], sizeof(P) * R[0] * R[1]));
204   - //copy the slice
205   - HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
206   - }
207   - std::cout<<"Assignment operator."<<std::endl;
208   -
209   - return *this;
210   - }
211   -
212   - ///copy constructor
213   - realfield(const realfield &rhs)
214   - {
215   - //first make a shallow copy
216   - R[0] = rhs.R[0];
217   - R[1] = rhs.R[1];
218   -
219   - for(unsigned int n=0; n<N; n++)
220   - {
221   - //do we have to make a deep copy?
222   - if(rhs.X[n] == NULL)
223   - X[n] = NULL; //no
224   - else
225   - {
226   - //allocate the necessary memory
227   - HANDLE_ERROR(cudaMalloc(&X[n], sizeof(P) * R[0] * R[1]));
228   -
229   - //copy the slice
230   - HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
231   - }
232   - }
233   -
234   - std::cout<<"realfield COPY CONSTRUCTOR"<<std::endl;
  193 + //assignment operator
  194 + realfield & operator= (const realfield & rhs)
  195 + {
  196 + //de-allocate any existing GPU memory
  197 + destroy();
  198 +
  199 + //copy the slice resolution
  200 + R[0] = rhs.R[0];
  201 + R[1] = rhs.R[1];
  202 +
  203 + for(unsigned int n=0; n<N; n++)
  204 + {
  205 + //allocate the necessary memory
  206 + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(P) * R[0] * R[1]));
  207 + //copy the slice
  208 + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
  209 + }
  210 + std::cout<<"Assignment operator."<<std::endl;
  211 +
  212 + return *this;
  213 + }
  214 +
  215 + ///copy constructor
  216 + realfield(const realfield &rhs)
  217 + {
  218 + //first make a shallow copy
  219 + R[0] = rhs.R[0];
  220 + R[1] = rhs.R[1];
  221 +
  222 + for(unsigned int n=0; n<N; n++)
  223 + {
  224 + //do we have to make a deep copy?
  225 + if(rhs.X[n] == NULL)
  226 + X[n] = NULL; //no
  227 + else
  228 + {
  229 + //allocate the necessary memory
  230 + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(P) * R[0] * R[1]));
  231 +
  232 + //copy the slice
  233 + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
  234 + }
  235 + }
  236 +
  237 + std::cout<<"realfield COPY CONSTRUCTOR"<<std::endl;
235 238 }
236 239  
237   - /*void gaussian(P mean, P std, unsigned int n=0) //creates a 3D gaussian using component n
238   - {
239   - int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
240   - int SQRT_BLOCK = (int)sqrt((float)maxThreads);
241   - //create one thread for each detector pixel
242   - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
243   - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
244   -
245   - gpu_gaussian<float> <<<dimGrid, dimBlock>>> (X[n], R[0], R[1], mean, std, shape);
  240 + /*void gaussian(P mean, P std, unsigned int n=0) //creates a 3D gaussian using component n
  241 + {
  242 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  243 + int SQRT_BLOCK = (int)sqrt((float)maxThreads);
  244 + //create one thread for each detector pixel
  245 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  246 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  247 +
  248 + gpu_gaussian<float> <<<dimGrid, dimBlock>>> (X[n], R[0], R[1], mean, std, shape);
246 249 }*/
247 250  
248 251  
... ...
math/rect.h 0 โ†’ 100644
  1 +#ifndef RTS_RECT_H
  2 +#define RTS_RECT_H
  3 +
  4 +//enable CUDA_CALLABLE macro
  5 +#include "../cuda/callable.h"
  6 +#include "../math/vector.h"
  7 +#include "../math/triangle.h"
  8 +#include "../math/quaternion.h"
  9 +#include <iostream>
  10 +#include <iomanip>
  11 +#include <algorithm>
  12 +
  13 +namespace rts{
  14 +
  15 +//template for a rectangle class in ND space
  16 +template <class T, int N = 3>
  17 +struct rect
  18 +{
  19 + /*
  20 + ^ O
  21 + |
  22 + |
  23 + Y C
  24 + |
  25 + |
  26 + O---------X--------->
  27 + */
  28 +
  29 +private:
  30 +
  31 + rts::vec<T, N> C;
  32 + rts::vec<T, N> X;
  33 + rts::vec<T, N> Y;
  34 +
  35 + CUDA_CALLABLE void scale(T factor){
  36 + X *= factor;
  37 + Y *= factor;
  38 + }
  39 +
  40 + CUDA_CALLABLE void normal(vec<T, N> n){ //orient the rectangle along the specified normal
  41 +
  42 + n = n.norm(); //normalize, just in case
  43 + vec<T, N> n_current = X.cross(Y).norm(); //compute the current normal
  44 + quaternion<T> q; //create a quaternion
  45 + q.CreateRotation(n_current, n); //initialize a rotation from n_current to n
  46 +
  47 + //apply the quaternion to the vectors and position
  48 + X = q.toMatrix3() * X;
  49 + Y = q.toMatrix3() * Y;
  50 + }
  51 +
  52 + CUDA_CALLABLE void init(){
  53 + C = vec<T, N>(0, 0, 0);
  54 + X = vec<T, N>(1, 0, 0);
  55 + Y = vec<T, N>(0, 1, 0);
  56 + }
  57 +
  58 +public:
  59 +
  60 + CUDA_CALLABLE rect(){
  61 + init();
  62 + }
  63 +
  64 + CUDA_CALLABLE rect(T size, T z_pos = (T)0){
  65 + init(); //use the default setup
  66 + scale(size); //scale the rectangle
  67 + C[2] = z_pos;
  68 + }
  69 +
  70 + CUDA_CALLABLE rect(T size, vec<T, N> c, vec<T, N> n = vec<T, N>(0, 0, 1)){
  71 + init(); //start with the default setting
  72 + C = c;
  73 + scale(size); //scale the rectangle
  74 + normal(n); //orient
  75 +
  76 + }
  77 +
  78 + /*CUDA_CALLABLE rect(vec<T, N> a, vec<T, N> b, vec<T, N> c)
  79 + {
  80 + A = a;
  81 + Y = b - a;
  82 + X = c - a - Y;
  83 +
  84 + }*/
  85 +
  86 + /*******************************************************************
  87 + Constructor - create a rect from a position, normal, and rotation
  88 + *******************************************************************/
  89 + /*CUDA_CALLABLE rect(rts::vec<T, N> c, rts::vec<T, N> normal, T width, T height, T theta)
  90 + {
  91 +
  92 + //compute the X direction - start along world-space X
  93 + Y = rts::vec<T, N>(0, 1, 0);
  94 + if(Y == normal)
  95 + Y = rts::vec<T, N>(0, 0, 1);
  96 +
  97 + X = Y.cross(normal).norm();
  98 +
  99 + std::cout<<X<<std::endl;
  100 +
  101 + //rotate the X axis by theta radians
  102 + rts::quaternion<T> q;
  103 + q.CreateRotation(theta, normal);
  104 + X = q.toMatrix3() * X;
  105 + Y = normal.cross(X);
  106 +
  107 + //normalize everything
  108 + X = X.norm();
  109 + Y = Y.norm();
  110 +
  111 + //scale to match the rect width and height
  112 + X = X * width;
  113 + Y = Y * height;
  114 +
  115 + //set the corner of the plane
  116 + A = c - X * 0.5f - Y * 0.5f;
  117 +
  118 + std::cout<<X<<std::endl;
  119 + }*/
  120 +
  121 + //boolean comparison
  122 + bool operator==(const rect<T, N> & rhs)
  123 + {
  124 + if(C == rhs.C && X == rhs.X && Y == rhs.Y)
  125 + return true;
  126 + else
  127 + return false;
  128 + }
  129 +
  130 + /*******************************************
  131 + Return the normal for the rect
  132 + *******************************************/
  133 + CUDA_CALLABLE rts::vec<T, N> n()
  134 + {
  135 + return (X.cross(Y)).norm();
  136 + }
  137 +
  138 + CUDA_CALLABLE rts::vec<T, N> p(T a, T b)
  139 + {
  140 + rts::vec<T, N> result;
  141 + //given the two parameters a, b = [0 1], returns the position in world space
  142 + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
  143 + result = A + X * a + Y * b;
  144 +
  145 + return result;
  146 + }
  147 +
  148 + CUDA_CALLABLE rts::vec<T, N> operator()(T a, T b)
  149 + {
  150 + return p(a, b);
  151 + }
  152 +
  153 + std::string str()
  154 + {
  155 + std::stringstream ss;
  156 + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
  157 + ss<<std::left<<"B="<<setfill('-')<<setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
  158 + ss<<setfill(' ')<<setw(23)<<"|"<<"|"<<std::endl<<setw(23)<<"|"<<"|"<<std::endl;
  159 + ss<<std::left<<"A="<<setfill('-')<<setw(20)<<A<<">"<<"D="<<A + X;
  160 +
  161 + return ss.str();
  162 +
  163 + }
  164 +
  165 + CUDA_CALLABLE rect<T, N> operator*(T rhs)
  166 + {
  167 + //scales the plane by a scalar value
  168 +
  169 + //create the new rectangle
  170 + rect<T, N> result = *this;
  171 + result.scale(rhs);
  172 +
  173 + return result;
  174 +
  175 + }
  176 +
  177 + CUDA_CALLABLE T dist(vec<T, N> p)
  178 + {
  179 + //compute the distance between a point and this rect
  180 +
  181 + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
  182 +
  183 + //first break the rect up into two triangles
  184 + triangle<T, N> T0(A, A+X, A+Y);
  185 + triangle<T, N> T1(A+X+Y, A+X, A+Y);
  186 +
  187 +
  188 + T d0 = T0.dist(p);
  189 + T d1 = T1.dist(p);
  190 +
  191 + if(d0 < d1)
  192 + return d0;
  193 + else
  194 + return d1;
  195 + }
  196 +
  197 + CUDA_CALLABLE T dist_max(vec<T, N> p)
  198 + {
  199 + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
  200 + T da = (A - p).len();
  201 + T db = (A+X - p).len();
  202 + T dc = (A+Y - p).len();
  203 + T dd = (A+X+Y - p).len();
  204 +
  205 + return std::max( da, std::max(db, std::max(dc, dd) ) );
  206 + }
  207 +};
  208 +
  209 +} //end namespace rts
  210 +
  211 +template <typename T, int N>
  212 +std::ostream& operator<<(std::ostream& os, rts::rect<T, N> R)
  213 +{
  214 + os<<R.str();
  215 + return os;
  216 +}
  217 +
  218 +
  219 +#endif
... ...
math/vector.h
... ... @@ -169,6 +169,11 @@ struct vec
169 169  
170 170 return result;
171 171 }
  172 + CUDA_CALLABLE vec<T, N> operator*=(T rhs){
  173 + for(int i=0; i<N; i++)
  174 + v[i] = v[i] * rhs;
  175 + return *this;
  176 + }
172 177  
173 178 //conversion from a point
174 179 /*CUDA_CALLABLE vector<T, N> & operator=(point<T, N> rhs)
... ...
optics/efield.cuh
... ... @@ -14,7 +14,7 @@ namespace rts{
14 14  
15 15 template<typename T>
16 16 __global__ void gpu_planewave2efield(complex<T>* X, complex<T>* Y, complex<T>* Z, unsigned int r0, unsigned int r1,
17   - planewave<T> w, quad<T> q)
  17 + planewave<T> w, rect<T> q)
18 18 {
19 19 int iu = blockIdx.x * blockDim.x + threadIdx.x;
20 20 int iv = blockIdx.y * blockDim.y + threadIdx.y;
... ... @@ -119,7 +119,7 @@ protected:
119 119 unsigned int R[2];
120 120  
121 121 //shape of the 2D field
122   - quad<P> pos;
  122 + rect<P> pos;
123 123  
124 124 void from_planewave(planewave<P> p)
125 125 {
... ... @@ -196,7 +196,7 @@ public:
196 196 efield(unsigned int res0, unsigned int res1, bool _vector = true)
197 197 {
198 198 init(res0, res1, _vector);
199   - pos = rts::quad<P>(rts::vec<P>(-10, 0, -10), rts::vec<P>(-10, 0, 10), rts::vec<P>(10, 0, 10));
  199 + //pos = rts::rect<P>(rts::vec<P>(-10, 0, -10), rts::vec<P>(-10, 0, 10), rts::vec<P>(10, 0, 10));
200 200 }
201 201  
202 202 //destructor
... ... @@ -217,7 +217,7 @@ public:
217 217 }
218 218 }
219 219  
220   - void position(quad<P> _p)
  220 + void position(rect<P> _p)
221 221 {
222 222 pos = _p;
223 223 }
... ...
optics/planewave.h
... ... @@ -60,7 +60,35 @@ public:
60 60  
61 61 planewave<P> refract(rts::vec<P> kn) const
62 62 {
63   - vec<P> kn_hat = kn.norm(); //normalize new_k
  63 + vec<P> kn_hat = kn.norm(); //normalize the new k
  64 + vec<P> k_hat = k.norm(); //normalize the current k
  65 +
  66 + vec<P> r = k_hat.cross(kn_hat); //compute the rotation vector
  67 +
  68 + P theta = asin(r.len()); //compute the angle of the rotation about r
  69 +
  70 + planewave<P> new_p; //create a new plane wave
  71 +
  72 + //deal with a zero vector (both k and kn point in the same direction)
  73 + if(theta == (P)0)
  74 + {
  75 + new_p = *this;
  76 + return new_p;
  77 + }
  78 +
  79 + //create a quaternion to capture the rotation
  80 + quaternion<P> q;
  81 + q.CreateRotation(theta, r.norm());
  82 +
  83 + //apply the rotation to E0
  84 + vec<P> E0n = q.toMatrix3() * E0;
  85 +
  86 + new_p.k = kn_hat * k.len();
  87 + new_p.E0 = E0n;
  88 +
  89 + return new_p;
  90 +
  91 + /*vec<P> kn_hat = kn.norm(); //normalize new_k
64 92 vec<P> k_hat = k.norm();
65 93  
66 94 //compute the side vector (around which we will be rotating)
... ... @@ -86,7 +114,8 @@ public:
86 114 new_p.E0 = E0_prime;
87 115 new_p.k = kn_hat * k.len();
88 116  
89   - return new_p;
  117 + return new_p;*/
  118 +
90 119 /*vec<P> kn_hat = kn.norm(); //normalize kn
91 120 vec<P> k_hat = k.norm();
92 121 vec<P> E0_hat = E0.norm();
... ...