Commit 6c4afcacad36be2242959cb5bd0d1e9cb2480375
1 parent
7115e973
introduced a generalized matrix class, previous is now matrix_sq
Showing
4 changed files
with
258 additions
and
86 deletions
Show diff stats
stim/math/matrix.h
... | ... | @@ -10,92 +10,140 @@ |
10 | 10 | |
11 | 11 | namespace stim{ |
12 | 12 | |
13 | -template <class T, int N> | |
14 | -struct matrix | |
15 | -{ | |
13 | +template <class T> | |
14 | +class matrix { | |
16 | 15 | //the matrix will be stored in column-major order (compatible with OpenGL) |
17 | - T M[N*N]; | |
16 | + T* M; //pointer to the matrix data | |
17 | + size_t R; //number of rows | |
18 | + size_t C; //number of columns | |
19 | + | |
20 | + void init(size_t rows, size_t cols){ | |
21 | + R = rows; | |
22 | + C = cols; | |
23 | + M = (T*) malloc (R * C * sizeof(T)); //allocate space for the matrix | |
24 | + } | |
18 | 25 | |
19 | - CUDA_CALLABLE matrix() | |
20 | - { | |
21 | - for(int r=0; r<N; r++) | |
22 | - for(int c=0; c<N; c++) | |
23 | - if(r == c) | |
24 | - (*this)(r, c) = 1; | |
25 | - else | |
26 | - (*this)(r, c) = 0; | |
26 | + T& at(size_t row, size_t col){ | |
27 | + return M[col * R + row]; | |
27 | 28 | } |
28 | 29 | |
29 | - CUDA_CALLABLE matrix(T rhs[N*N]) | |
30 | - { | |
31 | - memcpy(M,rhs, sizeof(T)*N*N); | |
30 | +public: | |
31 | + matrix(size_t rows, size_t cols) { | |
32 | + init(rows, cols); //initialize memory | |
32 | 33 | } |
33 | 34 | |
34 | - CUDA_CALLABLE matrix<T,N> set(T rhs[N*N]) | |
35 | - { | |
36 | - memcpy(M, rhs, sizeof(T)*N*N); | |
37 | - return *this; | |
35 | + matrix(size_t rows, size_t cols, T* data) { | |
36 | + init(rows, cols); | |
37 | + memcpy(M, rhs, R * C * sizeof(T)); | |
38 | 38 | } |
39 | 39 | |
40 | - //create a symmetric matrix given the rhs values, given in column-major order | |
41 | - CUDA_CALLABLE void setsym(T rhs[(N*N+N)/2]){ | |
42 | - const size_t L = (N*N+N)/2; //store the number of values | |
40 | + matrix(const matrix<T>& cpy){ | |
41 | + init(cpy.R, cpy.C); | |
42 | + memcpy(M, cpy.M, R * C * sizeof(T)); | |
43 | + } | |
43 | 44 | |
44 | - size_t r, c; | |
45 | - r = c = 0; | |
46 | - for(size_t i = 0; i < L; i++){ //for each value | |
47 | - if(r == c) M[c * N + r] = rhs[i]; | |
48 | - else M[c*N + r] = M[r * N + c] = rhs[i]; | |
49 | - r++; | |
50 | - if(r == N) r = ++c; | |
51 | - } | |
45 | + ~matrix() { | |
46 | + R = C = 0; | |
47 | + if(M) free(M); | |
52 | 48 | } |
53 | 49 | |
54 | - CUDA_CALLABLE T& operator()(int row, int col) | |
55 | - { | |
56 | - return M[col * N + row]; | |
50 | + size_t rows(){ | |
51 | + return R; | |
57 | 52 | } |
58 | 53 | |
59 | - CUDA_CALLABLE matrix<T, N> operator=(T rhs) | |
60 | - { | |
61 | - int Nsq = N*N; | |
62 | - for(int i=0; i<Nsq; i++) | |
63 | - M[i] = rhs; | |
54 | + size_t cols(){ | |
55 | + return C; | |
56 | + } | |
64 | 57 | |
58 | + T& operator()(int row, int col) { | |
59 | + return at(row, col); | |
60 | + } | |
61 | + | |
62 | + matrix<T> operator=(T rhs) { | |
63 | + size_t N = R * C; | |
64 | + for(size_t n=0; n<N; n++) | |
65 | + M[n] = rhs; | |
66 | + | |
67 | + return *this; | |
68 | + } | |
69 | + | |
70 | + matrix<T>& operator=(matrix<T> rhs){ | |
71 | + init(rhs.R, rhs.C); | |
72 | + memcpy(M, rhs.M, R * C * sizeof(T)); | |
65 | 73 | return *this; |
66 | 74 | } |
67 | 75 | |
68 | - // M - rhs*I | |
69 | - CUDA_CALLABLE matrix<T, N> operator-(T rhs) | |
70 | - { | |
76 | + //element-wise operations | |
77 | + matrix<T> operator+(T rhs) { | |
78 | + matrix<T> result(R, C); //create a result matrix | |
79 | + size_t N = R * C; | |
80 | + | |
71 | 81 | for(int i=0; i<N; i++) |
72 | - for(int j=0 ; j<N; j++) | |
73 | - if(i == j) | |
74 | - M[i*N+j] -= rhs; | |
75 | - return *this; | |
82 | + result.M[i] = M[i] + rhs; //calculate the operation and assign to result | |
83 | + | |
84 | + return result; | |
76 | 85 | } |
77 | 86 | |
78 | - template<typename Y> | |
79 | - vec<Y> operator*(vec<Y> rhs){ | |
80 | - unsigned int M = rhs.size(); | |
87 | + matrix<T> operator-(T rhs) { | |
88 | + return operator+(-rhs); //add the negative of rhs | |
89 | + } | |
90 | + | |
91 | + matrix<T> operator*(T rhs) { | |
92 | + matrix<T> result(R, C); //create a result matrix | |
93 | + size_t N = R * C; | |
94 | + | |
95 | + for(int i=0; i<N; i++) | |
96 | + result.M[i] = M[i] * rhs; //calculate the operation and assign to result | |
97 | + | |
98 | + return result; | |
99 | + } | |
100 | + | |
101 | + matrix<T> operator/(T rhs) { | |
102 | + matrix<T> result(R, C); //create a result matrix | |
103 | + size_t N = R * C; | |
104 | + | |
105 | + for(int i=0; i<N; i++) | |
106 | + result.M[i] = M[i] / rhs; //calculate the operation and assign to result | |
81 | 107 | |
82 | - vec<Y> result; | |
83 | - result.resize(M); | |
108 | + return result; | |
109 | + } | |
84 | 110 | |
85 | - for(int r=0; r<M; r++) | |
86 | - for(int c=0; c<M; c++) | |
87 | - result[r] += (*this)(r, c) * rhs[c]; | |
111 | + //matrix multiplication | |
112 | + matrix<T> operator*(matrix<T> rhs){ | |
113 | + if(C != rhs.R){ | |
114 | + std::cout<<"ERROR: matrix multiplication is undefined for matrices of size "; | |
115 | + std::cout<<"[ "<<R<<" x "<<C<<" ] and [ "<<rhs.R<<" x "<<rhs.C<<"]"<<std::endl; | |
116 | + exit(1); | |
117 | + } | |
88 | 118 | |
119 | + matrix<T> result(R, rhs.C); //create the output matrix | |
120 | + T inner; //stores the running inner product | |
121 | + for(size_t c = 0; c < rhs.C; c++){ | |
122 | + for(size_t r = 0; r < R; r++){ | |
123 | + inner = (T)0; | |
124 | + for(size_t i = 0; i < C; i++){ | |
125 | + inner += at(r, i) * rhs.at(i, c); | |
126 | + } | |
127 | + result.M[c * R + r] = inner; | |
128 | + } | |
129 | + } | |
89 | 130 | return result; |
90 | 131 | } |
91 | 132 | |
92 | - template<typename Y> | |
93 | - CUDA_CALLABLE vec3<Y> operator*(vec3<Y> rhs){ | |
94 | - vec3<Y> result = 0; | |
95 | - for(int r=0; r<3; r++) | |
96 | - for(int c=0; c<3; c++) | |
97 | - result[r] += (*this)(r, c) * rhs[c]; | |
133 | + //returns a pointer to the raw matrix data (in column major format) | |
134 | + T* data(){ | |
135 | + return M; | |
136 | + } | |
98 | 137 | |
138 | + //return a transposed matrix | |
139 | + matrix<T> transpose(){ | |
140 | + matrix<T> result(C, R); | |
141 | + size_t c, r; | |
142 | + for(c = 0; c < C; c++){ | |
143 | + for(r = 0; r < R; r++){ | |
144 | + result.M[r * C + c] = M[c * R + r]; | |
145 | + } | |
146 | + } | |
99 | 147 | return result; |
100 | 148 | } |
101 | 149 | |
... | ... | @@ -103,12 +151,12 @@ struct matrix |
103 | 151 | { |
104 | 152 | std::stringstream ss; |
105 | 153 | |
106 | - for(int r = 0; r < N; r++) | |
154 | + for(int r = 0; r < R; r++) | |
107 | 155 | { |
108 | 156 | ss << "| "; |
109 | - for(int c=0; c<N; c++) | |
157 | + for(int c=0; c<C; c++) | |
110 | 158 | { |
111 | - ss << (*this)(r, c) << " "; | |
159 | + ss << M[c * R + r] << " "; | |
112 | 160 | } |
113 | 161 | ss << "|" << std::endl; |
114 | 162 | } |
... | ... | @@ -116,26 +164,9 @@ struct matrix |
116 | 164 | return ss.str(); |
117 | 165 | } |
118 | 166 | |
119 | - static matrix<T, N> identity() { | |
120 | - matrix<T, N> I; | |
121 | - I = 0; | |
122 | - for (size_t i = 0; i < N; i++) | |
123 | - I.M[i * N + i] = 1; | |
124 | - return I; | |
125 | - } | |
126 | 167 | }; |
127 | 168 | |
128 | 169 | } //end namespace stim | |
129 | 170 | |
130 | -template <typename T, int N> | |
131 | -std::ostream& operator<<(std::ostream& os, stim::matrix<T, N> M) | |
132 | -{ | |
133 | - os<<M.toStr(); | |
134 | - return os; | |
135 | -} | |
136 | - | |
137 | -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7 | |
138 | -//template<class T, int N> using rtsMatrix = rts::matrix<T, N>; | |
139 | -//#endif | |
140 | 171 | |
141 | 172 | #endif | ... | ... |
1 | +#ifndef RTS_MATRIX_H | |
2 | +#define RTS_MATRIX_H | |
3 | + | |
4 | +//#include "rts/vector.h" | |
5 | +#include <string.h> | |
6 | +#include <iostream> | |
7 | +#include <stim/math/vector.h> | |
8 | +#include <stim/math/vec3.h> | |
9 | +#include <stim/cuda/cudatools/callable.h> | |
10 | + | |
11 | +namespace stim{ | |
12 | + | |
13 | +template <class T, int N> | |
14 | +struct matrix_sq | |
15 | +{ | |
16 | + //the matrix will be stored in column-major order (compatible with OpenGL) | |
17 | + T M[N*N]; | |
18 | + | |
19 | + CUDA_CALLABLE matrix_sq() | |
20 | + { | |
21 | + for(int r=0; r<N; r++) | |
22 | + for(int c=0; c<N; c++) | |
23 | + if(r == c) | |
24 | + (*this)(r, c) = 1; | |
25 | + else | |
26 | + (*this)(r, c) = 0; | |
27 | + } | |
28 | + | |
29 | + CUDA_CALLABLE matrix_sq(T rhs[N*N]) | |
30 | + { | |
31 | + memcpy(M,rhs, sizeof(T)*N*N); | |
32 | + } | |
33 | + | |
34 | + CUDA_CALLABLE matrix_sq<T,N> set(T rhs[N*N]) | |
35 | + { | |
36 | + memcpy(M, rhs, sizeof(T)*N*N); | |
37 | + return *this; | |
38 | + } | |
39 | + | |
40 | + //create a symmetric matrix given the rhs values, given in column-major order | |
41 | + CUDA_CALLABLE void setsym(T rhs[(N*N+N)/2]){ | |
42 | + const size_t L = (N*N+N)/2; //store the number of values | |
43 | + | |
44 | + size_t r, c; | |
45 | + r = c = 0; | |
46 | + for(size_t i = 0; i < L; i++){ //for each value | |
47 | + if(r == c) M[c * N + r] = rhs[i]; | |
48 | + else M[c*N + r] = M[r * N + c] = rhs[i]; | |
49 | + r++; | |
50 | + if(r == N) r = ++c; | |
51 | + } | |
52 | + } | |
53 | + | |
54 | + CUDA_CALLABLE T& operator()(int row, int col) | |
55 | + { | |
56 | + return M[col * N + row]; | |
57 | + } | |
58 | + | |
59 | + CUDA_CALLABLE matrix_sq<T, N> operator=(T rhs) | |
60 | + { | |
61 | + int Nsq = N*N; | |
62 | + for(int i=0; i<Nsq; i++) | |
63 | + M[i] = rhs; | |
64 | + | |
65 | + return *this; | |
66 | + } | |
67 | + | |
68 | + // M - rhs*I | |
69 | + CUDA_CALLABLE matrix_sq<T, N> operator-(T rhs) | |
70 | + { | |
71 | + for(int i=0; i<N; i++) | |
72 | + for(int j=0 ; j<N; j++) | |
73 | + if(i == j) | |
74 | + M[i*N+j] -= rhs; | |
75 | + return *this; | |
76 | + } | |
77 | + | |
78 | + template<typename Y> | |
79 | + vec<Y> operator*(vec<Y> rhs){ | |
80 | + unsigned int M = rhs.size(); | |
81 | + | |
82 | + vec<Y> result; | |
83 | + result.resize(M); | |
84 | + | |
85 | + for(int r=0; r<M; r++) | |
86 | + for(int c=0; c<M; c++) | |
87 | + result[r] += (*this)(r, c) * rhs[c]; | |
88 | + | |
89 | + return result; | |
90 | + } | |
91 | + | |
92 | + template<typename Y> | |
93 | + CUDA_CALLABLE vec3<Y> operator*(vec3<Y> rhs){ | |
94 | + vec3<Y> result = 0; | |
95 | + for(int r=0; r<3; r++) | |
96 | + for(int c=0; c<3; c++) | |
97 | + result[r] += (*this)(r, c) * rhs[c]; | |
98 | + | |
99 | + return result; | |
100 | + } | |
101 | + | |
102 | + std::string toStr() | |
103 | + { | |
104 | + std::stringstream ss; | |
105 | + | |
106 | + for(int r = 0; r < N; r++) | |
107 | + { | |
108 | + ss << "| "; | |
109 | + for(int c=0; c<N; c++) | |
110 | + { | |
111 | + ss << (*this)(r, c) << " "; | |
112 | + } | |
113 | + ss << "|" << std::endl; | |
114 | + } | |
115 | + | |
116 | + return ss.str(); | |
117 | + } | |
118 | + | |
119 | + static matrix_sq<T, N> identity() { | |
120 | + matrix_sq<T, N> I; | |
121 | + I = 0; | |
122 | + for (size_t i = 0; i < N; i++) | |
123 | + I.M[i * N + i] = 1; | |
124 | + return I; | |
125 | + } | |
126 | +}; | |
127 | + | |
128 | +} //end namespace stim | |
129 | + | |
130 | +template <typename T, int N> | |
131 | +std::ostream& operator<<(std::ostream& os, stim::matrix_sq<T, N> M) | |
132 | +{ | |
133 | + os<<M.toStr(); | |
134 | + return os; | |
135 | +} | |
136 | + | |
137 | +//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7 | |
138 | +//template<class T, int N> using rtsMatrix = rts::matrix<T, N>; | |
139 | +//#endif | |
140 | + | |
141 | +#endif | ... | ... |
stim/math/quaternion.h
1 | 1 | #ifndef RTS_QUATERNION_H |
2 | 2 | #define RTS_QUATERNION_H |
3 | 3 | |
4 | -#include <stim/math/matrix.h> | |
4 | +#include <stim/math/matrix_sq.h> | |
5 | 5 | #include <stim/cuda/cudatools/callable.h> |
6 | 6 | |
7 | 7 | namespace stim{ |
... | ... | @@ -81,9 +81,9 @@ public: |
81 | 81 | return result; |
82 | 82 | } |
83 | 83 | |
84 | - CUDA_CALLABLE matrix<T, 3> toMatrix3(){ | |
84 | + CUDA_CALLABLE matrix_sq<T, 3> toMatrix3(){ | |
85 | 85 | |
86 | - matrix<T, 3> result; | |
86 | + matrix_sq<T, 3> result; | |
87 | 87 | |
88 | 88 | |
89 | 89 | T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2; |
... | ... | @@ -114,9 +114,9 @@ public: |
114 | 114 | return result; |
115 | 115 | } |
116 | 116 | |
117 | - CUDA_CALLABLE matrix<T, 4> toMatrix4(){ | |
117 | + CUDA_CALLABLE matrix_sq<T, 4> toMatrix4(){ | |
118 | 118 | |
119 | - matrix<T, 4> result; | |
119 | + matrix_sq<T, 4> result; | |
120 | 120 | T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2; |
121 | 121 | |
122 | 122 | // calculate coefficients | ... | ... |
stim/optics/scalarbeam.h
... | ... | @@ -27,7 +27,7 @@ std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d |
27 | 27 | |
28 | 28 | ///compute the rotation operator to transform (0, 0, 1) to k |
29 | 29 | T cos_angle = d.dot(vec3<T>(0, 0, 1)); |
30 | - stim::matrix<T, 3> rotation; | |
30 | + stim::matrix_sq<T, 3> rotation; | |
31 | 31 | |
32 | 32 | //if the cosine of the angle is -1, the rotation is just a flip across the z axis |
33 | 33 | if(cos_angle == -1){ |
... | ... | @@ -318,7 +318,7 @@ void gpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lamb |
318 | 318 | |
319 | 319 | stim::quaternion<T> q; //create a quaternion |
320 | 320 | q.CreateRotation(d, stim::vec3<T>(0, 0, 1)); //create a mapping from the propagation direction to the PSF space |
321 | - stim::matrix<T, 3> rot = q.toMatrix3(); | |
321 | + stim::matrix_sq<T, 3> rot = q.toMatrix3(); | |
322 | 322 | int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device |
323 | 323 | dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks |
324 | 324 | cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q); //call the CUDA kernel to move the cartesian coordinates to PSF space | ... | ... |