Commit 6c4afcacad36be2242959cb5bd0d1e9cb2480375
1 parent
7115e973
Introduced a generalized (arbitrary rows x columns) matrix class; the previous fixed-size square matrix class is now matrix_sq
Showing
4 changed files
with
258 additions
and
86 deletions
Show diff stats
stim/math/matrix.h
@@ -10,92 +10,140 @@ | @@ -10,92 +10,140 @@ | ||
10 | 10 | ||
11 | namespace stim{ | 11 | namespace stim{ |
12 | 12 | ||
13 | -template <class T, int N> | ||
14 | -struct matrix | ||
15 | -{ | 13 | +template <class T> |
14 | +class matrix { | ||
16 | //the matrix will be stored in column-major order (compatible with OpenGL) | 15 | //the matrix will be stored in column-major order (compatible with OpenGL) |
17 | - T M[N*N]; | 16 | + T* M; //pointer to the matrix data |
17 | + size_t R; //number of rows | ||
18 | + size_t C; //number of columns | ||
19 | + | ||
20 | + void init(size_t rows, size_t cols){ | ||
21 | + R = rows; | ||
22 | + C = cols; | ||
23 | + M = (T*) malloc (R * C * sizeof(T)); //allocate space for the matrix | ||
24 | + } | ||
18 | 25 | ||
19 | - CUDA_CALLABLE matrix() | ||
20 | - { | ||
21 | - for(int r=0; r<N; r++) | ||
22 | - for(int c=0; c<N; c++) | ||
23 | - if(r == c) | ||
24 | - (*this)(r, c) = 1; | ||
25 | - else | ||
26 | - (*this)(r, c) = 0; | 26 | + T& at(size_t row, size_t col){ |
27 | + return M[col * R + row]; | ||
27 | } | 28 | } |
28 | 29 | ||
29 | - CUDA_CALLABLE matrix(T rhs[N*N]) | ||
30 | - { | ||
31 | - memcpy(M,rhs, sizeof(T)*N*N); | 30 | +public: |
31 | + matrix(size_t rows, size_t cols) { | ||
32 | + init(rows, cols); //initialize memory | ||
32 | } | 33 | } |
33 | 34 | ||
34 | - CUDA_CALLABLE matrix<T,N> set(T rhs[N*N]) | ||
35 | - { | ||
36 | - memcpy(M, rhs, sizeof(T)*N*N); | ||
37 | - return *this; | 35 | + matrix(size_t rows, size_t cols, T* data) { |
36 | + init(rows, cols); | ||
37 | + memcpy(M, rhs, R * C * sizeof(T)); | ||
38 | } | 38 | } |
39 | 39 | ||
40 | - //create a symmetric matrix given the rhs values, given in column-major order | ||
41 | - CUDA_CALLABLE void setsym(T rhs[(N*N+N)/2]){ | ||
42 | - const size_t L = (N*N+N)/2; //store the number of values | 40 | + matrix(const matrix<T>& cpy){ |
41 | + init(cpy.R, cpy.C); | ||
42 | + memcpy(M, cpy.M, R * C * sizeof(T)); | ||
43 | + } | ||
43 | 44 | ||
44 | - size_t r, c; | ||
45 | - r = c = 0; | ||
46 | - for(size_t i = 0; i < L; i++){ //for each value | ||
47 | - if(r == c) M[c * N + r] = rhs[i]; | ||
48 | - else M[c*N + r] = M[r * N + c] = rhs[i]; | ||
49 | - r++; | ||
50 | - if(r == N) r = ++c; | ||
51 | - } | 45 | + ~matrix() { |
46 | + R = C = 0; | ||
47 | + if(M) free(M); | ||
52 | } | 48 | } |
53 | 49 | ||
54 | - CUDA_CALLABLE T& operator()(int row, int col) | ||
55 | - { | ||
56 | - return M[col * N + row]; | 50 | + size_t rows(){ |
51 | + return R; | ||
57 | } | 52 | } |
58 | 53 | ||
59 | - CUDA_CALLABLE matrix<T, N> operator=(T rhs) | ||
60 | - { | ||
61 | - int Nsq = N*N; | ||
62 | - for(int i=0; i<Nsq; i++) | ||
63 | - M[i] = rhs; | 54 | + size_t cols(){ |
55 | + return C; | ||
56 | + } | ||
64 | 57 | ||
58 | + T& operator()(int row, int col) { | ||
59 | + return at(row, col); | ||
60 | + } | ||
61 | + | ||
62 | + matrix<T> operator=(T rhs) { | ||
63 | + size_t N = R * C; | ||
64 | + for(size_t n=0; n<N; n++) | ||
65 | + M[n] = rhs; | ||
66 | + | ||
67 | + return *this; | ||
68 | + } | ||
69 | + | ||
70 | + matrix<T>& operator=(matrix<T> rhs){ | ||
71 | + init(rhs.R, rhs.C); | ||
72 | + memcpy(M, rhs.M, R * C * sizeof(T)); | ||
65 | return *this; | 73 | return *this; |
66 | } | 74 | } |
67 | 75 | ||
68 | - // M - rhs*I | ||
69 | - CUDA_CALLABLE matrix<T, N> operator-(T rhs) | ||
70 | - { | 76 | + //element-wise operations |
77 | + matrix<T> operator+(T rhs) { | ||
78 | + matrix<T> result(R, C); //create a result matrix | ||
79 | + size_t N = R * C; | ||
80 | + | ||
71 | for(int i=0; i<N; i++) | 81 | for(int i=0; i<N; i++) |
72 | - for(int j=0 ; j<N; j++) | ||
73 | - if(i == j) | ||
74 | - M[i*N+j] -= rhs; | ||
75 | - return *this; | 82 | + result.M[i] = M[i] + rhs; //calculate the operation and assign to result |
83 | + | ||
84 | + return result; | ||
76 | } | 85 | } |
77 | 86 | ||
78 | - template<typename Y> | ||
79 | - vec<Y> operator*(vec<Y> rhs){ | ||
80 | - unsigned int M = rhs.size(); | 87 | + matrix<T> operator-(T rhs) { |
88 | + return operator+(-rhs); //add the negative of rhs | ||
89 | + } | ||
90 | + | ||
91 | + matrix<T> operator*(T rhs) { | ||
92 | + matrix<T> result(R, C); //create a result matrix | ||
93 | + size_t N = R * C; | ||
94 | + | ||
95 | + for(int i=0; i<N; i++) | ||
96 | + result.M[i] = M[i] * rhs; //calculate the operation and assign to result | ||
97 | + | ||
98 | + return result; | ||
99 | + } | ||
100 | + | ||
101 | + matrix<T> operator/(T rhs) { | ||
102 | + matrix<T> result(R, C); //create a result matrix | ||
103 | + size_t N = R * C; | ||
104 | + | ||
105 | + for(int i=0; i<N; i++) | ||
106 | + result.M[i] = M[i] / rhs; //calculate the operation and assign to result | ||
81 | 107 | ||
82 | - vec<Y> result; | ||
83 | - result.resize(M); | 108 | + return result; |
109 | + } | ||
84 | 110 | ||
85 | - for(int r=0; r<M; r++) | ||
86 | - for(int c=0; c<M; c++) | ||
87 | - result[r] += (*this)(r, c) * rhs[c]; | 111 | + //matrix multiplication |
112 | + matrix<T> operator*(matrix<T> rhs){ | ||
113 | + if(C != rhs.R){ | ||
114 | + std::cout<<"ERROR: matrix multiplication is undefined for matrices of size "; | ||
115 | + std::cout<<"[ "<<R<<" x "<<C<<" ] and [ "<<rhs.R<<" x "<<rhs.C<<"]"<<std::endl; | ||
116 | + exit(1); | ||
117 | + } | ||
88 | 118 | ||
119 | + matrix<T> result(R, rhs.C); //create the output matrix | ||
120 | + T inner; //stores the running inner product | ||
121 | + for(size_t c = 0; c < rhs.C; c++){ | ||
122 | + for(size_t r = 0; r < R; r++){ | ||
123 | + inner = (T)0; | ||
124 | + for(size_t i = 0; i < C; i++){ | ||
125 | + inner += at(r, i) * rhs.at(i, c); | ||
126 | + } | ||
127 | + result.M[c * R + r] = inner; | ||
128 | + } | ||
129 | + } | ||
89 | return result; | 130 | return result; |
90 | } | 131 | } |
91 | 132 | ||
92 | - template<typename Y> | ||
93 | - CUDA_CALLABLE vec3<Y> operator*(vec3<Y> rhs){ | ||
94 | - vec3<Y> result = 0; | ||
95 | - for(int r=0; r<3; r++) | ||
96 | - for(int c=0; c<3; c++) | ||
97 | - result[r] += (*this)(r, c) * rhs[c]; | 133 | + //returns a pointer to the raw matrix data (in column major format) |
134 | + T* data(){ | ||
135 | + return M; | ||
136 | + } | ||
98 | 137 | ||
138 | + //return a transposed matrix | ||
139 | + matrix<T> transpose(){ | ||
140 | + matrix<T> result(C, R); | ||
141 | + size_t c, r; | ||
142 | + for(c = 0; c < C; c++){ | ||
143 | + for(r = 0; r < R; r++){ | ||
144 | + result.M[r * C + c] = M[c * R + r]; | ||
145 | + } | ||
146 | + } | ||
99 | return result; | 147 | return result; |
100 | } | 148 | } |
101 | 149 | ||
@@ -103,12 +151,12 @@ struct matrix | @@ -103,12 +151,12 @@ struct matrix | ||
103 | { | 151 | { |
104 | std::stringstream ss; | 152 | std::stringstream ss; |
105 | 153 | ||
106 | - for(int r = 0; r < N; r++) | 154 | + for(int r = 0; r < R; r++) |
107 | { | 155 | { |
108 | ss << "| "; | 156 | ss << "| "; |
109 | - for(int c=0; c<N; c++) | 157 | + for(int c=0; c<C; c++) |
110 | { | 158 | { |
111 | - ss << (*this)(r, c) << " "; | 159 | + ss << M[c * R + r] << " "; |
112 | } | 160 | } |
113 | ss << "|" << std::endl; | 161 | ss << "|" << std::endl; |
114 | } | 162 | } |
@@ -116,26 +164,9 @@ struct matrix | @@ -116,26 +164,9 @@ struct matrix | ||
116 | return ss.str(); | 164 | return ss.str(); |
117 | } | 165 | } |
118 | 166 | ||
119 | - static matrix<T, N> identity() { | ||
120 | - matrix<T, N> I; | ||
121 | - I = 0; | ||
122 | - for (size_t i = 0; i < N; i++) | ||
123 | - I.M[i * N + i] = 1; | ||
124 | - return I; | ||
125 | - } | ||
126 | }; | 167 | }; |
127 | 168 | ||
128 | } //end namespace stim | 169 | } //end namespace stim |
129 | 170 | ||
130 | -template <typename T, int N> | ||
131 | -std::ostream& operator<<(std::ostream& os, stim::matrix<T, N> M) | ||
132 | -{ | ||
133 | - os<<M.toStr(); | ||
134 | - return os; | ||
135 | -} | ||
136 | - | ||
137 | -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7 | ||
138 | -//template<class T, int N> using rtsMatrix = rts::matrix<T, N>; | ||
139 | -//#endif | ||
140 | 171 | ||
141 | #endif | 172 | #endif |
1 | +#ifndef RTS_MATRIX_H | ||
2 | +#define RTS_MATRIX_H | ||
3 | + | ||
4 | +//#include "rts/vector.h" | ||
5 | +#include <string.h> | ||
6 | +#include <iostream> | ||
7 | +#include <stim/math/vector.h> | ||
8 | +#include <stim/math/vec3.h> | ||
9 | +#include <stim/cuda/cudatools/callable.h> | ||
10 | + | ||
11 | +namespace stim{ | ||
12 | + | ||
13 | +template <class T, int N> | ||
14 | +struct matrix_sq | ||
15 | +{ | ||
16 | + //the matrix will be stored in column-major order (compatible with OpenGL) | ||
17 | + T M[N*N]; | ||
18 | + | ||
19 | + CUDA_CALLABLE matrix_sq() | ||
20 | + { | ||
21 | + for(int r=0; r<N; r++) | ||
22 | + for(int c=0; c<N; c++) | ||
23 | + if(r == c) | ||
24 | + (*this)(r, c) = 1; | ||
25 | + else | ||
26 | + (*this)(r, c) = 0; | ||
27 | + } | ||
28 | + | ||
29 | + CUDA_CALLABLE matrix_sq(T rhs[N*N]) | ||
30 | + { | ||
31 | + memcpy(M,rhs, sizeof(T)*N*N); | ||
32 | + } | ||
33 | + | ||
34 | + CUDA_CALLABLE matrix_sq<T,N> set(T rhs[N*N]) | ||
35 | + { | ||
36 | + memcpy(M, rhs, sizeof(T)*N*N); | ||
37 | + return *this; | ||
38 | + } | ||
39 | + | ||
40 | + //create a symmetric matrix given the rhs values, given in column-major order | ||
41 | + CUDA_CALLABLE void setsym(T rhs[(N*N+N)/2]){ | ||
42 | + const size_t L = (N*N+N)/2; //store the number of values | ||
43 | + | ||
44 | + size_t r, c; | ||
45 | + r = c = 0; | ||
46 | + for(size_t i = 0; i < L; i++){ //for each value | ||
47 | + if(r == c) M[c * N + r] = rhs[i]; | ||
48 | + else M[c*N + r] = M[r * N + c] = rhs[i]; | ||
49 | + r++; | ||
50 | + if(r == N) r = ++c; | ||
51 | + } | ||
52 | + } | ||
53 | + | ||
54 | + CUDA_CALLABLE T& operator()(int row, int col) | ||
55 | + { | ||
56 | + return M[col * N + row]; | ||
57 | + } | ||
58 | + | ||
59 | + CUDA_CALLABLE matrix_sq<T, N> operator=(T rhs) | ||
60 | + { | ||
61 | + int Nsq = N*N; | ||
62 | + for(int i=0; i<Nsq; i++) | ||
63 | + M[i] = rhs; | ||
64 | + | ||
65 | + return *this; | ||
66 | + } | ||
67 | + | ||
68 | + // M - rhs*I | ||
69 | + CUDA_CALLABLE matrix_sq<T, N> operator-(T rhs) | ||
70 | + { | ||
71 | + for(int i=0; i<N; i++) | ||
72 | + for(int j=0 ; j<N; j++) | ||
73 | + if(i == j) | ||
74 | + M[i*N+j] -= rhs; | ||
75 | + return *this; | ||
76 | + } | ||
77 | + | ||
78 | + template<typename Y> | ||
79 | + vec<Y> operator*(vec<Y> rhs){ | ||
80 | + unsigned int M = rhs.size(); | ||
81 | + | ||
82 | + vec<Y> result; | ||
83 | + result.resize(M); | ||
84 | + | ||
85 | + for(int r=0; r<M; r++) | ||
86 | + for(int c=0; c<M; c++) | ||
87 | + result[r] += (*this)(r, c) * rhs[c]; | ||
88 | + | ||
89 | + return result; | ||
90 | + } | ||
91 | + | ||
92 | + template<typename Y> | ||
93 | + CUDA_CALLABLE vec3<Y> operator*(vec3<Y> rhs){ | ||
94 | + vec3<Y> result = 0; | ||
95 | + for(int r=0; r<3; r++) | ||
96 | + for(int c=0; c<3; c++) | ||
97 | + result[r] += (*this)(r, c) * rhs[c]; | ||
98 | + | ||
99 | + return result; | ||
100 | + } | ||
101 | + | ||
102 | + std::string toStr() | ||
103 | + { | ||
104 | + std::stringstream ss; | ||
105 | + | ||
106 | + for(int r = 0; r < N; r++) | ||
107 | + { | ||
108 | + ss << "| "; | ||
109 | + for(int c=0; c<N; c++) | ||
110 | + { | ||
111 | + ss << (*this)(r, c) << " "; | ||
112 | + } | ||
113 | + ss << "|" << std::endl; | ||
114 | + } | ||
115 | + | ||
116 | + return ss.str(); | ||
117 | + } | ||
118 | + | ||
119 | + static matrix_sq<T, N> identity() { | ||
120 | + matrix_sq<T, N> I; | ||
121 | + I = 0; | ||
122 | + for (size_t i = 0; i < N; i++) | ||
123 | + I.M[i * N + i] = 1; | ||
124 | + return I; | ||
125 | + } | ||
126 | +}; | ||
127 | + | ||
128 | +} //end namespace stim | ||
129 | + | ||
130 | +template <typename T, int N> | ||
131 | +std::ostream& operator<<(std::ostream& os, stim::matrix_sq<T, N> M) | ||
132 | +{ | ||
133 | + os<<M.toStr(); | ||
134 | + return os; | ||
135 | +} | ||
136 | + | ||
137 | +//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7 | ||
138 | +//template<class T, int N> using rtsMatrix = rts::matrix<T, N>; | ||
139 | +//#endif | ||
140 | + | ||
141 | +#endif |
stim/math/quaternion.h
1 | #ifndef RTS_QUATERNION_H | 1 | #ifndef RTS_QUATERNION_H |
2 | #define RTS_QUATERNION_H | 2 | #define RTS_QUATERNION_H |
3 | 3 | ||
4 | -#include <stim/math/matrix.h> | 4 | +#include <stim/math/matrix_sq.h> |
5 | #include <stim/cuda/cudatools/callable.h> | 5 | #include <stim/cuda/cudatools/callable.h> |
6 | 6 | ||
7 | namespace stim{ | 7 | namespace stim{ |
@@ -81,9 +81,9 @@ public: | @@ -81,9 +81,9 @@ public: | ||
81 | return result; | 81 | return result; |
82 | } | 82 | } |
83 | 83 | ||
84 | - CUDA_CALLABLE matrix<T, 3> toMatrix3(){ | 84 | + CUDA_CALLABLE matrix_sq<T, 3> toMatrix3(){ |
85 | 85 | ||
86 | - matrix<T, 3> result; | 86 | + matrix_sq<T, 3> result; |
87 | 87 | ||
88 | 88 | ||
89 | T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2; | 89 | T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2; |
@@ -114,9 +114,9 @@ public: | @@ -114,9 +114,9 @@ public: | ||
114 | return result; | 114 | return result; |
115 | } | 115 | } |
116 | 116 | ||
117 | - CUDA_CALLABLE matrix<T, 4> toMatrix4(){ | 117 | + CUDA_CALLABLE matrix_sq<T, 4> toMatrix4(){ |
118 | 118 | ||
119 | - matrix<T, 4> result; | 119 | + matrix_sq<T, 4> result; |
120 | T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2; | 120 | T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2; |
121 | 121 | ||
122 | // calculate coefficients | 122 | // calculate coefficients |
stim/optics/scalarbeam.h
@@ -27,7 +27,7 @@ std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d | @@ -27,7 +27,7 @@ std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d | ||
27 | 27 | ||
28 | ///compute the rotation operator to transform (0, 0, 1) to k | 28 | ///compute the rotation operator to transform (0, 0, 1) to k |
29 | T cos_angle = d.dot(vec3<T>(0, 0, 1)); | 29 | T cos_angle = d.dot(vec3<T>(0, 0, 1)); |
30 | - stim::matrix<T, 3> rotation; | 30 | + stim::matrix_sq<T, 3> rotation; |
31 | 31 | ||
32 | //if the cosine of the angle is -1, the rotation is just a flip across the z axis | 32 | //if the cosine of the angle is -1, the rotation is just a flip across the z axis |
33 | if(cos_angle == -1){ | 33 | if(cos_angle == -1){ |
@@ -318,7 +318,7 @@ void gpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lamb | @@ -318,7 +318,7 @@ void gpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lamb | ||
318 | 318 | ||
319 | stim::quaternion<T> q; //create a quaternion | 319 | stim::quaternion<T> q; //create a quaternion |
320 | q.CreateRotation(d, stim::vec3<T>(0, 0, 1)); //create a mapping from the propagation direction to the PSF space | 320 | q.CreateRotation(d, stim::vec3<T>(0, 0, 1)); //create a mapping from the propagation direction to the PSF space |
321 | - stim::matrix<T, 3> rot = q.toMatrix3(); | 321 | + stim::matrix_sq<T, 3> rot = q.toMatrix3(); |
322 | int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device | 322 | int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device |
323 | dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks | 323 | dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks |
324 | cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q); //call the CUDA kernel to move the cartesian coordinates to PSF space | 324 | cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q); //call the CUDA kernel to move the cartesian coordinates to PSF space |