Commit 6c4afcacad36be2242959cb5bd0d1e9cb2480375

Authored by David Mayerich
1 parent 7115e973

introduced a generalized matrix class, previous is now matrix_sq

stim/math/matrix.h
... ... @@ -10,92 +10,140 @@
10 10  
11 11 namespace stim{
12 12  
13   -template <class T, int N>
14   -struct matrix
15   -{
  13 +template <class T>
  14 +class matrix {
16 15 //the matrix will be stored in column-major order (compatible with OpenGL)
17   - T M[N*N];
  16 + T* M; //pointer to the matrix data
  17 + size_t R; //number of rows
  18 + size_t C; //number of colums
  19 +
  20 + void init(size_t rows, size_t cols){
  21 + R = rows;
  22 + C = cols;
  23 + M = (T*) malloc (R * C * sizeof(T)); //allocate space for the matrix
  24 + }
18 25  
19   - CUDA_CALLABLE matrix()
20   - {
21   - for(int r=0; r<N; r++)
22   - for(int c=0; c<N; c++)
23   - if(r == c)
24   - (*this)(r, c) = 1;
25   - else
26   - (*this)(r, c) = 0;
  26 + T& at(size_t row, size_t col){
  27 + return M[col * R + row];
27 28 }
28 29  
29   - CUDA_CALLABLE matrix(T rhs[N*N])
30   - {
31   - memcpy(M,rhs, sizeof(T)*N*N);
  30 +public:
  31 + matrix(size_t rows, size_t cols) {
  32 + init(rows, cols); //initialize memory
32 33 }
33 34  
34   - CUDA_CALLABLE matrix<T,N> set(T rhs[N*N])
35   - {
36   - memcpy(M, rhs, sizeof(T)*N*N);
37   - return *this;
  35 + matrix(size_t rows, size_t cols, T* data) {
  36 + init(rows, cols);
  37 + memcpy(M, rhs, R * C * sizeof(T));
38 38 }
39 39  
40   - //create a symmetric matrix given the rhs values, given in column-major order
41   - CUDA_CALLABLE void setsym(T rhs[(N*N+N)/2]){
42   - const size_t L = (N*N+N)/2; //store the number of values
  40 + matrix(const matrix<T>& cpy){
  41 + init(cpy.R, cpy.C);
  42 + memcpy(M, cpy.M, R * C * sizeof(T));
  43 + }
43 44  
44   - size_t r, c;
45   - r = c = 0;
46   - for(size_t i = 0; i < L; i++){ //for each value
47   - if(r == c) M[c * N + r] = rhs[i];
48   - else M[c*N + r] = M[r * N + c] = rhs[i];
49   - r++;
50   - if(r == N) r = ++c;
51   - }
  45 + ~matrix() {
  46 + R = C = 0;
  47 + if(M) free(M);
52 48 }
53 49  
54   - CUDA_CALLABLE T& operator()(int row, int col)
55   - {
56   - return M[col * N + row];
  50 + size_t rows(){
  51 + return R;
57 52 }
58 53  
59   - CUDA_CALLABLE matrix<T, N> operator=(T rhs)
60   - {
61   - int Nsq = N*N;
62   - for(int i=0; i<Nsq; i++)
63   - M[i] = rhs;
  54 + size_t cols(){
  55 + return C;
  56 + }
64 57  
  58 + T& operator()(int row, int col) {
  59 + return at(row, col);
  60 + }
  61 +
  62 + matrix<T> operator=(T rhs) {
  63 + size_t N = R * C;
  64 + for(size_t n=0; n<N; n++)
  65 + M[n] = rhs;
  66 +
  67 + return *this;
  68 + }
  69 +
  70 + matrix<T>& operator=(matrix<T> rhs){
  71 + init(rhs.R, rhs.C);
  72 + memcpy(M, rhs.M, R * C * sizeof(T));
65 73 return *this;
66 74 }
67 75  
68   - // M - rhs*I
69   - CUDA_CALLABLE matrix<T, N> operator-(T rhs)
70   - {
  76 + //element-wise operations
  77 + matrix<T> operator+(T rhs) {
  78 + matrix<T> result(R, C); //create a result matrix
  79 + size_t N = R * C;
  80 +
71 81 for(int i=0; i<N; i++)
72   - for(int j=0 ; j<N; j++)
73   - if(i == j)
74   - M[i*N+j] -= rhs;
75   - return *this;
  82 + result.M[i] = M[i] + rhs; //calculate the operation and assign to result
  83 +
  84 + return result;
76 85 }
77 86  
78   - template<typename Y>
79   - vec<Y> operator*(vec<Y> rhs){
80   - unsigned int M = rhs.size();
  87 + matrix<T> operator-(T rhs) {
  88 + return operator+(-rhs); //add the negative of rhs
  89 + }
  90 +
  91 + matrix<T> operator*(T rhs) {
  92 + matrix<T> result(R, C); //create a result matrix
  93 + size_t N = R * C;
  94 +
  95 + for(int i=0; i<N; i++)
  96 + result.M[i] = M[i] * rhs; //calculate the operation and assign to result
  97 +
  98 + return result;
  99 + }
  100 +
  101 + matrix<T> operator/(T rhs) {
  102 + matrix<T> result(R, C); //create a result matrix
  103 + size_t N = R * C;
  104 +
  105 + for(int i=0; i<N; i++)
  106 + result.M[i] = M[i] / rhs; //calculate the operation and assign to result
81 107  
82   - vec<Y> result;
83   - result.resize(M);
  108 + return result;
  109 + }
84 110  
85   - for(int r=0; r<M; r++)
86   - for(int c=0; c<M; c++)
87   - result[r] += (*this)(r, c) * rhs[c];
  111 + //matrix multiplication
  112 + matrix<T> operator*(matrix<T> rhs){
  113 + if(C != rhs.R){
  114 + std::cout<<"ERROR: matrix multiplication is undefined for matrices of size ";
  115 + std::cout<<"[ "<<R<<" x "<<C<<" ] and [ "<<rhs.R<<" x "<<rhs.C<<"]"<<std::endl;
  116 + exit(1);
  117 + }
88 118  
  119 + matrix<T> result(R, rhs.C); //create the output matrix
  120 + T inner; //stores the running inner product
  121 + for(size_t c = 0; c < rhs.C; c++){
  122 + for(size_t r = 0; r < R; r++){
  123 + inner = (T)0;
  124 + for(size_t i = 0; i < C; i++){
  125 + inner += at(r, i) * rhs.at(i, c);
  126 + }
  127 + result.M[c * R + r] = inner;
  128 + }
  129 + }
89 130 return result;
90 131 }
91 132  
92   - template<typename Y>
93   - CUDA_CALLABLE vec3<Y> operator*(vec3<Y> rhs){
94   - vec3<Y> result = 0;
95   - for(int r=0; r<3; r++)
96   - for(int c=0; c<3; c++)
97   - result[r] += (*this)(r, c) * rhs[c];
  133 + //returns a pointer to the raw matrix data (in column major format)
  134 + T* data(){
  135 + return M;
  136 + }
98 137  
  138 + //return a transposed matrix
  139 + matrix<T> transpose(){
  140 + matrix<T> result(C, R);
  141 + size_t c, r;
  142 + for(c = 0; c < C; c++){
  143 + for(r = 0; r < R; r++){
  144 + result.M[r * C + c] = M[c * R + r];
  145 + }
  146 + }
99 147 return result;
100 148 }
101 149  
... ... @@ -103,12 +151,12 @@ struct matrix
103 151 {
104 152 std::stringstream ss;
105 153  
106   - for(int r = 0; r < N; r++)
  154 + for(int r = 0; r < R; r++)
107 155 {
108 156 ss << "| ";
109   - for(int c=0; c<N; c++)
  157 + for(int c=0; c<C; c++)
110 158 {
111   - ss << (*this)(r, c) << " ";
  159 + ss << M[c * R + r] << " ";
112 160 }
113 161 ss << "|" << std::endl;
114 162 }
... ... @@ -116,26 +164,9 @@ struct matrix
116 164 return ss.str();
117 165 }
118 166  
119   - static matrix<T, N> identity() {
120   - matrix<T, N> I;
121   - I = 0;
122   - for (size_t i = 0; i < N; i++)
123   - I.M[i * N + i] = 1;
124   - return I;
125   - }
126 167 };
127 168  
128 169 } //end namespace rts
129 170  
130   -template <typename T, int N>
131   -std::ostream& operator<<(std::ostream& os, stim::matrix<T, N> M)
132   -{
133   - os<<M.toStr();
134   - return os;
135   -}
136   -
137   -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
138   -//template<class T, int N> using rtsMatrix = rts::matrix<T, N>;
139   -//#endif
140 171  
141 172 #endif
... ...
stim/math/matrix_sq.h 0 โ†’ 100644
  1 +#ifndef RTS_MATRIX_H
  2 +#define RTS_MATRIX_H
  3 +
  4 +//#include "rts/vector.h"
  5 +#include <string.h>
  6 +#include <iostream>
  7 +#include <stim/math/vector.h>
  8 +#include <stim/math/vec3.h>
  9 +#include <stim/cuda/cudatools/callable.h>
  10 +
  11 +namespace stim{
  12 +
  13 +template <class T, int N>
  14 +struct matrix_sq
  15 +{
  16 + //the matrix will be stored in column-major order (compatible with OpenGL)
  17 + T M[N*N];
  18 +
  19 + CUDA_CALLABLE matrix_sq()
  20 + {
  21 + for(int r=0; r<N; r++)
  22 + for(int c=0; c<N; c++)
  23 + if(r == c)
  24 + (*this)(r, c) = 1;
  25 + else
  26 + (*this)(r, c) = 0;
  27 + }
  28 +
  29 + CUDA_CALLABLE matrix_sq(T rhs[N*N])
  30 + {
  31 + memcpy(M,rhs, sizeof(T)*N*N);
  32 + }
  33 +
  34 + CUDA_CALLABLE matrix_sq<T,N> set(T rhs[N*N])
  35 + {
  36 + memcpy(M, rhs, sizeof(T)*N*N);
  37 + return *this;
  38 + }
  39 +
  40 + //create a symmetric matrix given the rhs values, given in column-major order
  41 + CUDA_CALLABLE void setsym(T rhs[(N*N+N)/2]){
  42 + const size_t L = (N*N+N)/2; //store the number of values
  43 +
  44 + size_t r, c;
  45 + r = c = 0;
  46 + for(size_t i = 0; i < L; i++){ //for each value
  47 + if(r == c) M[c * N + r] = rhs[i];
  48 + else M[c*N + r] = M[r * N + c] = rhs[i];
  49 + r++;
  50 + if(r == N) r = ++c;
  51 + }
  52 + }
  53 +
  54 + CUDA_CALLABLE T& operator()(int row, int col)
  55 + {
  56 + return M[col * N + row];
  57 + }
  58 +
  59 + CUDA_CALLABLE matrix_sq<T, N> operator=(T rhs)
  60 + {
  61 + int Nsq = N*N;
  62 + for(int i=0; i<Nsq; i++)
  63 + M[i] = rhs;
  64 +
  65 + return *this;
  66 + }
  67 +
  68 + // M - rhs*I
  69 + CUDA_CALLABLE matrix_sq<T, N> operator-(T rhs)
  70 + {
  71 + for(int i=0; i<N; i++)
  72 + for(int j=0 ; j<N; j++)
  73 + if(i == j)
  74 + M[i*N+j] -= rhs;
  75 + return *this;
  76 + }
  77 +
  78 + template<typename Y>
  79 + vec<Y> operator*(vec<Y> rhs){
  80 + unsigned int M = rhs.size();
  81 +
  82 + vec<Y> result;
  83 + result.resize(M);
  84 +
  85 + for(int r=0; r<M; r++)
  86 + for(int c=0; c<M; c++)
  87 + result[r] += (*this)(r, c) * rhs[c];
  88 +
  89 + return result;
  90 + }
  91 +
  92 + template<typename Y>
  93 + CUDA_CALLABLE vec3<Y> operator*(vec3<Y> rhs){
  94 + vec3<Y> result = 0;
  95 + for(int r=0; r<3; r++)
  96 + for(int c=0; c<3; c++)
  97 + result[r] += (*this)(r, c) * rhs[c];
  98 +
  99 + return result;
  100 + }
  101 +
  102 + std::string toStr()
  103 + {
  104 + std::stringstream ss;
  105 +
  106 + for(int r = 0; r < N; r++)
  107 + {
  108 + ss << "| ";
  109 + for(int c=0; c<N; c++)
  110 + {
  111 + ss << (*this)(r, c) << " ";
  112 + }
  113 + ss << "|" << std::endl;
  114 + }
  115 +
  116 + return ss.str();
  117 + }
  118 +
  119 + static matrix_sq<T, N> identity() {
  120 + matrix_sq<T, N> I;
  121 + I = 0;
  122 + for (size_t i = 0; i < N; i++)
  123 + I.M[i * N + i] = 1;
  124 + return I;
  125 + }
  126 +};
  127 +
  128 +} //end namespace rts
  129 +
  130 +template <typename T, int N>
  131 +std::ostream& operator<<(std::ostream& os, stim::matrix_sq<T, N> M)
  132 +{
  133 + os<<M.toStr();
  134 + return os;
  135 +}
  136 +
  137 +//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
  138 +//template<class T, int N> using rtsMatrix = rts::matrix<T, N>;
  139 +//#endif
  140 +
  141 +#endif
... ...
stim/math/quaternion.h
1 1 #ifndef RTS_QUATERNION_H
2 2 #define RTS_QUATERNION_H
3 3  
4   -#include <stim/math/matrix.h>
  4 +#include <stim/math/matrix_sq.h>
5 5 #include <stim/cuda/cudatools/callable.h>
6 6  
7 7 namespace stim{
... ... @@ -81,9 +81,9 @@ public:
81 81 return result;
82 82 }
83 83  
84   - CUDA_CALLABLE matrix<T, 3> toMatrix3(){
  84 + CUDA_CALLABLE matrix_sq<T, 3> toMatrix3(){
85 85  
86   - matrix<T, 3> result;
  86 + matrix_sq<T, 3> result;
87 87  
88 88  
89 89 T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
... ... @@ -114,9 +114,9 @@ public:
114 114 return result;
115 115 }
116 116  
117   - CUDA_CALLABLE matrix<T, 4> toMatrix4(){
  117 + CUDA_CALLABLE matrix_sq<T, 4> toMatrix4(){
118 118  
119   - matrix<T, 4> result;
  119 + matrix_sq<T, 4> result;
120 120 T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
121 121  
122 122 // calculate coefficients
... ...
stim/optics/scalarbeam.h
... ... @@ -27,7 +27,7 @@ std::vector&lt; stim::vec3&lt;T&gt; &gt; generate_focusing_vectors(size_t N, stim::vec3&lt;T&gt; d
27 27  
28 28 ///compute the rotation operator to transform (0, 0, 1) to k
29 29 T cos_angle = d.dot(vec3<T>(0, 0, 1));
30   - stim::matrix<T, 3> rotation;
  30 + stim::matrix_sq<T, 3> rotation;
31 31  
32 32 //if the cosine of the angle is -1, the rotation is just a flip across the z axis
33 33 if(cos_angle == -1){
... ... @@ -318,7 +318,7 @@ void gpu_scalar_psf_cart(stim::complex&lt;T&gt;* E, size_t N, T* x, T* y, T* z, T lamb
318 318  
319 319 stim::quaternion<T> q; //create a quaternion
320 320 q.CreateRotation(d, stim::vec3<T>(0, 0, 1)); //create a mapping from the propagation direction to the PSF space
321   - stim::matrix<T, 3> rot = q.toMatrix3();
  321 + stim::matrix_sq<T, 3> rot = q.toMatrix3();
322 322 int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
323 323 dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks
324 324 cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q); //call the CUDA kernel to move the cartesian coordinates to PSF space
... ...