From 6c4afcacad36be2242959cb5bd0d1e9cb2480375 Mon Sep 17 00:00:00 2001 From: David Date: Mon, 19 Dec 2016 16:02:25 -0600 Subject: [PATCH] introduced a generalized matrix class, previous is now matrix_sq --- stim/math/matrix.h | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------- stim/math/matrix_sq.h | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ stim/math/quaternion.h | 10 +++++----- stim/optics/scalarbeam.h | 4 ++-- 4 files changed, 258 insertions(+), 86 deletions(-) create mode 100644 stim/math/matrix_sq.h diff --git a/stim/math/matrix.h b/stim/math/matrix.h index 513b588..10b39f8 100644 --- a/stim/math/matrix.h +++ b/stim/math/matrix.h @@ -10,92 +10,140 @@ namespace stim{ -template -struct matrix -{ +template +class matrix { //the matrix will be stored in column-major order (compatible with OpenGL) - T M[N*N]; + T* M; //pointer to the matrix data + size_t R; //number of rows + size_t C; //number of colums + + void init(size_t rows, size_t cols){ + R = rows; + C = cols; + M = (T*) malloc (R * C * sizeof(T)); //allocate space for the matrix + } - CUDA_CALLABLE matrix() - { - for(int r=0; r set(T rhs[N*N]) - { - memcpy(M, rhs, sizeof(T)*N*N); - return *this; + matrix(size_t rows, size_t cols, T* data) { + init(rows, cols); + memcpy(M, rhs, R * C * sizeof(T)); } - //create a symmetric matrix given the rhs values, given in column-major order - CUDA_CALLABLE void setsym(T rhs[(N*N+N)/2]){ - const size_t L = (N*N+N)/2; //store the number of values + matrix(const matrix& cpy){ + init(cpy.R, cpy.C); + memcpy(M, cpy.M, R * C * sizeof(T)); + } - size_t r, c; - r = c = 0; - for(size_t i = 0; i < L; i++){ //for each value - if(r == c) M[c * N + r] = rhs[i]; - else M[c*N + r] = M[r * N + c] = rhs[i]; - r++; - if(r == N) r = ++c; - } + ~matrix() { + R = C = 0; + if(M) free(M); } - CUDA_CALLABLE T& operator()(int row, int col) - { - return M[col * N + row]; + size_t rows(){ + return R; } - CUDA_CALLABLE matrix operator=(T rhs) - { - int Nsq = N*N; - for(int i=0; i operator=(T rhs) { + size_t N = R * C; + for(size_t n=0; n& operator=(matrix rhs){ + init(rhs.R, rhs.C); + memcpy(M, rhs.M, R * C * sizeof(T)); return *this; } - // M - rhs*I - CUDA_CALLABLE matrix operator-(T rhs) - { + //element-wise operations + matrix operator+(T rhs) { + matrix result(R, C); //create a result matrix + size_t N = R * C; + for(int i=0; i - vec operator*(vec rhs){ - unsigned int M = rhs.size(); + matrix operator-(T rhs) { + return operator+(-rhs); //add the negative of rhs + } + + matrix operator*(T rhs) { + matrix result(R, C); //create a result matrix + size_t N = R * C; + + for(int i=0; i operator/(T rhs) { + matrix result(R, C); //create a result matrix + size_t N = R * C; + + for(int i=0; i result; - result.resize(M); + return result; + } - for(int r=0; r operator*(matrix rhs){ + if(C != rhs.R){ + std::cout<<"ERROR: matrix multiplication is undefined for matrices of size "; + std::cout<<"[ "< result(R, rhs.C); //create the output matrix + T inner; //stores the running inner product + for(size_t c = 0; c < rhs.C; c++){ + for(size_t r = 0; r < R; r++){ + inner = (T)0; + for(size_t i = 0; i < C; i++){ + inner += at(r, i) * rhs.at(i, c); + } + result.M[c * R + r] = inner; + } + } return result; } - template - CUDA_CALLABLE vec3 operator*(vec3 rhs){ - vec3 result = 0; - for(int r=0; r<3; r++) - for(int c=0; c<3; c++) - result[r] += (*this)(r, c) * rhs[c]; + //returns a pointer to the raw matrix data (in column major format) + T* data(){ + return M; + } + //return a transposed matrix + matrix transpose(){ + matrix result(C, R); + size_t c, r; + for(c = 0; c < C; c++){ + for(r = 0; r < R; r++){ + result.M[r * C + c] = M[c * R + r]; + } + } return result; } @@ -103,12 +151,12 @@ struct matrix { std::stringstream ss; - for(int r = 0; r < N; r++) + for(int r = 0; r < R; r++) { ss << "| "; - for(int c=0; c identity() { - matrix I; - I = 0; - for (size_t i = 0; i < N; i++) - I.M[i * N + i] = 1; - return I; - } }; } //end namespace rts -template -std::ostream& operator<<(std::ostream& os, stim::matrix M) -{ - os< 3 && __GNUC_MINOR__ > 7 -//template using rtsMatrix = rts::matrix; -//#endif #endif diff --git a/stim/math/matrix_sq.h b/stim/math/matrix_sq.h new file mode 100644 index 0000000..ee61ca7 --- /dev/null +++ b/stim/math/matrix_sq.h @@ -0,0 +1,141 @@ +#ifndef RTS_MATRIX_H +#define RTS_MATRIX_H + +//#include "rts/vector.h" +#include +#include +#include +#include +#include + +namespace stim{ + +template +struct matrix_sq +{ + //the matrix will be stored in column-major order (compatible with OpenGL) + T M[N*N]; + + CUDA_CALLABLE matrix_sq() + { + for(int r=0; r set(T rhs[N*N]) + { + memcpy(M, rhs, sizeof(T)*N*N); + return *this; + } + + //create a symmetric matrix given the rhs values, given in column-major order + CUDA_CALLABLE void setsym(T rhs[(N*N+N)/2]){ + const size_t L = (N*N+N)/2; //store the number of values + + size_t r, c; + r = c = 0; + for(size_t i = 0; i < L; i++){ //for each value + if(r == c) M[c * N + r] = rhs[i]; + else M[c*N + r] = M[r * N + c] = rhs[i]; + r++; + if(r == N) r = ++c; + } + } + + CUDA_CALLABLE T& operator()(int row, int col) + { + return M[col * N + row]; + } + + CUDA_CALLABLE matrix_sq operator=(T rhs) + { + int Nsq = N*N; + for(int i=0; i operator-(T rhs) + { + for(int i=0; i + vec operator*(vec rhs){ + unsigned int M = rhs.size(); + + vec result; + result.resize(M); + + for(int r=0; r + CUDA_CALLABLE vec3 operator*(vec3 rhs){ + vec3 result = 0; + for(int r=0; r<3; r++) + for(int c=0; c<3; c++) + result[r] += (*this)(r, c) * rhs[c]; + + return result; + } + + std::string toStr() + { + std::stringstream ss; + + for(int r = 0; r < N; r++) + { + ss << "| "; + for(int c=0; c identity() { + matrix_sq I; + I = 0; + for (size_t i = 0; i < N; i++) + I.M[i * N + i] = 1; + return I; + } +}; + +} //end namespace rts + +template +std::ostream& operator<<(std::ostream& os, stim::matrix_sq M) +{ + os< 3 && __GNUC_MINOR__ > 7 +//template using rtsMatrix = rts::matrix; +//#endif + +#endif diff --git a/stim/math/quaternion.h b/stim/math/quaternion.h index 6c90592..4170873 100644 --- a/stim/math/quaternion.h +++ b/stim/math/quaternion.h @@ -1,7 +1,7 @@ #ifndef RTS_QUATERNION_H #define RTS_QUATERNION_H -#include +#include #include namespace stim{ @@ -81,9 +81,9 @@ public: return result; } - CUDA_CALLABLE matrix toMatrix3(){ + CUDA_CALLABLE matrix_sq toMatrix3(){ - matrix result; + matrix_sq result; T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2; @@ -114,9 +114,9 @@ public: return result; } - CUDA_CALLABLE matrix toMatrix4(){ + CUDA_CALLABLE matrix_sq toMatrix4(){ - matrix result; + matrix_sq result; T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2; // calculate coefficients diff --git a/stim/optics/scalarbeam.h b/stim/optics/scalarbeam.h index b4179cd..07d3484 100644 --- a/stim/optics/scalarbeam.h +++ b/stim/optics/scalarbeam.h @@ -27,7 +27,7 @@ std::vector< stim::vec3 > generate_focusing_vectors(size_t N, stim::vec3 d ///compute the rotation operator to transform (0, 0, 1) to k T cos_angle = d.dot(vec3(0, 0, 1)); - stim::matrix rotation; + stim::matrix_sq rotation; //if the cosine of the angle is -1, the rotation is just a flip across the z axis if(cos_angle == -1){ @@ -318,7 +318,7 @@ void gpu_scalar_psf_cart(stim::complex* E, size_t N, T* x, T* y, T* z, T lamb stim::quaternion q; //create a quaternion q.CreateRotation(d, stim::vec3(0, 0, 1)); //create a mapping from the propagation direction to the PSF space - stim::matrix rot = q.toMatrix3(); + stim::matrix_sq rot = q.toMatrix3(); int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks cuda_cart2psf <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q); //call the CUDA kernel to move the cartesian coordinates to PSF space -- libgit2 0.21.4