From 6c4afcacad36be2242959cb5bd0d1e9cb2480375 Mon Sep 17 00:00:00 2001
From: David <mayerich@uh.edu>
Date: Mon, 19 Dec 2016 16:02:25 -0600
Subject: [PATCH] introduced a generalized matrix class, previous is now matrix_sq

---
 stim/math/matrix.h       | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------
 stim/math/matrix_sq.h    | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 stim/math/quaternion.h   |  10 +++++-----
 stim/optics/scalarbeam.h |   4 ++--
 4 files changed, 258 insertions(+), 86 deletions(-)
 create mode 100644 stim/math/matrix_sq.h
diff --git a/stim/math/matrix.h b/stim/math/matrix.h
index 513b588..10b39f8 100644
--- a/stim/math/matrix.h
+++ b/stim/math/matrix.h
@@ -10,92 +10,140 @@
 
 namespace stim{
 
-template <class T, int N>
-struct matrix
-{
+template <class T>
+class matrix {
 	//the matrix will be stored in column-major order (compatible with OpenGL)
-	T M[N*N];
+	T* M;								//pointer to the matrix data
+	size_t R;							//number of rows
+	size_t C;							//number of columns
+
+	void init(size_t rows, size_t cols){		//record the dimensions and allocate a fresh buffer; any pointer already held in M is overwritten, not freed
+		R = rows;
+		C = cols;
+		M = (T*) malloc (R * C * sizeof(T));	//allocate space for the matrix (contents are uninitialized; the malloc result is not checked)
+	}
 
-	CUDA_CALLABLE matrix()
-	{
-		for(int r=0; r<N; r++)
-			for(int c=0; c<N; c++)
-				if(r == c)
-					(*this)(r, c) = 1;
-				else
-					(*this)(r, c) = 0;
+	T& at(size_t row, size_t col){			//returns a reference to element (row, col); the indices are not bounds-checked
+		return M[col * R + row];			//column-major: each column occupies R consecutive entries
 	}
 
-	CUDA_CALLABLE matrix(T rhs[N*N])
-	{
-		memcpy(M,rhs, sizeof(T)*N*N);
+public:
+	matrix(size_t rows, size_t cols) {
+		init(rows, cols);									//initialize memory
 	}
 
-	CUDA_CALLABLE matrix<T,N> set(T rhs[N*N])
-	{
-		memcpy(M, rhs, sizeof(T)*N*N);
-		return *this;
+	matrix(size_t rows, size_t cols, T* data) {		//construct a rows x cols matrix from an existing array of rows*cols values
+		init(rows, cols);								//allocate storage and record the dimensions
+		memcpy(M, data, R * C * sizeof(T));				//copy the caller's values verbatim, so data must already be in column-major order
 	}
 
-	//create a symmetric matrix given the rhs values, given in column-major order
-	CUDA_CALLABLE void setsym(T rhs[(N*N+N)/2]){
-		const size_t L = (N*N+N)/2;		//store the number of values
+	matrix(const matrix<T>& cpy){
+		init(cpy.R, cpy.C);
+		memcpy(M, cpy.M, R * C * sizeof(T));
+	}
 
-		size_t r, c;
-		r = c = 0;
-		for(size_t i = 0; i < L; i++){ 				//for each value
-			if(r == c) M[c * N + r] = rhs[i];
-			else M[c*N + r] = M[r * N + c] = rhs[i];
-			r++;
-			if(r == N) r = ++c;
-		}
+	~matrix() {
+		R = C = 0;
+		if(M) free(M);
 	}
 
-	CUDA_CALLABLE T& operator()(int row, int col)
-	{
-		return M[col * N + row];
+	size_t rows(){
+		return R;
 	}
 
-	CUDA_CALLABLE matrix<T, N> operator=(T rhs)
-	{
-		int Nsq = N*N;
-		for(int i=0; i<Nsq; i++)
-			M[i] = rhs;
+	size_t cols(){
+		return C;
+	}
 
+	T& operator()(int row, int col) {
+		return at(row, col);
+	}
+
+	matrix<T>& operator=(T rhs) {				//fill: assign the scalar rhs to every element
+		size_t N = R * C;						//total number of elements
+		for(size_t n=0; n<N; n++)
+			M[n] = rhs;
+
+		return *this;							//returned by reference so the assignment does not deep-copy the matrix
+	}
+
+	matrix<T>& operator=(matrix<T> rhs){			//copy assignment; rhs arrives by value, i.e. as a private copy of the source
+		T* old = M; M = rhs.M; rhs.M = old;			//swap buffers: keep the copy's data and hand rhs the old buffer, which its destructor frees
+		R = rhs.R; C = rhs.C;						//adopt the dimensions of the source
 		return *this;
 	}
 	
-	// M - rhs*I
-	CUDA_CALLABLE matrix<T, N> operator-(T rhs)
-	{
+	//element-wise operations
+	matrix<T> operator+(T rhs) {
+		matrix<T> result(R, C);					//create a result matrix
+		size_t N = R * C;
+
 		for(int i=0; i<N; i++)
-			for(int j=0 ; j<N; j++)
-				if(i == j)
-					M[i*N+j] -= rhs;
-		return *this;
+			result.M[i] = M[i] + rhs;			//calculate the operation and assign to result
+
+		return result;
 	}
 
-	template<typename Y>
-	vec<Y> operator*(vec<Y> rhs){
-		unsigned int M = rhs.size();
+	matrix<T> operator-(T rhs) {
+		return operator+(-rhs);					//add the negative of rhs
+	}
+
+	matrix<T> operator*(T rhs) {
+		matrix<T> result(R, C);					//create a result matrix
+		size_t N = R * C;
+
+		for(int i=0; i<N; i++)
+			result.M[i] = M[i] * rhs;			//calculate the operation and assign to result
+
+		return result;
+	}
+
+	matrix<T> operator/(T rhs) {
+		matrix<T> result(R, C);					//create a result matrix
+		size_t N = R * C;
+
+		for(int i=0; i<N; i++)
+			result.M[i] = M[i] / rhs;			//calculate the operation and assign to result
 
-		vec<Y> result;
-		result.resize(M);
+		return result;
+	}
 
-		for(int r=0; r<M; r++)
-			for(int c=0; c<M; c++)
-				result[r] += (*this)(r, c) * rhs[c];
+	//matrix multiplication (this * rhs); rhs is taken by const reference so the operand is not deep-copied on every call
+	matrix<T> operator*(const matrix<T>& rhs){
+		if(C != rhs.R){							//the inner dimensions must agree, otherwise report the sizes and terminate
+			std::cout<<"ERROR: matrix multiplication is undefined for matrices of size ";
+			std::cout<<"[ "<<R<<" x "<<C<<" ] and [ "<<rhs.R<<" x "<<rhs.C<<"]"<<std::endl;
+			exit(1);
+		}
 
+		matrix<T> result(R, rhs.C);				//create the output matrix
+		T inner;								//stores the running inner product
+		for(size_t c = 0; c < rhs.C; c++){
+			for(size_t r = 0; r < R; r++){
+				inner = (T)0;
+				for(size_t i = 0; i < C; i++){
+					inner += at(r, i) * rhs.M[c * rhs.R + i];	//rhs(i, c), indexed directly because at() is non-const
+				}
+				result.M[c * R + r] = inner;	//store element (r, c) of the product
+			}
+		}
 		return result;
 	}
 
-	template<typename Y>
-	CUDA_CALLABLE vec3<Y> operator*(vec3<Y> rhs){
-		vec3<Y> result = 0;
-		for(int r=0; r<3; r++)
-			for(int c=0; c<3; c++)
-				result[r] += (*this)(r, c) * rhs[c];
+	//returns a pointer to the raw matrix data (in column major format)
+	T* data(){
+		return M;
+	}
 
+	//return a transposed matrix (a new C x R matrix; this matrix is not modified)
+	matrix<T> transpose(){
+		matrix<T> result(C, R);					//the result has C rows and R columns
+		size_t c, r;
+		for(c = 0; c < C; c++){
+			for(r = 0; r < R; r++){
+				result.M[r * C + c] = M[c * R + r];	//element (r, c) of this matrix becomes element (c, r) of the result
+			}
+		}
 		return result;
 	}
 
@@ -103,12 +151,12 @@ struct matrix
 	{
 		std::stringstream ss;
 
-		for(int r = 0; r < N; r++)
+		for(int r = 0; r < R; r++)
 		{
 			ss << "| ";
-			for(int c=0; c<N; c++)
+			for(int c=0; c<C; c++)
 			{
-				ss << (*this)(r, c) << " ";
+				ss << M[c * R + r] << " ";
 			}
 			ss << "|" << std::endl;
 		}
@@ -116,26 +164,9 @@ struct matrix
 		return ss.str();
 	}
 
-	static matrix<T, N> identity() {
-		matrix<T, N> I;
-		I = 0;
-		for (size_t i = 0; i < N; i++)
-			I.M[i * N + i] = 1;
-		return I;
-	}
 };
 
 }	//end namespace rts
 
-template <typename T, int N>
-std::ostream& operator<<(std::ostream& os, stim::matrix<T, N> M)
-{
-    os<<M.toStr();
-    return os;
-}
-
-//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
-//template<class T, int N> using rtsMatrix = rts::matrix<T, N>;
-//#endif
 
 #endif
diff --git a/stim/math/matrix_sq.h b/stim/math/matrix_sq.h
new file mode 100644
index 0000000..ee61ca7
--- /dev/null
+++ b/stim/math/matrix_sq.h
@@ -0,0 +1,141 @@
+#ifndef RTS_MATRIX_SQ_H		//guard named after this file so it cannot collide with another header guarded by RTS_MATRIX_H
+#define RTS_MATRIX_SQ_H
+
+//#include "rts/vector.h"
+#include <string.h>
+#include <iostream>
+#include <stim/math/vector.h>
+#include <stim/math/vec3.h>
+#include <stim/cuda/cudatools/callable.h>
+
+namespace stim{
+
+template <class T, int N>
+struct matrix_sq
+{
+	//the matrix will be stored in column-major order (compatible with OpenGL)
+	T M[N*N];
+
+	CUDA_CALLABLE matrix_sq()
+	{
+		for(int r=0; r<N; r++)
+			for(int c=0; c<N; c++)
+				if(r == c)
+					(*this)(r, c) = 1;
+				else
+					(*this)(r, c) = 0;
+	}
+
+	CUDA_CALLABLE matrix_sq(T rhs[N*N])
+	{
+		memcpy(M,rhs, sizeof(T)*N*N);
+	}
+
+	CUDA_CALLABLE matrix_sq<T,N> set(T rhs[N*N])
+	{
+		memcpy(M, rhs, sizeof(T)*N*N);
+		return *this;
+	}
+
+	//create a symmetric matrix given the rhs values, given in column-major order
+	CUDA_CALLABLE void setsym(T rhs[(N*N+N)/2]){
+		const size_t L = (N*N+N)/2;		//store the number of values
+
+		size_t r, c;
+		r = c = 0;
+		for(size_t i = 0; i < L; i++){ 				//for each value
+			if(r == c) M[c * N + r] = rhs[i];
+			else M[c*N + r] = M[r * N + c] = rhs[i];
+			r++;
+			if(r == N) r = ++c;
+		}
+	}
+
+	CUDA_CALLABLE T& operator()(int row, int col)
+	{
+		return M[col * N + row];
+	}
+
+	CUDA_CALLABLE matrix_sq<T, N> operator=(T rhs)
+	{
+		int Nsq = N*N;
+		for(int i=0; i<Nsq; i++)
+			M[i] = rhs;
+
+		return *this;
+	}
+	
+	// M - rhs*I -- NOTE: the subtraction is applied to this matrix's own diagonal (in place); the return value is a copy of the modified matrix
+	CUDA_CALLABLE matrix_sq<T, N> operator-(T rhs)
+	{
+		for(int i=0; i<N; i++)
+			for(int j=0 ; j<N; j++)
+				if(i == j)					//only the diagonal entries are touched
+					M[i*N+j] -= rhs;
+		return *this;						//returned by value
+	}
+
+	template<typename Y>
+	vec<Y> operator*(vec<Y> rhs){			//matrix-vector product with a dynamically sized vector
+		unsigned int M = rhs.size();		//NOTE: this local M shadows the member array M for the rest of the function
+
+		vec<Y> result;
+		result.resize(M);					//NOTE(review): the += below presumably relies on resize() zero-filling the entries -- confirm in vec
+
+		for(int r=0; r<M; r++)				//both loops are bounded by rhs.size(), not N, and nothing checks that rhs.size() <= N
+			for(int c=0; c<M; c++)
+				result[r] += (*this)(r, c) * rhs[c];
+
+		return result;
+	}
+
+	template<typename Y>
+	CUDA_CALLABLE vec3<Y> operator*(vec3<Y> rhs){
+		vec3<Y> result = 0;
+		for(int r=0; r<3; r++)
+			for(int c=0; c<3; c++)
+				result[r] += (*this)(r, c) * rhs[c];
+
+		return result;
+	}
+
+	std::string toStr()
+	{
+		std::stringstream ss;
+
+		for(int r = 0; r < N; r++)
+		{
+			ss << "| ";
+			for(int c=0; c<N; c++)
+			{
+				ss << (*this)(r, c) << " ";
+			}
+			ss << "|" << std::endl;
+		}
+
+		return ss.str();
+	}
+
+	static matrix_sq<T, N> identity() {
+		matrix_sq<T, N> I;
+		I = 0;
+		for (size_t i = 0; i < N; i++)
+			I.M[i * N + i] = 1;
+		return I;
+	}
+};
+
+}	//end namespace stim
+
+template <typename T, int N>
+std::ostream& operator<<(std::ostream& os, stim::matrix_sq<T, N> M)
+{
+    os<<M.toStr();
+    return os;
+}
+
+//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
+//template<class T, int N> using rtsMatrix = rts::matrix<T, N>;
+//#endif
+
+#endif
diff --git a/stim/math/quaternion.h b/stim/math/quaternion.h
index 6c90592..4170873 100644
--- a/stim/math/quaternion.h
+++ b/stim/math/quaternion.h
@@ -1,7 +1,7 @@
 #ifndef RTS_QUATERNION_H
 #define RTS_QUATERNION_H
 
-#include <stim/math/matrix.h>
+#include <stim/math/matrix_sq.h>
 #include <stim/cuda/cudatools/callable.h>
 
 namespace stim{
@@ -81,9 +81,9 @@ public:
 		return result;
 	}
 	
-	CUDA_CALLABLE matrix<T, 3> toMatrix3(){
+	CUDA_CALLABLE matrix_sq<T, 3> toMatrix3(){
 
-		matrix<T, 3> result;
+		matrix_sq<T, 3> result;
 
 
 	    T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
@@ -114,9 +114,9 @@ public:
 		return result;
 	}
 
-	CUDA_CALLABLE matrix<T, 4> toMatrix4(){
+	CUDA_CALLABLE matrix_sq<T, 4> toMatrix4(){
 
-		matrix<T, 4> result;
+		matrix_sq<T, 4> result;
 	    T wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
 
 	    // calculate coefficients
diff --git a/stim/optics/scalarbeam.h b/stim/optics/scalarbeam.h
index b4179cd..07d3484 100644
--- a/stim/optics/scalarbeam.h
+++ b/stim/optics/scalarbeam.h
@@ -27,7 +27,7 @@ std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d
 
 	///compute the rotation operator to transform (0, 0, 1) to k
 	T cos_angle = d.dot(vec3<T>(0, 0, 1));
-	stim::matrix<T, 3> rotation;
+	stim::matrix_sq<T, 3> rotation;
 
 	//if the cosine of the angle is -1, the rotation is just a flip across the z axis
 	if(cos_angle == -1){
@@ -318,7 +318,7 @@ void gpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lamb
 
 	stim::quaternion<T> q;												//create a quaternion
 	q.CreateRotation(d, stim::vec3<T>(0, 0, 1));						//create a mapping from the propagation direction to the PSF space
-	stim::matrix<T, 3> rot = q.toMatrix3();
+	stim::matrix_sq<T, 3> rot = q.toMatrix3();
 	int threads = stim::maxThreadsPerBlock();							//get the maximum number of threads per block for the CUDA device
 	dim3 blocks( (unsigned)(N / threads + 1));							//calculate the optimal number of blocks
 	cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q);	//call the CUDA kernel to move the cartesian coordinates to PSF space
--
libgit2 0.21.4