From 71d5696d385e7d2e662bbd1e98264293479ed780 Mon Sep 17 00:00:00 2001
From: David Mayerich <mayerich@uh.edu>
Date: Mon, 9 Nov 2020 11:17:29 -0600
Subject: [PATCH] First commit after development and testing

---
 CMakeLists.txt             |  69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 FindSTIM.cmake             |  16 ++++++++++++++++
 desktop.ini                | Bin 0 -> 176 bytes
 docs/Readme_BytesOrder.txt |  19 +++++++++++++++++++
 docs/lyr_format.pptx       | Bin 0 -> 57140 bytes
 docs/testcases.txt         |  11 +++++++++++
 layerview.py               | 176 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/layer.cpp              | 556 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/layer.h                |  35 +++++++++++++++++++++++++++++++++++
 src/linalg.cpp             |  63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/linalg.h               |  17 +++++++++++++++++
 src/main.cpp               | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 12 files changed, 1182 insertions(+), 0 deletions(-)
 create mode 100644 CMakeLists.txt
 create mode 100644 FindSTIM.cmake
 create mode 100644 desktop.ini
 create mode 100644 docs/Readme_BytesOrder.txt
 create mode 100644 docs/lyr_format.pptx
 create mode 100644 docs/testcases.txt
 create mode 100644 layerview.py
 create mode 100644 src/layer.cpp
 create mode 100644 src/layer.h
 create mode 100644 src/linalg.cpp
 create mode 100644 src/linalg.h
 create mode 100644 src/main.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..35768c6
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,69 @@
+#Specify the version being used aswell as the language
+cmake_minimum_required(VERSION 3.12)
+
+#Name your project here
+project(multilayer)
+
+#set the module directory
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}")
+
+#default to release mode
+if(NOT CMAKE_BUILD_TYPE) 
+	set(CMAKE_BUILD_TYPE Release)
+endif(NOT CMAKE_BUILD_TYPE)
+
+#build the executable in the binary directory on MS Visual Studio
+if ( MSVC )
+	SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}")
+	SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
+	SET( LIBRARY_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}")
+	SET( LIBRARY_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
+	add_definitions(-D_CRT_SECURE_NO_WARNINGS)
+	add_definitions(-D_SCL_SECURE_NO_WARNINGS)
+endif ( MSVC )
+
+
+#find packages-----------------------------------
+#find the pthreads package
+find_package(Threads)
+
+#find the X11 package
+find_package(X11)
+
+#find CUDA, mostly for LA stuff using cuBLAS
+find_package(CUDA REQUIRED)
+
+#find Boost
+#find_package(Boost)
+
+#find the STIM library
+find_package(STIM REQUIRED)
+
+#find LAPACK and supporting link_libraries
+find_package(clapack CONFIG REQUIRED)
+find_package(OpenBLAS CONFIG REQUIRED)
+
+#include include directories
+include_directories(${CUDA_INCLUDE_DIRS}
+			${STIM_INCLUDE_DIRS}
+)
+
+#Assign source files to the appropriate variables to easily associate them with executables
+file(GLOB SRC "src/*.cpp")
+
+#-----------------------------Create the executable--------------------------
+#-----------------------------Show all four examples-------------------------
+add_executable(multilayer
+	       ${SRC}
+)
+link_directories(${CUDA_BIN_DIRS})
+target_link_libraries(multilayer ${CUDA_LIBRARIES}
+			${CUDA_CUBLAS_LIBRARIES}
+			${CUDA_cusparse_LIBRARY}
+			${CUDA_cusolver_LIBRARY}
+			${CUDA_CUFFT_LIBRARIES}
+			OpenBLAS::OpenBLAS
+			f2c lapack
+)
+
+		
\ No newline at end of file
diff --git a/FindSTIM.cmake b/FindSTIM.cmake
new file mode 100644
index 0000000..cb2bcc7
--- /dev/null
+++ b/FindSTIM.cmake
@@ -0,0 +1,16 @@
+# finds the STIM library (downloads it if it isn't present)
+# set STIMLIB_PATH to the directory containing the stim subdirectory (the stim repository)
+
+include(FindPackageHandleStandardArgs)
+
+set(STIM_ROOT $ENV{STIM_ROOT})
+
+IF(NOT STIM_ROOT)
+    MESSAGE("ERROR: STIM_ROOT environment variable must be set!")
+ENDIF(NOT STIM_ROOT)
+
+    FIND_PATH(STIM_INCLUDE_DIRS DOC "Path to STIM include directory."
+              NAMES stim/image/image.h
+              PATHS ${STIM_ROOT})
+
+find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIRS)
diff --git a/desktop.ini b/desktop.ini
new file mode 100644
index 0000000..a7cc8d5
Binary files /dev/null and b/desktop.ini differ
diff --git a/docs/Readme_BytesOrder.txt b/docs/Readme_BytesOrder.txt
new file mode 100644
index 0000000..ebd7548
--- /dev/null
+++ b/docs/Readme_BytesOrder.txt
@@ -0,0 +1,19 @@
+Order of "output.lyr" parameters.
+
+The wavenumber in free space: 		 	k0	double			8B
+The direction of propogation:   	 	d	double*2		16B
+The refractive index in the first layer: 	n[0]	complex<double>		16B
+
+for i in LAYERS:
+	z positions[i]:			 	z[i]	double			8B * LAYERS
+	z-component of propogation directions:	sz[i]	complex<double>		16B * LAYERS
+	Transmission:				Ptx[i]	complex<double>		16B * LAYERS
+	Transmission:				Pty[i]	complex<double>		16B * LAYERS
+	Transmission:				Ptz[i]	complex<double>		16B * LAYERS
+	Reflection:				Prx[i]	complex<double>		16B * LAYERS
+	Transmission:				Pry[i]	complex<double>		16B * LAYERS
+	Transmission:				Prz[i]	complex<double>		16B * LAYERS
+	
+
+All parameters we need will be:
+	15 * LAYERS + 5
diff --git a/docs/lyr_format.pptx b/docs/lyr_format.pptx
new file mode 100644
index 0000000..3f2cf5a
Binary files /dev/null and b/docs/lyr_format.pptx differ
diff --git a/docs/testcases.txt b/docs/testcases.txt
new file mode 100644
index 0000000..b52dd9c
--- /dev/null
+++ b/docs/testcases.txt
@@ -0,0 +1,11 @@
+output.lyr --n 1.0 1.2 1.3 1.4 1.5 1.4 1.3 1.2 1.1 --kappa 0.0 0.01 0.02 0.03 0.04 0.05 0.6 0.07 0.08 --zPos -50 -40 -30 -20 -10 0 10 20 30
+
+output.lyr --n 1.0 2.0 1.0 --kappa 0.0 0.0 0.0 --z -50 0 50
+
+defaults:
+
+*) If kappa is not specified, set them all to zero
+
+*) If z is not specified, create equally spaced layers between -100 and 100
+
+*) Output: Place a color bar next to each image (colorbar())
\ No newline at end of file
diff --git a/layerview.py b/layerview.py
new file mode 100644
index 0000000..437a94c
--- /dev/null
+++ b/layerview.py
@@ -0,0 +1,176 @@
+# create a function that displays the output when run this way:
+#	python layerview.py ouput.dat
+
+import sys
+import os
+from time import time
+import subprocess
+import struct
+import numpy as np
+import matplotlib
+import math
+import matplotlib.pyplot as plt
+
+from mpl_toolkits.axes_grid1 import ImageGrid
+
+def intensity(E):
+    Econj = np.conj(E)
+    I = np.sum(E*Econj, axis=-1)
+    return np.real(I)
+
+#evaluate a solved homogeneous substrate
+# Returns a complex NxMx3 array representing the cross section of the field at Y=0
+def evaluate(Depths, k, d, n0, sz, Pt, Pr, X, Y, Z):
+    Depths = np.array(Depths)
+    sz = np.array(sz)
+    Pt = np.array(Pt)
+    Pr = np.array(Pr)
+    s = np.array(d) * n0
+    #allocate space for layer indices
+    LI = np.zeros(Z.shape, dtype=np.int)
+            
+    #find the layer index for each sample point
+    L = len(Depths)
+    LI[Z < Depths[0]] = 0
+    for l in range(L-1):
+        idx = np.logical_and(Z > Depths[l], Z <= Depths[l+1])
+        LI[idx] = l
+        LI[Z > Depths[-1]] = L - 1
+    
+    #calculate the appropriate phase shift for the wave transmitted through the layer
+    Ph_t = np.exp(1j * k * sz[LI] * (Z - Depths[LI]))
+    
+    #calculate the appropriate phase shift for the wave reflected off of the layer boundary
+    LIp = LI + 1
+    LIp[LIp >= L] = 0
+    Ph_r = np.exp(-1j * k * sz[LI] * (Z - Depths[LIp]))
+    Ph_r[LI >= L-1] = 0
+    
+    #calculate the phase shift based on the X and Y positions
+    Ph_xy = np.exp(1j * k * (s[0] * X + s[1] * Y))
+    
+    #apply the phase shifts
+    Et = Pt[:, LI] * Ph_t[:, :]
+    Er = Pr[:, LI] * Ph_r[:, :]
+    
+    #add everything together coherently
+    E = (Et + Er) * Ph_xy[:, :]
+    
+    #return the electric field
+    return np.moveaxis(E, 0, -1)
+        
+class planewave:
+    def __int__(self):
+        self.LAYERS = 0                          #Number of layers.                          int
+        self.depths = []                         #z positions of layers. [1, 5, ..., 10]     double
+        self.k0 = 0.0                              #wavenumber at free space.                  double
+        self.d = []                              #direction of propogation. [0.5, 0]         double
+        self.n0 = 0.0+0.0j                           #the refractive index of the first layer.   complex<double>
+        self.sz = []                             #z-component of propagation for each layer. complex<double>
+        self.Pt = [[] for i in range(3)]         #transmission                               complex<double>
+        self.Pr = [[],[],[]]                     #refraction                                 complex<double>
+        
+# display a binary file produced using the coupled wave C code
+def layer(strc):    
+    f = open(strc, "rb")
+
+    # create an empty plane wave structure
+    L = planewave()
+    L.depths = []
+    L.d = []
+    L.sz = []
+    L.Pt = [[],[],[]]
+    L.Pr = [[],[],[]]
+
+    # open the input file for reading
+    file_bytes = os.path.getsize(strc)
+
+    # calculate the number of layers in the sample
+    L.LAYERS = int((file_bytes/8-5)/15)
+
+    # load the raw layer data into the plane wave structure
+    data_raw = struct.unpack('d' * (15*L.LAYERS+5), f.read((15*L.LAYERS+5)* 8))
+    data = np.asarray(data_raw)    
+    L.k0 = data[0]
+    L.d.append(data[1])
+    L.d.append(data[2])
+    L.n0 = complex(data[3], data[4])
+
+    # load each layer's plane waves from the binary file
+    for i in range(L.LAYERS):
+        L.depths.append(data[5+15*i])
+        L.sz.append(complex(data[6+15*i], data[7+15*i]))
+        L.Pt[0].append(complex(data[8+15*i], data[9+15*i]))
+        L.Pt[1].append(complex(data[15*i+10], data[15*i+11]))
+        L.Pt[2].append(complex(data[15*i+12], data[15*i+13]))
+        L.Pr[0].append(complex(data[15*i+14], data[15*i+15]))
+        L.Pr[1].append(complex(data[15*i+16], data[15*i+17]))
+        L.Pr[2].append(complex(data[15*i+18], data[15*i+19]))
+
+    N = 512															# simulation resolution NxM
+    M = 1024
+    #DAVID: Don't hard-code the dimensions - you'll have to calculate them based on the sample information in the file
+    D = [-110, 110, 0, 60]											# dimensions of the simulation
+    x = np.linspace(D[2], D[3], N)									# set the sample points for the simulation
+    z = np.linspace(D[0], D[1], M)
+    [X, Z] = np.meshgrid(x, z)										# create a mesh grid to evaluate layers
+    Y = np.zeros(X.shape)
+
+    # evaluate the field across all layers
+    E = evaluate(L.depths, L.k0, L.d, L.n0, L.sz, L.Pt, L.Pr, X, Y, Z)
+    Er = np.real(E)
+    I = intensity(E)
+
+    plt.set_cmap("afmhot")											# set the color map
+    plt.subplot(1, 4, 1)
+    plt.imshow(Er[:, :, 0], extent=(D[3], D[2], D[1], D[0]))
+    #plt.colorbar()
+    plt.title("Ex")
+
+    plt.subplot(1, 4, 2)
+    plt.imshow(Er[:, :, 1], extent=(D[3], D[2], D[1], D[0]))
+    #plt.colorbar()
+    plt.title("Ey")
+
+    plt.subplot(1, 4, 3)
+    plt.imshow(Er[:, :, 2], extent=(D[3], D[2], D[1], D[0]))
+    #plt.colorbar()
+    plt.title("Ez")
+
+    plt.subplot(1, 4, 4)
+    plt.imshow(I, extent=(D[3], D[2], D[1], D[0]))
+    plt.colorbar()
+    plt.title("I")
+    
+    #fig = plt.figure(1, (5, 10))
+    #plt.set_cmap("afmhot")
+    #matplotlib.rcParams.update({'font.size': 10})
+    #grid = ImageGrid(fig, rect = 211, nrows_ncols = (1, 3), axes_pad = 0.2, label_mode = "1", cbar_mode = "single", cbar_size = "18%")
+    #Title = ["Ex", "Ey", "Ez"]
+    #for i in range(3):
+        # grid[i].axis('off')
+    #    im = grid[i].imshow(Er[..., i], extent=(D[3], D[2], D[1], D[0]), interpolation="nearest")
+    #    grid[i].set_title(Title[i])
+    #grid.cbar_axes[0].colorbar(im)
+    #plt.title("E")
+    #plt.subplot(212)
+    #plt.imshow(I, extent=(D[3], D[2], D[1], D[0]))
+    #plt.title("I")
+    #plt.colorbar()
+    plt.show()
+
+# function displays usage text to the console
+def usage():
+	print("Usage:")
+	print("     layerview input.dat")
+
+if __name__ == '__main__':
+    start = time()
+    if len(sys.argv) < 2:				# if there are no command line arguments
+    	usage()							# display the usage text
+    	exit()							# exit
+    else:
+    	layer(sys.argv[1])				# otherwise display the given data file
+
+    end = time()
+    print("The elapsed time is " + str(end - start) + " s. ")
\ No newline at end of file
diff --git a/src/layer.cpp b/src/layer.cpp
new file mode 100644
index 0000000..e5f6505
--- /dev/null
+++ b/src/layer.cpp
@@ -0,0 +1,556 @@
+#include "layer.h"
+#include "linalg.h"   //LAPACKE support for Visual Studio
+
+#include <cusparse.h>
+#include <cuda_runtime.h>
+//#include "cublas_v2.h"
+#include "cusolverSp.h"
+/*----------------------------GPU-----------------------------*/
+
+
+//Cross product.c is result.
+void crossProduct(vector<complex<double>>* a, vector<complex<double>>* b,		//The given matrices.
+					vector<complex<double>>* c) {								//Matrix to be gotten.
+	c->push_back((*a)[1] * (*b)[2] - (*a)[2] * (*b)[1]);
+	c->push_back((*a)[2] * (*b)[0] - (*a)[0] * (*b)[2]);
+	c->push_back((*a)[0] * (*b)[1] - (*a)[1] * (*b)[0]);
+}
+
+//Calculate the norm for a matrix.
+complex<double> Norm(vector<complex<double>>* E) {
+	complex<double> sum = 0;
+	for (unsigned int i = 0; i < E->size(); i++) {
+		sum += (*E)[i] * (*E)[i];
+	}
+	return sqrt(sum);
+}
+
+//Normalize matrix.
+void Normalize(vector<complex<double>>* E) {
+	complex<double> sum = 0;
+	for (unsigned int i = 0; i < E->size(); i++) {
+		sum += (*E)[i] * (*E)[i];
+	}
+	for (unsigned int i = 0; i < E->size(); i++) {
+		(*E)[i] = (*E)[i] / sqrt(sum);
+
+	}
+}
+
+//Orthogonalization.
+void orthogonalize(vector<complex<double>>* E_0rtho, vector<complex<double>>* E0, vector<complex<double>>* d) {
+	vector<complex<double>> s;
+	if (d->size() == 2) {
+		complex<double> dz = sqrt(1.0+0i - pow((*d)[0], 2) - pow((*d)[1], 2));
+		d->push_back(dz);
+	}
+	crossProduct(E0, d, &s);
+	crossProduct(d, &s, E_0rtho);
+	vector<complex<double>>().swap(s);
+}
+
+/*--------------------------------------------------Define Class layersample.--------------------------------------------------------*/
+
+/*Do not try to replace "int" as "size_t". 
+This will result in a bunch of warnings and if we continuously change the type of M_rowInd and M_colInd, the EXCEPTION will occur again.*/
+size_t layersample::ii(size_t l, size_t c, size_t d) {		//ii(l, c, d) means the column indexes for every element.
+	return l * 6 + d * 3 + c - 3;
+}				
+
+void layersample::generate_linsys(size_t LAYERS,
+									vector<complex<double>>& M,				//All non-zero values in "A" matirx.(A * X = b)
+									vector<complex<double>>& b,				//The right hand side column vector.
+									vector<complex<double>>& E,				//orthogonalized E0 vectors
+									vector<complex<double>>* P,
+									bool CPU_op) {			//Solution of the matrices multiplication.
+	//Calculate the sz component for each layer.
+	s.clear();										//s is the plane wave direction scaled by the refractive index.
+	for (size_t i = 0; i < 2; i++)
+		s.push_back(d[i] * n[0]);
+	sz.clear();
+	for (int l = 0; l < LAYERS; l++) {
+		sz.push_back(sqrt(pow(n[l], 2) - pow(s[0], 2) - pow(s[1], 2)));
+	}
+
+	if (!CPU_op){
+		//Computer in GPU.
+		vector<int> M_rowInd;												//Sparse matrix M CSR ->row index
+		vector<int> M_colInd;												//Sparse matrix M CSR ->number of elements
+		M_rowInd.push_back(0);
+		////Build M by setting constraints based on Gauss's Law.
+		for (size_t l = 0; l < LAYERS; l++) {
+			//Set the upward components for each layer.
+			//Layer "LAYERS-1" doesn't have a upward component.
+			if (l != LAYERS - 1) {
+				M.push_back(s[0]);
+				M_colInd.push_back((int)ii(l, 0, 1));
+				M.push_back(s[1]);
+				M_colInd.push_back((int)ii(l, 1, 1));
+				M.push_back(-sz[l]);
+				M_colInd.push_back((int)ii(l, 2, 1));
+				M_rowInd.push_back((int)M.size());
+				b.push_back(0);
+			}
+			//Set the downward components for each layer.
+			if (l != 0) {
+				M.push_back(s[0]);
+				M_colInd.push_back((int)ii(l, 0, 0));
+				M.push_back(s[1]);
+				M_colInd.push_back((int)ii(l, 1, 0));
+				M.push_back(sz[l]);
+				M_colInd.push_back((int)ii(l, 2, 0));
+				M_rowInd.push_back((int)M.size());
+				b.push_back(0);
+			}
+		}
+		//Continue to build M by enforcing a continuous field across boundaries.
+		complex<double> arg, arg_in, B;		
+		for (size_t l = 1; l < LAYERS; l++) {
+			complex<double> sz0 = sz[l - 1];
+			complex<double> sz1 = sz[l];
+
+			//Representation of A = np.exp(1j * k0 * sz0 * (self.z[l] - self.z[l - 1]))
+			complex<double> A_in = k * sz0 * (z[l] - z[l - 1]);
+			complex<double> A_in2 = { -A_in.imag(), A_in.real() };
+			complex<double> A = exp(A_in2);
+
+			if (l < LAYERS - 1) {
+				double dl = z[l] - z[l + 1];
+				arg_in = -k * sz1 * (complex<double>)dl;
+				arg = { -arg_in.imag(), arg_in.real() };
+				B = exp(arg);
+			}
+			//if this is the second layer, use the simplified equations that account for the incident field
+			if (l == 1) {
+				M.push_back(1);
+				M_colInd.push_back((int)ii(0, 0, 1));
+				M.push_back(-1);
+				M_colInd.push_back((int)ii(1, 0, 0));
+				if (LAYERS > 2) {
+					M.push_back(-B);
+					M_colInd.push_back((int)ii(1, 0, 1));
+				}
+				M_rowInd.push_back((int)M.size());
+				b.push_back(-A * E[0]);
+
+				M.push_back(1);
+				M_colInd.push_back((int)ii(0, 1, 1));
+				M.push_back(-1);
+				M_colInd.push_back((int)ii(1, 1, 0));
+				if (LAYERS > 2) {
+					M.push_back(-B);
+					M_colInd.push_back((int)ii(1, 1, 1));
+				}
+				M_rowInd.push_back((int)M.size());
+				b.push_back(-A * E[l]);
+
+				M.push_back(sz0);
+				M_colInd.push_back((int)ii(0, 1, 1));
+				M.push_back(s[1]);
+				M_colInd.push_back((int)ii(0, 2, 1));
+				M.push_back(sz1);
+				M_colInd.push_back((int)ii(1, 1, 0));
+				M.push_back(-s[1]);
+				M_colInd.push_back((int)ii(1, 2, 0));
+				if (LAYERS > 2) {
+					M.push_back(-B * sz1);
+					M_colInd.push_back((int)ii(1, 1, 1));
+					M.push_back(-B * s[1]);
+					M_colInd.push_back((int)ii(1, 2, 1));
+				}
+				M_rowInd.push_back((int)M.size());
+				b.push_back(A * sz0 * E[1] - A * s[1] * E[2]);
+
+				M.push_back(-sz0);
+				M_colInd.push_back((int)ii(0, 0, 1));
+				M.push_back(-s[0]);
+				M_colInd.push_back((int)ii(0, 2, 1));
+				M.push_back(-sz1);
+				M_colInd.push_back((int)ii(1, 0, 0));
+				M.push_back(s[0]);
+				M_colInd.push_back((int)ii(1, 2, 0));
+				if (LAYERS > 2) {
+					M.push_back(B * sz1);
+					M_colInd.push_back((int)ii(1, 0, 1));
+					M.push_back(B* s[0]);
+					M_colInd.push_back((int)ii(1, 2, 1));
+				}
+				M_rowInd.push_back((int)M.size());
+				b.push_back(A * s[0] * E[2] - A * sz0 * E[0]);
+			}
+			else if (l == LAYERS - 1) {
+				M.push_back(A);
+				M_colInd.push_back((int)ii(l - 1, 0, 0));
+				M.push_back(1);
+				M_colInd.push_back((int)ii(l - 1, 0, 1));
+				M.push_back(-1);
+				M_colInd.push_back((int)ii(l, 0, 0));
+				M_rowInd.push_back((int)M.size());
+				b.push_back(0);
+
+				M.push_back(A);
+				M_colInd.push_back((int)ii(l - 1, 1, 0));
+				M.push_back(1);
+				M_colInd.push_back((int)ii(l - 1, 1, 1));
+				M.push_back(-1);
+				M_colInd.push_back((int)ii(l, 1, 0));
+				M_rowInd.push_back((int)M.size());
+				b.push_back(0);
+
+				M.push_back(-A * sz0);
+				M_colInd.push_back((int)ii(l - 1, 1, 0));
+				M.push_back(A * s[1]);
+				M_colInd.push_back((int)ii(l - 1, 2, 0));
+				M.push_back(sz0);
+				M_colInd.push_back((int)ii(l - 1, 1, 1));
+				M.push_back(s[1]);
+				M_colInd.push_back((int)ii(l - 1, 2, 1));
+				M.push_back(sz1);
+				M_colInd.push_back((int)ii(l, 1, 0));
+				M.push_back(-s[1]);
+				M_colInd.push_back((int)ii(l, 2, 0));
+				M_rowInd.push_back((int)M.size());
+				b.push_back(0);
+
+				M.push_back(A * sz0);
+				M_colInd.push_back((int)ii(l - 1, 0, 0));
+				M.push_back(-A * s[0]);
+				M_colInd.push_back((int)ii(l - 1, 2, 0));
+				M.push_back(-sz0);
+				M_colInd.push_back((int)ii(l - 1, 0, 1));
+				M.push_back(-s[0]);
+				M_colInd.push_back((int)ii(l - 1, 2, 1));
+				M.push_back(-sz1);
+				M_colInd.push_back((int)ii(l, 0, 0));
+				M.push_back(s[0]);
+				M_colInd.push_back((int)ii(l, 2, 0));
+				M_rowInd.push_back((int)M.size());
+				b.push_back(0);
+			}
+			else {
+				M.push_back(A);
+				M_colInd.push_back((int)ii(l - 1, 0, 0));
+				M.push_back(1);
+				M_colInd.push_back((int)ii(l - 1, 0, 1));
+				M.push_back(-1);
+				M_colInd.push_back((int)ii(l, 0, 0));
+				M.push_back(-B);
+				M_colInd.push_back((int)ii(l, 0, 1));
+				M_rowInd.push_back((int)M.size());
+				b.push_back(0);
+
+				M.push_back(A);
+				M_colInd.push_back((int)ii(l - 1, 1, 0));
+				M.push_back(1);
+				M_colInd.push_back((int)ii(l - 1, 1, 1));
+				M.push_back(-1);
+				M_colInd.push_back((int)ii(l, 1, 0));
+				M.push_back(-B);
+				M_colInd.push_back((int)ii(l, 1, 1));
+				M_rowInd.push_back((int)M.size());
+				b.push_back(0);
+
+				M.push_back(-A * sz0);
+				M_colInd.push_back((int)ii(l - 1, 1, 0));
+				M.push_back(A * s[1]);
+				M_colInd.push_back((int)ii(l - 1, 2, 0));
+				M.push_back(sz0);
+				M_colInd.push_back((int)ii(l - 1, 1, 1));
+				M.push_back(s[1]);
+				M_colInd.push_back((int)ii(l - 1, 2, 1));
+				M.push_back(sz1);
+				M_colInd.push_back((int)ii(l, 1, 0));
+				M.push_back(-s[1]);
+				M_colInd.push_back((int)ii(l, 2, 0));
+				M.push_back(-B * sz1);
+				M_colInd.push_back((int)ii(l, 1, 1));
+				M.push_back(-B * s[1]);
+				M_colInd.push_back((int)ii(l, 2, 1));
+				M_rowInd.push_back((int)M.size());
+				b.push_back(0);
+
+				M.push_back(A * sz0);
+				M_colInd.push_back((int)ii(l - 1, 0, 0));
+				M.push_back(-A * s[0]);
+				M_colInd.push_back((int)ii(l - 1, 2, 0));
+				M.push_back(-sz0);
+				M_colInd.push_back((int)ii(l - 1, 0, 1));
+				M.push_back(-s[0]);
+				M_colInd.push_back((int)ii(l - 1, 2, 1));
+				M.push_back(-sz1);
+				M_colInd.push_back((int)ii(l, 0, 0));
+				M.push_back(s[0]);
+				M_colInd.push_back((int)ii(l, 2, 0));
+				M.push_back(B * sz1);
+				M_colInd.push_back((int)ii(l, 0, 1));
+				M.push_back(B * s[0]);
+				M_colInd.push_back((int)ii(l, 2, 1));
+				M_rowInd.push_back((int)M.size());
+				b.push_back(0);
+			}
+		}
+		cudaError_t cudaStatus;
+		cusolverStatus_t cusolverStatus;
+		cusparseStatus_t cusparseStatus;
+		cusolverSpHandle_t handle = NULL;
+		cusparseHandle_t cusparseHandle = NULL;
+		cudaStream_t stream = NULL;
+		cusparseMatDescr_t descrM = NULL;
+		cuDoubleComplex * csrValM_, * b_, *P_;
+		size_t rowsA = b.size(), colsA = b.size(), nnA = M.size(), baseM_ = 0;		//nnA is the number of non-zero elements.
+		int* csrRowPtrM = NULL;														//row index M_rowInd projected to GPU.
+		int* csrColIndM = NULL; //CSR(A) from I/O.									// M_colInd projected to GPU.
+		double tol = 1.e-12; int reorder = 0;
+		int singularity = 0;
+	
+		//Initialize.
+		cusolverStatus = cusolverSpCreate(&handle);
+		int num = 1;
+		cudaStatus = cudaGetDevice(&num);
+		cusparseStatus = cusparseCreate(&cusparseHandle);
+		cudaStatus = cudaStreamCreate(&stream);
+		cusolverStatus = cusolverSpSetStream(handle, stream);
+		cusparseStatus = cusparseSetStream(cusparseHandle, stream);
+		cusparseStatus = cusparseCreateMatDescr(&descrM);
+		cusparseStatus = cusparseSetMatType(descrM, CUSPARSE_MATRIX_TYPE_GENERAL);
+		if (baseM_) {
+			cusparseStatus = cusparseSetMatIndexBase(descrM, CUSPARSE_INDEX_BASE_ONE);
+		}
+		else {
+			cusparseStatus = cusparseSetMatIndexBase(descrM, CUSPARSE_INDEX_BASE_ZERO);
+		}
+
+		cudaStatus = cudaMalloc((void**)&csrRowPtrM, sizeof(int) * (rowsA + 1));				//Projection of M_rowInd.
+		cudaStatus = cudaMalloc((void**)&csrColIndM, sizeof(int) * M_colInd.size());			//Projection of M_colInd.
+		cudaStatus = cudaMalloc((void**)&csrValM_, sizeof(cuDoubleComplex) * M.size());			//Projection of M.
+		cudaStatus = cudaMalloc((void**)&b_, sizeof(cuDoubleComplex) * b.size());				//Projection of b.
+		cudaStatus = cudaMalloc((void**)&P_, sizeof(cuDoubleComplex) * b.size());				//Projection of P.
+
+		cudaStatus = cudaMemcpy(csrValM_, M.data(), M.size() * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
+		cudaStatus = cudaMemcpy(csrRowPtrM, M_rowInd.data(), M_rowInd.size() * sizeof(int), cudaMemcpyHostToDevice);
+		cudaStatus = cudaMemcpy(csrColIndM, M_colInd.data(), M_colInd.size() * sizeof(int), cudaMemcpyHostToDevice);
+		cudaStatus = cudaMemcpy(b_, b.data(), b.size() * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
+		// Output the current CUDA error.
+		//if (cudaStatus != cudaSuccess) {
+		//	cout<<"%s " << cudaGetErrorString(cudaStatus) << endl;
+		//}
+		P->resize(rowsA);							//P is the to-be-solved matrix in CPU.
+		//QR method.
+		cusolverStatus = cusolverSpZcsrlsvqr(handle, (int)rowsA, (int)nnA, descrM, csrValM_, csrRowPtrM, csrColIndM, b_, tol, reorder, P_, (int*)&singularity);
+		/*cusparseStatus = cusparseZsctr(*cusparseHandle, rowsA, g_z, g_Q, g_x, CUSPARSE_INDEX_BASE_ZERO);*/
+		cudaStatus = cudaMemcpyAsync(P->data(), P_, sizeof(cuDoubleComplex) * rowsA, cudaMemcpyDeviceToHost, stream);
+	
+		cudaStatus = cudaFree(csrRowPtrM);
+		cudaStatus = cudaFree(csrColIndM);
+		cudaStatus = cudaFree(csrValM_);
+		cudaStatus = cudaFree(b_);
+		cudaStatus = cudaFree(P_);
+		vector<int>().swap(M_rowInd);
+		vector<int>().swap(M_colInd);
+	}
+	else {
+		//Work on CPU.
+		M.resize(6 * (LAYERS - 1) * 6 * (LAYERS - 1));
+		b.resize(6 * (LAYERS - 1));
+
+		size_t ei = 0;
+		//Set constraints based on Gauss's Law.
+		for (size_t l = 0; l < LAYERS; l++) {
+			//Set the upward components for each layer.
+			//Layer "LAYERS-1" doesn't have a upward component.
+			if (l != LAYERS - 1) {
+				M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = s[0];
+				M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = s[1];
+				M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = -sz[l];
+				ei += 1;
+			}
+			//Set the downward components for each layer.
+			if (l != 0) {
+				M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = s[0];
+				M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = s[1];
+				M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = sz[l];
+				ei += 1;
+			}
+		}
+		//Enforce a continuous field across boundaries.
+		complex<double> arg, arg_in, B;
+		for (size_t l = 1; l < LAYERS; l++) {
+			complex<double> sz0 = sz[l - 1];
+			complex<double> sz1 = sz[l];
+
+			//Representation of A = np.exp(1j * k0 * sz0 * (self.z[l] - self.z[l - 1]))
+			complex<double> A_in = k * sz0 * (z[l] - z[l - 1]);
+			complex<double> A_in2 = { -A_in.imag(), A_in.real() };
+			complex<double> A = exp(A_in2);
+
+			if (l < LAYERS - 1) {
+				double dl = z[l] - z[l + 1];
+				arg_in = -k * sz1 * (complex<double>)dl;
+				arg = { -arg_in.imag(), arg_in.real() };
+				B = exp(arg);
+			}
+			//if this is the second layer, use the simplified equations that account for the incident field
+			if (l == 1) {
+				M[ei * 6 * (LAYERS - 1) + ii(0, 0, 1)] = 1;
+				M[ei * 6 * (LAYERS - 1) + ii(1, 0, 0)] = -1;
+				if (LAYERS > 2) {
+					M[ei * 6 * (LAYERS - 1) + ii(1, 0, 1)] = -B;
+				}
+				b[ei] = -A * E[0];
+				ei += 1;
+
+				M[ei * 6 * (LAYERS - 1) + ii(0, 1, 1)] = 1;
+				M[ei * 6 * (LAYERS - 1) + ii(1, 1, 0)] = -1;
+				if (LAYERS > 2) {
+					M[ei * 6 * (LAYERS - 1) + ii(1, 1, 1)] = -B;
+				}
+				b[ei] = -A * E[l];
+				ei += 1;
+
+				M[ei * 6 * (LAYERS - 1) + ii(0, 2, 1)] = s[1];
+				M[ei * 6 * (LAYERS - 1) + ii(0, 1, 1)] = sz0;
+				M[ei * 6 * (LAYERS - 1) + ii(1, 2, 0)] = -s[1];
+				M[ei * 6 * (LAYERS - 1) + ii(1, 1, 0)] = sz1;
+				if (LAYERS > 2) {
+					M[ei * 6 * (LAYERS - 1) + ii(1, 2, 1)] = -B * s[1];
+					M[ei * 6 * (LAYERS - 1) + ii(1, 1, 1)] = -B * sz1;
+				}
+				b[ei] = A * sz0 * E[1] - A * s[1] * E[2];
+				ei += 1;
+
+				M[ei * 6 * (LAYERS - 1) + ii(0, 0, 1)] = -sz0;
+				M[ei * 6 * (LAYERS - 1) + ii(0, 2, 1)] = -s[0];
+				M[ei * 6 * (LAYERS - 1) + ii(1, 0, 0)] = -sz1;
+				M[ei * 6 * (LAYERS - 1) + ii(1, 2, 0)] = s[0];
+				if (LAYERS > 2) {
+					M[ei * 6 * (LAYERS - 1) + ii(1, 0, 1)] = B * sz1;
+					M[ei * 6 * (LAYERS - 1) + ii(1, 2, 1)] = B * s[0];
+				}
+				b[ei] = A * s[0] * E[2] - A * sz0 * E[0];
+				ei += 1;
+			}
+			else if (l == LAYERS - 1) {
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A;
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = 1;
+				M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -1;
+				ei += 1;
+
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = A;
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = 1;
+				M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = -1;
+				ei += 1;
+
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = A * s[1];
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = -A * sz0;
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = s[1];
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = sz0;
+				M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = -s[1];
+				M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = sz1;
+				ei += 1;
+
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A * sz0;
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = -A * s[0];
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = -sz0;
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = -s[0];
+				M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -sz1;
+				M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = s[0];
+				ei += 1;
+			}
+			else {
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A;
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = 1;
+				M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -1;
+				M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = -B;
+				ei += 1;
+
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = A;
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = 1;
+				M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = -1;
+				M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = -B;
+				ei += 1;
+
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = A * s[1];
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = -A * sz0;
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = s[1];
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = sz0;
+				M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = -s[1];
+				M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = sz1;
+				M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = -B * s[1];
+				M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = -B * sz1;
+				ei += 1;
+
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A * sz0;
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = -A * s[0];
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = -sz0;
+				M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = -s[0];
+				M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -sz1;
+				M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = s[0];
+				M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = B * sz1;
+				M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = B * s[0];
+				ei += 1;
+			}
+		}
+
+		complex<double>* M_ = new complex<double>[M.size()];
+		complex<double>* b_ = new complex<double>[b.size()];
+		complex<double>* P_ = new complex<double>[b.size()];
+		for (size_t i = 0; i < M.size(); i++) {
+			M_[i] = M[i];
+			if (i < b.size())	b_[i] = b[i];
+		}
+		LINALG_inverse(M_, (int)(6 * (LAYERS - 1)));
+		LINALG_zgemm((int)(6 * (LAYERS - 1)), (int)1, (int)(6 * (LAYERS - 1)), M_, (int)(6 * (LAYERS - 1)), b_, (int)1, P_, (int)1);
+		for (int i = 0; i < b.size(); i++) {
+			P->push_back(P_[i]);
+		}
+
+		delete[] M_;
+		delete[] b_;
+		delete[] P_;
+	}
+}
+
+
+//Build matrix and get E.
+void layersample::solve(vector<complex<double>>* E, bool CPU_op) {		//orthogonalized E0 vectors.
+	size_t LAYERS = n.size();
+	//Store the matrix and RHS vector.
+	vector<complex<double>> M;						//All non-zero values in the sparse matrix.
+	vector<complex<double>> b;						//The right hand side column vector.
+
+	//Evaluate the linear system.
+	vector<complex<double>> P;						//Solution of matrix.
+	layersample::generate_linsys(LAYERS, M, b, *E, &P, CPU_op);
+
+	//Store the coefficients for each layer.
+	//Pt[3, L] transmission. Pr[3, L] reflection.
+	Pt.resize(3 * LAYERS); 
+	Pr.resize(3 * LAYERS);
+
+	for (size_t l = 0; l < LAYERS; l++) {
+		if (l == 0) {
+			Pt[0] = (complex<double>)(*E)[0];
+			Pt[LAYERS] = (complex<double>)(*E)[1];
+			Pt[2 * LAYERS] = (complex<double>)(*E)[2];
+		}
+		else {
+			Pt[l] = P[ii(l, 0, 0)];
+			Pt[l + LAYERS] = P[ii(l, 1, 0)];
+			Pt[l + 2 * LAYERS] = P[ii(l, 2, 0)];
+		}
+
+		if (l == LAYERS - 1) {
+			Pr[LAYERS - 1] = 0;
+			Pr[2 * LAYERS - 1] = 0;
+			Pr[3 * LAYERS - 1] = 0;
+		}
+		else {
+			Pr[l] = P[ii(l, 0, 1)];
+			Pr[l + LAYERS] = P[ii(l, 1, 1)];
+			Pr[l + 2 * LAYERS] = P[ii(l, 2, 1)];
+		}
+	}
+	vector<complex<double>>().swap(M);
+	vector<complex<double>>().swap(b);
+	vector<complex<double>>().swap(P);
+}
\ No newline at end of file
diff --git a/src/layer.h b/src/layer.h
new file mode 100644
index 0000000..5f85d42
--- /dev/null
+++ b/src/layer.h
@@ -0,0 +1,35 @@
+#ifndef LAYER_H
+#define LAYER_H
+#include <vector>
+#include <complex>
+using namespace std;
+
+void crossProduct(vector<complex<double>>* a, vector<complex<double>>* b, vector<complex<double>>* c);
+complex<double> Norm(vector<complex<double>>* E);
+void Normalize(vector<complex<double>>* E);
+vector<vector<double> > transpose(vector<vector<double> >* matrix);
+void orthogonalize(vector<complex<double>>* E_0, vector<complex<double>>* E0, vector<complex<double>>* d);
+
+class layersample {
+
+public:
+	double k;						//wavenumber.
+	vector<complex<double>> n;		//refractive index.	
+	vector<double> z;				//z postions.
+	vector<complex<double>> s;		//propagation direction. Keep it for output.
+	vector<complex<double>> sz;		//propagation direction. Keep it for output.
+	vector<complex<double>> d;		//direction of propagation of the plane wave.
+	vector<complex<double>> Pt;		//transimission.
+	vector<complex<double>> Pr;		//reflection.
+	//Calulate the index of the field component associated with a layer.
+	// l is the layer index. c is the component(x, y, z). d is the direction(0for transmission, 1 for reflection).
+	size_t ii(size_t l, size_t c, size_t d);
+
+	//Generate the linear system corresponding to this layered sample and plane wave.
+	void generate_linsys(size_t LAYERS, vector<complex<double>>& M, vector<complex<double>>& b, vector<complex<double>>& E, vector<complex<double>>* P, bool CPU_op);
+
+	//Build matrix and get E.
+	void solve(vector<complex<double>>* E, bool CPU_op);
+};
+
+#endif
\ No newline at end of file
diff --git a/src/linalg.cpp b/src/linalg.cpp
new file mode 100644
index 0000000..31bd4ef
--- /dev/null
+++ b/src/linalg.cpp
@@ -0,0 +1,63 @@
+#include "linalg.h"
+
+// This file contains a set of wrapper functions that are linked to the corresponding functions in CLAPACK.
+extern "C" {
+#include "f2c.h"
+#include "clapack.h"
+#include "cblas.h"
+}
+
+
+void LINALG_zgetrf(
+	int M,
+	int N,
+	std::complex<double>* A,
+	int LDA,
+	int* IPIV)
+{
+	integer INFO;
+	zgetrf_((integer*)&M, (integer*)&N, (doublecomplex*)A, (integer*)&LDA, (integer*)IPIV, &INFO);
+}
+
+void LINALG_zgetri(
+	size_t N,
+	std::complex<double>* A,
+	int LDA,
+	int* IPIV)
+{
+	integer LWORK = -1;
+	std::complex<double> WORK[1];
+	integer INFO;
+	zgetri_((integer*)&N, (doublecomplex*)A, (integer*)&LDA, (integer*)IPIV, (doublecomplex*)WORK, &LWORK, &INFO);
+}
+void LINALG_inverse(std::complex<double>* A, int N)
+{
+	int* IPIV = new int[N + (size_t)1];
+	integer LWORK = N * N;
+	std::complex<double>* WORK = new std::complex<double>[LWORK];
+	integer INFO;
+
+	zgetrf_((integer*)&N, (integer*)&N, (doublecomplex*)A, (integer*)&N, (integer*)IPIV, &INFO);
+	zgetri_((integer*)&N, (doublecomplex*)A, (integer*)&N, (integer*)IPIV, (doublecomplex*)WORK, &LWORK, &INFO);
+
+	delete[] IPIV;
+	delete[] WORK;
+}
+
+void LINALG_zgemm(
+	const int M,	//A(M*K) B(K*N)
+	const int N,
+	const int K,
+	std::complex<double>* A,
+	const int LDA,   //=K
+	std::complex<double>* B,
+	const int LDB, //=N
+	std::complex<double>* C,
+	const int LDC) //=columns of C.
+{
+	std::complex<double> alpha = 1;
+	std::complex<double> beta = 0;
+	cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, (OPENBLAS_CONST blasint)M, (OPENBLAS_CONST blasint)N, (OPENBLAS_CONST blasint)K,
+		&alpha, A, (OPENBLAS_CONST blasint)LDA, B, (OPENBLAS_CONST blasint)LDB, &beta, C, (OPENBLAS_CONST blasint)LDC);
+
+}
\ No newline at end of file
diff --git a/src/linalg.h b/src/linalg.h
new file mode 100644
index 0000000..98ddb69
--- /dev/null
+++ b/src/linalg.h
@@ -0,0 +1,17 @@
+// This file contains a set of wrapper functions that are linked to the corresponding functions in CLAPACK
+#include <complex>
+
+//Solve matrix inverse.
+void LINALG_inverse(std::complex<double>* A, int N);
+
+//Solve matrix multiplication. C = A * B.
+void LINALG_zgemm(
+	const int M,	//A(M*K) B(K*N)
+	const int N,
+	const int K,
+	std::complex<double>* A,
+	const int LDA,   //=K
+	std::complex<double>* B,
+	const int LDB, //=N
+	std::complex<double>* C,
+	const int LDC); //=columns of C.
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..1e8bd99
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,220 @@
+﻿//Update the data output format in example_layer().
+#include "layer.h"
+#include <windows.h>
+
+#include <fstream>
+#include <iostream>
+#include "stim/parser/arguments.h"
+
+using namespace std;
+#define PI 3.14159265358979323846264338328
+
+bool ASCII_output = false;
+bool CPU_op = false;
+
+void advertise() {
+	std::cout << std::endl << std::endl;
+	std::cout << "=========================================================================" << std::endl;
+	//std::cout << "Thank you for using the NetMets network comparison tool!" << std::endl;
+	std::cout << "Scalable Tissue Imaging and Modeling (STIM) Lab, University of Houston" << std::endl;
+	std::cout << "=========================================================================" << std::endl << std::endl;
+}
+
+//Calculate Ez from vector d and Ex Ey.
+void E_Cal(vector<complex<double>>* E, vector<complex<double>>* d) {
+	if (d->size() == 2) {
+		complex<double> dz = sqrt(1.0 + 0i - pow((*d)[0], 2) - pow((*d)[1], 2));
+		d->push_back(dz);
+	}
+	E->push_back(-((*E)[0] * (*d)[0] + (*E)[1] * (*d)[1]) / (*d)[2]);
+}
+void output_binary(std::string filename, layersample& layers) {
+	size_t L = layers.n.size();											// get the number of layers
+	std::ofstream outFile;
+	outFile.open(filename, std::ofstream::binary);						// open the output file for binary writing
+	if (outFile) {
+		outFile.write((char*)&layers.k, sizeof(double));
+		outFile.write((char*)&layers.d[0], sizeof(double));
+		outFile.write((char*)&layers.d[1], sizeof(double));
+		outFile.write((char*)&layers.n[0], sizeof(double) * 2);
+
+		for (size_t i = 0; i < L; i++) {
+			outFile.write((char*)&layers.z[i], sizeof(double));
+			outFile.write((char*)&layers.sz[i], 2 * sizeof(double));
+			outFile.write((char*)&layers.Pt[i], 2 * sizeof(double));
+			outFile.write((char*)&layers.Pt[i + L], 2 * sizeof(double));
+			outFile.write((char*)&layers.Pt[i + 2 * L], 2 * sizeof(double));
+			outFile.write((char*)&layers.Pr[i], 2 * sizeof(double));
+			outFile.write((char*)&layers.Pr[i + L], 2 * sizeof(double));
+			outFile.write((char*)&layers.Pr[i + 2 * L], 2 * sizeof(double));
+		}
+		outFile.close();
+	}
+	else {
+		std::cout << "ERROR opening output file for binary writing: " << filename << std::endl;
+	}
+}
+
+void output_txt(std::string filename, layersample& layers) {
+	size_t L = layers.n.size();											// get the number of layers
+	std::ofstream outFile;
+	outFile.open(filename);												// open the output file for text writing
+	if (!outFile) {
+		std::cout << "ERROR: Could not open file " << filename << std::endl;
+		exit(1);
+	}
+	int width = 15;
+
+	outFile << "--------------------------------" << endl;
+	outFile << "The wavenumber at free space is : " << layers.k << endl;
+	for (size_t i = 0; i < L; i++) {
+		if (i == 0) {
+			outFile << "--------------------------------" << endl;
+			outFile << "LAYER " << i << " (z = " << layers.z[i] << ")" << endl;
+			outFile << "refractive index: " << layers.n[i].real() << " + i " << layers.n[i].imag() << endl;
+			outFile << "----------------------" << endl;
+			outFile << "sx = " << setw(width) << layers.s[0].real() << " + i " << layers.s[0].imag() << endl;
+			outFile << "sy = " << setw(width) << layers.s[1].real() << " + i " << layers.s[1].imag() << endl;
+			outFile << "sz = " << setw(width) << layers.sz[i].real() << " + i " << layers.sz[i].imag() << endl;
+			outFile << "----->>>>>" << endl;
+			outFile << " X = " << setw(width) << layers.Pt[i].real() << " + i " << layers.Pt[i].imag() << endl;
+			outFile << " Y = " << setw(width) << layers.Pt[i + L].real() << " + i " << layers.Pt[i + L].imag() << endl;
+			outFile << " Z = " << setw(width) << layers.Pt[i + 2 * L].real() << " + i " << layers.Pt[i + 2 * L].imag() << endl;
+			outFile << "<<<<<-----" << endl;
+			outFile << " X = " << setw(width) << layers.Pr[i].real() << " + i " << layers.Pr[i].imag() << endl;
+			outFile << " Y = " << setw(width) << layers.Pr[i + L].real() << " + i " << layers.Pr[i + L].imag() << endl;
+			outFile << " Z = " << setw(width) << layers.Pr[i + 2 * L].real() << " + i " << layers.Pr[i + 2 * L].imag() << endl;
+		}
+		else {
+			outFile << "----------------------" << endl;
+			outFile << "LAYER " << i << " (z = " << layers.z[i] << ")" << endl;
+			outFile << "refractive index: " << layers.n[i].real() << " + i " << layers.n[i].imag() << endl;
+			outFile << "----------------------" << endl;
+			outFile << "sx = " << setw(width) << layers.s[0].real() << " + i " << layers.s[0].imag() << endl;
+			outFile << "sy = " << setw(width) << layers.s[1].real() << " + i " << layers.s[1].imag() << endl;
+			outFile << "sz = " << setw(width) << layers.sz[i].real() << " + i " << layers.sz[i].imag() << endl;
+			outFile << "----->>>>>" << endl;
+			outFile << " X = " << setw(width) << layers.Pt[i].real() << " + i " << layers.Pt[i].imag() << endl;
+			outFile << " Y = " << setw(width) << layers.Pt[i + L].real() << " + i " << layers.Pt[i + L].imag() << endl;
+			outFile << " Z = " << setw(width) << layers.Pt[i + 2 * L].real() << " + i " << layers.Pt[i + 2 * L].imag() << endl;
+
+			outFile << "<<<<<-----" << endl;
+			outFile << " X = " << setw(width) << layers.Pr[i].real() << " + i " << layers.Pr[i].imag() << endl;
+			outFile << " Y = " << setw(width) << layers.Pr[i + L].real() << " + i " << layers.Pr[i + L].imag() << endl;
+			outFile << " Z = " << setw(width) << layers.Pr[i + 2 * L].real() << " + i " << layers.Pr[i + 2 * L].imag() << endl;
+		}
+
+	}
+	outFile.close();
+}
+
+void calculate_layer(std::string outName,
+					vector<complex<double>>* ns,		// the refractive index.
+					vector<double>* depths,				// z-direction position.
+					vector<complex<double>>* E0,		// the initialized E0.
+					vector<complex<double>>* d0,			// direction of propagation of the plane wave.
+					double k0) {						// the wavenumber at free space.
+	std::string outName_ext(outName, size(outName)-4, size(outName)-1);			// extract the extension of the output file				
+	E_Cal(E0, d0);					// make sure that both vectors are orthogonal.
+
+	//Creat a new layersample and initialize.
+	layersample Layer1;									// create a layered sample
+	Layer1.n = *ns;										// set a pointer to the refractive indices
+	Layer1.z = *depths;									// set a pointer to the layer depths
+	Layer1.d = *d0;
+	Layer1.k = k0;
+
+	LARGE_INTEGER t1, t2, tc;							// Timing.
+	QueryPerformanceFrequency(&tc);
+	QueryPerformanceCounter(&t1);
+	Layer1.solve(E0, CPU_op);			// Solve for the substrate field in GPU.
+	QueryPerformanceCounter(&t2);
+	std::cout << "time for 'solving linear functions':" << (t2.QuadPart - t1.QuadPart) / (double)tc.QuadPart << "ms."<< std::endl;
+	//output(outName, Layer1);
+	if (ASCII_output)
+		output_txt(outName, Layer1);		
+	else
+		output_binary(outName, Layer1);
+}
+
+//Main function for example_layer.
+int main(int argc, char* argv[]) {
+	stim::arglist args;
+
+	//Basic argument lists.
+	args.add("help", "prints this help");
+	args.add("s", "propagation direction vector (x, y)", "0.5 0.0", "[-1.0, 1.0]");
+	args.add("l", "wavelength", "5.0", "in arbitrary units (ex. um)");
+	args.add("Ex", "complex amplitude (x direction)", "0.5 0.0");
+	args.add("Ey", "complex amplitude (y direction)", "0.5 0.0");
+	args.add("z", "layer positions");
+	args.add("n", "layer optical path length (real refractive index)", "1.0 1.4 1.4 1.0");
+	args.add("kappa", "layer absorbance (imaginary refractive index)");
+	args.add("ascii", "output as an ASCII file");
+	args.add("CPU", "execute the program in CPU");
+	args.parse(argc, argv);
+
+	if (args["help"].is_set()) {				// test for help
+		advertise();							// output the advertisement
+		std::cout << args.str();				// output arguments
+		exit(1);								// exit
+	}
+
+	ASCII_output = args["ascii"];			// if the ascii flag is set, output an ascii file
+	CPU_op = args["CPU"];					// if the CPU_op is set, run the program in CPU.
+	std::string outName;
+	if (args.nargs() == 1) {
+		outName = args.arg(0);
+	}
+	else if (args.nargs() == 0) {
+		if (ASCII_output)
+			outName = "output.txt";
+		else
+			outName = "output.lyr";
+	}
+	else {
+		std::cout << "ERROR: Too many arguments." << std::endl;
+		exit(1);
+	}
+	
+
+	vector<complex<double>> d;					//direction of propagation of the plane wave Init: {0.5, 0}
+	if (args["s"].nargs() == 2) {
+		d.push_back({ (double)args["s"].as_float(0), 0 });
+		d.push_back({ (double)args["s"].as_float(1), 0 });
+	}
+
+	double l0 = (double)args["l"].as_float(0);		//wavelength.Init: l0 = 5;
+	double k0 = 2 * PI / l0;						//Calculate the free-space wavenumber.
+
+	complex<double> Ex = { (double)args["Ex"].as_float(0), (double)args["Ex"].as_float(1) };	//Input E. Init: {1, 1, 0}
+	complex<double> Ey = { (double)args["Ey"].as_float(0), (double)args["Ey"].as_float(1) };
+	vector<complex<double>> E0;
+	E0.push_back(Ex);
+	E0.push_back(Ey);
+
+	//const int LAYERS = args["L"].as_int(); //LAYERS Init: 4
+
+	vector<double> depths;
+	vector<complex<double>> ns;
+	size_t i = 0;
+	while(args["n"].is_set() && args["n"].as_float(i)!=0) {											//n is the real part.
+		if (args["kappa"].is_set())																	//kappa is the imaginary part.
+			ns.push_back({ (double)args["n"].as_float(i), (double)args["kappa"].as_float(i) });		
+		else
+			ns.push_back({ (double)args["n"].as_float(i), 0 });										//ns <- n + i * kappa
+		i++;
+	}
+	for (size_t j = 0; j < i; j++)
+		if (args["z"].is_set())
+			depths.push_back((double)args["z"].as_float(i));
+		else {
+			depths.push_back((double)-100 + j * 200 / i);
+		}
+	/*---------------------------------------example_layer--------------------------------------------*/
+
+	calculate_layer(outName, &ns, &depths, &E0, &d, k0);
+
+	return 0;
+	std::cin.get();
+}
\ No newline at end of file
--
libgit2 0.21.4