release / multilayer

Browse Code »

Commit 71d5696d385e7d2e662bbd1e98264293479ed780

Authored by David Mayerich 2020-11-09 11:17:29 -0600

0 parents

First commit after development and testing

Showing 12 changed files with 1182 additions and 0 deletions Show diff stats

Inline Side-by-side

CMakeLists.txt 0 → 100644

Show/Hide comments View file @71d5696

		1	+++ a/CMakeLists.txt
		1	+#Specify the version being used aswell as the language
		2	+cmake_minimum_required(VERSION 3.12)
		3	+
		4	+#Name your project here
		5	+project(multilayer)
		6	+
		7	+#set the module directory
		8	+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}")
		9	+
		10	+#default to release mode
		11	+if(NOT CMAKE_BUILD_TYPE)
		12	+ set(CMAKE_BUILD_TYPE Release)
		13	+endif(NOT CMAKE_BUILD_TYPE)
		14	+
		15	+#build the executable in the binary directory on MS Visual Studio
		16	+if ( MSVC )
		17	+ SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}")
		18	+ SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
		19	+ SET( LIBRARY_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}")
		20	+ SET( LIBRARY_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
		21	+ add_definitions(-D_CRT_SECURE_NO_WARNINGS)
		22	+ add_definitions(-D_SCL_SECURE_NO_WARNINGS)
		23	+endif ( MSVC )
		24	+
		25	+
		26	+#find packages-----------------------------------
		27	+#find the pthreads package
		28	+find_package(Threads)
		29	+
		30	+#find the X11 package
		31	+find_package(X11)
		32	+
		33	+#find CUDA, mostly for LA stuff using cuBLAS
		34	+find_package(CUDA REQUIRED)
		35	+
		36	+#find Boost
		37	+#find_package(Boost)
		38	+
		39	+#find the STIM library
		40	+find_package(STIM REQUIRED)
		41	+
		42	+#find LAPACK and supporting link_libraries
		43	+find_package(clapack CONFIG REQUIRED)
		44	+find_package(OpenBLAS CONFIG REQUIRED)
		45	+
		46	+#include include directories
		47	+include_directories(${CUDA_INCLUDE_DIRS}
		48	+ ${STIM_INCLUDE_DIRS}
		49	+)
		50	+
		51	+#Assign source files to the appropriate variables to easily associate them with executables
		52	+file(GLOB SRC "src/*.cpp")
		53	+
		54	+#-----------------------------Create the executable--------------------------
		55	+#-----------------------------Show all four examples-------------------------
		56	+add_executable(multilayer
		57	+ ${SRC}
		58	+)
		59	+link_directories(${CUDA_BIN_DIRS})
		60	+target_link_libraries(multilayer ${CUDA_LIBRARIES}
		61	+ ${CUDA_CUBLAS_LIBRARIES}
		62	+ ${CUDA_cusparse_LIBRARY}
		63	+ ${CUDA_cusolver_LIBRARY}
		64	+ ${CUDA_CUFFT_LIBRARIES}
		65	+ OpenBLAS::OpenBLAS
		66	+ f2c lapack
		67	+)
		68	+
		69	+
0	\ No newline at end of file	70	\ No newline at end of file

FindSTIM.cmake 0 → 100644

Show/Hide comments View file @71d5696

		1	+++ a/FindSTIM.cmake
		1	+# finds the STIM library (downloads it if it isn't present)
		2	+# set STIMLIB_PATH to the directory containing the stim subdirectory (the stim repository)
		3	+
		4	+include(FindPackageHandleStandardArgs)
		5	+
		6	+set(STIM_ROOT $ENV{STIM_ROOT})
		7	+
		8	+IF(NOT STIM_ROOT)
		9	+ MESSAGE("ERROR: STIM_ROOT environment variable must be set!")
		10	+ENDIF(NOT STIM_ROOT)
		11	+
		12	+ FIND_PATH(STIM_INCLUDE_DIRS DOC "Path to STIM include directory."
		13	+ NAMES stim/image/image.h
		14	+ PATHS ${STIM_ROOT})
		15	+
		16	+find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIRS)

desktop.ini 0 → 100644

Show/Hide comments View file @71d5696

Binary files /dev/null and a/desktop.ini differ

docs/Readme_BytesOrder.txt 0 → 100644

Show/Hide comments View file @71d5696

		1	+++ a/docs/Readme_BytesOrder.txt
		1	+Order of "output.lyr" parameters.
		2	+
		3	+The wavenumber in free space: k0 double 8B
		4	+The direction of propagation: d double*2 16B
		5	+The refractive index in the first layer: n[0] complex<double> 16B
		6	+
		7	+for i in LAYERS:
		8	+ z positions[i]: z[i] double 8B * LAYERS
		9	+ z-component of propagation directions: sz[i] complex<double> 16B * LAYERS
		10	+ Transmission: Ptx[i] complex<double> 16B * LAYERS
		11	+ Transmission: Pty[i] complex<double> 16B * LAYERS
		12	+ Transmission: Ptz[i] complex<double> 16B * LAYERS
		13	+ Reflection: Prx[i] complex<double> 16B * LAYERS
		14	+ Reflection: Pry[i] complex<double> 16B * LAYERS
		15	+ Reflection: Prz[i] complex<double> 16B * LAYERS
		16	+
		17	+
		18	+Total number of 8-byte double values stored in the file (each complex<double> counts as two doubles):
		19	+ 15 * LAYERS + 5

docs/lyr_format.pptx 0 → 100644

View file @71d5696

No preview for this file type

docs/testcases.txt 0 → 100644

Show/Hide comments View file @71d5696

		1	+++ a/docs/testcases.txt
		1	+output.lyr --n 1.0 1.2 1.3 1.4 1.5 1.4 1.3 1.2 1.1 --kappa 0.0 0.01 0.02 0.03 0.04 0.05 0.6 0.07 0.08 --zPos -50 -40 -30 -20 -10 0 10 20 30
		2	+
		3	+output.lyr --n 1.0 2.0 1.0 --kappa 0.0 0.0 0.0 --z -50 0 50
		4	+
		5	+defaults:
		6	+
		7	+*) If kappa is not specified, set them all to zero
		8	+
		9	+*) If z is not specified, create equally spaced layers between -100 and 100
		10	+
		11	+*) Output: Place a color bar next to each image (colorbar())
0	\ No newline at end of file	12	\ No newline at end of file

layerview.py 0 → 100644

Show/Hide comments View file @71d5696

		1	+++ a/layerview.py
		1	+# create a function that displays the output when run this way:
		2	+# python layerview.py output.dat
		3	+
		4	+import sys
		5	+import os
		6	+from time import time
		7	+import subprocess
		8	+import struct
		9	+import numpy as np
		10	+import matplotlib
		11	+import math
		12	+import matplotlib.pyplot as plt
		13	+
		14	+from mpl_toolkits.axes_grid1 import ImageGrid
		15	+
		16	+def intensity(E):
		17	+ Econj = np.conj(E)
		18	+ I = np.sum(E*Econj, axis=-1)
		19	+ return np.real(I)
		20	+
		21	+#evaluate a solved homogeneous substrate
		22	+# Returns a complex NxMx3 array representing the cross section of the field at Y=0
		23	+def evaluate(Depths, k, d, n0, sz, Pt, Pr, X, Y, Z):
		24	+ Depths = np.array(Depths)
		25	+ sz = np.array(sz)
		26	+ Pt = np.array(Pt)
		27	+ Pr = np.array(Pr)
		28	+ s = np.array(d) * n0
		29	+ #allocate space for layer indices
		30	+ LI = np.zeros(Z.shape, dtype=np.int)
		31	+
		32	+ #find the layer index for each sample point
		33	+ L = len(Depths)
		34	+ LI[Z < Depths[0]] = 0
		35	+ for l in range(L-1):
		36	+ idx = np.logical_and(Z > Depths[l], Z <= Depths[l+1])
		37	+ LI[idx] = l
		38	+ LI[Z > Depths[-1]] = L - 1
		39	+
		40	+ #calculate the appropriate phase shift for the wave transmitted through the layer
		41	+ Ph_t = np.exp(1j * k * sz[LI] * (Z - Depths[LI]))
		42	+
		43	+ #calculate the appropriate phase shift for the wave reflected off of the layer boundary
		44	+ LIp = LI + 1
		45	+ LIp[LIp >= L] = 0
		46	+ Ph_r = np.exp(-1j * k * sz[LI] * (Z - Depths[LIp]))
		47	+ Ph_r[LI >= L-1] = 0
		48	+
		49	+ #calculate the phase shift based on the X and Y positions
		50	+ Ph_xy = np.exp(1j * k * (s[0] * X + s[1] * Y))
		51	+
		52	+ #apply the phase shifts
		53	+ Et = Pt[:, LI] * Ph_t[:, :]
		54	+ Er = Pr[:, LI] * Ph_r[:, :]
		55	+
		56	+ #add everything together coherently
		57	+ E = (Et + Er) * Ph_xy[:, :]
		58	+
		59	+ #return the electric field
		60	+ return np.moveaxis(E, 0, -1)
		61	+
		62	+class planewave:
		63	+ def __int__(self):
		64	+ self.LAYERS = 0 #Number of layers. int
		65	+ self.depths = [] #z positions of layers. [1, 5, ..., 10] double
		66	+ self.k0 = 0.0 #wavenumber at free space. double
		67	+ self.d = [] #direction of propogation. [0.5, 0] double
		68	+ self.n0 = 0.0+0.0j #the refractive index of the first layer. complex<double>
		69	+ self.sz = [] #z-component of propagation for each layer. complex<double>
		70	+ self.Pt = [[] for i in range(3)] #transmission complex<double>
		71	+ self.Pr = [[],[],[]] #refraction complex<double>
		72	+
		73	+# display a binary file produced using the coupled wave C code
		74	+def layer(strc):
		75	+ f = open(strc, "rb")
		76	+
		77	+ # create an empty plane wave structure
		78	+ L = planewave()
		79	+ L.depths = []
		80	+ L.d = []
		81	+ L.sz = []
		82	+ L.Pt = [[],[],[]]
		83	+ L.Pr = [[],[],[]]
		84	+
		85	+ # open the input file for reading
		86	+ file_bytes = os.path.getsize(strc)
		87	+
		88	+ # calculate the number of layers in the sample
		89	+ L.LAYERS = int((file_bytes/8-5)/15)
		90	+
		91	+ # load the raw layer data into the plane wave structure
		92	+ data_raw = struct.unpack('d' * (15L.LAYERS+5), f.read((15L.LAYERS+5)* 8))
		93	+ data = np.asarray(data_raw)
		94	+ L.k0 = data[0]
		95	+ L.d.append(data[1])
		96	+ L.d.append(data[2])
		97	+ L.n0 = complex(data[3], data[4])
		98	+
		99	+ # load each layer's plane waves from the binary file
		100	+ for i in range(L.LAYERS):
		101	+ L.depths.append(data[5+15*i])
		102	+ L.sz.append(complex(data[6+15i], data[7+15i]))
		103	+ L.Pt[0].append(complex(data[8+15i], data[9+15i]))
		104	+ L.Pt[1].append(complex(data[15i+10], data[15i+11]))
		105	+ L.Pt[2].append(complex(data[15i+12], data[15i+13]))
		106	+ L.Pr[0].append(complex(data[15i+14], data[15i+15]))
		107	+ L.Pr[1].append(complex(data[15i+16], data[15i+17]))
		108	+ L.Pr[2].append(complex(data[15i+18], data[15i+19]))
		109	+
		110	+ N = 512 # simulation resolution NxM
		111	+ M = 1024
		112	+ #DAVID: Don't hard-code the dimensions - you'll have to calculate them based on the sample information in the file
		113	+ D = [-110, 110, 0, 60] # dimensions of the simulation
		114	+ x = np.linspace(D[2], D[3], N) # set the sample points for the simulation
		115	+ z = np.linspace(D[0], D[1], M)
		116	+ [X, Z] = np.meshgrid(x, z) # create a mesh grid to evaluate layers
		117	+ Y = np.zeros(X.shape)
		118	+
		119	+ # evaluate the field across all layers
		120	+ E = evaluate(L.depths, L.k0, L.d, L.n0, L.sz, L.Pt, L.Pr, X, Y, Z)
		121	+ Er = np.real(E)
		122	+ I = intensity(E)
		123	+
		124	+ plt.set_cmap("afmhot") # set the color map
		125	+ plt.subplot(1, 4, 1)
		126	+ plt.imshow(Er[:, :, 0], extent=(D[3], D[2], D[1], D[0]))
		127	+ #plt.colorbar()
		128	+ plt.title("Ex")
		129	+
		130	+ plt.subplot(1, 4, 2)
		131	+ plt.imshow(Er[:, :, 1], extent=(D[3], D[2], D[1], D[0]))
		132	+ #plt.colorbar()
		133	+ plt.title("Ey")
		134	+
		135	+ plt.subplot(1, 4, 3)
		136	+ plt.imshow(Er[:, :, 2], extent=(D[3], D[2], D[1], D[0]))
		137	+ #plt.colorbar()
		138	+ plt.title("Ez")
		139	+
		140	+ plt.subplot(1, 4, 4)
		141	+ plt.imshow(I, extent=(D[3], D[2], D[1], D[0]))
		142	+ plt.colorbar()
		143	+ plt.title("I")
		144	+
		145	+ #fig = plt.figure(1, (5, 10))
		146	+ #plt.set_cmap("afmhot")
		147	+ #matplotlib.rcParams.update({'font.size': 10})
		148	+ #grid = ImageGrid(fig, rect = 211, nrows_ncols = (1, 3), axes_pad = 0.2, label_mode = "1", cbar_mode = "single", cbar_size = "18%")
		149	+ #Title = ["Ex", "Ey", "Ez"]
		150	+ #for i in range(3):
		151	+ # grid[i].axis('off')
		152	+ # im = grid[i].imshow(Er[..., i], extent=(D[3], D[2], D[1], D[0]), interpolation="nearest")
		153	+ # grid[i].set_title(Title[i])
		154	+ #grid.cbar_axes[0].colorbar(im)
		155	+ #plt.title("E")
		156	+ #plt.subplot(212)
		157	+ #plt.imshow(I, extent=(D[3], D[2], D[1], D[0]))
		158	+ #plt.title("I")
		159	+ #plt.colorbar()
		160	+ plt.show()
		161	+
		162	+# function displays usage text to the console
		163	+def usage():
		164	+ print("Usage:")
		165	+ print(" layerview input.dat")
		166	+
		167	+if __name__ == '__main__':
		168	+ start = time()
		169	+ if len(sys.argv) < 2: # if there are no command line arguments
		170	+ usage() # display the usage text
		171	+ exit() # exit
		172	+ else:
		173	+ layer(sys.argv[1]) # otherwise display the given data file
		174	+
		175	+ end = time()
		176	+ print("The elapsed time is " + str(end - start) + " s. ")
0	\ No newline at end of file	177	\ No newline at end of file

src/layer.cpp 0 → 100644

Show/Hide comments View file @71d5696

		1	+++ a/src/layer.cpp
		1	+#include "layer.h"
		2	+#include "linalg.h" //LAPACKE support for Visual Studio
		3	+
		4	+#include <cusparse.h>
		5	+#include <cuda_runtime.h>
		6	+//#include "cublas_v2.h"
		7	+#include "cusolverSp.h"
		8	+/*----------------------------GPU-----------------------------*/
		9	+
		10	+
		11	+//Cross product.c is result.
		12	+void crossProduct(vector<complex<double>>* a, vector<complex<double>>* b, //The given matrices.
		13	+ vector<complex<double>>* c) { //Matrix to be gotten.
		14	+ c->push_back((a)[1] (b)[2] - (a)[2] * (*b)[1]);
		15	+ c->push_back((a)[2] (b)[0] - (a)[0] * (*b)[2]);
		16	+ c->push_back((a)[0] (b)[1] - (a)[1] * (*b)[0]);
		17	+}
		18	+
		19	+//Calculate the norm for a matrix.
		20	+complex<double> Norm(vector<complex<double>>* E) {
		21	+ complex<double> sum = 0;
		22	+ for (unsigned int i = 0; i < E->size(); i++) {
		23	+ sum += (E)[i] (*E)[i];
		24	+ }
		25	+ return sqrt(sum);
		26	+}
		27	+
		28	+//Normalize matrix.
		29	+void Normalize(vector<complex<double>>* E) {
		30	+ complex<double> sum = 0;
		31	+ for (unsigned int i = 0; i < E->size(); i++) {
		32	+ sum += (E)[i] (*E)[i];
		33	+ }
		34	+ for (unsigned int i = 0; i < E->size(); i++) {
		35	+ (E)[i] = (E)[i] / sqrt(sum);
		36	+
		37	+ }
		38	+}
		39	+
		40	+//Orthogonalization.
		41	+void orthogonalize(vector<complex<double>>* E_0rtho, vector<complex<double>>* E0, vector<complex<double>>* d) {
		42	+ vector<complex<double>> s;
		43	+ if (d->size() == 2) {
		44	+ complex<double> dz = sqrt(1.0+0i - pow((d)[0], 2) - pow((d)[1], 2));
		45	+ d->push_back(dz);
		46	+ }
		47	+ crossProduct(E0, d, &s);
		48	+ crossProduct(d, &s, E_0rtho);
		49	+ vector<complex<double>>().swap(s);
		50	+}
		51	+
		52	+/*--------------------------------------------------Define Class layersample.--------------------------------------------------------*/
		53	+
		54	+/*Do not try to replace "int" as "size_t".
		55	+This will result in a bunch of warnings and if we continuously change the type of M_rowInd and M_colInd, the EXCEPTION will occur again.*/
		56	+size_t layersample::ii(size_t l, size_t c, size_t d) { //ii(l, c, d) means the column indexes for every element.
		57	+ return l * 6 + d * 3 + c - 3;
		58	+}
		59	+
		60	+void layersample::generate_linsys(size_t LAYERS,
		61	+ vector<complex<double>>& M, //All non-zero values in "A" matirx.(A * X = b)
		62	+ vector<complex<double>>& b, //The right hand side column vector.
		63	+ vector<complex<double>>& E, //orthogonalized E0 vectors
		64	+ vector<complex<double>>* P,
		65	+ bool CPU_op) { //Solution of the matrices multiplication.
		66	+ //Calculate the sz component for each layer.
		67	+ s.clear(); //s is the plane wave direction scaled by the refractive index.
		68	+ for (size_t i = 0; i < 2; i++)
		69	+ s.push_back(d[i] * n[0]);
		70	+ sz.clear();
		71	+ for (int l = 0; l < LAYERS; l++) {
		72	+ sz.push_back(sqrt(pow(n[l], 2) - pow(s[0], 2) - pow(s[1], 2)));
		73	+ }
		74	+
		75	+ if (!CPU_op){
		76	+ //Computer in GPU.
		77	+ vector<int> M_rowInd; //Sparse matrix M CSR ->row index
		78	+ vector<int> M_colInd; //Sparse matrix M CSR ->number of elements
		79	+ M_rowInd.push_back(0);
		80	+ ////Build M by setting constraints based on Gauss's Law.
		81	+ for (size_t l = 0; l < LAYERS; l++) {
		82	+ //Set the upward components for each layer.
		83	+ //Layer "LAYERS-1" doesn't have a upward component.
		84	+ if (l != LAYERS - 1) {
		85	+ M.push_back(s[0]);
		86	+ M_colInd.push_back((int)ii(l, 0, 1));
		87	+ M.push_back(s[1]);
		88	+ M_colInd.push_back((int)ii(l, 1, 1));
		89	+ M.push_back(-sz[l]);
		90	+ M_colInd.push_back((int)ii(l, 2, 1));
		91	+ M_rowInd.push_back((int)M.size());
		92	+ b.push_back(0);
		93	+ }
		94	+ //Set the downward components for each layer.
		95	+ if (l != 0) {
		96	+ M.push_back(s[0]);
		97	+ M_colInd.push_back((int)ii(l, 0, 0));
		98	+ M.push_back(s[1]);
		99	+ M_colInd.push_back((int)ii(l, 1, 0));
		100	+ M.push_back(sz[l]);
		101	+ M_colInd.push_back((int)ii(l, 2, 0));
		102	+ M_rowInd.push_back((int)M.size());
		103	+ b.push_back(0);
		104	+ }
		105	+ }
		106	+ //Continue to build M by enforcing a continuous field across boundaries.
		107	+ complex<double> arg, arg_in, B;
		108	+ for (size_t l = 1; l < LAYERS; l++) {
		109	+ complex<double> sz0 = sz[l - 1];
		110	+ complex<double> sz1 = sz[l];
		111	+
		112	+ //Representation of A = np.exp(1j * k0 * sz0 * (self.z[l] - self.z[l - 1]))
		113	+ complex<double> A_in = k * sz0 * (z[l] - z[l - 1]);
		114	+ complex<double> A_in2 = { -A_in.imag(), A_in.real() };
		115	+ complex<double> A = exp(A_in2);
		116	+
		117	+ if (l < LAYERS - 1) {
		118	+ double dl = z[l] - z[l + 1];
		119	+ arg_in = -k * sz1 * (complex<double>)dl;
		120	+ arg = { -arg_in.imag(), arg_in.real() };
		121	+ B = exp(arg);
		122	+ }
		123	+ //if this is the second layer, use the simplified equations that account for the incident field
		124	+ if (l == 1) {
		125	+ M.push_back(1);
		126	+ M_colInd.push_back((int)ii(0, 0, 1));
		127	+ M.push_back(-1);
		128	+ M_colInd.push_back((int)ii(1, 0, 0));
		129	+ if (LAYERS > 2) {
		130	+ M.push_back(-B);
		131	+ M_colInd.push_back((int)ii(1, 0, 1));
		132	+ }
		133	+ M_rowInd.push_back((int)M.size());
		134	+ b.push_back(-A * E[0]);
		135	+
		136	+ M.push_back(1);
		137	+ M_colInd.push_back((int)ii(0, 1, 1));
		138	+ M.push_back(-1);
		139	+ M_colInd.push_back((int)ii(1, 1, 0));
		140	+ if (LAYERS > 2) {
		141	+ M.push_back(-B);
		142	+ M_colInd.push_back((int)ii(1, 1, 1));
		143	+ }
		144	+ M_rowInd.push_back((int)M.size());
		145	+ b.push_back(-A * E[l]);
		146	+
		147	+ M.push_back(sz0);
		148	+ M_colInd.push_back((int)ii(0, 1, 1));
		149	+ M.push_back(s[1]);
		150	+ M_colInd.push_back((int)ii(0, 2, 1));
		151	+ M.push_back(sz1);
		152	+ M_colInd.push_back((int)ii(1, 1, 0));
		153	+ M.push_back(-s[1]);
		154	+ M_colInd.push_back((int)ii(1, 2, 0));
		155	+ if (LAYERS > 2) {
		156	+ M.push_back(-B * sz1);
		157	+ M_colInd.push_back((int)ii(1, 1, 1));
		158	+ M.push_back(-B * s[1]);
		159	+ M_colInd.push_back((int)ii(1, 2, 1));
		160	+ }
		161	+ M_rowInd.push_back((int)M.size());
		162	+ b.push_back(A * sz0 * E[1] - A * s[1] * E[2]);
		163	+
		164	+ M.push_back(-sz0);
		165	+ M_colInd.push_back((int)ii(0, 0, 1));
		166	+ M.push_back(-s[0]);
		167	+ M_colInd.push_back((int)ii(0, 2, 1));
		168	+ M.push_back(-sz1);
		169	+ M_colInd.push_back((int)ii(1, 0, 0));
		170	+ M.push_back(s[0]);
		171	+ M_colInd.push_back((int)ii(1, 2, 0));
		172	+ if (LAYERS > 2) {
		173	+ M.push_back(B * sz1);
		174	+ M_colInd.push_back((int)ii(1, 0, 1));
		175	+ M.push_back(B* s[0]);
		176	+ M_colInd.push_back((int)ii(1, 2, 1));
		177	+ }
		178	+ M_rowInd.push_back((int)M.size());
		179	+ b.push_back(A * s[0] * E[2] - A * sz0 * E[0]);
		180	+ }
		181	+ else if (l == LAYERS - 1) {
		182	+ M.push_back(A);
		183	+ M_colInd.push_back((int)ii(l - 1, 0, 0));
		184	+ M.push_back(1);
		185	+ M_colInd.push_back((int)ii(l - 1, 0, 1));
		186	+ M.push_back(-1);
		187	+ M_colInd.push_back((int)ii(l, 0, 0));
		188	+ M_rowInd.push_back((int)M.size());
		189	+ b.push_back(0);
		190	+
		191	+ M.push_back(A);
		192	+ M_colInd.push_back((int)ii(l - 1, 1, 0));
		193	+ M.push_back(1);
		194	+ M_colInd.push_back((int)ii(l - 1, 1, 1));
		195	+ M.push_back(-1);
		196	+ M_colInd.push_back((int)ii(l, 1, 0));
		197	+ M_rowInd.push_back((int)M.size());
		198	+ b.push_back(0);
		199	+
		200	+ M.push_back(-A * sz0);
		201	+ M_colInd.push_back((int)ii(l - 1, 1, 0));
		202	+ M.push_back(A * s[1]);
		203	+ M_colInd.push_back((int)ii(l - 1, 2, 0));
		204	+ M.push_back(sz0);
		205	+ M_colInd.push_back((int)ii(l - 1, 1, 1));
		206	+ M.push_back(s[1]);
		207	+ M_colInd.push_back((int)ii(l - 1, 2, 1));
		208	+ M.push_back(sz1);
		209	+ M_colInd.push_back((int)ii(l, 1, 0));
		210	+ M.push_back(-s[1]);
		211	+ M_colInd.push_back((int)ii(l, 2, 0));
		212	+ M_rowInd.push_back((int)M.size());
		213	+ b.push_back(0);
		214	+
		215	+ M.push_back(A * sz0);
		216	+ M_colInd.push_back((int)ii(l - 1, 0, 0));
		217	+ M.push_back(-A * s[0]);
		218	+ M_colInd.push_back((int)ii(l - 1, 2, 0));
		219	+ M.push_back(-sz0);
		220	+ M_colInd.push_back((int)ii(l - 1, 0, 1));
		221	+ M.push_back(-s[0]);
		222	+ M_colInd.push_back((int)ii(l - 1, 2, 1));
		223	+ M.push_back(-sz1);
		224	+ M_colInd.push_back((int)ii(l, 0, 0));
		225	+ M.push_back(s[0]);
		226	+ M_colInd.push_back((int)ii(l, 2, 0));
		227	+ M_rowInd.push_back((int)M.size());
		228	+ b.push_back(0);
		229	+ }
		230	+ else {
		231	+ M.push_back(A);
		232	+ M_colInd.push_back((int)ii(l - 1, 0, 0));
		233	+ M.push_back(1);
		234	+ M_colInd.push_back((int)ii(l - 1, 0, 1));
		235	+ M.push_back(-1);
		236	+ M_colInd.push_back((int)ii(l, 0, 0));
		237	+ M.push_back(-B);
		238	+ M_colInd.push_back((int)ii(l, 0, 1));
		239	+ M_rowInd.push_back((int)M.size());
		240	+ b.push_back(0);
		241	+
		242	+ M.push_back(A);
		243	+ M_colInd.push_back((int)ii(l - 1, 1, 0));
		244	+ M.push_back(1);
		245	+ M_colInd.push_back((int)ii(l - 1, 1, 1));
		246	+ M.push_back(-1);
		247	+ M_colInd.push_back((int)ii(l, 1, 0));
		248	+ M.push_back(-B);
		249	+ M_colInd.push_back((int)ii(l, 1, 1));
		250	+ M_rowInd.push_back((int)M.size());
		251	+ b.push_back(0);
		252	+
		253	+ M.push_back(-A * sz0);
		254	+ M_colInd.push_back((int)ii(l - 1, 1, 0));
		255	+ M.push_back(A * s[1]);
		256	+ M_colInd.push_back((int)ii(l - 1, 2, 0));
		257	+ M.push_back(sz0);
		258	+ M_colInd.push_back((int)ii(l - 1, 1, 1));
		259	+ M.push_back(s[1]);
		260	+ M_colInd.push_back((int)ii(l - 1, 2, 1));
		261	+ M.push_back(sz1);
		262	+ M_colInd.push_back((int)ii(l, 1, 0));
		263	+ M.push_back(-s[1]);
		264	+ M_colInd.push_back((int)ii(l, 2, 0));
		265	+ M.push_back(-B * sz1);
		266	+ M_colInd.push_back((int)ii(l, 1, 1));
		267	+ M.push_back(-B * s[1]);
		268	+ M_colInd.push_back((int)ii(l, 2, 1));
		269	+ M_rowInd.push_back((int)M.size());
		270	+ b.push_back(0);
		271	+
		272	+ M.push_back(A * sz0);
		273	+ M_colInd.push_back((int)ii(l - 1, 0, 0));
		274	+ M.push_back(-A * s[0]);
		275	+ M_colInd.push_back((int)ii(l - 1, 2, 0));
		276	+ M.push_back(-sz0);
		277	+ M_colInd.push_back((int)ii(l - 1, 0, 1));
		278	+ M.push_back(-s[0]);
		279	+ M_colInd.push_back((int)ii(l - 1, 2, 1));
		280	+ M.push_back(-sz1);
		281	+ M_colInd.push_back((int)ii(l, 0, 0));
		282	+ M.push_back(s[0]);
		283	+ M_colInd.push_back((int)ii(l, 2, 0));
		284	+ M.push_back(B * sz1);
		285	+ M_colInd.push_back((int)ii(l, 0, 1));
		286	+ M.push_back(B * s[0]);
		287	+ M_colInd.push_back((int)ii(l, 2, 1));
		288	+ M_rowInd.push_back((int)M.size());
		289	+ b.push_back(0);
		290	+ }
		291	+ }
		292	+ cudaError_t cudaStatus;
		293	+ cusolverStatus_t cusolverStatus;
		294	+ cusparseStatus_t cusparseStatus;
		295	+ cusolverSpHandle_t handle = NULL;
		296	+ cusparseHandle_t cusparseHandle = NULL;
		297	+ cudaStream_t stream = NULL;
		298	+ cusparseMatDescr_t descrM = NULL;
		299	+ cuDoubleComplex * csrValM_, * b_, *P_;
		300	+ size_t rowsA = b.size(), colsA = b.size(), nnA = M.size(), baseM_ = 0; //nnA is the number of non-zero elements.
		301	+ int* csrRowPtrM = NULL; //row index M_rowInd projected to GPU.
		302	+ int* csrColIndM = NULL; //CSR(A) from I/O. // M_colInd projected to GPU.
		303	+ double tol = 1.e-12; int reorder = 0;
		304	+ int singularity = 0;
		305	+
		306	+ //Initialize.
		307	+ cusolverStatus = cusolverSpCreate(&handle);
		308	+ int num = 1;
		309	+ cudaStatus = cudaGetDevice(&num);
		310	+ cusparseStatus = cusparseCreate(&cusparseHandle);
		311	+ cudaStatus = cudaStreamCreate(&stream);
		312	+ cusolverStatus = cusolverSpSetStream(handle, stream);
		313	+ cusparseStatus = cusparseSetStream(cusparseHandle, stream);
		314	+ cusparseStatus = cusparseCreateMatDescr(&descrM);
		315	+ cusparseStatus = cusparseSetMatType(descrM, CUSPARSE_MATRIX_TYPE_GENERAL);
		316	+ if (baseM_) {
		317	+ cusparseStatus = cusparseSetMatIndexBase(descrM, CUSPARSE_INDEX_BASE_ONE);
		318	+ }
		319	+ else {
		320	+ cusparseStatus = cusparseSetMatIndexBase(descrM, CUSPARSE_INDEX_BASE_ZERO);
		321	+ }
		322	+
		323	+ cudaStatus = cudaMalloc((void*)&csrRowPtrM, sizeof(int) (rowsA + 1)); //Projection of M_rowInd.
		324	+ cudaStatus = cudaMalloc((void*)&csrColIndM, sizeof(int) M_colInd.size()); //Projection of M_colInd.
		325	+ cudaStatus = cudaMalloc((void*)&csrValM_, sizeof(cuDoubleComplex) M.size()); //Projection of M.
		326	+ cudaStatus = cudaMalloc((void*)&b_, sizeof(cuDoubleComplex) b.size()); //Projection of b.
		327	+ cudaStatus = cudaMalloc((void*)&P_, sizeof(cuDoubleComplex) b.size()); //Projection of P.
		328	+
		329	+ cudaStatus = cudaMemcpy(csrValM_, M.data(), M.size() * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
		330	+ cudaStatus = cudaMemcpy(csrRowPtrM, M_rowInd.data(), M_rowInd.size() * sizeof(int), cudaMemcpyHostToDevice);
		331	+ cudaStatus = cudaMemcpy(csrColIndM, M_colInd.data(), M_colInd.size() * sizeof(int), cudaMemcpyHostToDevice);
		332	+ cudaStatus = cudaMemcpy(b_, b.data(), b.size() * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
		333	+ // Output the current CUDA error.
		334	+ //if (cudaStatus != cudaSuccess) {
		335	+ // cout<<"%s " << cudaGetErrorString(cudaStatus) << endl;
		336	+ //}
		337	+ P->resize(rowsA); //P is the to-be-solved matrix in CPU.
		338	+ //QR method.
		339	+ cusolverStatus = cusolverSpZcsrlsvqr(handle, (int)rowsA, (int)nnA, descrM, csrValM_, csrRowPtrM, csrColIndM, b_, tol, reorder, P_, (int*)&singularity);
		340	+ /cusparseStatus = cusparseZsctr(cusparseHandle, rowsA, g_z, g_Q, g_x, CUSPARSE_INDEX_BASE_ZERO);*/
		341	+ cudaStatus = cudaMemcpyAsync(P->data(), P_, sizeof(cuDoubleComplex) * rowsA, cudaMemcpyDeviceToHost, stream);
		342	+
		343	+ cudaStatus = cudaFree(csrRowPtrM);
		344	+ cudaStatus = cudaFree(csrColIndM);
		345	+ cudaStatus = cudaFree(csrValM_);
		346	+ cudaStatus = cudaFree(b_);
		347	+ cudaStatus = cudaFree(P_);
		348	+ vector<int>().swap(M_rowInd);
		349	+ vector<int>().swap(M_colInd);
		350	+ }
		351	+ else {
		352	+ //Work on CPU.
		353	+ M.resize(6 * (LAYERS - 1) * 6 * (LAYERS - 1));
		354	+ b.resize(6 * (LAYERS - 1));
		355	+
		356	+ size_t ei = 0;
		357	+ //Set constraints based on Gauss's Law.
		358	+ for (size_t l = 0; l < LAYERS; l++) {
		359	+ //Set the upward components for each layer.
		360	+ //Layer "LAYERS-1" doesn't have a upward component.
		361	+ if (l != LAYERS - 1) {
		362	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = s[0];
		363	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = s[1];
		364	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = -sz[l];
		365	+ ei += 1;
		366	+ }
		367	+ //Set the downward components for each layer.
		368	+ if (l != 0) {
		369	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = s[0];
		370	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = s[1];
		371	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = sz[l];
		372	+ ei += 1;
		373	+ }
		374	+ }
		375	+ //Enforce a continuous field across boundaries.
		376	+ complex<double> arg, arg_in, B;
		377	+ for (size_t l = 1; l < LAYERS; l++) {
		378	+ complex<double> sz0 = sz[l - 1];
		379	+ complex<double> sz1 = sz[l];
		380	+
		381	+ //Representation of A = np.exp(1j * k0 * sz0 * (self.z[l] - self.z[l - 1]))
		382	+ complex<double> A_in = k * sz0 * (z[l] - z[l - 1]);
		383	+ complex<double> A_in2 = { -A_in.imag(), A_in.real() };
		384	+ complex<double> A = exp(A_in2);
		385	+
		386	+ if (l < LAYERS - 1) {
		387	+ double dl = z[l] - z[l + 1];
		388	+ arg_in = -k * sz1 * (complex<double>)dl;
		389	+ arg = { -arg_in.imag(), arg_in.real() };
		390	+ B = exp(arg);
		391	+ }
		392	+ //if this is the second layer, use the simplified equations that account for the incident field
		393	+ if (l == 1) {
		394	+ M[ei * 6 * (LAYERS - 1) + ii(0, 0, 1)] = 1;
		395	+ M[ei * 6 * (LAYERS - 1) + ii(1, 0, 0)] = -1;
		396	+ if (LAYERS > 2) {
		397	+ M[ei * 6 * (LAYERS - 1) + ii(1, 0, 1)] = -B;
		398	+ }
		399	+ b[ei] = -A * E[0];
		400	+ ei += 1;
		401	+
		402	+ M[ei * 6 * (LAYERS - 1) + ii(0, 1, 1)] = 1;
		403	+ M[ei * 6 * (LAYERS - 1) + ii(1, 1, 0)] = -1;
		404	+ if (LAYERS > 2) {
		405	+ M[ei * 6 * (LAYERS - 1) + ii(1, 1, 1)] = -B;
		406	+ }
		407	+ b[ei] = -A * E[l];
		408	+ ei += 1;
		409	+
		410	+ M[ei * 6 * (LAYERS - 1) + ii(0, 2, 1)] = s[1];
		411	+ M[ei * 6 * (LAYERS - 1) + ii(0, 1, 1)] = sz0;
		412	+ M[ei * 6 * (LAYERS - 1) + ii(1, 2, 0)] = -s[1];
		413	+ M[ei * 6 * (LAYERS - 1) + ii(1, 1, 0)] = sz1;
		414	+ if (LAYERS > 2) {
		415	+ M[ei * 6 * (LAYERS - 1) + ii(1, 2, 1)] = -B * s[1];
		416	+ M[ei * 6 * (LAYERS - 1) + ii(1, 1, 1)] = -B * sz1;
		417	+ }
		418	+ b[ei] = A * sz0 * E[1] - A * s[1] * E[2];
		419	+ ei += 1;
		420	+
		421	+ M[ei * 6 * (LAYERS - 1) + ii(0, 0, 1)] = -sz0;
		422	+ M[ei * 6 * (LAYERS - 1) + ii(0, 2, 1)] = -s[0];
		423	+ M[ei * 6 * (LAYERS - 1) + ii(1, 0, 0)] = -sz1;
		424	+ M[ei * 6 * (LAYERS - 1) + ii(1, 2, 0)] = s[0];
		425	+ if (LAYERS > 2) {
		426	+ M[ei * 6 * (LAYERS - 1) + ii(1, 0, 1)] = B * sz1;
		427	+ M[ei * 6 * (LAYERS - 1) + ii(1, 2, 1)] = B * s[0];
		428	+ }
		429	+ b[ei] = A * s[0] * E[2] - A * sz0 * E[0];
		430	+ ei += 1;
		431	+ }
		432	+ else if (l == LAYERS - 1) {
		433	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A;
		434	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = 1;
		435	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -1;
		436	+ ei += 1;
		437	+
		438	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = A;
		439	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = 1;
		440	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = -1;
		441	+ ei += 1;
		442	+
		443	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = A * s[1];
		444	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = -A * sz0;
		445	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = s[1];
		446	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = sz0;
		447	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = -s[1];
		448	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = sz1;
		449	+ ei += 1;
		450	+
		451	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A * sz0;
		452	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = -A * s[0];
		453	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = -sz0;
		454	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = -s[0];
		455	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -sz1;
		456	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = s[0];
		457	+ ei += 1;
		458	+ }
		459	+ else {
		460	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A;
		461	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = 1;
		462	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -1;
		463	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = -B;
		464	+ ei += 1;
		465	+
		466	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = A;
		467	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = 1;
		468	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = -1;
		469	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = -B;
		470	+ ei += 1;
		471	+
		472	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = A * s[1];
		473	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = -A * sz0;
		474	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = s[1];
		475	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = sz0;
		476	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = -s[1];
		477	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = sz1;
		478	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = -B * s[1];
		479	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = -B * sz1;
		480	+ ei += 1;
		481	+
		482	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A * sz0;
		483	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = -A * s[0];
		484	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = -sz0;
		485	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = -s[0];
		486	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -sz1;
		487	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = s[0];
		488	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = B * sz1;
		489	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = B * s[0];
		490	+ ei += 1;
		491	+ }
		492	+ }
		493	+
		494	+ complex<double>* M_ = new complex<double>[M.size()];
		495	+ complex<double>* b_ = new complex<double>[b.size()];
		496	+ complex<double>* P_ = new complex<double>[b.size()];
		497	+ for (size_t i = 0; i < M.size(); i++) {
		498	+ M_[i] = M[i];
		499	+ if (i < b.size()) b_[i] = b[i];
		500	+ }
		501	+ LINALG_inverse(M_, (int)(6 * (LAYERS - 1)));
		502	+ LINALG_zgemm((int)(6 * (LAYERS - 1)), (int)1, (int)(6 * (LAYERS - 1)), M_, (int)(6 * (LAYERS - 1)), b_, (int)1, P_, (int)1);
		503	+ for (int i = 0; i < b.size(); i++) {
		504	+ P->push_back(P_[i]);
		505	+ }
		506	+
		507	+ delete[] M_;
		508	+ delete[] b_;
		509	+ delete[] P_;
		510	+ }
		511	+}
		512	+
		513	+
		514	+//Solve the layered-sample linear system for the incident field E and store the per-layer coefficients in Pt and Pr.
		515	+void layersample::solve(vector<complex<double>>* E, bool CPU_op) { //E: incident field, (*E)[0..2] are read below; CPU_op is forwarded to generate_linsys.
		516	+ size_t LAYERS = n.size();
		517	+ //Storage for the matrix and RHS vector; both are handed to generate_linsys.
		518	+ vector<complex<double>> M; //Coefficient matrix, addressed as a flat array with 6 * (LAYERS - 1) entries per row.
		519	+ vector<complex<double>> b; //The right hand side column vector.
		520	+
		521	+ //Build and solve the linear system.
		522	+ vector<complex<double>> P; //Solution vector; entries are addressed with ii(layer, component, direction).
		523	+ layersample::generate_linsys(LAYERS, M, b, *E, &P, CPU_op);
		524	+
		525	+ //Store the coefficients for each layer.
		526	+ //Pt (transmission) and Pr (reflection) each hold 3 * LAYERS values: component c of layer l is stored at index c * LAYERS + l.
		527	+ Pt.resize(3 * LAYERS);
		528	+ Pr.resize(3 * LAYERS);
		529	+
		530	+ for (size_t l = 0; l < LAYERS; l++) {
		531	+ if (l == 0) { //first layer: the transmitted field is the incident field E itself
		532	+ Pt[0] = (complex<double>)(*E)[0];
		533	+ Pt[LAYERS] = (complex<double>)(*E)[1];
		534	+ Pt[2 * LAYERS] = (complex<double>)(*E)[2];
		535	+ }
		536	+ else { //every other layer: transmitted coefficients (direction 0) come from the solution
		537	+ Pt[l] = P[ii(l, 0, 0)];
		538	+ Pt[l + LAYERS] = P[ii(l, 1, 0)];
		539	+ Pt[l + 2 * LAYERS] = P[ii(l, 2, 0)];
		540	+ }
		541	+
		542	+ if (l == LAYERS - 1) { //last layer: the reflected coefficients are set to zero
		543	+ Pr[LAYERS - 1] = 0;
		544	+ Pr[2 * LAYERS - 1] = 0;
		545	+ Pr[3 * LAYERS - 1] = 0;
		546	+ }
		547	+ else { //every other layer: reflected coefficients (direction 1) come from the solution
		548	+ Pr[l] = P[ii(l, 0, 1)];
		549	+ Pr[l + LAYERS] = P[ii(l, 1, 1)];
		550	+ Pr[l + 2 * LAYERS] = P[ii(l, 2, 1)];
		551	+ }
		552	+ }
		553	+ vector<complex<double>>().swap(M); //swap with empty temporaries to release the working storage
		554	+ vector<complex<double>>().swap(b);
		555	+ vector<complex<double>>().swap(P);
		556	+}
0	\ No newline at end of file	557	\ No newline at end of file

src/layer.h 0 → 100644

Show/Hide comments View file @71d5696

		1	+++ a/src/layer.h
		1	+#ifndef LAYER_H
		2	+#define LAYER_H
		3	+#include <vector>
		4	+#include <complex>
		5	+using namespace std;
		6	+
		7	+void crossProduct(vector<complex<double>>* a, vector<complex<double>>* b, vector<complex<double>>* c); //presumably stores the cross product of a and b in c — defined elsewhere, TODO confirm
		8	+complex<double> Norm(vector<complex<double>>* E); //presumably returns the norm of E — TODO confirm against the definition
		9	+void Normalize(vector<complex<double>>* E); //presumably rescales E in place to unit norm — TODO confirm
		10	+vector<vector<double> > transpose(vector<vector<double> >* matrix); //returns a new matrix by value; presumably the transpose of *matrix — TODO confirm
		11	+void orthogonalize(vector<complex<double>>* E_0, vector<complex<double>>* E0, vector<complex<double>>* d); //NOTE(review): the roles of E_0, E0 and d are not documented in this header — see the definition
		12	+
		13	+class layersample {
		14	+ //A stack of layers illuminated by a plane wave, together with the solved field coefficients for every layer.
		15	+public:
		16	+ double k; //wavenumber at free space.
		17	+ vector<complex<double>> n; //refractive index of each layer; n.size() is the number of layers.
		18	+ vector<double> z; //z position of each layer.
		19	+ vector<complex<double>> s; //propagation direction: s[0] is the x component and s[1] the y component. Kept for output.
		20	+ vector<complex<double>> sz; //z component of the propagation direction, one entry per layer. Kept for output.
		21	+ vector<complex<double>> d; //direction of propagation of the plane wave.
		22	+ vector<complex<double>> Pt; //transmission coefficients: component c of layer l is at index c * n.size() + l.
		23	+ vector<complex<double>> Pr; //reflection coefficients, same layout as Pt.
		24	+ //Calculate the index of the field component associated with a layer.
		25	+ // l is the layer index. c is the component (x, y, z). d is the direction (0 for transmission, 1 for reflection).
		26	+ size_t ii(size_t l, size_t c, size_t d);
		27	+
		28	+ //Generate the linear system corresponding to this layered sample and plane wave, and write its solution to P.
		29	+ void generate_linsys(size_t LAYERS, vector<complex<double>>& M, vector<complex<double>>& b, vector<complex<double>>& E, vector<complex<double>>* P, bool CPU_op);
		30	+
		31	+ //Build and solve the system for the incident field E, then fill Pt and Pr.
		32	+ void solve(vector<complex<double>>* E, bool CPU_op);
		33	+};
		34	+
		35	+#endif
0	\ No newline at end of file	36	\ No newline at end of file

src/linalg.cpp 0 → 100644

Show/Hide comments View file @71d5696

		1	+++ a/src/linalg.cpp
		1	+#include "linalg.h"
		2	+
		3	+// This file contains a set of wrapper functions that are linked to the corresponding functions in CLAPACK.
		4	+extern "C" {
		5	+#include "f2c.h"
		6	+#include "clapack.h"
		7	+#include "cblas.h"
		8	+}
		9	+
		10	+//LU-factorize the M x N matrix A in place with CLAPACK's zgetrf_; the pivot indices are written to IPIV.
		11	+void LINALG_zgetrf(
		12	+ int M, //number of rows of A
		13	+ int N, //number of columns of A
		14	+ std::complex<double>* A, //matrix data, overwritten by the factorization
		15	+ int LDA, //leading dimension of A
		16	+ int* IPIV) //receives the pivot indices
		17	+{
		18	+ integer m = M, n = N, lda = LDA, INFO = 0; //zgetrf_ takes its scalars as integer*, so pass integer copies instead of reinterpreting the int parameters
		19	+ zgetrf_(&m, &n, (doublecomplex*)A, &lda, (integer*)IPIV, &INFO); //NOTE(review): the IPIV cast assumes integer and int have the same size — confirm against f2c.h; INFO is not reported to the caller
		20	+}
		21	+//Invert A in place from the LU factors and pivot indices of a previous zgetrf_ factorization (CLAPACK zgetri_).
		22	+void LINALG_zgetri(
		23	+ size_t N, //order of the matrix A
		24	+ std::complex<double>* A, //LU factors on entry, overwritten by zgetri_
		25	+ int LDA, //leading dimension of A
		26	+ int* IPIV) //pivot indices from the factorization
		27	+{
		28	+ integer n = (integer)N, lda = LDA, LWORK = (n > 0 ? n * n : 1), INFO = 0; //LWORK must be a real size: LWORK = -1 only queries the optimal workspace and computes nothing
		29	+ doublecomplex* WORK = new doublecomplex[LWORK]; //N * N workspace, the same size LINALG_inverse uses
		30	+ zgetri_(&n, (doublecomplex*)A, &lda, (integer*)IPIV, WORK, &LWORK, &INFO); //NOTE(review): the IPIV cast assumes integer and int have the same size — confirm against f2c.h; INFO is not reported
		31	+ delete[] WORK;
		32	+}
		33	+void LINALG_inverse(std::complex<double>* A, int N)
		34	+{ //Replace the N x N matrix A with its inverse: LU-factorize with zgetrf_, then invert with zgetri_.
		35	+ if (N <= 0) return; //nothing to invert; also keeps the allocations below well-defined
		36	+ integer n = N, LWORK = n * n, INFO = 0; //integer-typed copies so CLAPACK receives real integer* arguments, not reinterpreted int*
		37	+ integer* IPIV = new integer[n]; //pivot indices, allocated with CLAPACK's own integer type
		38	+ std::complex<double>* WORK = new std::complex<double>[LWORK]; //workspace for zgetri_
		39	+
		40	+ zgetrf_(&n, &n, (doublecomplex*)A, &n, IPIV, &INFO);
		41	+ zgetri_(&n, (doublecomplex*)A, &n, IPIV, (doublecomplex*)WORK, &LWORK, &INFO); //NOTE(review): INFO is not reported, so a failed factorization or inversion is not signalled to the caller
		42	+
		43	+ delete[] IPIV;
		44	+ delete[] WORK;
		45	+}
		46	+//Compute C = A * B for row-major complex matrices by calling cblas_zgemm with alpha = 1 and beta = 0.
		47	+void LINALG_zgemm(
		48	+ const int M, //rows of A and C: A is (M x K), B is (K x N)
		49	+ const int N, //columns of B and C
		50	+ const int K, //columns of A, rows of B
		51	+ std::complex<double>* A,
		52	+ const int LDA, //leading dimension of A, =K
		53	+ std::complex<double>* B,
		54	+ const int LDB, //leading dimension of B, =N
		55	+ std::complex<double>* C,
		56	+ const int LDC) //leading dimension of C, =columns of C.
		57	+{
		58	+ std::complex<double> alpha = 1; //scale factor applied to the product A * B
		59	+ std::complex<double> beta = 0; //scale factor applied to the previous contents of C
		60	+ cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, (OPENBLAS_CONST blasint)M, (OPENBLAS_CONST blasint)N, (OPENBLAS_CONST blasint)K,
		61	+ &alpha, A, (OPENBLAS_CONST blasint)LDA, B, (OPENBLAS_CONST blasint)LDB, &beta, C, (OPENBLAS_CONST blasint)LDC);
		62	+
		63	+}
0	\ No newline at end of file	64	\ No newline at end of file

src/linalg.h 0 → 100644

Show/Hide comments View file @71d5696

		1	+++ a/src/linalg.h
		1	+// This file declares a set of wrapper functions that forward to the corresponding CLAPACK and CBLAS routines.
		2	+#pragma once
		3	+#include <complex>
		4	+//Matrix inverse: replaces the N x N matrix A with its inverse.
		5	+void LINALG_inverse(std::complex<double>* A, int N);
		6	+
		7	+//Matrix multiplication. C = A * B, with all three matrices stored row-major.
		8	+void LINALG_zgemm(
		9	+ const int M, //A(MK) B(KN)
		10	+ const int N,
		11	+ const int K,
		12	+ std::complex<double>* A,
		13	+ const int LDA, //=K
		14	+ std::complex<double>* B,
		15	+ const int LDB, //=N
		16	+ std::complex<double>* C,
		17	+ const int LDC); //=columns of C.
0	\ No newline at end of file	18	\ No newline at end of file

src/main.cpp 0 → 100644

Show/Hide comments View file @71d5696

		1	+++ a/src/main.cpp
		1	+//Update the data output format in example_layer().
		2	+#include "layer.h"
		3	+#include <windows.h>
		4	+
		5	+#include <fstream>
		6	+#include <iostream>
		7	+#include "stim/parser/arguments.h"
		8	+
		9	+using namespace std;
		10	+#define PI 3.14159265358979323846264338328
		11	+//Run-time options, assigned in main() from the "ascii" and "CPU" command-line flags.
		12	+bool ASCII_output = false; //when true, calculate_layer writes the text report instead of the binary file
		13	+bool CPU_op = false; //forwarded to layersample::solve
		14	+//Print the STIM Lab banner to standard output.
		15	+void advertise() {
		16	+ std::cout << std::endl << std::endl;
		17	+ std::cout << "=========================================================================" << std::endl;
		18	+ //(a "Thank you for using the NetMets network comparison tool!" line is intentionally disabled here)
		19	+ std::cout << "Scalable Tissue Imaging and Modeling (STIM) Lab, University of Houston" << std::endl;
		20	+ std::cout << "=========================================================================" << std::endl << std::endl;
		21	+}
		22	+//Complete the direction d and the field E with their z components so that E is orthogonal to d.
		23	+void E_Cal(vector<complex<double>>* E, vector<complex<double>>* d) { //E holds (Ex, Ey) and receives Ez; d holds (dx, dy) or (dx, dy, dz).
		24	+ if (d->size() == 2) { //only (dx, dy) given: derive dz so that dx^2 + dy^2 + dz^2 = 1
		25	+ complex<double> dz = sqrt(1.0 + 0i - pow((*d)[0], 2) - pow((*d)[1], 2));
		26	+ d->push_back(dz);
		27	+ }
		28	+ if (E->size() < 2 || d->size() < 3) { std::cout << "ERROR: E_Cal needs Ex, Ey and a direction vector with two or three components." << std::endl; exit(1); } //guard the indexed reads below
		29	+ E->push_back(-((*E)[0] * (*d)[0] + (*E)[1] * (*d)[1]) / (*d)[2]); //Ez from Ex*dx + Ey*dy + Ez*dz = 0; not finite when dz is 0
		30	+}
		31	+void output_binary(std::string filename, layersample& layers) {
		32	+ size_t L = layers.n.size(); // get the number of layers
		33	+ std::ofstream outFile;
		34	+ outFile.open(filename, std::ofstream::binary); // open the output file for binary writing
		35	+ if (outFile) {
		36	+ outFile.write((char*)&layers.k, sizeof(double));
		37	+ outFile.write((char*)&layers.d[0], sizeof(double));
		38	+ outFile.write((char*)&layers.d[1], sizeof(double));
		39	+ outFile.write((char)&layers.n[0], sizeof(double) 2);
		40	+
		41	+ for (size_t i = 0; i < L; i++) {
		42	+ outFile.write((char*)&layers.z[i], sizeof(double));
		43	+ outFile.write((char)&layers.sz[i], 2 sizeof(double));
		44	+ outFile.write((char)&layers.Pt[i], 2 sizeof(double));
		45	+ outFile.write((char)&layers.Pt[i + L], 2 sizeof(double));
		46	+ outFile.write((char)&layers.Pt[i + 2 L], 2 * sizeof(double));
		47	+ outFile.write((char)&layers.Pr[i], 2 sizeof(double));
		48	+ outFile.write((char)&layers.Pr[i + L], 2 sizeof(double));
		49	+ outFile.write((char)&layers.Pr[i + 2 L], 2 * sizeof(double));
		50	+ }
		51	+ outFile.close();
		52	+ }
		53	+ else {
		54	+ std::cout << "ERROR opening output file for binary writing: " << filename << std::endl;
		55	+ }
		56	+}
		57	+//Write a human-readable report of the solved sample to filename; prints an error and exits with status 1 if the file cannot be opened.
		58	+void output_txt(std::string filename, layersample& layers) {
		59	+ size_t L = layers.n.size(); // get the number of layers
		60	+ std::ofstream outFile;
		61	+ outFile.open(filename); // open the output file for text writing
		62	+ if (!outFile) {
		63	+ std::cout << "ERROR: Could not open file " << filename << std::endl;
		64	+ exit(1);
		65	+ }
		66	+ int width = 15; // field width passed to setw; it applies to the real part that follows each setw
		67	+
		68	+ outFile << "--------------------------------" << endl;
		69	+ outFile << "The wavenumber at free space is : " << layers.k << endl;
		70	+ for (size_t i = 0; i < L; i++) {
		71	+ if (i == 0) { // first layer: same report as the branch below, except for the longer opening separator
		72	+ outFile << "--------------------------------" << endl;
		73	+ outFile << "LAYER " << i << " (z = " << layers.z[i] << ")" << endl;
		74	+ outFile << "refractive index: " << layers.n[i].real() << " + i " << layers.n[i].imag() << endl;
		75	+ outFile << "----------------------" << endl;
		76	+ outFile << "sx = " << setw(width) << layers.s[0].real() << " + i " << layers.s[0].imag() << endl;
		77	+ outFile << "sy = " << setw(width) << layers.s[1].real() << " + i " << layers.s[1].imag() << endl;
		78	+ outFile << "sz = " << setw(width) << layers.sz[i].real() << " + i " << layers.sz[i].imag() << endl;
		79	+ outFile << "----->>>>>" << endl;
		80	+ outFile << " X = " << setw(width) << layers.Pt[i].real() << " + i " << layers.Pt[i].imag() << endl;
		81	+ outFile << " Y = " << setw(width) << layers.Pt[i + L].real() << " + i " << layers.Pt[i + L].imag() << endl;
		82	+ outFile << " Z = " << setw(width) << layers.Pt[i + 2 * L].real() << " + i " << layers.Pt[i + 2 * L].imag() << endl;
		83	+ outFile << "<<<<<-----" << endl;
		84	+ outFile << " X = " << setw(width) << layers.Pr[i].real() << " + i " << layers.Pr[i].imag() << endl;
		85	+ outFile << " Y = " << setw(width) << layers.Pr[i + L].real() << " + i " << layers.Pr[i + L].imag() << endl;
		86	+ outFile << " Z = " << setw(width) << layers.Pr[i + 2 * L].real() << " + i " << layers.Pr[i + 2 * L].imag() << endl;
		87	+ }
		88	+ else { // remaining layers
		89	+ outFile << "----------------------" << endl;
		90	+ outFile << "LAYER " << i << " (z = " << layers.z[i] << ")" << endl;
		91	+ outFile << "refractive index: " << layers.n[i].real() << " + i " << layers.n[i].imag() << endl;
		92	+ outFile << "----------------------" << endl;
		93	+ outFile << "sx = " << setw(width) << layers.s[0].real() << " + i " << layers.s[0].imag() << endl;
		94	+ outFile << "sy = " << setw(width) << layers.s[1].real() << " + i " << layers.s[1].imag() << endl;
		95	+ outFile << "sz = " << setw(width) << layers.sz[i].real() << " + i " << layers.sz[i].imag() << endl;
		96	+ outFile << "----->>>>>" << endl;
		97	+ outFile << " X = " << setw(width) << layers.Pt[i].real() << " + i " << layers.Pt[i].imag() << endl;
		98	+ outFile << " Y = " << setw(width) << layers.Pt[i + L].real() << " + i " << layers.Pt[i + L].imag() << endl;
		99	+ outFile << " Z = " << setw(width) << layers.Pt[i + 2 * L].real() << " + i " << layers.Pt[i + 2 * L].imag() << endl;
		100	+ // reflected coefficients follow
		101	+ outFile << "<<<<<-----" << endl;
		102	+ outFile << " X = " << setw(width) << layers.Pr[i].real() << " + i " << layers.Pr[i].imag() << endl;
		103	+ outFile << " Y = " << setw(width) << layers.Pr[i + L].real() << " + i " << layers.Pr[i + L].imag() << endl;
		104	+ outFile << " Z = " << setw(width) << layers.Pr[i + 2 * L].real() << " + i " << layers.Pr[i + 2 * L].imag() << endl;
		105	+ }
		106	+
		107	+ }
		108	+ outFile.close();
		109	+}
		110	+//Solve one layered sample for a single plane wave, print the elapsed solve time, and write the result to outName.
		111	+void calculate_layer(std::string outName,
		112	+ vector<complex<double>>* ns, // the refractive index.
		113	+ vector<double>* depths, // z-direction position.
		114	+ vector<complex<double>>* E0, // the initialized E0.
		115	+ vector<complex<double>>* d0, // direction of propagation of the plane wave.
		116	+ double k0) { // the wavenumber at free space.
		117	+ // E_Cal appends the missing z components: dz to d0 (when only x and y are given) and Ez to E0.
		118	+ E_Cal(E0, d0); // make sure that both vectors are orthogonal.
		119	+
		120	+ //Create a new layersample and initialize it.
		121	+ layersample Layer1; // create a layered sample
		122	+ Layer1.n = *ns; // copy the refractive indices
		123	+ Layer1.z = *depths; // copy the layer depths
		124	+ Layer1.d = *d0;
		125	+ Layer1.k = k0;
		126	+
		127	+ LARGE_INTEGER t1, t2, tc; // Timing: tc is the counter frequency in counts per second.
		128	+ QueryPerformanceFrequency(&tc);
		129	+ QueryPerformanceCounter(&t1);
		130	+ Layer1.solve(E0, CPU_op); // Solve for the field coefficients; CPU_op is passed through to the solver.
		131	+ QueryPerformanceCounter(&t2);
		132	+ std::cout << "time for 'solving linear functions':" << 1000.0 * (t2.QuadPart - t1.QuadPart) / (double)tc.QuadPart << "ms."<< std::endl; // counts / frequency is seconds, so scale by 1000 to match the "ms." label
		133	+ // Write the result in the format selected on the command line.
		134	+ if (ASCII_output)
		135	+ output_txt(outName, Layer1);
		136	+ else
		137	+ output_binary(outName, Layer1);
		138	+}
		139	+
		140	+//Entry point: parse the command line, assemble the layer description and incident wave, then solve and write the output file.
		141	+int main(int argc, char* argv[]) {
		142	+ stim::arglist args;
		143	+
		144	+ //Basic argument lists.
		145	+ args.add("help", "prints this help");
		146	+ args.add("s", "propagation direction vector (x, y)", "0.5 0.0", "[-1.0, 1.0]");
		147	+ args.add("l", "wavelength", "5.0", "in arbitrary units (ex. um)");
		148	+ args.add("Ex", "complex amplitude (x direction)", "0.5 0.0");
		149	+ args.add("Ey", "complex amplitude (y direction)", "0.5 0.0");
		150	+ args.add("z", "layer positions");
		151	+ args.add("n", "layer optical path length (real refractive index)", "1.0 1.4 1.4 1.0");
		152	+ args.add("kappa", "layer absorbance (imaginary refractive index)");
		153	+ args.add("ascii", "output as an ASCII file");
		154	+ args.add("CPU", "execute the program in CPU");
		155	+ args.parse(argc, argv);
		156	+
		157	+ if (args["help"].is_set()) { // test for help
		158	+ advertise(); // output the advertisement
		159	+ std::cout << args.str(); // output arguments
		160	+ exit(1); // exit
		161	+ }
		162	+
		163	+ ASCII_output = args["ascii"]; // if the ascii flag is set, output an ascii file
		164	+ CPU_op = args["CPU"]; // if the CPU_op is set, run the program in CPU.
		165	+ std::string outName; // output file: the single positional argument, or a default that depends on the format
		166	+ if (args.nargs() == 1) {
		167	+ outName = args.arg(0);
		168	+ }
		169	+ else if (args.nargs() == 0) {
		170	+ if (ASCII_output)
		171	+ outName = "output.txt";
		172	+ else
		173	+ outName = "output.lyr";
		174	+ }
		175	+ else {
		176	+ std::cout << "ERROR: Too many arguments." << std::endl;
		177	+ exit(1);
		178	+ }
		179	+
		180	+
		181	+ vector<complex<double>> d; //direction of propagation of the plane wave (x, y); default: {0.5, 0}
		182	+ if (args["s"].nargs() == 2) {
		183	+ d.push_back({ (double)args["s"].as_float(0), 0 });
		184	+ d.push_back({ (double)args["s"].as_float(1), 0 });
		185	+ }
		186	+
		187	+ double l0 = (double)args["l"].as_float(0); //wavelength. Default: l0 = 5
		188	+ double k0 = 2 * PI / l0; //Calculate the free-space wavenumber.
		189	+
		190	+ complex<double> Ex = { (double)args["Ex"].as_float(0), (double)args["Ex"].as_float(1) }; //Input E as (real, imaginary) pairs. Default: Ex = Ey = 0.5 + 0i
		191	+ complex<double> Ey = { (double)args["Ey"].as_float(0), (double)args["Ey"].as_float(1) };
		192	+ vector<complex<double>> E0;
		193	+ E0.push_back(Ex);
		194	+ E0.push_back(Ey);
		195	+
		196	+ //The number of layers is not an argument: it is the number of values read from "n" below.
		197	+
		198	+ vector<double> depths;
		199	+ vector<complex<double>> ns;
		200	+ size_t i = 0;
		201	+ while(args["n"].is_set() && args["n"].as_float(i)!=0) { //n is the real part. NOTE(review): stops at the first value that reads as 0; presumably relies on as_float returning 0 past the last value — confirm against STIM
		202	+ if (args["kappa"].is_set()) //kappa is the imaginary part.
		203	+ ns.push_back({ (double)args["n"].as_float(i), (double)args["kappa"].as_float(i) });
		204	+ else
		205	+ ns.push_back({ (double)args["n"].as_float(i), 0 }); //ns <- n + i * kappa
		206	+ i++;
		207	+ }
		208	+ for (size_t j = 0; j < i; j++) //one depth per layer
		209	+ if (args["z"].is_set())
		210	+ depths.push_back((double)args["z"].as_float(j)); //read the j-th position; indexing with the layer count i would read past the last value
		211	+ else {
		212	+ depths.push_back((double)-100 + j * 200 / i); //default: positions from -100 upward; j * 200 / i is evaluated in integer arithmetic
		213	+ }
		214	+ /*---------------------------------------example_layer--------------------------------------------*/
		215	+
		216	+ calculate_layer(outName, &ns, &depths, &E0, &d, k0);
		217	+
		218	+ //Exit successfully once the output file has been written.
		219	+ return 0;
		220	+}
0	\ No newline at end of file	221	\ No newline at end of file