release / multilayer

Browse Code »

Commit 71d5696d385e7d2e662bbd1e98264293479ed780

Authored by David Mayerich 2020-11-09 11:17:29 -0600

0 parents

First commit after development and testing

Showing 12 changed files with 1182 additions and 0 deletions Show diff stats

Inline Side-by-side

CMakeLists.txt 0 → 100644

Wrap text Show/Hide comments View file @71d5696

	1	+++ a/CMakeLists.txt
	1	+#Specify the version being used aswell as the language
	2	+cmake_minimum_required(VERSION 3.12)
	3	+
	4	+#Name your project here
	5	+project(multilayer)
	6	+
	7	+#set the module directory
	8	+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}")
	9	+
	10	+#default to release mode
	11	+if(NOT CMAKE_BUILD_TYPE)
	12	+ set(CMAKE_BUILD_TYPE Release)
	13	+endif(NOT CMAKE_BUILD_TYPE)
	14	+
	15	+#build the executable in the binary directory on MS Visual Studio
	16	+if ( MSVC )
	17	+ SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}")
	18	+ SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
	19	+ SET( LIBRARY_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}")
	20	+ SET( LIBRARY_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
	21	+ add_definitions(-D_CRT_SECURE_NO_WARNINGS)
	22	+ add_definitions(-D_SCL_SECURE_NO_WARNINGS)
	23	+endif ( MSVC )
	24	+
	25	+
	26	+#find packages-----------------------------------
	27	+#find the pthreads package
	28	+find_package(Threads)
	29	+
	30	+#find the X11 package
	31	+find_package(X11)
	32	+
	33	+#find CUDA, mostly for LA stuff using cuBLAS
	34	+find_package(CUDA REQUIRED)
	35	+
	36	+#find Boost
	37	+#find_package(Boost)
	38	+
	39	+#find the STIM library
	40	+find_package(STIM REQUIRED)
	41	+
	42	+#find LAPACK and supporting link_libraries
	43	+find_package(clapack CONFIG REQUIRED)
	44	+find_package(OpenBLAS CONFIG REQUIRED)
	45	+
	46	+#include include directories
	47	+include_directories(${CUDA_INCLUDE_DIRS}
	48	+ ${STIM_INCLUDE_DIRS}
	49	+)
	50	+
	51	+#Assign source files to the appropriate variables to easily associate them with executables
	52	+file(GLOB SRC "src/*.cpp")
	53	+
	54	+#-----------------------------Create the executable--------------------------
	55	+#-----------------------------Show all four examples-------------------------
	56	+add_executable(multilayer
	57	+ ${SRC}
	58	+)
	59	+link_directories(${CUDA_BIN_DIRS})
	60	+target_link_libraries(multilayer ${CUDA_LIBRARIES}
	61	+ ${CUDA_CUBLAS_LIBRARIES}
	62	+ ${CUDA_cusparse_LIBRARY}
	63	+ ${CUDA_cusolver_LIBRARY}
	64	+ ${CUDA_CUFFT_LIBRARIES}
	65	+ OpenBLAS::OpenBLAS
	66	+ f2c lapack
	67	+)
	68	+
	69	+
0	70	\ No newline at end of file
...	...

FindSTIM.cmake 0 → 100644

Wrap text Show/Hide comments View file @71d5696

	1	+++ a/FindSTIM.cmake
	1	+# finds the STIM library (downloads it if it isn't present)
	2	+# set STIMLIB_PATH to the directory containing the stim subdirectory (the stim repository)
	3	+
	4	+include(FindPackageHandleStandardArgs)
	5	+
	6	+set(STIM_ROOT $ENV{STIM_ROOT})
	7	+
	8	+IF(NOT STIM_ROOT)
	9	+ MESSAGE("ERROR: STIM_ROOT environment variable must be set!")
	10	+ENDIF(NOT STIM_ROOT)
	11	+
	12	+ FIND_PATH(STIM_INCLUDE_DIRS DOC "Path to STIM include directory."
	13	+ NAMES stim/image/image.h
	14	+ PATHS ${STIM_ROOT})
	15	+
	16	+find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIRS)
...	...

desktop.ini 0 → 100644

Wrap text Show/Hide comments View file @71d5696

1	1	Binary files /dev/null and a/desktop.ini differ
...	...

docs/Readme_BytesOrder.txt 0 → 100644

Wrap text Show/Hide comments View file @71d5696

	1	+++ a/docs/Readme_BytesOrder.txt
	1	+Order of "output.lyr" parameters.
	2	+
	3	+The wavenumber in free space: k0 double 8B
	4	+The direction of propagation: d double*2 16B
	5	+The refractive index in the first layer: n[0] complex<double> 16B
	6	+
	7	+for i in LAYERS:
	8	+ z positions[i]: z[i] double 8B * LAYERS
	9	+ z-component of propagation directions: sz[i] complex<double> 16B * LAYERS
	10	+ Transmission: Ptx[i] complex<double> 16B * LAYERS
	11	+ Transmission: Pty[i] complex<double> 16B * LAYERS
	12	+ Transmission: Ptz[i] complex<double> 16B * LAYERS
	13	+ Reflection: Prx[i] complex<double> 16B * LAYERS
	14	+ Reflection: Pry[i] complex<double> 16B * LAYERS
	15	+ Reflection: Prz[i] complex<double> 16B * LAYERS
	16	+
	17	+
	18	+Total number of 8-byte double values stored in the file:
	19	+ 15 * LAYERS + 5 (5 header values: k0, d, n[0]; plus 15 values per layer)
...	...

docs/lyr_format.pptx 0 → 100644

View file @71d5696

No preview for this file type

docs/testcases.txt 0 → 100644

Wrap text Show/Hide comments View file @71d5696

	1	+++ a/docs/testcases.txt
	1	+output.lyr --n 1.0 1.2 1.3 1.4 1.5 1.4 1.3 1.2 1.1 --kappa 0.0 0.01 0.02 0.03 0.04 0.05 0.6 0.07 0.08 --zPos -50 -40 -30 -20 -10 0 10 20 30
	2	+
	3	+output.lyr --n 1.0 2.0 1.0 --kappa 0.0 0.0 0.0 --z -50 0 50
	4	+
	5	+defaults:
	6	+
	7	+*) If kappa is not specified, set them all to zero
	8	+
	9	+*) If z is not specified, create equally spaced layers between -100 and 100
	10	+
	11	+*) Output: Place a color bar next to each image (colorbar())
0	12	\ No newline at end of file
...	...

layerview.py 0 → 100644

Wrap text Show/Hide comments View file @71d5696

	1	+++ a/layerview.py
	1	+# create a function that displays the output when run this way:
	2	+# python layerview.py output.dat
	3	+
	4	+import sys
	5	+import os
	6	+from time import time
	7	+import subprocess
	8	+import struct
	9	+import numpy as np
	10	+import matplotlib
	11	+import math
	12	+import matplotlib.pyplot as plt
	13	+
	14	+from mpl_toolkits.axes_grid1 import ImageGrid
	15	+
	16	+def intensity(E):
	17	+ Econj = np.conj(E)
	18	+ I = np.sum(E*Econj, axis=-1)
	19	+ return np.real(I)
	20	+
	21	+#evaluate a solved homogeneous substrate
	22	+# Returns a complex NxMx3 array representing the cross section of the field at Y=0
	23	+def evaluate(Depths, k, d, n0, sz, Pt, Pr, X, Y, Z):
	24	+ Depths = np.array(Depths)
	25	+ sz = np.array(sz)
	26	+ Pt = np.array(Pt)
	27	+ Pr = np.array(Pr)
	28	+ s = np.array(d) * n0
	29	+ #allocate space for layer indices
	30	+ LI = np.zeros(Z.shape, dtype=np.int)
	31	+
	32	+ #find the layer index for each sample point
	33	+ L = len(Depths)
	34	+ LI[Z < Depths[0]] = 0
	35	+ for l in range(L-1):
	36	+ idx = np.logical_and(Z > Depths[l], Z <= Depths[l+1])
	37	+ LI[idx] = l
	38	+ LI[Z > Depths[-1]] = L - 1
	39	+
	40	+ #calculate the appropriate phase shift for the wave transmitted through the layer
	41	+ Ph_t = np.exp(1j * k * sz[LI] * (Z - Depths[LI]))
	42	+
	43	+ #calculate the appropriate phase shift for the wave reflected off of the layer boundary
	44	+ LIp = LI + 1
	45	+ LIp[LIp >= L] = 0
	46	+ Ph_r = np.exp(-1j * k * sz[LI] * (Z - Depths[LIp]))
	47	+ Ph_r[LI >= L-1] = 0
	48	+
	49	+ #calculate the phase shift based on the X and Y positions
	50	+ Ph_xy = np.exp(1j * k * (s[0] * X + s[1] * Y))
	51	+
	52	+ #apply the phase shifts
	53	+ Et = Pt[:, LI] * Ph_t[:, :]
	54	+ Er = Pr[:, LI] * Ph_r[:, :]
	55	+
	56	+ #add everything together coherently
	57	+ E = (Et + Er) * Ph_xy[:, :]
	58	+
	59	+ #return the electric field
	60	+ return np.moveaxis(E, 0, -1)
	61	+
	62	+class planewave:
	63	+ def __int__(self):
	64	+ self.LAYERS = 0 #Number of layers. int
	65	+ self.depths = [] #z positions of layers. [1, 5, ..., 10] double
	66	+ self.k0 = 0.0 #wavenumber at free space. double
	67	+ self.d = [] #direction of propogation. [0.5, 0] double
	68	+ self.n0 = 0.0+0.0j #the refractive index of the first layer. complex<double>
	69	+ self.sz = [] #z-component of propagation for each layer. complex<double>
	70	+ self.Pt = [[] for i in range(3)] #transmission complex<double>
	71	+ self.Pr = [[],[],[]] #refraction complex<double>
	72	+
	73	+# display a binary file produced using the coupled wave C code
	74	+def layer(strc):
	75	+ f = open(strc, "rb")
	76	+
	77	+ # create an empty plane wave structure
	78	+ L = planewave()
	79	+ L.depths = []
	80	+ L.d = []
	81	+ L.sz = []
	82	+ L.Pt = [[],[],[]]
	83	+ L.Pr = [[],[],[]]
	84	+
	85	+ # open the input file for reading
	86	+ file_bytes = os.path.getsize(strc)
	87	+
	88	+ # calculate the number of layers in the sample
	89	+ L.LAYERS = int((file_bytes/8-5)/15)
	90	+
	91	+ # load the raw layer data into the plane wave structure
	92	+ data_raw = struct.unpack('d' * (15L.LAYERS+5), f.read((15L.LAYERS+5)* 8))
	93	+ data = np.asarray(data_raw)
	94	+ L.k0 = data[0]
	95	+ L.d.append(data[1])
	96	+ L.d.append(data[2])
	97	+ L.n0 = complex(data[3], data[4])
	98	+
	99	+ # load each layer's plane waves from the binary file
	100	+ for i in range(L.LAYERS):
	101	+ L.depths.append(data[5+15*i])
	102	+ L.sz.append(complex(data[6+15i], data[7+15i]))
	103	+ L.Pt[0].append(complex(data[8+15i], data[9+15i]))
	104	+ L.Pt[1].append(complex(data[15i+10], data[15i+11]))
	105	+ L.Pt[2].append(complex(data[15i+12], data[15i+13]))
	106	+ L.Pr[0].append(complex(data[15i+14], data[15i+15]))
	107	+ L.Pr[1].append(complex(data[15i+16], data[15i+17]))
	108	+ L.Pr[2].append(complex(data[15i+18], data[15i+19]))
	109	+
	110	+ N = 512 # simulation resolution NxM
	111	+ M = 1024
	112	+ #DAVID: Don't hard-code the dimensions - you'll have to calculate them based on the sample information in the file
	113	+ D = [-110, 110, 0, 60] # dimensions of the simulation
	114	+ x = np.linspace(D[2], D[3], N) # set the sample points for the simulation
	115	+ z = np.linspace(D[0], D[1], M)
	116	+ [X, Z] = np.meshgrid(x, z) # create a mesh grid to evaluate layers
	117	+ Y = np.zeros(X.shape)
	118	+
	119	+ # evaluate the field across all layers
	120	+ E = evaluate(L.depths, L.k0, L.d, L.n0, L.sz, L.Pt, L.Pr, X, Y, Z)
	121	+ Er = np.real(E)
	122	+ I = intensity(E)
	123	+
	124	+ plt.set_cmap("afmhot") # set the color map
	125	+ plt.subplot(1, 4, 1)
	126	+ plt.imshow(Er[:, :, 0], extent=(D[3], D[2], D[1], D[0]))
	127	+ #plt.colorbar()
	128	+ plt.title("Ex")
	129	+
	130	+ plt.subplot(1, 4, 2)
	131	+ plt.imshow(Er[:, :, 1], extent=(D[3], D[2], D[1], D[0]))
	132	+ #plt.colorbar()
	133	+ plt.title("Ey")
	134	+
	135	+ plt.subplot(1, 4, 3)
	136	+ plt.imshow(Er[:, :, 2], extent=(D[3], D[2], D[1], D[0]))
	137	+ #plt.colorbar()
	138	+ plt.title("Ez")
	139	+
	140	+ plt.subplot(1, 4, 4)
	141	+ plt.imshow(I, extent=(D[3], D[2], D[1], D[0]))
	142	+ plt.colorbar()
	143	+ plt.title("I")
	144	+
	145	+ #fig = plt.figure(1, (5, 10))
	146	+ #plt.set_cmap("afmhot")
	147	+ #matplotlib.rcParams.update({'font.size': 10})
	148	+ #grid = ImageGrid(fig, rect = 211, nrows_ncols = (1, 3), axes_pad = 0.2, label_mode = "1", cbar_mode = "single", cbar_size = "18%")
	149	+ #Title = ["Ex", "Ey", "Ez"]
	150	+ #for i in range(3):
	151	+ # grid[i].axis('off')
	152	+ # im = grid[i].imshow(Er[..., i], extent=(D[3], D[2], D[1], D[0]), interpolation="nearest")
	153	+ # grid[i].set_title(Title[i])
	154	+ #grid.cbar_axes[0].colorbar(im)
	155	+ #plt.title("E")
	156	+ #plt.subplot(212)
	157	+ #plt.imshow(I, extent=(D[3], D[2], D[1], D[0]))
	158	+ #plt.title("I")
	159	+ #plt.colorbar()
	160	+ plt.show()
	161	+
	162	+# function displays usage text to the console
	163	+def usage():
	164	+ print("Usage:")
	165	+ print(" layerview input.dat")
	166	+
	167	+if __name__ == '__main__':
	168	+ start = time()
	169	+ if len(sys.argv) < 2: # if there are no command line arguments
	170	+ usage() # display the usage text
	171	+ exit() # exit
	172	+ else:
	173	+ layer(sys.argv[1]) # otherwise display the given data file
	174	+
	175	+ end = time()
	176	+ print("The elapsed time is " + str(end - start) + " s. ")
0	177	\ No newline at end of file
...	...

src/layer.cpp 0 → 100644

Wrap text Show/Hide comments View file @71d5696

	1	+++ a/src/layer.cpp
	1	+#include "layer.h"
	2	+#include "linalg.h" //LAPACKE support for Visual Studio
	3	+
	4	+#include <cusparse.h>
	5	+#include <cuda_runtime.h>
	6	+//#include "cublas_v2.h"
	7	+#include "cusolverSp.h"
	8	+/*----------------------------GPU-----------------------------*/
	9	+
	10	+
	11	+//Cross product.c is result.
	12	+void crossProduct(vector<complex<double>>* a, vector<complex<double>>* b, //The given matrices.
	13	+ vector<complex<double>>* c) { //Matrix to be gotten.
	14	+ c->push_back((a)[1] (b)[2] - (a)[2] * (*b)[1]);
	15	+ c->push_back((a)[2] (b)[0] - (a)[0] * (*b)[2]);
	16	+ c->push_back((a)[0] (b)[1] - (a)[1] * (*b)[0]);
	17	+}
	18	+
	19	+//Calculate the norm for a matrix.
	20	+complex<double> Norm(vector<complex<double>>* E) {
	21	+ complex<double> sum = 0;
	22	+ for (unsigned int i = 0; i < E->size(); i++) {
	23	+ sum += (E)[i] (*E)[i];
	24	+ }
	25	+ return sqrt(sum);
	26	+}
	27	+
	28	+//Normalize matrix.
	29	+void Normalize(vector<complex<double>>* E) {
	30	+ complex<double> sum = 0;
	31	+ for (unsigned int i = 0; i < E->size(); i++) {
	32	+ sum += (E)[i] (*E)[i];
	33	+ }
	34	+ for (unsigned int i = 0; i < E->size(); i++) {
	35	+ (E)[i] = (E)[i] / sqrt(sum);
	36	+
	37	+ }
	38	+}
	39	+
	40	+//Orthogonalization.
	41	+void orthogonalize(vector<complex<double>>* E_0rtho, vector<complex<double>>* E0, vector<complex<double>>* d) {
	42	+ vector<complex<double>> s;
	43	+ if (d->size() == 2) {
	44	+ complex<double> dz = sqrt(1.0+0i - pow((d)[0], 2) - pow((d)[1], 2));
	45	+ d->push_back(dz);
	46	+ }
	47	+ crossProduct(E0, d, &s);
	48	+ crossProduct(d, &s, E_0rtho);
	49	+ vector<complex<double>>().swap(s);
	50	+}
	51	+
	52	+/--------------------------------------------------Define Class layersample.--------------------------------------------------------/
	53	+
	54	+/*Do not try to replace "int" as "size_t".
	55	+This will result in a bunch of warnings and if we continuously change the type of M_rowInd and M_colInd, the EXCEPTION will occur again.*/
	56	+size_t layersample::ii(size_t l, size_t c, size_t d) { //ii(l, c, d) means the column indexes for every element.
	57	+ return l * 6 + d * 3 + c - 3;
	58	+}
	59	+
	60	+void layersample::generate_linsys(size_t LAYERS,
	61	+ vector<complex<double>>& M, //All non-zero values in "A" matirx.(A * X = b)
	62	+ vector<complex<double>>& b, //The right hand side column vector.
	63	+ vector<complex<double>>& E, //orthogonalized E0 vectors
	64	+ vector<complex<double>>* P,
	65	+ bool CPU_op) { //Solution of the matrices multiplication.
	66	+ //Calculate the sz component for each layer.
	67	+ s.clear(); //s is the plane wave direction scaled by the refractive index.
	68	+ for (size_t i = 0; i < 2; i++)
	69	+ s.push_back(d[i] * n[0]);
	70	+ sz.clear();
	71	+ for (int l = 0; l < LAYERS; l++) {
	72	+ sz.push_back(sqrt(pow(n[l], 2) - pow(s[0], 2) - pow(s[1], 2)));
	73	+ }
	74	+
	75	+ if (!CPU_op){
	76	+ //Computer in GPU.
	77	+ vector<int> M_rowInd; //Sparse matrix M CSR ->row index
	78	+ vector<int> M_colInd; //Sparse matrix M CSR ->number of elements
	79	+ M_rowInd.push_back(0);
	80	+ ////Build M by setting constraints based on Gauss's Law.
	81	+ for (size_t l = 0; l < LAYERS; l++) {
	82	+ //Set the upward components for each layer.
	83	+ //Layer "LAYERS-1" doesn't have a upward component.
	84	+ if (l != LAYERS - 1) {
	85	+ M.push_back(s[0]);
	86	+ M_colInd.push_back((int)ii(l, 0, 1));
	87	+ M.push_back(s[1]);
	88	+ M_colInd.push_back((int)ii(l, 1, 1));
	89	+ M.push_back(-sz[l]);
	90	+ M_colInd.push_back((int)ii(l, 2, 1));
	91	+ M_rowInd.push_back((int)M.size());
	92	+ b.push_back(0);
	93	+ }
	94	+ //Set the downward components for each layer.
	95	+ if (l != 0) {
	96	+ M.push_back(s[0]);
	97	+ M_colInd.push_back((int)ii(l, 0, 0));
	98	+ M.push_back(s[1]);
	99	+ M_colInd.push_back((int)ii(l, 1, 0));
	100	+ M.push_back(sz[l]);
	101	+ M_colInd.push_back((int)ii(l, 2, 0));
	102	+ M_rowInd.push_back((int)M.size());
	103	+ b.push_back(0);
	104	+ }
	105	+ }
	106	+ //Continue to build M by enforcing a continuous field across boundaries.
	107	+ complex<double> arg, arg_in, B;
	108	+ for (size_t l = 1; l < LAYERS; l++) {
	109	+ complex<double> sz0 = sz[l - 1];
	110	+ complex<double> sz1 = sz[l];
	111	+
	112	+ //Representation of A = np.exp(1j * k0 * sz0 * (self.z[l] - self.z[l - 1]))
	113	+ complex<double> A_in = k * sz0 * (z[l] - z[l - 1]);
	114	+ complex<double> A_in2 = { -A_in.imag(), A_in.real() };
	115	+ complex<double> A = exp(A_in2);
	116	+
	117	+ if (l < LAYERS - 1) {
	118	+ double dl = z[l] - z[l + 1];
	119	+ arg_in = -k * sz1 * (complex<double>)dl;
	120	+ arg = { -arg_in.imag(), arg_in.real() };
	121	+ B = exp(arg);
	122	+ }
	123	+ //if this is the second layer, use the simplified equations that account for the incident field
	124	+ if (l == 1) {
	125	+ M.push_back(1);
	126	+ M_colInd.push_back((int)ii(0, 0, 1));
	127	+ M.push_back(-1);
	128	+ M_colInd.push_back((int)ii(1, 0, 0));
	129	+ if (LAYERS > 2) {
	130	+ M.push_back(-B);
	131	+ M_colInd.push_back((int)ii(1, 0, 1));
	132	+ }
	133	+ M_rowInd.push_back((int)M.size());
	134	+ b.push_back(-A * E[0]);
	135	+
	136	+ M.push_back(1);
	137	+ M_colInd.push_back((int)ii(0, 1, 1));
	138	+ M.push_back(-1);
	139	+ M_colInd.push_back((int)ii(1, 1, 0));
	140	+ if (LAYERS > 2) {
	141	+ M.push_back(-B);
	142	+ M_colInd.push_back((int)ii(1, 1, 1));
	143	+ }
	144	+ M_rowInd.push_back((int)M.size());
	145	+ b.push_back(-A * E[l]);
	146	+
	147	+ M.push_back(sz0);
	148	+ M_colInd.push_back((int)ii(0, 1, 1));
	149	+ M.push_back(s[1]);
	150	+ M_colInd.push_back((int)ii(0, 2, 1));
	151	+ M.push_back(sz1);
	152	+ M_colInd.push_back((int)ii(1, 1, 0));
	153	+ M.push_back(-s[1]);
	154	+ M_colInd.push_back((int)ii(1, 2, 0));
	155	+ if (LAYERS > 2) {
	156	+ M.push_back(-B * sz1);
	157	+ M_colInd.push_back((int)ii(1, 1, 1));
	158	+ M.push_back(-B * s[1]);
	159	+ M_colInd.push_back((int)ii(1, 2, 1));
	160	+ }
	161	+ M_rowInd.push_back((int)M.size());
	162	+ b.push_back(A * sz0 * E[1] - A * s[1] * E[2]);
	163	+
	164	+ M.push_back(-sz0);
	165	+ M_colInd.push_back((int)ii(0, 0, 1));
	166	+ M.push_back(-s[0]);
	167	+ M_colInd.push_back((int)ii(0, 2, 1));
	168	+ M.push_back(-sz1);
	169	+ M_colInd.push_back((int)ii(1, 0, 0));
	170	+ M.push_back(s[0]);
	171	+ M_colInd.push_back((int)ii(1, 2, 0));
	172	+ if (LAYERS > 2) {
	173	+ M.push_back(B * sz1);
	174	+ M_colInd.push_back((int)ii(1, 0, 1));
	175	+ M.push_back(B* s[0]);
	176	+ M_colInd.push_back((int)ii(1, 2, 1));
	177	+ }
	178	+ M_rowInd.push_back((int)M.size());
	179	+ b.push_back(A * s[0] * E[2] - A * sz0 * E[0]);
	180	+ }
	181	+ else if (l == LAYERS - 1) {
	182	+ M.push_back(A);
	183	+ M_colInd.push_back((int)ii(l - 1, 0, 0));
	184	+ M.push_back(1);
	185	+ M_colInd.push_back((int)ii(l - 1, 0, 1));
	186	+ M.push_back(-1);
	187	+ M_colInd.push_back((int)ii(l, 0, 0));
	188	+ M_rowInd.push_back((int)M.size());
	189	+ b.push_back(0);
	190	+
	191	+ M.push_back(A);
	192	+ M_colInd.push_back((int)ii(l - 1, 1, 0));
	193	+ M.push_back(1);
	194	+ M_colInd.push_back((int)ii(l - 1, 1, 1));
	195	+ M.push_back(-1);
	196	+ M_colInd.push_back((int)ii(l, 1, 0));
	197	+ M_rowInd.push_back((int)M.size());
	198	+ b.push_back(0);
	199	+
	200	+ M.push_back(-A * sz0);
	201	+ M_colInd.push_back((int)ii(l - 1, 1, 0));
	202	+ M.push_back(A * s[1]);
	203	+ M_colInd.push_back((int)ii(l - 1, 2, 0));
	204	+ M.push_back(sz0);
	205	+ M_colInd.push_back((int)ii(l - 1, 1, 1));
	206	+ M.push_back(s[1]);
	207	+ M_colInd.push_back((int)ii(l - 1, 2, 1));
	208	+ M.push_back(sz1);
	209	+ M_colInd.push_back((int)ii(l, 1, 0));
	210	+ M.push_back(-s[1]);
	211	+ M_colInd.push_back((int)ii(l, 2, 0));
	212	+ M_rowInd.push_back((int)M.size());
	213	+ b.push_back(0);
	214	+
	215	+ M.push_back(A * sz0);
	216	+ M_colInd.push_back((int)ii(l - 1, 0, 0));
	217	+ M.push_back(-A * s[0]);
	218	+ M_colInd.push_back((int)ii(l - 1, 2, 0));
	219	+ M.push_back(-sz0);
	220	+ M_colInd.push_back((int)ii(l - 1, 0, 1));
	221	+ M.push_back(-s[0]);
	222	+ M_colInd.push_back((int)ii(l - 1, 2, 1));
	223	+ M.push_back(-sz1);
	224	+ M_colInd.push_back((int)ii(l, 0, 0));
	225	+ M.push_back(s[0]);
	226	+ M_colInd.push_back((int)ii(l, 2, 0));
	227	+ M_rowInd.push_back((int)M.size());
	228	+ b.push_back(0);
	229	+ }
	230	+ else {
	231	+ M.push_back(A);
	232	+ M_colInd.push_back((int)ii(l - 1, 0, 0));
	233	+ M.push_back(1);
	234	+ M_colInd.push_back((int)ii(l - 1, 0, 1));
	235	+ M.push_back(-1);
	236	+ M_colInd.push_back((int)ii(l, 0, 0));
	237	+ M.push_back(-B);
	238	+ M_colInd.push_back((int)ii(l, 0, 1));
	239	+ M_rowInd.push_back((int)M.size());
	240	+ b.push_back(0);
	241	+
	242	+ M.push_back(A);
	243	+ M_colInd.push_back((int)ii(l - 1, 1, 0));
	244	+ M.push_back(1);
	245	+ M_colInd.push_back((int)ii(l - 1, 1, 1));
	246	+ M.push_back(-1);
	247	+ M_colInd.push_back((int)ii(l, 1, 0));
	248	+ M.push_back(-B);
	249	+ M_colInd.push_back((int)ii(l, 1, 1));
	250	+ M_rowInd.push_back((int)M.size());
	251	+ b.push_back(0);
	252	+
	253	+ M.push_back(-A * sz0);
	254	+ M_colInd.push_back((int)ii(l - 1, 1, 0));
	255	+ M.push_back(A * s[1]);
	256	+ M_colInd.push_back((int)ii(l - 1, 2, 0));
	257	+ M.push_back(sz0);
	258	+ M_colInd.push_back((int)ii(l - 1, 1, 1));
	259	+ M.push_back(s[1]);
	260	+ M_colInd.push_back((int)ii(l - 1, 2, 1));
	261	+ M.push_back(sz1);
	262	+ M_colInd.push_back((int)ii(l, 1, 0));
	263	+ M.push_back(-s[1]);
	264	+ M_colInd.push_back((int)ii(l, 2, 0));
	265	+ M.push_back(-B * sz1);
	266	+ M_colInd.push_back((int)ii(l, 1, 1));
	267	+ M.push_back(-B * s[1]);
	268	+ M_colInd.push_back((int)ii(l, 2, 1));
	269	+ M_rowInd.push_back((int)M.size());
	270	+ b.push_back(0);
	271	+
	272	+ M.push_back(A * sz0);
	273	+ M_colInd.push_back((int)ii(l - 1, 0, 0));
	274	+ M.push_back(-A * s[0]);
	275	+ M_colInd.push_back((int)ii(l - 1, 2, 0));
	276	+ M.push_back(-sz0);
	277	+ M_colInd.push_back((int)ii(l - 1, 0, 1));
	278	+ M.push_back(-s[0]);
	279	+ M_colInd.push_back((int)ii(l - 1, 2, 1));
	280	+ M.push_back(-sz1);
	281	+ M_colInd.push_back((int)ii(l, 0, 0));
	282	+ M.push_back(s[0]);
	283	+ M_colInd.push_back((int)ii(l, 2, 0));
	284	+ M.push_back(B * sz1);
	285	+ M_colInd.push_back((int)ii(l, 0, 1));
	286	+ M.push_back(B * s[0]);
	287	+ M_colInd.push_back((int)ii(l, 2, 1));
	288	+ M_rowInd.push_back((int)M.size());
	289	+ b.push_back(0);
	290	+ }
	291	+ }
	292	+ cudaError_t cudaStatus;
	293	+ cusolverStatus_t cusolverStatus;
	294	+ cusparseStatus_t cusparseStatus;
	295	+ cusolverSpHandle_t handle = NULL;
	296	+ cusparseHandle_t cusparseHandle = NULL;
	297	+ cudaStream_t stream = NULL;
	298	+ cusparseMatDescr_t descrM = NULL;
	299	+ cuDoubleComplex * csrValM_, * b_, *P_;
	300	+ size_t rowsA = b.size(), colsA = b.size(), nnA = M.size(), baseM_ = 0; //nnA is the number of non-zero elements.
	301	+ int* csrRowPtrM = NULL; //row index M_rowInd projected to GPU.
	302	+ int* csrColIndM = NULL; //CSR(A) from I/O. // M_colInd projected to GPU.
	303	+ double tol = 1.e-12; int reorder = 0;
	304	+ int singularity = 0;
	305	+
	306	+ //Initialize.
	307	+ cusolverStatus = cusolverSpCreate(&handle);
	308	+ int num = 1;
	309	+ cudaStatus = cudaGetDevice(&num);
	310	+ cusparseStatus = cusparseCreate(&cusparseHandle);
	311	+ cudaStatus = cudaStreamCreate(&stream);
	312	+ cusolverStatus = cusolverSpSetStream(handle, stream);
	313	+ cusparseStatus = cusparseSetStream(cusparseHandle, stream);
	314	+ cusparseStatus = cusparseCreateMatDescr(&descrM);
	315	+ cusparseStatus = cusparseSetMatType(descrM, CUSPARSE_MATRIX_TYPE_GENERAL);
	316	+ if (baseM_) {
	317	+ cusparseStatus = cusparseSetMatIndexBase(descrM, CUSPARSE_INDEX_BASE_ONE);
	318	+ }
	319	+ else {
	320	+ cusparseStatus = cusparseSetMatIndexBase(descrM, CUSPARSE_INDEX_BASE_ZERO);
	321	+ }
	322	+
	323	+ cudaStatus = cudaMalloc((void*)&csrRowPtrM, sizeof(int) (rowsA + 1)); //Projection of M_rowInd.
	324	+ cudaStatus = cudaMalloc((void*)&csrColIndM, sizeof(int) M_colInd.size()); //Projection of M_colInd.
	325	+ cudaStatus = cudaMalloc((void*)&csrValM_, sizeof(cuDoubleComplex) M.size()); //Projection of M.
	326	+ cudaStatus = cudaMalloc((void*)&b_, sizeof(cuDoubleComplex) b.size()); //Projection of b.
	327	+ cudaStatus = cudaMalloc((void*)&P_, sizeof(cuDoubleComplex) b.size()); //Projection of P.
	328	+
	329	+ cudaStatus = cudaMemcpy(csrValM_, M.data(), M.size() * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
	330	+ cudaStatus = cudaMemcpy(csrRowPtrM, M_rowInd.data(), M_rowInd.size() * sizeof(int), cudaMemcpyHostToDevice);
	331	+ cudaStatus = cudaMemcpy(csrColIndM, M_colInd.data(), M_colInd.size() * sizeof(int), cudaMemcpyHostToDevice);
	332	+ cudaStatus = cudaMemcpy(b_, b.data(), b.size() * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
	333	+ // Output the current CUDA error.
	334	+ //if (cudaStatus != cudaSuccess) {
	335	+ // cout<<"%s " << cudaGetErrorString(cudaStatus) << endl;
	336	+ //}
	337	+ P->resize(rowsA); //P is the to-be-solved matrix in CPU.
	338	+ //QR method.
	339	+ cusolverStatus = cusolverSpZcsrlsvqr(handle, (int)rowsA, (int)nnA, descrM, csrValM_, csrRowPtrM, csrColIndM, b_, tol, reorder, P_, (int*)&singularity);
	340	+ /cusparseStatus = cusparseZsctr(cusparseHandle, rowsA, g_z, g_Q, g_x, CUSPARSE_INDEX_BASE_ZERO);*/
	341	+ cudaStatus = cudaMemcpyAsync(P->data(), P_, sizeof(cuDoubleComplex) * rowsA, cudaMemcpyDeviceToHost, stream);
	342	+
	343	+ cudaStatus = cudaFree(csrRowPtrM);
	344	+ cudaStatus = cudaFree(csrColIndM);
	345	+ cudaStatus = cudaFree(csrValM_);
	346	+ cudaStatus = cudaFree(b_);
	347	+ cudaStatus = cudaFree(P_);
	348	+ vector<int>().swap(M_rowInd);
	349	+ vector<int>().swap(M_colInd);
	350	+ }
	351	+ else {
	352	+ //Work on CPU.
	353	+ M.resize(6 * (LAYERS - 1) * 6 * (LAYERS - 1));
	354	+ b.resize(6 * (LAYERS - 1));
	355	+
	356	+ size_t ei = 0;
	357	+ //Set constraints based on Gauss's Law.
	358	+ for (size_t l = 0; l < LAYERS; l++) {
	359	+ //Set the upward components for each layer.
	360	+ //Layer "LAYERS-1" doesn't have a upward component.
	361	+ if (l != LAYERS - 1) {
	362	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = s[0];
	363	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = s[1];
	364	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = -sz[l];
	365	+ ei += 1;
	366	+ }
	367	+ //Set the downward components for each layer.
	368	+ if (l != 0) {
	369	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = s[0];
	370	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = s[1];
	371	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = sz[l];
	372	+ ei += 1;
	373	+ }
	374	+ }
	375	+ //Enforce a continuous field across boundaries.
	376	+ complex<double> arg, arg_in, B;
	377	+ for (size_t l = 1; l < LAYERS; l++) {
	378	+ complex<double> sz0 = sz[l - 1];
	379	+ complex<double> sz1 = sz[l];
	380	+
	381	+ //Representation of A = np.exp(1j * k0 * sz0 * (self.z[l] - self.z[l - 1]))
	382	+ complex<double> A_in = k * sz0 * (z[l] - z[l - 1]);
	383	+ complex<double> A_in2 = { -A_in.imag(), A_in.real() };
	384	+ complex<double> A = exp(A_in2);
	385	+
	386	+ if (l < LAYERS - 1) {
	387	+ double dl = z[l] - z[l + 1];
	388	+ arg_in = -k * sz1 * (complex<double>)dl;
	389	+ arg = { -arg_in.imag(), arg_in.real() };
	390	+ B = exp(arg);
	391	+ }
	392	+ //if this is the second layer, use the simplified equations that account for the incident field
	393	+ if (l == 1) {
	394	+ M[ei * 6 * (LAYERS - 1) + ii(0, 0, 1)] = 1;
	395	+ M[ei * 6 * (LAYERS - 1) + ii(1, 0, 0)] = -1;
	396	+ if (LAYERS > 2) {
	397	+ M[ei * 6 * (LAYERS - 1) + ii(1, 0, 1)] = -B;
	398	+ }
	399	+ b[ei] = -A * E[0];
	400	+ ei += 1;
	401	+
	402	+ M[ei * 6 * (LAYERS - 1) + ii(0, 1, 1)] = 1;
	403	+ M[ei * 6 * (LAYERS - 1) + ii(1, 1, 0)] = -1;
	404	+ if (LAYERS > 2) {
	405	+ M[ei * 6 * (LAYERS - 1) + ii(1, 1, 1)] = -B;
	406	+ }
	407	+ b[ei] = -A * E[l];
	408	+ ei += 1;
	409	+
	410	+ M[ei * 6 * (LAYERS - 1) + ii(0, 2, 1)] = s[1];
	411	+ M[ei * 6 * (LAYERS - 1) + ii(0, 1, 1)] = sz0;
	412	+ M[ei * 6 * (LAYERS - 1) + ii(1, 2, 0)] = -s[1];
	413	+ M[ei * 6 * (LAYERS - 1) + ii(1, 1, 0)] = sz1;
	414	+ if (LAYERS > 2) {
	415	+ M[ei * 6 * (LAYERS - 1) + ii(1, 2, 1)] = -B * s[1];
	416	+ M[ei * 6 * (LAYERS - 1) + ii(1, 1, 1)] = -B * sz1;
	417	+ }
	418	+ b[ei] = A * sz0 * E[1] - A * s[1] * E[2];
	419	+ ei += 1;
	420	+
	421	+ M[ei * 6 * (LAYERS - 1) + ii(0, 0, 1)] = -sz0;
	422	+ M[ei * 6 * (LAYERS - 1) + ii(0, 2, 1)] = -s[0];
	423	+ M[ei * 6 * (LAYERS - 1) + ii(1, 0, 0)] = -sz1;
	424	+ M[ei * 6 * (LAYERS - 1) + ii(1, 2, 0)] = s[0];
	425	+ if (LAYERS > 2) {
	426	+ M[ei * 6 * (LAYERS - 1) + ii(1, 0, 1)] = B * sz1;
	427	+ M[ei * 6 * (LAYERS - 1) + ii(1, 2, 1)] = B * s[0];
	428	+ }
	429	+ b[ei] = A * s[0] * E[2] - A * sz0 * E[0];
	430	+ ei += 1;
	431	+ }
	432	+ else if (l == LAYERS - 1) {
	433	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A;
	434	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = 1;
	435	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -1;
	436	+ ei += 1;
	437	+
	438	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = A;
	439	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = 1;
	440	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = -1;
	441	+ ei += 1;
	442	+
	443	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = A * s[1];
	444	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = -A * sz0;
	445	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = s[1];
	446	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = sz0;
	447	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = -s[1];
	448	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = sz1;
	449	+ ei += 1;
	450	+
	451	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A * sz0;
	452	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = -A * s[0];
	453	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = -sz0;
	454	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = -s[0];
	455	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -sz1;
	456	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = s[0];
	457	+ ei += 1;
	458	+ }
	459	+ else {
	460	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A;
	461	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = 1;
	462	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -1;
	463	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = -B;
	464	+ ei += 1;
	465	+
	466	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = A;
	467	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = 1;
	468	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = -1;
	469	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = -B;
	470	+ ei += 1;
	471	+
	472	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = A * s[1];
	473	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = -A * sz0;
	474	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = s[1];
	475	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = sz0;
	476	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = -s[1];
	477	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = sz1;
	478	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = -B * s[1];
	479	+ M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = -B * sz1;
	480	+ ei += 1;
	481	+
	482	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A * sz0;
	483	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = -A * s[0];
	484	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = -sz0;
	485	+ M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = -s[0];
	486	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -sz1;
	487	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = s[0];
	488	+ M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = B * sz1;
	489	+ M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = B * s[0];
	490	+ ei += 1;
	491	+ }
	492	+ }
	493	+
	494	+ complex<double>* M_ = new complex<double>[M.size()];
	495	+ complex<double>* b_ = new complex<double>[b.size()];
	496	+ complex<double>* P_ = new complex<double>[b.size()];
	497	+ for (size_t i = 0; i < M.size(); i++) {
	498	+ M_[i] = M[i];
	499	+ if (i < b.size()) b_[i] = b[i];
	500	+ }
	501	+ LINALG_inverse(M_, (int)(6 * (LAYERS - 1)));
	502	+ LINALG_zgemm((int)(6 * (LAYERS - 1)), (int)1, (int)(6 * (LAYERS - 1)), M_, (int)(6 * (LAYERS - 1)), b_, (int)1, P_, (int)1);
	503	+ for (int i = 0; i < b.size(); i++) {
	504	+ P->push_back(P_[i]);
	505	+ }
	506	+
	507	+ delete[] M_;
	508	+ delete[] b_;
	509	+ delete[] P_;
	510	+ }
	511	+}
	512	+
	513	+
	514	+//Build matrix and get E.
	515	+void layersample::solve(vector<complex<double>>* E, bool CPU_op) { //orthogonalized E0 vectors.
	516	+ size_t LAYERS = n.size();
	517	+ //Store the matrix and RHS vector.
	518	+ vector<complex<double>> M; //All non-zero values in the sparse matrix.
	519	+ vector<complex<double>> b; //The right hand side column vector.
	520	+
	521	+ //Evaluate the linear system.
	522	+ vector<complex<double>> P; //Solution of matrix.
	523	+ layersample::generate_linsys(LAYERS, M, b, *E, &P, CPU_op);
	524	+
	525	+ //Store the coefficients for each layer.
	526	+ //Pt[3, L] transmission. Pr[3, L] reflection.
	527	+ Pt.resize(3 * LAYERS);
	528	+ Pr.resize(3 * LAYERS);
	529	+
	530	+ for (size_t l = 0; l < LAYERS; l++) {
	531	+ if (l == 0) {
	532	+ Pt[0] = (complex<double>)(*E)[0];
	533	+ Pt[LAYERS] = (complex<double>)(*E)[1];
	534	+ Pt[2 * LAYERS] = (complex<double>)(*E)[2];
	535	+ }
	536	+ else {
	537	+ Pt[l] = P[ii(l, 0, 0)];
	538	+ Pt[l + LAYERS] = P[ii(l, 1, 0)];
	539	+ Pt[l + 2 * LAYERS] = P[ii(l, 2, 0)];
	540	+ }
	541	+
	542	+ if (l == LAYERS - 1) {
	543	+ Pr[LAYERS - 1] = 0;
	544	+ Pr[2 * LAYERS - 1] = 0;
	545	+ Pr[3 * LAYERS - 1] = 0;
	546	+ }
	547	+ else {
	548	+ Pr[l] = P[ii(l, 0, 1)];
	549	+ Pr[l + LAYERS] = P[ii(l, 1, 1)];
	550	+ Pr[l + 2 * LAYERS] = P[ii(l, 2, 1)];
	551	+ }
	552	+ }
	553	+ vector<complex<double>>().swap(M);
	554	+ vector<complex<double>>().swap(b);
	555	+ vector<complex<double>>().swap(P);
	556	+}
0	557	\ No newline at end of file
...	...

src/layer.h 0 → 100644

Wrap text Show/Hide comments View file @71d5696

	1	+++ a/src/layer.h
	1	+#ifndef LAYER_H
	2	+#define LAYER_H
	3	+#include <vector>
	4	+#include <complex>
	5	+using namespace std;
	6	+
	7	+void crossProduct(vector<complex<double>>* a, vector<complex<double>>* b, vector<complex<double>>* c);
	8	+complex<double> Norm(vector<complex<double>>* E);
	9	+void Normalize(vector<complex<double>>* E);
	10	+vector<vector<double> > transpose(vector<vector<double> >* matrix);
	11	+void orthogonalize(vector<complex<double>>* E_0, vector<complex<double>>* E0, vector<complex<double>>* d);
	12	+
	13	+class layersample {
	14	+
	15	+public:
	16	+ double k; //wavenumber.
	17	+ vector<complex<double>> n; //refractive index.
	18	+ vector<double> z; //z postions.
	19	+ vector<complex<double>> s; //propagation direction. Keep it for output.
	20	+ vector<complex<double>> sz; //propagation direction. Keep it for output.
	21	+ vector<complex<double>> d; //direction of propagation of the plane wave.
	22	+ vector<complex<double>> Pt; //transimission.
	23	+ vector<complex<double>> Pr; //reflection.
	24	+ //Calulate the index of the field component associated with a layer.
	25	+ // l is the layer index. c is the component(x, y, z). d is the direction(0for transmission, 1 for reflection).
	26	+ size_t ii(size_t l, size_t c, size_t d);
	27	+
	28	+ //Generate the linear system corresponding to this layered sample and plane wave.
	29	+ void generate_linsys(size_t LAYERS, vector<complex<double>>& M, vector<complex<double>>& b, vector<complex<double>>& E, vector<complex<double>>* P, bool CPU_op);
	30	+
	31	+ //Build matrix and get E.
	32	+ void solve(vector<complex<double>>* E, bool CPU_op);
	33	+};
	34	+
	35	+#endif
0	36	\ No newline at end of file
...	...

src/linalg.cpp 0 → 100644

Wrap text Show/Hide comments View file @71d5696

	1	+++ a/src/linalg.cpp
	1	+#include "linalg.h"
	2	+
	3	+// This file contains a set of wrapper functions that are linked to the corresponding functions in CLAPACK.
	4	+extern "C" {
	5	+#include "f2c.h"
	6	+#include "clapack.h"
	7	+#include "cblas.h"
	8	+}
	9	+
	10	+
	11	+void LINALG_zgetrf(
	12	+ int M,
	13	+ int N,
	14	+ std::complex<double>* A,
	15	+ int LDA,
	16	+ int* IPIV)
	17	+{
	18	+ integer INFO;
	19	+ zgetrf_((integer)&M, (integer)&N, (doublecomplex)A, (integer)&LDA, (integer*)IPIV, &INFO);
	20	+}
	21	+
	22	+void LINALG_zgetri(
	23	+ size_t N,
	24	+ std::complex<double>* A,
	25	+ int LDA,
	26	+ int* IPIV)
	27	+{
	28	+ integer LWORK = -1;
	29	+ std::complex<double> WORK[1];
	30	+ integer INFO;
	31	+ zgetri_((integer)&N, (doublecomplex)A, (integer)&LDA, (integer)IPIV, (doublecomplex*)WORK, &LWORK, &INFO);
	32	+}
	33	+void LINALG_inverse(std::complex<double>* A, int N)
	34	+{
	35	+ int* IPIV = new int[N + (size_t)1];
	36	+ integer LWORK = N * N;
	37	+ std::complex<double>* WORK = new std::complex<double>[LWORK];
	38	+ integer INFO;
	39	+
	40	+ zgetrf_((integer)&N, (integer)&N, (doublecomplex)A, (integer)&N, (integer*)IPIV, &INFO);
	41	+ zgetri_((integer)&N, (doublecomplex)A, (integer)&N, (integer)IPIV, (doublecomplex*)WORK, &LWORK, &INFO);
	42	+
	43	+ delete[] IPIV;
	44	+ delete[] WORK;
	45	+}
	46	+
	47	+void LINALG_zgemm(
	48	+ const int M, //A(MK) B(KN)
	49	+ const int N,
	50	+ const int K,
	51	+ std::complex<double>* A,
	52	+ const int LDA, //=K
	53	+ std::complex<double>* B,
	54	+ const int LDB, //=N
	55	+ std::complex<double>* C,
	56	+ const int LDC) //=columns of C.
	57	+{
	58	+ std::complex<double> alpha = 1;
	59	+ std::complex<double> beta = 0;
	60	+ cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, (OPENBLAS_CONST blasint)M, (OPENBLAS_CONST blasint)N, (OPENBLAS_CONST blasint)K,
	61	+ &alpha, A, (OPENBLAS_CONST blasint)LDA, B, (OPENBLAS_CONST blasint)LDB, &beta, C, (OPENBLAS_CONST blasint)LDC);
	62	+
	63	+}
0	64	\ No newline at end of file
...	...

src/linalg.h 0 → 100644

Wrap text Show/Hide comments View file @71d5696

	1	+++ a/src/linalg.h
	1	+// This file contains a set of wrapper functions that are linked to the corresponding functions in CLAPACK
	2	+#include <complex>
	3	+
	4	+//Solve matrix inverse.
	5	+void LINALG_inverse(std::complex<double>* A, int N);
	6	+
	7	+//Solve matrix multiplication. C = A * B.
	8	+void LINALG_zgemm(
	9	+ const int M, //A(MK) B(KN)
	10	+ const int N,
	11	+ const int K,
	12	+ std::complex<double>* A,
	13	+ const int LDA, //=K
	14	+ std::complex<double>* B,
	15	+ const int LDB, //=N
	16	+ std::complex<double>* C,
	17	+ const int LDC); //=columns of C.
0	18	\ No newline at end of file
...	...

src/main.cpp 0 → 100644

Wrap text Show/Hide comments View file @71d5696

	1	+++ a/src/main.cpp
	1	+//Update the data output format in example_layer().
	2	+#include "layer.h"
	3	+#include <windows.h>
	4	+
	5	+#include <fstream>
	6	+#include <iostream>
	7	+#include "stim/parser/arguments.h"
	8	+
	9	+using namespace std;
	10	+#define PI 3.14159265358979323846264338328
	11	+
	12	+bool ASCII_output = false;
	13	+bool CPU_op = false;
	14	+
	15	+void advertise() {
	16	+ std::cout << std::endl << std::endl;
	17	+ std::cout << "=========================================================================" << std::endl;
	18	+ //std::cout << "Thank you for using the NetMets network comparison tool!" << std::endl;
	19	+ std::cout << "Scalable Tissue Imaging and Modeling (STIM) Lab, University of Houston" << std::endl;
	20	+ std::cout << "=========================================================================" << std::endl << std::endl;
	21	+}
	22	+
	23	+//Calculate Ez from vector d and Ex Ey.
	24	+void E_Cal(vector<complex<double>>* E, vector<complex<double>>* d) {
	25	+ if (d->size() == 2) {
	26	+ complex<double> dz = sqrt(1.0 + 0i - pow((d)[0], 2) - pow((d)[1], 2));
	27	+ d->push_back(dz);
	28	+ }
	29	+ E->push_back(-((E)[0] (d)[0] + (E)[1] * (d)[1]) / (d)[2]);
	30	+}
	31	+void output_binary(std::string filename, layersample& layers) {
	32	+ size_t L = layers.n.size(); // get the number of layers
	33	+ std::ofstream outFile;
	34	+ outFile.open(filename, std::ofstream::binary); // open the output file for binary writing
	35	+ if (outFile) {
	36	+ outFile.write((char*)&layers.k, sizeof(double));
	37	+ outFile.write((char*)&layers.d[0], sizeof(double));
	38	+ outFile.write((char*)&layers.d[1], sizeof(double));
	39	+ outFile.write((char)&layers.n[0], sizeof(double) 2);
	40	+
	41	+ for (size_t i = 0; i < L; i++) {
	42	+ outFile.write((char*)&layers.z[i], sizeof(double));
	43	+ outFile.write((char)&layers.sz[i], 2 sizeof(double));
	44	+ outFile.write((char)&layers.Pt[i], 2 sizeof(double));
	45	+ outFile.write((char)&layers.Pt[i + L], 2 sizeof(double));
	46	+ outFile.write((char)&layers.Pt[i + 2 L], 2 * sizeof(double));
	47	+ outFile.write((char)&layers.Pr[i], 2 sizeof(double));
	48	+ outFile.write((char)&layers.Pr[i + L], 2 sizeof(double));
	49	+ outFile.write((char)&layers.Pr[i + 2 L], 2 * sizeof(double));
	50	+ }
	51	+ outFile.close();
	52	+ }
	53	+ else {
	54	+ std::cout << "ERROR opening output file for binary writing: " << filename << std::endl;
	55	+ }
	56	+}
	57	+
	58	+void output_txt(std::string filename, layersample& layers) {
	59	+ size_t L = layers.n.size(); // get the number of layers
	60	+ std::ofstream outFile;
	61	+ outFile.open(filename); // open the output file for text writing
	62	+ if (!outFile) {
	63	+ std::cout << "ERROR: Could not open file " << filename << std::endl;
	64	+ exit(1);
	65	+ }
	66	+ int width = 15;
	67	+
	68	+ outFile << "--------------------------------" << endl;
	69	+ outFile << "The wavenumber at free space is : " << layers.k << endl;
	70	+ for (size_t i = 0; i < L; i++) {
	71	+ if (i == 0) {
	72	+ outFile << "--------------------------------" << endl;
	73	+ outFile << "LAYER " << i << " (z = " << layers.z[i] << ")" << endl;
	74	+ outFile << "refractive index: " << layers.n[i].real() << " + i " << layers.n[i].imag() << endl;
	75	+ outFile << "----------------------" << endl;
	76	+ outFile << "sx = " << setw(width) << layers.s[0].real() << " + i " << layers.s[0].imag() << endl;
	77	+ outFile << "sy = " << setw(width) << layers.s[1].real() << " + i " << layers.s[1].imag() << endl;
	78	+ outFile << "sz = " << setw(width) << layers.sz[i].real() << " + i " << layers.sz[i].imag() << endl;
	79	+ outFile << "----->>>>>" << endl;
	80	+ outFile << " X = " << setw(width) << layers.Pt[i].real() << " + i " << layers.Pt[i].imag() << endl;
	81	+ outFile << " Y = " << setw(width) << layers.Pt[i + L].real() << " + i " << layers.Pt[i + L].imag() << endl;
	82	+ outFile << " Z = " << setw(width) << layers.Pt[i + 2 * L].real() << " + i " << layers.Pt[i + 2 * L].imag() << endl;
	83	+ outFile << "<<<<<-----" << endl;
	84	+ outFile << " X = " << setw(width) << layers.Pr[i].real() << " + i " << layers.Pr[i].imag() << endl;
	85	+ outFile << " Y = " << setw(width) << layers.Pr[i + L].real() << " + i " << layers.Pr[i + L].imag() << endl;
	86	+ outFile << " Z = " << setw(width) << layers.Pr[i + 2 * L].real() << " + i " << layers.Pr[i + 2 * L].imag() << endl;
	87	+ }
	88	+ else {
	89	+ outFile << "----------------------" << endl;
	90	+ outFile << "LAYER " << i << " (z = " << layers.z[i] << ")" << endl;
	91	+ outFile << "refractive index: " << layers.n[i].real() << " + i " << layers.n[i].imag() << endl;
	92	+ outFile << "----------------------" << endl;
	93	+ outFile << "sx = " << setw(width) << layers.s[0].real() << " + i " << layers.s[0].imag() << endl;
	94	+ outFile << "sy = " << setw(width) << layers.s[1].real() << " + i " << layers.s[1].imag() << endl;
	95	+ outFile << "sz = " << setw(width) << layers.sz[i].real() << " + i " << layers.sz[i].imag() << endl;
	96	+ outFile << "----->>>>>" << endl;
	97	+ outFile << " X = " << setw(width) << layers.Pt[i].real() << " + i " << layers.Pt[i].imag() << endl;
	98	+ outFile << " Y = " << setw(width) << layers.Pt[i + L].real() << " + i " << layers.Pt[i + L].imag() << endl;
	99	+ outFile << " Z = " << setw(width) << layers.Pt[i + 2 * L].real() << " + i " << layers.Pt[i + 2 * L].imag() << endl;
	100	+
	101	+ outFile << "<<<<<-----" << endl;
	102	+ outFile << " X = " << setw(width) << layers.Pr[i].real() << " + i " << layers.Pr[i].imag() << endl;
	103	+ outFile << " Y = " << setw(width) << layers.Pr[i + L].real() << " + i " << layers.Pr[i + L].imag() << endl;
	104	+ outFile << " Z = " << setw(width) << layers.Pr[i + 2 * L].real() << " + i " << layers.Pr[i + 2 * L].imag() << endl;
	105	+ }
	106	+
	107	+ }
	108	+ outFile.close();
	109	+}
	110	+
	111	+void calculate_layer(std::string outName,
	112	+ vector<complex<double>>* ns, // the refractive index.
	113	+ vector<double>* depths, // z-direction position.
	114	+ vector<complex<double>>* E0, // the initialized E0.
	115	+ vector<complex<double>>* d0, // direction of propagation of the plane wave.
	116	+ double k0) { // the wavenumber at free space.
	117	+ std::string outName_ext(outName, size(outName)-4, size(outName)-1); // extract the extension of the output file
	118	+ E_Cal(E0, d0); // make sure that both vectors are orthogonal.
	119	+
	120	+ //Creat a new layersample and initialize.
	121	+ layersample Layer1; // create a layered sample
	122	+ Layer1.n = *ns; // set a pointer to the refractive indices
	123	+ Layer1.z = *depths; // set a pointer to the layer depths
	124	+ Layer1.d = *d0;
	125	+ Layer1.k = k0;
	126	+
	127	+ LARGE_INTEGER t1, t2, tc; // Timing.
	128	+ QueryPerformanceFrequency(&tc);
	129	+ QueryPerformanceCounter(&t1);
	130	+ Layer1.solve(E0, CPU_op); // Solve for the substrate field in GPU.
	131	+ QueryPerformanceCounter(&t2);
	132	+ std::cout << "time for 'solving linear functions':" << (t2.QuadPart - t1.QuadPart) / (double)tc.QuadPart << "ms."<< std::endl;
	133	+ //output(outName, Layer1);
	134	+ if (ASCII_output)
	135	+ output_txt(outName, Layer1);
	136	+ else
	137	+ output_binary(outName, Layer1);
	138	+}
	139	+
	140	+//Main function for example_layer.
	141	+int main(int argc, char* argv[]) {
	142	+ stim::arglist args;
	143	+
	144	+ //Basic argument lists.
	145	+ args.add("help", "prints this help");
	146	+ args.add("s", "propagation direction vector (x, y)", "0.5 0.0", "[-1.0, 1.0]");
	147	+ args.add("l", "wavelength", "5.0", "in arbitrary units (ex. um)");
	148	+ args.add("Ex", "complex amplitude (x direction)", "0.5 0.0");
	149	+ args.add("Ey", "complex amplitude (y direction)", "0.5 0.0");
	150	+ args.add("z", "layer positions");
	151	+ args.add("n", "layer optical path length (real refractive index)", "1.0 1.4 1.4 1.0");
	152	+ args.add("kappa", "layer absorbance (imaginary refractive index)");
	153	+ args.add("ascii", "output as an ASCII file");
	154	+ args.add("CPU", "execute the program in CPU");
	155	+ args.parse(argc, argv);
	156	+
	157	+ if (args["help"].is_set()) { // test for help
	158	+ advertise(); // output the advertisement
	159	+ std::cout << args.str(); // output arguments
	160	+ exit(1); // exit
	161	+ }
	162	+
	163	+ ASCII_output = args["ascii"]; // if the ascii flag is set, output an ascii file
	164	+ CPU_op = args["CPU"]; // if the CPU_op is set, run the program in CPU.
	165	+ std::string outName;
	166	+ if (args.nargs() == 1) {
	167	+ outName = args.arg(0);
	168	+ }
	169	+ else if (args.nargs() == 0) {
	170	+ if (ASCII_output)
	171	+ outName = "output.txt";
	172	+ else
	173	+ outName = "output.lyr";
	174	+ }
	175	+ else {
	176	+ std::cout << "ERROR: Too many arguments." << std::endl;
	177	+ exit(1);
	178	+ }
	179	+
	180	+
	181	+ vector<complex<double>> d; //direction of propagation of the plane wave Init: {0.5, 0}
	182	+ if (args["s"].nargs() == 2) {
	183	+ d.push_back({ (double)args["s"].as_float(0), 0 });
	184	+ d.push_back({ (double)args["s"].as_float(1), 0 });
	185	+ }
	186	+
	187	+ double l0 = (double)args["l"].as_float(0); //wavelength.Init: l0 = 5;
	188	+ double k0 = 2 * PI / l0; //Calculate the free-space wavenumber.
	189	+
	190	+ complex<double> Ex = { (double)args["Ex"].as_float(0), (double)args["Ex"].as_float(1) }; //Input E. Init: {1, 1, 0}
	191	+ complex<double> Ey = { (double)args["Ey"].as_float(0), (double)args["Ey"].as_float(1) };
	192	+ vector<complex<double>> E0;
	193	+ E0.push_back(Ex);
	194	+ E0.push_back(Ey);
	195	+
	196	+ //const int LAYERS = args["L"].as_int(); //LAYERS Init: 4
	197	+
	198	+ vector<double> depths;
	199	+ vector<complex<double>> ns;
	200	+ size_t i = 0;
	201	+ while(args["n"].is_set() && args["n"].as_float(i)!=0) { //n is the real part.
	202	+ if (args["kappa"].is_set()) //kappa is the imaginary part.
	203	+ ns.push_back({ (double)args["n"].as_float(i), (double)args["kappa"].as_float(i) });
	204	+ else
	205	+ ns.push_back({ (double)args["n"].as_float(i), 0 }); //ns <- n + i * kappa
	206	+ i++;
	207	+ }
	208	+ for (size_t j = 0; j < i; j++)
	209	+ if (args["z"].is_set())
	210	+ depths.push_back((double)args["z"].as_float(i));
	211	+ else {
	212	+ depths.push_back((double)-100 + j * 200 / i);
	213	+ }
	214	+ /---------------------------------------example_layer--------------------------------------------/
	215	+
	216	+ calculate_layer(outName, &ns, &depths, &E0, &d, k0);
	217	+
	218	+ return 0;
	219	+ std::cin.get();
	220	+}
0	221	\ No newline at end of file
...	...