From 71d5696d385e7d2e662bbd1e98264293479ed780 Mon Sep 17 00:00:00 2001 From: David Mayerich Date: Mon, 9 Nov 2020 11:17:29 -0600 Subject: [PATCH] First commit after development and testing --- CMakeLists.txt | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FindSTIM.cmake | 16 ++++++++++++++++ desktop.ini | Bin 0 -> 176 bytes docs/Readme_BytesOrder.txt | 19 +++++++++++++++++++ docs/lyr_format.pptx | Bin 0 -> 57140 bytes docs/testcases.txt | 11 +++++++++++ layerview.py | 176 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/layer.cpp | 556 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/layer.h | 35 +++++++++++++++++++++++++++++++++++ src/linalg.cpp | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/linalg.h | 17 +++++++++++++++++ src/main.cpp | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 1182 insertions(+), 0 deletions(-) create mode 100644 CMakeLists.txt create mode 100644 FindSTIM.cmake create mode 100644 desktop.ini create mode 100644 docs/Readme_BytesOrder.txt create mode 100644 docs/lyr_format.pptx create mode 100644 docs/testcases.txt create mode 100644 layerview.py create mode 100644 src/layer.cpp create mode 100644 src/layer.h create mode 100644 src/linalg.cpp create mode 100644 src/linalg.h create mode 100644 src/main.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..35768c6 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,69 @@ +#Specify the version being used aswell as the language +cmake_minimum_required(VERSION 3.12) + +#Name your project here +project(multilayer) + +#set the module directory +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}") + +#default to release mode +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif(NOT CMAKE_BUILD_TYPE) + +#build the executable in the binary directory on MS Visual Studio +if ( MSVC ) + SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}") + SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}") + SET( LIBRARY_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}") + SET( LIBRARY_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}") + add_definitions(-D_CRT_SECURE_NO_WARNINGS) + add_definitions(-D_SCL_SECURE_NO_WARNINGS) +endif ( MSVC ) + + +#find packages----------------------------------- +#find the pthreads package +find_package(Threads) + +#find the X11 package +find_package(X11) + +#find CUDA, mostly for LA stuff using cuBLAS +find_package(CUDA REQUIRED) + +#find Boost +#find_package(Boost) + +#find the STIM library +find_package(STIM REQUIRED) + +#find LAPACK and supporting link_libraries +find_package(clapack CONFIG REQUIRED) +find_package(OpenBLAS CONFIG REQUIRED) + +#include include directories +include_directories(${CUDA_INCLUDE_DIRS} + ${STIM_INCLUDE_DIRS} +) + +#Assign source files to the appropriate variables to easily associate them with executables +file(GLOB SRC "src/*.cpp") + +#-----------------------------Create the executable-------------------------- +#-----------------------------Show all four examples------------------------- +add_executable(multilayer + ${SRC} +) +link_directories(${CUDA_BIN_DIRS}) +target_link_libraries(multilayer ${CUDA_LIBRARIES} + ${CUDA_CUBLAS_LIBRARIES} + ${CUDA_cusparse_LIBRARY} + ${CUDA_cusolver_LIBRARY} + ${CUDA_CUFFT_LIBRARIES} + OpenBLAS::OpenBLAS + f2c lapack +) + + \ No newline at end of file diff --git a/FindSTIM.cmake b/FindSTIM.cmake new file mode 100644 index 0000000..cb2bcc7 --- /dev/null +++ b/FindSTIM.cmake @@ -0,0 +1,16 @@ +# finds the STIM library (downloads it if it isn't present) +# set STIMLIB_PATH to the directory containing the stim subdirectory (the stim repository) + +include(FindPackageHandleStandardArgs) + +set(STIM_ROOT $ENV{STIM_ROOT}) + +IF(NOT STIM_ROOT) + MESSAGE("ERROR: STIM_ROOT environment variable must be set!") +ENDIF(NOT STIM_ROOT) + + FIND_PATH(STIM_INCLUDE_DIRS DOC "Path to STIM include directory." + NAMES stim/image/image.h + PATHS ${STIM_ROOT}) + +find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIRS) diff --git a/desktop.ini b/desktop.ini new file mode 100644 index 0000000..a7cc8d5 Binary files /dev/null and b/desktop.ini differ diff --git a/docs/Readme_BytesOrder.txt b/docs/Readme_BytesOrder.txt new file mode 100644 index 0000000..ebd7548 --- /dev/null +++ b/docs/Readme_BytesOrder.txt @@ -0,0 +1,19 @@ +Order of "output.lyr" parameters. + +The wavenumber in free space: k0 double 8B +The direction of propogation: d double*2 16B +The refractive index in the first layer: n[0] complex 16B + +for i in LAYERS: + z positions[i]: z[i] double 8B * LAYERS + z-component of propogation directions: sz[i] complex 16B * LAYERS + Transmission: Ptx[i] complex 16B * LAYERS + Transmission: Pty[i] complex 16B * LAYERS + Transmission: Ptz[i] complex 16B * LAYERS + Reflection: Prx[i] complex 16B * LAYERS + Transmission: Pry[i] complex 16B * LAYERS + Transmission: Prz[i] complex 16B * LAYERS + + +All parameters we need will be: + 15 * LAYERS + 5 diff --git a/docs/lyr_format.pptx b/docs/lyr_format.pptx new file mode 100644 index 0000000..3f2cf5a Binary files /dev/null and b/docs/lyr_format.pptx differ diff --git a/docs/testcases.txt b/docs/testcases.txt new file mode 100644 index 0000000..b52dd9c --- /dev/null +++ b/docs/testcases.txt @@ -0,0 +1,11 @@ +output.lyr --n 1.0 1.2 1.3 1.4 1.5 1.4 1.3 1.2 1.1 --kappa 0.0 0.01 0.02 0.03 0.04 0.05 0.6 0.07 0.08 --zPos -50 -40 -30 -20 -10 0 10 20 30 + +output.lyr --n 1.0 2.0 1.0 --kappa 0.0 0.0 0.0 --z -50 0 50 + +defaults: + +*) If kappa is not specified, set them all to zero + +*) If z is not specified, create equally spaced layers between -100 and 100 + +*) Output: Place a color bar next to each image (colorbar()) \ No newline at end of file diff --git a/layerview.py b/layerview.py new file mode 100644 index 0000000..437a94c --- /dev/null +++ b/layerview.py @@ -0,0 +1,176 @@ +# create a function that displays the output when run this way: +# python layerview.py ouput.dat + +import sys +import os +from time import time +import subprocess +import struct +import numpy as np +import matplotlib +import math +import matplotlib.pyplot as plt + +from mpl_toolkits.axes_grid1 import ImageGrid + +def intensity(E): + Econj = np.conj(E) + I = np.sum(E*Econj, axis=-1) + return np.real(I) + +#evaluate a solved homogeneous substrate +# Returns a complex NxMx3 array representing the cross section of the field at Y=0 +def evaluate(Depths, k, d, n0, sz, Pt, Pr, X, Y, Z): + Depths = np.array(Depths) + sz = np.array(sz) + Pt = np.array(Pt) + Pr = np.array(Pr) + s = np.array(d) * n0 + #allocate space for layer indices + LI = np.zeros(Z.shape, dtype=np.int) + + #find the layer index for each sample point + L = len(Depths) + LI[Z < Depths[0]] = 0 + for l in range(L-1): + idx = np.logical_and(Z > Depths[l], Z <= Depths[l+1]) + LI[idx] = l + LI[Z > Depths[-1]] = L - 1 + + #calculate the appropriate phase shift for the wave transmitted through the layer + Ph_t = np.exp(1j * k * sz[LI] * (Z - Depths[LI])) + + #calculate the appropriate phase shift for the wave reflected off of the layer boundary + LIp = LI + 1 + LIp[LIp >= L] = 0 + Ph_r = np.exp(-1j * k * sz[LI] * (Z - Depths[LIp])) + Ph_r[LI >= L-1] = 0 + + #calculate the phase shift based on the X and Y positions + Ph_xy = np.exp(1j * k * (s[0] * X + s[1] * Y)) + + #apply the phase shifts + Et = Pt[:, LI] * Ph_t[:, :] + Er = Pr[:, LI] * Ph_r[:, :] + + #add everything together coherently + E = (Et + Er) * Ph_xy[:, :] + + #return the electric field + return np.moveaxis(E, 0, -1) + +class planewave: + def __int__(self): + self.LAYERS = 0 #Number of layers. int + self.depths = [] #z positions of layers. [1, 5, ..., 10] double + self.k0 = 0.0 #wavenumber at free space. double + self.d = [] #direction of propogation. [0.5, 0] double + self.n0 = 0.0+0.0j #the refractive index of the first layer. complex + self.sz = [] #z-component of propagation for each layer. complex + self.Pt = [[] for i in range(3)] #transmission complex + self.Pr = [[],[],[]] #refraction complex + +# display a binary file produced using the coupled wave C code +def layer(strc): + f = open(strc, "rb") + + # create an empty plane wave structure + L = planewave() + L.depths = [] + L.d = [] + L.sz = [] + L.Pt = [[],[],[]] + L.Pr = [[],[],[]] + + # open the input file for reading + file_bytes = os.path.getsize(strc) + + # calculate the number of layers in the sample + L.LAYERS = int((file_bytes/8-5)/15) + + # load the raw layer data into the plane wave structure + data_raw = struct.unpack('d' * (15*L.LAYERS+5), f.read((15*L.LAYERS+5)* 8)) + data = np.asarray(data_raw) + L.k0 = data[0] + L.d.append(data[1]) + L.d.append(data[2]) + L.n0 = complex(data[3], data[4]) + + # load each layer's plane waves from the binary file + for i in range(L.LAYERS): + L.depths.append(data[5+15*i]) + L.sz.append(complex(data[6+15*i], data[7+15*i])) + L.Pt[0].append(complex(data[8+15*i], data[9+15*i])) + L.Pt[1].append(complex(data[15*i+10], data[15*i+11])) + L.Pt[2].append(complex(data[15*i+12], data[15*i+13])) + L.Pr[0].append(complex(data[15*i+14], data[15*i+15])) + L.Pr[1].append(complex(data[15*i+16], data[15*i+17])) + L.Pr[2].append(complex(data[15*i+18], data[15*i+19])) + + N = 512 # simulation resolution NxM + M = 1024 + #DAVID: Don't hard-code the dimensions - you'll have to calculate them based on the sample information in the file + D = [-110, 110, 0, 60] # dimensions of the simulation + x = np.linspace(D[2], D[3], N) # set the sample points for the simulation + z = np.linspace(D[0], D[1], M) + [X, Z] = np.meshgrid(x, z) # create a mesh grid to evaluate layers + Y = np.zeros(X.shape) + + # evaluate the field across all layers + E = evaluate(L.depths, L.k0, L.d, L.n0, L.sz, L.Pt, L.Pr, X, Y, Z) + Er = np.real(E) + I = intensity(E) + + plt.set_cmap("afmhot") # set the color map + plt.subplot(1, 4, 1) + plt.imshow(Er[:, :, 0], extent=(D[3], D[2], D[1], D[0])) + #plt.colorbar() + plt.title("Ex") + + plt.subplot(1, 4, 2) + plt.imshow(Er[:, :, 1], extent=(D[3], D[2], D[1], D[0])) + #plt.colorbar() + plt.title("Ey") + + plt.subplot(1, 4, 3) + plt.imshow(Er[:, :, 2], extent=(D[3], D[2], D[1], D[0])) + #plt.colorbar() + plt.title("Ez") + + plt.subplot(1, 4, 4) + plt.imshow(I, extent=(D[3], D[2], D[1], D[0])) + plt.colorbar() + plt.title("I") + + #fig = plt.figure(1, (5, 10)) + #plt.set_cmap("afmhot") + #matplotlib.rcParams.update({'font.size': 10}) + #grid = ImageGrid(fig, rect = 211, nrows_ncols = (1, 3), axes_pad = 0.2, label_mode = "1", cbar_mode = "single", cbar_size = "18%") + #Title = ["Ex", "Ey", "Ez"] + #for i in range(3): + # grid[i].axis('off') + # im = grid[i].imshow(Er[..., i], extent=(D[3], D[2], D[1], D[0]), interpolation="nearest") + # grid[i].set_title(Title[i]) + #grid.cbar_axes[0].colorbar(im) + #plt.title("E") + #plt.subplot(212) + #plt.imshow(I, extent=(D[3], D[2], D[1], D[0])) + #plt.title("I") + #plt.colorbar() + plt.show() + +# function displays usage text to the console +def usage(): + print("Usage:") + print(" layerview input.dat") + +if __name__ == '__main__': + start = time() + if len(sys.argv) < 2: # if there are no command line arguments + usage() # display the usage text + exit() # exit + else: + layer(sys.argv[1]) # otherwise display the given data file + + end = time() + print("The elapsed time is " + str(end - start) + " s. ") \ No newline at end of file diff --git a/src/layer.cpp b/src/layer.cpp new file mode 100644 index 0000000..e5f6505 --- /dev/null +++ b/src/layer.cpp @@ -0,0 +1,556 @@ +#include "layer.h" +#include "linalg.h" //LAPACKE support for Visual Studio + +#include +#include +//#include "cublas_v2.h" +#include "cusolverSp.h" +/*----------------------------GPU-----------------------------*/ + + +//Cross product.c is result. +void crossProduct(vector>* a, vector>* b, //The given matrices. + vector>* c) { //Matrix to be gotten. + c->push_back((*a)[1] * (*b)[2] - (*a)[2] * (*b)[1]); + c->push_back((*a)[2] * (*b)[0] - (*a)[0] * (*b)[2]); + c->push_back((*a)[0] * (*b)[1] - (*a)[1] * (*b)[0]); +} + +//Calculate the norm for a matrix. +complex Norm(vector>* E) { + complex sum = 0; + for (unsigned int i = 0; i < E->size(); i++) { + sum += (*E)[i] * (*E)[i]; + } + return sqrt(sum); +} + +//Normalize matrix. +void Normalize(vector>* E) { + complex sum = 0; + for (unsigned int i = 0; i < E->size(); i++) { + sum += (*E)[i] * (*E)[i]; + } + for (unsigned int i = 0; i < E->size(); i++) { + (*E)[i] = (*E)[i] / sqrt(sum); + + } +} + +//Orthogonalization. +void orthogonalize(vector>* E_0rtho, vector>* E0, vector>* d) { + vector> s; + if (d->size() == 2) { + complex dz = sqrt(1.0+0i - pow((*d)[0], 2) - pow((*d)[1], 2)); + d->push_back(dz); + } + crossProduct(E0, d, &s); + crossProduct(d, &s, E_0rtho); + vector>().swap(s); +} + +/*--------------------------------------------------Define Class layersample.--------------------------------------------------------*/ + +/*Do not try to replace "int" as "size_t". +This will result in a bunch of warnings and if we continuously change the type of M_rowInd and M_colInd, the EXCEPTION will occur again.*/ +size_t layersample::ii(size_t l, size_t c, size_t d) { //ii(l, c, d) means the column indexes for every element. + return l * 6 + d * 3 + c - 3; +} + +void layersample::generate_linsys(size_t LAYERS, + vector>& M, //All non-zero values in "A" matirx.(A * X = b) + vector>& b, //The right hand side column vector. + vector>& E, //orthogonalized E0 vectors + vector>* P, + bool CPU_op) { //Solution of the matrices multiplication. + //Calculate the sz component for each layer. + s.clear(); //s is the plane wave direction scaled by the refractive index. + for (size_t i = 0; i < 2; i++) + s.push_back(d[i] * n[0]); + sz.clear(); + for (int l = 0; l < LAYERS; l++) { + sz.push_back(sqrt(pow(n[l], 2) - pow(s[0], 2) - pow(s[1], 2))); + } + + if (!CPU_op){ + //Computer in GPU. + vector M_rowInd; //Sparse matrix M CSR ->row index + vector M_colInd; //Sparse matrix M CSR ->number of elements + M_rowInd.push_back(0); + ////Build M by setting constraints based on Gauss's Law. + for (size_t l = 0; l < LAYERS; l++) { + //Set the upward components for each layer. + //Layer "LAYERS-1" doesn't have a upward component. + if (l != LAYERS - 1) { + M.push_back(s[0]); + M_colInd.push_back((int)ii(l, 0, 1)); + M.push_back(s[1]); + M_colInd.push_back((int)ii(l, 1, 1)); + M.push_back(-sz[l]); + M_colInd.push_back((int)ii(l, 2, 1)); + M_rowInd.push_back((int)M.size()); + b.push_back(0); + } + //Set the downward components for each layer. + if (l != 0) { + M.push_back(s[0]); + M_colInd.push_back((int)ii(l, 0, 0)); + M.push_back(s[1]); + M_colInd.push_back((int)ii(l, 1, 0)); + M.push_back(sz[l]); + M_colInd.push_back((int)ii(l, 2, 0)); + M_rowInd.push_back((int)M.size()); + b.push_back(0); + } + } + //Continue to build M by enforcing a continuous field across boundaries. + complex arg, arg_in, B; + for (size_t l = 1; l < LAYERS; l++) { + complex sz0 = sz[l - 1]; + complex sz1 = sz[l]; + + //Representation of A = np.exp(1j * k0 * sz0 * (self.z[l] - self.z[l - 1])) + complex A_in = k * sz0 * (z[l] - z[l - 1]); + complex A_in2 = { -A_in.imag(), A_in.real() }; + complex A = exp(A_in2); + + if (l < LAYERS - 1) { + double dl = z[l] - z[l + 1]; + arg_in = -k * sz1 * (complex)dl; + arg = { -arg_in.imag(), arg_in.real() }; + B = exp(arg); + } + //if this is the second layer, use the simplified equations that account for the incident field + if (l == 1) { + M.push_back(1); + M_colInd.push_back((int)ii(0, 0, 1)); + M.push_back(-1); + M_colInd.push_back((int)ii(1, 0, 0)); + if (LAYERS > 2) { + M.push_back(-B); + M_colInd.push_back((int)ii(1, 0, 1)); + } + M_rowInd.push_back((int)M.size()); + b.push_back(-A * E[0]); + + M.push_back(1); + M_colInd.push_back((int)ii(0, 1, 1)); + M.push_back(-1); + M_colInd.push_back((int)ii(1, 1, 0)); + if (LAYERS > 2) { + M.push_back(-B); + M_colInd.push_back((int)ii(1, 1, 1)); + } + M_rowInd.push_back((int)M.size()); + b.push_back(-A * E[l]); + + M.push_back(sz0); + M_colInd.push_back((int)ii(0, 1, 1)); + M.push_back(s[1]); + M_colInd.push_back((int)ii(0, 2, 1)); + M.push_back(sz1); + M_colInd.push_back((int)ii(1, 1, 0)); + M.push_back(-s[1]); + M_colInd.push_back((int)ii(1, 2, 0)); + if (LAYERS > 2) { + M.push_back(-B * sz1); + M_colInd.push_back((int)ii(1, 1, 1)); + M.push_back(-B * s[1]); + M_colInd.push_back((int)ii(1, 2, 1)); + } + M_rowInd.push_back((int)M.size()); + b.push_back(A * sz0 * E[1] - A * s[1] * E[2]); + + M.push_back(-sz0); + M_colInd.push_back((int)ii(0, 0, 1)); + M.push_back(-s[0]); + M_colInd.push_back((int)ii(0, 2, 1)); + M.push_back(-sz1); + M_colInd.push_back((int)ii(1, 0, 0)); + M.push_back(s[0]); + M_colInd.push_back((int)ii(1, 2, 0)); + if (LAYERS > 2) { + M.push_back(B * sz1); + M_colInd.push_back((int)ii(1, 0, 1)); + M.push_back(B* s[0]); + M_colInd.push_back((int)ii(1, 2, 1)); + } + M_rowInd.push_back((int)M.size()); + b.push_back(A * s[0] * E[2] - A * sz0 * E[0]); + } + else if (l == LAYERS - 1) { + M.push_back(A); + M_colInd.push_back((int)ii(l - 1, 0, 0)); + M.push_back(1); + M_colInd.push_back((int)ii(l - 1, 0, 1)); + M.push_back(-1); + M_colInd.push_back((int)ii(l, 0, 0)); + M_rowInd.push_back((int)M.size()); + b.push_back(0); + + M.push_back(A); + M_colInd.push_back((int)ii(l - 1, 1, 0)); + M.push_back(1); + M_colInd.push_back((int)ii(l - 1, 1, 1)); + M.push_back(-1); + M_colInd.push_back((int)ii(l, 1, 0)); + M_rowInd.push_back((int)M.size()); + b.push_back(0); + + M.push_back(-A * sz0); + M_colInd.push_back((int)ii(l - 1, 1, 0)); + M.push_back(A * s[1]); + M_colInd.push_back((int)ii(l - 1, 2, 0)); + M.push_back(sz0); + M_colInd.push_back((int)ii(l - 1, 1, 1)); + M.push_back(s[1]); + M_colInd.push_back((int)ii(l - 1, 2, 1)); + M.push_back(sz1); + M_colInd.push_back((int)ii(l, 1, 0)); + M.push_back(-s[1]); + M_colInd.push_back((int)ii(l, 2, 0)); + M_rowInd.push_back((int)M.size()); + b.push_back(0); + + M.push_back(A * sz0); + M_colInd.push_back((int)ii(l - 1, 0, 0)); + M.push_back(-A * s[0]); + M_colInd.push_back((int)ii(l - 1, 2, 0)); + M.push_back(-sz0); + M_colInd.push_back((int)ii(l - 1, 0, 1)); + M.push_back(-s[0]); + M_colInd.push_back((int)ii(l - 1, 2, 1)); + M.push_back(-sz1); + M_colInd.push_back((int)ii(l, 0, 0)); + M.push_back(s[0]); + M_colInd.push_back((int)ii(l, 2, 0)); + M_rowInd.push_back((int)M.size()); + b.push_back(0); + } + else { + M.push_back(A); + M_colInd.push_back((int)ii(l - 1, 0, 0)); + M.push_back(1); + M_colInd.push_back((int)ii(l - 1, 0, 1)); + M.push_back(-1); + M_colInd.push_back((int)ii(l, 0, 0)); + M.push_back(-B); + M_colInd.push_back((int)ii(l, 0, 1)); + M_rowInd.push_back((int)M.size()); + b.push_back(0); + + M.push_back(A); + M_colInd.push_back((int)ii(l - 1, 1, 0)); + M.push_back(1); + M_colInd.push_back((int)ii(l - 1, 1, 1)); + M.push_back(-1); + M_colInd.push_back((int)ii(l, 1, 0)); + M.push_back(-B); + M_colInd.push_back((int)ii(l, 1, 1)); + M_rowInd.push_back((int)M.size()); + b.push_back(0); + + M.push_back(-A * sz0); + M_colInd.push_back((int)ii(l - 1, 1, 0)); + M.push_back(A * s[1]); + M_colInd.push_back((int)ii(l - 1, 2, 0)); + M.push_back(sz0); + M_colInd.push_back((int)ii(l - 1, 1, 1)); + M.push_back(s[1]); + M_colInd.push_back((int)ii(l - 1, 2, 1)); + M.push_back(sz1); + M_colInd.push_back((int)ii(l, 1, 0)); + M.push_back(-s[1]); + M_colInd.push_back((int)ii(l, 2, 0)); + M.push_back(-B * sz1); + M_colInd.push_back((int)ii(l, 1, 1)); + M.push_back(-B * s[1]); + M_colInd.push_back((int)ii(l, 2, 1)); + M_rowInd.push_back((int)M.size()); + b.push_back(0); + + M.push_back(A * sz0); + M_colInd.push_back((int)ii(l - 1, 0, 0)); + M.push_back(-A * s[0]); + M_colInd.push_back((int)ii(l - 1, 2, 0)); + M.push_back(-sz0); + M_colInd.push_back((int)ii(l - 1, 0, 1)); + M.push_back(-s[0]); + M_colInd.push_back((int)ii(l - 1, 2, 1)); + M.push_back(-sz1); + M_colInd.push_back((int)ii(l, 0, 0)); + M.push_back(s[0]); + M_colInd.push_back((int)ii(l, 2, 0)); + M.push_back(B * sz1); + M_colInd.push_back((int)ii(l, 0, 1)); + M.push_back(B * s[0]); + M_colInd.push_back((int)ii(l, 2, 1)); + M_rowInd.push_back((int)M.size()); + b.push_back(0); + } + } + cudaError_t cudaStatus; + cusolverStatus_t cusolverStatus; + cusparseStatus_t cusparseStatus; + cusolverSpHandle_t handle = NULL; + cusparseHandle_t cusparseHandle = NULL; + cudaStream_t stream = NULL; + cusparseMatDescr_t descrM = NULL; + cuDoubleComplex * csrValM_, * b_, *P_; + size_t rowsA = b.size(), colsA = b.size(), nnA = M.size(), baseM_ = 0; //nnA is the number of non-zero elements. + int* csrRowPtrM = NULL; //row index M_rowInd projected to GPU. + int* csrColIndM = NULL; //CSR(A) from I/O. // M_colInd projected to GPU. + double tol = 1.e-12; int reorder = 0; + int singularity = 0; + + //Initialize. + cusolverStatus = cusolverSpCreate(&handle); + int num = 1; + cudaStatus = cudaGetDevice(&num); + cusparseStatus = cusparseCreate(&cusparseHandle); + cudaStatus = cudaStreamCreate(&stream); + cusolverStatus = cusolverSpSetStream(handle, stream); + cusparseStatus = cusparseSetStream(cusparseHandle, stream); + cusparseStatus = cusparseCreateMatDescr(&descrM); + cusparseStatus = cusparseSetMatType(descrM, CUSPARSE_MATRIX_TYPE_GENERAL); + if (baseM_) { + cusparseStatus = cusparseSetMatIndexBase(descrM, CUSPARSE_INDEX_BASE_ONE); + } + else { + cusparseStatus = cusparseSetMatIndexBase(descrM, CUSPARSE_INDEX_BASE_ZERO); + } + + cudaStatus = cudaMalloc((void**)&csrRowPtrM, sizeof(int) * (rowsA + 1)); //Projection of M_rowInd. + cudaStatus = cudaMalloc((void**)&csrColIndM, sizeof(int) * M_colInd.size()); //Projection of M_colInd. + cudaStatus = cudaMalloc((void**)&csrValM_, sizeof(cuDoubleComplex) * M.size()); //Projection of M. + cudaStatus = cudaMalloc((void**)&b_, sizeof(cuDoubleComplex) * b.size()); //Projection of b. + cudaStatus = cudaMalloc((void**)&P_, sizeof(cuDoubleComplex) * b.size()); //Projection of P. + + cudaStatus = cudaMemcpy(csrValM_, M.data(), M.size() * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice); + cudaStatus = cudaMemcpy(csrRowPtrM, M_rowInd.data(), M_rowInd.size() * sizeof(int), cudaMemcpyHostToDevice); + cudaStatus = cudaMemcpy(csrColIndM, M_colInd.data(), M_colInd.size() * sizeof(int), cudaMemcpyHostToDevice); + cudaStatus = cudaMemcpy(b_, b.data(), b.size() * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice); + // Output the current CUDA error. + //if (cudaStatus != cudaSuccess) { + // cout<<"%s " << cudaGetErrorString(cudaStatus) << endl; + //} + P->resize(rowsA); //P is the to-be-solved matrix in CPU. + //QR method. + cusolverStatus = cusolverSpZcsrlsvqr(handle, (int)rowsA, (int)nnA, descrM, csrValM_, csrRowPtrM, csrColIndM, b_, tol, reorder, P_, (int*)&singularity); + /*cusparseStatus = cusparseZsctr(*cusparseHandle, rowsA, g_z, g_Q, g_x, CUSPARSE_INDEX_BASE_ZERO);*/ + cudaStatus = cudaMemcpyAsync(P->data(), P_, sizeof(cuDoubleComplex) * rowsA, cudaMemcpyDeviceToHost, stream); + + cudaStatus = cudaFree(csrRowPtrM); + cudaStatus = cudaFree(csrColIndM); + cudaStatus = cudaFree(csrValM_); + cudaStatus = cudaFree(b_); + cudaStatus = cudaFree(P_); + vector().swap(M_rowInd); + vector().swap(M_colInd); + } + else { + //Work on CPU. + M.resize(6 * (LAYERS - 1) * 6 * (LAYERS - 1)); + b.resize(6 * (LAYERS - 1)); + + size_t ei = 0; + //Set constraints based on Gauss's Law. + for (size_t l = 0; l < LAYERS; l++) { + //Set the upward components for each layer. + //Layer "LAYERS-1" doesn't have a upward component. + if (l != LAYERS - 1) { + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = s[0]; + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = s[1]; + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = -sz[l]; + ei += 1; + } + //Set the downward components for each layer. + if (l != 0) { + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = s[0]; + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = s[1]; + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = sz[l]; + ei += 1; + } + } + //Enforce a continuous field across boundaries. + complex arg, arg_in, B; + for (size_t l = 1; l < LAYERS; l++) { + complex sz0 = sz[l - 1]; + complex sz1 = sz[l]; + + //Representation of A = np.exp(1j * k0 * sz0 * (self.z[l] - self.z[l - 1])) + complex A_in = k * sz0 * (z[l] - z[l - 1]); + complex A_in2 = { -A_in.imag(), A_in.real() }; + complex A = exp(A_in2); + + if (l < LAYERS - 1) { + double dl = z[l] - z[l + 1]; + arg_in = -k * sz1 * (complex)dl; + arg = { -arg_in.imag(), arg_in.real() }; + B = exp(arg); + } + //if this is the second layer, use the simplified equations that account for the incident field + if (l == 1) { + M[ei * 6 * (LAYERS - 1) + ii(0, 0, 1)] = 1; + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 0)] = -1; + if (LAYERS > 2) { + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 1)] = -B; + } + b[ei] = -A * E[0]; + ei += 1; + + M[ei * 6 * (LAYERS - 1) + ii(0, 1, 1)] = 1; + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 0)] = -1; + if (LAYERS > 2) { + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 1)] = -B; + } + b[ei] = -A * E[l]; + ei += 1; + + M[ei * 6 * (LAYERS - 1) + ii(0, 2, 1)] = s[1]; + M[ei * 6 * (LAYERS - 1) + ii(0, 1, 1)] = sz0; + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 0)] = -s[1]; + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 0)] = sz1; + if (LAYERS > 2) { + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 1)] = -B * s[1]; + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 1)] = -B * sz1; + } + b[ei] = A * sz0 * E[1] - A * s[1] * E[2]; + ei += 1; + + M[ei * 6 * (LAYERS - 1) + ii(0, 0, 1)] = -sz0; + M[ei * 6 * (LAYERS - 1) + ii(0, 2, 1)] = -s[0]; + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 0)] = -sz1; + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 0)] = s[0]; + if (LAYERS > 2) { + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 1)] = B * sz1; + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 1)] = B * s[0]; + } + b[ei] = A * s[0] * E[2] - A * sz0 * E[0]; + ei += 1; + } + else if (l == LAYERS - 1) { + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = 1; + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -1; + ei += 1; + + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = A; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = 1; + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = -1; + ei += 1; + + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = A * s[1]; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = -A * sz0; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = s[1]; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = sz0; + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = -s[1]; + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = sz1; + ei += 1; + + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A * sz0; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = -A * s[0]; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = -sz0; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = -s[0]; + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -sz1; + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = s[0]; + ei += 1; + } + else { + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = 1; + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -1; + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = -B; + ei += 1; + + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = A; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = 1; + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = -1; + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = -B; + ei += 1; + + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = A * s[1]; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = -A * sz0; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = s[1]; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = sz0; + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = -s[1]; + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = sz1; + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = -B * s[1]; + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = -B * sz1; + ei += 1; + + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A * sz0; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = -A * s[0]; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = -sz0; + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = -s[0]; + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -sz1; + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = s[0]; + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = B * sz1; + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = B * s[0]; + ei += 1; + } + } + + complex* M_ = new complex[M.size()]; + complex* b_ = new complex[b.size()]; + complex* P_ = new complex[b.size()]; + for (size_t i = 0; i < M.size(); i++) { + M_[i] = M[i]; + if (i < b.size()) b_[i] = b[i]; + } + LINALG_inverse(M_, (int)(6 * (LAYERS - 1))); + LINALG_zgemm((int)(6 * (LAYERS - 1)), (int)1, (int)(6 * (LAYERS - 1)), M_, (int)(6 * (LAYERS - 1)), b_, (int)1, P_, (int)1); + for (int i = 0; i < b.size(); i++) { + P->push_back(P_[i]); + } + + delete[] M_; + delete[] b_; + delete[] P_; + } +} + + +//Build matrix and get E. +void layersample::solve(vector>* E, bool CPU_op) { //orthogonalized E0 vectors. + size_t LAYERS = n.size(); + //Store the matrix and RHS vector. + vector> M; //All non-zero values in the sparse matrix. + vector> b; //The right hand side column vector. + + //Evaluate the linear system. + vector> P; //Solution of matrix. + layersample::generate_linsys(LAYERS, M, b, *E, &P, CPU_op); + + //Store the coefficients for each layer. + //Pt[3, L] transmission. Pr[3, L] reflection. + Pt.resize(3 * LAYERS); + Pr.resize(3 * LAYERS); + + for (size_t l = 0; l < LAYERS; l++) { + if (l == 0) { + Pt[0] = (complex)(*E)[0]; + Pt[LAYERS] = (complex)(*E)[1]; + Pt[2 * LAYERS] = (complex)(*E)[2]; + } + else { + Pt[l] = P[ii(l, 0, 0)]; + Pt[l + LAYERS] = P[ii(l, 1, 0)]; + Pt[l + 2 * LAYERS] = P[ii(l, 2, 0)]; + } + + if (l == LAYERS - 1) { + Pr[LAYERS - 1] = 0; + Pr[2 * LAYERS - 1] = 0; + Pr[3 * LAYERS - 1] = 0; + } + else { + Pr[l] = P[ii(l, 0, 1)]; + Pr[l + LAYERS] = P[ii(l, 1, 1)]; + Pr[l + 2 * LAYERS] = P[ii(l, 2, 1)]; + } + } + vector>().swap(M); + vector>().swap(b); + vector>().swap(P); +} \ No newline at end of file diff --git a/src/layer.h b/src/layer.h new file mode 100644 index 0000000..5f85d42 --- /dev/null +++ b/src/layer.h @@ -0,0 +1,35 @@ +#ifndef LAYER_H +#define LAYER_H +#include +#include +using namespace std; + +void crossProduct(vector>* a, vector>* b, vector>* c); +complex Norm(vector>* E); +void Normalize(vector>* E); +vector > transpose(vector >* matrix); +void orthogonalize(vector>* E_0, vector>* E0, vector>* d); + +class layersample { + +public: + double k; //wavenumber. + vector> n; //refractive index. + vector z; //z postions. + vector> s; //propagation direction. Keep it for output. + vector> sz; //propagation direction. Keep it for output. + vector> d; //direction of propagation of the plane wave. + vector> Pt; //transimission. + vector> Pr; //reflection. + //Calulate the index of the field component associated with a layer. + // l is the layer index. c is the component(x, y, z). d is the direction(0for transmission, 1 for reflection). + size_t ii(size_t l, size_t c, size_t d); + + //Generate the linear system corresponding to this layered sample and plane wave. + void generate_linsys(size_t LAYERS, vector>& M, vector>& b, vector>& E, vector>* P, bool CPU_op); + + //Build matrix and get E. + void solve(vector>* E, bool CPU_op); +}; + +#endif \ No newline at end of file diff --git a/src/linalg.cpp b/src/linalg.cpp new file mode 100644 index 0000000..31bd4ef --- /dev/null +++ b/src/linalg.cpp @@ -0,0 +1,63 @@ +#include "linalg.h" + +// This file contains a set of wrapper functions that are linked to the corresponding functions in CLAPACK. +extern "C" { +#include "f2c.h" +#include "clapack.h" +#include "cblas.h" +} + + +void LINALG_zgetrf( + int M, + int N, + std::complex* A, + int LDA, + int* IPIV) +{ + integer INFO; + zgetrf_((integer*)&M, (integer*)&N, (doublecomplex*)A, (integer*)&LDA, (integer*)IPIV, &INFO); +} + +void LINALG_zgetri( + size_t N, + std::complex* A, + int LDA, + int* IPIV) +{ + integer LWORK = -1; + std::complex WORK[1]; + integer INFO; + zgetri_((integer*)&N, (doublecomplex*)A, (integer*)&LDA, (integer*)IPIV, (doublecomplex*)WORK, &LWORK, &INFO); +} +void LINALG_inverse(std::complex* A, int N) +{ + int* IPIV = new int[N + (size_t)1]; + integer LWORK = N * N; + std::complex* WORK = new std::complex[LWORK]; + integer INFO; + + zgetrf_((integer*)&N, (integer*)&N, (doublecomplex*)A, (integer*)&N, (integer*)IPIV, &INFO); + zgetri_((integer*)&N, (doublecomplex*)A, (integer*)&N, (integer*)IPIV, (doublecomplex*)WORK, &LWORK, &INFO); + + delete[] IPIV; + delete[] WORK; +} + +void LINALG_zgemm( + const int M, //A(M*K) B(K*N) + const int N, + const int K, + std::complex* A, + const int LDA, //=K + std::complex* B, + const int LDB, //=N + std::complex* C, + const int LDC) //=columns of C. +{ + std::complex alpha = 1; + std::complex beta = 0; + cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, (OPENBLAS_CONST blasint)M, (OPENBLAS_CONST blasint)N, (OPENBLAS_CONST blasint)K, + &alpha, A, (OPENBLAS_CONST blasint)LDA, B, (OPENBLAS_CONST blasint)LDB, &beta, C, (OPENBLAS_CONST blasint)LDC); + +} \ No newline at end of file diff --git a/src/linalg.h b/src/linalg.h new file mode 100644 index 0000000..98ddb69 --- /dev/null +++ b/src/linalg.h @@ -0,0 +1,17 @@ +// This file contains a set of wrapper functions that are linked to the corresponding functions in CLAPACK +#include + +//Solve matrix inverse. +void LINALG_inverse(std::complex* A, int N); + +//Solve matrix multiplication. C = A * B. +void LINALG_zgemm( + const int M, //A(M*K) B(K*N) + const int N, + const int K, + std::complex* A, + const int LDA, //=K + std::complex* B, + const int LDB, //=N + std::complex* C, + const int LDC); //=columns of C. \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..1e8bd99 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,220 @@ +//Update the data output format in example_layer(). +#include "layer.h" +#include + +#include +#include +#include "stim/parser/arguments.h" + +using namespace std; +#define PI 3.14159265358979323846264338328 + +bool ASCII_output = false; +bool CPU_op = false; + +void advertise() { + std::cout << std::endl << std::endl; + std::cout << "=========================================================================" << std::endl; + //std::cout << "Thank you for using the NetMets network comparison tool!" << std::endl; + std::cout << "Scalable Tissue Imaging and Modeling (STIM) Lab, University of Houston" << std::endl; + std::cout << "=========================================================================" << std::endl << std::endl; +} + +//Calculate Ez from vector d and Ex Ey. +void E_Cal(vector>* E, vector>* d) { + if (d->size() == 2) { + complex dz = sqrt(1.0 + 0i - pow((*d)[0], 2) - pow((*d)[1], 2)); + d->push_back(dz); + } + E->push_back(-((*E)[0] * (*d)[0] + (*E)[1] * (*d)[1]) / (*d)[2]); +} +void output_binary(std::string filename, layersample& layers) { + size_t L = layers.n.size(); // get the number of layers + std::ofstream outFile; + outFile.open(filename, std::ofstream::binary); // open the output file for binary writing + if (outFile) { + outFile.write((char*)&layers.k, sizeof(double)); + outFile.write((char*)&layers.d[0], sizeof(double)); + outFile.write((char*)&layers.d[1], sizeof(double)); + outFile.write((char*)&layers.n[0], sizeof(double) * 2); + + for (size_t i = 0; i < L; i++) { + outFile.write((char*)&layers.z[i], sizeof(double)); + outFile.write((char*)&layers.sz[i], 2 * sizeof(double)); + outFile.write((char*)&layers.Pt[i], 2 * sizeof(double)); + outFile.write((char*)&layers.Pt[i + L], 2 * sizeof(double)); + outFile.write((char*)&layers.Pt[i + 2 * L], 2 * sizeof(double)); + outFile.write((char*)&layers.Pr[i], 2 * sizeof(double)); + outFile.write((char*)&layers.Pr[i + L], 2 * sizeof(double)); + outFile.write((char*)&layers.Pr[i + 2 * L], 2 * sizeof(double)); + } + outFile.close(); + } + else { + std::cout << "ERROR opening output file for binary writing: " << filename << std::endl; + } +} + +void output_txt(std::string filename, layersample& layers) { + size_t L = layers.n.size(); // get the number of layers + std::ofstream outFile; + outFile.open(filename); // open the output file for text writing + if (!outFile) { + std::cout << "ERROR: Could not open file " << filename << std::endl; + exit(1); + } + int width = 15; + + outFile << "--------------------------------" << endl; + outFile << "The wavenumber at free space is : " << layers.k << endl; + for (size_t i = 0; i < L; i++) { + if (i == 0) { + outFile << "--------------------------------" << endl; + outFile << "LAYER " << i << " (z = " << layers.z[i] << ")" << endl; + outFile << "refractive index: " << layers.n[i].real() << " + i " << layers.n[i].imag() << endl; + outFile << "----------------------" << endl; + outFile << "sx = " << setw(width) << layers.s[0].real() << " + i " << layers.s[0].imag() << endl; + outFile << "sy = " << setw(width) << layers.s[1].real() << " + i " << layers.s[1].imag() << endl; + outFile << "sz = " << setw(width) << layers.sz[i].real() << " + i " << layers.sz[i].imag() << endl; + outFile << "----->>>>>" << endl; + outFile << " X = " << setw(width) << layers.Pt[i].real() << " + i " << layers.Pt[i].imag() << endl; + outFile << " Y = " << setw(width) << layers.Pt[i + L].real() << " + i " << layers.Pt[i + L].imag() << endl; + outFile << " Z = " << setw(width) << layers.Pt[i + 2 * L].real() << " + i " << layers.Pt[i + 2 * L].imag() << endl; + outFile << "<<<<<-----" << endl; + outFile << " X = " << setw(width) << layers.Pr[i].real() << " + i " << layers.Pr[i].imag() << endl; + outFile << " Y = " << setw(width) << layers.Pr[i + L].real() << " + i " << layers.Pr[i + L].imag() << endl; + outFile << " Z = " << setw(width) << layers.Pr[i + 2 * L].real() << " + i " << layers.Pr[i + 2 * L].imag() << endl; + } + else { + outFile << "----------------------" << endl; + outFile << "LAYER " << i << " (z = " << layers.z[i] << ")" << endl; + outFile << "refractive index: " << layers.n[i].real() << " + i " << layers.n[i].imag() << endl; + outFile << "----------------------" << endl; + outFile << "sx = " << setw(width) << layers.s[0].real() << " + i " << layers.s[0].imag() << endl; + outFile << "sy = " << setw(width) << layers.s[1].real() << " + i " << layers.s[1].imag() << endl; + outFile << "sz = " << setw(width) << layers.sz[i].real() << " + i " << layers.sz[i].imag() << endl; + outFile << "----->>>>>" << endl; + outFile << " X = " << setw(width) << layers.Pt[i].real() << " + i " << layers.Pt[i].imag() << endl; + outFile << " Y = " << setw(width) << layers.Pt[i + L].real() << " + i " << layers.Pt[i + L].imag() << endl; + outFile << " Z = " << setw(width) << layers.Pt[i + 2 * L].real() << " + i " << layers.Pt[i + 2 * L].imag() << endl; + + outFile << "<<<<<-----" << endl; + outFile << " X = " << setw(width) << layers.Pr[i].real() << " + i " << layers.Pr[i].imag() << endl; + outFile << " Y = " << setw(width) << layers.Pr[i + L].real() << " + i " << layers.Pr[i + L].imag() << endl; + outFile << " Z = " << setw(width) << layers.Pr[i + 2 * L].real() << " + i " << layers.Pr[i + 2 * L].imag() << endl; + } + + } + outFile.close(); +} + +void calculate_layer(std::string outName, + vector>* ns, // the refractive index. + vector* depths, // z-direction position. + vector>* E0, // the initialized E0. + vector>* d0, // direction of propagation of the plane wave. + double k0) { // the wavenumber at free space. + std::string outName_ext(outName, size(outName)-4, size(outName)-1); // extract the extension of the output file + E_Cal(E0, d0); // make sure that both vectors are orthogonal. + + //Creat a new layersample and initialize. + layersample Layer1; // create a layered sample + Layer1.n = *ns; // set a pointer to the refractive indices + Layer1.z = *depths; // set a pointer to the layer depths + Layer1.d = *d0; + Layer1.k = k0; + + LARGE_INTEGER t1, t2, tc; // Timing. + QueryPerformanceFrequency(&tc); + QueryPerformanceCounter(&t1); + Layer1.solve(E0, CPU_op); // Solve for the substrate field in GPU. + QueryPerformanceCounter(&t2); + std::cout << "time for 'solving linear functions':" << (t2.QuadPart - t1.QuadPart) / (double)tc.QuadPart << "ms."<< std::endl; + //output(outName, Layer1); + if (ASCII_output) + output_txt(outName, Layer1); + else + output_binary(outName, Layer1); +} + +//Main function for example_layer. +int main(int argc, char* argv[]) { + stim::arglist args; + + //Basic argument lists. + args.add("help", "prints this help"); + args.add("s", "propagation direction vector (x, y)", "0.5 0.0", "[-1.0, 1.0]"); + args.add("l", "wavelength", "5.0", "in arbitrary units (ex. um)"); + args.add("Ex", "complex amplitude (x direction)", "0.5 0.0"); + args.add("Ey", "complex amplitude (y direction)", "0.5 0.0"); + args.add("z", "layer positions"); + args.add("n", "layer optical path length (real refractive index)", "1.0 1.4 1.4 1.0"); + args.add("kappa", "layer absorbance (imaginary refractive index)"); + args.add("ascii", "output as an ASCII file"); + args.add("CPU", "execute the program in CPU"); + args.parse(argc, argv); + + if (args["help"].is_set()) { // test for help + advertise(); // output the advertisement + std::cout << args.str(); // output arguments + exit(1); // exit + } + + ASCII_output = args["ascii"]; // if the ascii flag is set, output an ascii file + CPU_op = args["CPU"]; // if the CPU_op is set, run the program in CPU. + std::string outName; + if (args.nargs() == 1) { + outName = args.arg(0); + } + else if (args.nargs() == 0) { + if (ASCII_output) + outName = "output.txt"; + else + outName = "output.lyr"; + } + else { + std::cout << "ERROR: Too many arguments." << std::endl; + exit(1); + } + + + vector> d; //direction of propagation of the plane wave Init: {0.5, 0} + if (args["s"].nargs() == 2) { + d.push_back({ (double)args["s"].as_float(0), 0 }); + d.push_back({ (double)args["s"].as_float(1), 0 }); + } + + double l0 = (double)args["l"].as_float(0); //wavelength.Init: l0 = 5; + double k0 = 2 * PI / l0; //Calculate the free-space wavenumber. + + complex Ex = { (double)args["Ex"].as_float(0), (double)args["Ex"].as_float(1) }; //Input E. Init: {1, 1, 0} + complex Ey = { (double)args["Ey"].as_float(0), (double)args["Ey"].as_float(1) }; + vector> E0; + E0.push_back(Ex); + E0.push_back(Ey); + + //const int LAYERS = args["L"].as_int(); //LAYERS Init: 4 + + vector depths; + vector> ns; + size_t i = 0; + while(args["n"].is_set() && args["n"].as_float(i)!=0) { //n is the real part. + if (args["kappa"].is_set()) //kappa is the imaginary part. + ns.push_back({ (double)args["n"].as_float(i), (double)args["kappa"].as_float(i) }); + else + ns.push_back({ (double)args["n"].as_float(i), 0 }); //ns <- n + i * kappa + i++; + } + for (size_t j = 0; j < i; j++) + if (args["z"].is_set()) + depths.push_back((double)args["z"].as_float(i)); + else { + depths.push_back((double)-100 + j * 200 / i); + } + /*---------------------------------------example_layer--------------------------------------------*/ + + calculate_layer(outName, &ns, &depths, &E0, &d, k0); + + return 0; + std::cin.get(); +} \ No newline at end of file -- libgit2 0.21.4