Commit 71d5696d385e7d2e662bbd1e98264293479ed780

Authored by David Mayerich
0 parents

First commit after development and testing

CMakeLists.txt 0 → 100644
  1 +++ a/CMakeLists.txt
  1 +#Specify the version being used aswell as the language
  2 +cmake_minimum_required(VERSION 3.12)
  3 +
  4 +#Name your project here
  5 +project(multilayer)
  6 +
  7 +#set the module directory
  8 +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}")
  9 +
  10 +#default to release mode
  11 +if(NOT CMAKE_BUILD_TYPE)
  12 + set(CMAKE_BUILD_TYPE Release)
  13 +endif(NOT CMAKE_BUILD_TYPE)
  14 +
  15 +#build the executable in the binary directory on MS Visual Studio
  16 +if ( MSVC )
  17 + SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}")
  18 + SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
  19 + SET( LIBRARY_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}")
  20 + SET( LIBRARY_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
  21 + add_definitions(-D_CRT_SECURE_NO_WARNINGS)
  22 + add_definitions(-D_SCL_SECURE_NO_WARNINGS)
  23 +endif ( MSVC )
  24 +
  25 +
  26 +#find packages-----------------------------------
  27 +#find the pthreads package
  28 +find_package(Threads)
  29 +
  30 +#find the X11 package
  31 +find_package(X11)
  32 +
  33 +#find CUDA, mostly for LA stuff using cuBLAS
  34 +find_package(CUDA REQUIRED)
  35 +
  36 +#find Boost
  37 +#find_package(Boost)
  38 +
  39 +#find the STIM library
  40 +find_package(STIM REQUIRED)
  41 +
  42 +#find LAPACK and supporting link_libraries
  43 +find_package(clapack CONFIG REQUIRED)
  44 +find_package(OpenBLAS CONFIG REQUIRED)
  45 +
  46 +#include include directories
  47 +include_directories(${CUDA_INCLUDE_DIRS}
  48 + ${STIM_INCLUDE_DIRS}
  49 +)
  50 +
  51 +#Assign source files to the appropriate variables to easily associate them with executables
  52 +file(GLOB SRC "src/*.cpp")
  53 +
  54 +#-----------------------------Create the executable--------------------------
  55 +#-----------------------------Show all four examples-------------------------
  56 +add_executable(multilayer
  57 + ${SRC}
  58 +)
  59 +link_directories(${CUDA_BIN_DIRS})
  60 +target_link_libraries(multilayer ${CUDA_LIBRARIES}
  61 + ${CUDA_CUBLAS_LIBRARIES}
  62 + ${CUDA_cusparse_LIBRARY}
  63 + ${CUDA_cusolver_LIBRARY}
  64 + ${CUDA_CUFFT_LIBRARIES}
  65 + OpenBLAS::OpenBLAS
  66 + f2c lapack
  67 +)
  68 +
  69 +
0 70 \ No newline at end of file
... ...
FindSTIM.cmake 0 → 100644
  1 +++ a/FindSTIM.cmake
  1 +# finds the STIM library (downloads it if it isn't present)
  2 +# set STIMLIB_PATH to the directory containing the stim subdirectory (the stim repository)
  3 +
  4 +include(FindPackageHandleStandardArgs)
  5 +
  6 +set(STIM_ROOT $ENV{STIM_ROOT})
  7 +
  8 +IF(NOT STIM_ROOT)
  9 + MESSAGE("ERROR: STIM_ROOT environment variable must be set!")
  10 +ENDIF(NOT STIM_ROOT)
  11 +
  12 + FIND_PATH(STIM_INCLUDE_DIRS DOC "Path to STIM include directory."
  13 + NAMES stim/image/image.h
  14 + PATHS ${STIM_ROOT})
  15 +
  16 +find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIRS)
... ...
desktop.ini 0 → 100644
1 1 Binary files /dev/null and a/desktop.ini differ
... ...
docs/Readme_BytesOrder.txt 0 → 100644
  1 +++ a/docs/Readme_BytesOrder.txt
  1 +Order of "output.lyr" parameters.
  2 +
  3 +The wavenumber in free space: k0 double 8B
  4 +The direction of propogation: d double*2 16B
  5 +The refractive index in the first layer: n[0] complex<double> 16B
  6 +
  7 +for i in LAYERS:
  8 + z positions[i]: z[i] double 8B * LAYERS
  9 + z-component of propogation directions: sz[i] complex<double> 16B * LAYERS
  10 + Transmission: Ptx[i] complex<double> 16B * LAYERS
  11 + Transmission: Pty[i] complex<double> 16B * LAYERS
  12 + Transmission: Ptz[i] complex<double> 16B * LAYERS
  13 + Reflection: Prx[i] complex<double> 16B * LAYERS
  14 + Transmission: Pry[i] complex<double> 16B * LAYERS
  15 + Transmission: Prz[i] complex<double> 16B * LAYERS
  16 +
  17 +
  18 +All parameters we need will be:
  19 + 15 * LAYERS + 5
... ...
docs/lyr_format.pptx 0 → 100644
No preview for this file type
docs/testcases.txt 0 → 100644
  1 +++ a/docs/testcases.txt
  1 +output.lyr --n 1.0 1.2 1.3 1.4 1.5 1.4 1.3 1.2 1.1 --kappa 0.0 0.01 0.02 0.03 0.04 0.05 0.6 0.07 0.08 --zPos -50 -40 -30 -20 -10 0 10 20 30
  2 +
  3 +output.lyr --n 1.0 2.0 1.0 --kappa 0.0 0.0 0.0 --z -50 0 50
  4 +
  5 +defaults:
  6 +
  7 +*) If kappa is not specified, set them all to zero
  8 +
  9 +*) If z is not specified, create equally spaced layers between -100 and 100
  10 +
  11 +*) Output: Place a color bar next to each image (colorbar())
0 12 \ No newline at end of file
... ...
layerview.py 0 → 100644
  1 +++ a/layerview.py
  1 +# create a function that displays the output when run this way:
  2 +# python layerview.py ouput.dat
  3 +
  4 +import sys
  5 +import os
  6 +from time import time
  7 +import subprocess
  8 +import struct
  9 +import numpy as np
  10 +import matplotlib
  11 +import math
  12 +import matplotlib.pyplot as plt
  13 +
  14 +from mpl_toolkits.axes_grid1 import ImageGrid
  15 +
  16 +def intensity(E):
  17 + Econj = np.conj(E)
  18 + I = np.sum(E*Econj, axis=-1)
  19 + return np.real(I)
  20 +
  21 +#evaluate a solved homogeneous substrate
  22 +# Returns a complex NxMx3 array representing the cross section of the field at Y=0
  23 +def evaluate(Depths, k, d, n0, sz, Pt, Pr, X, Y, Z):
  24 + Depths = np.array(Depths)
  25 + sz = np.array(sz)
  26 + Pt = np.array(Pt)
  27 + Pr = np.array(Pr)
  28 + s = np.array(d) * n0
  29 + #allocate space for layer indices
  30 + LI = np.zeros(Z.shape, dtype=np.int)
  31 +
  32 + #find the layer index for each sample point
  33 + L = len(Depths)
  34 + LI[Z < Depths[0]] = 0
  35 + for l in range(L-1):
  36 + idx = np.logical_and(Z > Depths[l], Z <= Depths[l+1])
  37 + LI[idx] = l
  38 + LI[Z > Depths[-1]] = L - 1
  39 +
  40 + #calculate the appropriate phase shift for the wave transmitted through the layer
  41 + Ph_t = np.exp(1j * k * sz[LI] * (Z - Depths[LI]))
  42 +
  43 + #calculate the appropriate phase shift for the wave reflected off of the layer boundary
  44 + LIp = LI + 1
  45 + LIp[LIp >= L] = 0
  46 + Ph_r = np.exp(-1j * k * sz[LI] * (Z - Depths[LIp]))
  47 + Ph_r[LI >= L-1] = 0
  48 +
  49 + #calculate the phase shift based on the X and Y positions
  50 + Ph_xy = np.exp(1j * k * (s[0] * X + s[1] * Y))
  51 +
  52 + #apply the phase shifts
  53 + Et = Pt[:, LI] * Ph_t[:, :]
  54 + Er = Pr[:, LI] * Ph_r[:, :]
  55 +
  56 + #add everything together coherently
  57 + E = (Et + Er) * Ph_xy[:, :]
  58 +
  59 + #return the electric field
  60 + return np.moveaxis(E, 0, -1)
  61 +
  62 +class planewave:
  63 + def __int__(self):
  64 + self.LAYERS = 0 #Number of layers. int
  65 + self.depths = [] #z positions of layers. [1, 5, ..., 10] double
  66 + self.k0 = 0.0 #wavenumber at free space. double
  67 + self.d = [] #direction of propogation. [0.5, 0] double
  68 + self.n0 = 0.0+0.0j #the refractive index of the first layer. complex<double>
  69 + self.sz = [] #z-component of propagation for each layer. complex<double>
  70 + self.Pt = [[] for i in range(3)] #transmission complex<double>
  71 + self.Pr = [[],[],[]] #refraction complex<double>
  72 +
  73 +# display a binary file produced using the coupled wave C code
  74 +def layer(strc):
  75 + f = open(strc, "rb")
  76 +
  77 + # create an empty plane wave structure
  78 + L = planewave()
  79 + L.depths = []
  80 + L.d = []
  81 + L.sz = []
  82 + L.Pt = [[],[],[]]
  83 + L.Pr = [[],[],[]]
  84 +
  85 + # open the input file for reading
  86 + file_bytes = os.path.getsize(strc)
  87 +
  88 + # calculate the number of layers in the sample
  89 + L.LAYERS = int((file_bytes/8-5)/15)
  90 +
  91 + # load the raw layer data into the plane wave structure
  92 + data_raw = struct.unpack('d' * (15*L.LAYERS+5), f.read((15*L.LAYERS+5)* 8))
  93 + data = np.asarray(data_raw)
  94 + L.k0 = data[0]
  95 + L.d.append(data[1])
  96 + L.d.append(data[2])
  97 + L.n0 = complex(data[3], data[4])
  98 +
  99 + # load each layer's plane waves from the binary file
  100 + for i in range(L.LAYERS):
  101 + L.depths.append(data[5+15*i])
  102 + L.sz.append(complex(data[6+15*i], data[7+15*i]))
  103 + L.Pt[0].append(complex(data[8+15*i], data[9+15*i]))
  104 + L.Pt[1].append(complex(data[15*i+10], data[15*i+11]))
  105 + L.Pt[2].append(complex(data[15*i+12], data[15*i+13]))
  106 + L.Pr[0].append(complex(data[15*i+14], data[15*i+15]))
  107 + L.Pr[1].append(complex(data[15*i+16], data[15*i+17]))
  108 + L.Pr[2].append(complex(data[15*i+18], data[15*i+19]))
  109 +
  110 + N = 512 # simulation resolution NxM
  111 + M = 1024
  112 + #DAVID: Don't hard-code the dimensions - you'll have to calculate them based on the sample information in the file
  113 + D = [-110, 110, 0, 60] # dimensions of the simulation
  114 + x = np.linspace(D[2], D[3], N) # set the sample points for the simulation
  115 + z = np.linspace(D[0], D[1], M)
  116 + [X, Z] = np.meshgrid(x, z) # create a mesh grid to evaluate layers
  117 + Y = np.zeros(X.shape)
  118 +
  119 + # evaluate the field across all layers
  120 + E = evaluate(L.depths, L.k0, L.d, L.n0, L.sz, L.Pt, L.Pr, X, Y, Z)
  121 + Er = np.real(E)
  122 + I = intensity(E)
  123 +
  124 + plt.set_cmap("afmhot") # set the color map
  125 + plt.subplot(1, 4, 1)
  126 + plt.imshow(Er[:, :, 0], extent=(D[3], D[2], D[1], D[0]))
  127 + #plt.colorbar()
  128 + plt.title("Ex")
  129 +
  130 + plt.subplot(1, 4, 2)
  131 + plt.imshow(Er[:, :, 1], extent=(D[3], D[2], D[1], D[0]))
  132 + #plt.colorbar()
  133 + plt.title("Ey")
  134 +
  135 + plt.subplot(1, 4, 3)
  136 + plt.imshow(Er[:, :, 2], extent=(D[3], D[2], D[1], D[0]))
  137 + #plt.colorbar()
  138 + plt.title("Ez")
  139 +
  140 + plt.subplot(1, 4, 4)
  141 + plt.imshow(I, extent=(D[3], D[2], D[1], D[0]))
  142 + plt.colorbar()
  143 + plt.title("I")
  144 +
  145 + #fig = plt.figure(1, (5, 10))
  146 + #plt.set_cmap("afmhot")
  147 + #matplotlib.rcParams.update({'font.size': 10})
  148 + #grid = ImageGrid(fig, rect = 211, nrows_ncols = (1, 3), axes_pad = 0.2, label_mode = "1", cbar_mode = "single", cbar_size = "18%")
  149 + #Title = ["Ex", "Ey", "Ez"]
  150 + #for i in range(3):
  151 + # grid[i].axis('off')
  152 + # im = grid[i].imshow(Er[..., i], extent=(D[3], D[2], D[1], D[0]), interpolation="nearest")
  153 + # grid[i].set_title(Title[i])
  154 + #grid.cbar_axes[0].colorbar(im)
  155 + #plt.title("E")
  156 + #plt.subplot(212)
  157 + #plt.imshow(I, extent=(D[3], D[2], D[1], D[0]))
  158 + #plt.title("I")
  159 + #plt.colorbar()
  160 + plt.show()
  161 +
  162 +# function displays usage text to the console
  163 +def usage():
  164 + print("Usage:")
  165 + print(" layerview input.dat")
  166 +
  167 +if __name__ == '__main__':
  168 + start = time()
  169 + if len(sys.argv) < 2: # if there are no command line arguments
  170 + usage() # display the usage text
  171 + exit() # exit
  172 + else:
  173 + layer(sys.argv[1]) # otherwise display the given data file
  174 +
  175 + end = time()
  176 + print("The elapsed time is " + str(end - start) + " s. ")
0 177 \ No newline at end of file
... ...
src/layer.cpp 0 → 100644
  1 +++ a/src/layer.cpp
  1 +#include "layer.h"
  2 +#include "linalg.h" //LAPACKE support for Visual Studio
  3 +
  4 +#include <cusparse.h>
  5 +#include <cuda_runtime.h>
  6 +//#include "cublas_v2.h"
  7 +#include "cusolverSp.h"
  8 +/*----------------------------GPU-----------------------------*/
  9 +
  10 +
  11 +//Cross product.c is result.
  12 +void crossProduct(vector<complex<double>>* a, vector<complex<double>>* b, //The given matrices.
  13 + vector<complex<double>>* c) { //Matrix to be gotten.
  14 + c->push_back((*a)[1] * (*b)[2] - (*a)[2] * (*b)[1]);
  15 + c->push_back((*a)[2] * (*b)[0] - (*a)[0] * (*b)[2]);
  16 + c->push_back((*a)[0] * (*b)[1] - (*a)[1] * (*b)[0]);
  17 +}
  18 +
  19 +//Calculate the norm for a matrix.
  20 +complex<double> Norm(vector<complex<double>>* E) {
  21 + complex<double> sum = 0;
  22 + for (unsigned int i = 0; i < E->size(); i++) {
  23 + sum += (*E)[i] * (*E)[i];
  24 + }
  25 + return sqrt(sum);
  26 +}
  27 +
  28 +//Normalize matrix.
  29 +void Normalize(vector<complex<double>>* E) {
  30 + complex<double> sum = 0;
  31 + for (unsigned int i = 0; i < E->size(); i++) {
  32 + sum += (*E)[i] * (*E)[i];
  33 + }
  34 + for (unsigned int i = 0; i < E->size(); i++) {
  35 + (*E)[i] = (*E)[i] / sqrt(sum);
  36 +
  37 + }
  38 +}
  39 +
  40 +//Orthogonalization.
  41 +void orthogonalize(vector<complex<double>>* E_0rtho, vector<complex<double>>* E0, vector<complex<double>>* d) {
  42 + vector<complex<double>> s;
  43 + if (d->size() == 2) {
  44 + complex<double> dz = sqrt(1.0+0i - pow((*d)[0], 2) - pow((*d)[1], 2));
  45 + d->push_back(dz);
  46 + }
  47 + crossProduct(E0, d, &s);
  48 + crossProduct(d, &s, E_0rtho);
  49 + vector<complex<double>>().swap(s);
  50 +}
  51 +
  52 +/*--------------------------------------------------Define Class layersample.--------------------------------------------------------*/
  53 +
  54 +/*Do not try to replace "int" as "size_t".
  55 +This will result in a bunch of warnings and if we continuously change the type of M_rowInd and M_colInd, the EXCEPTION will occur again.*/
  56 +size_t layersample::ii(size_t l, size_t c, size_t d) { //ii(l, c, d) means the column indexes for every element.
  57 + return l * 6 + d * 3 + c - 3;
  58 +}
  59 +
  60 +void layersample::generate_linsys(size_t LAYERS,
  61 + vector<complex<double>>& M, //All non-zero values in "A" matirx.(A * X = b)
  62 + vector<complex<double>>& b, //The right hand side column vector.
  63 + vector<complex<double>>& E, //orthogonalized E0 vectors
  64 + vector<complex<double>>* P,
  65 + bool CPU_op) { //Solution of the matrices multiplication.
  66 + //Calculate the sz component for each layer.
  67 + s.clear(); //s is the plane wave direction scaled by the refractive index.
  68 + for (size_t i = 0; i < 2; i++)
  69 + s.push_back(d[i] * n[0]);
  70 + sz.clear();
  71 + for (int l = 0; l < LAYERS; l++) {
  72 + sz.push_back(sqrt(pow(n[l], 2) - pow(s[0], 2) - pow(s[1], 2)));
  73 + }
  74 +
  75 + if (!CPU_op){
  76 + //Computer in GPU.
  77 + vector<int> M_rowInd; //Sparse matrix M CSR ->row index
  78 + vector<int> M_colInd; //Sparse matrix M CSR ->number of elements
  79 + M_rowInd.push_back(0);
  80 + ////Build M by setting constraints based on Gauss's Law.
  81 + for (size_t l = 0; l < LAYERS; l++) {
  82 + //Set the upward components for each layer.
  83 + //Layer "LAYERS-1" doesn't have a upward component.
  84 + if (l != LAYERS - 1) {
  85 + M.push_back(s[0]);
  86 + M_colInd.push_back((int)ii(l, 0, 1));
  87 + M.push_back(s[1]);
  88 + M_colInd.push_back((int)ii(l, 1, 1));
  89 + M.push_back(-sz[l]);
  90 + M_colInd.push_back((int)ii(l, 2, 1));
  91 + M_rowInd.push_back((int)M.size());
  92 + b.push_back(0);
  93 + }
  94 + //Set the downward components for each layer.
  95 + if (l != 0) {
  96 + M.push_back(s[0]);
  97 + M_colInd.push_back((int)ii(l, 0, 0));
  98 + M.push_back(s[1]);
  99 + M_colInd.push_back((int)ii(l, 1, 0));
  100 + M.push_back(sz[l]);
  101 + M_colInd.push_back((int)ii(l, 2, 0));
  102 + M_rowInd.push_back((int)M.size());
  103 + b.push_back(0);
  104 + }
  105 + }
  106 + //Continue to build M by enforcing a continuous field across boundaries.
  107 + complex<double> arg, arg_in, B;
  108 + for (size_t l = 1; l < LAYERS; l++) {
  109 + complex<double> sz0 = sz[l - 1];
  110 + complex<double> sz1 = sz[l];
  111 +
  112 + //Representation of A = np.exp(1j * k0 * sz0 * (self.z[l] - self.z[l - 1]))
  113 + complex<double> A_in = k * sz0 * (z[l] - z[l - 1]);
  114 + complex<double> A_in2 = { -A_in.imag(), A_in.real() };
  115 + complex<double> A = exp(A_in2);
  116 +
  117 + if (l < LAYERS - 1) {
  118 + double dl = z[l] - z[l + 1];
  119 + arg_in = -k * sz1 * (complex<double>)dl;
  120 + arg = { -arg_in.imag(), arg_in.real() };
  121 + B = exp(arg);
  122 + }
  123 + //if this is the second layer, use the simplified equations that account for the incident field
  124 + if (l == 1) {
  125 + M.push_back(1);
  126 + M_colInd.push_back((int)ii(0, 0, 1));
  127 + M.push_back(-1);
  128 + M_colInd.push_back((int)ii(1, 0, 0));
  129 + if (LAYERS > 2) {
  130 + M.push_back(-B);
  131 + M_colInd.push_back((int)ii(1, 0, 1));
  132 + }
  133 + M_rowInd.push_back((int)M.size());
  134 + b.push_back(-A * E[0]);
  135 +
  136 + M.push_back(1);
  137 + M_colInd.push_back((int)ii(0, 1, 1));
  138 + M.push_back(-1);
  139 + M_colInd.push_back((int)ii(1, 1, 0));
  140 + if (LAYERS > 2) {
  141 + M.push_back(-B);
  142 + M_colInd.push_back((int)ii(1, 1, 1));
  143 + }
  144 + M_rowInd.push_back((int)M.size());
  145 + b.push_back(-A * E[l]);
  146 +
  147 + M.push_back(sz0);
  148 + M_colInd.push_back((int)ii(0, 1, 1));
  149 + M.push_back(s[1]);
  150 + M_colInd.push_back((int)ii(0, 2, 1));
  151 + M.push_back(sz1);
  152 + M_colInd.push_back((int)ii(1, 1, 0));
  153 + M.push_back(-s[1]);
  154 + M_colInd.push_back((int)ii(1, 2, 0));
  155 + if (LAYERS > 2) {
  156 + M.push_back(-B * sz1);
  157 + M_colInd.push_back((int)ii(1, 1, 1));
  158 + M.push_back(-B * s[1]);
  159 + M_colInd.push_back((int)ii(1, 2, 1));
  160 + }
  161 + M_rowInd.push_back((int)M.size());
  162 + b.push_back(A * sz0 * E[1] - A * s[1] * E[2]);
  163 +
  164 + M.push_back(-sz0);
  165 + M_colInd.push_back((int)ii(0, 0, 1));
  166 + M.push_back(-s[0]);
  167 + M_colInd.push_back((int)ii(0, 2, 1));
  168 + M.push_back(-sz1);
  169 + M_colInd.push_back((int)ii(1, 0, 0));
  170 + M.push_back(s[0]);
  171 + M_colInd.push_back((int)ii(1, 2, 0));
  172 + if (LAYERS > 2) {
  173 + M.push_back(B * sz1);
  174 + M_colInd.push_back((int)ii(1, 0, 1));
  175 + M.push_back(B* s[0]);
  176 + M_colInd.push_back((int)ii(1, 2, 1));
  177 + }
  178 + M_rowInd.push_back((int)M.size());
  179 + b.push_back(A * s[0] * E[2] - A * sz0 * E[0]);
  180 + }
  181 + else if (l == LAYERS - 1) {
  182 + M.push_back(A);
  183 + M_colInd.push_back((int)ii(l - 1, 0, 0));
  184 + M.push_back(1);
  185 + M_colInd.push_back((int)ii(l - 1, 0, 1));
  186 + M.push_back(-1);
  187 + M_colInd.push_back((int)ii(l, 0, 0));
  188 + M_rowInd.push_back((int)M.size());
  189 + b.push_back(0);
  190 +
  191 + M.push_back(A);
  192 + M_colInd.push_back((int)ii(l - 1, 1, 0));
  193 + M.push_back(1);
  194 + M_colInd.push_back((int)ii(l - 1, 1, 1));
  195 + M.push_back(-1);
  196 + M_colInd.push_back((int)ii(l, 1, 0));
  197 + M_rowInd.push_back((int)M.size());
  198 + b.push_back(0);
  199 +
  200 + M.push_back(-A * sz0);
  201 + M_colInd.push_back((int)ii(l - 1, 1, 0));
  202 + M.push_back(A * s[1]);
  203 + M_colInd.push_back((int)ii(l - 1, 2, 0));
  204 + M.push_back(sz0);
  205 + M_colInd.push_back((int)ii(l - 1, 1, 1));
  206 + M.push_back(s[1]);
  207 + M_colInd.push_back((int)ii(l - 1, 2, 1));
  208 + M.push_back(sz1);
  209 + M_colInd.push_back((int)ii(l, 1, 0));
  210 + M.push_back(-s[1]);
  211 + M_colInd.push_back((int)ii(l, 2, 0));
  212 + M_rowInd.push_back((int)M.size());
  213 + b.push_back(0);
  214 +
  215 + M.push_back(A * sz0);
  216 + M_colInd.push_back((int)ii(l - 1, 0, 0));
  217 + M.push_back(-A * s[0]);
  218 + M_colInd.push_back((int)ii(l - 1, 2, 0));
  219 + M.push_back(-sz0);
  220 + M_colInd.push_back((int)ii(l - 1, 0, 1));
  221 + M.push_back(-s[0]);
  222 + M_colInd.push_back((int)ii(l - 1, 2, 1));
  223 + M.push_back(-sz1);
  224 + M_colInd.push_back((int)ii(l, 0, 0));
  225 + M.push_back(s[0]);
  226 + M_colInd.push_back((int)ii(l, 2, 0));
  227 + M_rowInd.push_back((int)M.size());
  228 + b.push_back(0);
  229 + }
  230 + else {
  231 + M.push_back(A);
  232 + M_colInd.push_back((int)ii(l - 1, 0, 0));
  233 + M.push_back(1);
  234 + M_colInd.push_back((int)ii(l - 1, 0, 1));
  235 + M.push_back(-1);
  236 + M_colInd.push_back((int)ii(l, 0, 0));
  237 + M.push_back(-B);
  238 + M_colInd.push_back((int)ii(l, 0, 1));
  239 + M_rowInd.push_back((int)M.size());
  240 + b.push_back(0);
  241 +
  242 + M.push_back(A);
  243 + M_colInd.push_back((int)ii(l - 1, 1, 0));
  244 + M.push_back(1);
  245 + M_colInd.push_back((int)ii(l - 1, 1, 1));
  246 + M.push_back(-1);
  247 + M_colInd.push_back((int)ii(l, 1, 0));
  248 + M.push_back(-B);
  249 + M_colInd.push_back((int)ii(l, 1, 1));
  250 + M_rowInd.push_back((int)M.size());
  251 + b.push_back(0);
  252 +
  253 + M.push_back(-A * sz0);
  254 + M_colInd.push_back((int)ii(l - 1, 1, 0));
  255 + M.push_back(A * s[1]);
  256 + M_colInd.push_back((int)ii(l - 1, 2, 0));
  257 + M.push_back(sz0);
  258 + M_colInd.push_back((int)ii(l - 1, 1, 1));
  259 + M.push_back(s[1]);
  260 + M_colInd.push_back((int)ii(l - 1, 2, 1));
  261 + M.push_back(sz1);
  262 + M_colInd.push_back((int)ii(l, 1, 0));
  263 + M.push_back(-s[1]);
  264 + M_colInd.push_back((int)ii(l, 2, 0));
  265 + M.push_back(-B * sz1);
  266 + M_colInd.push_back((int)ii(l, 1, 1));
  267 + M.push_back(-B * s[1]);
  268 + M_colInd.push_back((int)ii(l, 2, 1));
  269 + M_rowInd.push_back((int)M.size());
  270 + b.push_back(0);
  271 +
  272 + M.push_back(A * sz0);
  273 + M_colInd.push_back((int)ii(l - 1, 0, 0));
  274 + M.push_back(-A * s[0]);
  275 + M_colInd.push_back((int)ii(l - 1, 2, 0));
  276 + M.push_back(-sz0);
  277 + M_colInd.push_back((int)ii(l - 1, 0, 1));
  278 + M.push_back(-s[0]);
  279 + M_colInd.push_back((int)ii(l - 1, 2, 1));
  280 + M.push_back(-sz1);
  281 + M_colInd.push_back((int)ii(l, 0, 0));
  282 + M.push_back(s[0]);
  283 + M_colInd.push_back((int)ii(l, 2, 0));
  284 + M.push_back(B * sz1);
  285 + M_colInd.push_back((int)ii(l, 0, 1));
  286 + M.push_back(B * s[0]);
  287 + M_colInd.push_back((int)ii(l, 2, 1));
  288 + M_rowInd.push_back((int)M.size());
  289 + b.push_back(0);
  290 + }
  291 + }
  292 + cudaError_t cudaStatus;
  293 + cusolverStatus_t cusolverStatus;
  294 + cusparseStatus_t cusparseStatus;
  295 + cusolverSpHandle_t handle = NULL;
  296 + cusparseHandle_t cusparseHandle = NULL;
  297 + cudaStream_t stream = NULL;
  298 + cusparseMatDescr_t descrM = NULL;
  299 + cuDoubleComplex * csrValM_, * b_, *P_;
  300 + size_t rowsA = b.size(), colsA = b.size(), nnA = M.size(), baseM_ = 0; //nnA is the number of non-zero elements.
  301 + int* csrRowPtrM = NULL; //row index M_rowInd projected to GPU.
  302 + int* csrColIndM = NULL; //CSR(A) from I/O. // M_colInd projected to GPU.
  303 + double tol = 1.e-12; int reorder = 0;
  304 + int singularity = 0;
  305 +
  306 + //Initialize.
  307 + cusolverStatus = cusolverSpCreate(&handle);
  308 + int num = 1;
  309 + cudaStatus = cudaGetDevice(&num);
  310 + cusparseStatus = cusparseCreate(&cusparseHandle);
  311 + cudaStatus = cudaStreamCreate(&stream);
  312 + cusolverStatus = cusolverSpSetStream(handle, stream);
  313 + cusparseStatus = cusparseSetStream(cusparseHandle, stream);
  314 + cusparseStatus = cusparseCreateMatDescr(&descrM);
  315 + cusparseStatus = cusparseSetMatType(descrM, CUSPARSE_MATRIX_TYPE_GENERAL);
  316 + if (baseM_) {
  317 + cusparseStatus = cusparseSetMatIndexBase(descrM, CUSPARSE_INDEX_BASE_ONE);
  318 + }
  319 + else {
  320 + cusparseStatus = cusparseSetMatIndexBase(descrM, CUSPARSE_INDEX_BASE_ZERO);
  321 + }
  322 +
  323 + cudaStatus = cudaMalloc((void**)&csrRowPtrM, sizeof(int) * (rowsA + 1)); //Projection of M_rowInd.
  324 + cudaStatus = cudaMalloc((void**)&csrColIndM, sizeof(int) * M_colInd.size()); //Projection of M_colInd.
  325 + cudaStatus = cudaMalloc((void**)&csrValM_, sizeof(cuDoubleComplex) * M.size()); //Projection of M.
  326 + cudaStatus = cudaMalloc((void**)&b_, sizeof(cuDoubleComplex) * b.size()); //Projection of b.
  327 + cudaStatus = cudaMalloc((void**)&P_, sizeof(cuDoubleComplex) * b.size()); //Projection of P.
  328 +
  329 + cudaStatus = cudaMemcpy(csrValM_, M.data(), M.size() * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
  330 + cudaStatus = cudaMemcpy(csrRowPtrM, M_rowInd.data(), M_rowInd.size() * sizeof(int), cudaMemcpyHostToDevice);
  331 + cudaStatus = cudaMemcpy(csrColIndM, M_colInd.data(), M_colInd.size() * sizeof(int), cudaMemcpyHostToDevice);
  332 + cudaStatus = cudaMemcpy(b_, b.data(), b.size() * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
  333 + // Output the current CUDA error.
  334 + //if (cudaStatus != cudaSuccess) {
  335 + // cout<<"%s " << cudaGetErrorString(cudaStatus) << endl;
  336 + //}
  337 + P->resize(rowsA); //P is the to-be-solved matrix in CPU.
  338 + //QR method.
  339 + cusolverStatus = cusolverSpZcsrlsvqr(handle, (int)rowsA, (int)nnA, descrM, csrValM_, csrRowPtrM, csrColIndM, b_, tol, reorder, P_, (int*)&singularity);
  340 + /*cusparseStatus = cusparseZsctr(*cusparseHandle, rowsA, g_z, g_Q, g_x, CUSPARSE_INDEX_BASE_ZERO);*/
  341 + cudaStatus = cudaMemcpyAsync(P->data(), P_, sizeof(cuDoubleComplex) * rowsA, cudaMemcpyDeviceToHost, stream);
  342 +
  343 + cudaStatus = cudaFree(csrRowPtrM);
  344 + cudaStatus = cudaFree(csrColIndM);
  345 + cudaStatus = cudaFree(csrValM_);
  346 + cudaStatus = cudaFree(b_);
  347 + cudaStatus = cudaFree(P_);
  348 + vector<int>().swap(M_rowInd);
  349 + vector<int>().swap(M_colInd);
  350 + }
  351 + else {
  352 + //Work on CPU.
  353 + M.resize(6 * (LAYERS - 1) * 6 * (LAYERS - 1));
  354 + b.resize(6 * (LAYERS - 1));
  355 +
  356 + size_t ei = 0;
  357 + //Set constraints based on Gauss's Law.
  358 + for (size_t l = 0; l < LAYERS; l++) {
  359 + //Set the upward components for each layer.
  360 + //Layer "LAYERS-1" doesn't have a upward component.
  361 + if (l != LAYERS - 1) {
  362 + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = s[0];
  363 + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = s[1];
  364 + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = -sz[l];
  365 + ei += 1;
  366 + }
  367 + //Set the downward components for each layer.
  368 + if (l != 0) {
  369 + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = s[0];
  370 + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = s[1];
  371 + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = sz[l];
  372 + ei += 1;
  373 + }
  374 + }
  375 + //Enforce a continuous field across boundaries.
  376 + complex<double> arg, arg_in, B;
  377 + for (size_t l = 1; l < LAYERS; l++) {
  378 + complex<double> sz0 = sz[l - 1];
  379 + complex<double> sz1 = sz[l];
  380 +
  381 + //Representation of A = np.exp(1j * k0 * sz0 * (self.z[l] - self.z[l - 1]))
  382 + complex<double> A_in = k * sz0 * (z[l] - z[l - 1]);
  383 + complex<double> A_in2 = { -A_in.imag(), A_in.real() };
  384 + complex<double> A = exp(A_in2);
  385 +
  386 + if (l < LAYERS - 1) {
  387 + double dl = z[l] - z[l + 1];
  388 + arg_in = -k * sz1 * (complex<double>)dl;
  389 + arg = { -arg_in.imag(), arg_in.real() };
  390 + B = exp(arg);
  391 + }
  392 + //if this is the second layer, use the simplified equations that account for the incident field
  393 + if (l == 1) {
  394 + M[ei * 6 * (LAYERS - 1) + ii(0, 0, 1)] = 1;
  395 + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 0)] = -1;
  396 + if (LAYERS > 2) {
  397 + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 1)] = -B;
  398 + }
  399 + b[ei] = -A * E[0];
  400 + ei += 1;
  401 +
  402 + M[ei * 6 * (LAYERS - 1) + ii(0, 1, 1)] = 1;
  403 + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 0)] = -1;
  404 + if (LAYERS > 2) {
  405 + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 1)] = -B;
  406 + }
  407 + b[ei] = -A * E[l];
  408 + ei += 1;
  409 +
  410 + M[ei * 6 * (LAYERS - 1) + ii(0, 2, 1)] = s[1];
  411 + M[ei * 6 * (LAYERS - 1) + ii(0, 1, 1)] = sz0;
  412 + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 0)] = -s[1];
  413 + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 0)] = sz1;
  414 + if (LAYERS > 2) {
  415 + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 1)] = -B * s[1];
  416 + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 1)] = -B * sz1;
  417 + }
  418 + b[ei] = A * sz0 * E[1] - A * s[1] * E[2];
  419 + ei += 1;
  420 +
  421 + M[ei * 6 * (LAYERS - 1) + ii(0, 0, 1)] = -sz0;
  422 + M[ei * 6 * (LAYERS - 1) + ii(0, 2, 1)] = -s[0];
  423 + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 0)] = -sz1;
  424 + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 0)] = s[0];
  425 + if (LAYERS > 2) {
  426 + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 1)] = B * sz1;
  427 + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 1)] = B * s[0];
  428 + }
  429 + b[ei] = A * s[0] * E[2] - A * sz0 * E[0];
  430 + ei += 1;
  431 + }
  432 + else if (l == LAYERS - 1) {
  433 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A;
  434 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = 1;
  435 + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -1;
  436 + ei += 1;
  437 +
  438 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = A;
  439 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = 1;
  440 + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = -1;
  441 + ei += 1;
  442 +
  443 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = A * s[1];
  444 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = -A * sz0;
  445 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = s[1];
  446 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = sz0;
  447 + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = -s[1];
  448 + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = sz1;
  449 + ei += 1;
  450 +
  451 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A * sz0;
  452 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = -A * s[0];
  453 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = -sz0;
  454 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = -s[0];
  455 + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -sz1;
  456 + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = s[0];
  457 + ei += 1;
  458 + }
  459 + else {
  460 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A;
  461 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = 1;
  462 + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -1;
  463 + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = -B;
  464 + ei += 1;
  465 +
  466 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = A;
  467 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = 1;
  468 + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = -1;
  469 + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = -B;
  470 + ei += 1;
  471 +
  472 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = A * s[1];
  473 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = -A * sz0;
  474 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = s[1];
  475 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = sz0;
  476 + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = -s[1];
  477 + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = sz1;
  478 + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = -B * s[1];
  479 + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = -B * sz1;
  480 + ei += 1;
  481 +
  482 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A * sz0;
  483 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = -A * s[0];
  484 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = -sz0;
  485 + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = -s[0];
  486 + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -sz1;
  487 + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = s[0];
  488 + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = B * sz1;
  489 + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = B * s[0];
  490 + ei += 1;
  491 + }
  492 + }
  493 +
  494 + complex<double>* M_ = new complex<double>[M.size()];
  495 + complex<double>* b_ = new complex<double>[b.size()];
  496 + complex<double>* P_ = new complex<double>[b.size()];
  497 + for (size_t i = 0; i < M.size(); i++) {
  498 + M_[i] = M[i];
  499 + if (i < b.size()) b_[i] = b[i];
  500 + }
  501 + LINALG_inverse(M_, (int)(6 * (LAYERS - 1)));
  502 + LINALG_zgemm((int)(6 * (LAYERS - 1)), (int)1, (int)(6 * (LAYERS - 1)), M_, (int)(6 * (LAYERS - 1)), b_, (int)1, P_, (int)1);
  503 + for (int i = 0; i < b.size(); i++) {
  504 + P->push_back(P_[i]);
  505 + }
  506 +
  507 + delete[] M_;
  508 + delete[] b_;
  509 + delete[] P_;
  510 + }
  511 +}
  512 +
  513 +
  514 +//Build matrix and get E.
  515 +void layersample::solve(vector<complex<double>>* E, bool CPU_op) { //orthogonalized E0 vectors.
  516 + size_t LAYERS = n.size();
  517 + //Store the matrix and RHS vector.
  518 + vector<complex<double>> M; //All non-zero values in the sparse matrix.
  519 + vector<complex<double>> b; //The right hand side column vector.
  520 +
  521 + //Evaluate the linear system.
  522 + vector<complex<double>> P; //Solution of matrix.
  523 + layersample::generate_linsys(LAYERS, M, b, *E, &P, CPU_op);
  524 +
  525 + //Store the coefficients for each layer.
  526 + //Pt[3, L] transmission. Pr[3, L] reflection.
  527 + Pt.resize(3 * LAYERS);
  528 + Pr.resize(3 * LAYERS);
  529 +
  530 + for (size_t l = 0; l < LAYERS; l++) {
  531 + if (l == 0) {
  532 + Pt[0] = (complex<double>)(*E)[0];
  533 + Pt[LAYERS] = (complex<double>)(*E)[1];
  534 + Pt[2 * LAYERS] = (complex<double>)(*E)[2];
  535 + }
  536 + else {
  537 + Pt[l] = P[ii(l, 0, 0)];
  538 + Pt[l + LAYERS] = P[ii(l, 1, 0)];
  539 + Pt[l + 2 * LAYERS] = P[ii(l, 2, 0)];
  540 + }
  541 +
  542 + if (l == LAYERS - 1) {
  543 + Pr[LAYERS - 1] = 0;
  544 + Pr[2 * LAYERS - 1] = 0;
  545 + Pr[3 * LAYERS - 1] = 0;
  546 + }
  547 + else {
  548 + Pr[l] = P[ii(l, 0, 1)];
  549 + Pr[l + LAYERS] = P[ii(l, 1, 1)];
  550 + Pr[l + 2 * LAYERS] = P[ii(l, 2, 1)];
  551 + }
  552 + }
  553 + vector<complex<double>>().swap(M);
  554 + vector<complex<double>>().swap(b);
  555 + vector<complex<double>>().swap(P);
  556 +}
0 557 \ No newline at end of file
... ...
src/layer.h 0 → 100644
  1 +++ a/src/layer.h
  1 +#ifndef LAYER_H
  2 +#define LAYER_H
  3 +#include <vector>
  4 +#include <complex>
  5 +using namespace std;
  6 +
  7 +void crossProduct(vector<complex<double>>* a, vector<complex<double>>* b, vector<complex<double>>* c);
  8 +complex<double> Norm(vector<complex<double>>* E);
  9 +void Normalize(vector<complex<double>>* E);
  10 +vector<vector<double> > transpose(vector<vector<double> >* matrix);
  11 +void orthogonalize(vector<complex<double>>* E_0, vector<complex<double>>* E0, vector<complex<double>>* d);
  12 +
  13 +class layersample {
  14 +
  15 +public:
  16 + double k; //wavenumber.
  17 + vector<complex<double>> n; //refractive index.
  18 + vector<double> z; //z postions.
  19 + vector<complex<double>> s; //propagation direction. Keep it for output.
  20 + vector<complex<double>> sz; //propagation direction. Keep it for output.
  21 + vector<complex<double>> d; //direction of propagation of the plane wave.
  22 + vector<complex<double>> Pt; //transimission.
  23 + vector<complex<double>> Pr; //reflection.
  24 + //Calulate the index of the field component associated with a layer.
  25 + // l is the layer index. c is the component(x, y, z). d is the direction(0for transmission, 1 for reflection).
  26 + size_t ii(size_t l, size_t c, size_t d);
  27 +
  28 + //Generate the linear system corresponding to this layered sample and plane wave.
  29 + void generate_linsys(size_t LAYERS, vector<complex<double>>& M, vector<complex<double>>& b, vector<complex<double>>& E, vector<complex<double>>* P, bool CPU_op);
  30 +
  31 + //Build matrix and get E.
  32 + void solve(vector<complex<double>>* E, bool CPU_op);
  33 +};
  34 +
  35 +#endif
0 36 \ No newline at end of file
... ...
src/linalg.cpp 0 → 100644
  1 +++ a/src/linalg.cpp
  1 +#include "linalg.h"
  2 +
  3 +// This file contains a set of wrapper functions that are linked to the corresponding functions in CLAPACK.
  4 +extern "C" {
  5 +#include "f2c.h"
  6 +#include "clapack.h"
  7 +#include "cblas.h"
  8 +}
  9 +
  10 +
  11 +void LINALG_zgetrf(
  12 + int M,
  13 + int N,
  14 + std::complex<double>* A,
  15 + int LDA,
  16 + int* IPIV)
  17 +{
  18 + integer INFO;
  19 + zgetrf_((integer*)&M, (integer*)&N, (doublecomplex*)A, (integer*)&LDA, (integer*)IPIV, &INFO);
  20 +}
  21 +
  22 +void LINALG_zgetri(
  23 + size_t N,
  24 + std::complex<double>* A,
  25 + int LDA,
  26 + int* IPIV)
  27 +{
  28 + integer LWORK = -1;
  29 + std::complex<double> WORK[1];
  30 + integer INFO;
  31 + zgetri_((integer*)&N, (doublecomplex*)A, (integer*)&LDA, (integer*)IPIV, (doublecomplex*)WORK, &LWORK, &INFO);
  32 +}
  33 +void LINALG_inverse(std::complex<double>* A, int N)
  34 +{
  35 + int* IPIV = new int[N + (size_t)1];
  36 + integer LWORK = N * N;
  37 + std::complex<double>* WORK = new std::complex<double>[LWORK];
  38 + integer INFO;
  39 +
  40 + zgetrf_((integer*)&N, (integer*)&N, (doublecomplex*)A, (integer*)&N, (integer*)IPIV, &INFO);
  41 + zgetri_((integer*)&N, (doublecomplex*)A, (integer*)&N, (integer*)IPIV, (doublecomplex*)WORK, &LWORK, &INFO);
  42 +
  43 + delete[] IPIV;
  44 + delete[] WORK;
  45 +}
  46 +
  47 +void LINALG_zgemm(
  48 + const int M, //A(M*K) B(K*N)
  49 + const int N,
  50 + const int K,
  51 + std::complex<double>* A,
  52 + const int LDA, //=K
  53 + std::complex<double>* B,
  54 + const int LDB, //=N
  55 + std::complex<double>* C,
  56 + const int LDC) //=columns of C.
  57 +{
  58 + std::complex<double> alpha = 1;
  59 + std::complex<double> beta = 0;
  60 + cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, (OPENBLAS_CONST blasint)M, (OPENBLAS_CONST blasint)N, (OPENBLAS_CONST blasint)K,
  61 + &alpha, A, (OPENBLAS_CONST blasint)LDA, B, (OPENBLAS_CONST blasint)LDB, &beta, C, (OPENBLAS_CONST blasint)LDC);
  62 +
  63 +}
0 64 \ No newline at end of file
... ...
src/linalg.h 0 → 100644
  1 +++ a/src/linalg.h
  1 +// This file contains a set of wrapper functions that are linked to the corresponding functions in CLAPACK
  2 +#include <complex>
  3 +
  4 +//Solve matrix inverse.
  5 +void LINALG_inverse(std::complex<double>* A, int N);
  6 +
  7 +//Solve matrix multiplication. C = A * B.
  8 +void LINALG_zgemm(
  9 + const int M, //A(M*K) B(K*N)
  10 + const int N,
  11 + const int K,
  12 + std::complex<double>* A,
  13 + const int LDA, //=K
  14 + std::complex<double>* B,
  15 + const int LDB, //=N
  16 + std::complex<double>* C,
  17 + const int LDC); //=columns of C.
0 18 \ No newline at end of file
... ...
src/main.cpp 0 → 100644
  1 +++ a/src/main.cpp
  1 +//Update the data output format in example_layer().
  2 +#include "layer.h"
  3 +#include <windows.h>
  4 +
  5 +#include <fstream>
  6 +#include <iostream>
  7 +#include "stim/parser/arguments.h"
  8 +
  9 +using namespace std;
  10 +#define PI 3.14159265358979323846264338328
  11 +
  12 +bool ASCII_output = false;
  13 +bool CPU_op = false;
  14 +
  15 +void advertise() {
  16 + std::cout << std::endl << std::endl;
  17 + std::cout << "=========================================================================" << std::endl;
  18 + //std::cout << "Thank you for using the NetMets network comparison tool!" << std::endl;
  19 + std::cout << "Scalable Tissue Imaging and Modeling (STIM) Lab, University of Houston" << std::endl;
  20 + std::cout << "=========================================================================" << std::endl << std::endl;
  21 +}
  22 +
  23 +//Calculate Ez from vector d and Ex Ey.
  24 +void E_Cal(vector<complex<double>>* E, vector<complex<double>>* d) {
  25 + if (d->size() == 2) {
  26 + complex<double> dz = sqrt(1.0 + 0i - pow((*d)[0], 2) - pow((*d)[1], 2));
  27 + d->push_back(dz);
  28 + }
  29 + E->push_back(-((*E)[0] * (*d)[0] + (*E)[1] * (*d)[1]) / (*d)[2]);
  30 +}
  31 +void output_binary(std::string filename, layersample& layers) {
  32 + size_t L = layers.n.size(); // get the number of layers
  33 + std::ofstream outFile;
  34 + outFile.open(filename, std::ofstream::binary); // open the output file for binary writing
  35 + if (outFile) {
  36 + outFile.write((char*)&layers.k, sizeof(double));
  37 + outFile.write((char*)&layers.d[0], sizeof(double));
  38 + outFile.write((char*)&layers.d[1], sizeof(double));
  39 + outFile.write((char*)&layers.n[0], sizeof(double) * 2);
  40 +
  41 + for (size_t i = 0; i < L; i++) {
  42 + outFile.write((char*)&layers.z[i], sizeof(double));
  43 + outFile.write((char*)&layers.sz[i], 2 * sizeof(double));
  44 + outFile.write((char*)&layers.Pt[i], 2 * sizeof(double));
  45 + outFile.write((char*)&layers.Pt[i + L], 2 * sizeof(double));
  46 + outFile.write((char*)&layers.Pt[i + 2 * L], 2 * sizeof(double));
  47 + outFile.write((char*)&layers.Pr[i], 2 * sizeof(double));
  48 + outFile.write((char*)&layers.Pr[i + L], 2 * sizeof(double));
  49 + outFile.write((char*)&layers.Pr[i + 2 * L], 2 * sizeof(double));
  50 + }
  51 + outFile.close();
  52 + }
  53 + else {
  54 + std::cout << "ERROR opening output file for binary writing: " << filename << std::endl;
  55 + }
  56 +}
  57 +
  58 +void output_txt(std::string filename, layersample& layers) {
  59 + size_t L = layers.n.size(); // get the number of layers
  60 + std::ofstream outFile;
  61 + outFile.open(filename); // open the output file for text writing
  62 + if (!outFile) {
  63 + std::cout << "ERROR: Could not open file " << filename << std::endl;
  64 + exit(1);
  65 + }
  66 + int width = 15;
  67 +
  68 + outFile << "--------------------------------" << endl;
  69 + outFile << "The wavenumber at free space is : " << layers.k << endl;
  70 + for (size_t i = 0; i < L; i++) {
  71 + if (i == 0) {
  72 + outFile << "--------------------------------" << endl;
  73 + outFile << "LAYER " << i << " (z = " << layers.z[i] << ")" << endl;
  74 + outFile << "refractive index: " << layers.n[i].real() << " + i " << layers.n[i].imag() << endl;
  75 + outFile << "----------------------" << endl;
  76 + outFile << "sx = " << setw(width) << layers.s[0].real() << " + i " << layers.s[0].imag() << endl;
  77 + outFile << "sy = " << setw(width) << layers.s[1].real() << " + i " << layers.s[1].imag() << endl;
  78 + outFile << "sz = " << setw(width) << layers.sz[i].real() << " + i " << layers.sz[i].imag() << endl;
  79 + outFile << "----->>>>>" << endl;
  80 + outFile << " X = " << setw(width) << layers.Pt[i].real() << " + i " << layers.Pt[i].imag() << endl;
  81 + outFile << " Y = " << setw(width) << layers.Pt[i + L].real() << " + i " << layers.Pt[i + L].imag() << endl;
  82 + outFile << " Z = " << setw(width) << layers.Pt[i + 2 * L].real() << " + i " << layers.Pt[i + 2 * L].imag() << endl;
  83 + outFile << "<<<<<-----" << endl;
  84 + outFile << " X = " << setw(width) << layers.Pr[i].real() << " + i " << layers.Pr[i].imag() << endl;
  85 + outFile << " Y = " << setw(width) << layers.Pr[i + L].real() << " + i " << layers.Pr[i + L].imag() << endl;
  86 + outFile << " Z = " << setw(width) << layers.Pr[i + 2 * L].real() << " + i " << layers.Pr[i + 2 * L].imag() << endl;
  87 + }
  88 + else {
  89 + outFile << "----------------------" << endl;
  90 + outFile << "LAYER " << i << " (z = " << layers.z[i] << ")" << endl;
  91 + outFile << "refractive index: " << layers.n[i].real() << " + i " << layers.n[i].imag() << endl;
  92 + outFile << "----------------------" << endl;
  93 + outFile << "sx = " << setw(width) << layers.s[0].real() << " + i " << layers.s[0].imag() << endl;
  94 + outFile << "sy = " << setw(width) << layers.s[1].real() << " + i " << layers.s[1].imag() << endl;
  95 + outFile << "sz = " << setw(width) << layers.sz[i].real() << " + i " << layers.sz[i].imag() << endl;
  96 + outFile << "----->>>>>" << endl;
  97 + outFile << " X = " << setw(width) << layers.Pt[i].real() << " + i " << layers.Pt[i].imag() << endl;
  98 + outFile << " Y = " << setw(width) << layers.Pt[i + L].real() << " + i " << layers.Pt[i + L].imag() << endl;
  99 + outFile << " Z = " << setw(width) << layers.Pt[i + 2 * L].real() << " + i " << layers.Pt[i + 2 * L].imag() << endl;
  100 +
  101 + outFile << "<<<<<-----" << endl;
  102 + outFile << " X = " << setw(width) << layers.Pr[i].real() << " + i " << layers.Pr[i].imag() << endl;
  103 + outFile << " Y = " << setw(width) << layers.Pr[i + L].real() << " + i " << layers.Pr[i + L].imag() << endl;
  104 + outFile << " Z = " << setw(width) << layers.Pr[i + 2 * L].real() << " + i " << layers.Pr[i + 2 * L].imag() << endl;
  105 + }
  106 +
  107 + }
  108 + outFile.close();
  109 +}
  110 +
  111 +void calculate_layer(std::string outName,
  112 + vector<complex<double>>* ns, // the refractive index.
  113 + vector<double>* depths, // z-direction position.
  114 + vector<complex<double>>* E0, // the initialized E0.
  115 + vector<complex<double>>* d0, // direction of propagation of the plane wave.
  116 + double k0) { // the wavenumber at free space.
  117 + std::string outName_ext(outName, size(outName)-4, size(outName)-1); // extract the extension of the output file
  118 + E_Cal(E0, d0); // make sure that both vectors are orthogonal.
  119 +
  120 + //Creat a new layersample and initialize.
  121 + layersample Layer1; // create a layered sample
  122 + Layer1.n = *ns; // set a pointer to the refractive indices
  123 + Layer1.z = *depths; // set a pointer to the layer depths
  124 + Layer1.d = *d0;
  125 + Layer1.k = k0;
  126 +
  127 + LARGE_INTEGER t1, t2, tc; // Timing.
  128 + QueryPerformanceFrequency(&tc);
  129 + QueryPerformanceCounter(&t1);
  130 + Layer1.solve(E0, CPU_op); // Solve for the substrate field in GPU.
  131 + QueryPerformanceCounter(&t2);
  132 + std::cout << "time for 'solving linear functions':" << (t2.QuadPart - t1.QuadPart) / (double)tc.QuadPart << "ms."<< std::endl;
  133 + //output(outName, Layer1);
  134 + if (ASCII_output)
  135 + output_txt(outName, Layer1);
  136 + else
  137 + output_binary(outName, Layer1);
  138 +}
  139 +
  140 +//Main function for example_layer.
  141 +int main(int argc, char* argv[]) {
  142 + stim::arglist args;
  143 +
  144 + //Basic argument lists.
  145 + args.add("help", "prints this help");
  146 + args.add("s", "propagation direction vector (x, y)", "0.5 0.0", "[-1.0, 1.0]");
  147 + args.add("l", "wavelength", "5.0", "in arbitrary units (ex. um)");
  148 + args.add("Ex", "complex amplitude (x direction)", "0.5 0.0");
  149 + args.add("Ey", "complex amplitude (y direction)", "0.5 0.0");
  150 + args.add("z", "layer positions");
  151 + args.add("n", "layer optical path length (real refractive index)", "1.0 1.4 1.4 1.0");
  152 + args.add("kappa", "layer absorbance (imaginary refractive index)");
  153 + args.add("ascii", "output as an ASCII file");
  154 + args.add("CPU", "execute the program in CPU");
  155 + args.parse(argc, argv);
  156 +
  157 + if (args["help"].is_set()) { // test for help
  158 + advertise(); // output the advertisement
  159 + std::cout << args.str(); // output arguments
  160 + exit(1); // exit
  161 + }
  162 +
  163 + ASCII_output = args["ascii"]; // if the ascii flag is set, output an ascii file
  164 + CPU_op = args["CPU"]; // if the CPU_op is set, run the program in CPU.
  165 + std::string outName;
  166 + if (args.nargs() == 1) {
  167 + outName = args.arg(0);
  168 + }
  169 + else if (args.nargs() == 0) {
  170 + if (ASCII_output)
  171 + outName = "output.txt";
  172 + else
  173 + outName = "output.lyr";
  174 + }
  175 + else {
  176 + std::cout << "ERROR: Too many arguments." << std::endl;
  177 + exit(1);
  178 + }
  179 +
  180 +
  181 + vector<complex<double>> d; //direction of propagation of the plane wave Init: {0.5, 0}
  182 + if (args["s"].nargs() == 2) {
  183 + d.push_back({ (double)args["s"].as_float(0), 0 });
  184 + d.push_back({ (double)args["s"].as_float(1), 0 });
  185 + }
  186 +
  187 + double l0 = (double)args["l"].as_float(0); //wavelength.Init: l0 = 5;
  188 + double k0 = 2 * PI / l0; //Calculate the free-space wavenumber.
  189 +
  190 + complex<double> Ex = { (double)args["Ex"].as_float(0), (double)args["Ex"].as_float(1) }; //Input E. Init: {1, 1, 0}
  191 + complex<double> Ey = { (double)args["Ey"].as_float(0), (double)args["Ey"].as_float(1) };
  192 + vector<complex<double>> E0;
  193 + E0.push_back(Ex);
  194 + E0.push_back(Ey);
  195 +
  196 + //const int LAYERS = args["L"].as_int(); //LAYERS Init: 4
  197 +
  198 + vector<double> depths;
  199 + vector<complex<double>> ns;
  200 + size_t i = 0;
  201 + while(args["n"].is_set() && args["n"].as_float(i)!=0) { //n is the real part.
  202 + if (args["kappa"].is_set()) //kappa is the imaginary part.
  203 + ns.push_back({ (double)args["n"].as_float(i), (double)args["kappa"].as_float(i) });
  204 + else
  205 + ns.push_back({ (double)args["n"].as_float(i), 0 }); //ns <- n + i * kappa
  206 + i++;
  207 + }
  208 + for (size_t j = 0; j < i; j++)
  209 + if (args["z"].is_set())
  210 + depths.push_back((double)args["z"].as_float(i));
  211 + else {
  212 + depths.push_back((double)-100 + j * 200 / i);
  213 + }
  214 + /*---------------------------------------example_layer--------------------------------------------*/
  215 +
  216 + calculate_layer(outName, &ns, &depths, &E0, &d, k0);
  217 +
  218 + return 0;
  219 + std::cin.get();
  220 +}
0 221 \ No newline at end of file
... ...