Commit 71d5696d385e7d2e662bbd1e98264293479ed780
0 parents
First commit after development and testing
Showing
12 changed files
with
1182 additions
and
0 deletions
Show diff stats
1 | +++ a/CMakeLists.txt | |
1 | +#Specify the version being used aswell as the language | |
2 | +cmake_minimum_required(VERSION 3.12) | |
3 | + | |
4 | +#Name your project here | |
5 | +project(multilayer) | |
6 | + | |
7 | +#set the module directory | |
8 | +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}") | |
9 | + | |
10 | +#default to release mode | |
11 | +if(NOT CMAKE_BUILD_TYPE) | |
12 | + set(CMAKE_BUILD_TYPE Release) | |
13 | +endif(NOT CMAKE_BUILD_TYPE) | |
14 | + | |
15 | +#build the executable in the binary directory on MS Visual Studio | |
16 | +if ( MSVC ) | |
17 | + SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}") | |
18 | + SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}") | |
19 | + SET( LIBRARY_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}") | |
20 | + SET( LIBRARY_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}") | |
21 | + add_definitions(-D_CRT_SECURE_NO_WARNINGS) | |
22 | + add_definitions(-D_SCL_SECURE_NO_WARNINGS) | |
23 | +endif ( MSVC ) | |
24 | + | |
25 | + | |
26 | +#find packages----------------------------------- | |
27 | +#find the pthreads package | |
28 | +find_package(Threads) | |
29 | + | |
30 | +#find the X11 package | |
31 | +find_package(X11) | |
32 | + | |
33 | +#find CUDA, mostly for LA stuff using cuBLAS | |
34 | +find_package(CUDA REQUIRED) | |
35 | + | |
36 | +#find Boost | |
37 | +#find_package(Boost) | |
38 | + | |
39 | +#find the STIM library | |
40 | +find_package(STIM REQUIRED) | |
41 | + | |
42 | +#find LAPACK and supporting link_libraries | |
43 | +find_package(clapack CONFIG REQUIRED) | |
44 | +find_package(OpenBLAS CONFIG REQUIRED) | |
45 | + | |
46 | +#include include directories | |
47 | +include_directories(${CUDA_INCLUDE_DIRS} | |
48 | + ${STIM_INCLUDE_DIRS} | |
49 | +) | |
50 | + | |
51 | +#Assign source files to the appropriate variables to easily associate them with executables | |
52 | +file(GLOB SRC "src/*.cpp") | |
53 | + | |
54 | +#-----------------------------Create the executable-------------------------- | |
55 | +#-----------------------------Show all four examples------------------------- | |
56 | +add_executable(multilayer | |
57 | + ${SRC} | |
58 | +) | |
59 | +link_directories(${CUDA_BIN_DIRS}) | |
60 | +target_link_libraries(multilayer ${CUDA_LIBRARIES} | |
61 | + ${CUDA_CUBLAS_LIBRARIES} | |
62 | + ${CUDA_cusparse_LIBRARY} | |
63 | + ${CUDA_cusolver_LIBRARY} | |
64 | + ${CUDA_CUFFT_LIBRARIES} | |
65 | + OpenBLAS::OpenBLAS | |
66 | + f2c lapack | |
67 | +) | |
68 | + | |
69 | + | |
0 | 70 | \ No newline at end of file | ... | ... |
1 | +++ a/FindSTIM.cmake | |
1 | +# finds the STIM library (downloads it if it isn't present) | |
2 | +# set STIMLIB_PATH to the directory containing the stim subdirectory (the stim repository) | |
3 | + | |
4 | +include(FindPackageHandleStandardArgs) | |
5 | + | |
6 | +set(STIM_ROOT $ENV{STIM_ROOT}) | |
7 | + | |
8 | +IF(NOT STIM_ROOT) | |
9 | + MESSAGE("ERROR: STIM_ROOT environment variable must be set!") | |
10 | +ENDIF(NOT STIM_ROOT) | |
11 | + | |
12 | + FIND_PATH(STIM_INCLUDE_DIRS DOC "Path to STIM include directory." | |
13 | + NAMES stim/image/image.h | |
14 | + PATHS ${STIM_ROOT}) | |
15 | + | |
16 | +find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIRS) | ... | ... |
1 | +++ a/docs/Readme_BytesOrder.txt | |
1 | +Order of "output.lyr" parameters. | |
2 | + | |
3 | +The wavenumber in free space: k0 double 8B | |
4 | +The direction of propogation: d double*2 16B | |
5 | +The refractive index in the first layer: n[0] complex<double> 16B | |
6 | + | |
7 | +for i in LAYERS: | |
8 | + z positions[i]: z[i] double 8B * LAYERS | |
9 | + z-component of propogation directions: sz[i] complex<double> 16B * LAYERS | |
10 | + Transmission: Ptx[i] complex<double> 16B * LAYERS | |
11 | + Transmission: Pty[i] complex<double> 16B * LAYERS | |
12 | + Transmission: Ptz[i] complex<double> 16B * LAYERS | |
13 | + Reflection: Prx[i] complex<double> 16B * LAYERS | |
14 | + Transmission: Pry[i] complex<double> 16B * LAYERS | |
15 | + Transmission: Prz[i] complex<double> 16B * LAYERS | |
16 | + | |
17 | + | |
18 | +All parameters we need will be: | |
19 | + 15 * LAYERS + 5 | ... | ... |
No preview for this file type
1 | +++ a/docs/testcases.txt | |
1 | +output.lyr --n 1.0 1.2 1.3 1.4 1.5 1.4 1.3 1.2 1.1 --kappa 0.0 0.01 0.02 0.03 0.04 0.05 0.6 0.07 0.08 --zPos -50 -40 -30 -20 -10 0 10 20 30 | |
2 | + | |
3 | +output.lyr --n 1.0 2.0 1.0 --kappa 0.0 0.0 0.0 --z -50 0 50 | |
4 | + | |
5 | +defaults: | |
6 | + | |
7 | +*) If kappa is not specified, set them all to zero | |
8 | + | |
9 | +*) If z is not specified, create equally spaced layers between -100 and 100 | |
10 | + | |
11 | +*) Output: Place a color bar next to each image (colorbar()) | |
0 | 12 | \ No newline at end of file | ... | ... |
1 | +++ a/layerview.py | |
1 | +# create a function that displays the output when run this way: | |
2 | +# python layerview.py ouput.dat | |
3 | + | |
4 | +import sys | |
5 | +import os | |
6 | +from time import time | |
7 | +import subprocess | |
8 | +import struct | |
9 | +import numpy as np | |
10 | +import matplotlib | |
11 | +import math | |
12 | +import matplotlib.pyplot as plt | |
13 | + | |
14 | +from mpl_toolkits.axes_grid1 import ImageGrid | |
15 | + | |
16 | +def intensity(E): | |
17 | + Econj = np.conj(E) | |
18 | + I = np.sum(E*Econj, axis=-1) | |
19 | + return np.real(I) | |
20 | + | |
21 | +#evaluate a solved homogeneous substrate | |
22 | +# Returns a complex NxMx3 array representing the cross section of the field at Y=0 | |
23 | +def evaluate(Depths, k, d, n0, sz, Pt, Pr, X, Y, Z): | |
24 | + Depths = np.array(Depths) | |
25 | + sz = np.array(sz) | |
26 | + Pt = np.array(Pt) | |
27 | + Pr = np.array(Pr) | |
28 | + s = np.array(d) * n0 | |
29 | + #allocate space for layer indices | |
30 | + LI = np.zeros(Z.shape, dtype=np.int) | |
31 | + | |
32 | + #find the layer index for each sample point | |
33 | + L = len(Depths) | |
34 | + LI[Z < Depths[0]] = 0 | |
35 | + for l in range(L-1): | |
36 | + idx = np.logical_and(Z > Depths[l], Z <= Depths[l+1]) | |
37 | + LI[idx] = l | |
38 | + LI[Z > Depths[-1]] = L - 1 | |
39 | + | |
40 | + #calculate the appropriate phase shift for the wave transmitted through the layer | |
41 | + Ph_t = np.exp(1j * k * sz[LI] * (Z - Depths[LI])) | |
42 | + | |
43 | + #calculate the appropriate phase shift for the wave reflected off of the layer boundary | |
44 | + LIp = LI + 1 | |
45 | + LIp[LIp >= L] = 0 | |
46 | + Ph_r = np.exp(-1j * k * sz[LI] * (Z - Depths[LIp])) | |
47 | + Ph_r[LI >= L-1] = 0 | |
48 | + | |
49 | + #calculate the phase shift based on the X and Y positions | |
50 | + Ph_xy = np.exp(1j * k * (s[0] * X + s[1] * Y)) | |
51 | + | |
52 | + #apply the phase shifts | |
53 | + Et = Pt[:, LI] * Ph_t[:, :] | |
54 | + Er = Pr[:, LI] * Ph_r[:, :] | |
55 | + | |
56 | + #add everything together coherently | |
57 | + E = (Et + Er) * Ph_xy[:, :] | |
58 | + | |
59 | + #return the electric field | |
60 | + return np.moveaxis(E, 0, -1) | |
61 | + | |
62 | +class planewave: | |
63 | + def __int__(self): | |
64 | + self.LAYERS = 0 #Number of layers. int | |
65 | + self.depths = [] #z positions of layers. [1, 5, ..., 10] double | |
66 | + self.k0 = 0.0 #wavenumber at free space. double | |
67 | + self.d = [] #direction of propogation. [0.5, 0] double | |
68 | + self.n0 = 0.0+0.0j #the refractive index of the first layer. complex<double> | |
69 | + self.sz = [] #z-component of propagation for each layer. complex<double> | |
70 | + self.Pt = [[] for i in range(3)] #transmission complex<double> | |
71 | + self.Pr = [[],[],[]] #refraction complex<double> | |
72 | + | |
73 | +# display a binary file produced using the coupled wave C code | |
74 | +def layer(strc): | |
75 | + f = open(strc, "rb") | |
76 | + | |
77 | + # create an empty plane wave structure | |
78 | + L = planewave() | |
79 | + L.depths = [] | |
80 | + L.d = [] | |
81 | + L.sz = [] | |
82 | + L.Pt = [[],[],[]] | |
83 | + L.Pr = [[],[],[]] | |
84 | + | |
85 | + # open the input file for reading | |
86 | + file_bytes = os.path.getsize(strc) | |
87 | + | |
88 | + # calculate the number of layers in the sample | |
89 | + L.LAYERS = int((file_bytes/8-5)/15) | |
90 | + | |
91 | + # load the raw layer data into the plane wave structure | |
92 | + data_raw = struct.unpack('d' * (15*L.LAYERS+5), f.read((15*L.LAYERS+5)* 8)) | |
93 | + data = np.asarray(data_raw) | |
94 | + L.k0 = data[0] | |
95 | + L.d.append(data[1]) | |
96 | + L.d.append(data[2]) | |
97 | + L.n0 = complex(data[3], data[4]) | |
98 | + | |
99 | + # load each layer's plane waves from the binary file | |
100 | + for i in range(L.LAYERS): | |
101 | + L.depths.append(data[5+15*i]) | |
102 | + L.sz.append(complex(data[6+15*i], data[7+15*i])) | |
103 | + L.Pt[0].append(complex(data[8+15*i], data[9+15*i])) | |
104 | + L.Pt[1].append(complex(data[15*i+10], data[15*i+11])) | |
105 | + L.Pt[2].append(complex(data[15*i+12], data[15*i+13])) | |
106 | + L.Pr[0].append(complex(data[15*i+14], data[15*i+15])) | |
107 | + L.Pr[1].append(complex(data[15*i+16], data[15*i+17])) | |
108 | + L.Pr[2].append(complex(data[15*i+18], data[15*i+19])) | |
109 | + | |
110 | + N = 512 # simulation resolution NxM | |
111 | + M = 1024 | |
112 | + #DAVID: Don't hard-code the dimensions - you'll have to calculate them based on the sample information in the file | |
113 | + D = [-110, 110, 0, 60] # dimensions of the simulation | |
114 | + x = np.linspace(D[2], D[3], N) # set the sample points for the simulation | |
115 | + z = np.linspace(D[0], D[1], M) | |
116 | + [X, Z] = np.meshgrid(x, z) # create a mesh grid to evaluate layers | |
117 | + Y = np.zeros(X.shape) | |
118 | + | |
119 | + # evaluate the field across all layers | |
120 | + E = evaluate(L.depths, L.k0, L.d, L.n0, L.sz, L.Pt, L.Pr, X, Y, Z) | |
121 | + Er = np.real(E) | |
122 | + I = intensity(E) | |
123 | + | |
124 | + plt.set_cmap("afmhot") # set the color map | |
125 | + plt.subplot(1, 4, 1) | |
126 | + plt.imshow(Er[:, :, 0], extent=(D[3], D[2], D[1], D[0])) | |
127 | + #plt.colorbar() | |
128 | + plt.title("Ex") | |
129 | + | |
130 | + plt.subplot(1, 4, 2) | |
131 | + plt.imshow(Er[:, :, 1], extent=(D[3], D[2], D[1], D[0])) | |
132 | + #plt.colorbar() | |
133 | + plt.title("Ey") | |
134 | + | |
135 | + plt.subplot(1, 4, 3) | |
136 | + plt.imshow(Er[:, :, 2], extent=(D[3], D[2], D[1], D[0])) | |
137 | + #plt.colorbar() | |
138 | + plt.title("Ez") | |
139 | + | |
140 | + plt.subplot(1, 4, 4) | |
141 | + plt.imshow(I, extent=(D[3], D[2], D[1], D[0])) | |
142 | + plt.colorbar() | |
143 | + plt.title("I") | |
144 | + | |
145 | + #fig = plt.figure(1, (5, 10)) | |
146 | + #plt.set_cmap("afmhot") | |
147 | + #matplotlib.rcParams.update({'font.size': 10}) | |
148 | + #grid = ImageGrid(fig, rect = 211, nrows_ncols = (1, 3), axes_pad = 0.2, label_mode = "1", cbar_mode = "single", cbar_size = "18%") | |
149 | + #Title = ["Ex", "Ey", "Ez"] | |
150 | + #for i in range(3): | |
151 | + # grid[i].axis('off') | |
152 | + # im = grid[i].imshow(Er[..., i], extent=(D[3], D[2], D[1], D[0]), interpolation="nearest") | |
153 | + # grid[i].set_title(Title[i]) | |
154 | + #grid.cbar_axes[0].colorbar(im) | |
155 | + #plt.title("E") | |
156 | + #plt.subplot(212) | |
157 | + #plt.imshow(I, extent=(D[3], D[2], D[1], D[0])) | |
158 | + #plt.title("I") | |
159 | + #plt.colorbar() | |
160 | + plt.show() | |
161 | + | |
162 | +# function displays usage text to the console | |
163 | +def usage(): | |
164 | + print("Usage:") | |
165 | + print(" layerview input.dat") | |
166 | + | |
167 | +if __name__ == '__main__': | |
168 | + start = time() | |
169 | + if len(sys.argv) < 2: # if there are no command line arguments | |
170 | + usage() # display the usage text | |
171 | + exit() # exit | |
172 | + else: | |
173 | + layer(sys.argv[1]) # otherwise display the given data file | |
174 | + | |
175 | + end = time() | |
176 | + print("The elapsed time is " + str(end - start) + " s. ") | |
0 | 177 | \ No newline at end of file | ... | ... |
1 | +++ a/src/layer.cpp | |
1 | +#include "layer.h" | |
2 | +#include "linalg.h" //LAPACKE support for Visual Studio | |
3 | + | |
4 | +#include <cusparse.h> | |
5 | +#include <cuda_runtime.h> | |
6 | +//#include "cublas_v2.h" | |
7 | +#include "cusolverSp.h" | |
8 | +/*----------------------------GPU-----------------------------*/ | |
9 | + | |
10 | + | |
11 | +//Cross product.c is result. | |
12 | +void crossProduct(vector<complex<double>>* a, vector<complex<double>>* b, //The given matrices. | |
13 | + vector<complex<double>>* c) { //Matrix to be gotten. | |
14 | + c->push_back((*a)[1] * (*b)[2] - (*a)[2] * (*b)[1]); | |
15 | + c->push_back((*a)[2] * (*b)[0] - (*a)[0] * (*b)[2]); | |
16 | + c->push_back((*a)[0] * (*b)[1] - (*a)[1] * (*b)[0]); | |
17 | +} | |
18 | + | |
19 | +//Calculate the norm for a matrix. | |
20 | +complex<double> Norm(vector<complex<double>>* E) { | |
21 | + complex<double> sum = 0; | |
22 | + for (unsigned int i = 0; i < E->size(); i++) { | |
23 | + sum += (*E)[i] * (*E)[i]; | |
24 | + } | |
25 | + return sqrt(sum); | |
26 | +} | |
27 | + | |
28 | +//Normalize matrix. | |
29 | +void Normalize(vector<complex<double>>* E) { | |
30 | + complex<double> sum = 0; | |
31 | + for (unsigned int i = 0; i < E->size(); i++) { | |
32 | + sum += (*E)[i] * (*E)[i]; | |
33 | + } | |
34 | + for (unsigned int i = 0; i < E->size(); i++) { | |
35 | + (*E)[i] = (*E)[i] / sqrt(sum); | |
36 | + | |
37 | + } | |
38 | +} | |
39 | + | |
40 | +//Orthogonalization. | |
41 | +void orthogonalize(vector<complex<double>>* E_0rtho, vector<complex<double>>* E0, vector<complex<double>>* d) { | |
42 | + vector<complex<double>> s; | |
43 | + if (d->size() == 2) { | |
44 | + complex<double> dz = sqrt(1.0+0i - pow((*d)[0], 2) - pow((*d)[1], 2)); | |
45 | + d->push_back(dz); | |
46 | + } | |
47 | + crossProduct(E0, d, &s); | |
48 | + crossProduct(d, &s, E_0rtho); | |
49 | + vector<complex<double>>().swap(s); | |
50 | +} | |
51 | + | |
52 | +/*--------------------------------------------------Define Class layersample.--------------------------------------------------------*/ | |
53 | + | |
54 | +/*Do not try to replace "int" as "size_t". | |
55 | +This will result in a bunch of warnings and if we continuously change the type of M_rowInd and M_colInd, the EXCEPTION will occur again.*/ | |
56 | +size_t layersample::ii(size_t l, size_t c, size_t d) { //ii(l, c, d) means the column indexes for every element. | |
57 | + return l * 6 + d * 3 + c - 3; | |
58 | +} | |
59 | + | |
60 | +void layersample::generate_linsys(size_t LAYERS, | |
61 | + vector<complex<double>>& M, //All non-zero values in "A" matirx.(A * X = b) | |
62 | + vector<complex<double>>& b, //The right hand side column vector. | |
63 | + vector<complex<double>>& E, //orthogonalized E0 vectors | |
64 | + vector<complex<double>>* P, | |
65 | + bool CPU_op) { //Solution of the matrices multiplication. | |
66 | + //Calculate the sz component for each layer. | |
67 | + s.clear(); //s is the plane wave direction scaled by the refractive index. | |
68 | + for (size_t i = 0; i < 2; i++) | |
69 | + s.push_back(d[i] * n[0]); | |
70 | + sz.clear(); | |
71 | + for (int l = 0; l < LAYERS; l++) { | |
72 | + sz.push_back(sqrt(pow(n[l], 2) - pow(s[0], 2) - pow(s[1], 2))); | |
73 | + } | |
74 | + | |
75 | + if (!CPU_op){ | |
76 | + //Computer in GPU. | |
77 | + vector<int> M_rowInd; //Sparse matrix M CSR ->row index | |
78 | + vector<int> M_colInd; //Sparse matrix M CSR ->number of elements | |
79 | + M_rowInd.push_back(0); | |
80 | + ////Build M by setting constraints based on Gauss's Law. | |
81 | + for (size_t l = 0; l < LAYERS; l++) { | |
82 | + //Set the upward components for each layer. | |
83 | + //Layer "LAYERS-1" doesn't have a upward component. | |
84 | + if (l != LAYERS - 1) { | |
85 | + M.push_back(s[0]); | |
86 | + M_colInd.push_back((int)ii(l, 0, 1)); | |
87 | + M.push_back(s[1]); | |
88 | + M_colInd.push_back((int)ii(l, 1, 1)); | |
89 | + M.push_back(-sz[l]); | |
90 | + M_colInd.push_back((int)ii(l, 2, 1)); | |
91 | + M_rowInd.push_back((int)M.size()); | |
92 | + b.push_back(0); | |
93 | + } | |
94 | + //Set the downward components for each layer. | |
95 | + if (l != 0) { | |
96 | + M.push_back(s[0]); | |
97 | + M_colInd.push_back((int)ii(l, 0, 0)); | |
98 | + M.push_back(s[1]); | |
99 | + M_colInd.push_back((int)ii(l, 1, 0)); | |
100 | + M.push_back(sz[l]); | |
101 | + M_colInd.push_back((int)ii(l, 2, 0)); | |
102 | + M_rowInd.push_back((int)M.size()); | |
103 | + b.push_back(0); | |
104 | + } | |
105 | + } | |
106 | + //Continue to build M by enforcing a continuous field across boundaries. | |
107 | + complex<double> arg, arg_in, B; | |
108 | + for (size_t l = 1; l < LAYERS; l++) { | |
109 | + complex<double> sz0 = sz[l - 1]; | |
110 | + complex<double> sz1 = sz[l]; | |
111 | + | |
112 | + //Representation of A = np.exp(1j * k0 * sz0 * (self.z[l] - self.z[l - 1])) | |
113 | + complex<double> A_in = k * sz0 * (z[l] - z[l - 1]); | |
114 | + complex<double> A_in2 = { -A_in.imag(), A_in.real() }; | |
115 | + complex<double> A = exp(A_in2); | |
116 | + | |
117 | + if (l < LAYERS - 1) { | |
118 | + double dl = z[l] - z[l + 1]; | |
119 | + arg_in = -k * sz1 * (complex<double>)dl; | |
120 | + arg = { -arg_in.imag(), arg_in.real() }; | |
121 | + B = exp(arg); | |
122 | + } | |
123 | + //if this is the second layer, use the simplified equations that account for the incident field | |
124 | + if (l == 1) { | |
125 | + M.push_back(1); | |
126 | + M_colInd.push_back((int)ii(0, 0, 1)); | |
127 | + M.push_back(-1); | |
128 | + M_colInd.push_back((int)ii(1, 0, 0)); | |
129 | + if (LAYERS > 2) { | |
130 | + M.push_back(-B); | |
131 | + M_colInd.push_back((int)ii(1, 0, 1)); | |
132 | + } | |
133 | + M_rowInd.push_back((int)M.size()); | |
134 | + b.push_back(-A * E[0]); | |
135 | + | |
136 | + M.push_back(1); | |
137 | + M_colInd.push_back((int)ii(0, 1, 1)); | |
138 | + M.push_back(-1); | |
139 | + M_colInd.push_back((int)ii(1, 1, 0)); | |
140 | + if (LAYERS > 2) { | |
141 | + M.push_back(-B); | |
142 | + M_colInd.push_back((int)ii(1, 1, 1)); | |
143 | + } | |
144 | + M_rowInd.push_back((int)M.size()); | |
145 | + b.push_back(-A * E[l]); | |
146 | + | |
147 | + M.push_back(sz0); | |
148 | + M_colInd.push_back((int)ii(0, 1, 1)); | |
149 | + M.push_back(s[1]); | |
150 | + M_colInd.push_back((int)ii(0, 2, 1)); | |
151 | + M.push_back(sz1); | |
152 | + M_colInd.push_back((int)ii(1, 1, 0)); | |
153 | + M.push_back(-s[1]); | |
154 | + M_colInd.push_back((int)ii(1, 2, 0)); | |
155 | + if (LAYERS > 2) { | |
156 | + M.push_back(-B * sz1); | |
157 | + M_colInd.push_back((int)ii(1, 1, 1)); | |
158 | + M.push_back(-B * s[1]); | |
159 | + M_colInd.push_back((int)ii(1, 2, 1)); | |
160 | + } | |
161 | + M_rowInd.push_back((int)M.size()); | |
162 | + b.push_back(A * sz0 * E[1] - A * s[1] * E[2]); | |
163 | + | |
164 | + M.push_back(-sz0); | |
165 | + M_colInd.push_back((int)ii(0, 0, 1)); | |
166 | + M.push_back(-s[0]); | |
167 | + M_colInd.push_back((int)ii(0, 2, 1)); | |
168 | + M.push_back(-sz1); | |
169 | + M_colInd.push_back((int)ii(1, 0, 0)); | |
170 | + M.push_back(s[0]); | |
171 | + M_colInd.push_back((int)ii(1, 2, 0)); | |
172 | + if (LAYERS > 2) { | |
173 | + M.push_back(B * sz1); | |
174 | + M_colInd.push_back((int)ii(1, 0, 1)); | |
175 | + M.push_back(B* s[0]); | |
176 | + M_colInd.push_back((int)ii(1, 2, 1)); | |
177 | + } | |
178 | + M_rowInd.push_back((int)M.size()); | |
179 | + b.push_back(A * s[0] * E[2] - A * sz0 * E[0]); | |
180 | + } | |
181 | + else if (l == LAYERS - 1) { | |
182 | + M.push_back(A); | |
183 | + M_colInd.push_back((int)ii(l - 1, 0, 0)); | |
184 | + M.push_back(1); | |
185 | + M_colInd.push_back((int)ii(l - 1, 0, 1)); | |
186 | + M.push_back(-1); | |
187 | + M_colInd.push_back((int)ii(l, 0, 0)); | |
188 | + M_rowInd.push_back((int)M.size()); | |
189 | + b.push_back(0); | |
190 | + | |
191 | + M.push_back(A); | |
192 | + M_colInd.push_back((int)ii(l - 1, 1, 0)); | |
193 | + M.push_back(1); | |
194 | + M_colInd.push_back((int)ii(l - 1, 1, 1)); | |
195 | + M.push_back(-1); | |
196 | + M_colInd.push_back((int)ii(l, 1, 0)); | |
197 | + M_rowInd.push_back((int)M.size()); | |
198 | + b.push_back(0); | |
199 | + | |
200 | + M.push_back(-A * sz0); | |
201 | + M_colInd.push_back((int)ii(l - 1, 1, 0)); | |
202 | + M.push_back(A * s[1]); | |
203 | + M_colInd.push_back((int)ii(l - 1, 2, 0)); | |
204 | + M.push_back(sz0); | |
205 | + M_colInd.push_back((int)ii(l - 1, 1, 1)); | |
206 | + M.push_back(s[1]); | |
207 | + M_colInd.push_back((int)ii(l - 1, 2, 1)); | |
208 | + M.push_back(sz1); | |
209 | + M_colInd.push_back((int)ii(l, 1, 0)); | |
210 | + M.push_back(-s[1]); | |
211 | + M_colInd.push_back((int)ii(l, 2, 0)); | |
212 | + M_rowInd.push_back((int)M.size()); | |
213 | + b.push_back(0); | |
214 | + | |
215 | + M.push_back(A * sz0); | |
216 | + M_colInd.push_back((int)ii(l - 1, 0, 0)); | |
217 | + M.push_back(-A * s[0]); | |
218 | + M_colInd.push_back((int)ii(l - 1, 2, 0)); | |
219 | + M.push_back(-sz0); | |
220 | + M_colInd.push_back((int)ii(l - 1, 0, 1)); | |
221 | + M.push_back(-s[0]); | |
222 | + M_colInd.push_back((int)ii(l - 1, 2, 1)); | |
223 | + M.push_back(-sz1); | |
224 | + M_colInd.push_back((int)ii(l, 0, 0)); | |
225 | + M.push_back(s[0]); | |
226 | + M_colInd.push_back((int)ii(l, 2, 0)); | |
227 | + M_rowInd.push_back((int)M.size()); | |
228 | + b.push_back(0); | |
229 | + } | |
230 | + else { | |
231 | + M.push_back(A); | |
232 | + M_colInd.push_back((int)ii(l - 1, 0, 0)); | |
233 | + M.push_back(1); | |
234 | + M_colInd.push_back((int)ii(l - 1, 0, 1)); | |
235 | + M.push_back(-1); | |
236 | + M_colInd.push_back((int)ii(l, 0, 0)); | |
237 | + M.push_back(-B); | |
238 | + M_colInd.push_back((int)ii(l, 0, 1)); | |
239 | + M_rowInd.push_back((int)M.size()); | |
240 | + b.push_back(0); | |
241 | + | |
242 | + M.push_back(A); | |
243 | + M_colInd.push_back((int)ii(l - 1, 1, 0)); | |
244 | + M.push_back(1); | |
245 | + M_colInd.push_back((int)ii(l - 1, 1, 1)); | |
246 | + M.push_back(-1); | |
247 | + M_colInd.push_back((int)ii(l, 1, 0)); | |
248 | + M.push_back(-B); | |
249 | + M_colInd.push_back((int)ii(l, 1, 1)); | |
250 | + M_rowInd.push_back((int)M.size()); | |
251 | + b.push_back(0); | |
252 | + | |
253 | + M.push_back(-A * sz0); | |
254 | + M_colInd.push_back((int)ii(l - 1, 1, 0)); | |
255 | + M.push_back(A * s[1]); | |
256 | + M_colInd.push_back((int)ii(l - 1, 2, 0)); | |
257 | + M.push_back(sz0); | |
258 | + M_colInd.push_back((int)ii(l - 1, 1, 1)); | |
259 | + M.push_back(s[1]); | |
260 | + M_colInd.push_back((int)ii(l - 1, 2, 1)); | |
261 | + M.push_back(sz1); | |
262 | + M_colInd.push_back((int)ii(l, 1, 0)); | |
263 | + M.push_back(-s[1]); | |
264 | + M_colInd.push_back((int)ii(l, 2, 0)); | |
265 | + M.push_back(-B * sz1); | |
266 | + M_colInd.push_back((int)ii(l, 1, 1)); | |
267 | + M.push_back(-B * s[1]); | |
268 | + M_colInd.push_back((int)ii(l, 2, 1)); | |
269 | + M_rowInd.push_back((int)M.size()); | |
270 | + b.push_back(0); | |
271 | + | |
272 | + M.push_back(A * sz0); | |
273 | + M_colInd.push_back((int)ii(l - 1, 0, 0)); | |
274 | + M.push_back(-A * s[0]); | |
275 | + M_colInd.push_back((int)ii(l - 1, 2, 0)); | |
276 | + M.push_back(-sz0); | |
277 | + M_colInd.push_back((int)ii(l - 1, 0, 1)); | |
278 | + M.push_back(-s[0]); | |
279 | + M_colInd.push_back((int)ii(l - 1, 2, 1)); | |
280 | + M.push_back(-sz1); | |
281 | + M_colInd.push_back((int)ii(l, 0, 0)); | |
282 | + M.push_back(s[0]); | |
283 | + M_colInd.push_back((int)ii(l, 2, 0)); | |
284 | + M.push_back(B * sz1); | |
285 | + M_colInd.push_back((int)ii(l, 0, 1)); | |
286 | + M.push_back(B * s[0]); | |
287 | + M_colInd.push_back((int)ii(l, 2, 1)); | |
288 | + M_rowInd.push_back((int)M.size()); | |
289 | + b.push_back(0); | |
290 | + } | |
291 | + } | |
292 | + cudaError_t cudaStatus; | |
293 | + cusolverStatus_t cusolverStatus; | |
294 | + cusparseStatus_t cusparseStatus; | |
295 | + cusolverSpHandle_t handle = NULL; | |
296 | + cusparseHandle_t cusparseHandle = NULL; | |
297 | + cudaStream_t stream = NULL; | |
298 | + cusparseMatDescr_t descrM = NULL; | |
299 | + cuDoubleComplex * csrValM_, * b_, *P_; | |
300 | + size_t rowsA = b.size(), colsA = b.size(), nnA = M.size(), baseM_ = 0; //nnA is the number of non-zero elements. | |
301 | + int* csrRowPtrM = NULL; //row index M_rowInd projected to GPU. | |
302 | + int* csrColIndM = NULL; //CSR(A) from I/O. // M_colInd projected to GPU. | |
303 | + double tol = 1.e-12; int reorder = 0; | |
304 | + int singularity = 0; | |
305 | + | |
306 | + //Initialize. | |
307 | + cusolverStatus = cusolverSpCreate(&handle); | |
308 | + int num = 1; | |
309 | + cudaStatus = cudaGetDevice(&num); | |
310 | + cusparseStatus = cusparseCreate(&cusparseHandle); | |
311 | + cudaStatus = cudaStreamCreate(&stream); | |
312 | + cusolverStatus = cusolverSpSetStream(handle, stream); | |
313 | + cusparseStatus = cusparseSetStream(cusparseHandle, stream); | |
314 | + cusparseStatus = cusparseCreateMatDescr(&descrM); | |
315 | + cusparseStatus = cusparseSetMatType(descrM, CUSPARSE_MATRIX_TYPE_GENERAL); | |
316 | + if (baseM_) { | |
317 | + cusparseStatus = cusparseSetMatIndexBase(descrM, CUSPARSE_INDEX_BASE_ONE); | |
318 | + } | |
319 | + else { | |
320 | + cusparseStatus = cusparseSetMatIndexBase(descrM, CUSPARSE_INDEX_BASE_ZERO); | |
321 | + } | |
322 | + | |
323 | + cudaStatus = cudaMalloc((void**)&csrRowPtrM, sizeof(int) * (rowsA + 1)); //Projection of M_rowInd. | |
324 | + cudaStatus = cudaMalloc((void**)&csrColIndM, sizeof(int) * M_colInd.size()); //Projection of M_colInd. | |
325 | + cudaStatus = cudaMalloc((void**)&csrValM_, sizeof(cuDoubleComplex) * M.size()); //Projection of M. | |
326 | + cudaStatus = cudaMalloc((void**)&b_, sizeof(cuDoubleComplex) * b.size()); //Projection of b. | |
327 | + cudaStatus = cudaMalloc((void**)&P_, sizeof(cuDoubleComplex) * b.size()); //Projection of P. | |
328 | + | |
329 | + cudaStatus = cudaMemcpy(csrValM_, M.data(), M.size() * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice); | |
330 | + cudaStatus = cudaMemcpy(csrRowPtrM, M_rowInd.data(), M_rowInd.size() * sizeof(int), cudaMemcpyHostToDevice); | |
331 | + cudaStatus = cudaMemcpy(csrColIndM, M_colInd.data(), M_colInd.size() * sizeof(int), cudaMemcpyHostToDevice); | |
332 | + cudaStatus = cudaMemcpy(b_, b.data(), b.size() * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice); | |
333 | + // Output the current CUDA error. | |
334 | + //if (cudaStatus != cudaSuccess) { | |
335 | + // cout<<"%s " << cudaGetErrorString(cudaStatus) << endl; | |
336 | + //} | |
337 | + P->resize(rowsA); //P is the to-be-solved matrix in CPU. | |
338 | + //QR method. | |
339 | + cusolverStatus = cusolverSpZcsrlsvqr(handle, (int)rowsA, (int)nnA, descrM, csrValM_, csrRowPtrM, csrColIndM, b_, tol, reorder, P_, (int*)&singularity); | |
340 | + /*cusparseStatus = cusparseZsctr(*cusparseHandle, rowsA, g_z, g_Q, g_x, CUSPARSE_INDEX_BASE_ZERO);*/ | |
341 | + cudaStatus = cudaMemcpyAsync(P->data(), P_, sizeof(cuDoubleComplex) * rowsA, cudaMemcpyDeviceToHost, stream); | |
342 | + | |
343 | + cudaStatus = cudaFree(csrRowPtrM); | |
344 | + cudaStatus = cudaFree(csrColIndM); | |
345 | + cudaStatus = cudaFree(csrValM_); | |
346 | + cudaStatus = cudaFree(b_); | |
347 | + cudaStatus = cudaFree(P_); | |
348 | + vector<int>().swap(M_rowInd); | |
349 | + vector<int>().swap(M_colInd); | |
350 | + } | |
351 | + else { | |
352 | + //Work on CPU. | |
353 | + M.resize(6 * (LAYERS - 1) * 6 * (LAYERS - 1)); | |
354 | + b.resize(6 * (LAYERS - 1)); | |
355 | + | |
356 | + size_t ei = 0; | |
357 | + //Set constraints based on Gauss's Law. | |
358 | + for (size_t l = 0; l < LAYERS; l++) { | |
359 | + //Set the upward components for each layer. | |
360 | + //Layer "LAYERS-1" doesn't have a upward component. | |
361 | + if (l != LAYERS - 1) { | |
362 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = s[0]; | |
363 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = s[1]; | |
364 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = -sz[l]; | |
365 | + ei += 1; | |
366 | + } | |
367 | + //Set the downward components for each layer. | |
368 | + if (l != 0) { | |
369 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = s[0]; | |
370 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = s[1]; | |
371 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = sz[l]; | |
372 | + ei += 1; | |
373 | + } | |
374 | + } | |
375 | + //Enforce a continuous field across boundaries. | |
376 | + complex<double> arg, arg_in, B; | |
377 | + for (size_t l = 1; l < LAYERS; l++) { | |
378 | + complex<double> sz0 = sz[l - 1]; | |
379 | + complex<double> sz1 = sz[l]; | |
380 | + | |
381 | + //Representation of A = np.exp(1j * k0 * sz0 * (self.z[l] - self.z[l - 1])) | |
382 | + complex<double> A_in = k * sz0 * (z[l] - z[l - 1]); | |
383 | + complex<double> A_in2 = { -A_in.imag(), A_in.real() }; | |
384 | + complex<double> A = exp(A_in2); | |
385 | + | |
386 | + if (l < LAYERS - 1) { | |
387 | + double dl = z[l] - z[l + 1]; | |
388 | + arg_in = -k * sz1 * (complex<double>)dl; | |
389 | + arg = { -arg_in.imag(), arg_in.real() }; | |
390 | + B = exp(arg); | |
391 | + } | |
392 | + //if this is the second layer, use the simplified equations that account for the incident field | |
393 | + if (l == 1) { | |
394 | + M[ei * 6 * (LAYERS - 1) + ii(0, 0, 1)] = 1; | |
395 | + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 0)] = -1; | |
396 | + if (LAYERS > 2) { | |
397 | + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 1)] = -B; | |
398 | + } | |
399 | + b[ei] = -A * E[0]; | |
400 | + ei += 1; | |
401 | + | |
402 | + M[ei * 6 * (LAYERS - 1) + ii(0, 1, 1)] = 1; | |
403 | + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 0)] = -1; | |
404 | + if (LAYERS > 2) { | |
405 | + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 1)] = -B; | |
406 | + } | |
407 | + b[ei] = -A * E[l]; | |
408 | + ei += 1; | |
409 | + | |
410 | + M[ei * 6 * (LAYERS - 1) + ii(0, 2, 1)] = s[1]; | |
411 | + M[ei * 6 * (LAYERS - 1) + ii(0, 1, 1)] = sz0; | |
412 | + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 0)] = -s[1]; | |
413 | + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 0)] = sz1; | |
414 | + if (LAYERS > 2) { | |
415 | + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 1)] = -B * s[1]; | |
416 | + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 1)] = -B * sz1; | |
417 | + } | |
418 | + b[ei] = A * sz0 * E[1] - A * s[1] * E[2]; | |
419 | + ei += 1; | |
420 | + | |
421 | + M[ei * 6 * (LAYERS - 1) + ii(0, 0, 1)] = -sz0; | |
422 | + M[ei * 6 * (LAYERS - 1) + ii(0, 2, 1)] = -s[0]; | |
423 | + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 0)] = -sz1; | |
424 | + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 0)] = s[0]; | |
425 | + if (LAYERS > 2) { | |
426 | + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 1)] = B * sz1; | |
427 | + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 1)] = B * s[0]; | |
428 | + } | |
429 | + b[ei] = A * s[0] * E[2] - A * sz0 * E[0]; | |
430 | + ei += 1; | |
431 | + } | |
432 | + else if (l == LAYERS - 1) { | |
433 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A; | |
434 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = 1; | |
435 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -1; | |
436 | + ei += 1; | |
437 | + | |
438 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = A; | |
439 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = 1; | |
440 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = -1; | |
441 | + ei += 1; | |
442 | + | |
443 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = A * s[1]; | |
444 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = -A * sz0; | |
445 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = s[1]; | |
446 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = sz0; | |
447 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = -s[1]; | |
448 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = sz1; | |
449 | + ei += 1; | |
450 | + | |
451 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A * sz0; | |
452 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = -A * s[0]; | |
453 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = -sz0; | |
454 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = -s[0]; | |
455 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -sz1; | |
456 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = s[0]; | |
457 | + ei += 1; | |
458 | + } | |
459 | + else { | |
460 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A; | |
461 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = 1; | |
462 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -1; | |
463 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = -B; | |
464 | + ei += 1; | |
465 | + | |
466 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = A; | |
467 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = 1; | |
468 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = -1; | |
469 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = -B; | |
470 | + ei += 1; | |
471 | + | |
472 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = A * s[1]; | |
473 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = -A * sz0; | |
474 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = s[1]; | |
475 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = sz0; | |
476 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = -s[1]; | |
477 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = sz1; | |
478 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = -B * s[1]; | |
479 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = -B * sz1; | |
480 | + ei += 1; | |
481 | + | |
482 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A * sz0; | |
483 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = -A * s[0]; | |
484 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = -sz0; | |
485 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = -s[0]; | |
486 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -sz1; | |
487 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = s[0]; | |
488 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = B * sz1; | |
489 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = B * s[0]; | |
490 | + ei += 1; | |
491 | + } | |
492 | + } | |
493 | + | |
494 | + complex<double>* M_ = new complex<double>[M.size()]; | |
495 | + complex<double>* b_ = new complex<double>[b.size()]; | |
496 | + complex<double>* P_ = new complex<double>[b.size()]; | |
497 | + for (size_t i = 0; i < M.size(); i++) { | |
498 | + M_[i] = M[i]; | |
499 | + if (i < b.size()) b_[i] = b[i]; | |
500 | + } | |
501 | + LINALG_inverse(M_, (int)(6 * (LAYERS - 1))); | |
502 | + LINALG_zgemm((int)(6 * (LAYERS - 1)), (int)1, (int)(6 * (LAYERS - 1)), M_, (int)(6 * (LAYERS - 1)), b_, (int)1, P_, (int)1); | |
503 | + for (int i = 0; i < b.size(); i++) { | |
504 | + P->push_back(P_[i]); | |
505 | + } | |
506 | + | |
507 | + delete[] M_; | |
508 | + delete[] b_; | |
509 | + delete[] P_; | |
510 | + } | |
511 | +} | |
512 | + | |
513 | + | |
514 | +//Build matrix and get E. | |
515 | +void layersample::solve(vector<complex<double>>* E, bool CPU_op) { //orthogonalized E0 vectors. | |
516 | + size_t LAYERS = n.size(); | |
517 | + //Store the matrix and RHS vector. | |
518 | + vector<complex<double>> M; //All non-zero values in the sparse matrix. | |
519 | + vector<complex<double>> b; //The right hand side column vector. | |
520 | + | |
521 | + //Evaluate the linear system. | |
522 | + vector<complex<double>> P; //Solution of matrix. | |
523 | + layersample::generate_linsys(LAYERS, M, b, *E, &P, CPU_op); | |
524 | + | |
525 | + //Store the coefficients for each layer. | |
526 | + //Pt[3, L] transmission. Pr[3, L] reflection. | |
527 | + Pt.resize(3 * LAYERS); | |
528 | + Pr.resize(3 * LAYERS); | |
529 | + | |
530 | + for (size_t l = 0; l < LAYERS; l++) { | |
531 | + if (l == 0) { | |
532 | + Pt[0] = (complex<double>)(*E)[0]; | |
533 | + Pt[LAYERS] = (complex<double>)(*E)[1]; | |
534 | + Pt[2 * LAYERS] = (complex<double>)(*E)[2]; | |
535 | + } | |
536 | + else { | |
537 | + Pt[l] = P[ii(l, 0, 0)]; | |
538 | + Pt[l + LAYERS] = P[ii(l, 1, 0)]; | |
539 | + Pt[l + 2 * LAYERS] = P[ii(l, 2, 0)]; | |
540 | + } | |
541 | + | |
542 | + if (l == LAYERS - 1) { | |
543 | + Pr[LAYERS - 1] = 0; | |
544 | + Pr[2 * LAYERS - 1] = 0; | |
545 | + Pr[3 * LAYERS - 1] = 0; | |
546 | + } | |
547 | + else { | |
548 | + Pr[l] = P[ii(l, 0, 1)]; | |
549 | + Pr[l + LAYERS] = P[ii(l, 1, 1)]; | |
550 | + Pr[l + 2 * LAYERS] = P[ii(l, 2, 1)]; | |
551 | + } | |
552 | + } | |
553 | + vector<complex<double>>().swap(M); | |
554 | + vector<complex<double>>().swap(b); | |
555 | + vector<complex<double>>().swap(P); | |
556 | +} | |
0 | 557 | \ No newline at end of file | ... | ... |
1 | +++ a/src/layer.h | |
1 | +#ifndef LAYER_H | |
2 | +#define LAYER_H | |
3 | +#include <vector> | |
4 | +#include <complex> | |
5 | +using namespace std; | |
6 | + | |
7 | +void crossProduct(vector<complex<double>>* a, vector<complex<double>>* b, vector<complex<double>>* c); | |
8 | +complex<double> Norm(vector<complex<double>>* E); | |
9 | +void Normalize(vector<complex<double>>* E); | |
10 | +vector<vector<double> > transpose(vector<vector<double> >* matrix); | |
11 | +void orthogonalize(vector<complex<double>>* E_0, vector<complex<double>>* E0, vector<complex<double>>* d); | |
12 | + | |
13 | +class layersample { | |
14 | + | |
15 | +public: | |
16 | + double k; //wavenumber. | |
17 | + vector<complex<double>> n; //refractive index. | |
18 | + vector<double> z; //z postions. | |
19 | + vector<complex<double>> s; //propagation direction. Keep it for output. | |
20 | + vector<complex<double>> sz; //propagation direction. Keep it for output. | |
21 | + vector<complex<double>> d; //direction of propagation of the plane wave. | |
22 | + vector<complex<double>> Pt; //transimission. | |
23 | + vector<complex<double>> Pr; //reflection. | |
24 | + //Calulate the index of the field component associated with a layer. | |
25 | + // l is the layer index. c is the component(x, y, z). d is the direction(0for transmission, 1 for reflection). | |
26 | + size_t ii(size_t l, size_t c, size_t d); | |
27 | + | |
28 | + //Generate the linear system corresponding to this layered sample and plane wave. | |
29 | + void generate_linsys(size_t LAYERS, vector<complex<double>>& M, vector<complex<double>>& b, vector<complex<double>>& E, vector<complex<double>>* P, bool CPU_op); | |
30 | + | |
31 | + //Build matrix and get E. | |
32 | + void solve(vector<complex<double>>* E, bool CPU_op); | |
33 | +}; | |
34 | + | |
35 | +#endif | |
0 | 36 | \ No newline at end of file | ... | ... |
1 | +++ a/src/linalg.cpp | |
1 | +#include "linalg.h" | |
2 | + | |
3 | +// This file contains a set of wrapper functions that are linked to the corresponding functions in CLAPACK. | |
4 | +extern "C" { | |
5 | +#include "f2c.h" | |
6 | +#include "clapack.h" | |
7 | +#include "cblas.h" | |
8 | +} | |
9 | + | |
10 | + | |
11 | +void LINALG_zgetrf( | |
12 | + int M, | |
13 | + int N, | |
14 | + std::complex<double>* A, | |
15 | + int LDA, | |
16 | + int* IPIV) | |
17 | +{ | |
18 | + integer INFO; | |
19 | + zgetrf_((integer*)&M, (integer*)&N, (doublecomplex*)A, (integer*)&LDA, (integer*)IPIV, &INFO); | |
20 | +} | |
21 | + | |
22 | +void LINALG_zgetri( | |
23 | + size_t N, | |
24 | + std::complex<double>* A, | |
25 | + int LDA, | |
26 | + int* IPIV) | |
27 | +{ | |
28 | + integer LWORK = -1; | |
29 | + std::complex<double> WORK[1]; | |
30 | + integer INFO; | |
31 | + zgetri_((integer*)&N, (doublecomplex*)A, (integer*)&LDA, (integer*)IPIV, (doublecomplex*)WORK, &LWORK, &INFO); | |
32 | +} | |
33 | +void LINALG_inverse(std::complex<double>* A, int N) | |
34 | +{ | |
35 | + int* IPIV = new int[N + (size_t)1]; | |
36 | + integer LWORK = N * N; | |
37 | + std::complex<double>* WORK = new std::complex<double>[LWORK]; | |
38 | + integer INFO; | |
39 | + | |
40 | + zgetrf_((integer*)&N, (integer*)&N, (doublecomplex*)A, (integer*)&N, (integer*)IPIV, &INFO); | |
41 | + zgetri_((integer*)&N, (doublecomplex*)A, (integer*)&N, (integer*)IPIV, (doublecomplex*)WORK, &LWORK, &INFO); | |
42 | + | |
43 | + delete[] IPIV; | |
44 | + delete[] WORK; | |
45 | +} | |
46 | + | |
47 | +void LINALG_zgemm( | |
48 | + const int M, //A(M*K) B(K*N) | |
49 | + const int N, | |
50 | + const int K, | |
51 | + std::complex<double>* A, | |
52 | + const int LDA, //=K | |
53 | + std::complex<double>* B, | |
54 | + const int LDB, //=N | |
55 | + std::complex<double>* C, | |
56 | + const int LDC) //=columns of C. | |
57 | +{ | |
58 | + std::complex<double> alpha = 1; | |
59 | + std::complex<double> beta = 0; | |
60 | + cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, (OPENBLAS_CONST blasint)M, (OPENBLAS_CONST blasint)N, (OPENBLAS_CONST blasint)K, | |
61 | + &alpha, A, (OPENBLAS_CONST blasint)LDA, B, (OPENBLAS_CONST blasint)LDB, &beta, C, (OPENBLAS_CONST blasint)LDC); | |
62 | + | |
63 | +} | |
0 | 64 | \ No newline at end of file | ... | ... |
1 | +++ a/src/linalg.h | |
1 | +// This file contains a set of wrapper functions that are linked to the corresponding functions in CLAPACK | |
2 | +#include <complex> | |
3 | + | |
4 | +//Solve matrix inverse. | |
5 | +void LINALG_inverse(std::complex<double>* A, int N); | |
6 | + | |
7 | +//Solve matrix multiplication. C = A * B. | |
8 | +void LINALG_zgemm( | |
9 | + const int M, //A(M*K) B(K*N) | |
10 | + const int N, | |
11 | + const int K, | |
12 | + std::complex<double>* A, | |
13 | + const int LDA, //=K | |
14 | + std::complex<double>* B, | |
15 | + const int LDB, //=N | |
16 | + std::complex<double>* C, | |
17 | + const int LDC); //=columns of C. | |
0 | 18 | \ No newline at end of file | ... | ... |
1 | +++ a/src/main.cpp | |
1 | +//Update the data output format in example_layer(). | |
2 | +#include "layer.h" | |
3 | +#include <windows.h> | |
4 | + | |
5 | +#include <fstream> | |
6 | +#include <iostream> | |
7 | +#include "stim/parser/arguments.h" | |
8 | + | |
9 | +using namespace std; | |
10 | +#define PI 3.14159265358979323846264338328 | |
11 | + | |
12 | +bool ASCII_output = false; | |
13 | +bool CPU_op = false; | |
14 | + | |
15 | +void advertise() { | |
16 | + std::cout << std::endl << std::endl; | |
17 | + std::cout << "=========================================================================" << std::endl; | |
18 | + //std::cout << "Thank you for using the NetMets network comparison tool!" << std::endl; | |
19 | + std::cout << "Scalable Tissue Imaging and Modeling (STIM) Lab, University of Houston" << std::endl; | |
20 | + std::cout << "=========================================================================" << std::endl << std::endl; | |
21 | +} | |
22 | + | |
23 | +//Calculate Ez from vector d and Ex Ey. | |
24 | +void E_Cal(vector<complex<double>>* E, vector<complex<double>>* d) { | |
25 | + if (d->size() == 2) { | |
26 | + complex<double> dz = sqrt(1.0 + 0i - pow((*d)[0], 2) - pow((*d)[1], 2)); | |
27 | + d->push_back(dz); | |
28 | + } | |
29 | + E->push_back(-((*E)[0] * (*d)[0] + (*E)[1] * (*d)[1]) / (*d)[2]); | |
30 | +} | |
31 | +void output_binary(std::string filename, layersample& layers) { | |
32 | + size_t L = layers.n.size(); // get the number of layers | |
33 | + std::ofstream outFile; | |
34 | + outFile.open(filename, std::ofstream::binary); // open the output file for binary writing | |
35 | + if (outFile) { | |
36 | + outFile.write((char*)&layers.k, sizeof(double)); | |
37 | + outFile.write((char*)&layers.d[0], sizeof(double)); | |
38 | + outFile.write((char*)&layers.d[1], sizeof(double)); | |
39 | + outFile.write((char*)&layers.n[0], sizeof(double) * 2); | |
40 | + | |
41 | + for (size_t i = 0; i < L; i++) { | |
42 | + outFile.write((char*)&layers.z[i], sizeof(double)); | |
43 | + outFile.write((char*)&layers.sz[i], 2 * sizeof(double)); | |
44 | + outFile.write((char*)&layers.Pt[i], 2 * sizeof(double)); | |
45 | + outFile.write((char*)&layers.Pt[i + L], 2 * sizeof(double)); | |
46 | + outFile.write((char*)&layers.Pt[i + 2 * L], 2 * sizeof(double)); | |
47 | + outFile.write((char*)&layers.Pr[i], 2 * sizeof(double)); | |
48 | + outFile.write((char*)&layers.Pr[i + L], 2 * sizeof(double)); | |
49 | + outFile.write((char*)&layers.Pr[i + 2 * L], 2 * sizeof(double)); | |
50 | + } | |
51 | + outFile.close(); | |
52 | + } | |
53 | + else { | |
54 | + std::cout << "ERROR opening output file for binary writing: " << filename << std::endl; | |
55 | + } | |
56 | +} | |
57 | + | |
58 | +void output_txt(std::string filename, layersample& layers) { | |
59 | + size_t L = layers.n.size(); // get the number of layers | |
60 | + std::ofstream outFile; | |
61 | + outFile.open(filename); // open the output file for text writing | |
62 | + if (!outFile) { | |
63 | + std::cout << "ERROR: Could not open file " << filename << std::endl; | |
64 | + exit(1); | |
65 | + } | |
66 | + int width = 15; | |
67 | + | |
68 | + outFile << "--------------------------------" << endl; | |
69 | + outFile << "The wavenumber at free space is : " << layers.k << endl; | |
70 | + for (size_t i = 0; i < L; i++) { | |
71 | + if (i == 0) { | |
72 | + outFile << "--------------------------------" << endl; | |
73 | + outFile << "LAYER " << i << " (z = " << layers.z[i] << ")" << endl; | |
74 | + outFile << "refractive index: " << layers.n[i].real() << " + i " << layers.n[i].imag() << endl; | |
75 | + outFile << "----------------------" << endl; | |
76 | + outFile << "sx = " << setw(width) << layers.s[0].real() << " + i " << layers.s[0].imag() << endl; | |
77 | + outFile << "sy = " << setw(width) << layers.s[1].real() << " + i " << layers.s[1].imag() << endl; | |
78 | + outFile << "sz = " << setw(width) << layers.sz[i].real() << " + i " << layers.sz[i].imag() << endl; | |
79 | + outFile << "----->>>>>" << endl; | |
80 | + outFile << " X = " << setw(width) << layers.Pt[i].real() << " + i " << layers.Pt[i].imag() << endl; | |
81 | + outFile << " Y = " << setw(width) << layers.Pt[i + L].real() << " + i " << layers.Pt[i + L].imag() << endl; | |
82 | + outFile << " Z = " << setw(width) << layers.Pt[i + 2 * L].real() << " + i " << layers.Pt[i + 2 * L].imag() << endl; | |
83 | + outFile << "<<<<<-----" << endl; | |
84 | + outFile << " X = " << setw(width) << layers.Pr[i].real() << " + i " << layers.Pr[i].imag() << endl; | |
85 | + outFile << " Y = " << setw(width) << layers.Pr[i + L].real() << " + i " << layers.Pr[i + L].imag() << endl; | |
86 | + outFile << " Z = " << setw(width) << layers.Pr[i + 2 * L].real() << " + i " << layers.Pr[i + 2 * L].imag() << endl; | |
87 | + } | |
88 | + else { | |
89 | + outFile << "----------------------" << endl; | |
90 | + outFile << "LAYER " << i << " (z = " << layers.z[i] << ")" << endl; | |
91 | + outFile << "refractive index: " << layers.n[i].real() << " + i " << layers.n[i].imag() << endl; | |
92 | + outFile << "----------------------" << endl; | |
93 | + outFile << "sx = " << setw(width) << layers.s[0].real() << " + i " << layers.s[0].imag() << endl; | |
94 | + outFile << "sy = " << setw(width) << layers.s[1].real() << " + i " << layers.s[1].imag() << endl; | |
95 | + outFile << "sz = " << setw(width) << layers.sz[i].real() << " + i " << layers.sz[i].imag() << endl; | |
96 | + outFile << "----->>>>>" << endl; | |
97 | + outFile << " X = " << setw(width) << layers.Pt[i].real() << " + i " << layers.Pt[i].imag() << endl; | |
98 | + outFile << " Y = " << setw(width) << layers.Pt[i + L].real() << " + i " << layers.Pt[i + L].imag() << endl; | |
99 | + outFile << " Z = " << setw(width) << layers.Pt[i + 2 * L].real() << " + i " << layers.Pt[i + 2 * L].imag() << endl; | |
100 | + | |
101 | + outFile << "<<<<<-----" << endl; | |
102 | + outFile << " X = " << setw(width) << layers.Pr[i].real() << " + i " << layers.Pr[i].imag() << endl; | |
103 | + outFile << " Y = " << setw(width) << layers.Pr[i + L].real() << " + i " << layers.Pr[i + L].imag() << endl; | |
104 | + outFile << " Z = " << setw(width) << layers.Pr[i + 2 * L].real() << " + i " << layers.Pr[i + 2 * L].imag() << endl; | |
105 | + } | |
106 | + | |
107 | + } | |
108 | + outFile.close(); | |
109 | +} | |
110 | + | |
111 | +void calculate_layer(std::string outName, | |
112 | + vector<complex<double>>* ns, // the refractive index. | |
113 | + vector<double>* depths, // z-direction position. | |
114 | + vector<complex<double>>* E0, // the initialized E0. | |
115 | + vector<complex<double>>* d0, // direction of propagation of the plane wave. | |
116 | + double k0) { // the wavenumber at free space. | |
117 | + std::string outName_ext(outName, size(outName)-4, size(outName)-1); // extract the extension of the output file | |
118 | + E_Cal(E0, d0); // make sure that both vectors are orthogonal. | |
119 | + | |
120 | + //Creat a new layersample and initialize. | |
121 | + layersample Layer1; // create a layered sample | |
122 | + Layer1.n = *ns; // set a pointer to the refractive indices | |
123 | + Layer1.z = *depths; // set a pointer to the layer depths | |
124 | + Layer1.d = *d0; | |
125 | + Layer1.k = k0; | |
126 | + | |
127 | + LARGE_INTEGER t1, t2, tc; // Timing. | |
128 | + QueryPerformanceFrequency(&tc); | |
129 | + QueryPerformanceCounter(&t1); | |
130 | + Layer1.solve(E0, CPU_op); // Solve for the substrate field in GPU. | |
131 | + QueryPerformanceCounter(&t2); | |
132 | + std::cout << "time for 'solving linear functions':" << (t2.QuadPart - t1.QuadPart) / (double)tc.QuadPart << "ms."<< std::endl; | |
133 | + //output(outName, Layer1); | |
134 | + if (ASCII_output) | |
135 | + output_txt(outName, Layer1); | |
136 | + else | |
137 | + output_binary(outName, Layer1); | |
138 | +} | |
139 | + | |
140 | +//Main function for example_layer. | |
141 | +int main(int argc, char* argv[]) { | |
142 | + stim::arglist args; | |
143 | + | |
144 | + //Basic argument lists. | |
145 | + args.add("help", "prints this help"); | |
146 | + args.add("s", "propagation direction vector (x, y)", "0.5 0.0", "[-1.0, 1.0]"); | |
147 | + args.add("l", "wavelength", "5.0", "in arbitrary units (ex. um)"); | |
148 | + args.add("Ex", "complex amplitude (x direction)", "0.5 0.0"); | |
149 | + args.add("Ey", "complex amplitude (y direction)", "0.5 0.0"); | |
150 | + args.add("z", "layer positions"); | |
151 | + args.add("n", "layer optical path length (real refractive index)", "1.0 1.4 1.4 1.0"); | |
152 | + args.add("kappa", "layer absorbance (imaginary refractive index)"); | |
153 | + args.add("ascii", "output as an ASCII file"); | |
154 | + args.add("CPU", "execute the program in CPU"); | |
155 | + args.parse(argc, argv); | |
156 | + | |
157 | + if (args["help"].is_set()) { // test for help | |
158 | + advertise(); // output the advertisement | |
159 | + std::cout << args.str(); // output arguments | |
160 | + exit(1); // exit | |
161 | + } | |
162 | + | |
163 | + ASCII_output = args["ascii"]; // if the ascii flag is set, output an ascii file | |
164 | + CPU_op = args["CPU"]; // if the CPU_op is set, run the program in CPU. | |
165 | + std::string outName; | |
166 | + if (args.nargs() == 1) { | |
167 | + outName = args.arg(0); | |
168 | + } | |
169 | + else if (args.nargs() == 0) { | |
170 | + if (ASCII_output) | |
171 | + outName = "output.txt"; | |
172 | + else | |
173 | + outName = "output.lyr"; | |
174 | + } | |
175 | + else { | |
176 | + std::cout << "ERROR: Too many arguments." << std::endl; | |
177 | + exit(1); | |
178 | + } | |
179 | + | |
180 | + | |
181 | + vector<complex<double>> d; //direction of propagation of the plane wave Init: {0.5, 0} | |
182 | + if (args["s"].nargs() == 2) { | |
183 | + d.push_back({ (double)args["s"].as_float(0), 0 }); | |
184 | + d.push_back({ (double)args["s"].as_float(1), 0 }); | |
185 | + } | |
186 | + | |
187 | + double l0 = (double)args["l"].as_float(0); //wavelength.Init: l0 = 5; | |
188 | + double k0 = 2 * PI / l0; //Calculate the free-space wavenumber. | |
189 | + | |
190 | + complex<double> Ex = { (double)args["Ex"].as_float(0), (double)args["Ex"].as_float(1) }; //Input E. Init: {1, 1, 0} | |
191 | + complex<double> Ey = { (double)args["Ey"].as_float(0), (double)args["Ey"].as_float(1) }; | |
192 | + vector<complex<double>> E0; | |
193 | + E0.push_back(Ex); | |
194 | + E0.push_back(Ey); | |
195 | + | |
196 | + //const int LAYERS = args["L"].as_int(); //LAYERS Init: 4 | |
197 | + | |
198 | + vector<double> depths; | |
199 | + vector<complex<double>> ns; | |
200 | + size_t i = 0; | |
201 | + while(args["n"].is_set() && args["n"].as_float(i)!=0) { //n is the real part. | |
202 | + if (args["kappa"].is_set()) //kappa is the imaginary part. | |
203 | + ns.push_back({ (double)args["n"].as_float(i), (double)args["kappa"].as_float(i) }); | |
204 | + else | |
205 | + ns.push_back({ (double)args["n"].as_float(i), 0 }); //ns <- n + i * kappa | |
206 | + i++; | |
207 | + } | |
208 | + for (size_t j = 0; j < i; j++) | |
209 | + if (args["z"].is_set()) | |
210 | + depths.push_back((double)args["z"].as_float(i)); | |
211 | + else { | |
212 | + depths.push_back((double)-100 + j * 200 / i); | |
213 | + } | |
214 | + /*---------------------------------------example_layer--------------------------------------------*/ | |
215 | + | |
216 | + calculate_layer(outName, &ns, &depths, &E0, &d, k0); | |
217 | + | |
218 | + return 0; | |
219 | + std::cin.get(); | |
220 | +} | |
0 | 221 | \ No newline at end of file | ... | ... |