Commit 71d5696d385e7d2e662bbd1e98264293479ed780
0 parents
First commit after development and testing
Showing
12 changed files
with
1182 additions
and
0 deletions
Show diff stats
1 | +++ a/CMakeLists.txt | ||
# Build script for the "multilayer" executable.
# Requires CUDA (cuBLAS, cuSPARSE, cuSOLVER, cuFFT), the STIM headers, CLAPACK and OpenBLAS.
cmake_minimum_required(VERSION 3.12)

project(multilayer)

# Make the Find*.cmake modules shipped in the project root (e.g. FindSTIM.cmake)
# visible to find_package().
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}")

# Default to a Release build. CMAKE_BUILD_TYPE is ignored by multi-config
# generators (Visual Studio, Xcode), so only default it for single-config ones.
get_property(multilayer_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT multilayer_is_multi_config AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

# On MS Visual Studio, place the Debug and Release artifacts in one directory.
# OUTPUT_DIRECTORY may be supplied with -DOUTPUT_DIRECTORY=<dir>; it defaults to
# the binary directory. These variables must be set before the target is created.
if(MSVC)
  if(NOT OUTPUT_DIRECTORY)
    set(OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
  endif()
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}")
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
  set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}")
  set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
endif()

# ----------------------------- find packages -----------------------------
# Threads and X11 are probed but optional; their results are not referenced
# anywhere in this file.
find_package(Threads)
find_package(X11)

# CUDA, mostly for linear algebra using cuBLAS / cuSPARSE / cuSOLVER
find_package(CUDA REQUIRED)

# the STIM library (located by FindSTIM.cmake in the project root)
find_package(STIM REQUIRED)

# LAPACK and its supporting libraries
find_package(clapack CONFIG REQUIRED)
find_package(OpenBLAS CONFIG REQUIRED)

# ----------------------------- create the executable ---------------------
# CONFIGURE_DEPENDS re-runs the glob at build time so newly added sources are
# picked up without a manual re-configure.
file(GLOB SRC CONFIGURE_DEPENDS "${PROJECT_SOURCE_DIR}/src/*.cpp")

add_executable(multilayer
  ${SRC}
)

target_include_directories(multilayer
  PRIVATE
    ${CUDA_INCLUDE_DIRS}
    ${STIM_INCLUDE_DIRS}
)

# Silence the MSVC "unsafe CRT / STL function" warnings for this target only.
if(MSVC)
  target_compile_definitions(multilayer
    PRIVATE
      _CRT_SECURE_NO_WARNINGS
      _SCL_SECURE_NO_WARNINGS
  )
endif()

target_link_libraries(multilayer
  PRIVATE
    ${CUDA_LIBRARIES}
    ${CUDA_CUBLAS_LIBRARIES}
    ${CUDA_cusparse_LIBRARY}
    ${CUDA_cusolver_LIBRARY}
    ${CUDA_CUFFT_LIBRARIES}
    OpenBLAS::OpenBLAS
    f2c
    lapack
)
0 | \ No newline at end of file | 70 | \ No newline at end of file |
1 | +++ a/FindSTIM.cmake | ||
# FindSTIM.cmake -- locates the headers of the STIM library.
#
# Input:
#   STIM_ROOT - CMake variable or environment variable naming the directory that
#               contains the "stim" subdirectory (the root of the STIM repository).
#
# Result variables:
#   STIM_FOUND        - TRUE when stim/image/image.h was located
#   STIM_INCLUDE_DIRS - directory to add to the include path

include(FindPackageHandleStandardArgs)

# Honor a STIM_ROOT given on the command line (-DSTIM_ROOT=...); only fall back
# to the environment variable when the CMake variable is not set.
if(NOT STIM_ROOT)
  set(STIM_ROOT "$ENV{STIM_ROOT}")
endif()

# A missing STIM_ROOT is not fatal by itself: the header may still be found on
# the default search paths, and find_package_handle_standard_args() below
# reports the failure (fatally when REQUIRED was requested).
if(NOT STIM_ROOT AND NOT STIM_FIND_QUIETLY)
  message(WARNING "STIM_ROOT is not set; searching only the default include paths for STIM.")
endif()

find_path(STIM_INCLUDE_DIRS
  NAMES stim/image/image.h
  PATHS ${STIM_ROOT}
  DOC "Path to STIM include directory."
)

find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIRS)
1 | +++ a/docs/Readme_BytesOrder.txt | ||
1 | +Order of "output.lyr" parameters. | ||
2 | + | ||
3 | +The wavenumber in free space: k0 double 8B | ||
4 | +The direction of propagation: d double*2 16B | ||
5 | +The refractive index in the first layer: n[0] complex<double> 16B | ||
6 | + | ||
7 | +for i in LAYERS: | ||
8 | + z positions[i]: z[i] double 8B * LAYERS | ||
9 | + z-component of propagation directions: sz[i] complex<double> 16B * LAYERS | ||
10 | + Transmission: Ptx[i] complex<double> 16B * LAYERS | ||
11 | + Transmission: Pty[i] complex<double> 16B * LAYERS | ||
12 | + Transmission: Ptz[i] complex<double> 16B * LAYERS | ||
13 | + Reflection: Prx[i] complex<double> 16B * LAYERS | ||
14 | + Reflection: Pry[i] complex<double> 16B * LAYERS | ||
15 | + Reflection: Prz[i] complex<double> 16B * LAYERS | ||
16 | + | ||
17 | + | ||
18 | +Total number of 8-byte double values stored in the file (5 header values plus 15 per layer): | ||
19 | + 15 * LAYERS + 5 |
No preview for this file type
1 | +++ a/docs/testcases.txt | ||
1 | +output.lyr --n 1.0 1.2 1.3 1.4 1.5 1.4 1.3 1.2 1.1 --kappa 0.0 0.01 0.02 0.03 0.04 0.05 0.6 0.07 0.08 --zPos -50 -40 -30 -20 -10 0 10 20 30 | ||
2 | + | ||
3 | +output.lyr --n 1.0 2.0 1.0 --kappa 0.0 0.0 0.0 --z -50 0 50 | ||
4 | + | ||
5 | +defaults: | ||
6 | + | ||
7 | +*) If kappa is not specified, set them all to zero | ||
8 | + | ||
9 | +*) If z is not specified, create equally spaced layers between -100 and 100 | ||
10 | + | ||
11 | +*) Output: Place a color bar next to each image (colorbar()) | ||
0 | \ No newline at end of file | 12 | \ No newline at end of file |
1 | +++ a/layerview.py | ||
1 | +# create a function that displays the output when run this way: | ||
2 | +# python layerview.py output.dat | ||
3 | + | ||
4 | +import sys | ||
5 | +import os | ||
6 | +from time import time | ||
7 | +import subprocess | ||
8 | +import struct | ||
9 | +import numpy as np | ||
10 | +import matplotlib | ||
11 | +import math | ||
12 | +import matplotlib.pyplot as plt | ||
13 | + | ||
14 | +from mpl_toolkits.axes_grid1 import ImageGrid | ||
15 | + | ||
def intensity(E):
    """Return the real-valued intensity of a complex field.

    The intensity is the sum over the last axis of E multiplied by its complex
    conjugate, i.e. the squared magnitude summed over the field components.
    """
    squared_magnitude = E * np.conj(E)
    return np.real(np.sum(squared_magnitude, axis=-1))
20 | + | ||
# evaluate a solved homogeneous substrate
def evaluate(Depths, k, d, n0, sz, Pt, Pr, X, Y, Z):
    """Evaluate the electric field of a solved layered sample at the given points.

    Parameters:
        Depths: z position of each layer boundary (one entry per layer).
        k:      wavenumber used in every phase term.
        d:      first two components of the propagation direction.
        n0:     refractive index of the first layer; s = d * n0.
        sz:     z-component of the propagation direction in each layer.
        Pt, Pr: 3 x LAYERS coefficients of the transmitted / reflected waves.
        X, Y, Z: arrays of sample coordinates (indexed with [:, :] below, so 2D).

    Returns:
        A complex array shaped Z.shape + (3,) holding the field components at
        every sample point.
    """
    Depths = np.array(Depths)
    sz = np.array(sz)
    Pt = np.array(Pt)
    Pr = np.array(Pr)
    s = np.array(d) * n0

    # allocate space for layer indices
    # (builtin int: the np.int alias was removed from NumPy in release 1.24)
    LI = np.zeros(Z.shape, dtype=int)

    # find the layer index for each sample point
    L = len(Depths)
    LI[Z < Depths[0]] = 0
    for l in range(L - 1):
        idx = np.logical_and(Z > Depths[l], Z <= Depths[l + 1])
        LI[idx] = l
    LI[Z > Depths[-1]] = L - 1

    # calculate the appropriate phase shift for the wave transmitted through the layer
    Ph_t = np.exp(1j * k * sz[LI] * (Z - Depths[LI]))

    # calculate the appropriate phase shift for the wave reflected off of the layer boundary
    LIp = LI + 1
    LIp[LIp >= L] = 0               # keep the index in range; those samples are zeroed below
    Ph_r = np.exp(-1j * k * sz[LI] * (Z - Depths[LIp]))
    Ph_r[LI >= L - 1] = 0           # no reflected wave is evaluated in the last layer

    # calculate the phase shift based on the X and Y positions
    Ph_xy = np.exp(1j * k * (s[0] * X + s[1] * Y))

    # apply the phase shifts
    Et = Pt[:, LI] * Ph_t[:, :]
    Er = Pr[:, LI] * Ph_r[:, :]

    # add everything together coherently
    E = (Et + Er) * Ph_xy[:, :]

    # return the electric field with the component axis last
    return np.moveaxis(E, 0, -1)
61 | + | ||
class planewave:
    """Container for the solved plane-wave parameters of a layered sample."""

    def __init__(self):
        self.LAYERS = 0                     # number of layers (int)
        self.depths = []                    # z positions of layers, e.g. [1, 5, ..., 10] (double)
        self.k0 = 0.0                       # wavenumber in free space (double)
        self.d = []                         # direction of propagation, e.g. [0.5, 0] (double)
        self.n0 = 0.0 + 0.0j                # refractive index of the first layer (complex<double>)
        self.sz = []                        # z-component of propagation for each layer (complex<double>)
        self.Pt = [[] for i in range(3)]    # transmission, one list per field component (complex<double>)
        self.Pr = [[], [], []]              # reflection, one list per field component (complex<double>)
72 | + | ||
# display a binary file produced using the coupled wave C code
def layer(strc):
    """Load a binary layer file and plot the field it describes.

    The file is a flat sequence of 8-byte doubles: 5 header values
    (k0, d[0], d[1], Re(n0), Im(n0)) followed by 15 values per layer
    (z, sz, Pt x/y/z, Pr x/y/z; every complex number stored as real, imag).

    Parameters:
        strc: path of the binary file to display.

    Raises:
        ValueError: if the file is too small to hold the header and one layer.
    """
    # create an empty plane wave structure
    L = planewave()
    L.depths = []
    L.d = []
    L.sz = []
    L.Pt = [[], [], []]
    L.Pr = [[], [], []]

    # calculate the number of layers in the sample from the file size
    n_doubles = os.path.getsize(strc) // 8
    L.LAYERS = (n_doubles - 5) // 15
    if L.LAYERS < 1:
        raise ValueError("'" + str(strc) + "' is too small to contain any layer data")

    # load the raw layer data; the with-block guarantees the file is closed
    count = 15 * L.LAYERS + 5
    with open(strc, "rb") as f:
        data_raw = struct.unpack('d' * count, f.read(count * 8))
    data = np.asarray(data_raw)

    L.k0 = data[0]
    L.d.append(data[1])
    L.d.append(data[2])
    L.n0 = complex(data[3], data[4])

    # load each layer's plane waves from the binary data
    for i in range(L.LAYERS):
        base = 5 + 15 * i                               # first value of layer i
        L.depths.append(data[base])
        L.sz.append(complex(data[base + 1], data[base + 2]))
        for c in range(3):                              # x, y, z components
            L.Pt[c].append(complex(data[base + 3 + 2 * c], data[base + 4 + 2 * c]))
            L.Pr[c].append(complex(data[base + 9 + 2 * c], data[base + 10 + 2 * c]))

    N = 512                                             # number of samples along x
    M = 1024                                            # number of samples along z
    # TODO: don't hard-code the dimensions - calculate them from the sample information in the file
    D = [-110, 110, 0, 60]                              # simulation extent: [z min, z max, x min, x max]
    x = np.linspace(D[2], D[3], N)                      # set the sample points for the simulation
    z = np.linspace(D[0], D[1], M)
    [X, Z] = np.meshgrid(x, z)                          # create a mesh grid to evaluate layers
    Y = np.zeros(X.shape)

    # evaluate the field across all layers
    E = evaluate(L.depths, L.k0, L.d, L.n0, L.sz, L.Pt, L.Pr, X, Y, Z)
    Er = np.real(E)
    I = intensity(E)

    # imshow extent is (left, right, bottom, top). Column 0 of the image holds
    # x = D[2], so the left edge must be D[2]; row 0 holds z = D[0], which the
    # default origin draws at the top.
    extent = (D[2], D[3], D[1], D[0])

    plt.set_cmap("afmhot")                              # set the color map
    for c, title in enumerate(("Ex", "Ey", "Ez")):
        plt.subplot(1, 4, c + 1)
        plt.imshow(Er[:, :, c], extent=extent)
        plt.title(title)

    plt.subplot(1, 4, 4)
    plt.imshow(I, extent=extent)
    plt.colorbar()
    plt.title("I")

    plt.show()
161 | + | ||
# function displays usage text to the console
def usage():
    """Print a short description of the command-line invocation."""
    for text in ("Usage:", " layerview input.dat"):
        print(text)
166 | + | ||
if __name__ == '__main__':
    start = time()

    # without a file argument there is nothing to display: show the usage text and stop
    if len(sys.argv) < 2:
        usage()
        # sys.exit() rather than the interactive exit() helper, which the
        # site module may not provide (e.g. under "python -S")
        sys.exit()

    # display the given data file
    layer(sys.argv[1])

    end = time()
    print("The elapsed time is " + str(end - start) + " s. ")
0 | \ No newline at end of file | 177 | \ No newline at end of file |
1 | +++ a/src/layer.cpp | ||
1 | +#include "layer.h" | ||
2 | +#include "linalg.h" //LAPACKE support for Visual Studio | ||
3 | + | ||
4 | +#include <cusparse.h> | ||
5 | +#include <cuda_runtime.h> | ||
6 | +//#include "cublas_v2.h" | ||
7 | +#include "cusolverSp.h" | ||
8 | +/*----------------------------GPU-----------------------------*/ | ||
9 | + | ||
10 | + | ||
//Cross product of two 3-component vectors.
//The three components of a x b are appended to c with push_back; c is not
//cleared first, so the caller is expected to pass an empty vector.
void crossProduct(vector<complex<double>>* a, vector<complex<double>>* b,   //The given vectors.
                  vector<complex<double>>* c) {                             //Receives the result.
    const vector<complex<double>>& u = *a;
    const vector<complex<double>>& v = *b;
    c->push_back(u[1] * v[2] - u[2] * v[1]);    //x component
    c->push_back(u[2] * v[0] - u[0] * v[2]);    //y component
    c->push_back(u[0] * v[1] - u[1] * v[0]);    //z component
}
18 | + | ||
//Calculate the norm of a vector: the square root of the sum of its squared elements.
//The elements are squared without complex conjugation, so the result is itself complex.
complex<double> Norm(vector<complex<double>>* E) {
    complex<double> total = 0;
    for (const complex<double>& element : *E) {
        total += element * element;
    }
    return sqrt(total);
}
27 | + | ||
//Normalize a vector in place: every element is divided by the square root of the
//sum of the squared elements (squared without complex conjugation).
//A vector whose squared elements sum to zero is divided by zero; no guard is applied.
void Normalize(vector<complex<double>>* E) {
    complex<double> sum = 0;
    for (const complex<double>& element : *E) {
        sum += element * element;
    }
    //The divisor is the same for every element, so compute it once.
    const complex<double> length = sqrt(sum);
    for (complex<double>& element : *E) {
        element = element / length;
    }
}
39 | + | ||
40 | +//Orthogonalization. | ||
41 | +void orthogonalize(vector<complex<double>>* E_0rtho, vector<complex<double>>* E0, vector<complex<double>>* d) { | ||
42 | + vector<complex<double>> s; | ||
43 | + if (d->size() == 2) { | ||
44 | + complex<double> dz = sqrt(1.0+0i - pow((*d)[0], 2) - pow((*d)[1], 2)); | ||
45 | + d->push_back(dz); | ||
46 | + } | ||
47 | + crossProduct(E0, d, &s); | ||
48 | + crossProduct(d, &s, E_0rtho); | ||
49 | + vector<complex<double>>().swap(s); | ||
50 | +} | ||
51 | + | ||
52 | +/*--------------------------------------------------Define Class layersample.--------------------------------------------------------*/ | ||
53 | + | ||
54 | +/*Do not try to replace "int" as "size_t". | ||
55 | +This will result in a bunch of warnings and if we continuously change the type of M_rowInd and M_colInd, the EXCEPTION will occur again.*/ | ||
56 | +size_t layersample::ii(size_t l, size_t c, size_t d) { //ii(l, c, d) means the column indexes for every element. | ||
57 | + return l * 6 + d * 3 + c - 3; | ||
58 | +} | ||
59 | + | ||
60 | +void layersample::generate_linsys(size_t LAYERS, | ||
61 | + vector<complex<double>>& M, //All non-zero values in "A" matirx.(A * X = b) | ||
62 | + vector<complex<double>>& b, //The right hand side column vector. | ||
63 | + vector<complex<double>>& E, //orthogonalized E0 vectors | ||
64 | + vector<complex<double>>* P, | ||
65 | + bool CPU_op) { //Solution of the matrices multiplication. | ||
66 | + //Calculate the sz component for each layer. | ||
67 | + s.clear(); //s is the plane wave direction scaled by the refractive index. | ||
68 | + for (size_t i = 0; i < 2; i++) | ||
69 | + s.push_back(d[i] * n[0]); | ||
70 | + sz.clear(); | ||
71 | + for (int l = 0; l < LAYERS; l++) { | ||
72 | + sz.push_back(sqrt(pow(n[l], 2) - pow(s[0], 2) - pow(s[1], 2))); | ||
73 | + } | ||
74 | + | ||
75 | + if (!CPU_op){ | ||
76 | + //Computer in GPU. | ||
77 | + vector<int> M_rowInd; //Sparse matrix M CSR ->row index | ||
78 | + vector<int> M_colInd; //Sparse matrix M CSR ->number of elements | ||
79 | + M_rowInd.push_back(0); | ||
80 | + ////Build M by setting constraints based on Gauss's Law. | ||
81 | + for (size_t l = 0; l < LAYERS; l++) { | ||
82 | + //Set the upward components for each layer. | ||
83 | + //Layer "LAYERS-1" doesn't have a upward component. | ||
84 | + if (l != LAYERS - 1) { | ||
85 | + M.push_back(s[0]); | ||
86 | + M_colInd.push_back((int)ii(l, 0, 1)); | ||
87 | + M.push_back(s[1]); | ||
88 | + M_colInd.push_back((int)ii(l, 1, 1)); | ||
89 | + M.push_back(-sz[l]); | ||
90 | + M_colInd.push_back((int)ii(l, 2, 1)); | ||
91 | + M_rowInd.push_back((int)M.size()); | ||
92 | + b.push_back(0); | ||
93 | + } | ||
94 | + //Set the downward components for each layer. | ||
95 | + if (l != 0) { | ||
96 | + M.push_back(s[0]); | ||
97 | + M_colInd.push_back((int)ii(l, 0, 0)); | ||
98 | + M.push_back(s[1]); | ||
99 | + M_colInd.push_back((int)ii(l, 1, 0)); | ||
100 | + M.push_back(sz[l]); | ||
101 | + M_colInd.push_back((int)ii(l, 2, 0)); | ||
102 | + M_rowInd.push_back((int)M.size()); | ||
103 | + b.push_back(0); | ||
104 | + } | ||
105 | + } | ||
106 | + //Continue to build M by enforcing a continuous field across boundaries. | ||
107 | + complex<double> arg, arg_in, B; | ||
108 | + for (size_t l = 1; l < LAYERS; l++) { | ||
109 | + complex<double> sz0 = sz[l - 1]; | ||
110 | + complex<double> sz1 = sz[l]; | ||
111 | + | ||
112 | + //Representation of A = np.exp(1j * k0 * sz0 * (self.z[l] - self.z[l - 1])) | ||
113 | + complex<double> A_in = k * sz0 * (z[l] - z[l - 1]); | ||
114 | + complex<double> A_in2 = { -A_in.imag(), A_in.real() }; | ||
115 | + complex<double> A = exp(A_in2); | ||
116 | + | ||
117 | + if (l < LAYERS - 1) { | ||
118 | + double dl = z[l] - z[l + 1]; | ||
119 | + arg_in = -k * sz1 * (complex<double>)dl; | ||
120 | + arg = { -arg_in.imag(), arg_in.real() }; | ||
121 | + B = exp(arg); | ||
122 | + } | ||
123 | + //if this is the second layer, use the simplified equations that account for the incident field | ||
124 | + if (l == 1) { | ||
125 | + M.push_back(1); | ||
126 | + M_colInd.push_back((int)ii(0, 0, 1)); | ||
127 | + M.push_back(-1); | ||
128 | + M_colInd.push_back((int)ii(1, 0, 0)); | ||
129 | + if (LAYERS > 2) { | ||
130 | + M.push_back(-B); | ||
131 | + M_colInd.push_back((int)ii(1, 0, 1)); | ||
132 | + } | ||
133 | + M_rowInd.push_back((int)M.size()); | ||
134 | + b.push_back(-A * E[0]); | ||
135 | + | ||
136 | + M.push_back(1); | ||
137 | + M_colInd.push_back((int)ii(0, 1, 1)); | ||
138 | + M.push_back(-1); | ||
139 | + M_colInd.push_back((int)ii(1, 1, 0)); | ||
140 | + if (LAYERS > 2) { | ||
141 | + M.push_back(-B); | ||
142 | + M_colInd.push_back((int)ii(1, 1, 1)); | ||
143 | + } | ||
144 | + M_rowInd.push_back((int)M.size()); | ||
145 | + b.push_back(-A * E[l]); | ||
146 | + | ||
147 | + M.push_back(sz0); | ||
148 | + M_colInd.push_back((int)ii(0, 1, 1)); | ||
149 | + M.push_back(s[1]); | ||
150 | + M_colInd.push_back((int)ii(0, 2, 1)); | ||
151 | + M.push_back(sz1); | ||
152 | + M_colInd.push_back((int)ii(1, 1, 0)); | ||
153 | + M.push_back(-s[1]); | ||
154 | + M_colInd.push_back((int)ii(1, 2, 0)); | ||
155 | + if (LAYERS > 2) { | ||
156 | + M.push_back(-B * sz1); | ||
157 | + M_colInd.push_back((int)ii(1, 1, 1)); | ||
158 | + M.push_back(-B * s[1]); | ||
159 | + M_colInd.push_back((int)ii(1, 2, 1)); | ||
160 | + } | ||
161 | + M_rowInd.push_back((int)M.size()); | ||
162 | + b.push_back(A * sz0 * E[1] - A * s[1] * E[2]); | ||
163 | + | ||
164 | + M.push_back(-sz0); | ||
165 | + M_colInd.push_back((int)ii(0, 0, 1)); | ||
166 | + M.push_back(-s[0]); | ||
167 | + M_colInd.push_back((int)ii(0, 2, 1)); | ||
168 | + M.push_back(-sz1); | ||
169 | + M_colInd.push_back((int)ii(1, 0, 0)); | ||
170 | + M.push_back(s[0]); | ||
171 | + M_colInd.push_back((int)ii(1, 2, 0)); | ||
172 | + if (LAYERS > 2) { | ||
173 | + M.push_back(B * sz1); | ||
174 | + M_colInd.push_back((int)ii(1, 0, 1)); | ||
175 | + M.push_back(B* s[0]); | ||
176 | + M_colInd.push_back((int)ii(1, 2, 1)); | ||
177 | + } | ||
178 | + M_rowInd.push_back((int)M.size()); | ||
179 | + b.push_back(A * s[0] * E[2] - A * sz0 * E[0]); | ||
180 | + } | ||
181 | + else if (l == LAYERS - 1) { | ||
182 | + M.push_back(A); | ||
183 | + M_colInd.push_back((int)ii(l - 1, 0, 0)); | ||
184 | + M.push_back(1); | ||
185 | + M_colInd.push_back((int)ii(l - 1, 0, 1)); | ||
186 | + M.push_back(-1); | ||
187 | + M_colInd.push_back((int)ii(l, 0, 0)); | ||
188 | + M_rowInd.push_back((int)M.size()); | ||
189 | + b.push_back(0); | ||
190 | + | ||
191 | + M.push_back(A); | ||
192 | + M_colInd.push_back((int)ii(l - 1, 1, 0)); | ||
193 | + M.push_back(1); | ||
194 | + M_colInd.push_back((int)ii(l - 1, 1, 1)); | ||
195 | + M.push_back(-1); | ||
196 | + M_colInd.push_back((int)ii(l, 1, 0)); | ||
197 | + M_rowInd.push_back((int)M.size()); | ||
198 | + b.push_back(0); | ||
199 | + | ||
200 | + M.push_back(-A * sz0); | ||
201 | + M_colInd.push_back((int)ii(l - 1, 1, 0)); | ||
202 | + M.push_back(A * s[1]); | ||
203 | + M_colInd.push_back((int)ii(l - 1, 2, 0)); | ||
204 | + M.push_back(sz0); | ||
205 | + M_colInd.push_back((int)ii(l - 1, 1, 1)); | ||
206 | + M.push_back(s[1]); | ||
207 | + M_colInd.push_back((int)ii(l - 1, 2, 1)); | ||
208 | + M.push_back(sz1); | ||
209 | + M_colInd.push_back((int)ii(l, 1, 0)); | ||
210 | + M.push_back(-s[1]); | ||
211 | + M_colInd.push_back((int)ii(l, 2, 0)); | ||
212 | + M_rowInd.push_back((int)M.size()); | ||
213 | + b.push_back(0); | ||
214 | + | ||
215 | + M.push_back(A * sz0); | ||
216 | + M_colInd.push_back((int)ii(l - 1, 0, 0)); | ||
217 | + M.push_back(-A * s[0]); | ||
218 | + M_colInd.push_back((int)ii(l - 1, 2, 0)); | ||
219 | + M.push_back(-sz0); | ||
220 | + M_colInd.push_back((int)ii(l - 1, 0, 1)); | ||
221 | + M.push_back(-s[0]); | ||
222 | + M_colInd.push_back((int)ii(l - 1, 2, 1)); | ||
223 | + M.push_back(-sz1); | ||
224 | + M_colInd.push_back((int)ii(l, 0, 0)); | ||
225 | + M.push_back(s[0]); | ||
226 | + M_colInd.push_back((int)ii(l, 2, 0)); | ||
227 | + M_rowInd.push_back((int)M.size()); | ||
228 | + b.push_back(0); | ||
229 | + } | ||
230 | + else { | ||
231 | + M.push_back(A); | ||
232 | + M_colInd.push_back((int)ii(l - 1, 0, 0)); | ||
233 | + M.push_back(1); | ||
234 | + M_colInd.push_back((int)ii(l - 1, 0, 1)); | ||
235 | + M.push_back(-1); | ||
236 | + M_colInd.push_back((int)ii(l, 0, 0)); | ||
237 | + M.push_back(-B); | ||
238 | + M_colInd.push_back((int)ii(l, 0, 1)); | ||
239 | + M_rowInd.push_back((int)M.size()); | ||
240 | + b.push_back(0); | ||
241 | + | ||
242 | + M.push_back(A); | ||
243 | + M_colInd.push_back((int)ii(l - 1, 1, 0)); | ||
244 | + M.push_back(1); | ||
245 | + M_colInd.push_back((int)ii(l - 1, 1, 1)); | ||
246 | + M.push_back(-1); | ||
247 | + M_colInd.push_back((int)ii(l, 1, 0)); | ||
248 | + M.push_back(-B); | ||
249 | + M_colInd.push_back((int)ii(l, 1, 1)); | ||
250 | + M_rowInd.push_back((int)M.size()); | ||
251 | + b.push_back(0); | ||
252 | + | ||
253 | + M.push_back(-A * sz0); | ||
254 | + M_colInd.push_back((int)ii(l - 1, 1, 0)); | ||
255 | + M.push_back(A * s[1]); | ||
256 | + M_colInd.push_back((int)ii(l - 1, 2, 0)); | ||
257 | + M.push_back(sz0); | ||
258 | + M_colInd.push_back((int)ii(l - 1, 1, 1)); | ||
259 | + M.push_back(s[1]); | ||
260 | + M_colInd.push_back((int)ii(l - 1, 2, 1)); | ||
261 | + M.push_back(sz1); | ||
262 | + M_colInd.push_back((int)ii(l, 1, 0)); | ||
263 | + M.push_back(-s[1]); | ||
264 | + M_colInd.push_back((int)ii(l, 2, 0)); | ||
265 | + M.push_back(-B * sz1); | ||
266 | + M_colInd.push_back((int)ii(l, 1, 1)); | ||
267 | + M.push_back(-B * s[1]); | ||
268 | + M_colInd.push_back((int)ii(l, 2, 1)); | ||
269 | + M_rowInd.push_back((int)M.size()); | ||
270 | + b.push_back(0); | ||
271 | + | ||
272 | + M.push_back(A * sz0); | ||
273 | + M_colInd.push_back((int)ii(l - 1, 0, 0)); | ||
274 | + M.push_back(-A * s[0]); | ||
275 | + M_colInd.push_back((int)ii(l - 1, 2, 0)); | ||
276 | + M.push_back(-sz0); | ||
277 | + M_colInd.push_back((int)ii(l - 1, 0, 1)); | ||
278 | + M.push_back(-s[0]); | ||
279 | + M_colInd.push_back((int)ii(l - 1, 2, 1)); | ||
280 | + M.push_back(-sz1); | ||
281 | + M_colInd.push_back((int)ii(l, 0, 0)); | ||
282 | + M.push_back(s[0]); | ||
283 | + M_colInd.push_back((int)ii(l, 2, 0)); | ||
284 | + M.push_back(B * sz1); | ||
285 | + M_colInd.push_back((int)ii(l, 0, 1)); | ||
286 | + M.push_back(B * s[0]); | ||
287 | + M_colInd.push_back((int)ii(l, 2, 1)); | ||
288 | + M_rowInd.push_back((int)M.size()); | ||
289 | + b.push_back(0); | ||
290 | + } | ||
291 | + } | ||
292 | + cudaError_t cudaStatus; | ||
293 | + cusolverStatus_t cusolverStatus; | ||
294 | + cusparseStatus_t cusparseStatus; | ||
295 | + cusolverSpHandle_t handle = NULL; | ||
296 | + cusparseHandle_t cusparseHandle = NULL; | ||
297 | + cudaStream_t stream = NULL; | ||
298 | + cusparseMatDescr_t descrM = NULL; | ||
299 | + cuDoubleComplex * csrValM_, * b_, *P_; | ||
300 | + size_t rowsA = b.size(), colsA = b.size(), nnA = M.size(), baseM_ = 0; //nnA is the number of non-zero elements. | ||
301 | + int* csrRowPtrM = NULL; //row index M_rowInd projected to GPU. | ||
302 | + int* csrColIndM = NULL; //CSR(A) from I/O. // M_colInd projected to GPU. | ||
303 | + double tol = 1.e-12; int reorder = 0; | ||
304 | + int singularity = 0; | ||
305 | + | ||
306 | + //Initialize. | ||
307 | + cusolverStatus = cusolverSpCreate(&handle); | ||
308 | + int num = 1; | ||
309 | + cudaStatus = cudaGetDevice(&num); | ||
310 | + cusparseStatus = cusparseCreate(&cusparseHandle); | ||
311 | + cudaStatus = cudaStreamCreate(&stream); | ||
312 | + cusolverStatus = cusolverSpSetStream(handle, stream); | ||
313 | + cusparseStatus = cusparseSetStream(cusparseHandle, stream); | ||
314 | + cusparseStatus = cusparseCreateMatDescr(&descrM); | ||
315 | + cusparseStatus = cusparseSetMatType(descrM, CUSPARSE_MATRIX_TYPE_GENERAL); | ||
316 | + if (baseM_) { | ||
317 | + cusparseStatus = cusparseSetMatIndexBase(descrM, CUSPARSE_INDEX_BASE_ONE); | ||
318 | + } | ||
319 | + else { | ||
320 | + cusparseStatus = cusparseSetMatIndexBase(descrM, CUSPARSE_INDEX_BASE_ZERO); | ||
321 | + } | ||
322 | + | ||
323 | + cudaStatus = cudaMalloc((void**)&csrRowPtrM, sizeof(int) * (rowsA + 1)); //Projection of M_rowInd. | ||
324 | + cudaStatus = cudaMalloc((void**)&csrColIndM, sizeof(int) * M_colInd.size()); //Projection of M_colInd. | ||
325 | + cudaStatus = cudaMalloc((void**)&csrValM_, sizeof(cuDoubleComplex) * M.size()); //Projection of M. | ||
326 | + cudaStatus = cudaMalloc((void**)&b_, sizeof(cuDoubleComplex) * b.size()); //Projection of b. | ||
327 | + cudaStatus = cudaMalloc((void**)&P_, sizeof(cuDoubleComplex) * b.size()); //Projection of P. | ||
328 | + | ||
329 | + cudaStatus = cudaMemcpy(csrValM_, M.data(), M.size() * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice); | ||
330 | + cudaStatus = cudaMemcpy(csrRowPtrM, M_rowInd.data(), M_rowInd.size() * sizeof(int), cudaMemcpyHostToDevice); | ||
331 | + cudaStatus = cudaMemcpy(csrColIndM, M_colInd.data(), M_colInd.size() * sizeof(int), cudaMemcpyHostToDevice); | ||
332 | + cudaStatus = cudaMemcpy(b_, b.data(), b.size() * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice); | ||
333 | + // Output the current CUDA error. | ||
334 | + //if (cudaStatus != cudaSuccess) { | ||
335 | + // cout<<"%s " << cudaGetErrorString(cudaStatus) << endl; | ||
336 | + //} | ||
337 | + P->resize(rowsA); //P is the to-be-solved matrix in CPU. | ||
338 | + //QR method. | ||
339 | + cusolverStatus = cusolverSpZcsrlsvqr(handle, (int)rowsA, (int)nnA, descrM, csrValM_, csrRowPtrM, csrColIndM, b_, tol, reorder, P_, (int*)&singularity); | ||
340 | + /*cusparseStatus = cusparseZsctr(*cusparseHandle, rowsA, g_z, g_Q, g_x, CUSPARSE_INDEX_BASE_ZERO);*/ | ||
341 | + cudaStatus = cudaMemcpyAsync(P->data(), P_, sizeof(cuDoubleComplex) * rowsA, cudaMemcpyDeviceToHost, stream); | ||
342 | + | ||
343 | + cudaStatus = cudaFree(csrRowPtrM); | ||
344 | + cudaStatus = cudaFree(csrColIndM); | ||
345 | + cudaStatus = cudaFree(csrValM_); | ||
346 | + cudaStatus = cudaFree(b_); | ||
347 | + cudaStatus = cudaFree(P_); | ||
348 | + vector<int>().swap(M_rowInd); | ||
349 | + vector<int>().swap(M_colInd); | ||
350 | + } | ||
351 | + else { | ||
352 | + //Work on CPU. | ||
353 | + M.resize(6 * (LAYERS - 1) * 6 * (LAYERS - 1)); | ||
354 | + b.resize(6 * (LAYERS - 1)); | ||
355 | + | ||
356 | + size_t ei = 0; | ||
357 | + //Set constraints based on Gauss's Law. | ||
358 | + for (size_t l = 0; l < LAYERS; l++) { | ||
359 | + //Set the upward components for each layer. | ||
360 | + //Layer "LAYERS-1" doesn't have a upward component. | ||
361 | + if (l != LAYERS - 1) { | ||
362 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = s[0]; | ||
363 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = s[1]; | ||
364 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = -sz[l]; | ||
365 | + ei += 1; | ||
366 | + } | ||
367 | + //Set the downward components for each layer. | ||
368 | + if (l != 0) { | ||
369 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = s[0]; | ||
370 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = s[1]; | ||
371 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = sz[l]; | ||
372 | + ei += 1; | ||
373 | + } | ||
374 | + } | ||
375 | + //Enforce a continuous field across boundaries. | ||
376 | + complex<double> arg, arg_in, B; | ||
377 | + for (size_t l = 1; l < LAYERS; l++) { | ||
378 | + complex<double> sz0 = sz[l - 1]; | ||
379 | + complex<double> sz1 = sz[l]; | ||
380 | + | ||
381 | + //Representation of A = np.exp(1j * k0 * sz0 * (self.z[l] - self.z[l - 1])) | ||
382 | + complex<double> A_in = k * sz0 * (z[l] - z[l - 1]); | ||
383 | + complex<double> A_in2 = { -A_in.imag(), A_in.real() }; | ||
384 | + complex<double> A = exp(A_in2); | ||
385 | + | ||
386 | + if (l < LAYERS - 1) { | ||
387 | + double dl = z[l] - z[l + 1]; | ||
388 | + arg_in = -k * sz1 * (complex<double>)dl; | ||
389 | + arg = { -arg_in.imag(), arg_in.real() }; | ||
390 | + B = exp(arg); | ||
391 | + } | ||
392 | + //if this is the second layer, use the simplified equations that account for the incident field | ||
393 | + if (l == 1) { | ||
394 | + M[ei * 6 * (LAYERS - 1) + ii(0, 0, 1)] = 1; | ||
395 | + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 0)] = -1; | ||
396 | + if (LAYERS > 2) { | ||
397 | + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 1)] = -B; | ||
398 | + } | ||
399 | + b[ei] = -A * E[0]; | ||
400 | + ei += 1; | ||
401 | + | ||
402 | + M[ei * 6 * (LAYERS - 1) + ii(0, 1, 1)] = 1; | ||
403 | + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 0)] = -1; | ||
404 | + if (LAYERS > 2) { | ||
405 | + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 1)] = -B; | ||
406 | + } | ||
407 | + b[ei] = -A * E[l]; | ||
408 | + ei += 1; | ||
409 | + | ||
410 | + M[ei * 6 * (LAYERS - 1) + ii(0, 2, 1)] = s[1]; | ||
411 | + M[ei * 6 * (LAYERS - 1) + ii(0, 1, 1)] = sz0; | ||
412 | + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 0)] = -s[1]; | ||
413 | + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 0)] = sz1; | ||
414 | + if (LAYERS > 2) { | ||
415 | + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 1)] = -B * s[1]; | ||
416 | + M[ei * 6 * (LAYERS - 1) + ii(1, 1, 1)] = -B * sz1; | ||
417 | + } | ||
418 | + b[ei] = A * sz0 * E[1] - A * s[1] * E[2]; | ||
419 | + ei += 1; | ||
420 | + | ||
421 | + M[ei * 6 * (LAYERS - 1) + ii(0, 0, 1)] = -sz0; | ||
422 | + M[ei * 6 * (LAYERS - 1) + ii(0, 2, 1)] = -s[0]; | ||
423 | + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 0)] = -sz1; | ||
424 | + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 0)] = s[0]; | ||
425 | + if (LAYERS > 2) { | ||
426 | + M[ei * 6 * (LAYERS - 1) + ii(1, 0, 1)] = B * sz1; | ||
427 | + M[ei * 6 * (LAYERS - 1) + ii(1, 2, 1)] = B * s[0]; | ||
428 | + } | ||
429 | + b[ei] = A * s[0] * E[2] - A * sz0 * E[0]; | ||
430 | + ei += 1; | ||
431 | + } | ||
432 | + else if (l == LAYERS - 1) { | ||
433 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A; | ||
434 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = 1; | ||
435 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -1; | ||
436 | + ei += 1; | ||
437 | + | ||
438 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = A; | ||
439 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = 1; | ||
440 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = -1; | ||
441 | + ei += 1; | ||
442 | + | ||
443 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = A * s[1]; | ||
444 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = -A * sz0; | ||
445 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = s[1]; | ||
446 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = sz0; | ||
447 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = -s[1]; | ||
448 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = sz1; | ||
449 | + ei += 1; | ||
450 | + | ||
451 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A * sz0; | ||
452 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = -A * s[0]; | ||
453 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = -sz0; | ||
454 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = -s[0]; | ||
455 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -sz1; | ||
456 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = s[0]; | ||
457 | + ei += 1; | ||
458 | + } | ||
459 | + else { | ||
460 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A; | ||
461 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = 1; | ||
462 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -1; | ||
463 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = -B; | ||
464 | + ei += 1; | ||
465 | + | ||
466 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = A; | ||
467 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = 1; | ||
468 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = -1; | ||
469 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = -B; | ||
470 | + ei += 1; | ||
471 | + | ||
472 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = A * s[1]; | ||
473 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 0)] = -A * sz0; | ||
474 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = s[1]; | ||
475 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 1, 1)] = sz0; | ||
476 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = -s[1]; | ||
477 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 0)] = sz1; | ||
478 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = -B * s[1]; | ||
479 | + M[ei * 6 * (LAYERS - 1) + ii(l, 1, 1)] = -B * sz1; | ||
480 | + ei += 1; | ||
481 | + | ||
482 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 0)] = A * sz0; | ||
483 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 0)] = -A * s[0]; | ||
484 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 0, 1)] = -sz0; | ||
485 | + M[ei * 6 * (LAYERS - 1) + ii(l - 1, 2, 1)] = -s[0]; | ||
486 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 0)] = -sz1; | ||
487 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 0)] = s[0]; | ||
488 | + M[ei * 6 * (LAYERS - 1) + ii(l, 0, 1)] = B * sz1; | ||
489 | + M[ei * 6 * (LAYERS - 1) + ii(l, 2, 1)] = B * s[0]; | ||
490 | + ei += 1; | ||
491 | + } | ||
492 | + } | ||
493 | + | ||
494 | + complex<double>* M_ = new complex<double>[M.size()]; | ||
495 | + complex<double>* b_ = new complex<double>[b.size()]; | ||
496 | + complex<double>* P_ = new complex<double>[b.size()]; | ||
497 | + for (size_t i = 0; i < M.size(); i++) { | ||
498 | + M_[i] = M[i]; | ||
499 | + if (i < b.size()) b_[i] = b[i]; | ||
500 | + } | ||
501 | + LINALG_inverse(M_, (int)(6 * (LAYERS - 1))); | ||
502 | + LINALG_zgemm((int)(6 * (LAYERS - 1)), (int)1, (int)(6 * (LAYERS - 1)), M_, (int)(6 * (LAYERS - 1)), b_, (int)1, P_, (int)1); | ||
503 | + for (int i = 0; i < b.size(); i++) { | ||
504 | + P->push_back(P_[i]); | ||
505 | + } | ||
506 | + | ||
507 | + delete[] M_; | ||
508 | + delete[] b_; | ||
509 | + delete[] P_; | ||
510 | + } | ||
511 | +} | ||
512 | + | ||
513 | + | ||
514 | +//Build matrix and get E. | ||
515 | +void layersample::solve(vector<complex<double>>* E, bool CPU_op) { //orthogonalized E0 vectors. | ||
516 | + size_t LAYERS = n.size(); | ||
517 | + //Store the matrix and RHS vector. | ||
518 | + vector<complex<double>> M; //All non-zero values in the sparse matrix. | ||
519 | + vector<complex<double>> b; //The right hand side column vector. | ||
520 | + | ||
521 | + //Evaluate the linear system. | ||
522 | + vector<complex<double>> P; //Solution of matrix. | ||
523 | + layersample::generate_linsys(LAYERS, M, b, *E, &P, CPU_op); | ||
524 | + | ||
525 | + //Store the coefficients for each layer. | ||
526 | + //Pt[3, L] transmission. Pr[3, L] reflection. | ||
527 | + Pt.resize(3 * LAYERS); | ||
528 | + Pr.resize(3 * LAYERS); | ||
529 | + | ||
530 | + for (size_t l = 0; l < LAYERS; l++) { | ||
531 | + if (l == 0) { | ||
532 | + Pt[0] = (complex<double>)(*E)[0]; | ||
533 | + Pt[LAYERS] = (complex<double>)(*E)[1]; | ||
534 | + Pt[2 * LAYERS] = (complex<double>)(*E)[2]; | ||
535 | + } | ||
536 | + else { | ||
537 | + Pt[l] = P[ii(l, 0, 0)]; | ||
538 | + Pt[l + LAYERS] = P[ii(l, 1, 0)]; | ||
539 | + Pt[l + 2 * LAYERS] = P[ii(l, 2, 0)]; | ||
540 | + } | ||
541 | + | ||
542 | + if (l == LAYERS - 1) { | ||
543 | + Pr[LAYERS - 1] = 0; | ||
544 | + Pr[2 * LAYERS - 1] = 0; | ||
545 | + Pr[3 * LAYERS - 1] = 0; | ||
546 | + } | ||
547 | + else { | ||
548 | + Pr[l] = P[ii(l, 0, 1)]; | ||
549 | + Pr[l + LAYERS] = P[ii(l, 1, 1)]; | ||
550 | + Pr[l + 2 * LAYERS] = P[ii(l, 2, 1)]; | ||
551 | + } | ||
552 | + } | ||
553 | + vector<complex<double>>().swap(M); | ||
554 | + vector<complex<double>>().swap(b); | ||
555 | + vector<complex<double>>().swap(P); | ||
556 | +} | ||
0 | \ No newline at end of file | 557 | \ No newline at end of file |
1 | +++ a/src/layer.h | ||
1 | +#ifndef LAYER_H | ||
2 | +#define LAYER_H | ||
3 | +#include <vector> | ||
4 | +#include <complex> | ||
5 | +using namespace std; | ||
6 | + | ||
//Free helper functions. Only the declarations are visible in this header; the
//one-line summaries below are inferred from the names and signatures -- TODO confirm
//against the definitions.

//presumably the cross product of a and b, written to c -- TODO confirm.
void crossProduct(std::vector<std::complex<double>>* a,
	std::vector<std::complex<double>>* b,
	std::vector<std::complex<double>>* c);

//presumably the norm of E -- TODO confirm.
std::complex<double> Norm(std::vector<std::complex<double>>* E);

//presumably rescales E in place to unit norm -- TODO confirm.
void Normalize(std::vector<std::complex<double>>* E);

//presumably returns the transpose of a row-of-rows matrix -- TODO confirm.
std::vector<std::vector<double> > transpose(std::vector<std::vector<double> >* matrix);

//presumably makes the field orthogonal to the direction d -- TODO confirm which argument is the output.
void orthogonalize(std::vector<std::complex<double>>* E_0,
	std::vector<std::complex<double>>* E0,
	std::vector<std::complex<double>>* d);
12 | + | ||
//A stack of layers together with the plane wave travelling through it.
//solve() fills Pt and Pr; the remaining members are inputs or values kept for output.
class layersample {

public:
	double k;									//wavenumber.
	std::vector<std::complex<double>> n;		//refractive index, one entry per layer.
	std::vector<double> z;						//z positions.
	std::vector<std::complex<double>> s;		//propagation direction. Kept for output.
	std::vector<std::complex<double>> sz;		//propagation direction. Kept for output.
	std::vector<std::complex<double>> d;		//direction of propagation of the plane wave.
	std::vector<std::complex<double>> Pt;		//transmission.
	std::vector<std::complex<double>> Pr;		//reflection.

	//Calculate the index of the field component associated with a layer.
	//  l is the layer index.
	//  c is the component (x, y, z).
	//  d is the direction (0 for transmission, 1 for reflection).
	size_t ii(size_t l, size_t c, size_t d);

	//Generate the linear system corresponding to this layered sample and plane wave.
	void generate_linsys(size_t LAYERS,
		std::vector<std::complex<double>>& M,
		std::vector<std::complex<double>>& b,
		std::vector<std::complex<double>>& E,
		std::vector<std::complex<double>>* P,
		bool CPU_op);

	//Build the matrix, solve it, and store the per-layer coefficients in Pt and Pr.
	void solve(std::vector<std::complex<double>>* E, bool CPU_op);
};
34 | + | ||
35 | +#endif | ||
0 | \ No newline at end of file | 36 | \ No newline at end of file |
1 | +++ a/src/linalg.cpp | ||
1 | +#include "linalg.h" | ||
2 | + | ||
3 | +// This file contains a set of wrapper functions that are linked to the corresponding functions in CLAPACK. | ||
4 | +extern "C" { | ||
5 | +#include "f2c.h" | ||
6 | +#include "clapack.h" | ||
7 | +#include "cblas.h" | ||
8 | +} | ||
9 | + | ||
10 | + | ||
11 | +void LINALG_zgetrf( | ||
12 | + int M, | ||
13 | + int N, | ||
14 | + std::complex<double>* A, | ||
15 | + int LDA, | ||
16 | + int* IPIV) | ||
17 | +{ | ||
18 | + integer INFO; | ||
19 | + zgetrf_((integer*)&M, (integer*)&N, (doublecomplex*)A, (integer*)&LDA, (integer*)IPIV, &INFO); | ||
20 | +} | ||
21 | + | ||
22 | +void LINALG_zgetri( | ||
23 | + size_t N, | ||
24 | + std::complex<double>* A, | ||
25 | + int LDA, | ||
26 | + int* IPIV) | ||
27 | +{ | ||
28 | + integer LWORK = -1; | ||
29 | + std::complex<double> WORK[1]; | ||
30 | + integer INFO; | ||
31 | + zgetri_((integer*)&N, (doublecomplex*)A, (integer*)&LDA, (integer*)IPIV, (doublecomplex*)WORK, &LWORK, &INFO); | ||
32 | +} | ||
33 | +void LINALG_inverse(std::complex<double>* A, int N) | ||
34 | +{ | ||
35 | + int* IPIV = new int[N + (size_t)1]; | ||
36 | + integer LWORK = N * N; | ||
37 | + std::complex<double>* WORK = new std::complex<double>[LWORK]; | ||
38 | + integer INFO; | ||
39 | + | ||
40 | + zgetrf_((integer*)&N, (integer*)&N, (doublecomplex*)A, (integer*)&N, (integer*)IPIV, &INFO); | ||
41 | + zgetri_((integer*)&N, (doublecomplex*)A, (integer*)&N, (integer*)IPIV, (doublecomplex*)WORK, &LWORK, &INFO); | ||
42 | + | ||
43 | + delete[] IPIV; | ||
44 | + delete[] WORK; | ||
45 | +} | ||
46 | + | ||
47 | +void LINALG_zgemm( | ||
48 | + const int M, //A(M*K) B(K*N) | ||
49 | + const int N, | ||
50 | + const int K, | ||
51 | + std::complex<double>* A, | ||
52 | + const int LDA, //=K | ||
53 | + std::complex<double>* B, | ||
54 | + const int LDB, //=N | ||
55 | + std::complex<double>* C, | ||
56 | + const int LDC) //=columns of C. | ||
57 | +{ | ||
58 | + std::complex<double> alpha = 1; | ||
59 | + std::complex<double> beta = 0; | ||
60 | + cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, (OPENBLAS_CONST blasint)M, (OPENBLAS_CONST blasint)N, (OPENBLAS_CONST blasint)K, | ||
61 | + &alpha, A, (OPENBLAS_CONST blasint)LDA, B, (OPENBLAS_CONST blasint)LDB, &beta, C, (OPENBLAS_CONST blasint)LDC); | ||
62 | + | ||
63 | +} | ||
0 | \ No newline at end of file | 64 | \ No newline at end of file |
1 | +++ a/src/linalg.h | ||
// This file contains a set of wrapper functions that are linked to the corresponding functions in CLAPACK
#ifndef LINALG_H
#define LINALG_H
#include <complex>

//Invert the N x N matrix A in place.
void LINALG_inverse(std::complex<double>* A, int N);

//Matrix multiplication, C = A * B.
void LINALG_zgemm(
	const int M,				//A(M*K) B(K*N)
	const int N,
	const int K,
	std::complex<double>* A,
	const int LDA,				//=K
	std::complex<double>* B,
	const int LDB,				//=N
	std::complex<double>* C,
	const int LDC);				//=columns of C.

#endif
0 | \ No newline at end of file | 18 | \ No newline at end of file |
1 | +++ a/src/main.cpp | ||
1 | +//Update the data output format in example_layer(). | ||
2 | +#include "layer.h" | ||
3 | +#include <windows.h> | ||
4 | + | ||
5 | +#include <fstream> | ||
6 | +#include <iostream> | ||
7 | +#include "stim/parser/arguments.h" | ||
8 | + | ||
9 | +using namespace std; | ||
10 | +#define PI 3.14159265358979323846264338328 | ||
11 | + | ||
12 | +bool ASCII_output = false; | ||
13 | +bool CPU_op = false; | ||
14 | + | ||
//Print the lab banner: two blank lines, then the lab name framed by two rules and a trailing blank line.
void advertise() {
	std::cout << std::endl
		<< std::endl;
	std::cout << "=========================================================================" << std::endl
		<< "Scalable Tissue Imaging and Modeling (STIM) Lab, University of Houston" << std::endl
		<< "=========================================================================" << std::endl
		<< std::endl;
}
22 | + | ||
//Complete the direction d and the field E with their z components.
//  d  holds (dx, dy) or (dx, dy, dz). When exactly two components are present,
//     dz = sqrt(1 - dx^2 - dy^2) is appended.
//  E  holds (Ex, Ey) on entry. Ez = -(Ex*dx + Ey*dy) / dz is appended, which
//     makes Ex*dx + Ey*dy + Ez*dz equal to zero.
//Prints an error and exits when either vector has fewer than two components,
//since the code below would otherwise index past the end.
//No check is made for dz == 0; the division then yields a non-finite Ez.
void E_Cal(std::vector<std::complex<double>>* E, std::vector<std::complex<double>>* d) {
	if (E->size() < 2 || d->size() < 2) {
		std::cout << "ERROR: E and the propagation direction each need at least two components (x, y)." << std::endl;
		exit(1);
	}

	if (d->size() == 2) {
		const std::complex<double> one(1.0, 0.0);
		std::complex<double> dz = std::sqrt(one - std::pow((*d)[0], 2) - std::pow((*d)[1], 2));
		d->push_back(dz);
	}

	const std::complex<double> transverse = (*E)[0] * (*d)[0] + (*E)[1] * (*d)[1];
	E->push_back(-transverse / (*d)[2]);
}
31 | +void output_binary(std::string filename, layersample& layers) { | ||
32 | + size_t L = layers.n.size(); // get the number of layers | ||
33 | + std::ofstream outFile; | ||
34 | + outFile.open(filename, std::ofstream::binary); // open the output file for binary writing | ||
35 | + if (outFile) { | ||
36 | + outFile.write((char*)&layers.k, sizeof(double)); | ||
37 | + outFile.write((char*)&layers.d[0], sizeof(double)); | ||
38 | + outFile.write((char*)&layers.d[1], sizeof(double)); | ||
39 | + outFile.write((char*)&layers.n[0], sizeof(double) * 2); | ||
40 | + | ||
41 | + for (size_t i = 0; i < L; i++) { | ||
42 | + outFile.write((char*)&layers.z[i], sizeof(double)); | ||
43 | + outFile.write((char*)&layers.sz[i], 2 * sizeof(double)); | ||
44 | + outFile.write((char*)&layers.Pt[i], 2 * sizeof(double)); | ||
45 | + outFile.write((char*)&layers.Pt[i + L], 2 * sizeof(double)); | ||
46 | + outFile.write((char*)&layers.Pt[i + 2 * L], 2 * sizeof(double)); | ||
47 | + outFile.write((char*)&layers.Pr[i], 2 * sizeof(double)); | ||
48 | + outFile.write((char*)&layers.Pr[i + L], 2 * sizeof(double)); | ||
49 | + outFile.write((char*)&layers.Pr[i + 2 * L], 2 * sizeof(double)); | ||
50 | + } | ||
51 | + outFile.close(); | ||
52 | + } | ||
53 | + else { | ||
54 | + std::cout << "ERROR opening output file for binary writing: " << filename << std::endl; | ||
55 | + } | ||
56 | +} | ||
57 | + | ||
58 | +void output_txt(std::string filename, layersample& layers) { | ||
59 | + size_t L = layers.n.size(); // get the number of layers | ||
60 | + std::ofstream outFile; | ||
61 | + outFile.open(filename); // open the output file for text writing | ||
62 | + if (!outFile) { | ||
63 | + std::cout << "ERROR: Could not open file " << filename << std::endl; | ||
64 | + exit(1); | ||
65 | + } | ||
66 | + int width = 15; | ||
67 | + | ||
68 | + outFile << "--------------------------------" << endl; | ||
69 | + outFile << "The wavenumber at free space is : " << layers.k << endl; | ||
70 | + for (size_t i = 0; i < L; i++) { | ||
71 | + if (i == 0) { | ||
72 | + outFile << "--------------------------------" << endl; | ||
73 | + outFile << "LAYER " << i << " (z = " << layers.z[i] << ")" << endl; | ||
74 | + outFile << "refractive index: " << layers.n[i].real() << " + i " << layers.n[i].imag() << endl; | ||
75 | + outFile << "----------------------" << endl; | ||
76 | + outFile << "sx = " << setw(width) << layers.s[0].real() << " + i " << layers.s[0].imag() << endl; | ||
77 | + outFile << "sy = " << setw(width) << layers.s[1].real() << " + i " << layers.s[1].imag() << endl; | ||
78 | + outFile << "sz = " << setw(width) << layers.sz[i].real() << " + i " << layers.sz[i].imag() << endl; | ||
79 | + outFile << "----->>>>>" << endl; | ||
80 | + outFile << " X = " << setw(width) << layers.Pt[i].real() << " + i " << layers.Pt[i].imag() << endl; | ||
81 | + outFile << " Y = " << setw(width) << layers.Pt[i + L].real() << " + i " << layers.Pt[i + L].imag() << endl; | ||
82 | + outFile << " Z = " << setw(width) << layers.Pt[i + 2 * L].real() << " + i " << layers.Pt[i + 2 * L].imag() << endl; | ||
83 | + outFile << "<<<<<-----" << endl; | ||
84 | + outFile << " X = " << setw(width) << layers.Pr[i].real() << " + i " << layers.Pr[i].imag() << endl; | ||
85 | + outFile << " Y = " << setw(width) << layers.Pr[i + L].real() << " + i " << layers.Pr[i + L].imag() << endl; | ||
86 | + outFile << " Z = " << setw(width) << layers.Pr[i + 2 * L].real() << " + i " << layers.Pr[i + 2 * L].imag() << endl; | ||
87 | + } | ||
88 | + else { | ||
89 | + outFile << "----------------------" << endl; | ||
90 | + outFile << "LAYER " << i << " (z = " << layers.z[i] << ")" << endl; | ||
91 | + outFile << "refractive index: " << layers.n[i].real() << " + i " << layers.n[i].imag() << endl; | ||
92 | + outFile << "----------------------" << endl; | ||
93 | + outFile << "sx = " << setw(width) << layers.s[0].real() << " + i " << layers.s[0].imag() << endl; | ||
94 | + outFile << "sy = " << setw(width) << layers.s[1].real() << " + i " << layers.s[1].imag() << endl; | ||
95 | + outFile << "sz = " << setw(width) << layers.sz[i].real() << " + i " << layers.sz[i].imag() << endl; | ||
96 | + outFile << "----->>>>>" << endl; | ||
97 | + outFile << " X = " << setw(width) << layers.Pt[i].real() << " + i " << layers.Pt[i].imag() << endl; | ||
98 | + outFile << " Y = " << setw(width) << layers.Pt[i + L].real() << " + i " << layers.Pt[i + L].imag() << endl; | ||
99 | + outFile << " Z = " << setw(width) << layers.Pt[i + 2 * L].real() << " + i " << layers.Pt[i + 2 * L].imag() << endl; | ||
100 | + | ||
101 | + outFile << "<<<<<-----" << endl; | ||
102 | + outFile << " X = " << setw(width) << layers.Pr[i].real() << " + i " << layers.Pr[i].imag() << endl; | ||
103 | + outFile << " Y = " << setw(width) << layers.Pr[i + L].real() << " + i " << layers.Pr[i + L].imag() << endl; | ||
104 | + outFile << " Z = " << setw(width) << layers.Pr[i + 2 * L].real() << " + i " << layers.Pr[i + 2 * L].imag() << endl; | ||
105 | + } | ||
106 | + | ||
107 | + } | ||
108 | + outFile.close(); | ||
109 | +} | ||
110 | + | ||
111 | +void calculate_layer(std::string outName, | ||
112 | + vector<complex<double>>* ns, // the refractive index. | ||
113 | + vector<double>* depths, // z-direction position. | ||
114 | + vector<complex<double>>* E0, // the initialized E0. | ||
115 | + vector<complex<double>>* d0, // direction of propagation of the plane wave. | ||
116 | + double k0) { // the wavenumber at free space. | ||
117 | + std::string outName_ext(outName, size(outName)-4, size(outName)-1); // extract the extension of the output file | ||
118 | + E_Cal(E0, d0); // make sure that both vectors are orthogonal. | ||
119 | + | ||
120 | + //Creat a new layersample and initialize. | ||
121 | + layersample Layer1; // create a layered sample | ||
122 | + Layer1.n = *ns; // set a pointer to the refractive indices | ||
123 | + Layer1.z = *depths; // set a pointer to the layer depths | ||
124 | + Layer1.d = *d0; | ||
125 | + Layer1.k = k0; | ||
126 | + | ||
127 | + LARGE_INTEGER t1, t2, tc; // Timing. | ||
128 | + QueryPerformanceFrequency(&tc); | ||
129 | + QueryPerformanceCounter(&t1); | ||
130 | + Layer1.solve(E0, CPU_op); // Solve for the substrate field in GPU. | ||
131 | + QueryPerformanceCounter(&t2); | ||
132 | + std::cout << "time for 'solving linear functions':" << (t2.QuadPart - t1.QuadPart) / (double)tc.QuadPart << "ms."<< std::endl; | ||
133 | + //output(outName, Layer1); | ||
134 | + if (ASCII_output) | ||
135 | + output_txt(outName, Layer1); | ||
136 | + else | ||
137 | + output_binary(outName, Layer1); | ||
138 | +} | ||
139 | + | ||
140 | +//Main function for example_layer. | ||
141 | +int main(int argc, char* argv[]) { | ||
142 | + stim::arglist args; | ||
143 | + | ||
144 | + //Basic argument lists. | ||
145 | + args.add("help", "prints this help"); | ||
146 | + args.add("s", "propagation direction vector (x, y)", "0.5 0.0", "[-1.0, 1.0]"); | ||
147 | + args.add("l", "wavelength", "5.0", "in arbitrary units (ex. um)"); | ||
148 | + args.add("Ex", "complex amplitude (x direction)", "0.5 0.0"); | ||
149 | + args.add("Ey", "complex amplitude (y direction)", "0.5 0.0"); | ||
150 | + args.add("z", "layer positions"); | ||
151 | + args.add("n", "layer optical path length (real refractive index)", "1.0 1.4 1.4 1.0"); | ||
152 | + args.add("kappa", "layer absorbance (imaginary refractive index)"); | ||
153 | + args.add("ascii", "output as an ASCII file"); | ||
154 | + args.add("CPU", "execute the program in CPU"); | ||
155 | + args.parse(argc, argv); | ||
156 | + | ||
157 | + if (args["help"].is_set()) { // test for help | ||
158 | + advertise(); // output the advertisement | ||
159 | + std::cout << args.str(); // output arguments | ||
160 | + exit(1); // exit | ||
161 | + } | ||
162 | + | ||
163 | + ASCII_output = args["ascii"]; // if the ascii flag is set, output an ascii file | ||
164 | + CPU_op = args["CPU"]; // if the CPU_op is set, run the program in CPU. | ||
165 | + std::string outName; | ||
166 | + if (args.nargs() == 1) { | ||
167 | + outName = args.arg(0); | ||
168 | + } | ||
169 | + else if (args.nargs() == 0) { | ||
170 | + if (ASCII_output) | ||
171 | + outName = "output.txt"; | ||
172 | + else | ||
173 | + outName = "output.lyr"; | ||
174 | + } | ||
175 | + else { | ||
176 | + std::cout << "ERROR: Too many arguments." << std::endl; | ||
177 | + exit(1); | ||
178 | + } | ||
179 | + | ||
180 | + | ||
181 | + vector<complex<double>> d; //direction of propagation of the plane wave Init: {0.5, 0} | ||
182 | + if (args["s"].nargs() == 2) { | ||
183 | + d.push_back({ (double)args["s"].as_float(0), 0 }); | ||
184 | + d.push_back({ (double)args["s"].as_float(1), 0 }); | ||
185 | + } | ||
186 | + | ||
187 | + double l0 = (double)args["l"].as_float(0); //wavelength.Init: l0 = 5; | ||
188 | + double k0 = 2 * PI / l0; //Calculate the free-space wavenumber. | ||
189 | + | ||
190 | + complex<double> Ex = { (double)args["Ex"].as_float(0), (double)args["Ex"].as_float(1) }; //Input E. Init: {1, 1, 0} | ||
191 | + complex<double> Ey = { (double)args["Ey"].as_float(0), (double)args["Ey"].as_float(1) }; | ||
192 | + vector<complex<double>> E0; | ||
193 | + E0.push_back(Ex); | ||
194 | + E0.push_back(Ey); | ||
195 | + | ||
196 | + //const int LAYERS = args["L"].as_int(); //LAYERS Init: 4 | ||
197 | + | ||
198 | + vector<double> depths; | ||
199 | + vector<complex<double>> ns; | ||
200 | + size_t i = 0; | ||
201 | + while(args["n"].is_set() && args["n"].as_float(i)!=0) { //n is the real part. | ||
202 | + if (args["kappa"].is_set()) //kappa is the imaginary part. | ||
203 | + ns.push_back({ (double)args["n"].as_float(i), (double)args["kappa"].as_float(i) }); | ||
204 | + else | ||
205 | + ns.push_back({ (double)args["n"].as_float(i), 0 }); //ns <- n + i * kappa | ||
206 | + i++; | ||
207 | + } | ||
208 | + for (size_t j = 0; j < i; j++) | ||
209 | + if (args["z"].is_set()) | ||
210 | + depths.push_back((double)args["z"].as_float(i)); | ||
211 | + else { | ||
212 | + depths.push_back((double)-100 + j * 200 / i); | ||
213 | + } | ||
214 | + /*---------------------------------------example_layer--------------------------------------------*/ | ||
215 | + | ||
216 | + calculate_layer(outName, &ns, &depths, &E0, &d, k0); | ||
217 | + | ||
218 | + return 0; | ||
219 | + std::cin.get(); | ||
220 | +} | ||
0 | \ No newline at end of file | 221 | \ No newline at end of file |