Commit 396a5f1225210b5c52e5b6df9c9d96e7f63176e5
1 parent
4a9d9281
added custom code for dealing with command-line arguments
Showing
13 changed files
with
478 additions
and
1059 deletions
Show diff stats
CMakeLists.txt
@@ -12,10 +12,10 @@ set(CMAKE_AUTOMOC ON) | @@ -12,10 +12,10 @@ set(CMAKE_AUTOMOC ON) | ||
12 | set(CMAKE_INCLUDE_CURRENT_DIR ON) | 12 | set(CMAKE_INCLUDE_CURRENT_DIR ON) |
13 | 13 | ||
14 | #find BOOST | 14 | #find BOOST |
15 | -set(Boost_USE_STATIC_LIBS ON) | ||
16 | -set(Boost_USE_MULTITHREADED ON) | ||
17 | -set(Boost_USE_STATIC_RUNTIME OFF) | ||
18 | -find_package( Boost 1.46.0 COMPONENTS program_options ) | 15 | +#set(Boost_USE_STATIC_LIBS ON) |
16 | +#set(Boost_USE_MULTITHREADED ON) | ||
17 | +#set(Boost_USE_STATIC_RUNTIME OFF) | ||
18 | +#find_package( Boost 1.46.0 COMPONENTS program_options ) | ||
19 | 19 | ||
20 | #find the Qt5 | 20 | #find the Qt5 |
21 | find_package(Qt5Widgets REQUIRED) | 21 | find_package(Qt5Widgets REQUIRED) |
@@ -28,7 +28,7 @@ include_directories(${QT_INCLUDE_DIRECTORY}) | @@ -28,7 +28,7 @@ include_directories(${QT_INCLUDE_DIRECTORY}) | ||
28 | find_package(CUDA) | 28 | find_package(CUDA) |
29 | 29 | ||
30 | #ask the user for the RTS location | 30 | #ask the user for the RTS location |
31 | -find_package(RTS REQUIRED) | 31 | +#find_package(RTS REQUIRED) |
32 | 32 | ||
33 | #set the include directories | 33 | #set the include directories |
34 | include_directories( | 34 | include_directories( |
@@ -37,8 +37,9 @@ include_directories( | @@ -37,8 +37,9 @@ include_directories( | ||
37 | ${Qt5Core_INCLUDE_DIRS} | 37 | ${Qt5Core_INCLUDE_DIRS} |
38 | ${Qt5Gui_INCLUDE_DIRS} | 38 | ${Qt5Gui_INCLUDE_DIRS} |
39 | # ${Qt5OpenGL_INCLUDE_DIRS} | 39 | # ${Qt5OpenGL_INCLUDE_DIRS} |
40 | - ${RTS_INCLUDE_DIR} | ||
41 | - ${Boost_INCLUDE_DIR} | 40 | +# ${RTS_INCLUDE_DIR} |
41 | +# ${Boost_INCLUDE_DIR} | ||
42 | + ${CMAKE_CURRENT_SOURCE_DIR} | ||
42 | ) | 43 | ) |
43 | 44 | ||
44 | #build position independent code for Qt (-fPIC) | 45 | #build position independent code for Qt (-fPIC) |
@@ -55,6 +56,9 @@ file(GLOB SRC_H "*.h") | @@ -55,6 +56,9 @@ file(GLOB SRC_H "*.h") | ||
55 | file(GLOB SRC_UI "*.ui") | 56 | file(GLOB SRC_UI "*.ui") |
56 | file(GLOB SRC_CU "*.cu") | 57 | file(GLOB SRC_CU "*.cu") |
57 | 58 | ||
59 | +#assign RTS source files | ||
60 | +file(GLOB SRC_RTS "rts/source/*.cpp") | ||
61 | + | ||
58 | #determine which source files have to be moc'd | 62 | #determine which source files have to be moc'd |
59 | Qt5_wrap_cpp(UI_MOC ${SRC_H}) | 63 | Qt5_wrap_cpp(UI_MOC ${SRC_H}) |
60 | Qt5_wrap_ui(UI_H ${SRC_UI}) | 64 | Qt5_wrap_ui(UI_H ${SRC_UI}) |
@@ -70,6 +74,7 @@ cuda_add_executable(bimsim | @@ -70,6 +74,7 @@ cuda_add_executable(bimsim | ||
70 | ${UI_H} | 74 | ${UI_H} |
71 | ${SRC_UI} | 75 | ${SRC_UI} |
72 | ${SRC_CU} | 76 | ${SRC_CU} |
77 | + ${SRC_RTS} | ||
73 | ) | 78 | ) |
74 | 79 | ||
75 | #specify which qt5 modules to use | 80 | #specify which qt5 modules to use |
@@ -83,7 +88,7 @@ target_link_libraries(bimsim | @@ -83,7 +88,7 @@ target_link_libraries(bimsim | ||
83 | # ${Qt5OpenGL_LIBRARIES} | 88 | # ${Qt5OpenGL_LIBRARIES} |
84 | ${CUDA_cufft_LIBRARY} | 89 | ${CUDA_cufft_LIBRARY} |
85 | ${CUDA_cublas_LIBRARY} | 90 | ${CUDA_cublas_LIBRARY} |
86 | - ${Boost_LIBRARIES} | 91 | +# ${Boost_LIBRARIES} |
87 | ) | 92 | ) |
88 | 93 | ||
89 | 94 |
dataTypes.h
@@ -14,10 +14,10 @@ typedef double ptype; | @@ -14,10 +14,10 @@ typedef double ptype; | ||
14 | #define BLOCK 256 | 14 | #define BLOCK 256 |
15 | #define SQRT_BLOCK 16 | 15 | #define SQRT_BLOCK 16 |
16 | 16 | ||
17 | -#define PI 3.14159 | 17 | +#define PI 3.14159f |
18 | 18 | ||
19 | //a very small number | 19 | //a very small number |
20 | -#define EPSILON 0.00001 | 20 | +#define EPSILON 0.00001f |
21 | 21 | ||
22 | //CUDA hybrid code - complex class should run on both the CPU and GPU | 22 | //CUDA hybrid code - complex class should run on both the CPU and GPU |
23 | 23 |
defaults.h
@@ -8,22 +8,26 @@ | @@ -8,22 +8,26 @@ | ||
8 | #define DEFAULT_SPHERE_A 1 | 8 | #define DEFAULT_SPHERE_A 1 |
9 | 9 | ||
10 | //default near field parameters | 10 | //default near field parameters |
11 | -#define DEFAULT_LAMBDA 1 | ||
12 | -#define DEFAULT_AMPLITUDE 1 | 11 | +#define DEFAULT_LAMBDA "1" |
12 | +#define DEFAULT_AMPLITUDE "1" | ||
13 | +#define DEFAULT_MATERIAL "1.4 0.05" | ||
13 | #define DEFAULT_N 1.4 | 14 | #define DEFAULT_N 1.4 |
14 | #define DEFAULT_K 0.5 | 15 | #define DEFAULT_K 0.5 |
15 | -#define DEFAULT_FOCUS_X 0 | ||
16 | -#define DEFAULT_FOCUS_Y 0 | ||
17 | -#define DEFAULT_FOCUS_Z 0 | 16 | +#define DEFAULT_FOCUS "0 0 0" |
17 | +//#define DEFAULT_FOCUS_X "0" | ||
18 | +//#define DEFAULT_FOCUS_Y "0" | ||
19 | +//#define DEFAULT_FOCUS_Z "0" | ||
18 | //#define DEFAULT_INCIDENT_ORDER 20 | 20 | //#define DEFAULT_INCIDENT_ORDER 20 |
19 | #define DEFAULT_STABILITY_PARM 1.4 | 21 | #define DEFAULT_STABILITY_PARM 1.4 |
20 | 22 | ||
21 | //optics | 23 | //optics |
22 | -#define DEFAULT_CONDENSER_MIN 0 | ||
23 | -#define DEFAULT_CONDENSER_MAX 1 | 24 | +//#define DEFAULT_CONDENSER_MIN "0.0" |
25 | +//#define DEFAULT_CONDENSER_MAX "1.0" | ||
26 | +#define DEFAULT_CONDENSER "0 1" | ||
24 | 27 | ||
25 | -#define DEFAULT_OBJECTIVE_MIN 0 | ||
26 | -#define DEFAULT_OBJECTIVE_MAX 1 | 28 | +//#define DEFAULT_OBJECTIVE_MIN "0" |
29 | +//#define DEFAULT_OBJECTIVE_MAX "1" | ||
30 | +#define DEFAULT_OBJECTIVE "0 1" | ||
27 | 31 | ||
28 | //incident light direction | 32 | //incident light direction |
29 | #define DEFAULT_K_THETA 0 | 33 | #define DEFAULT_K_THETA 0 |
@@ -35,15 +39,17 @@ | @@ -35,15 +39,17 @@ | ||
35 | #define DEFAULT_APPEND false | 39 | #define DEFAULT_APPEND false |
36 | //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective | 40 | //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective |
37 | 41 | ||
38 | - | 42 | +#define DEFAULT_PLANE_MIN "-5 0 -5" |
39 | #define DEFAULT_PLANE_MIN_X -5 | 43 | #define DEFAULT_PLANE_MIN_X -5 |
40 | #define DEFAULT_PLANE_MIN_Y 0 | 44 | #define DEFAULT_PLANE_MIN_Y 0 |
41 | #define DEFAULT_PLANE_MIN_Z -5 | 45 | #define DEFAULT_PLANE_MIN_Z -5 |
42 | 46 | ||
47 | +#define DEFAULT_PLANE_MAX "5 0 5" | ||
43 | #define DEFAULT_PLANE_MAX_X 5 | 48 | #define DEFAULT_PLANE_MAX_X 5 |
44 | #define DEFAULT_PLANE_MAX_Y 0 | 49 | #define DEFAULT_PLANE_MAX_Y 0 |
45 | #define DEFAULT_PLANE_MAX_Z 5 | 50 | #define DEFAULT_PLANE_MAX_Z 5 |
46 | 51 | ||
52 | +#define DEFAULT_PLANE_NORM "0 1 0" | ||
47 | #define DEFAULT_PLANE_NORM_X 0 | 53 | #define DEFAULT_PLANE_NORM_X 0 |
48 | #define DEFAULT_PLANE_NORM_Y 1 | 54 | #define DEFAULT_PLANE_NORM_Y 1 |
49 | #define DEFAULT_PLANE_NORM_Z 0 | 55 | #define DEFAULT_PLANE_NORM_Z 0 |
@@ -67,16 +73,16 @@ | @@ -67,16 +73,16 @@ | ||
67 | */ | 73 | */ |
68 | 74 | ||
69 | 75 | ||
70 | -#define DEFAULT_FIELD_ORDER 10 | 76 | +#define DEFAULT_FIELD_ORDER "10" |
71 | 77 | ||
72 | -#define DEFAULT_SAMPLES 400 | 78 | +#define DEFAULT_SAMPLES "400" |
73 | 79 | ||
74 | -#define DEFAULT_SLICE_RES 256 | 80 | +#define DEFAULT_SLICE_RES "256" |
75 | 81 | ||
76 | #define DEFAULT_SPHERE_THETA_R 1000 | 82 | #define DEFAULT_SPHERE_THETA_R 1000 |
77 | 83 | ||
78 | -#define DEFAULT_PADDING 1 | ||
79 | -#define DEFAULT_SUPERSAMPLE 1 | 84 | +#define DEFAULT_PADDING "1" |
85 | +#define DEFAULT_SUPERSAMPLE "1" | ||
80 | 86 | ||
81 | #define DEFAULT_INTENSITY_FILE "out_i.bmp" | 87 | #define DEFAULT_INTENSITY_FILE "out_i.bmp" |
82 | #define DEFAULT_TRANSMITTANCE_FILE "" | 88 | #define DEFAULT_TRANSMITTANCE_FILE "" |
main.cpp
@@ -12,7 +12,9 @@ microscopeStruct* SCOPE; | @@ -12,7 +12,9 @@ microscopeStruct* SCOPE; | ||
12 | #include "fieldslice.h" | 12 | #include "fieldslice.h" |
13 | 13 | ||
14 | #include "fileout.h" | 14 | #include "fileout.h" |
15 | -#include "options.h" | 15 | +//#include "options.h" |
16 | +#include "arguments.h" | ||
17 | +#include "rts/tools/arguments.h" | ||
16 | #include "montecarlo.h" | 18 | #include "montecarlo.h" |
17 | #include "rts/math/point.h" | 19 | #include "rts/math/point.h" |
18 | #include "rts/math/spherical_bessel.h" | 20 | #include "rts/math/spherical_bessel.h" |
@@ -29,28 +31,42 @@ microscopeStruct* SCOPE; | @@ -29,28 +31,42 @@ microscopeStruct* SCOPE; | ||
29 | #include "qtMainDialog.h" | 31 | #include "qtMainDialog.h" |
30 | bool gui = false; | 32 | bool gui = false; |
31 | 33 | ||
34 | +#ifdef _WIN32 | ||
35 | +bool ansi = false; | ||
36 | +#else | ||
37 | +bool ansi = true; | ||
38 | +#endif | ||
39 | + | ||
32 | fileoutStruct gFileOut; | 40 | fileoutStruct gFileOut; |
33 | bool verbose = false; | 41 | bool verbose = false; |
34 | using namespace std; | 42 | using namespace std; |
35 | 43 | ||
36 | -int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, | 44 | +int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, |
37 | complex<double>*cyv,complex<double>*cjvp,complex<double>*cyvp); | 45 | complex<double>*cyv,complex<double>*cjvp,complex<double>*cyvp); |
38 | 46 | ||
39 | int main(int argc, char *argv[]) | 47 | int main(int argc, char *argv[]) |
40 | { | 48 | { |
49 | + //arguments test | ||
50 | + rts::arglist args; | ||
51 | + SetArguments(args); | ||
41 | 52 | ||
42 | - //benchtest planewave class | ||
43 | - rts::vector<ptype, 3> k(1, 0, 0); | ||
44 | - rts::vector<ptype, 3> E(2, 2, 0); | ||
45 | - planewave<ptype> P(k, E); | ||
46 | - | ||
47 | - std::cout<<P<<std::endl; | ||
48 | - | ||
49 | - exit(1); | 53 | + //parse the input arguments |
54 | + args.parse(argc, argv); | ||
50 | 55 | ||
51 | SCOPE = new microscopeStruct(); | 56 | SCOPE = new microscopeStruct(); |
52 | - | ||
53 | - LoadParameters(argc, argv); | 57 | + |
58 | + //load the user specified parameters into the simulation | ||
59 | + LoadParameters(args); | ||
60 | + | ||
61 | + //activate ansi output if specified | ||
62 | + args.set_ansi(ansi); | ||
63 | + | ||
64 | + //display help and exit | ||
65 | + if(args("help")) | ||
66 | + { | ||
67 | + cout<<args.toStr()<<endl; | ||
68 | + exit(1); | ||
69 | + } | ||
54 | 70 | ||
55 | //initialize GPU memory for fields | 71 | //initialize GPU memory for fields |
56 | SCOPE->init(); | 72 | SCOPE->init(); |
nearfield.h
@@ -8,7 +8,7 @@ | @@ -8,7 +8,7 @@ | ||
8 | #include "sphere.h" | 8 | #include "sphere.h" |
9 | #include <vector> | 9 | #include <vector> |
10 | 10 | ||
11 | -#define EPSILON_FLOAT 0.000001 | 11 | +#define EPSILON_FLOAT 0.000001f |
12 | 12 | ||
13 | //This structure stores values relevant to creating the near field | 13 | //This structure stores values relevant to creating the near field |
14 | struct nearfieldStruct | 14 | struct nearfieldStruct |
@@ -29,7 +29,7 @@ struct nearfieldStruct | @@ -29,7 +29,7 @@ struct nearfieldStruct | ||
29 | bsVector k; //cartesian coordinates, normalized | 29 | bsVector k; //cartesian coordinates, normalized |
30 | bsPoint focus; | 30 | bsPoint focus; |
31 | 31 | ||
32 | - //slice position and orientation in world space | 32 | + //slice position and orientation in world space |
33 | rts::quad<ptype, 3> pos; | 33 | rts::quad<ptype, 3> pos; |
34 | 34 | ||
35 | //slices for the focused field | 35 | //slices for the focused field |
nfScalarUf.cu
@@ -3,8 +3,8 @@ | @@ -3,8 +3,8 @@ | ||
3 | #include "rts/math/legendre.h" | 3 | #include "rts/math/legendre.h" |
4 | #include <stdlib.h> | 4 | #include <stdlib.h> |
5 | #include "rts/cuda/error.h" | 5 | #include "rts/cuda/error.h" |
6 | -#include "rts/cuda/timer.h" | ||
7 | - | 6 | +#include "rts/cuda/timer.h" |
7 | + | ||
8 | //Incident field for a single plane wave | 8 | //Incident field for a single plane wave |
9 | __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR) | 9 | __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR) |
10 | { | 10 | { |
@@ -33,15 +33,15 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p | @@ -33,15 +33,15 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p | ||
33 | //get the rtsPoint in world space and then the r vector | 33 | //get the rtsPoint in world space and then the r vector |
34 | bsPoint p = ABCD(u, v); | 34 | bsPoint p = ABCD(u, v); |
35 | bsVector r = p - f; | 35 | bsVector r = p - f; |
36 | - //ptype d = r.len(); | ||
37 | - | ||
38 | - ptype k_dot_r = kmag * k.dot(r); | ||
39 | - bsComplex d(0, k_dot_r); | ||
40 | - | 36 | + //ptype d = r.len(); |
37 | + | ||
38 | + ptype k_dot_r = kmag * k.dot(r); | ||
39 | + bsComplex d(0, k_dot_r); | ||
40 | + | ||
41 | Uf[i] = exp(d) * A; | 41 | Uf[i] = exp(d) * A; |
42 | 42 | ||
43 | } | 43 | } |
44 | - | 44 | + |
45 | //Incident field for a focused point source | 45 | //Incident field for a focused point source |
46 | __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4) | 46 | __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4) |
47 | { | 47 | { |
@@ -70,11 +70,11 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt | @@ -70,11 +70,11 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt | ||
70 | //get the rtsPoint in world space and then the r vector | 70 | //get the rtsPoint in world space and then the r vector |
71 | bsPoint p = ABCD(u, v); | 71 | bsPoint p = ABCD(u, v); |
72 | bsVector r = p - f; | 72 | bsVector r = p - f; |
73 | - ptype d = r.len(); | ||
74 | - if(d < EPSILON_FLOAT) | ||
75 | - { | ||
76 | - Uf[i] = A * 2 * PI * (cosAlpha - cosBeta); | ||
77 | - return; | 73 | + ptype d = r.len(); |
74 | + if(d < EPSILON_FLOAT) | ||
75 | + { | ||
76 | + Uf[i] = A * 2 * PI * (cosAlpha - cosBeta); | ||
77 | + return; | ||
78 | } | 78 | } |
79 | 79 | ||
80 | //get info for the light direction and frequency | 80 | //get info for the light direction and frequency |
@@ -94,10 +94,10 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt | @@ -94,10 +94,10 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt | ||
94 | ptype P[2]; | 94 | ptype P[2]; |
95 | //get the angle between k and r (light direction and position vector) | 95 | //get the angle between k and r (light direction and position vector) |
96 | ptype cosTheta; | 96 | ptype cosTheta; |
97 | - cosTheta = k.dot(r); | ||
98 | - | ||
99 | - //deal with the degenerate case where r == 0 | ||
100 | - //if(isnan(cosTheta)) | 97 | + cosTheta = k.dot(r); |
98 | + | ||
99 | + //deal with the degenerate case where r == 0 | ||
100 | + //if(isnan(cosTheta)) | ||
101 | // cosTheta = 0; | 101 | // cosTheta = 0; |
102 | rts::init_legendre<ptype>(cosTheta, P[0], P[1]); | 102 | rts::init_legendre<ptype>(cosTheta, P[0], P[1]); |
103 | 103 | ||
@@ -162,12 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt | @@ -162,12 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt | ||
162 | 162 | ||
163 | void nearfieldStruct::scalarUf() | 163 | void nearfieldStruct::scalarUf() |
164 | { | 164 | { |
165 | - | ||
166 | - gpuStartTimer(); | 165 | + |
166 | + gpuStartTimer(); | ||
167 | 167 | ||
168 | //create one thread for each pixel of the field slice | 168 | //create one thread for each pixel of the field slice |
169 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | 169 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); |
170 | - dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | 170 | + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); |
171 | 171 | ||
172 | //if we are computing a plane wave, call the gpuScalarUfp function | 172 | //if we are computing a plane wave, call the gpuScalarUfp function |
173 | if(planeWave) | 173 | if(planeWave) |
@@ -176,15 +176,15 @@ void nearfieldStruct::scalarUf() | @@ -176,15 +176,15 @@ void nearfieldStruct::scalarUf() | ||
176 | } | 176 | } |
177 | //otherwise compute the condenser info and create a focused field | 177 | //otherwise compute the condenser info and create a focused field |
178 | else | 178 | else |
179 | - { | ||
180 | - //pre-compute the cosine of the obscuration and objective angles | ||
181 | - //cout<<"Condenser angle in: "<<asin(condenser[0])<<std::endl; | ||
182 | - //cout<<"Condenser angle out: "<<asin(condenser[1])<<std::endl; | ||
183 | - ptype cosAlpha = cos(asin(condenser[0])); | 179 | + { |
180 | + //pre-compute the cosine of the obscuration and objective angles | ||
181 | + //cout<<"Condenser angle in: "<<asin(condenser[0])<<std::endl; | ||
182 | + //cout<<"Condenser angle out: "<<asin(condenser[1])<<std::endl; | ||
183 | + ptype cosAlpha = cos(asin(condenser[0])); | ||
184 | ptype cosBeta = cos(asin(condenser[1])); | 184 | ptype cosBeta = cos(asin(condenser[1])); |
185 | //compute the scalar Uf field (this will be in the x_hat channel of Uf) | 185 | //compute the scalar Uf field (this will be in the x_hat channel of Uf) |
186 | gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); | 186 | gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); |
187 | - } | ||
188 | - | ||
189 | - t_Uf = gpuStopTimer(); | ||
190 | -} | 187 | + } |
188 | + | ||
189 | + t_Uf = gpuStopTimer(); | ||
190 | +} |
nfScalarUfLut.cu
1 | #include "nearfield.h" | 1 | #include "nearfield.h" |
2 | 2 | ||
3 | #include "rts/math/legendre.h" | 3 | #include "rts/math/legendre.h" |
4 | -#include "rts/cuda/error.h" | 4 | +#include "rts/cuda/error.h" |
5 | #include "rts/cuda/timer.h" | 5 | #include "rts/cuda/timer.h" |
6 | 6 | ||
7 | texture<float, cudaTextureType2D> texJ; | 7 | texture<float, cudaTextureType2D> texJ; |
@@ -25,100 +25,100 @@ __global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoi | @@ -25,100 +25,100 @@ __global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoi | ||
25 | 25 | ||
26 | */ | 26 | */ |
27 | 27 | ||
28 | - //get the current coordinate in the plane slice | ||
29 | - int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
30 | - int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
31 | - | ||
32 | - //make sure that the thread indices are in-bounds | ||
33 | - if(iu >= uR || iv >= vR) return; | ||
34 | - | ||
35 | - //compute the index (easier access to the scalar field array) | ||
36 | - int i = iv*uR + iu; | ||
37 | - | ||
38 | - //compute the parameters for u and v | ||
39 | - ptype u = (ptype)iu / (uR); | ||
40 | - ptype v = (ptype)iv / (vR); | ||
41 | - | ||
42 | - | ||
43 | - | ||
44 | - //get the rtsPoint in world space and then the r vector | ||
45 | - bsPoint p = ABCD(u, v); | ||
46 | - bsVector r = p - f; | 28 | + //get the current coordinate in the plane slice |
29 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
30 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
31 | + | ||
32 | + //make sure that the thread indices are in-bounds | ||
33 | + if(iu >= uR || iv >= vR) return; | ||
34 | + | ||
35 | + //compute the index (easier access to the scalar field array) | ||
36 | + int i = iv*uR + iu; | ||
37 | + | ||
38 | + //compute the parameters for u and v | ||
39 | + ptype u = (ptype)iu / (uR); | ||
40 | + ptype v = (ptype)iv / (vR); | ||
41 | + | ||
42 | + | ||
43 | + | ||
44 | + //get the rtsPoint in world space and then the r vector | ||
45 | + bsPoint p = ABCD(u, v); | ||
46 | + bsVector r = p - f; | ||
47 | ptype d = r.len(); | 47 | ptype d = r.len(); |
48 | 48 | ||
49 | if(d == 0) | 49 | if(d == 0) |
50 | { | 50 | { |
51 | Uf[i] = A * 2 * PI * (cosAlpha - cosBeta); | 51 | Uf[i] = A * 2 * PI * (cosAlpha - cosBeta); |
52 | return; | 52 | return; |
53 | - } | ||
54 | - | ||
55 | - //get info for the light direction and frequency | ||
56 | - r = r.norm(); | ||
57 | - | ||
58 | - //compute the imaginary factor i^l | ||
59 | - bsComplex im = bsComplex(0, 1); | ||
60 | - bsComplex il = bsComplex(1, 0); | ||
61 | - | ||
62 | - //Legendre functions are computed dynamically to save memory | ||
63 | - //initialize the Legendre functions | ||
64 | - | ||
65 | - ptype P[2]; | ||
66 | - //get the angle between k and r (light direction and position vector) | ||
67 | - ptype cosTheta; | 53 | + } |
54 | + | ||
55 | + //get info for the light direction and frequency | ||
56 | + r = r.norm(); | ||
57 | + | ||
58 | + //compute the imaginary factor i^l | ||
59 | + bsComplex im = bsComplex(0, 1); | ||
60 | + bsComplex il = bsComplex(1, 0); | ||
61 | + | ||
62 | + //Legendre functions are computed dynamically to save memory | ||
63 | + //initialize the Legendre functions | ||
64 | + | ||
65 | + ptype P[2]; | ||
66 | + //get the angle between k and r (light direction and position vector) | ||
67 | + ptype cosTheta; | ||
68 | cosTheta = k.dot(r); | 68 | cosTheta = k.dot(r); |
69 | 69 | ||
70 | - rts::init_legendre<ptype>(cosTheta, P[0], P[1]); | ||
71 | - | ||
72 | - //initialize legendre functions for the cassegrain angles | ||
73 | - ptype Palpha[3]; | ||
74 | - rts::init_legendre<ptype>(cosAlpha, Palpha[0], Palpha[1]); | ||
75 | - Palpha[2] = 1; | ||
76 | - | ||
77 | - ptype Pbeta[3]; | ||
78 | - rts::init_legendre<ptype>(cosBeta, Pbeta[0], Pbeta[1]); | ||
79 | - Pbeta[2] = 1; | ||
80 | - | ||
81 | - //for each order l | ||
82 | - bsComplex sumUf(0.0, 0.0); | ||
83 | - ptype jl = 0.0; | 70 | + rts::init_legendre<ptype>(cosTheta, P[0], P[1]); |
71 | + | ||
72 | + //initialize legendre functions for the cassegrain angles | ||
73 | + ptype Palpha[3]; | ||
74 | + rts::init_legendre<ptype>(cosAlpha, Palpha[0], Palpha[1]); | ||
75 | + Palpha[2] = 1; | ||
76 | + | ||
77 | + ptype Pbeta[3]; | ||
78 | + rts::init_legendre<ptype>(cosBeta, Pbeta[0], Pbeta[1]); | ||
79 | + Pbeta[2] = 1; | ||
80 | + | ||
81 | + //for each order l | ||
82 | + bsComplex sumUf(0, 0); | ||
83 | + ptype jl = 0; | ||
84 | ptype Pl; | 84 | ptype Pl; |
85 | - ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1); | ||
86 | - for(int l = 0; l<=nl; l++) | ||
87 | - { | ||
88 | - jl = tex2D(texJ, l + 0.5, di + 0.5); | ||
89 | - if(l==0) | ||
90 | - Pl = P[0]; | ||
91 | - else if(l==1) | ||
92 | - { | ||
93 | - Pl = P[1]; | ||
94 | - | ||
95 | - //adjust the cassegrain Legendre function | ||
96 | - Palpha[2] = Palpha[0]; | ||
97 | - rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]); | ||
98 | - Pbeta[2] = Pbeta[0]; | ||
99 | - rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]); | ||
100 | - } | ||
101 | - else | ||
102 | - { | ||
103 | - rts::shift_legendre<ptype>(l, cosTheta, P[0], P[1]); | ||
104 | - | ||
105 | - Pl = P[1]; | ||
106 | - | ||
107 | - //adjust the cassegrain outer Legendre function | ||
108 | - Palpha[2] = Palpha[0]; | ||
109 | - rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]); | ||
110 | - Pbeta[2] = Pbeta[0]; | ||
111 | - rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]); | ||
112 | - } | ||
113 | - | 85 | + ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1); |
86 | + for(int l = 0; l<=nl; l++) | ||
87 | + { | ||
88 | + jl = tex2D(texJ, l + 0.5f, di + 0.5f); | ||
89 | + if(l==0) | ||
90 | + Pl = P[0]; | ||
91 | + else if(l==1) | ||
92 | + { | ||
93 | + Pl = P[1]; | ||
94 | + | ||
95 | + //adjust the cassegrain Legendre function | ||
96 | + Palpha[2] = Palpha[0]; | ||
97 | + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]); | ||
98 | + Pbeta[2] = Pbeta[0]; | ||
99 | + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]); | ||
100 | + } | ||
101 | + else | ||
102 | + { | ||
103 | + rts::shift_legendre<ptype>(l, cosTheta, P[0], P[1]); | ||
104 | + | ||
105 | + Pl = P[1]; | ||
106 | + | ||
107 | + //adjust the cassegrain outer Legendre function | ||
108 | + Palpha[2] = Palpha[0]; | ||
109 | + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]); | ||
110 | + Pbeta[2] = Pbeta[0]; | ||
111 | + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]); | ||
112 | + } | ||
113 | + | ||
114 | sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); | 114 | sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); |
115 | //sumUf += jl; | 115 | //sumUf += jl; |
116 | - | ||
117 | - il *= im; | ||
118 | - } | ||
119 | - | 116 | + |
117 | + il *= im; | ||
118 | + } | ||
119 | + | ||
120 | Uf[i] = sumUf * 2 * PI * A; | 120 | Uf[i] = sumUf * 2 * PI * A; |
121 | - //Uf[i] = u; | 121 | + //Uf[i] = u; |
122 | //return; | 122 | //return; |
123 | } | 123 | } |
124 | 124 | ||
@@ -159,23 +159,23 @@ void nearfieldStruct::scalarUfLut() | @@ -159,23 +159,23 @@ void nearfieldStruct::scalarUfLut() | ||
159 | HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice)); | 159 | HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice)); |
160 | 160 | ||
161 | //----------------Compute the focused field | 161 | //----------------Compute the focused field |
162 | - //create one thread for each pixel of the field slice | ||
163 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | 162 | + //create one thread for each pixel of the field slice |
163 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
164 | dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | 164 | dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); |
165 | - | ||
166 | - //if we are computing a plane wave, call the gpuScalarUfp function | ||
167 | - if(planeWave) | ||
168 | - { | ||
169 | - gpuScalarUfp<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]); | ||
170 | - } | ||
171 | - //otherwise compute the condenser info and create a focused field | ||
172 | - else | 165 | + |
166 | + //if we are computing a plane wave, call the gpuScalarUfp function | ||
167 | + if(planeWave) | ||
168 | + { | ||
169 | + gpuScalarUfp<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]); | ||
170 | + } | ||
171 | + //otherwise compute the condenser info and create a focused field | ||
172 | + else | ||
173 | { | 173 | { |
174 | //pre-compute the cosine of the obscuration and objective angles | 174 | //pre-compute the cosine of the obscuration and objective angles |
175 | ptype cosAlpha = cos(asin(condenser[0])); | 175 | ptype cosAlpha = cos(asin(condenser[0])); |
176 | - ptype cosBeta = cos(asin(condenser[1])); | ||
177 | - //compute the scalar Uf field (this will be in the x_hat channel of Uf) | ||
178 | - gpuScalarUfLut<<<dimGrid, dimBlock>>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR); | 176 | + ptype cosBeta = cos(asin(condenser[1])); |
177 | + //compute the scalar Uf field (this will be in the x_hat channel of Uf) | ||
178 | + gpuScalarUfLut<<<dimGrid, dimBlock>>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR); | ||
179 | } | 179 | } |
180 | 180 | ||
181 | 181 |
nfScalarUpLut.cu
@@ -3,30 +3,30 @@ | @@ -3,30 +3,30 @@ | ||
3 | #include "rts/math/legendre.h" | 3 | #include "rts/math/legendre.h" |
4 | #include <stdlib.h> | 4 | #include <stdlib.h> |
5 | #include "rts/cuda/error.h" | 5 | #include "rts/cuda/error.h" |
6 | -#include "rts/cuda/timer.h" | ||
7 | - | ||
8 | -texture<float2, cudaTextureType2D> texUsp; | ||
9 | -texture<float2, cudaTextureType2D> texUip; | ||
10 | - | 6 | +#include "rts/cuda/timer.h" |
7 | + | ||
8 | +texture<float2, cudaTextureType2D> texUsp; | ||
9 | +texture<float2, cudaTextureType2D> texUip; | ||
10 | + | ||
11 | __global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR) | 11 | __global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR) |
12 | -{ | ||
13 | - /*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field | ||
14 | - produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field. | ||
15 | - | ||
16 | - Us = final scattered field | ||
17 | - k = list of incoming plane waves (Monte-Carlo samples) | ||
18 | - nk = number of incoming MC samples | ||
19 | - kmag= magnitude of the incoming field 2pi/lambda | ||
20 | - dmin= minimum distance of the Usp texture | ||
21 | - dmax= maximum distance of the Usp texture | ||
22 | - f = position of the focus | ||
23 | - ps = position of the sphere | ||
24 | - A = total amplitude of the incident field arriving at the focal spot | ||
25 | - ABCD= rectangle representing the field slice | ||
26 | - uR = resolution of the field slice in the u direction | ||
27 | - vR = resolution of the field slice in the v direction | ||
28 | - dR = resolution of the Usp texture in the d direction | ||
29 | - thetaR= resolution of the Usp texture in the theta direction | 12 | +{ |
13 | + /*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field | ||
14 | + produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field. | ||
15 | + | ||
16 | + Us = final scattered field | ||
17 | + k = list of incoming plane waves (Monte-Carlo samples) | ||
18 | + nk = number of incoming MC samples | ||
19 | + kmag= magnitude of the incoming field 2pi/lambda | ||
20 | + dmin= minimum distance of the Usp texture | ||
21 | + dmax= maximum distance of the Usp texture | ||
22 | + f = position of the focus | ||
23 | + ps = position of the sphere | ||
24 | + A = total amplitude of the incident field arriving at the focal spot | ||
25 | + ABCD= rectangle representing the field slice | ||
26 | + uR = resolution of the field slice in the u direction | ||
27 | + vR = resolution of the field slice in the v direction | ||
28 | + dR = resolution of the Usp texture in the d direction | ||
29 | + thetaR= resolution of the Usp texture in the theta direction | ||
30 | */ | 30 | */ |
31 | 31 | ||
32 | //get the current coordinate in the plane slice | 32 | //get the current coordinate in the plane slice |
@@ -46,47 +46,47 @@ __global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, p | @@ -46,47 +46,47 @@ __global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, p | ||
46 | //get the rtsPoint in world space and then the r vector | 46 | //get the rtsPoint in world space and then the r vector |
47 | bsPoint p = ABCD(u, v); | 47 | bsPoint p = ABCD(u, v); |
48 | bsVector r = p - ps; | 48 | bsVector r = p - ps; |
49 | - ptype d = r.len(); | ||
50 | - float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1); | ||
51 | - float ai = ( (d - dmin)/(a - dmin)) * (aR - 1); | ||
52 | - | ||
53 | - bsComplex sumUs(0, 0); | ||
54 | - //for each plane wave in the wave list | ||
55 | - for(int iw = 0; iw < nk; iw++) | ||
56 | - { | ||
57 | - //normalize the direction vectors and find their inner product | ||
58 | - r = r.norm(); | ||
59 | - ptype cos_theta = k[iw].dot(r); | ||
60 | - if(cos_theta < -1) | ||
61 | - cos_theta = -1; | ||
62 | - if(cos_theta > 1) | ||
63 | - cos_theta = 1; | ||
64 | - float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1); | ||
65 | - | ||
66 | - //compute the phase factor for spheres that are not at the origin | ||
67 | - bsVector c = ps - f; | ||
68 | - bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c))); | ||
69 | - | ||
70 | - //compute the internal field if we are inside a sphere | 49 | + ptype d = r.len(); |
50 | + float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1); | ||
51 | + float ai = ( (d - dmin)/(a - dmin)) * (aR - 1); | ||
52 | + | ||
53 | + bsComplex sumUs(0, 0); | ||
54 | + //for each plane wave in the wave list | ||
55 | + for(int iw = 0; iw < nk; iw++) | ||
56 | + { | ||
57 | + //normalize the direction vectors and find their inner product | ||
58 | + r = r.norm(); | ||
59 | + ptype cos_theta = k[iw].dot(r); | ||
60 | + if(cos_theta < -1) | ||
61 | + cos_theta = -1; | ||
62 | + if(cos_theta > 1) | ||
63 | + cos_theta = 1; | ||
64 | + float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1); | ||
65 | + | ||
66 | + //compute the phase factor for spheres that are not at the origin | ||
67 | + bsVector c = ps - f; | ||
68 | + bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c))); | ||
69 | + | ||
70 | + //compute the internal field if we are inside a sphere | ||
71 | if(d < a) | 71 | if(d < a) |
72 | { | 72 | { |
73 | - float2 Uip = tex2D(texUip, ai + 0.5, thetai + 0.5); | ||
74 | - sumUs += (1.0/nk) * A * phase * bsComplex(Uip.x, Uip.y); | ||
75 | - } | ||
76 | - //otherwise compute the scattered field | ||
77 | - else | ||
78 | - { | ||
79 | - float2 Usp = tex2D(texUsp, di + 0.5, thetai + 0.5); | ||
80 | - sumUs += (1.0/nk) * A * phase * bsComplex(Usp.x, Usp.y); | ||
81 | - } | ||
82 | - | ||
83 | - } | ||
84 | - | ||
85 | - Us[i] += sumUs; | ||
86 | -} | ||
87 | - | ||
88 | -void nearfieldStruct::scalarUpLut() | ||
89 | -{ | 73 | + float2 Uip = tex2D(texUip, ai + 0.5f, thetai + 0.5f); |
74 | + sumUs += (1.0f/nk) * A * phase * bsComplex(Uip.x, Uip.y); | ||
75 | + } | ||
76 | + //otherwise compute the scattered field | ||
77 | + else | ||
78 | + { | ||
79 | + float2 Usp = tex2D(texUsp, di + 0.5f, thetai + 0.5f); | ||
80 | + sumUs += (1.0f/nk) * A * phase * bsComplex(Usp.x, Usp.y); | ||
81 | + } | ||
82 | + | ||
83 | + } | ||
84 | + | ||
85 | + Us[i] += sumUs; | ||
86 | +} | ||
87 | + | ||
88 | +void nearfieldStruct::scalarUpLut() | ||
89 | +{ | ||
90 | //get the number of spheres | 90 | //get the number of spheres |
91 | int nSpheres = sVector.size(); | 91 | int nSpheres = sVector.size(); |
92 | 92 | ||
@@ -103,90 +103,90 @@ void nearfieldStruct::scalarUpLut() | @@ -103,90 +103,90 @@ void nearfieldStruct::scalarUpLut() | ||
103 | //create one thread for each pixel of the field slice | 103 | //create one thread for each pixel of the field slice |
104 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | 104 | dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); |
105 | dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | 105 | dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); |
106 | - | ||
107 | - //copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff) | ||
108 | - bsVector* gpuk; | ||
109 | - int nWaves; | ||
110 | - ptype subA; | ||
111 | - if(planeWave) | ||
112 | - { | ||
113 | - nWaves = 1; | ||
114 | - HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) ); | ||
115 | - HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice)); | ||
116 | - subA = A; | ||
117 | - } | ||
118 | - else | ||
119 | - { | ||
120 | - nWaves = inWaves.size(); | ||
121 | - HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nWaves ) ); | ||
122 | - HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nWaves, cudaMemcpyHostToDevice)); | ||
123 | - //compute the amplitude that makes it through the condenser | ||
124 | - subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) ); | ||
125 | - } | 106 | + |
107 | + //copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff) | ||
108 | + bsVector* gpuk; | ||
109 | + int nWaves; | ||
110 | + ptype subA; | ||
111 | + if(planeWave) | ||
112 | + { | ||
113 | + nWaves = 1; | ||
114 | + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) ); | ||
115 | + HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice)); | ||
116 | + subA = A; | ||
117 | + } | ||
118 | + else | ||
119 | + { | ||
120 | + nWaves = inWaves.size(); | ||
121 | + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nWaves ) ); | ||
122 | + HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nWaves, cudaMemcpyHostToDevice)); | ||
123 | + //compute the amplitude that makes it through the condenser | ||
124 | + subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) ); | ||
125 | + } | ||
126 | 126 | ||
127 | //for each sphere | 127 | //for each sphere |
128 | for(int s = 0; s<nSpheres; s++) | 128 | for(int s = 0; s<nSpheres; s++) |
129 | - { | ||
130 | - //get the current sphere | ||
131 | - //sphere S = sVector[s]; | ||
132 | - | ||
133 | - //allocate space for the Usp and Uip textures | ||
134 | - //allocate the cuda array | ||
135 | - cudaArray* arrayUsp; | ||
136 | - cudaArray* arrayUip; | ||
137 | - cudaChannelFormatDesc channelDescUsp = | ||
138 | - cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat); | ||
139 | - cudaChannelFormatDesc channelDescUip = | ||
140 | - cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat); | ||
141 | - int dR = sVector[s].Usp.R[0]; | ||
142 | - int thetaR = sVector[s].Usp.R[1]; | ||
143 | - int aR = sVector[s].Uip.R[0]; | ||
144 | - HANDLE_ERROR(cudaMallocArray(&arrayUsp, &channelDescUsp, dR, thetaR)); | ||
145 | - HANDLE_ERROR(cudaMallocArray(&arrayUip, &channelDescUip, aR, thetaR)); | ||
146 | - | ||
147 | - texUsp.addressMode[0] = cudaAddressModeMirror; | ||
148 | - texUsp.addressMode[1] = cudaAddressModeMirror; | ||
149 | - texUsp.filterMode = cudaFilterModeLinear; | ||
150 | - texUsp.normalized = false; | ||
151 | - | ||
152 | - texUip.addressMode[0] = cudaAddressModeMirror; | ||
153 | - texUip.addressMode[1] = cudaAddressModeMirror; | ||
154 | - texUip.filterMode = cudaFilterModeLinear; | ||
155 | - texUip.normalized = false; | ||
156 | - HANDLE_ERROR(cudaBindTextureToArray(texUsp, arrayUsp, channelDescUsp)); | ||
157 | - HANDLE_ERROR(cudaBindTextureToArray(texUip, arrayUip, channelDescUip)); | ||
158 | - | ||
159 | - //copy the LUT to the Usp texture | ||
160 | - HANDLE_ERROR( cudaMemcpy2DToArray(arrayUsp, 0, 0, sVector[s].Usp.x_hat, dR*sizeof(float2), dR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice)); | ||
161 | - HANDLE_ERROR( cudaMemcpy2DToArray(arrayUip, 0, 0, sVector[s].Uip.x_hat, aR*sizeof(float2), aR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice)); | ||
162 | - | 129 | + { |
130 | + //get the current sphere | ||
131 | + //sphere S = sVector[s]; | ||
132 | + | ||
133 | + //allocate space for the Usp and Uip textures | ||
134 | + //allocate the cuda array | ||
135 | + cudaArray* arrayUsp; | ||
136 | + cudaArray* arrayUip; | ||
137 | + cudaChannelFormatDesc channelDescUsp = | ||
138 | + cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat); | ||
139 | + cudaChannelFormatDesc channelDescUip = | ||
140 | + cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat); | ||
141 | + int dR = sVector[s].Usp.R[0]; | ||
142 | + int thetaR = sVector[s].Usp.R[1]; | ||
143 | + int aR = sVector[s].Uip.R[0]; | ||
144 | + HANDLE_ERROR(cudaMallocArray(&arrayUsp, &channelDescUsp, dR, thetaR)); | ||
145 | + HANDLE_ERROR(cudaMallocArray(&arrayUip, &channelDescUip, aR, thetaR)); | ||
146 | + | ||
147 | + texUsp.addressMode[0] = cudaAddressModeMirror; | ||
148 | + texUsp.addressMode[1] = cudaAddressModeMirror; | ||
149 | + texUsp.filterMode = cudaFilterModeLinear; | ||
150 | + texUsp.normalized = false; | ||
151 | + | ||
152 | + texUip.addressMode[0] = cudaAddressModeMirror; | ||
153 | + texUip.addressMode[1] = cudaAddressModeMirror; | ||
154 | + texUip.filterMode = cudaFilterModeLinear; | ||
155 | + texUip.normalized = false; | ||
156 | + HANDLE_ERROR(cudaBindTextureToArray(texUsp, arrayUsp, channelDescUsp)); | ||
157 | + HANDLE_ERROR(cudaBindTextureToArray(texUip, arrayUip, channelDescUip)); | ||
158 | + | ||
159 | + //copy the LUT to the Usp texture | ||
160 | + HANDLE_ERROR( cudaMemcpy2DToArray(arrayUsp, 0, 0, sVector[s].Usp.x_hat, dR*sizeof(float2), dR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice)); | ||
161 | + HANDLE_ERROR( cudaMemcpy2DToArray(arrayUip, 0, 0, sVector[s].Uip.x_hat, aR*sizeof(float2), aR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice)); | ||
162 | + | ||
163 | gpuScalarUpLut<<<dimGrid, dimBlock>>>(U.x_hat, | 163 | gpuScalarUpLut<<<dimGrid, dimBlock>>>(U.x_hat, |
164 | - gpuk, | 164 | + gpuk, |
165 | nWaves, | 165 | nWaves, |
166 | - 2 * PI / lambda, | ||
167 | - sVector[s].a, | ||
168 | - sVector[s].d_min, | 166 | + 2 * PI / lambda, |
167 | + sVector[s].a, | ||
168 | + sVector[s].d_min, | ||
169 | sVector[s].d_max, | 169 | sVector[s].d_max, |
170 | focus, | 170 | focus, |
171 | - sVector[s].p, | ||
172 | - subA, | 171 | + sVector[s].p, |
172 | + subA, | ||
173 | pos, | 173 | pos, |
174 | U.R[0], | 174 | U.R[0], |
175 | - U.R[1], | ||
176 | - dR, | ||
177 | - aR, | ||
178 | - thetaR); | ||
179 | - | ||
180 | - cudaFreeArray(arrayUsp); | ||
181 | - cudaFreeArray(arrayUip); | ||
182 | - | ||
183 | - } | ||
184 | - | 175 | + U.R[1], |
176 | + dR, | ||
177 | + aR, | ||
178 | + thetaR); | ||
179 | + | ||
180 | + cudaFreeArray(arrayUsp); | ||
181 | + cudaFreeArray(arrayUip); | ||
182 | + | ||
183 | + } | ||
184 | + | ||
185 | 185 | ||
186 | //store the time to compute the scattered field | 186 | //store the time to compute the scattered field |
187 | - t_Us = gpuStopTimer(); | ||
188 | - | ||
189 | - //free monte-carlo samples | ||
190 | - cudaFree(gpuk); | ||
191 | - | ||
192 | -} | 187 | + t_Us = gpuStopTimer(); |
188 | + | ||
189 | + //free monte-carlo samples | ||
190 | + cudaFree(gpuk); | ||
191 | + | ||
192 | +} |
nfScalarUs.cu
@@ -7,47 +7,47 @@ | @@ -7,47 +7,47 @@ | ||
7 | 7 | ||
8 | __device__ bsComplex calc_Us(ptype kd, ptype cos_theta, int Nl, bsComplex* B) | 8 | __device__ bsComplex calc_Us(ptype kd, ptype cos_theta, int Nl, bsComplex* B) |
9 | { | 9 | { |
10 | - //initialize the spherical Bessel functions | ||
11 | - ptype j[2]; | ||
12 | - rts::init_sbesselj<ptype>(kd, j); | ||
13 | - ptype y[2]; | ||
14 | - rts::init_sbessely<ptype>(kd, y); | ||
15 | - | ||
16 | - //initialize the Legendre polynomial | ||
17 | - ptype P[2]; | ||
18 | - rts::init_legendre<ptype>(cos_theta, P[0], P[1]); | ||
19 | - | ||
20 | - //initialize the spherical Hankel function | ||
21 | - bsComplex h((ptype)0, (ptype)0); | ||
22 | - | ||
23 | - //initialize the result | ||
24 | - bsComplex Us((ptype)0, (ptype)0); | ||
25 | - | ||
26 | - //for each order up to Nl | ||
27 | - for(int l=0; l<=Nl; l++) | ||
28 | - { | ||
29 | - if(l == 0) | ||
30 | - { | ||
31 | - h.r = j[0]; | ||
32 | - h.i = y[0]; | ||
33 | - Us += B[0] * h * P[0]; | ||
34 | - } | ||
35 | - else | ||
36 | - { | ||
37 | - //shift the bessel functions and legendre polynomials | ||
38 | - if(l > 1) | ||
39 | - { | ||
40 | - rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]); | ||
41 | - rts::shift_sbesselj<ptype>(l, kd, j); | ||
42 | - rts::shift_sbessely<ptype>(l, kd, y); | ||
43 | - } | ||
44 | - | ||
45 | - h.r = j[1]; | ||
46 | - h.i = y[1]; | ||
47 | - Us += B[l] * h * P[1]; | ||
48 | - | ||
49 | - | ||
50 | - } | 10 | + //initialize the spherical Bessel functions |
11 | + ptype j[2]; | ||
12 | + rts::init_sbesselj<ptype>(kd, j); | ||
13 | + ptype y[2]; | ||
14 | + rts::init_sbessely<ptype>(kd, y); | ||
15 | + | ||
16 | + //initialize the Legendre polynomial | ||
17 | + ptype P[2]; | ||
18 | + rts::init_legendre<ptype>(cos_theta, P[0], P[1]); | ||
19 | + | ||
20 | + //initialize the spherical Hankel function | ||
21 | + bsComplex h((ptype)0, (ptype)0); | ||
22 | + | ||
23 | + //initialize the result | ||
24 | + bsComplex Us((ptype)0, (ptype)0); | ||
25 | + | ||
26 | + //for each order up to Nl | ||
27 | + for(int l=0; l<=Nl; l++) | ||
28 | + { | ||
29 | + if(l == 0) | ||
30 | + { | ||
31 | + h.r = j[0]; | ||
32 | + h.i = y[0]; | ||
33 | + Us += B[0] * h * P[0]; | ||
34 | + } | ||
35 | + else | ||
36 | + { | ||
37 | + //shift the bessel functions and legendre polynomials | ||
38 | + if(l > 1) | ||
39 | + { | ||
40 | + rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]); | ||
41 | + rts::shift_sbesselj<ptype>(l, kd, j); | ||
42 | + rts::shift_sbessely<ptype>(l, kd, y); | ||
43 | + } | ||
44 | + | ||
45 | + h.r = j[1]; | ||
46 | + h.i = y[1]; | ||
47 | + Us += B[l] * h * P[1]; | ||
48 | + | ||
49 | + | ||
50 | + } | ||
51 | } | 51 | } |
52 | return Us; | 52 | return Us; |
53 | } | 53 | } |
@@ -59,41 +59,41 @@ __device__ bsComplex calc_Ui(bsComplex knd, ptype cos_theta, int Nl, bsComplex* | @@ -59,41 +59,41 @@ __device__ bsComplex calc_Ui(bsComplex knd, ptype cos_theta, int Nl, bsComplex* | ||
59 | bsComplex Ui((ptype)0, (ptype)0); | 59 | bsComplex Ui((ptype)0, (ptype)0); |
60 | 60 | ||
61 | //deal with rtsPoints near zero | 61 | //deal with rtsPoints near zero |
62 | - if(real(knd) < EPSILON_FLOAT) | ||
63 | - { | ||
64 | - //for(int l=0; l<Nl; l++) | ||
65 | - Ui = A[0]; | ||
66 | - return Ui; | 62 | + if(real(knd) < EPSILON_FLOAT) |
63 | + { | ||
64 | + //for(int l=0; l<Nl; l++) | ||
65 | + Ui = A[0]; | ||
66 | + return Ui; | ||
67 | } | 67 | } |
68 | 68 | ||
69 | - //initialize the spherical Bessel functions | ||
70 | - bsComplex j[2]; | ||
71 | - rts::init_sbesselj<bsComplex>(knd, j); | ||
72 | - | ||
73 | - //initialize the Legendre polynomial | ||
74 | - ptype P[2]; | ||
75 | - rts::init_legendre<ptype>(cos_theta, P[0], P[1]); | ||
76 | - | ||
77 | - //for each order up to Nl | ||
78 | - for(int l=0; l<=Nl; l++) | ||
79 | - { | ||
80 | - if(l == 0) | ||
81 | - { | ||
82 | - Ui += A[0] * j[0] * P[0]; | ||
83 | - } | ||
84 | - else | ||
85 | - { | ||
86 | - //shift the bessel functions and legendre polynomials | ||
87 | - if(l > 1) | ||
88 | - { | ||
89 | - rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]); | ||
90 | - rts::shift_sbesselj<bsComplex>(l, knd, j); | ||
91 | - } | ||
92 | - | ||
93 | - Ui += A[l] * j[1] * P[1]; | ||
94 | - | ||
95 | - | ||
96 | - } | 69 | + //initialize the spherical Bessel functions |
70 | + bsComplex j[2]; | ||
71 | + rts::init_sbesselj<bsComplex>(knd, j); | ||
72 | + | ||
73 | + //initialize the Legendre polynomial | ||
74 | + ptype P[2]; | ||
75 | + rts::init_legendre<ptype>(cos_theta, P[0], P[1]); | ||
76 | + | ||
77 | + //for each order up to Nl | ||
78 | + for(int l=0; l<=Nl; l++) | ||
79 | + { | ||
80 | + if(l == 0) | ||
81 | + { | ||
82 | + Ui += A[0] * j[0] * P[0]; | ||
83 | + } | ||
84 | + else | ||
85 | + { | ||
86 | + //shift the bessel functions and legendre polynomials | ||
87 | + if(l > 1) | ||
88 | + { | ||
89 | + rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]); | ||
90 | + rts::shift_sbesselj<bsComplex>(l, knd, j); | ||
91 | + } | ||
92 | + | ||
93 | + Ui += A[l] * j[1] * P[1]; | ||
94 | + | ||
95 | + | ||
96 | + } | ||
97 | } | 97 | } |
98 | return Ui; | 98 | return Ui; |
99 | } | 99 | } |
@@ -118,39 +118,39 @@ __global__ void gpuScalarUsp(bsComplex* Us, bsVector* k, int nk, ptype kmag, bsP | @@ -118,39 +118,39 @@ __global__ void gpuScalarUsp(bsComplex* Us, bsVector* k, int nk, ptype kmag, bsP | ||
118 | //get the rtsPoint in world space and then the r vector | 118 | //get the rtsPoint in world space and then the r vector |
119 | bsPoint p = ABCD(u, v); | 119 | bsPoint p = ABCD(u, v); |
120 | bsVector r = p - ps; | 120 | bsVector r = p - ps; |
121 | - ptype d = r.len(); | ||
122 | - | ||
123 | - bsComplex sumUs(0, 0); | ||
124 | - //for each plane wave in the wave list | ||
125 | - for(int iw = 0; iw < nk; iw++) | ||
126 | - { | ||
127 | - //normalize the direction vectors and find their inner product | ||
128 | - r = r.norm(); | ||
129 | - ptype cos_theta = k[iw].dot(r); | ||
130 | - | ||
131 | - //compute the phase factor for spheres that are not at the origin | ||
132 | - bsVector c = ps - f; | ||
133 | - bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c))); | ||
134 | - | ||
135 | - //compute the internal field if we are inside a sphere | 121 | + ptype d = r.len(); |
122 | + | ||
123 | + bsComplex sumUs(0, 0); | ||
124 | + //for each plane wave in the wave list | ||
125 | + for(int iw = 0; iw < nk; iw++) | ||
126 | + { | ||
127 | + //normalize the direction vectors and find their inner product | ||
128 | + r = r.norm(); | ||
129 | + ptype cos_theta = k[iw].dot(r); | ||
130 | + | ||
131 | + //compute the phase factor for spheres that are not at the origin | ||
132 | + bsVector c = ps - f; | ||
133 | + bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c))); | ||
134 | + | ||
135 | + //compute the internal field if we are inside a sphere | ||
136 | if(d <= a) | 136 | if(d <= a) |
137 | { | 137 | { |
138 | bsComplex knd = kmag * d * n; | 138 | bsComplex knd = kmag * d * n; |
139 | - sumUs += (1.0/nk) * A * phase * calc_Ui(knd, cos_theta, Nl, Alpha); | ||
140 | - } | ||
141 | - //otherwise compute the scattered field | ||
142 | - else | ||
143 | - { | ||
144 | - //compute the argument for the spherical Hankel function | ||
145 | - ptype kd = kmag * d; | ||
146 | - sumUs += (1.0/nk) * A * phase * calc_Us(kd, cos_theta, Nl, Beta); | ||
147 | - } | ||
148 | - | ||
149 | - } | ||
150 | - | ||
151 | - Us[i] += sumUs; | ||
152 | - | ||
153 | - | 139 | + sumUs += (1.0f/nk) * A * phase * calc_Ui(knd, cos_theta, Nl, Alpha); |
140 | + } | ||
141 | + //otherwise compute the scattered field | ||
142 | + else | ||
143 | + { | ||
144 | + //compute the argument for the spherical Hankel function | ||
145 | + ptype kd = kmag * d; | ||
146 | + sumUs += (1.0f/nk) * A * phase * calc_Us(kd, cos_theta, Nl, Beta); | ||
147 | + } | ||
148 | + | ||
149 | + } | ||
150 | + | ||
151 | + Us[i] += sumUs; | ||
152 | + | ||
153 | + | ||
154 | } | 154 | } |
155 | 155 | ||
156 | void nearfieldStruct::scalarUs() | 156 | void nearfieldStruct::scalarUs() |
@@ -190,17 +190,17 @@ void nearfieldStruct::scalarUs() | @@ -190,17 +190,17 @@ void nearfieldStruct::scalarUs() | ||
190 | HANDLE_ERROR(cudaMemcpy(gpuB, &sVector[s].B[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice)); | 190 | HANDLE_ERROR(cudaMemcpy(gpuB, &sVector[s].B[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice)); |
191 | HANDLE_ERROR(cudaMemcpy(gpuA, &sVector[s].A[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice)); | 191 | HANDLE_ERROR(cudaMemcpy(gpuA, &sVector[s].A[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice)); |
192 | 192 | ||
193 | - //if we are computing a plane wave, call the gpuScalarUfp function | ||
194 | - sphere S = sVector[s]; | ||
195 | - bsVector* gpuk; | 193 | + //if we are computing a plane wave, call the gpuScalarUfp function |
194 | + sphere S = sVector[s]; | ||
195 | + bsVector* gpuk; | ||
196 | 196 | ||
197 | if(planeWave) | 197 | if(planeWave) |
198 | - { | ||
199 | - //if this is a single plane wave, assume it goes along direction k (copy the k vector to the GPU) | ||
200 | - HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) )); | 198 | + { |
199 | + //if this is a single plane wave, assume it goes along direction k (copy the k vector to the GPU) | ||
200 | + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) )); | ||
201 | HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice)); | 201 | HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice)); |
202 | gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat, | 202 | gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat, |
203 | - gpuk, | 203 | + gpuk, |
204 | 1, | 204 | 1, |
205 | 2 * PI / lambda, | 205 | 2 * PI / lambda, |
206 | focus, | 206 | focus, |
@@ -213,20 +213,20 @@ void nearfieldStruct::scalarUs() | @@ -213,20 +213,20 @@ void nearfieldStruct::scalarUs() | ||
213 | A, | 213 | A, |
214 | pos, | 214 | pos, |
215 | U.R[0], | 215 | U.R[0], |
216 | - U.R[1]); | 216 | + U.R[1]); |
217 | HANDLE_ERROR(cudaFree(gpuk)); | 217 | HANDLE_ERROR(cudaFree(gpuk)); |
218 | - } | ||
219 | - //otherwise copy all of the monte-carlo samples to the GPU and compute | ||
220 | - else | ||
221 | - { | ||
222 | - HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * inWaves.size() )); | ||
223 | - HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * inWaves.size(), cudaMemcpyHostToDevice)); | ||
224 | - | ||
225 | - //compute the amplitude that makes it through the condenser | ||
226 | - ptype subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) ); | ||
227 | - | 218 | + } |
219 | + //otherwise copy all of the monte-carlo samples to the GPU and compute | ||
220 | + else | ||
221 | + { | ||
222 | + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * inWaves.size() )); | ||
223 | + HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * inWaves.size(), cudaMemcpyHostToDevice)); | ||
224 | + | ||
225 | + //compute the amplitude that makes it through the condenser | ||
226 | + ptype subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) ); | ||
227 | + | ||
228 | gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat, | 228 | gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat, |
229 | - gpuk, | 229 | + gpuk, |
230 | inWaves.size(), | 230 | inWaves.size(), |
231 | 2 * PI / lambda, | 231 | 2 * PI / lambda, |
232 | focus, | 232 | focus, |
@@ -239,17 +239,17 @@ void nearfieldStruct::scalarUs() | @@ -239,17 +239,17 @@ void nearfieldStruct::scalarUs() | ||
239 | subA, | 239 | subA, |
240 | pos, | 240 | pos, |
241 | U.R[0], | 241 | U.R[0], |
242 | - U.R[1]); | ||
243 | - HANDLE_ERROR(cudaFree(gpuk)); | ||
244 | - | ||
245 | - | ||
246 | - } | ||
247 | - | ||
248 | - //free memory for scattering coefficients | ||
249 | - HANDLE_ERROR(cudaFree(gpuA)); | 242 | + U.R[1]); |
243 | + HANDLE_ERROR(cudaFree(gpuk)); | ||
244 | + | ||
245 | + | ||
246 | + } | ||
247 | + | ||
248 | + //free memory for scattering coefficients | ||
249 | + HANDLE_ERROR(cudaFree(gpuA)); | ||
250 | HANDLE_ERROR(cudaFree(gpuB)); | 250 | HANDLE_ERROR(cudaFree(gpuB)); |
251 | - } | ||
252 | - | 251 | + } |
252 | + | ||
253 | 253 | ||
254 | //store the time to compute the scattered field | 254 | //store the time to compute the scattered field |
255 | t_Us = gpuStopTimer(); | 255 | t_Us = gpuStopTimer(); |
options.h deleted
1 | -//AnyOption for command-line processing | ||
2 | -//#include "anyoption.h" | ||
3 | - | ||
4 | -#include "rts/optics/material.h" | ||
5 | - | ||
6 | -#include "nearfield.h" | ||
7 | -#include "microscope.h" | ||
8 | -#include "rts/visualization/colormap.h" | ||
9 | -#include "fileout.h" | ||
10 | -//extern nearfieldStruct* NF; | ||
11 | -extern microscopeStruct* SCOPE; | ||
12 | -extern fileoutStruct gFileOut; | ||
13 | - | ||
14 | -//default values | ||
15 | -#include "defaults.h" | ||
16 | - | ||
17 | -#include <string> | ||
18 | -#include <sstream> | ||
19 | -#include <fstream> | ||
20 | -#include <limits> | ||
21 | -using namespace std; | ||
22 | - | ||
23 | -#include <boost/program_options.hpp> | ||
24 | -namespace po = boost::program_options; | ||
25 | - | ||
26 | -extern bool verbose; | ||
27 | -extern bool gui; | ||
28 | - | ||
29 | - | ||
30 | - | ||
31 | -static void lNearfield(po::variables_map vm) | ||
32 | -{ | ||
33 | - //test to see if we are running a vector field simulation | ||
34 | - bool vectorField = false; | ||
35 | - if(vm.count("vector")) | ||
36 | - vectorField = true; | ||
37 | - SCOPE->scalarSim = !vectorField; | ||
38 | - | ||
39 | - //test to see if we are simulating a plane wave | ||
40 | - bool planeWave = DEFAULT_PLANEWAVE; | ||
41 | - if(vm.count("plane-wave")) | ||
42 | - planeWave = !planeWave; | ||
43 | - SCOPE->nf.planeWave = planeWave; | ||
44 | - | ||
45 | - //get the incident field amplitude | ||
46 | - SCOPE->nf.A = vm["amplitude"].as<ptype>(); | ||
47 | - | ||
48 | - //get the condenser parameters | ||
49 | - SCOPE->nf.condenser[0] = DEFAULT_CONDENSER_MIN; | ||
50 | - SCOPE->nf.condenser[1] = DEFAULT_CONDENSER_MAX; | ||
51 | - | ||
52 | - if(vm.count("condenser")) | ||
53 | - { | ||
54 | - vector<ptype> cparams = vm["condenser"].as< vector<ptype> >(); | ||
55 | - | ||
56 | - if(cparams.size() == 1) | ||
57 | - SCOPE->nf.condenser[1] = cparams[0]; | ||
58 | - else | ||
59 | - { | ||
60 | - SCOPE->nf.condenser[0] = cparams[0]; | ||
61 | - SCOPE->nf.condenser[1] = cparams[1]; | ||
62 | - } | ||
63 | - } | ||
64 | - | ||
65 | - | ||
66 | - //get the focal rtsPoint position | ||
67 | - SCOPE->nf.focus[0] = DEFAULT_FOCUS_X; | ||
68 | - SCOPE->nf.focus[1] = DEFAULT_FOCUS_Y; | ||
69 | - SCOPE->nf.focus[2] = DEFAULT_FOCUS_Z; | ||
70 | - if(vm.count("focus")) | ||
71 | - { | ||
72 | - vector<ptype> fpos = vm["focus"].as< vector<ptype> >(); | ||
73 | - if(fpos.size() != 3) | ||
74 | - { | ||
75 | - cout<<"BIMSIM Error - the incident focal point is incorrectly specified; it must have three components."<<endl; | ||
76 | - exit(1); | ||
77 | - } | ||
78 | - SCOPE->nf.focus[0] = fpos[0]; | ||
79 | - SCOPE->nf.focus[1] = fpos[1]; | ||
80 | - SCOPE->nf.focus[2] = fpos[2]; | ||
81 | - } | ||
82 | - | ||
83 | - //get the incident light direction (k-vector) | ||
84 | - bsVector spherical(1, 0, 0); | ||
85 | - | ||
86 | - //if a k-vector is specified | ||
87 | - if(vm.count("k")) | ||
88 | - { | ||
89 | - vector<ptype> kvec = vm["k"].as< vector<ptype> >(); | ||
90 | - if(kvec.size() != 2) | ||
91 | - { | ||
92 | - cout<<"BIMSIM Error - k-vector is not specified correctly: it must contain two elements"<<endl; | ||
93 | - exit(1); | ||
94 | - } | ||
95 | - spherical[1] = kvec[0]; | ||
96 | - spherical[2] = kvec[1]; | ||
97 | - } | ||
98 | - SCOPE->nf.k = spherical.sph2cart(); | ||
99 | - | ||
100 | - | ||
101 | - //incident field order | ||
102 | - SCOPE->nf.m = vm["field-order"].as<int>(); | ||
103 | - | ||
104 | - //number of Monte-Carlo samples | ||
105 | - SCOPE->nf.nWaves = vm["samples"].as<int>(); | ||
106 | - | ||
107 | - //random number seed for Monte-Carlo samples | ||
108 | - if(vm.count("seed")) | ||
109 | - srand(vm["seed"].as<unsigned int>()); | ||
110 | - | ||
111 | - | ||
112 | - | ||
113 | -} | ||
114 | - | ||
115 | - | ||
116 | -static void loadOutputParams(po::variables_map vm) | ||
117 | -{ | ||
118 | - //append simulation results to previous binary files | ||
119 | - gFileOut.append = DEFAULT_APPEND; | ||
120 | - if(vm.count("append")) | ||
121 | - gFileOut.append = true; | ||
122 | - | ||
123 | - //image parameters | ||
124 | - //component of the field to be saved | ||
125 | - std::string fieldStr; | ||
126 | - fieldStr = vm["output-type"].as<string>(); | ||
127 | - | ||
128 | - if(fieldStr == "magnitude") | ||
129 | - gFileOut.field = fileoutStruct::fieldMag; | ||
130 | - else if(fieldStr == "intensity") | ||
131 | - gFileOut.field = fileoutStruct::fieldIntensity; | ||
132 | - else if(fieldStr == "polarization") | ||
133 | - gFileOut.field = fileoutStruct::fieldPolar; | ||
134 | - else if(fieldStr == "imaginary") | ||
135 | - gFileOut.field = fileoutStruct::fieldImag; | ||
136 | - else if(fieldStr == "real") | ||
137 | - gFileOut.field = fileoutStruct::fieldReal; | ||
138 | - else if(fieldStr == "angular-spectrum") | ||
139 | - gFileOut.field = fileoutStruct::fieldAngularSpectrum; | ||
140 | - | ||
141 | - | ||
142 | - //image file names | ||
143 | - gFileOut.intFile = vm["intensity"].as<string>(); | ||
144 | - gFileOut.absFile = vm["absorbance"].as<string>(); |