Commit 396a5f1225210b5c52e5b6df9c9d96e7f63176e5

Authored by David Mayerich
1 parent 4a9d9281

Added custom command-line argument handling (rts::arglist), replacing the Boost program_options dependency

@@ -12,10 +12,10 @@ set(CMAKE_AUTOMOC ON) @@ -12,10 +12,10 @@ set(CMAKE_AUTOMOC ON)
12 set(CMAKE_INCLUDE_CURRENT_DIR ON) 12 set(CMAKE_INCLUDE_CURRENT_DIR ON)
13 13
14 #find BOOST 14 #find BOOST
15 -set(Boost_USE_STATIC_LIBS ON)  
16 -set(Boost_USE_MULTITHREADED ON)  
17 -set(Boost_USE_STATIC_RUNTIME OFF)  
18 -find_package( Boost 1.46.0 COMPONENTS program_options ) 15 +#set(Boost_USE_STATIC_LIBS ON)
  16 +#set(Boost_USE_MULTITHREADED ON)
  17 +#set(Boost_USE_STATIC_RUNTIME OFF)
  18 +#find_package( Boost 1.46.0 COMPONENTS program_options )
19 19
20 #find the Qt5 20 #find the Qt5
21 find_package(Qt5Widgets REQUIRED) 21 find_package(Qt5Widgets REQUIRED)
@@ -28,7 +28,7 @@ include_directories(${QT_INCLUDE_DIRECTORY}) @@ -28,7 +28,7 @@ include_directories(${QT_INCLUDE_DIRECTORY})
28 find_package(CUDA) 28 find_package(CUDA)
29 29
30 #ask the user for the RTS location 30 #ask the user for the RTS location
31 -find_package(RTS REQUIRED) 31 +#find_package(RTS REQUIRED)
32 32
33 #set the include directories 33 #set the include directories
34 include_directories( 34 include_directories(
@@ -37,8 +37,9 @@ include_directories( @@ -37,8 +37,9 @@ include_directories(
37 ${Qt5Core_INCLUDE_DIRS} 37 ${Qt5Core_INCLUDE_DIRS}
38 ${Qt5Gui_INCLUDE_DIRS} 38 ${Qt5Gui_INCLUDE_DIRS}
39 # ${Qt5OpenGL_INCLUDE_DIRS} 39 # ${Qt5OpenGL_INCLUDE_DIRS}
40 - ${RTS_INCLUDE_DIR}  
41 - ${Boost_INCLUDE_DIR} 40 +# ${RTS_INCLUDE_DIR}
  41 +# ${Boost_INCLUDE_DIR}
  42 + ${CMAKE_CURRENT_SOURCE_DIR}
42 ) 43 )
43 44
44 #build position independent code for Qt (-fPIC) 45 #build position independent code for Qt (-fPIC)
@@ -55,6 +56,9 @@ file(GLOB SRC_H "*.h") @@ -55,6 +56,9 @@ file(GLOB SRC_H "*.h")
55 file(GLOB SRC_UI "*.ui") 56 file(GLOB SRC_UI "*.ui")
56 file(GLOB SRC_CU "*.cu") 57 file(GLOB SRC_CU "*.cu")
57 58
  59 +#assign RTS source files
  60 +file(GLOB SRC_RTS "rts/source/*.cpp")
  61 +
58 #determine which source files have to be moc'd 62 #determine which source files have to be moc'd
59 Qt5_wrap_cpp(UI_MOC ${SRC_H}) 63 Qt5_wrap_cpp(UI_MOC ${SRC_H})
60 Qt5_wrap_ui(UI_H ${SRC_UI}) 64 Qt5_wrap_ui(UI_H ${SRC_UI})
@@ -70,6 +74,7 @@ cuda_add_executable(bimsim @@ -70,6 +74,7 @@ cuda_add_executable(bimsim
70 ${UI_H} 74 ${UI_H}
71 ${SRC_UI} 75 ${SRC_UI}
72 ${SRC_CU} 76 ${SRC_CU}
  77 + ${SRC_RTS}
73 ) 78 )
74 79
75 #specify which qt5 modules to use 80 #specify which qt5 modules to use
@@ -83,7 +88,7 @@ target_link_libraries(bimsim @@ -83,7 +88,7 @@ target_link_libraries(bimsim
83 # ${Qt5OpenGL_LIBRARIES} 88 # ${Qt5OpenGL_LIBRARIES}
84 ${CUDA_cufft_LIBRARY} 89 ${CUDA_cufft_LIBRARY}
85 ${CUDA_cublas_LIBRARY} 90 ${CUDA_cublas_LIBRARY}
86 - ${Boost_LIBRARIES} 91 +# ${Boost_LIBRARIES}
87 ) 92 )
88 93
89 94
@@ -14,10 +14,10 @@ typedef double ptype; @@ -14,10 +14,10 @@ typedef double ptype;
14 #define BLOCK 256 14 #define BLOCK 256
15 #define SQRT_BLOCK 16 15 #define SQRT_BLOCK 16
16 16
17 -#define PI 3.14159 17 +#define PI 3.14159f
18 18
19 //a very small number 19 //a very small number
20 -#define EPSILON 0.00001 20 +#define EPSILON 0.00001f
21 21
22 //CUDA hybrid code - complex class should run on both the CPU and GPU 22 //CUDA hybrid code - complex class should run on both the CPU and GPU
23 23
@@ -8,22 +8,26 @@ @@ -8,22 +8,26 @@
8 #define DEFAULT_SPHERE_A 1 8 #define DEFAULT_SPHERE_A 1
9 9
10 //default near field parameters 10 //default near field parameters
11 -#define DEFAULT_LAMBDA 1  
12 -#define DEFAULT_AMPLITUDE 1 11 +#define DEFAULT_LAMBDA "1"
  12 +#define DEFAULT_AMPLITUDE "1"
  13 +#define DEFAULT_MATERIAL "1.4 0.05"
13 #define DEFAULT_N 1.4 14 #define DEFAULT_N 1.4
14 #define DEFAULT_K 0.5 15 #define DEFAULT_K 0.5
15 -#define DEFAULT_FOCUS_X 0  
16 -#define DEFAULT_FOCUS_Y 0  
17 -#define DEFAULT_FOCUS_Z 0 16 +#define DEFAULT_FOCUS "0 0 0"
  17 +//#define DEFAULT_FOCUS_X "0"
  18 +//#define DEFAULT_FOCUS_Y "0"
  19 +//#define DEFAULT_FOCUS_Z "0"
18 //#define DEFAULT_INCIDENT_ORDER 20 20 //#define DEFAULT_INCIDENT_ORDER 20
19 #define DEFAULT_STABILITY_PARM 1.4 21 #define DEFAULT_STABILITY_PARM 1.4
20 22
21 //optics 23 //optics
22 -#define DEFAULT_CONDENSER_MIN 0  
23 -#define DEFAULT_CONDENSER_MAX 1 24 +//#define DEFAULT_CONDENSER_MIN "0.0"
  25 +//#define DEFAULT_CONDENSER_MAX "1.0"
  26 +#define DEFAULT_CONDENSER "0 1"
24 27
25 -#define DEFAULT_OBJECTIVE_MIN 0  
26 -#define DEFAULT_OBJECTIVE_MAX 1 28 +//#define DEFAULT_OBJECTIVE_MIN "0"
  29 +//#define DEFAULT_OBJECTIVE_MAX "1"
  30 +#define DEFAULT_OBJECTIVE "0 1"
27 31
28 //incident light direction 32 //incident light direction
29 #define DEFAULT_K_THETA 0 33 #define DEFAULT_K_THETA 0
@@ -35,15 +39,17 @@ @@ -35,15 +39,17 @@
35 #define DEFAULT_APPEND false 39 #define DEFAULT_APPEND false
36 //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective 40 //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective
37 41
38 - 42 +#define DEFAULT_PLANE_MIN "-5 0 -5"
39 #define DEFAULT_PLANE_MIN_X -5 43 #define DEFAULT_PLANE_MIN_X -5
40 #define DEFAULT_PLANE_MIN_Y 0 44 #define DEFAULT_PLANE_MIN_Y 0
41 #define DEFAULT_PLANE_MIN_Z -5 45 #define DEFAULT_PLANE_MIN_Z -5
42 46
  47 +#define DEFAULT_PLANE_MAX "5 0 5"
43 #define DEFAULT_PLANE_MAX_X 5 48 #define DEFAULT_PLANE_MAX_X 5
44 #define DEFAULT_PLANE_MAX_Y 0 49 #define DEFAULT_PLANE_MAX_Y 0
45 #define DEFAULT_PLANE_MAX_Z 5 50 #define DEFAULT_PLANE_MAX_Z 5
46 51
  52 +#define DEFAULT_PLANE_NORM "0 1 0"
47 #define DEFAULT_PLANE_NORM_X 0 53 #define DEFAULT_PLANE_NORM_X 0
48 #define DEFAULT_PLANE_NORM_Y 1 54 #define DEFAULT_PLANE_NORM_Y 1
49 #define DEFAULT_PLANE_NORM_Z 0 55 #define DEFAULT_PLANE_NORM_Z 0
@@ -67,16 +73,16 @@ @@ -67,16 +73,16 @@
67 */ 73 */
68 74
69 75
70 -#define DEFAULT_FIELD_ORDER 10 76 +#define DEFAULT_FIELD_ORDER "10"
71 77
72 -#define DEFAULT_SAMPLES 400 78 +#define DEFAULT_SAMPLES "400"
73 79
74 -#define DEFAULT_SLICE_RES 256 80 +#define DEFAULT_SLICE_RES "256"
75 81
76 #define DEFAULT_SPHERE_THETA_R 1000 82 #define DEFAULT_SPHERE_THETA_R 1000
77 83
78 -#define DEFAULT_PADDING 1  
79 -#define DEFAULT_SUPERSAMPLE 1 84 +#define DEFAULT_PADDING "1"
  85 +#define DEFAULT_SUPERSAMPLE "1"
80 86
81 #define DEFAULT_INTENSITY_FILE "out_i.bmp" 87 #define DEFAULT_INTENSITY_FILE "out_i.bmp"
82 #define DEFAULT_TRANSMITTANCE_FILE "" 88 #define DEFAULT_TRANSMITTANCE_FILE ""
@@ -12,7 +12,9 @@ microscopeStruct* SCOPE; @@ -12,7 +12,9 @@ microscopeStruct* SCOPE;
12 #include "fieldslice.h" 12 #include "fieldslice.h"
13 13
14 #include "fileout.h" 14 #include "fileout.h"
15 -#include "options.h" 15 +//#include "options.h"
  16 +#include "arguments.h"
  17 +#include "rts/tools/arguments.h"
16 #include "montecarlo.h" 18 #include "montecarlo.h"
17 #include "rts/math/point.h" 19 #include "rts/math/point.h"
18 #include "rts/math/spherical_bessel.h" 20 #include "rts/math/spherical_bessel.h"
@@ -29,28 +31,42 @@ microscopeStruct* SCOPE; @@ -29,28 +31,42 @@ microscopeStruct* SCOPE;
29 #include "qtMainDialog.h" 31 #include "qtMainDialog.h"
30 bool gui = false; 32 bool gui = false;
31 33
  34 +#ifdef _WIN32
  35 +bool ansi = false;
  36 +#else
  37 +bool ansi = true;
  38 +#endif
  39 +
32 fileoutStruct gFileOut; 40 fileoutStruct gFileOut;
33 bool verbose = false; 41 bool verbose = false;
34 using namespace std; 42 using namespace std;
35 43
36 -int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, 44 +int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv,
37 complex<double>*cyv,complex<double>*cjvp,complex<double>*cyvp); 45 complex<double>*cyv,complex<double>*cjvp,complex<double>*cyvp);
38 46
39 int main(int argc, char *argv[]) 47 int main(int argc, char *argv[])
40 { 48 {
  49 + //arguments test
  50 + rts::arglist args;
  51 + SetArguments(args);
41 52
42 - //benchtest planewave class  
43 - rts::vector<ptype, 3> k(1, 0, 0);  
44 - rts::vector<ptype, 3> E(2, 2, 0);  
45 - planewave<ptype> P(k, E);  
46 -  
47 - std::cout<<P<<std::endl;  
48 -  
49 - exit(1); 53 + //parse the input arguments
  54 + args.parse(argc, argv);
50 55
51 SCOPE = new microscopeStruct(); 56 SCOPE = new microscopeStruct();
52 -  
53 - LoadParameters(argc, argv); 57 +
  58 + //load the user specified parameters into the simulation
  59 + LoadParameters(args);
  60 +
  61 + //activate ansi output if specified
  62 + args.set_ansi(ansi);
  63 +
  64 + //display help and exit
  65 + if(args("help"))
  66 + {
  67 + cout<<args.toStr()<<endl;
  68 + exit(1);
  69 + }
54 70
55 //initialize GPU memory for fields 71 //initialize GPU memory for fields
56 SCOPE->init(); 72 SCOPE->init();
@@ -8,7 +8,7 @@ @@ -8,7 +8,7 @@
8 #include "sphere.h" 8 #include "sphere.h"
9 #include <vector> 9 #include <vector>
10 10
11 -#define EPSILON_FLOAT 0.000001 11 +#define EPSILON_FLOAT 0.000001f
12 12
13 //This structure stores values relevant to creating the near field 13 //This structure stores values relevant to creating the near field
14 struct nearfieldStruct 14 struct nearfieldStruct
@@ -29,7 +29,7 @@ struct nearfieldStruct @@ -29,7 +29,7 @@ struct nearfieldStruct
29 bsVector k; //cartesian coordinates, normalized 29 bsVector k; //cartesian coordinates, normalized
30 bsPoint focus; 30 bsPoint focus;
31 31
32 - //slice position and orientation in world space 32 + //slice position and orientation in world space
33 rts::quad<ptype, 3> pos; 33 rts::quad<ptype, 3> pos;
34 34
35 //slices for the focused field 35 //slices for the focused field
@@ -3,8 +3,8 @@ @@ -3,8 +3,8 @@
3 #include "rts/math/legendre.h" 3 #include "rts/math/legendre.h"
4 #include <stdlib.h> 4 #include <stdlib.h>
5 #include "rts/cuda/error.h" 5 #include "rts/cuda/error.h"
6 -#include "rts/cuda/timer.h"  
7 - 6 +#include "rts/cuda/timer.h"
  7 +
8 //Incident field for a single plane wave 8 //Incident field for a single plane wave
9 __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR) 9 __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR)
10 { 10 {
@@ -33,15 +33,15 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p @@ -33,15 +33,15 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p
33 //get the rtsPoint in world space and then the r vector 33 //get the rtsPoint in world space and then the r vector
34 bsPoint p = ABCD(u, v); 34 bsPoint p = ABCD(u, v);
35 bsVector r = p - f; 35 bsVector r = p - f;
36 - //ptype d = r.len();  
37 -  
38 - ptype k_dot_r = kmag * k.dot(r);  
39 - bsComplex d(0, k_dot_r);  
40 - 36 + //ptype d = r.len();
  37 +
  38 + ptype k_dot_r = kmag * k.dot(r);
  39 + bsComplex d(0, k_dot_r);
  40 +
41 Uf[i] = exp(d) * A; 41 Uf[i] = exp(d) * A;
42 42
43 } 43 }
44 - 44 +
45 //Incident field for a focused point source 45 //Incident field for a focused point source
46 __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4) 46 __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4)
47 { 47 {
@@ -70,11 +70,11 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt @@ -70,11 +70,11 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt
70 //get the rtsPoint in world space and then the r vector 70 //get the rtsPoint in world space and then the r vector
71 bsPoint p = ABCD(u, v); 71 bsPoint p = ABCD(u, v);
72 bsVector r = p - f; 72 bsVector r = p - f;
73 - ptype d = r.len();  
74 - if(d < EPSILON_FLOAT)  
75 - {  
76 - Uf[i] = A * 2 * PI * (cosAlpha - cosBeta);  
77 - return; 73 + ptype d = r.len();
  74 + if(d < EPSILON_FLOAT)
  75 + {
  76 + Uf[i] = A * 2 * PI * (cosAlpha - cosBeta);
  77 + return;
78 } 78 }
79 79
80 //get info for the light direction and frequency 80 //get info for the light direction and frequency
@@ -94,10 +94,10 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt @@ -94,10 +94,10 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt
94 ptype P[2]; 94 ptype P[2];
95 //get the angle between k and r (light direction and position vector) 95 //get the angle between k and r (light direction and position vector)
96 ptype cosTheta; 96 ptype cosTheta;
97 - cosTheta = k.dot(r);  
98 -  
99 - //deal with the degenerate case where r == 0  
100 - //if(isnan(cosTheta)) 97 + cosTheta = k.dot(r);
  98 +
  99 + //deal with the degenerate case where r == 0
  100 + //if(isnan(cosTheta))
101 // cosTheta = 0; 101 // cosTheta = 0;
102 rts::init_legendre<ptype>(cosTheta, P[0], P[1]); 102 rts::init_legendre<ptype>(cosTheta, P[0], P[1]);
103 103
@@ -162,12 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt @@ -162,12 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt
162 162
163 void nearfieldStruct::scalarUf() 163 void nearfieldStruct::scalarUf()
164 { 164 {
165 -  
166 - gpuStartTimer(); 165 +
  166 + gpuStartTimer();
167 167
168 //create one thread for each pixel of the field slice 168 //create one thread for each pixel of the field slice
169 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); 169 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
170 - dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); 170 + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
171 171
172 //if we are computing a plane wave, call the gpuScalarUfp function 172 //if we are computing a plane wave, call the gpuScalarUfp function
173 if(planeWave) 173 if(planeWave)
@@ -176,15 +176,15 @@ void nearfieldStruct::scalarUf() @@ -176,15 +176,15 @@ void nearfieldStruct::scalarUf()
176 } 176 }
177 //otherwise compute the condenser info and create a focused field 177 //otherwise compute the condenser info and create a focused field
178 else 178 else
179 - {  
180 - //pre-compute the cosine of the obscuration and objective angles  
181 - //cout<<"Condenser angle in: "<<asin(condenser[0])<<std::endl;  
182 - //cout<<"Condenser angle out: "<<asin(condenser[1])<<std::endl;  
183 - ptype cosAlpha = cos(asin(condenser[0])); 179 + {
  180 + //pre-compute the cosine of the obscuration and objective angles
  181 + //cout<<"Condenser angle in: "<<asin(condenser[0])<<std::endl;
  182 + //cout<<"Condenser angle out: "<<asin(condenser[1])<<std::endl;
  183 + ptype cosAlpha = cos(asin(condenser[0]));
184 ptype cosBeta = cos(asin(condenser[1])); 184 ptype cosBeta = cos(asin(condenser[1]));
185 //compute the scalar Uf field (this will be in the x_hat channel of Uf) 185 //compute the scalar Uf field (this will be in the x_hat channel of Uf)
186 gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); 186 gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m);
187 - }  
188 -  
189 - t_Uf = gpuStopTimer();  
190 -} 187 + }
  188 +
  189 + t_Uf = gpuStopTimer();
  190 +}
1 #include "nearfield.h" 1 #include "nearfield.h"
2 2
3 #include "rts/math/legendre.h" 3 #include "rts/math/legendre.h"
4 -#include "rts/cuda/error.h" 4 +#include "rts/cuda/error.h"
5 #include "rts/cuda/timer.h" 5 #include "rts/cuda/timer.h"
6 6
7 texture<float, cudaTextureType2D> texJ; 7 texture<float, cudaTextureType2D> texJ;
@@ -25,100 +25,100 @@ __global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoi @@ -25,100 +25,100 @@ __global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoi
25 25
26 */ 26 */
27 27
28 - //get the current coordinate in the plane slice  
29 - int iu = blockIdx.x * blockDim.x + threadIdx.x;  
30 - int iv = blockIdx.y * blockDim.y + threadIdx.y;  
31 -  
32 - //make sure that the thread indices are in-bounds  
33 - if(iu >= uR || iv >= vR) return;  
34 -  
35 - //compute the index (easier access to the scalar field array)  
36 - int i = iv*uR + iu;  
37 -  
38 - //compute the parameters for u and v  
39 - ptype u = (ptype)iu / (uR);  
40 - ptype v = (ptype)iv / (vR);  
41 -  
42 -  
43 -  
44 - //get the rtsPoint in world space and then the r vector  
45 - bsPoint p = ABCD(u, v);  
46 - bsVector r = p - f; 28 + //get the current coordinate in the plane slice
  29 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  30 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  31 +
  32 + //make sure that the thread indices are in-bounds
  33 + if(iu >= uR || iv >= vR) return;
  34 +
  35 + //compute the index (easier access to the scalar field array)
  36 + int i = iv*uR + iu;
  37 +
  38 + //compute the parameters for u and v
  39 + ptype u = (ptype)iu / (uR);
  40 + ptype v = (ptype)iv / (vR);
  41 +
  42 +
  43 +
  44 + //get the rtsPoint in world space and then the r vector
  45 + bsPoint p = ABCD(u, v);
  46 + bsVector r = p - f;
47 ptype d = r.len(); 47 ptype d = r.len();
48 48
49 if(d == 0) 49 if(d == 0)
50 { 50 {
51 Uf[i] = A * 2 * PI * (cosAlpha - cosBeta); 51 Uf[i] = A * 2 * PI * (cosAlpha - cosBeta);
52 return; 52 return;
53 - }  
54 -  
55 - //get info for the light direction and frequency  
56 - r = r.norm();  
57 -  
58 - //compute the imaginary factor i^l  
59 - bsComplex im = bsComplex(0, 1);  
60 - bsComplex il = bsComplex(1, 0);  
61 -  
62 - //Legendre functions are computed dynamically to save memory  
63 - //initialize the Legendre functions  
64 -  
65 - ptype P[2];  
66 - //get the angle between k and r (light direction and position vector)  
67 - ptype cosTheta; 53 + }
  54 +
  55 + //get info for the light direction and frequency
  56 + r = r.norm();
  57 +
  58 + //compute the imaginary factor i^l
  59 + bsComplex im = bsComplex(0, 1);
  60 + bsComplex il = bsComplex(1, 0);
  61 +
  62 + //Legendre functions are computed dynamically to save memory
  63 + //initialize the Legendre functions
  64 +
  65 + ptype P[2];
  66 + //get the angle between k and r (light direction and position vector)
  67 + ptype cosTheta;
68 cosTheta = k.dot(r); 68 cosTheta = k.dot(r);
69 69
70 - rts::init_legendre<ptype>(cosTheta, P[0], P[1]);  
71 -  
72 - //initialize legendre functions for the cassegrain angles  
73 - ptype Palpha[3];  
74 - rts::init_legendre<ptype>(cosAlpha, Palpha[0], Palpha[1]);  
75 - Palpha[2] = 1;  
76 -  
77 - ptype Pbeta[3];  
78 - rts::init_legendre<ptype>(cosBeta, Pbeta[0], Pbeta[1]);  
79 - Pbeta[2] = 1;  
80 -  
81 - //for each order l  
82 - bsComplex sumUf(0.0, 0.0);  
83 - ptype jl = 0.0; 70 + rts::init_legendre<ptype>(cosTheta, P[0], P[1]);
  71 +
  72 + //initialize legendre functions for the cassegrain angles
  73 + ptype Palpha[3];
  74 + rts::init_legendre<ptype>(cosAlpha, Palpha[0], Palpha[1]);
  75 + Palpha[2] = 1;
  76 +
  77 + ptype Pbeta[3];
  78 + rts::init_legendre<ptype>(cosBeta, Pbeta[0], Pbeta[1]);
  79 + Pbeta[2] = 1;
  80 +
  81 + //for each order l
  82 + bsComplex sumUf(0, 0);
  83 + ptype jl = 0;
84 ptype Pl; 84 ptype Pl;
85 - ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1);  
86 - for(int l = 0; l<=nl; l++)  
87 - {  
88 - jl = tex2D(texJ, l + 0.5, di + 0.5);  
89 - if(l==0)  
90 - Pl = P[0];  
91 - else if(l==1)  
92 - {  
93 - Pl = P[1];  
94 -  
95 - //adjust the cassegrain Legendre function  
96 - Palpha[2] = Palpha[0];  
97 - rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);  
98 - Pbeta[2] = Pbeta[0];  
99 - rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);  
100 - }  
101 - else  
102 - {  
103 - rts::shift_legendre<ptype>(l, cosTheta, P[0], P[1]);  
104 -  
105 - Pl = P[1];  
106 -  
107 - //adjust the cassegrain outer Legendre function  
108 - Palpha[2] = Palpha[0];  
109 - rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);  
110 - Pbeta[2] = Pbeta[0];  
111 - rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);  
112 - }  
113 - 85 + ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1);
  86 + for(int l = 0; l<=nl; l++)
  87 + {
  88 + jl = tex2D(texJ, l + 0.5f, di + 0.5f);
  89 + if(l==0)
  90 + Pl = P[0];
  91 + else if(l==1)
  92 + {
  93 + Pl = P[1];
  94 +
  95 + //adjust the cassegrain Legendre function
  96 + Palpha[2] = Palpha[0];
  97 + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
  98 + Pbeta[2] = Pbeta[0];
  99 + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
  100 + }
  101 + else
  102 + {
  103 + rts::shift_legendre<ptype>(l, cosTheta, P[0], P[1]);
  104 +
  105 + Pl = P[1];
  106 +
  107 + //adjust the cassegrain outer Legendre function
  108 + Palpha[2] = Palpha[0];
  109 + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
  110 + Pbeta[2] = Pbeta[0];
  111 + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
  112 + }
  113 +
114 sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); 114 sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);
115 //sumUf += jl; 115 //sumUf += jl;
116 -  
117 - il *= im;  
118 - }  
119 - 116 +
  117 + il *= im;
  118 + }
  119 +
120 Uf[i] = sumUf * 2 * PI * A; 120 Uf[i] = sumUf * 2 * PI * A;
121 - //Uf[i] = u; 121 + //Uf[i] = u;
122 //return; 122 //return;
123 } 123 }
124 124
@@ -159,23 +159,23 @@ void nearfieldStruct::scalarUfLut() @@ -159,23 +159,23 @@ void nearfieldStruct::scalarUfLut()
159 HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice)); 159 HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice));
160 160
161 //----------------Compute the focused field 161 //----------------Compute the focused field
162 - //create one thread for each pixel of the field slice  
163 - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); 162 + //create one thread for each pixel of the field slice
  163 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
164 dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); 164 dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
165 -  
166 - //if we are computing a plane wave, call the gpuScalarUfp function  
167 - if(planeWave)  
168 - {  
169 - gpuScalarUfp<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]);  
170 - }  
171 - //otherwise compute the condenser info and create a focused field  
172 - else 165 +
  166 + //if we are computing a plane wave, call the gpuScalarUfp function
  167 + if(planeWave)
  168 + {
  169 + gpuScalarUfp<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]);
  170 + }
  171 + //otherwise compute the condenser info and create a focused field
  172 + else
173 { 173 {
174 //pre-compute the cosine of the obscuration and objective angles 174 //pre-compute the cosine of the obscuration and objective angles
175 ptype cosAlpha = cos(asin(condenser[0])); 175 ptype cosAlpha = cos(asin(condenser[0]));
176 - ptype cosBeta = cos(asin(condenser[1]));  
177 - //compute the scalar Uf field (this will be in the x_hat channel of Uf)  
178 - gpuScalarUfLut<<<dimGrid, dimBlock>>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR); 176 + ptype cosBeta = cos(asin(condenser[1]));
  177 + //compute the scalar Uf field (this will be in the x_hat channel of Uf)
  178 + gpuScalarUfLut<<<dimGrid, dimBlock>>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR);
179 } 179 }
180 180
181 181
@@ -3,30 +3,30 @@ @@ -3,30 +3,30 @@
3 #include "rts/math/legendre.h" 3 #include "rts/math/legendre.h"
4 #include <stdlib.h> 4 #include <stdlib.h>
5 #include "rts/cuda/error.h" 5 #include "rts/cuda/error.h"
6 -#include "rts/cuda/timer.h"  
7 -  
8 -texture<float2, cudaTextureType2D> texUsp;  
9 -texture<float2, cudaTextureType2D> texUip;  
10 - 6 +#include "rts/cuda/timer.h"
  7 +
  8 +texture<float2, cudaTextureType2D> texUsp;
  9 +texture<float2, cudaTextureType2D> texUip;
  10 +
11 __global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR) 11 __global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR)
12 -{  
13 - /*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field  
14 - produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field.  
15 -  
16 - Us = final scattered field  
17 - k = list of incoming plane waves (Monte-Carlo samples)  
18 - nk = number of incoming MC samples  
19 - kmag= magnitude of the incoming field 2pi/lambda  
20 - dmin= minimum distance of the Usp texture  
21 - dmax= maximum distance of the Usp texture  
22 - f = position of the focus  
23 - ps = position of the sphere  
24 - A = total amplitude of the incident field arriving at the focal spot  
25 - ABCD= rectangle representing the field slice  
26 - uR = resolution of the field slice in the u direction  
27 - vR = resolution of the field slice in the v direction  
28 - dR = resolution of the Usp texture in the d direction  
29 - thetaR= resolution of the Usp texture in the theta direction 12 +{
  13 + /*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field
  14 + produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field.
  15 +
  16 + Us = final scattered field
  17 + k = list of incoming plane waves (Monte-Carlo samples)
  18 + nk = number of incoming MC samples
  19 + kmag= magnitude of the incoming field 2pi/lambda
  20 + dmin= minimum distance of the Usp texture
  21 + dmax= maximum distance of the Usp texture
  22 + f = position of the focus
  23 + ps = position of the sphere
  24 + A = total amplitude of the incident field arriving at the focal spot
  25 + ABCD= rectangle representing the field slice
  26 + uR = resolution of the field slice in the u direction
  27 + vR = resolution of the field slice in the v direction
  28 + dR = resolution of the Usp texture in the d direction
  29 + thetaR= resolution of the Usp texture in the theta direction
30 */ 30 */
31 31
32 //get the current coordinate in the plane slice 32 //get the current coordinate in the plane slice
@@ -46,47 +46,47 @@ __global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, p @@ -46,47 +46,47 @@ __global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, p
46 //get the rtsPoint in world space and then the r vector 46 //get the rtsPoint in world space and then the r vector
47 bsPoint p = ABCD(u, v); 47 bsPoint p = ABCD(u, v);
48 bsVector r = p - ps; 48 bsVector r = p - ps;
49 - ptype d = r.len();  
50 - float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1);  
51 - float ai = ( (d - dmin)/(a - dmin)) * (aR - 1);  
52 -  
53 - bsComplex sumUs(0, 0);  
54 - //for each plane wave in the wave list  
55 - for(int iw = 0; iw < nk; iw++)  
56 - {  
57 - //normalize the direction vectors and find their inner product  
58 - r = r.norm();  
59 - ptype cos_theta = k[iw].dot(r);  
60 - if(cos_theta < -1)  
61 - cos_theta = -1;  
62 - if(cos_theta > 1)  
63 - cos_theta = 1;  
64 - float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1);  
65 -  
66 - //compute the phase factor for spheres that are not at the origin  
67 - bsVector c = ps - f;  
68 - bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));  
69 -  
70 - //compute the internal field if we are inside a sphere 49 + ptype d = r.len();
  50 + float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1);
  51 + float ai = ( (d - dmin)/(a - dmin)) * (aR - 1);
  52 +
  53 + bsComplex sumUs(0, 0);
  54 + //for each plane wave in the wave list
  55 + for(int iw = 0; iw < nk; iw++)
  56 + {
  57 + //normalize the direction vectors and find their inner product
  58 + r = r.norm();
  59 + ptype cos_theta = k[iw].dot(r);
  60 + if(cos_theta < -1)
  61 + cos_theta = -1;
  62 + if(cos_theta > 1)
  63 + cos_theta = 1;
  64 + float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1);
  65 +
  66 + //compute the phase factor for spheres that are not at the origin
  67 + bsVector c = ps - f;
  68 + bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));
  69 +
  70 + //compute the internal field if we are inside a sphere
71 if(d < a) 71 if(d < a)
72 { 72 {
73 - float2 Uip = tex2D(texUip, ai + 0.5, thetai + 0.5);  
74 - sumUs += (1.0/nk) * A * phase * bsComplex(Uip.x, Uip.y);  
75 - }  
76 - //otherwise compute the scattered field  
77 - else  
78 - {  
79 - float2 Usp = tex2D(texUsp, di + 0.5, thetai + 0.5);  
80 - sumUs += (1.0/nk) * A * phase * bsComplex(Usp.x, Usp.y);  
81 - }  
82 -  
83 - }  
84 -  
85 - Us[i] += sumUs;  
86 -}  
87 -  
88 -void nearfieldStruct::scalarUpLut()  
89 -{ 73 + float2 Uip = tex2D(texUip, ai + 0.5f, thetai + 0.5f);
  74 + sumUs += (1.0f/nk) * A * phase * bsComplex(Uip.x, Uip.y);
  75 + }
  76 + //otherwise compute the scattered field
  77 + else
  78 + {
  79 + float2 Usp = tex2D(texUsp, di + 0.5f, thetai + 0.5f);
  80 + sumUs += (1.0f/nk) * A * phase * bsComplex(Usp.x, Usp.y);
  81 + }
  82 +
  83 + }
  84 +
  85 + Us[i] += sumUs;
  86 +}
  87 +
  88 +void nearfieldStruct::scalarUpLut()
  89 +{
90 //get the number of spheres 90 //get the number of spheres
91 int nSpheres = sVector.size(); 91 int nSpheres = sVector.size();
92 92
@@ -103,90 +103,90 @@ void nearfieldStruct::scalarUpLut() @@ -103,90 +103,90 @@ void nearfieldStruct::scalarUpLut()
103 //create one thread for each pixel of the field slice 103 //create one thread for each pixel of the field slice
104 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); 104 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
105 dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); 105 dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
106 -  
107 - //copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff)  
108 - bsVector* gpuk;  
109 - int nWaves;  
110 - ptype subA;  
111 - if(planeWave)  
112 - {  
113 - nWaves = 1;  
114 - HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) );  
115 - HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice));  
116 - subA = A;  
117 - }  
118 - else  
119 - {  
120 - nWaves = inWaves.size();  
121 - HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nWaves ) );  
122 - HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nWaves, cudaMemcpyHostToDevice));  
123 - //compute the amplitude that makes it through the condenser  
124 - subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );  
125 - } 106 +
  107 + //copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff)
  108 + bsVector* gpuk;
  109 + int nWaves;
  110 + ptype subA;
  111 + if(planeWave)
  112 + {
  113 + nWaves = 1;
  114 + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) );
  115 + HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice));
  116 + subA = A;
  117 + }
  118 + else
  119 + {
  120 + nWaves = inWaves.size();
  121 + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nWaves ) );
  122 + HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nWaves, cudaMemcpyHostToDevice));
  123 + //compute the amplitude that makes it through the condenser
  124 + subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );
  125 + }
126 126
127 //for each sphere 127 //for each sphere
128 for(int s = 0; s<nSpheres; s++) 128 for(int s = 0; s<nSpheres; s++)
129 - {  
130 - //get the current sphere  
131 - //sphere S = sVector[s];  
132 -  
133 - //allocate space for the Usp and Uip textures  
134 - //allocate the cuda array  
135 - cudaArray* arrayUsp;  
136 - cudaArray* arrayUip;  
137 - cudaChannelFormatDesc channelDescUsp =  
138 - cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);  
139 - cudaChannelFormatDesc channelDescUip =  
140 - cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);  
141 - int dR = sVector[s].Usp.R[0];  
142 - int thetaR = sVector[s].Usp.R[1];  
143 - int aR = sVector[s].Uip.R[0];  
144 - HANDLE_ERROR(cudaMallocArray(&arrayUsp, &channelDescUsp, dR, thetaR));  
145 - HANDLE_ERROR(cudaMallocArray(&arrayUip, &channelDescUip, aR, thetaR));  
146 -  
147 - texUsp.addressMode[0] = cudaAddressModeMirror;  
148 - texUsp.addressMode[1] = cudaAddressModeMirror;  
149 - texUsp.filterMode = cudaFilterModeLinear;  
150 - texUsp.normalized = false;  
151 -  
152 - texUip.addressMode[0] = cudaAddressModeMirror;  
153 - texUip.addressMode[1] = cudaAddressModeMirror;  
154 - texUip.filterMode = cudaFilterModeLinear;  
155 - texUip.normalized = false;  
156 - HANDLE_ERROR(cudaBindTextureToArray(texUsp, arrayUsp, channelDescUsp));  
157 - HANDLE_ERROR(cudaBindTextureToArray(texUip, arrayUip, channelDescUip));  
158 -  
159 - //copy the LUT to the Usp texture  
160 - HANDLE_ERROR( cudaMemcpy2DToArray(arrayUsp, 0, 0, sVector[s].Usp.x_hat, dR*sizeof(float2), dR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));  
161 - HANDLE_ERROR( cudaMemcpy2DToArray(arrayUip, 0, 0, sVector[s].Uip.x_hat, aR*sizeof(float2), aR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));  
162 - 129 + {
  130 + //get the current sphere
  131 + //sphere S = sVector[s];
  132 +
  133 + //allocate space for the Usp and Uip textures
  134 + //allocate the cuda array
  135 + cudaArray* arrayUsp;
  136 + cudaArray* arrayUip;
  137 + cudaChannelFormatDesc channelDescUsp =
  138 + cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);
  139 + cudaChannelFormatDesc channelDescUip =
  140 + cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);
  141 + int dR = sVector[s].Usp.R[0];
  142 + int thetaR = sVector[s].Usp.R[1];
  143 + int aR = sVector[s].Uip.R[0];
  144 + HANDLE_ERROR(cudaMallocArray(&arrayUsp, &channelDescUsp, dR, thetaR));
  145 + HANDLE_ERROR(cudaMallocArray(&arrayUip, &channelDescUip, aR, thetaR));
  146 +
  147 + texUsp.addressMode[0] = cudaAddressModeMirror;
  148 + texUsp.addressMode[1] = cudaAddressModeMirror;
  149 + texUsp.filterMode = cudaFilterModeLinear;
  150 + texUsp.normalized = false;
  151 +
  152 + texUip.addressMode[0] = cudaAddressModeMirror;
  153 + texUip.addressMode[1] = cudaAddressModeMirror;
  154 + texUip.filterMode = cudaFilterModeLinear;
  155 + texUip.normalized = false;
  156 + HANDLE_ERROR(cudaBindTextureToArray(texUsp, arrayUsp, channelDescUsp));
  157 + HANDLE_ERROR(cudaBindTextureToArray(texUip, arrayUip, channelDescUip));
  158 +
  159 + //copy the LUT to the Usp texture
  160 + HANDLE_ERROR( cudaMemcpy2DToArray(arrayUsp, 0, 0, sVector[s].Usp.x_hat, dR*sizeof(float2), dR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));
  161 + HANDLE_ERROR( cudaMemcpy2DToArray(arrayUip, 0, 0, sVector[s].Uip.x_hat, aR*sizeof(float2), aR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));
  162 +
163 gpuScalarUpLut<<<dimGrid, dimBlock>>>(U.x_hat, 163 gpuScalarUpLut<<<dimGrid, dimBlock>>>(U.x_hat,
164 - gpuk, 164 + gpuk,
165 nWaves, 165 nWaves,
166 - 2 * PI / lambda,  
167 - sVector[s].a,  
168 - sVector[s].d_min, 166 + 2 * PI / lambda,
  167 + sVector[s].a,
  168 + sVector[s].d_min,
169 sVector[s].d_max, 169 sVector[s].d_max,
170 focus, 170 focus,
171 - sVector[s].p,  
172 - subA, 171 + sVector[s].p,
  172 + subA,
173 pos, 173 pos,
174 U.R[0], 174 U.R[0],
175 - U.R[1],  
176 - dR,  
177 - aR,  
178 - thetaR);  
179 -  
180 - cudaFreeArray(arrayUsp);  
181 - cudaFreeArray(arrayUip);  
182 -  
183 - }  
184 - 175 + U.R[1],
  176 + dR,
  177 + aR,
  178 + thetaR);
  179 +
  180 + cudaFreeArray(arrayUsp);
  181 + cudaFreeArray(arrayUip);
  182 +
  183 + }
  184 +
185 185
186 //store the time to compute the scattered field 186 //store the time to compute the scattered field
187 - t_Us = gpuStopTimer();  
188 -  
189 - //free monte-carlo samples  
190 - cudaFree(gpuk);  
191 -  
192 -} 187 + t_Us = gpuStopTimer();
  188 +
  189 + //free monte-carlo samples
  190 + cudaFree(gpuk);
  191 +
  192 +}
@@ -7,47 +7,47 @@ @@ -7,47 +7,47 @@
7 7
8 __device__ bsComplex calc_Us(ptype kd, ptype cos_theta, int Nl, bsComplex* B) 8 __device__ bsComplex calc_Us(ptype kd, ptype cos_theta, int Nl, bsComplex* B)
9 { 9 {
10 - //initialize the spherical Bessel functions  
11 - ptype j[2];  
12 - rts::init_sbesselj<ptype>(kd, j);  
13 - ptype y[2];  
14 - rts::init_sbessely<ptype>(kd, y);  
15 -  
16 - //initialize the Legendre polynomial  
17 - ptype P[2];  
18 - rts::init_legendre<ptype>(cos_theta, P[0], P[1]);  
19 -  
20 - //initialize the spherical Hankel function  
21 - bsComplex h((ptype)0, (ptype)0);  
22 -  
23 - //initialize the result  
24 - bsComplex Us((ptype)0, (ptype)0);  
25 -  
26 - //for each order up to Nl  
27 - for(int l=0; l<=Nl; l++)  
28 - {  
29 - if(l == 0)  
30 - {  
31 - h.r = j[0];  
32 - h.i = y[0];  
33 - Us += B[0] * h * P[0];  
34 - }  
35 - else  
36 - {  
37 - //shift the bessel functions and legendre polynomials  
38 - if(l > 1)  
39 - {  
40 - rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);  
41 - rts::shift_sbesselj<ptype>(l, kd, j);  
42 - rts::shift_sbessely<ptype>(l, kd, y);  
43 - }  
44 -  
45 - h.r = j[1];  
46 - h.i = y[1];  
47 - Us += B[l] * h * P[1];  
48 -  
49 -  
50 - } 10 + //initialize the spherical Bessel functions
  11 + ptype j[2];
  12 + rts::init_sbesselj<ptype>(kd, j);
  13 + ptype y[2];
  14 + rts::init_sbessely<ptype>(kd, y);
  15 +
  16 + //initialize the Legendre polynomial
  17 + ptype P[2];
  18 + rts::init_legendre<ptype>(cos_theta, P[0], P[1]);
  19 +
  20 + //initialize the spherical Hankel function
  21 + bsComplex h((ptype)0, (ptype)0);
  22 +
  23 + //initialize the result
  24 + bsComplex Us((ptype)0, (ptype)0);
  25 +
  26 + //for each order up to Nl
  27 + for(int l=0; l<=Nl; l++)
  28 + {
  29 + if(l == 0)
  30 + {
  31 + h.r = j[0];
  32 + h.i = y[0];
  33 + Us += B[0] * h * P[0];
  34 + }
  35 + else
  36 + {
  37 + //shift the bessel functions and legendre polynomials
  38 + if(l > 1)
  39 + {
  40 + rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);
  41 + rts::shift_sbesselj<ptype>(l, kd, j);
  42 + rts::shift_sbessely<ptype>(l, kd, y);
  43 + }
  44 +
  45 + h.r = j[1];
  46 + h.i = y[1];
  47 + Us += B[l] * h * P[1];
  48 +
  49 +
  50 + }
51 } 51 }
52 return Us; 52 return Us;
53 } 53 }
@@ -59,41 +59,41 @@ __device__ bsComplex calc_Ui(bsComplex knd, ptype cos_theta, int Nl, bsComplex* @@ -59,41 +59,41 @@ __device__ bsComplex calc_Ui(bsComplex knd, ptype cos_theta, int Nl, bsComplex*
59 bsComplex Ui((ptype)0, (ptype)0); 59 bsComplex Ui((ptype)0, (ptype)0);
60 60
61 //deal with rtsPoints near zero 61 //deal with rtsPoints near zero
62 - if(real(knd) < EPSILON_FLOAT)  
63 - {  
64 - //for(int l=0; l<Nl; l++)  
65 - Ui = A[0];  
66 - return Ui; 62 + if(real(knd) < EPSILON_FLOAT)
  63 + {
  64 + //for(int l=0; l<Nl; l++)
  65 + Ui = A[0];
  66 + return Ui;
67 } 67 }
68 68
69 - //initialize the spherical Bessel functions  
70 - bsComplex j[2];  
71 - rts::init_sbesselj<bsComplex>(knd, j);  
72 -  
73 - //initialize the Legendre polynomial  
74 - ptype P[2];  
75 - rts::init_legendre<ptype>(cos_theta, P[0], P[1]);  
76 -  
77 - //for each order up to Nl  
78 - for(int l=0; l<=Nl; l++)  
79 - {  
80 - if(l == 0)  
81 - {  
82 - Ui += A[0] * j[0] * P[0];  
83 - }  
84 - else  
85 - {  
86 - //shift the bessel functions and legendre polynomials  
87 - if(l > 1)  
88 - {  
89 - rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);  
90 - rts::shift_sbesselj<bsComplex>(l, knd, j);  
91 - }  
92 -  
93 - Ui += A[l] * j[1] * P[1];  
94 -  
95 -  
96 - } 69 + //initialize the spherical Bessel functions
  70 + bsComplex j[2];
  71 + rts::init_sbesselj<bsComplex>(knd, j);
  72 +
  73 + //initialize the Legendre polynomial
  74 + ptype P[2];
  75 + rts::init_legendre<ptype>(cos_theta, P[0], P[1]);
  76 +
  77 + //for each order up to Nl
  78 + for(int l=0; l<=Nl; l++)
  79 + {
  80 + if(l == 0)
  81 + {
  82 + Ui += A[0] * j[0] * P[0];
  83 + }
  84 + else
  85 + {
  86 + //shift the bessel functions and legendre polynomials
  87 + if(l > 1)
  88 + {
  89 + rts::shift_legendre<ptype>(l, cos_theta, P[0], P[1]);
  90 + rts::shift_sbesselj<bsComplex>(l, knd, j);
  91 + }
  92 +
  93 + Ui += A[l] * j[1] * P[1];
  94 +
  95 +
  96 + }
97 } 97 }
98 return Ui; 98 return Ui;
99 } 99 }
@@ -118,39 +118,39 @@ __global__ void gpuScalarUsp(bsComplex* Us, bsVector* k, int nk, ptype kmag, bsP @@ -118,39 +118,39 @@ __global__ void gpuScalarUsp(bsComplex* Us, bsVector* k, int nk, ptype kmag, bsP
118 //get the rtsPoint in world space and then the r vector 118 //get the rtsPoint in world space and then the r vector
119 bsPoint p = ABCD(u, v); 119 bsPoint p = ABCD(u, v);
120 bsVector r = p - ps; 120 bsVector r = p - ps;
121 - ptype d = r.len();  
122 -  
123 - bsComplex sumUs(0, 0);  
124 - //for each plane wave in the wave list  
125 - for(int iw = 0; iw < nk; iw++)  
126 - {  
127 - //normalize the direction vectors and find their inner product  
128 - r = r.norm();  
129 - ptype cos_theta = k[iw].dot(r);  
130 -  
131 - //compute the phase factor for spheres that are not at the origin  
132 - bsVector c = ps - f;  
133 - bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));  
134 -  
135 - //compute the internal field if we are inside a sphere 121 + ptype d = r.len();
  122 +
  123 + bsComplex sumUs(0, 0);
  124 + //for each plane wave in the wave list
  125 + for(int iw = 0; iw < nk; iw++)
  126 + {
  127 + //normalize the direction vectors and find their inner product
  128 + r = r.norm();
  129 + ptype cos_theta = k[iw].dot(r);
  130 +
  131 + //compute the phase factor for spheres that are not at the origin
  132 + bsVector c = ps - f;
  133 + bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));
  134 +
  135 + //compute the internal field if we are inside a sphere
136 if(d <= a) 136 if(d <= a)
137 { 137 {
138 bsComplex knd = kmag * d * n; 138 bsComplex knd = kmag * d * n;
139 - sumUs += (1.0/nk) * A * phase * calc_Ui(knd, cos_theta, Nl, Alpha);  
140 - }  
141 - //otherwise compute the scattered field  
142 - else  
143 - {  
144 - //compute the argument for the spherical Hankel function  
145 - ptype kd = kmag * d;  
146 - sumUs += (1.0/nk) * A * phase * calc_Us(kd, cos_theta, Nl, Beta);  
147 - }  
148 -  
149 - }  
150 -  
151 - Us[i] += sumUs;  
152 -  
153 - 139 + sumUs += (1.0f/nk) * A * phase * calc_Ui(knd, cos_theta, Nl, Alpha);
  140 + }
  141 + //otherwise compute the scattered field
  142 + else
  143 + {
  144 + //compute the argument for the spherical Hankel function
  145 + ptype kd = kmag * d;
  146 + sumUs += (1.0f/nk) * A * phase * calc_Us(kd, cos_theta, Nl, Beta);
  147 + }
  148 +
  149 + }
  150 +
  151 + Us[i] += sumUs;
  152 +
  153 +
154 } 154 }
155 155
156 void nearfieldStruct::scalarUs() 156 void nearfieldStruct::scalarUs()
@@ -190,17 +190,17 @@ void nearfieldStruct::scalarUs() @@ -190,17 +190,17 @@ void nearfieldStruct::scalarUs()
190 HANDLE_ERROR(cudaMemcpy(gpuB, &sVector[s].B[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice)); 190 HANDLE_ERROR(cudaMemcpy(gpuB, &sVector[s].B[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice));
191 HANDLE_ERROR(cudaMemcpy(gpuA, &sVector[s].A[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice)); 191 HANDLE_ERROR(cudaMemcpy(gpuA, &sVector[s].A[0], (Nl+1) * sizeof(bsComplex), cudaMemcpyHostToDevice));
192 192
193 - //if we are computing a plane wave, call the gpuScalarUfp function  
194 - sphere S = sVector[s];  
195 - bsVector* gpuk; 193 + //if we are computing a plane wave, call the gpuScalarUfp function
  194 + sphere S = sVector[s];
  195 + bsVector* gpuk;
196 196
197 if(planeWave) 197 if(planeWave)
198 - {  
199 - //if this is a single plane wave, assume it goes along direction k (copy the k vector to the GPU)  
200 - HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) )); 198 + {
  199 + //if this is a single plane wave, assume it goes along direction k (copy the k vector to the GPU)
  200 + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ));
201 HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice)); 201 HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice));
202 gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat, 202 gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat,
203 - gpuk, 203 + gpuk,
204 1, 204 1,
205 2 * PI / lambda, 205 2 * PI / lambda,
206 focus, 206 focus,
@@ -213,20 +213,20 @@ void nearfieldStruct::scalarUs() @@ -213,20 +213,20 @@ void nearfieldStruct::scalarUs()
213 A, 213 A,
214 pos, 214 pos,
215 U.R[0], 215 U.R[0],
216 - U.R[1]); 216 + U.R[1]);
217 HANDLE_ERROR(cudaFree(gpuk)); 217 HANDLE_ERROR(cudaFree(gpuk));
218 - }  
219 - //otherwise copy all of the monte-carlo samples to the GPU and compute  
220 - else  
221 - {  
222 - HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * inWaves.size() ));  
223 - HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * inWaves.size(), cudaMemcpyHostToDevice));  
224 -  
225 - //compute the amplitude that makes it through the condenser  
226 - ptype subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );  
227 - 218 + }
  219 + //otherwise copy all of the monte-carlo samples to the GPU and compute
  220 + else
  221 + {
  222 + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * inWaves.size() ));
  223 + HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * inWaves.size(), cudaMemcpyHostToDevice));
  224 +
  225 + //compute the amplitude that makes it through the condenser
  226 + ptype subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );
  227 +
228 gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat, 228 gpuScalarUsp<<<dimGrid, dimBlock>>>(U.x_hat,
229 - gpuk, 229 + gpuk,
230 inWaves.size(), 230 inWaves.size(),
231 2 * PI / lambda, 231 2 * PI / lambda,
232 focus, 232 focus,
@@ -239,17 +239,17 @@ void nearfieldStruct::scalarUs() @@ -239,17 +239,17 @@ void nearfieldStruct::scalarUs()
239 subA, 239 subA,
240 pos, 240 pos,
241 U.R[0], 241 U.R[0],
242 - U.R[1]);  
243 - HANDLE_ERROR(cudaFree(gpuk));  
244 -  
245 -  
246 - }  
247 -  
248 - //free memory for scattering coefficients  
249 - HANDLE_ERROR(cudaFree(gpuA)); 242 + U.R[1]);
  243 + HANDLE_ERROR(cudaFree(gpuk));
  244 +
  245 +
  246 + }
  247 +
  248 + //free memory for scattering coefficients
  249 + HANDLE_ERROR(cudaFree(gpuA));
250 HANDLE_ERROR(cudaFree(gpuB)); 250 HANDLE_ERROR(cudaFree(gpuB));
251 - }  
252 - 251 + }
  252 +
253 253
254 //store the time to compute the scattered field 254 //store the time to compute the scattered field
255 t_Us = gpuStopTimer(); 255 t_Us = gpuStopTimer();
options.h deleted
1 -//AnyOption for command-line processing  
2 -//#include "anyoption.h"  
3 -  
4 -#include "rts/optics/material.h"  
5 -  
6 -#include "nearfield.h"  
7 -#include "microscope.h"  
8 -#include "rts/visualization/colormap.h"  
9 -#include "fileout.h"  
10 -//extern nearfieldStruct* NF;  
11 -extern microscopeStruct* SCOPE;  
12 -extern fileoutStruct gFileOut;  
13 -  
14 -//default values  
15 -#include "defaults.h"  
16 -  
17 -#include <string>  
18 -#include <sstream>  
19 -#include <fstream>  
20 -#include <limits>  
21 -using namespace std;  
22 -  
23 -#include <boost/program_options.hpp>  
24 -namespace po = boost::program_options;  
25 -  
26 -extern bool verbose;  
27 -extern bool gui;  
28 -  
29 -  
30 -  
31 -static void lNearfield(po::variables_map vm)  
32 -{  
33 - //test to see if we are running a vector field simulation  
34 - bool vectorField = false;  
35 - if(vm.count("vector"))  
36 - vectorField = true;  
37 - SCOPE->scalarSim = !vectorField;  
38 -  
39 - //test to see if we are simulating a plane wave  
40 - bool planeWave = DEFAULT_PLANEWAVE;  
41 - if(vm.count("plane-wave"))  
42 - planeWave = !planeWave;  
43 - SCOPE->nf.planeWave = planeWave;  
44 -  
45 - //get the incident field amplitude  
46 - SCOPE->nf.A = vm["amplitude"].as<ptype>();  
47 -  
48 - //get the condenser parameters  
49 - SCOPE->nf.condenser[0] = DEFAULT_CONDENSER_MIN;  
50 - SCOPE->nf.condenser[1] = DEFAULT_CONDENSER_MAX;  
51 -  
52 - if(vm.count("condenser"))  
53 - {  
54 - vector<ptype> cparams = vm["condenser"].as< vector<ptype> >();  
55 -  
56 - if(cparams.size() == 1)  
57 - SCOPE->nf.condenser[1] = cparams[0];  
58 - else  
59 - {  
60 - SCOPE->nf.condenser[0] = cparams[0];  
61 - SCOPE->nf.condenser[1] = cparams[1];  
62 - }  
63 - }  
64 -  
65 -  
66 - //get the focal rtsPoint position  
67 - SCOPE->nf.focus[0] = DEFAULT_FOCUS_X;  
68 - SCOPE->nf.focus[1] = DEFAULT_FOCUS_Y;  
69 - SCOPE->nf.focus[2] = DEFAULT_FOCUS_Z;  
70 - if(vm.count("focus"))  
71 - {  
72 - vector<ptype> fpos = vm["focus"].as< vector<ptype> >();  
73 - if(fpos.size() != 3)  
74 - {  
75 - cout<<"BIMSIM Error - the incident focal point is incorrectly specified; it must have three components."<<endl;  
76 - exit(1);  
77 - }  
78 - SCOPE->nf.focus[0] = fpos[0];  
79 - SCOPE->nf.focus[1] = fpos[1];  
80 - SCOPE->nf.focus[2] = fpos[2];  
81 - }  
82 -  
83 - //get the incident light direction (k-vector)  
84 - bsVector spherical(1, 0, 0);  
85 -  
86 - //if a k-vector is specified  
87 - if(vm.count("k"))  
88 - {  
89 - vector<ptype> kvec = vm["k"].as< vector<ptype> >();  
90 - if(kvec.size() != 2)  
91 - {  
92 - cout<<"BIMSIM Error - k-vector is not specified correctly: it must contain two elements"<<endl;  
93 - exit(1);  
94 - }  
95 - spherical[1] = kvec[0];  
96 - spherical[2] = kvec[1];  
97 - }  
98 - SCOPE->nf.k = spherical.sph2cart();  
99 -  
100 -  
101 - //incident field order  
102 - SCOPE->nf.m = vm["field-order"].as<int>();  
103 -  
104 - //number of Monte-Carlo samples  
105 - SCOPE->nf.nWaves = vm["samples"].as<int>();  
106 -  
107 - //random number seed for Monte-Carlo samples  
108 - if(vm.count("seed"))  
109 - srand(vm["seed"].as<unsigned int>());  
110 -  
111 -  
112 -  
113 -}  
114 -  
115 -  
116 -static void loadOutputParams(po::variables_map vm)  
117 -{  
118 - //append simulation results to previous binary files  
119 - gFileOut.append = DEFAULT_APPEND;  
120 - if(vm.count("append"))  
121 - gFileOut.append = true;  
122 -  
123 - //image parameters  
124 - //component of the field to be saved  
125 - std::string fieldStr;  
126 - fieldStr = vm["output-type"].as<string>();  
127 -  
128 - if(fieldStr == "magnitude")  
129 - gFileOut.field = fileoutStruct::fieldMag;  
130 - else if(fieldStr == "intensity")  
131 - gFileOut.field = fileoutStruct::fieldIntensity;  
132 - else if(fieldStr == "polarization")  
133 - gFileOut.field = fileoutStruct::fieldPolar;  
134 - else if(fieldStr == "imaginary")  
135 - gFileOut.field = fileoutStruct::fieldImag;  
136 - else if(fieldStr == "real")  
137 - gFileOut.field = fileoutStruct::fieldReal;  
138 - else if(fieldStr == "angular-spectrum")  
139 - gFileOut.field = fileoutStruct::fieldAngularSpectrum;  
140 -  
141 -  
142 - //image file names  
143 - gFileOut.intFile = vm["intensity"].as<string>();  
144 - gFileOut.absFile = vm["absorbance"].as<string>();