Commit 03c403facb572c7df53e36dd20ef6b73ea1290e3

Authored by Pavel Govyadinov
2 parents c0e09133 ad2123e6

Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib

stim/biomodels/network.h
@@ -8,7 +8,7 @@ @@ -8,7 +8,7 @@
8 #include <algorithm> 8 #include <algorithm>
9 #include <string.h> 9 #include <string.h>
10 #include <math.h> 10 #include <math.h>
11 -#include <stim/math/vector.h> 11 +#include <stim/math/vec3.h>
12 #include <stim/visualization/obj.h> 12 #include <stim/visualization/obj.h>
13 #include <stim/visualization/cylinder.h> 13 #include <stim/visualization/cylinder.h>
14 #include <ANN/ANN.h> 14 #include <ANN/ANN.h>
@@ -37,7 +37,7 @@ class network{ @@ -37,7 +37,7 @@ class network{
37 /// Constructor - creates an edge from a list of points by calling the stim::fiber constructor 37 /// Constructor - creates an edge from a list of points by calling the stim::fiber constructor
38 38
39 ///@param p is an array of positions in space 39 ///@param p is an array of positions in space
40 - edge(std::vector< stim::vec<T> > p) : cylinder<T>(p){} 40 + edge(std::vector< stim::vec3<T> > p) : cylinder<T>(p){}
41 41
42 /// Copy constructor creates an edge from a fiber 42 /// Copy constructor creates an edge from a fiber
43 edge(stim::cylinder<T> f) : cylinder<T>(f) {} 43 edge(stim::cylinder<T> f) : cylinder<T>(f) {}
@@ -61,20 +61,20 @@ class network{ @@ -61,20 +61,20 @@ class network{
61 }; 61 };
62 62
63 ///Node class that stores the physical position of the node as well as the edges it is connected to (edges that connect to it), As well as any additional data necessary. 63 ///Node class that stores the physical position of the node as well as the edges it is connected to (edges that connect to it), As well as any additional data necessary.
64 - class vertex : public stim::vec<T> 64 + class vertex : public stim::vec3<T>
65 { 65 {
66 public: 66 public:
67 //std::vector<unsigned int> edges; //indices of edges connected to this node. 67 //std::vector<unsigned int> edges; //indices of edges connected to this node.
68 std::vector<unsigned int> e[2]; //indices of edges going out (e[0]) and coming in (e[1]) 68 std::vector<unsigned int> e[2]; //indices of edges going out (e[0]) and coming in (e[1])
69 - //stim::vec<T> p; //position of this node in physical space. 69 + //stim::vec3<T> p; //position of this node in physical space.
70 70
71 //constructor takes a stim::vec 71 //constructor takes a stim::vec
72 - vertex(stim::vec<T> p) : stim::vec<T>(p){} 72 + vertex(stim::vec3<T> p) : stim::vec3<T>(p){}
73 73
74 /// Output the vertex information as a string 74 /// Output the vertex information as a string
75 std::string str(){ 75 std::string str(){
76 std::stringstream ss; 76 std::stringstream ss;
77 - ss<<"\t(x, y, z) = "<<stim::vec<T>::str(); 77 + ss<<"\t(x, y, z) = "<<stim::vec3<T>::str();
78 78
79 if(e[0].size() > 0){ 79 if(e[0].size() > 0){
80 ss<<"\t> "; 80 ss<<"\t> ";
@@ -129,7 +129,11 @@ public: @@ -129,7 +129,11 @@ public:
129 std::vector< stim::vec<T> > c; //allocate an array of points for the vessel centerline 129 std::vector< stim::vec<T> > c; //allocate an array of points for the vessel centerline
130 O.getLine(l, c); //get the fiber centerline 130 O.getLine(l, c); //get the fiber centerline
131 131
132 - edge new_edge = c; //create an edge from the given centerline 132 + std::vector< stim::vec3<T> > c3(c.size());
  133 + for(size_t j = 0; j < c.size(); j++)
  134 + c3[j] = c[j];
  135 +
  136 + edge new_edge = c3; //create an edge from the given centerline
133 unsigned int I = new_edge.size(); //calculate the number of points on the centerline 137 unsigned int I = new_edge.size(); //calculate the number of points on the centerline
134 138
135 //get the first and last vertex IDs for the line 139 //get the first and last vertex IDs for the line
@@ -222,7 +226,7 @@ public: @@ -222,7 +226,7 @@ public:
222 float gaussianFunction(float x, float std=25){ return exp(-x/(2*std*std));} // by default std = 25 226 float gaussianFunction(float x, float std=25){ return exp(-x/(2*std*std));} // by default std = 25
223 227
224 // stim 3d vector to annpoint of 3 dimensions 228 // stim 3d vector to annpoint of 3 dimensions
225 - void stim2ann(ANNpoint &a, stim::vec<T> b){ 229 + void stim2ann(ANNpoint &a, stim::vec3<T> b){
226 a[0] = b[0]; 230 a[0] = b[0];
227 a[1] = b[1]; 231 a[1] = b[1];
228 a[2] = b[2]; 232 a[2] = b[2];
@@ -278,10 +282,9 @@ public: @@ -278,10 +282,9 @@ public:
278 ANNdistArray dists = new ANNdist[1]; // near neighbor distances 282 ANNdistArray dists = new ANNdist[1]; // near neighbor distances
279 ANNidxArray nnIdx = new ANNidx[1]; // near neighbor indices // allocate near neigh indices 283 ANNidxArray nnIdx = new ANNidx[1]; // near neighbor indices // allocate near neigh indices
280 284
281 - stim::vec<T> p0, p1;  
282 - float m0, m1; 285 + stim::vec3<T> p0, p1;
  286 + float m1;
283 float M = 0; //stores the total metric value 287 float M = 0; //stores the total metric value
284 - float l; //stores the segment length  
285 float L = 0; //stores the total network length 288 float L = 0; //stores the total network length
286 ANNpoint queryPt = annAllocPt(3); 289 ANNpoint queryPt = annAllocPt(3);
287 for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A 290 for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A
@@ -292,7 +295,7 @@ public: @@ -292,7 +295,7 @@ public:
292 p1 = R.E[e][p]; //get the next point in the edge 295 p1 = R.E[e][p]; //get the next point in the edge
293 stim2ann(queryPt, p1); 296 stim2ann(queryPt, p1);
294 kdt->annkSearch( queryPt, 1, nnIdx, dists, eps); //find the distance between A and the current network 297 kdt->annkSearch( queryPt, 1, nnIdx, dists, eps); //find the distance between A and the current network
295 - m1 = 1.0f - gaussianFunction(dists[0], sigma); //calculate the metric value based on the distance 298 + m1 = 1.0f - gaussianFunction((float)dists[0], sigma); //calculate the metric value based on the distance
296 R.E[e].set_mag(m1, p, 1); //set the error for the second point in the segment 299 R.E[e].set_mag(m1, p, 1); //set the error for the second point in the segment
297 300
298 } 301 }
stim/cuda/cudatools/callable.h
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 2
3 //define the CUDA_CALLABLE macro (will prefix all members) 3 //define the CUDA_CALLABLE macro (will prefix all members)
4 #ifdef __CUDACC__ 4 #ifdef __CUDACC__
5 -#define CUDA_CALLABLE __host__ __device__ 5 +#define CUDA_CALLABLE __host__ __device__ inline
6 #else 6 #else
7 #define CUDA_CALLABLE 7 #define CUDA_CALLABLE
8 #endif 8 #endif
stim/cuda/cudatools/devices.h
@@ -15,7 +15,7 @@ int maxThreadsPerBlock() @@ -15,7 +15,7 @@ int maxThreadsPerBlock()
15 } 15 }
16 16
17 extern "C" 17 extern "C"
18 -int sharedMemPerBlock() 18 +size_t sharedMemPerBlock()
19 { 19 {
20 int device; 20 int device;
21 cudaGetDevice(&device); //get the id of the current device 21 cudaGetDevice(&device); //get the id of the current device
@@ -23,6 +23,16 @@ int sharedMemPerBlock() @@ -23,6 +23,16 @@ int sharedMemPerBlock()
23 cudaGetDeviceProperties(&props, device); 23 cudaGetDeviceProperties(&props, device);
24 return props.sharedMemPerBlock; 24 return props.sharedMemPerBlock;
25 } 25 }
  26 +
  27 +extern "C"
  28 +size_t constMem()
  29 +{
  30 + int device;
  31 + cudaGetDevice(&device); //get the id of the current device
  32 + cudaDeviceProp props; //device property structure
  33 + cudaGetDeviceProperties(&props, device);
  34 + return props.totalConstMem;
  35 +}
26 } //end namespace rts 36 } //end namespace rts
27 37
28 #endif 38 #endif
stim/cuda/sharedmem.cuh
@@ -5,7 +5,7 @@ @@ -5,7 +5,7 @@
5 namespace stim{ 5 namespace stim{
6 namespace cuda{ 6 namespace cuda{
7 7
8 - // Copies values from global memory to shared memory, optimizing threads 8 + // Copies values from texture memory to shared memory, optimizing threads
9 template<typename T> 9 template<typename T>
10 __device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src, 10 __device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,
11 unsigned int x, unsigned int y, unsigned int X, unsigned int Y, 11 unsigned int x, unsigned int y, unsigned int X, unsigned int Y,
@@ -35,6 +35,19 @@ namespace stim{ @@ -35,6 +35,19 @@ namespace stim{
35 } 35 }
36 } 36 }
37 37
  38 + // Copies values from global memory to shared memory, optimizing threads
  39 + template<typename T>
  40 + __device__ void sharedMemcpy(T* dest, T* src, size_t N, size_t tid, size_t nt){
  41 +
  42 + size_t I = N / nt + 1; //calculate the number of iterations required to make the copy
  43 + size_t xi = tid; //initialize the source and destination index to the thread ID
  44 + for(size_t i = 0; i < I; i++){ //for each iteration
  45 + if(xi < N) //if the index is within the copy region
  46 + dest[xi] = src[xi]; //perform the copy
  47 + xi += nt;
  48 + }
  49 + }
  50 +
38 51
39 } 52 }
40 } 53 }
@@ -884,7 +884,7 @@ public: @@ -884,7 +884,7 @@ public:
884 /// using the following indexing: i = p*B + b 884 /// using the following indexing: i = p*B + b
885 /// @param matrix is the destination for the pixel data 885 /// @param matrix is the destination for the pixel data
886 /// @param mask is the mask 886 /// @param mask is the mask
887 - bool sift(T* matrix, unsigned char* mask = NULL){ 887 + bool sift(T* matrix, unsigned char* mask = NULL, bool PROGRESS = false){
888 size_t Lbytes = sizeof(T) * X(); 888 size_t Lbytes = sizeof(T) * X();
889 T* line = (T*) malloc( Lbytes ); //allocate space for a line 889 T* line = (T*) malloc( Lbytes ); //allocate space for a line
890 890
@@ -903,6 +903,7 @@ public: @@ -903,6 +903,7 @@ public:
903 pl++; //increment the pixel pointer 903 pl++; //increment the pixel pointer
904 } 904 }
905 } 905 }
  906 + if(PROGRESS) progress = (double)( (y+1)*Z() + 1) / (double)(Y() * Z()) * 100;
906 } 907 }
907 p += pl; //add the line increment to the running pixel index 908 p += pl; //add the line increment to the running pixel index
908 } 909 }
@@ -817,7 +817,7 @@ public: @@ -817,7 +817,7 @@ public:
817 /// using the following indexing: i = p*B + b 817 /// using the following indexing: i = p*B + b
818 /// @param matrix is the destination for the pixel data 818 /// @param matrix is the destination for the pixel data
819 /// @param mask is the mask 819 /// @param mask is the mask
820 - bool sift(T* matrix, unsigned char* mask = NULL){ 820 + bool sift(T* matrix, unsigned char* mask = NULL, bool PROGRESS = false){
821 size_t Bbytes = sizeof(T) * Z(); 821 size_t Bbytes = sizeof(T) * Z();
822 size_t XY = X() * Y(); 822 size_t XY = X() * Y();
823 T* band = (T*) malloc( Bbytes ); //allocate space for a line 823 T* band = (T*) malloc( Bbytes ); //allocate space for a line
@@ -836,6 +836,7 @@ public: @@ -836,6 +836,7 @@ public:
836 } 836 }
837 else 837 else
838 file.seekg(Bbytes, std::ios::cur); //otherwise skip this band 838 file.seekg(Bbytes, std::ios::cur); //otherwise skip this band
  839 + if(PROGRESS) progress = (double)(xy+1) / (double)XY * 100;
839 } 840 }
840 return true; 841 return true;
841 } 842 }
@@ -809,7 +809,7 @@ public: @@ -809,7 +809,7 @@ public:
809 /// using the following indexing: i = p*B + b 809 /// using the following indexing: i = p*B + b
810 /// @param matrix is the destination for the pixel data 810 /// @param matrix is the destination for the pixel data
811 /// @param mask is the mask 811 /// @param mask is the mask
812 - bool sift(T* matrix, unsigned char* mask = NULL){ 812 + bool sift(T* matrix, unsigned char* mask = NULL, bool PROGRESS = false){
813 unsigned long long XY = X() * Y(); //Number of XY pixels 813 unsigned long long XY = X() * Y(); //Number of XY pixels
814 unsigned long long L = XY * sizeof(T); //size of XY plane (in bytes) 814 unsigned long long L = XY * sizeof(T); //size of XY plane (in bytes)
815 815
@@ -827,9 +827,8 @@ public: @@ -827,9 +827,8 @@ public:
827 if(mask == NULL || mask[xy] != 0){ //if the pixel is valid 827 if(mask == NULL || mask[xy] != 0){ //if the pixel is valid
828 matrix[i*Z() + b] = band_image[xy]; //copy it to the appropriate point in the values[] array 828 matrix[i*Z() + b] = band_image[xy]; //copy it to the appropriate point in the values[] array
829 i++; 829 i++;
830 - //std::cout<<i<<std::endl;  
831 } 830 }
832 - 831 + if(PROGRESS) progress = (double)(xy+1) / (double)XY * 100;
833 } 832 }
834 } 833 }
835 834
@@ -670,13 +670,13 @@ public: @@ -670,13 +670,13 @@ public:
670 /// using the following indexing: i = b*P + p 670 /// using the following indexing: i = b*P + p
671 /// @param matrix is the destination for the pixel data 671 /// @param matrix is the destination for the pixel data
672 /// @param p is the mask 672 /// @param p is the mask
673 - bool sift(void* matrix, unsigned char* p = NULL){ 673 + bool sift(void* matrix, unsigned char* p = NULL, bool PROGRESS = false){
674 674
675 if (header.interleave == envi_header::BSQ){ //if the infile is bsq file 675 if (header.interleave == envi_header::BSQ){ //if the infile is bsq file
676 if (header.data_type == envi_header::float32) 676 if (header.data_type == envi_header::float32)
677 - return ((bsq<float>*)file)->sift((float*)matrix, p); 677 + return ((bsq<float>*)file)->sift((float*)matrix, p, PROGRESS);
678 else if (header.data_type == envi_header::float64) 678 else if (header.data_type == envi_header::float64)
679 - return ((bsq<double>*)file)->sift((double*)matrix, p); 679 + return ((bsq<double>*)file)->sift((double*)matrix, p, PROGRESS);
680 else{ 680 else{
681 std::cout << "ERROR: unidentified data type" << std::endl; 681 std::cout << "ERROR: unidentified data type" << std::endl;
682 exit(1); 682 exit(1);
@@ -685,9 +685,9 @@ public: @@ -685,9 +685,9 @@ public:
685 685
686 if (header.interleave == envi_header::BIP){ 686 if (header.interleave == envi_header::BIP){
687 if (header.data_type == envi_header::float32) 687 if (header.data_type == envi_header::float32)
688 - return ((bip<float>*)file)->sift((float*)matrix, p); 688 + return ((bip<float>*)file)->sift((float*)matrix, p, PROGRESS);
689 else if (header.data_type == envi_header::float64) 689 else if (header.data_type == envi_header::float64)
690 - return ((bip<double>*)file)->sift((double*)matrix, p); 690 + return ((bip<double>*)file)->sift((double*)matrix, p, PROGRESS);
691 else{ 691 else{
692 std::cout << "ERROR: unidentified data type" << std::endl; 692 std::cout << "ERROR: unidentified data type" << std::endl;
693 exit(1); 693 exit(1);
@@ -695,9 +695,9 @@ public: @@ -695,9 +695,9 @@ public:
695 } 695 }
696 if (header.interleave == envi_header::BIL){ 696 if (header.interleave == envi_header::BIL){
697 if (header.data_type == envi_header::float32) 697 if (header.data_type == envi_header::float32)
698 - return ((bil<float>*)file)->sift((float*)matrix, p); 698 + return ((bil<float>*)file)->sift((float*)matrix, p, PROGRESS);
699 else if (header.data_type == envi_header::float64) 699 else if (header.data_type == envi_header::float64)
700 - return ((bil<double>*)file)->sift((double*)matrix, p); 700 + return ((bil<double>*)file)->sift((double*)matrix, p, PROGRESS);
701 else{ 701 else{
702 std::cout << "ERROR: unidentified data type" << std::endl; 702 std::cout << "ERROR: unidentified data type" << std::endl;
703 exit(1); 703 exit(1);
stim/image/image.h
@@ -6,6 +6,7 @@ @@ -6,6 +6,7 @@
6 #include <vector> 6 #include <vector>
7 #include <iostream> 7 #include <iostream>
8 #include <limits> 8 #include <limits>
  9 +#include <typeinfo>
9 10
10 namespace stim{ 11 namespace stim{
11 /// This static class provides the STIM interface for loading, saving, and storing 2D images. 12 /// This static class provides the STIM interface for loading, saving, and storing 2D images.
@@ -24,8 +25,6 @@ class image{ @@ -24,8 +25,6 @@ class image{
24 size_t Y() const { return R[2]; } 25 size_t Y() const { return R[2]; }
25 size_t C() const { return R[0]; } 26 size_t C() const { return R[0]; }
26 27
27 - size_t bytes(){ return size() * sizeof(T); }  
28 -  
29 void init(){ //initializes all variables, assumes no memory is allocated 28 void init(){ //initializes all variables, assumes no memory is allocated
30 memset(R, 0, sizeof(size_t) * 3); //set the resolution and number of channels to zero 29 memset(R, 0, sizeof(size_t) * 3); //set the resolution and number of channels to zero
31 img = NULL; 30 img = NULL;
@@ -33,7 +32,6 @@ class image{ @@ -33,7 +32,6 @@ class image{
33 32
34 void unalloc(){ //frees any resources associated with the image 33 void unalloc(){ //frees any resources associated with the image
35 if(img) free(img); //if memory has been allocated, free it 34 if(img) free(img); //if memory has been allocated, free it
36 - img=NULL;  
37 } 35 }
38 36
39 37
@@ -44,16 +42,15 @@ class image{ @@ -44,16 +42,15 @@ class image{
44 42
45 void allocate(){ 43 void allocate(){
46 unalloc(); 44 unalloc();
47 - img = (T*) malloc( bytes() ); //allocate memory  
48 - memset(img, 0, bytes()); 45 + img = (T*) malloc( sizeof(T) * R[0] * R[1] * R[2] ); //allocate memory
49 } 46 }
50 47
51 void allocate(size_t x, size_t y, size_t c){ //allocate memory based on the resolution 48 void allocate(size_t x, size_t y, size_t c){ //allocate memory based on the resolution
52 - unalloc();  
53 R[0] = c; R[1] = x; R[2] = y; //set the resolution 49 R[0] = c; R[1] = x; R[2] = y; //set the resolution
54 allocate(); //allocate memory 50 allocate(); //allocate memory
55 } 51 }
56 52
  53 + size_t bytes(){ return size() * sizeof(T); }
57 54
58 size_t idx(size_t x, size_t y, size_t c = 0){ 55 size_t idx(size_t x, size_t y, size_t c = 0){
59 return y * C() * X() + x * C() + c; 56 return y * C() * X() + x * C() + c;
@@ -61,13 +58,23 @@ class image{ @@ -61,13 +58,23 @@ class image{
61 58
62 59
63 int cv_type(){ 60 int cv_type(){
64 - if(std::is_same<T, unsigned char>::value) return CV_MAKETYPE(CV_8U, (int)C());  
65 - if(std::is_same<T, char>::value) return CV_MAKETYPE(CV_8S, (int)C());  
66 - if(std::is_same<T, unsigned short>::value) return CV_MAKETYPE(CV_16U, (int)C());  
67 - if(std::is_same<T, short>::value) return CV_MAKETYPE(CV_16S, (int)C());  
68 - if(std::is_same<T, int>::value) return CV_MAKETYPE(CV_32S, (int)C());  
69 - if(std::is_same<T, float>::value) return CV_MAKETYPE(CV_32F, (int)C());  
70 - if(std::is_same<T, double>::value) return CV_MAKETYPE(CV_64F, (int)C()); 61 + // The following is C++ 11 code, but causes problems on some compilers (ex. nvcc). Below is my best approximation to a solution
  62 +
  63 + //if(std::is_same<T, unsigned char>::value) return CV_MAKETYPE(CV_8U, (int)C());
  64 + //if(std::is_same<T, char>::value) return CV_MAKETYPE(CV_8S, (int)C());
  65 + //if(std::is_same<T, unsigned short>::value) return CV_MAKETYPE(CV_16U, (int)C());
  66 + //if(std::is_same<T, short>::value) return CV_MAKETYPE(CV_16S, (int)C());
  67 + //if(std::is_same<T, int>::value) return CV_MAKETYPE(CV_32S, (int)C());
  68 + //if(std::is_same<T, float>::value) return CV_MAKETYPE(CV_32F, (int)C());
  69 + //if(std::is_same<T, double>::value) return CV_MAKETYPE(CV_64F, (int)C());
  70 +
  71 + if(typeid(T) == typeid(unsigned char)) return CV_MAKETYPE(CV_8U, (int)C());
  72 + if(typeid(T) == typeid(char)) return CV_MAKETYPE(CV_8S, (int)C());
  73 + if(typeid(T) == typeid(unsigned short)) return CV_MAKETYPE(CV_16U, (int)C());
  74 + if(typeid(T) == typeid(short)) return CV_MAKETYPE(CV_16S, (int)C());
  75 + if(typeid(T) == typeid(int)) return CV_MAKETYPE(CV_32S, (int)C());
  76 + if(typeid(T) == typeid(float)) return CV_MAKETYPE(CV_32F, (int)C());
  77 + if(typeid(T) == typeid(double)) return CV_MAKETYPE(CV_64F, (int)C());
71 78
72 std::cout<<"ERROR in stim::image::cv_type - no valid data type found"<<std::endl; 79 std::cout<<"ERROR in stim::image::cv_type - no valid data type found"<<std::endl;
73 exit(1); 80 exit(1);
@@ -75,15 +82,26 @@ class image{ @@ -75,15 +82,26 @@ class image{
75 82
76 /// Returns the value for "white" based on the dynamic range (assumes white is 1.0 for floating point images) 83 /// Returns the value for "white" based on the dynamic range (assumes white is 1.0 for floating point images)
77 T white(){ 84 T white(){
78 - if(std::is_same<T, unsigned char>::value) return UCHAR_MAX;  
79 - if(std::is_same<T, unsigned short>::value) return SHRT_MAX;  
80 - if(std::is_same<T, unsigned>::value) return UINT_MAX;  
81 - if(std::is_same<T, unsigned long>::value) return ULONG_MAX;  
82 - if(std::is_same<T, unsigned long long>::value) return ULLONG_MAX;  
83 - if(std::is_same<T, float>::value) return 1.0f;  
84 - if(std::is_same<T, double>::value) return 1.0; 85 + // The following is C++ 11 code, but causes problems on some compilers (ex. nvcc). Below is my best approximation to a solution
  86 +
  87 + //if(std::is_same<T, unsigned char>::value) return UCHAR_MAX;
  88 + //if(std::is_same<T, unsigned short>::value) return SHRT_MAX;
  89 + //if(std::is_same<T, unsigned>::value) return UINT_MAX;
  90 + //if(std::is_same<T, unsigned long>::value) return ULONG_MAX;
  91 + //if(std::is_same<T, unsigned long long>::value) return ULLONG_MAX;
  92 + //if(std::is_same<T, float>::value) return 1.0f;
  93 + //if(std::is_same<T, double>::value) return 1.0;
  94 +
  95 + if(typeid(T) == typeid(unsigned char)) return UCHAR_MAX;
  96 + if(typeid(T) == typeid(unsigned short)) return SHRT_MAX;
  97 + if(typeid(T) == typeid(unsigned)) return UINT_MAX;
  98 + if(typeid(T) == typeid(unsigned long)) return ULONG_MAX;
  99 + if(typeid(T) == typeid(unsigned long long)) return ULLONG_MAX;
  100 + if(typeid(T) == typeid(float)) return 1.0f;
  101 + if(typeid(T) == typeid(double)) return 1.0;
85 102
86 std::cout<<"ERROR in stim::image::white - no white value known for this data type"<<std::endl; 103 std::cout<<"ERROR in stim::image::white - no white value known for this data type"<<std::endl;
  104 + exit(1);
87 105
88 } 106 }
89 107
@@ -91,9 +109,7 @@ class image{ @@ -91,9 +109,7 @@ class image{
91 public: 109 public:
92 110
93 /// Default constructor - creates an empty image object 111 /// Default constructor - creates an empty image object
94 - image(){  
95 - init(); //initialize all variables to zero, don't allocate any memory  
96 - } 112 + image(){ init(); } //initialize all variables to zero, don't allocate any memory
97 113
98 /// Constructor with a filename - loads the specified file 114 /// Constructor with a filename - loads the specified file
99 image(std::string filename){ //constructor initialize the image with an image file 115 image(std::string filename){ //constructor initialize the image with an image file
@@ -115,7 +131,7 @@ public: @@ -115,7 +131,7 @@ public:
115 } 131 }
116 132
117 /// Copy constructor - duplicates an image object 133 /// Copy constructor - duplicates an image object
118 - image(const stim::image<T> &I){ 134 + image(const stim::image<T>& I){
119 init(); 135 init();
120 allocate(I.X(), I.Y(), I.C()); 136 allocate(I.X(), I.Y(), I.C());
121 memcpy(img, I.img, bytes()); 137 memcpy(img, I.img, bytes());
@@ -127,6 +143,7 @@ public: @@ -127,6 +143,7 @@ public:
127 } 143 }
128 144
129 stim::image<T>& operator=(const stim::image<T>& I){ 145 stim::image<T>& operator=(const stim::image<T>& I){
  146 + init();
130 if(&I == this) //handle self-assignment 147 if(&I == this) //handle self-assignment
131 return *this; 148 return *this;
132 allocate(I.X(), I.Y(), I.C()); 149 allocate(I.X(), I.Y(), I.C());
@@ -139,22 +156,15 @@ public: @@ -139,22 +156,15 @@ public:
139 156
140 cv::Mat cvImage = cv::imread(filename, CV_LOAD_IMAGE_UNCHANGED); //use OpenCV to open the image file 157 cv::Mat cvImage = cv::imread(filename, CV_LOAD_IMAGE_UNCHANGED); //use OpenCV to open the image file
141 if(!cvImage.data){ 158 if(!cvImage.data){
142 - std::cout<<"ERROR stim::image::load() - unable to find image "<<filename<<" ["<<__FILE__<<" (line "<<__LINE__<<")]"<<std::endl; 159 + std::cout<<"ERROR stim::image::load() - unable to find image "<<filename<<std::endl;
143 exit(1); 160 exit(1);
144 } 161 }
145 allocate(cvImage.cols, cvImage.rows, cvImage.channels()); //allocate space for the image 162 allocate(cvImage.cols, cvImage.rows, cvImage.channels()); //allocate space for the image
146 - T* cv_ptr = (T*) cvImage.data;  
147 - if(C() == 1)  
148 - {  
149 - //if this is a single-color image, just copy the data  
150 - memcpy(img, cv_ptr, bytes());  
151 - }  
152 - if(C() == 3)  
153 - { //if this is a 3-color image, OpenCV uses BGR interleaving 163 + T* cv_ptr = (T*)cvImage.data;
  164 + if(C() == 1) //if this is a single-color image, just copy the data
  165 + memcpy(img, cv_ptr, bytes());
  166 + if(C() == 3) //if this is a 3-color image, OpenCV uses BGR interleaving
154 set_interleaved_bgr(cv_ptr, X(), Y()); 167 set_interleaved_bgr(cv_ptr, X(), Y());
155 - }  
156 -  
157 - cvImage.release();  
158 } 168 }
159 169
160 //save a file 170 //save a file
@@ -168,18 +178,16 @@ public: @@ -168,18 +178,16 @@ public:
168 get_interleaved_bgr(buffer); 178 get_interleaved_bgr(buffer);
169 cv::Mat cvImage((int)Y(), (int)X(), cv_type(), buffer); 179 cv::Mat cvImage((int)Y(), (int)X(), cv_type(), buffer);
170 cv::imwrite(filename, cvImage); 180 cv::imwrite(filename, cvImage);
171 - cvImage.release();  
172 - free(buffer);  
173 } 181 }
174 182
175 //create an image from an interleaved buffer 183 //create an image from an interleaved buffer
176 - void set_interleaved_rgb(T* buffer, size_t width, size_t height, size_t channels = 3){  
177 - allocate(width, height, channels); 184 + void set_interleaved_rgb(T* buffer, size_t width, size_t height){
  185 + allocate(width, height, 3);
178 memcpy(img, buffer, bytes()); 186 memcpy(img, buffer, bytes());
179 } 187 }
180 188
181 - void set_interleaved_bgr(T* buffer, size_t width, size_t height, size_t channels = 3){  
182 - allocate(width, height, channels); 189 + void set_interleaved_bgr(T* buffer, size_t width, size_t height){
  190 + allocate(width, height, 3);
183 for(size_t c = 0; c < C(); c++){ //copy directly 191 for(size_t c = 0; c < C(); c++){ //copy directly
184 for(size_t y = 0; y < Y(); y++){ 192 for(size_t y = 0; y < Y(); y++){
185 for(size_t x = 0; x < X(); x++){ 193 for(size_t x = 0; x < X(); x++){
@@ -359,34 +367,6 @@ public: @@ -359,34 +367,6 @@ public:
359 367
360 return r; //return the inverted image 368 return r; //return the inverted image
361 } 369 }
362 -  
363 - /// Invert an image by calculating I1 = alpha - I0, where alpha is the maximum image value  
364 - image<T> invert(){  
365 - size_t N = size(); //calculate the total number of values in the image  
366 - image<T> r(X(), Y(), C()); //allocate space for the resulting image  
367 - T white_val = maxv();  
368 - for(size_t n = 0; n < N; n++)  
369 - r.img[n] = white_val - img[n]; //perform the inversion  
370 -  
371 - return r; //return the inverted image  
372 - }  
373 -  
374 - ///crops the image from x1 to x0 and y1 to y0 and returns a new (smaller) image.  
375 - image<T> crop(int x0, int x1, int y0, int y1)  
376 - {  
377 -  
378 - image<T> ret(x1-x0, y1-y0, C());  
379 - int newWidth = x1-x0;  
380 - int destidx, srcidx;  
381 - ///for each row, cut what amount of data from the original and put it into the new copy.  
382 - for(int i = 0; i < (y1-y0); i++)  
383 - {  
384 - destidx = i*newWidth*C(); ///destination index one per each row  
385 - srcidx = ((i+(y0))*X()+x0)*C(); ///source index, one per each row.  
386 - memcpy(&ret.img[destidx], &img[srcidx], sizeof(T)*newWidth*C());  
387 - }  
388 - return ret;  
389 - }  
390 370
391 image<T> srgb2lab(){ 371 image<T> srgb2lab(){
392 std::cout<<"ERROR stim::image::srgb2lab - function has been broken, re-implement."<<std::endl; 372 std::cout<<"ERROR stim::image::srgb2lab - function has been broken, re-implement."<<std::endl;
@@ -405,7 +385,6 @@ public: @@ -405,7 +385,6 @@ public:
405 exit(1); 385 exit(1);
406 } 386 }
407 387
408 -  
409 // leila's code for non_interleaving data in 3D 388 // leila's code for non_interleaving data in 3D
410 //create an data set from an interleaved buffer 389 //create an data set from an interleaved buffer
411 void set_interleaved3(T* buffer, size_t width, size_t height, size_t depth, size_t channels = 3){ 390 void set_interleaved3(T* buffer, size_t width, size_t height, size_t depth, size_t channels = 3){
stim/math/bessel.h
@@ -17,6 +17,11 @@ static complex<double> czero(0.0,0.0); @@ -17,6 +17,11 @@ static complex<double> czero(0.0,0.0);
17 template< typename P > 17 template< typename P >
18 P gamma(P x) 18 P gamma(P x)
19 { 19 {
  20 + const P EPS = numeric_limits<P>::epsilon();
  21 + const P FPMIN_MAG = numeric_limits<P>::min();
  22 + const P FPMIN = numeric_limits<P>::lowest();
  23 + const P FPMAX = numeric_limits<P>::max();
  24 +
20 int i,k,m; 25 int i,k,m;
21 P ga,gr,r,z; 26 P ga,gr,r,z;
22 27
@@ -47,7 +52,7 @@ P gamma(P x) @@ -47,7 +52,7 @@ P gamma(P x)
47 -0.54e-14, 52 -0.54e-14,
48 0.14e-14}; 53 0.14e-14};
49 54
50 - if (x > 171.0) return 1e308; // This value is an overflow flag. 55 + if (x > 171.0) return FPMAX; // This value is an overflow flag.
51 if (x == (int)x) { 56 if (x == (int)x) {
52 if (x > 0.0) { 57 if (x > 0.0) {
53 ga = 1.0; // use factorial 58 ga = 1.0; // use factorial
@@ -56,7 +61,7 @@ P gamma(P x) @@ -56,7 +61,7 @@ P gamma(P x)
56 } 61 }
57 } 62 }
58 else 63 else
59 - ga = 1e308; 64 + ga = FPMAX;
60 } 65 }
61 else { 66 else {
62 if (fabs(x) > 1.0) { 67 if (fabs(x) > 1.0) {
@@ -89,6 +94,11 @@ template<typename P> @@ -89,6 +94,11 @@ template<typename P>
89 int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1, 94 int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1,
90 P &j0p,P &j1p,P &y0p,P &y1p) 95 P &j0p,P &j1p,P &y0p,P &y1p)
91 { 96 {
  97 + const P EPS = numeric_limits<P>::epsilon();
  98 + const P FPMIN_MAG = numeric_limits<P>::min();
  99 + const P FPMIN = numeric_limits<P>::lowest();
  100 + const P FPMAX = numeric_limits<P>::max();
  101 +
92 P x2,r,ec,w0,w1,r0,r1,cs0,cs1; 102 P x2,r,ec,w0,w1,r0,r1,cs0,cs1;
93 P cu,p0,q0,p1,q1,t1,t2; 103 P cu,p0,q0,p1,q1,t1,t2;
94 int k,kz; 104 int k,kz;
@@ -157,12 +167,12 @@ int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1, @@ -157,12 +167,12 @@ int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1,
157 if (x == 0.0) { 167 if (x == 0.0) {
158 j0 = 1.0; 168 j0 = 1.0;
159 j1 = 0.0; 169 j1 = 0.0;
160 - y0 = -1e308;  
161 - y1 = -1e308; 170 + y0 = -FPMIN;
  171 + y1 = -FPMIN;
162 j0p = 0.0; 172 j0p = 0.0;
163 j1p = 0.5; 173 j1p = 0.5;
164 - y0p = 1e308;  
165 - y1p = 1e308; 174 + y0p = FPMAX;
  175 + y1p = FPMAX;
166 return 0; 176 return 0;
167 } 177 }
168 x2 = x*x; 178 x2 = x*x;
@@ -329,7 +339,7 @@ int msta1(P x,int mp) @@ -329,7 +339,7 @@ int msta1(P x,int mp)
329 for (i=0;i<20;i++) { 339 for (i=0;i<20;i++) {
330 nn = (int)(n1-(n1-n0)/(1.0-f0/f1)); 340 nn = (int)(n1-(n1-n0)/(1.0-f0/f1));
331 f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-mp; 341 f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-mp;
332 - if (abs(nn-n1) < 1) break; 342 + if (std::abs(nn-n1) < 1) break;
333 n0 = n1; 343 n0 = n1;
334 f0 = f1; 344 f0 = f1;
335 n1 = nn; 345 n1 = nn;
@@ -361,7 +371,7 @@ int msta2(P x,int n,int mp) @@ -361,7 +371,7 @@ int msta2(P x,int n,int mp)
361 for (i=0;i<20;i++) { 371 for (i=0;i<20;i++) {
362 nn = (int)(n1-(n1-n0)/(1.0-f0/f1)); 372 nn = (int)(n1-(n1-n0)/(1.0-f0/f1));
363 f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-obj; 373 f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-obj;
364 - if (abs(nn-n1) < 1) break; 374 + if (std::abs(nn-n1) < 1) break;
365 n0 = n1; 375 n0 = n1;
366 f0 = f1; 376 f0 = f1;
367 n1 = nn; 377 n1 = nn;
@@ -596,21 +606,26 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv, @@ -596,21 +606,26 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv,
596 P b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk; 606 P b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk;
597 int j,k,l,m,n,kz; 607 int j,k,l,m,n,kz;
598 608
  609 + const P EPS = numeric_limits<P>::epsilon();
  610 + const P FPMIN_MAG = numeric_limits<P>::min();
  611 + const P FPMIN = numeric_limits<P>::lowest();
  612 + const P FPMAX = numeric_limits<P>::max();
  613 +
599 x2 = x*x; 614 x2 = x*x;
600 n = (int)v; 615 n = (int)v;
601 v0 = v-n; 616 v0 = v-n;
602 if ((x < 0.0) || (v < 0.0)) return 1; 617 if ((x < 0.0) || (v < 0.0)) return 1;
603 - if (x < 1e-15) { 618 + if (x < EPS) {
604 for (k=0;k<=n;k++) { 619 for (k=0;k<=n;k++) {
605 jv[k] = 0.0; 620 jv[k] = 0.0;
606 - yv[k] = -1e308; 621 + yv[k] = FPMIN;
607 djv[k] = 0.0; 622 djv[k] = 0.0;
608 - dyv[k] = 1e308; 623 + dyv[k] = FPMAX;
609 if (v0 == 0.0) { 624 if (v0 == 0.0) {
610 jv[0] = 1.0; 625 jv[0] = 1.0;
611 djv[1] = 0.5; 626 djv[1] = 0.5;
612 } 627 }
613 - else djv[0] = 1e308; 628 + else djv[0] = FPMAX;
614 } 629 }
615 vm = v; 630 vm = v;
616 return 0; 631 return 0;
@@ -623,7 +638,7 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv, @@ -623,7 +638,7 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv,
623 for (k=1;k<=40;k++) { 638 for (k=1;k<=40;k++) {
624 r *= -0.25*x2/(k*(k+vl)); 639 r *= -0.25*x2/(k*(k+vl));
625 bjvl += r; 640 bjvl += r;
626 - if (fabs(r) < fabs(bjvl)*1e-15) break; 641 + if (fabs(r) < fabs(bjvl)*EPS) break;
627 } 642 }
628 vg = 1.0 + vl; 643 vg = 1.0 + vl;
629 a = pow(0.5*x,vl)/gamma(vg); 644 a = pow(0.5*x,vl)/gamma(vg);
@@ -686,7 +701,7 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv, @@ -686,7 +701,7 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv,
686 if (m < n) n = m; 701 if (m < n) n = m;
687 else m = msta2(x,n,15); 702 else m = msta2(x,n,15);
688 f2 = 0.0; 703 f2 = 0.0;
689 - f1 = 1.0e-100; 704 + f1 = FPMIN_MAG;
690 for (k=m;k>=0;k--) { 705 for (k=m;k>=0;k--) {
691 f = 2.0*(v0+k+1.0)*f1/x-f2; 706 f = 2.0*(v0+k+1.0)*f1/x-f2;
692 if (k <= n) jv[k] = f; 707 if (k <= n) jv[k] = f;
@@ -763,20 +778,26 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv, @@ -763,20 +778,26 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv,
763 778
764 template<typename P> 779 template<typename P>
765 int bessjyv_sph(int v, P z, P &vm, P* cjv, 780 int bessjyv_sph(int v, P z, P &vm, P* cjv,
766 - P* cyv, P* cjvp, P* cyvp)  
767 -{ 781 + P* cyv, P* cjvp, P* cyvp){
  782 +
768 //first, compute the bessel functions of fractional order 783 //first, compute the bessel functions of fractional order
769 - bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); 784 + bessjyv<P>(v + (P)0.5, z, vm, cjv, cyv, cjvp, cyvp);
  785 +
  786 + if(z == 0){ //handle degenerate case of z = 0
  787 + memset(cjv, 0, sizeof(P) * (v+1));
  788 + cjv[0] = 1;
  789 + }
770 790
771 //iterate through each and scale 791 //iterate through each and scale
772 - for(int n = 0; n<=v; n++)  
773 - { 792 + for(int n = 0; n<=v; n++){
774 793
775 - cjv[n] = cjv[n] * sqrt(rtsPI/(z * 2.0));  
776 - cyv[n] = cyv[n] * sqrt(rtsPI/(z * 2.0)); 794 + if(z != 0){ //handle degenerate case of z = 0
  795 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  796 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
  797 + }
777 798
778 - cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(rtsPI / (z * 2.0));  
779 - cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(rtsPI / (z * 2.0)); 799 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
  800 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
780 } 801 }
781 802
782 return 0; 803 return 0;
@@ -1237,7 +1258,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv, @@ -1237,7 +1258,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1237 P a0,v0,pv0,pv1,vl,ga,gb,vg,vv,w0,w1,ya0,yak,ya1,wa; 1258 P a0,v0,pv0,pv1,vl,ga,gb,vg,vv,w0,w1,ya0,yak,ya1,wa;
1238 int j,n,k,kz,l,lb,lb0,m; 1259 int j,n,k,kz,l,lb,lb0,m;
1239 1260
1240 - a0 = abs(z); 1261 + a0 = ::abs(z);
1241 z1 = z; 1262 z1 = z;
1242 z2 = z*z; 1263 z2 = z*z;
1243 n = (int)v; 1264 n = (int)v;
@@ -1265,7 +1286,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv, @@ -1265,7 +1286,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1265 vm = v; 1286 vm = v;
1266 return 0; 1287 return 0;
1267 } 1288 }
1268 - if (real(z1) < 0.0) z1 = -z; 1289 + if (::real(z1) < 0.0) z1 = -z;
1269 if (a0 <= 12.0) { 1290 if (a0 <= 12.0) {
1270 for (l=0;l<2;l++) { 1291 for (l=0;l<2;l++) {
1271 vl = v0+l; 1292 vl = v0+l;
@@ -1274,7 +1295,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv, @@ -1274,7 +1295,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1274 for (k=1;k<=40;k++) { 1295 for (k=1;k<=40;k++) {
1275 cr *= -0.25*z2/(k*(k+vl)); 1296 cr *= -0.25*z2/(k*(k+vl));
1276 cjvl += cr; 1297 cjvl += cr;
1277 - if (abs(cr) < abs(cjvl)*eps) break; 1298 + if (::abs(cr) < ::abs(cjvl)*eps) break;
1278 } 1299 }
1279 vg = 1.0 + vl; 1300 vg = 1.0 + vl;
1280 ga = gamma(vg); 1301 ga = gamma(vg);
@@ -1327,7 +1348,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv, @@ -1327,7 +1348,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1327 for (k=1;k<=40;k++) { 1348 for (k=1;k<=40;k++) {
1328 cr *= -0.25*z2/(k*(k-vl)); 1349 cr *= -0.25*z2/(k*(k-vl));
1329 cjvl += cr; 1350 cjvl += cr;
1330 - if (abs(cr) < abs(cjvl)*eps) break; 1351 + if (::abs(cr) < ::abs(cjvl)*eps) break;
1331 } 1352 }
1332 vg = 1.0-vl; 1353 vg = 1.0-vl;
1333 gb = gamma(vg); 1354 gb = gamma(vg);
@@ -1360,16 +1381,16 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv, @@ -1360,16 +1381,16 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1360 cyv1 = M_2_PI*(cec*cjv1-1.0/z1-0.25*z1*cs1); 1381 cyv1 = M_2_PI*(cec*cjv1-1.0/z1-0.25*z1*cs1);
1361 } 1382 }
1362 } 1383 }
1363 - if (real(z) < 0.0) { 1384 + if (::real(z) < 0.0) {
1364 cfac0 = exp(pv0*cii); 1385 cfac0 = exp(pv0*cii);
1365 cfac1 = exp(pv1*cii); 1386 cfac1 = exp(pv1*cii);
1366 - if (imag(z) < 0.0) { 1387 + if (::imag(z) < 0.0) {
1367 cyv0 = cfac0*cyv0-(P)2.0*(complex<P>)cii*cos(pv0)*cjv0; 1388 cyv0 = cfac0*cyv0-(P)2.0*(complex<P>)cii*cos(pv0)*cjv0;
1368 cyv1 = cfac1*cyv1-(P)2.0*(complex<P>)cii*cos(pv1)*cjv1; 1389 cyv1 = cfac1*cyv1-(P)2.0*(complex<P>)cii*cos(pv1)*cjv1;
1369 cjv0 /= cfac0; 1390 cjv0 /= cfac0;
1370 cjv1 /= cfac1; 1391 cjv1 /= cfac1;
1371 } 1392 }
1372 - else if (imag(z) > 0.0) { 1393 + else if (::imag(z) > 0.0) {
1373 cyv0 = cyv0/cfac0+(P)2.0*(complex<P>)cii*cos(pv0)*cjv0; 1394 cyv0 = cyv0/cfac0+(P)2.0*(complex<P>)cii*cos(pv0)*cjv0;
1374 cyv1 = cyv1/cfac1+(P)2.0*(complex<P>)cii*cos(pv1)*cjv1; 1395 cyv1 = cyv1/cfac1+(P)2.0*(complex<P>)cii*cos(pv1)*cjv1;
1375 cjv0 *= cfac0; 1396 cjv0 *= cfac0;
@@ -1400,7 +1421,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv, @@ -1400,7 +1421,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1400 cf2 = cf1; 1421 cf2 = cf1;
1401 cf1 = cf; 1422 cf1 = cf;
1402 } 1423 }
1403 - if (abs(cjv0) > abs(cjv1)) cs = cjv0/cf; 1424 + if (::abs(cjv0) > ::abs(cjv1)) cs = cjv0/cf;
1404 else cs = cjv1/cf2; 1425 else cs = cjv1/cf2;
1405 for (k=0;k<=n;k++) { 1426 for (k=0;k<=n;k++) {
1406 cjv[k] *= cs; 1427 cjv[k] *= cs;
@@ -1412,21 +1433,21 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv, @@ -1412,21 +1433,21 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1412 } 1433 }
1413 cyv[0] = cyv0; 1434 cyv[0] = cyv0;
1414 cyv[1] = cyv1; 1435 cyv[1] = cyv1;
1415 - ya0 = abs(cyv0); 1436 + ya0 = ::abs(cyv0);
1416 lb = 0; 1437 lb = 0;
1417 cg0 = cyv0; 1438 cg0 = cyv0;
1418 cg1 = cyv1; 1439 cg1 = cyv1;
1419 for (k=2;k<=n;k++) { 1440 for (k=2;k<=n;k++) {
1420 cyk = 2.0*(v0+k-1.0)*cg1/z-cg0; 1441 cyk = 2.0*(v0+k-1.0)*cg1/z-cg0;
1421 - yak = abs(cyk);  
1422 - ya1 = abs(cg0); 1442 + yak = ::abs(cyk);
  1443 + ya1 = ::abs(cg0);
1423 if ((yak < ya0) && (yak< ya1)) lb = k; 1444 if ((yak < ya0) && (yak< ya1)) lb = k;
1424 cyv[k] = cyk; 1445 cyv[k] = cyk;
1425 cg0 = cg1; 1446 cg0 = cg1;
1426 cg1 = cyk; 1447 cg1 = cyk;
1427 } 1448 }
1428 lb0 = 0; 1449 lb0 = 0;
1429 - if ((lb > 4) && (imag(z) != 0.0)) { 1450 + if ((lb > 4) && (::imag(z) != 0.0)) {
1430 while(lb != lb0) { 1451 while(lb != lb0) {
1431 ch2 = cone; 1452 ch2 = cone;
1432 ch1 = czero; 1453 ch1 = czero;
@@ -1449,7 +1470,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv, @@ -1449,7 +1470,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1449 cp21 = ch2; 1470 cp21 = ch2;
1450 if (lb == n) 1471 if (lb == n)
1451 cjv[lb+1] = 2.0*(lb+v0)*cjv[lb]/z-cjv[lb-1]; 1472 cjv[lb+1] = 2.0*(lb+v0)*cjv[lb]/z-cjv[lb-1];
1452 - if (abs(cjv[0]) > abs(cjv[1])) { 1473 + if (::abs(cjv[0]) > ::abs(cjv[1])) {
1453 cyv[lb+1] = (cjv[lb+1]*cyv0-2.0*cp11/(M_PI*z))/cjv[0]; 1474 cyv[lb+1] = (cjv[lb+1]*cyv0-2.0*cp11/(M_PI*z))/cjv[0];
1454 cyv[lb] = (cjv[lb]*cyv0+2.0*cp12/(M_PI*z))/cjv[0]; 1475 cyv[lb] = (cjv[lb]*cyv0+2.0*cp12/(M_PI*z))/cjv[0];
1455 } 1476 }
@@ -1474,8 +1495,8 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv, @@ -1474,8 +1495,8 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1474 cyl2 = cylk; 1495 cyl2 = cylk;
1475 } 1496 }
1476 for (k=2;k<=n;k++) { 1497 for (k=2;k<=n;k++) {
1477 - wa = abs(cyv[k]);  
1478 - if (wa < abs(cyv[k-1])) lb = k; 1498 + wa = ::abs(cyv[k]);
  1499 + if (wa < ::abs(cyv[k-1])) lb = k;
1479 } 1500 }
1480 } 1501 }
1481 } 1502 }
@@ -1494,15 +1515,21 @@ int cbessjyva_sph(int v,complex<P> z,P &vm,complex<P>*cjv, @@ -1494,15 +1515,21 @@ int cbessjyva_sph(int v,complex<P> z,P &vm,complex<P>*cjv,
1494 //first, compute the bessel functions of fractional order 1515 //first, compute the bessel functions of fractional order
1495 cbessjyva<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); 1516 cbessjyva<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
1496 1517
  1518 + if(z == 0){ //handle degenerate case of z = 0
  1519 + memset(cjv, 0, sizeof(P) * (v+1));
  1520 + cjv[0] = 1;
  1521 + }
  1522 +
1497 //iterate through each and scale 1523 //iterate through each and scale
1498 for(int n = 0; n<=v; n++) 1524 for(int n = 0; n<=v; n++)
1499 { 1525 {
  1526 + if(z != 0){ //handle degenerate case of z = 0
  1527 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  1528 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
  1529 + }
1500 1530
1501 - cjv[n] = cjv[n] * sqrt(rtsPI/(z * 2.0));  
1502 - cyv[n] = cyv[n] * sqrt(rtsPI/(z * 2.0));  
1503 -  
1504 - cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(rtsPI / (z * 2.0));  
1505 - cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(rtsPI / (z * 2.0)); 1531 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
  1532 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
1506 } 1533 }
1507 1534
1508 return 0; 1535 return 0;
stim/math/circle.h
@@ -17,7 +17,7 @@ class circle : plane<T> @@ -17,7 +17,7 @@ class circle : plane<T>
17 17
18 private: 18 private:
19 19
20 - stim::vec<T> Y; 20 + stim::vec3<T> Y;
21 21
22 CUDA_CALLABLE void 22 CUDA_CALLABLE void
23 init() 23 init()
@@ -48,7 +48,7 @@ public: @@ -48,7 +48,7 @@ public:
48 circle(T size, T z_pos = (T)0) : plane<T>() 48 circle(T size, T z_pos = (T)0) : plane<T>()
49 { 49 {
50 init(); 50 init();
51 - center(stim::vec<T>(0,0,z_pos)); 51 + center(stim::vec3<T>(0,0,z_pos));
52 scale(size); 52 scale(size);
53 } 53 }
54 54
@@ -56,7 +56,7 @@ public: @@ -56,7 +56,7 @@ public:
56 ///@param c: x,y,z location of the center. 56 ///@param c: x,y,z location of the center.
57 ///@param n: x,y,z direction of the normal. 57 ///@param n: x,y,z direction of the normal.
58 CUDA_CALLABLE 58 CUDA_CALLABLE
59 - circle(vec<T> c, vec<T> n = vec<T>(0,0,1)) : plane<T>() 59 + circle(vec3<T> c, vec3<T> n = vec3<T>(0,0,1)) : plane<T>()
60 { 60 {
61 center(c); 61 center(c);
62 normal(n); 62 normal(n);
@@ -68,7 +68,7 @@ public: @@ -68,7 +68,7 @@ public:
68 ///@param s: size of the rectangle. 68 ///@param s: size of the rectangle.
69 ///@param n: x,y,z direction of the normal. 69 ///@param n: x,y,z direction of the normal.
70 CUDA_CALLABLE 70 CUDA_CALLABLE
71 - circle(vec<T> c, T s, vec<T> n = vec<T>(0,0,1)) : plane<T>() 71 + circle(vec3<T> c, T s, vec3<T> n = vec3<T>(0,0,1)) : plane<T>()
72 { 72 {
73 init(); 73 init();
74 center(c); 74 center(c);
@@ -82,7 +82,7 @@ public: @@ -82,7 +82,7 @@ public:
82 ///@param n: x,y,z direction of the normal. 82 ///@param n: x,y,z direction of the normal.
83 ///@param u: x,y,z direction for the zero vector (from where the rotation starts) 83 ///@param u: x,y,z direction for the zero vector (from where the rotation starts)
84 CUDA_CALLABLE 84 CUDA_CALLABLE
85 - circle(vec<T> c, T s, vec<T> n = vec<T>(0,0,1), vec<T> u = vec<T>(1, 0, 0)) : plane<T>() 85 + circle(vec3<T> c, T s, vec3<T> n = vec3<T>(0,0,1), vec3<T> u = vec3<T>(1, 0, 0)) : plane<T>()
86 { 86 {
87 init(); 87 init();
88 setU(u); 88 setU(u);
@@ -103,16 +103,15 @@ public: @@ -103,16 +103,15 @@ public:
103 ///sets the normal for the circle 103 ///sets the normal for the circle
104 ///@param n: x,y,z direction of the normal. 104 ///@param n: x,y,z direction of the normal.
105 CUDA_CALLABLE void 105 CUDA_CALLABLE void
106 - normal(vec<T> n) 106 + normal(vec3<T> n)
107 { 107 {
108 rotate(n, Y); 108 rotate(n, Y);
109 } 109 }
110 110
111 ///sets the center of the circle. 111 ///sets the center of the circle.
112 ///@param n: x,y,z location of the center. 112 ///@param n: x,y,z location of the center.
113 - CUDA_CALLABLE T  
114 - center(vec<T> p)  
115 - { 113 + CUDA_CALLABLE void
  114 + center(vec3<T> p){
116 this->P = p; 115 this->P = p;
117 } 116 }
118 117
@@ -127,17 +126,17 @@ public: @@ -127,17 +126,17 @@ public:
127 } 126 }
128 127
129 ///get the world space value given the planar coordinates a, b in [0, 1] 128 ///get the world space value given the planar coordinates a, b in [0, 1]
130 - CUDA_CALLABLE stim::vec<T> p(T a, T b) 129 + CUDA_CALLABLE stim::vec3<T> p(T a, T b)
131 { 130 {
132 - stim::vec<T> result; 131 + stim::vec3<T> result;
133 132
134 - vec<T> A = this->P - this->U * (T)0.5 - Y * (T)0.5; 133 + vec3<T> A = this->P - this->U * (T)0.5 - Y * (T)0.5;
135 result = A + this->U * a + Y * b; 134 result = A + this->U * a + Y * b;
136 return result; 135 return result;
137 } 136 }
138 137
139 ///parenthesis operator returns the world space given rectangular coordinates a and b in [0 1] 138 ///parenthesis operator returns the world space given rectangular coordinates a and b in [0 1]
140 - CUDA_CALLABLE stim::vec<T> operator()(T a, T b) 139 + CUDA_CALLABLE stim::vec3<T> operator()(T a, T b)
141 { 140 {
142 return p(a,b); 141 return p(a,b);
143 } 142 }
@@ -145,11 +144,11 @@ public: @@ -145,11 +144,11 @@ public:
145 ///returns a vector with the points on the initialized circle. 144 ///returns a vector with the points on the initialized circle.
146 ///connecting the points results in a circle. 145 ///connecting the points results in a circle.
147 ///@param n: integer for the number of points representing the circle. 146 ///@param n: integer for the number of points representing the circle.
148 - std::vector<stim::vec<T> > 147 + std::vector<stim::vec3<T> >
149 getPoints(int n) 148 getPoints(int n)
150 { 149 {
151 - std::vector<stim::vec<T> > result;  
152 - stim::vec<T> point; 150 + std::vector<stim::vec3<T> > result;
  151 + stim::vec3<T> point;
153 T x,y; 152 T x,y;
154 float step = 360.0/(float) n; 153 float step = 360.0/(float) n;
155 for(float j = 0; j <= 360.0; j += step) 154 for(float j = 0; j <= 360.0; j += step)
@@ -164,7 +163,7 @@ public: @@ -164,7 +163,7 @@ public:
164 ///returns a vector with the points on the initialized circle. 163 ///returns a vector with the points on the initialized circle.
165 ///connecting the points results in a circle. 164 ///connecting the points results in a circle.
166 ///@param n: integer for the number of points representing the circle. 165 ///@param n: integer for the number of points representing the circle.
167 - stim::vec<T> 166 + stim::vec3<T>
168 p(T theta) 167 p(T theta)
169 { 168 {
170 T x,y; 169 T x,y;
stim/math/complex.h
1 -/*RTS Complex number class. This class is CUDA compatible,  
2 -and can therefore be used in CUDA code and on CUDA devices.  
3 -*/ 1 +/// CUDA compatible complex number class
4 2
5 -#ifndef RTS_COMPLEX  
6 -#define RTS_COMPLEX 3 +#ifndef STIM_COMPLEX
  4 +#define STIM_COMPLEX
7 5
8 -#include "../cuda/callable.h" 6 +#include "../cuda/cudatools/callable.h"
9 #include <cmath> 7 #include <cmath>
10 #include <string> 8 #include <string>
11 #include <sstream> 9 #include <sstream>
@@ -13,6 +11,7 @@ and can therefore be used in CUDA code and on CUDA devices. @@ -13,6 +11,7 @@ and can therefore be used in CUDA code and on CUDA devices.
13 11
14 namespace stim 12 namespace stim
15 { 13 {
  14 + enum complexComponentType {complexReal, complexImaginary, complexMag};
16 15
17 template <class T> 16 template <class T>
18 struct complex 17 struct complex
@@ -230,12 +229,6 @@ struct complex @@ -230,12 +229,6 @@ struct complex
230 return result; 229 return result;
231 } 230 }
232 231
233 - /*CUDA_CALLABLE complex<T> pow(int y)  
234 - {  
235 -  
236 - return pow((double)y);  
237 - }*/  
238 -  
239 CUDA_CALLABLE complex<T> pow(T y) 232 CUDA_CALLABLE complex<T> pow(T y)
240 { 233 {
241 complex<T> result; 234 complex<T> result;
@@ -328,8 +321,31 @@ struct complex @@ -328,8 +321,31 @@ struct complex
328 return *this; 321 return *this;
329 } 322 }
330 323
  324 +
  325 +
331 }; 326 };
332 327
  328 +/// Cast an array of complex values to an array of real values
  329 +template<typename T>
  330 +static void real(T* r, complex<T>* c, size_t n){
  331 + for(size_t i = 0; i < n; i++)
  332 + r[i] = c[i].real();
  333 +}
  334 +
  335 +/// Cast an array of complex values to an array of real values
  336 +template<typename T>
  337 +static void imag(T* r, complex<T>* c, size_t n){
  338 + for(size_t i = 0; i < n; i++)
  339 + r[i] = c[i].imag();
  340 +}
  341 +
  342 +/// Calculate the magnitude of an array of complex values
  343 +template<typename T>
  344 +static void abs(T* m, complex<T>* c, size_t n){
  345 + for(size_t i = 0; i < n; i++)
  346 + m[i] = c[i].abs();
  347 +}
  348 +
333 } //end RTS namespace 349 } //end RTS namespace
334 350
335 //addition 351 //addition
@@ -432,17 +448,6 @@ CUDA_CALLABLE static T imag(stim::complex<T> a) @@ -432,17 +448,6 @@ CUDA_CALLABLE static T imag(stim::complex<T> a)
432 return a.i; 448 return a.i;
433 } 449 }
434 450
435 -//trigonometric functions  
436 -//template<class A>  
437 -/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x)  
438 -{  
439 - stim::complex<float> result;  
440 - result.r = sinf(x.r) * coshf(x.i);  
441 - result.i = cosf(x.r) * sinhf(x.i);  
442 -  
443 - return result;  
444 -}*/  
445 -  
446 template<class A> 451 template<class A>
447 CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x) 452 CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)
448 { 453 {
@@ -453,17 +458,6 @@ CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x) @@ -453,17 +458,6 @@ CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)
453 return result; 458 return result;
454 } 459 }
455 460
456 -//floating point template  
457 -//template<class A>  
458 -/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x)  
459 -{  
460 - stim::complex<float> result;  
461 - result.r = cosf(x.r) * coshf(x.i);  
462 - result.i = -(sinf(x.r) * sinhf(x.i));  
463 -  
464 - return result;  
465 -}*/  
466 -  
467 template<class A> 461 template<class A>
468 CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x) 462 CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x)
469 { 463 {
@@ -496,10 +490,4 @@ std::istream& operator>>(std::istream& is, stim::complex<A>& x) @@ -496,10 +490,4 @@ std::istream& operator>>(std::istream& is, stim::complex<A>& x)
496 return is; //return the stream 490 return is; //return the stream
497 } 491 }
498 492
499 -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7  
500 -//template<class T> using rtsComplex = stim::complex<T>;  
501 -//#endif  
502 -  
503 -  
504 -  
505 #endif 493 #endif
stim/math/constants.h
1 -#ifndef RTS_CONSTANTS_H  
2 -#define RTS_CONSTANTS_H 1 +#ifndef STIM_CONSTANTS_H
  2 +#define STIM_CONSTANTS_H
3 3
4 -#define stimPI 3.14159  
5 -#define stimTAU 2 * rtsPI 4 +#include "stim/cuda/cudatools/callable.h"
  5 +namespace stim{
  6 + const double PI = 3.1415926535897932384626433832795028841971693993751058209749445923078164062862;
  7 + const double TAU = 2 * stim::PI;
  8 +}
6 9
7 #endif 10 #endif
stim/math/fft.h 0 → 100644
  1 +#ifndef STIM_FFT_H
  2 +#define STIM_FFT_H
  3 +
  4 +namespace stim{
  5 +
  6 + template<class T>
  7 + void circshift(T *out, const T *in, size_t xdim, size_t ydim, size_t xshift, size_t yshift){
  8 + size_t i, j, ii, jj;
  9 + for (i =0; i < xdim; i++) {
  10 + ii = (i + xshift) % xdim;
  11 + for (j = 0; j < ydim; j++) {
  12 + jj = (j + yshift) % ydim;
  13 + out[ii * ydim + jj] = in[i * ydim + j];
  14 + }
  15 + }
  16 + }
  17 +
  18 + template<typename T>
  19 + void cpu_fftshift(T* out, T* in, size_t xdim, size_t ydim){
  20 + circshift(out, in, xdim, ydim, xdim/2, ydim/2);
  21 + }
  22 +
  23 + template<typename T>
  24 + void cpu_ifftshift(T* out, T* in, size_t xdim, size_t ydim){
  25 + circshift(out, in, xdim, ydim, xdim/2, ydim/2);
  26 + }
  27 +
  28 +
  29 +}
  30 +
  31 +#endif
0 \ No newline at end of file 32 \ No newline at end of file
stim/math/legendre.h
1 #ifndef RTS_LEGENDRE_H 1 #ifndef RTS_LEGENDRE_H
2 #define RTS_LEGENDRE_H 2 #define RTS_LEGENDRE_H
3 3
4 -#include "rts/cuda/callable.h" 4 +#include "../cuda/cudatools/callable.h"
5 5
6 namespace stim{ 6 namespace stim{
7 7
@@ -24,9 +24,11 @@ CUDA_CALLABLE void shift_legendre(int n, T x, T& P0, T& P1) @@ -24,9 +24,11 @@ CUDA_CALLABLE void shift_legendre(int n, T x, T& P0, T& P1)
24 P1 = Pnew; 24 P1 = Pnew;
25 } 25 }
26 26
  27 +/// Iteratively evaluates the Legendre polynomials for orders l = [0 n]
27 template <typename T> 28 template <typename T>
28 CUDA_CALLABLE void legendre(int n, T x, T* P) 29 CUDA_CALLABLE void legendre(int n, T x, T* P)
29 { 30 {
  31 + if(n < 0) return;
30 P[0] = 1; 32 P[0] = 1;
31 33
32 if(n >= 1) 34 if(n >= 1)
stim/math/matrix.h
@@ -5,6 +5,7 @@ @@ -5,6 +5,7 @@
5 #include <string.h> 5 #include <string.h>
6 #include <iostream> 6 #include <iostream>
7 #include <stim/math/vector.h> 7 #include <stim/math/vector.h>
  8 +#include <stim/math/vec3.h>
8 #include <stim/cuda/cudatools/callable.h> 9 #include <stim/cuda/cudatools/callable.h>
9 10
10 namespace stim{ 11 namespace stim{
@@ -50,10 +51,8 @@ struct matrix @@ -50,10 +51,8 @@ struct matrix
50 return *this; 51 return *this;
51 } 52 }
52 53
53 -  
54 template<typename Y> 54 template<typename Y>
55 - CUDA_CALLABLE vec<Y> operator*(vec<Y> rhs)  
56 - { 55 + vec<Y> operator*(vec<Y> rhs){
57 unsigned int N = rhs.size(); 56 unsigned int N = rhs.size();
58 57
59 vec<Y> result; 58 vec<Y> result;
@@ -66,6 +65,16 @@ struct matrix @@ -66,6 +65,16 @@ struct matrix
66 return result; 65 return result;
67 } 66 }
68 67
  68 + template<typename Y>
  69 + CUDA_CALLABLE vec3<Y> operator*(vec3<Y> rhs){
  70 + vec3<Y> result = 0;
  71 + for(int r=0; r<3; r++)
  72 + for(int c=0; c<3; c++)
  73 + result[r] += (*this)(r, c) * rhs[c];
  74 +
  75 + return result;
  76 + }
  77 +
69 std::string toStr() 78 std::string toStr()
70 { 79 {
71 std::stringstream ss; 80 std::stringstream ss;
@@ -82,10 +91,6 @@ struct matrix @@ -82,10 +91,6 @@ struct matrix
82 91
83 return ss.str(); 92 return ss.str();
84 } 93 }
85 -  
86 -  
87 -  
88 -  
89 }; 94 };
90 95
91 } //end namespace rts 96 } //end namespace rts
stim/math/meshgrid.h 0 → 100644
  1 +#ifndef STIM_MESHGRID_H
  2 +#define STIM_MESHGRID_H
  3 +
  4 +namespace stim{
  5 +
  6 + /// Create a 2D grid based on a pair of vectors representing the grid spacing (see Matlab)
  7 + /// @param X is an [nx x ny] array that will store the X coordinates for each 2D point
  8 + /// @param Y is an [nx x ny] array that will store the Y coordinates for each 2D point
  9 + /// @param x is an [nx] array that provides the positions of grid points in the x direction
  10 + /// @param nx is the number of grid points in the x direction
  11 + /// @param y is an [ny] array that provides the positions of grid points in the y direction
  12 + /// @param ny is the number of grid points in the y direction
  13 + template<typename T>
  14 + void meshgrid(T* X, T* Y, T* x, size_t nx, T* y, size_t ny){
  15 + size_t xi, yi; //allocate index variables
  16 + for(yi = 0; yi < ny; yi++){ //iterate through each column
  17 + for(xi = 0; xi < nx; xi++){ //iterate through each row
  18 + X[yi * nx + xi] = x[xi];
  19 + Y[yi * nx + xi] = y[yi];
  20 + }
  21 + }
  22 + }
  23 +
  24 + /// Creates an array of n equally spaced values in the range [xmin xmax]
  25 + /// @param X is an array of length n that stores the values
  26 + /// @param xmin is the start point of the array
  27 + /// @param xmax is the end point of the array
  28 + /// @param n is the number of points in the array
  29 + template<typename T>
  30 + void linspace(T* X, T xmin, T xmax, size_t n){
  31 + T alpha;
  32 + for(size_t i = 0; i < n; i++){
  33 + alpha = (T)i / (T)n;
  34 + X[i] = (1 - alpha) * xmin + alpha * xmax;
  35 + }
  36 + }
  37 +
  38 +
  39 +}
  40 +
  41 +
  42 +#endif
0 \ No newline at end of file 43 \ No newline at end of file
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 #define STIM_PLANE_H 2 #define STIM_PLANE_H
3 3
4 #include <iostream> 4 #include <iostream>
5 -#include <stim/math/vector.h> 5 +#include <stim/math/vec3.h>
6 #include <stim/cuda/cudatools/callable.h> 6 #include <stim/cuda/cudatools/callable.h>
7 #include <stim/math/quaternion.h> 7 #include <stim/math/quaternion.h>
8 8
@@ -22,17 +22,17 @@ template <typename T> @@ -22,17 +22,17 @@ template <typename T>
22 class plane 22 class plane
23 { 23 {
24 protected: 24 protected:
25 - stim::vec<T> P;  
26 - stim::vec<T> N;  
27 - stim::vec<T> U; 25 + stim::vec3<T> P;
  26 + stim::vec3<T> N;
  27 + stim::vec3<T> U;
28 28
29 ///Initializes the plane with standard coordinates. 29 ///Initializes the plane with standard coordinates.
30 /// 30 ///
31 CUDA_CALLABLE void init() 31 CUDA_CALLABLE void init()
32 { 32 {
33 - P = stim::vec<T>(0, 0, 0);  
34 - N = stim::vec<T>(0, 0, 1);  
35 - U = stim::vec<T>(1, 0, 0); 33 + P = stim::vec3<T>(0, 0, 0);
  34 + N = stim::vec3<T>(0, 0, 1);
  35 + U = stim::vec3<T>(1, 0, 0);
36 } 36 }
37 37
38 public: 38 public:
@@ -42,7 +42,7 @@ class plane @@ -42,7 +42,7 @@ class plane
42 init(); 42 init();
43 } 43 }
44 44
45 - CUDA_CALLABLE plane(vec<T> n, vec<T> p = vec<T>(0, 0, 0)) 45 + CUDA_CALLABLE plane(vec3<T> n, vec3<T> p = vec3<T>(0, 0, 0))
46 { 46 {
47 init(); 47 init();
48 P = p; 48 P = p;
@@ -56,11 +56,11 @@ class plane @@ -56,11 +56,11 @@ class plane
56 } 56 }
57 57
58 //create a plane from three points (a triangle) 58 //create a plane from three points (a triangle)
59 - CUDA_CALLABLE plane(vec<T> a, vec<T> b, vec<T> c) 59 + CUDA_CALLABLE plane(vec3<T> a, vec3<T> b, vec3<T> c)
60 { 60 {
61 init(); 61 init();
62 P = c; 62 P = c;
63 - stim::vec<T> n = (c - a).cross(b - a); 63 + stim::vec3<T> n = (c - a).cross(b - a);
64 try 64 try
65 { 65 {
66 if(n.len() != 0) 66 if(n.len() != 0)
@@ -84,17 +84,17 @@ class plane @@ -84,17 +84,17 @@ class plane
84 84
85 } 85 }
86 86
87 - CUDA_CALLABLE vec<T> n() 87 + CUDA_CALLABLE vec3<T> n()
88 { 88 {
89 return N; 89 return N;
90 } 90 }
91 91
92 - CUDA_CALLABLE vec<T> p() 92 + CUDA_CALLABLE vec3<T> p()
93 { 93 {
94 return P; 94 return P;
95 } 95 }
96 96
97 - CUDA_CALLABLE vec<T> u() 97 + CUDA_CALLABLE vec3<T> u()
98 { 98 {
99 return U; 99 return U;
100 } 100 }
@@ -107,7 +107,7 @@ class plane @@ -107,7 +107,7 @@ class plane
107 } 107 }
108 108
109 //determines how a vector v intersects the plane (1 = intersects front, 0 = within plane, -1 = intersects back) 109 //determines how a vector v intersects the plane (1 = intersects front, 0 = within plane, -1 = intersects back)
110 - CUDA_CALLABLE int face(vec<T> v){ 110 + CUDA_CALLABLE int face(vec3<T> v){
111 111
112 T dprod = v.dot(N); //get the dot product between v and N 112 T dprod = v.dot(N); //get the dot product between v and N
113 113
@@ -121,46 +121,46 @@ class plane @@ -121,46 +121,46 @@ class plane
121 } 121 }
122 122
123 //determine on which side of the plane a point lies (1 = front, 0 = on the plane, -1 = back) 123 //determine on which side of the plane a point lies (1 = front, 0 = on the plane, -1 = back)
124 - CUDA_CALLABLE int side(vec<T> p){ 124 + CUDA_CALLABLE int side(vec3<T> p){
125 125
126 - vec<T> v = p - P; //get the vector from P to the query point p 126 + vec3<T> v = p - P; //get the vector from P to the query point p
127 127
128 return face(v); 128 return face(v);
129 } 129 }
130 130
131 //compute the component of v that is perpendicular to the plane 131 //compute the component of v that is perpendicular to the plane
132 - CUDA_CALLABLE vec<T> perpendicular(vec<T> v){ 132 + CUDA_CALLABLE vec3<T> perpendicular(vec3<T> v){
133 return N * v.dot(N); 133 return N * v.dot(N);
134 } 134 }
135 135
136 //compute the projection of v in the plane 136 //compute the projection of v in the plane
137 - CUDA_CALLABLE vec<T> parallel(vec<T> v){ 137 + CUDA_CALLABLE vec3<T> parallel(vec3<T> v){
138 return v - perpendicular(v); 138 return v - perpendicular(v);
139 } 139 }
140 140
141 - CUDA_CALLABLE void setU(vec<T> v) 141 + CUDA_CALLABLE void setU(vec3<T> v)
142 { 142 {
143 U = (parallel(v.norm())).norm(); 143 U = (parallel(v.norm())).norm();
144 } 144 }
145 145
146 - CUDA_CALLABLE void decompose(vec<T> v, vec<T>& para, vec<T>& perp){ 146 + CUDA_CALLABLE void decompose(vec3<T> v, vec3<T>& para, vec3<T>& perp){
147 perp = N * v.dot(N); 147 perp = N * v.dot(N);
148 para = v - perp; 148 para = v - perp;
149 } 149 }
150 150
151 //get both the parallel and perpendicular components of a vector v w.r.t. the plane 151 //get both the parallel and perpendicular components of a vector v w.r.t. the plane
152 - CUDA_CALLABLE void project(vec<T> v, vec<T> &v_par, vec<T> &v_perp){ 152 + CUDA_CALLABLE void project(vec3<T> v, vec3<T> &v_par, vec3<T> &v_perp){
153 153
154 v_perp = v.dot(N); 154 v_perp = v.dot(N);
155 v_par = v - v_perp; 155 v_par = v - v_perp;
156 } 156 }
157 157
158 //compute the reflection of v off of the plane 158 //compute the reflection of v off of the plane
159 - CUDA_CALLABLE vec<T> reflect(vec<T> v){ 159 + CUDA_CALLABLE vec3<T> reflect(vec3<T> v){
160 160
161 //compute the reflection using N_prime as the plane normal 161 //compute the reflection using N_prime as the plane normal
162 - vec<T> par = parallel(v);  
163 - vec<T> r = (-v) + par * 2; 162 + vec3<T> par = parallel(v);
  163 + vec3<T> r = (-v) + par * 2;
164 return r; 164 return r;
165 165
166 } 166 }
@@ -184,7 +184,7 @@ class plane @@ -184,7 +184,7 @@ class plane
184 } 184 }
185 185
186 186
187 - CUDA_CALLABLE void rotate(vec<T> n) 187 + CUDA_CALLABLE void rotate(vec3<T> n)
188 { 188 {
189 quaternion<T> q; 189 quaternion<T> q;
190 q.CreateRotation(N, n); 190 q.CreateRotation(N, n);
@@ -194,7 +194,7 @@ class plane @@ -194,7 +194,7 @@ class plane
194 194
195 } 195 }
196 196
197 - CUDA_CALLABLE void rotate(vec<T> n, vec<T> &Y) 197 + CUDA_CALLABLE void rotate(vec3<T> n, vec3<T> &Y)
198 { 198 {
199 quaternion<T> q; 199 quaternion<T> q;
200 q.CreateRotation(N, n); 200 q.CreateRotation(N, n);
@@ -205,7 +205,7 @@ class plane @@ -205,7 +205,7 @@ class plane
205 205
206 } 206 }
207 207
208 - CUDA_CALLABLE void rotate(vec<T> n, vec<T> &X, vec<T> &Y) 208 + CUDA_CALLABLE void rotate(vec3<T> n, vec3<T> &X, vec3<T> &Y)
209 { 209 {
210 quaternion<T> q; 210 quaternion<T> q;
211 q.CreateRotation(N, n); 211 q.CreateRotation(N, n);
stim/math/plane_old.h deleted
1 -#ifndef RTS_PLANE_H  
2 -#define RTS_PLANE_H  
3 -  
4 -#include <iostream>  
5 -#include <stim/math/vector.h>  
6 -#include "rts/cuda/callable.h"  
7 -  
8 -  
9 -namespace stim{  
10 -template <typename T, int D> class plane;  
11 -}  
12 -  
13 -template <typename T, int D>  
14 -CUDA_CALLABLE stim::plane<T, D> operator-(stim::plane<T, D> v);  
15 -  
16 -namespace stim{  
17 -  
18 -template <class T, int D = 3>  
19 -class plane{  
20 -  
21 - //a plane is defined by a point and a normal  
22 -  
23 -private:  
24 -  
25 - vec<T, D> P; //point on the plane  
26 - vec<T, D> N; //plane normal  
27 -  
28 - CUDA_CALLABLE void init(){  
29 - P = vec<T, D>(0, 0, 0);  
30 - N = vec<T, D>(0, 0, 1);  
31 - }  
32 -  
33 -  
34 -public:  
35 -  
36 - //default constructor  
37 - CUDA_CALLABLE plane(){  
38 - init();  
39 - }  
40 -  
41 - CUDA_CALLABLE plane(vec<T, D> n, vec<T, D> p = vec<T, D>(0, 0, 0)){  
42 - P = p;  
43 - N = n.norm();  
44 - }  
45 -  
46 - CUDA_CALLABLE plane(T z_pos){  
47 - init();  
48 - P[2] = z_pos;  
49 - }  
50 -  
51 - //create a plane from three points (a triangle)  
52 - CUDA_CALLABLE plane(vec<T, D> a, vec<T, D> b, vec<T, D> c){  
53 - P = c;  
54 - N = (c - a).cross(b - a);  
55 - if(N.len() == 0) //handle the degenerate case when two vectors are the same, N = 0  
56 - N = 0;  
57 - else  
58 - N = N.norm();  
59 - }  
60 -  
61 - template< typename U >  
62 - CUDA_CALLABLE operator plane<U, D>(){  
63 -  
64 - plane<U, D> result(N, P);  
65 - return result;  
66 - }  
67 -  
68 - CUDA_CALLABLE vec<T, D> norm(){  
69 - return N;  
70 - }  
71 -  
72 - CUDA_CALLABLE vec<T, D> p(){  
73 - return P;  
74 - }  
75 -  
76 - //flip the plane front-to-back  
77 - CUDA_CALLABLE plane<T, D> flip(){  
78 - plane<T, D> result = *this;  
79 - result.N = -result.N;  
80 - return result;  
81 - }  
82 -  
83 - //determines how a vector v intersects the plane (1 = intersects front, 0 = within plane, -1 = intersects back)  
84 - CUDA_CALLABLE int face(vec<T, D> v){  
85 -  
86 - T dprod = v.dot(N); //get the dot product between v and N  
87 -  
88 - //conditional returns the appropriate value  
89 - if(dprod < 0)  
90 - return 1;  
91 - else if(dprod > 0)  
92 - return -1;  
93 - else  
94 - return 0;  
95 - }  
96 -  
97 - //determine on which side of the plane a point lies (1 = front, 0 = on the plane, -1 = back)  
98 - CUDA_CALLABLE int side(vec<T, D> p){  
99 -  
100 - vec<T, D> v = p - P; //get the vector from P to the query point p  
101 -  
102 - return face(v);  
103 - }  
104 -  
105 - //compute the component of v that is perpendicular to the plane  
106 - CUDA_CALLABLE vec<T, D> perpendicular(vec<T, D> v){  
107 - return N * v.dot(N);  
108 - }  
109 -  
110 - //compute the projection of v in the plane  
111 - CUDA_CALLABLE vec<T, D> parallel(vec<T, D> v){  
112 - return v - perpendicular(v);  
113 - }  
114 -  
115 - CUDA_CALLABLE void decompose(vec<T, D> v, vec<T, D>& para, vec<T, D>& perp){  
116 - perp = N * v.dot(N);  
117 - para = v - perp;  
118 - }  
119 -  
120 - //get both the parallel and perpendicular components of a vector v w.r.t. the plane  
121 - CUDA_CALLABLE void project(vec<T, D> v, vec<T, D> &v_par, vec<T, D> &v_perp){  
122 -  
123 - v_perp = v.dot(N);  
124 - v_par = v - v_perp;  
125 - }  
126 -  
127 - //compute the reflection of v off of the plane  
128 - CUDA_CALLABLE vec<T, D> reflect(vec<T, D> v){  
129 -  
130 - //compute the reflection using N_prime as the plane normal  
131 - vec<T, D> par = parallel(v);  
132 - vec<T, D> r = (-v) + par * 2;  
133 -  
134 - /*std::cout<<"----------------REFLECT-----------------------------"<<std::endl;  
135 - std::cout<<str()<<std::endl;  
136 - std::cout<<"v: "<<v<<std::endl;  
137 - std::cout<<"r: "<<r<<std::endl;  
138 - std::cout<<"Perpendicular: "<<perpendicular(v)<<std::endl;  
139 - std::cout<<"Parallel: "<<par<<std::endl;*/  
140 - return r;  
141 -  
142 - }  
143 -  
144 - CUDA_CALLABLE rts::plane<T, D> operator-()  
145 - {  
146 - rts::plane<T, D> p = *this;  
147 -  
148 - //negate the normal vector  
149 - p.N = -p.N;  
150 -  
151 - return p;  
152 - }  
153 -  
154 - //output a string  
155 - std::string str(){  
156 - std::stringstream ss;  
157 - ss<<"P: "<<P<<std::endl;  
158 - ss<<"N: "<<N;  
159 - return ss.str();  
160 - }  
161 -  
162 - ///////Friendship  
163 - //friend CUDA_CALLABLE rts::plane<T, D> operator- <> (rts::plane<T, D> v);  
164 -  
165 -  
166 -  
167 -};  
168 -  
169 -}  
170 -  
171 -//arithmetic operators  
172 -  
173 -//negative operator flips the plane (front to back)  
174 -//template <typename T, int D>  
175 -  
176 -  
177 -  
178 -  
179 -#endif  
stim/math/quad.h deleted
1 -#ifndef RTS_QUAD_H  
2 -#define RTS_QUAD_H  
3 -  
4 -//enable CUDA_CALLABLE macro  
5 -#include <stim/cuda/callable.h>  
6 -#include <stim/math/vector.h>  
7 -#include <stim/math/triangle.h>  
8 -#include <stim/math/quaternion.h>  
9 -#include <iostream>  
10 -#include <iomanip>  
11 -#include <algorithm>  
12 -  
13 -namespace stim{  
14 -  
15 -//template for a quadangle class in ND space  
16 -template <class T, int N = 3>  
17 -struct quad  
18 -{  
19 - /*  
20 - B------------------>C  
21 - ^ ^  
22 - | |  
23 - Y |  
24 - | |  
25 - | |  
26 - A---------X-------->O  
27 - */  
28 -  
29 - /*T A[N];  
30 - T B[N];  
31 - T C[N];*/  
32 -  
33 - rts::vec<T, N> A;  
34 - rts::vec<T, N> X;  
35 - rts::vec<T, N> Y;  
36 -  
37 -  
38 - CUDA_CALLABLE quad()  
39 - {  
40 -  
41 - }  
42 -  
43 - CUDA_CALLABLE quad(vec<T, N> a, vec<T, N> b, vec<T, N> c)  
44 - {  
45 -  
46 - A = a;  
47 - Y = b - a;  
48 - X = c - a - Y;  
49 -  
50 - }  
51 -  
52 - /*******************************************************************  
53 - Constructor - create a quad from a position, normal, and rotation  
54 - *******************************************************************/  
55 - CUDA_CALLABLE quad(rts::vec<T, N> c, rts::vec<T, N> normal, T width, T height, T theta)  
56 - {  
57 -  
58 - //compute the X direction - start along world-space X  
59 - Y = rts::vec<T, N>(0, 1, 0);  
60 - if(Y == normal)  
61 - Y = rts::vec<T, N>(0, 0, 1);  
62 -  
63 - X = Y.cross(normal).norm();  
64 -  
65 - std::cout<<X<<std::endl;  
66 -  
67 - //rotate the X axis by theta radians  
68 - rts::quaternion<T> q;  
69 - q.CreateRotation(theta, normal);  
70 - X = q.toMatrix3() * X;  
71 - Y = normal.cross(X);  
72 -  
73 - //normalize everything  
74 - X = X.norm();  
75 - Y = Y.norm();  
76 -  
77 - //scale to match the quad width and height  
78 - X = X * width;  
79 - Y = Y * height;  
80 -  
81 - //set the corner of the plane  
82 - A = c - X * 0.5f - Y * 0.5f;  
83 -  
84 - std::cout<<X<<std::endl;  
85 - }  
86 -  
87 - //boolean comparison  
88 - bool operator==(const quad<T, N> & rhs)  
89 - {  
90 - if(A == rhs.A && X == rhs.X && Y == rhs.Y)  
91 - return true;  
92 - else  
93 - return false;  
94 - }  
95 -  
96 - /*******************************************  
97 - Return the normal for the quad  
98 - *******************************************/  
99 - CUDA_CALLABLE rts::vec<T, N> n()  
100 - {  
101 - return (X.cross(Y)).norm();  
102 - }  
103 -  
104 - CUDA_CALLABLE rts::vec<T, N> p(T a, T b)  
105 - {  
106 - rts::vec<T, N> result;  
107 - //given the two parameters a, b = [0 1], returns the position in world space  
108 - result = A + X * a + Y * b;  
109 -  
110 - return result;  
111 - }  
112 -  
113 - CUDA_CALLABLE rts::vec<T, N> operator()(T a, T b)  
114 - {  
115 - return p(a, b);  
116 - }  
117 -  
118 - std::string str()  
119 - {  
120 - std::stringstream ss;  
121 -  
122 - ss<<std::left<<"B="<<setfill('-')<<setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;  
123 - ss<<setfill(' ')<<setw(23)<<"|"<<"|"<<std::endl<<setw(23)<<"|"<<"|"<<std::endl;  
124 - ss<<std::left<<"A="<<setfill('-')<<setw(20)<<A<<">"<<"D="<<A + X;  
125 -  
126 - return ss.str();  
127 -  
128 - }  
129 -  
130 - CUDA_CALLABLE quad<T, N> operator*(T rhs)  
131 - {  
132 - //scales the plane by a scalar value  
133 -  
134 - //compute the center point  
135 - rts::vec<T, N> c = A + X*0.5f + Y*0.5f;  
136 -  
137 - //create the new quadangle  
138 - quad<T, N> result;  
139 - result.X = X * rhs;  
140 - result.Y = Y * rhs;  
141 - result.A = c - result.X*0.5f - result.Y*0.5f;  
142 -  
143 - return result;  
144 -  
145 - }  
146 -  
147 - CUDA_CALLABLE T dist(vec<T, N> p)  
148 - {  
149 - //compute the distance between a point and this quad  
150 -  
151 - //first break the quad up into two triangles  
152 - triangle<T, N> T0(A, A+X, A+Y);  
153 - triangle<T, N> T1(A+X+Y, A+X, A+Y);  
154 -  
155 -  
156 - T d0 = T0.dist(p);  
157 - T d1 = T1.dist(p);  
158 -  
159 - if(d0 < d1)  
160 - return d0;  
161 - else  
162 - return d1;  
163 - }  
164 -  
165 - CUDA_CALLABLE T dist_max(vec<T, N> p)  
166 - {  
167 - T da = (A - p).len();  
168 - T db = (A+X - p).len();  
169 - T dc = (A+Y - p).len();  
170 - T dd = (A+X+Y - p).len();  
171 -  
172 - return std::max( da, std::max(db, std::max(dc, dd) ) );  
173 - }  
174 -};  
175 -  
176 -} //end namespace stim
177 -  
178 -template <typename T, int N>  
179 -std::ostream& operator<<(std::ostream& os, rts::quad<T, N> R)  
180 -{  
181 - os<<R.str();  
182 - return os;  
183 -}  
184 -  
185 -  
186 -#endif  
stim/math/quaternion.h
@@ -26,13 +26,13 @@ public: @@ -26,13 +26,13 @@ public:
26 26
27 CUDA_CALLABLE void CreateRotation(T theta, T ux, T uy, T uz){ 27 CUDA_CALLABLE void CreateRotation(T theta, T ux, T uy, T uz){
28 28
29 - vec<T> u(ux, uy, uz); 29 + vec3<T> u(ux, uy, uz);
30 CreateRotation(theta, u); 30 CreateRotation(theta, u);
31 } 31 }
32 32
33 - CUDA_CALLABLE void CreateRotation(T theta, vec<T> u){ 33 + CUDA_CALLABLE void CreateRotation(T theta, vec3<T> u){
34 34
35 - vec<T> u_hat = u.norm(); 35 + vec3<T> u_hat = u.norm();
36 36
37 //assign the given Euler rotation to this quaternion 37 //assign the given Euler rotation to this quaternion
38 w = (T)cos(theta/2); 38 w = (T)cos(theta/2);
@@ -41,9 +41,11 @@ public: @@ -41,9 +41,11 @@ public:
41 z = u_hat[2]*(T)sin(theta/2); 41 z = u_hat[2]*(T)sin(theta/2);
42 } 42 }
43 43
44 - void CreateRotation(vec<T> from, vec<T> to){ 44 + CUDA_CALLABLE void CreateRotation(vec3<T> from, vec3<T> to){
45 45
46 - vec<T> r = from.cross(to); //compute the rotation vector 46 + from = from.norm();
  47 + to = to.norm();
  48 + vec3<T> r = from.cross(to); //compute the rotation vector
47 T theta = asin(r.len()); //compute the angle of the rotation about r 49 T theta = asin(r.len()); //compute the angle of the rotation about r
48 //deal with a zero vector (both k and kn point in the same direction) 50 //deal with a zero vector (both k and kn point in the same direction)
49 if(theta == (T)0){ 51 if(theta == (T)0){
@@ -28,13 +28,10 @@ class rect : plane <T> @@ -28,13 +28,10 @@ class rect : plane <T>
28 O---------X---------> 28 O---------X--------->
29 */ 29 */
30 30
31 -private:  
32 -  
33 - stim::vec<T> X;  
34 - stim::vec<T> Y;  
35 -  
36 - 31 +protected:
37 32
  33 + stim::vec3<T> X;
  34 + stim::vec3<T> Y;
38 35
39 public: 36 public:
40 37
@@ -65,7 +62,7 @@ public: @@ -65,7 +62,7 @@ public:
65 ///create a rectangle from a center point, normal 62 ///create a rectangle from a center point, normal
66 ///@param c: x,y,z location of the center. 63 ///@param c: x,y,z location of the center.
67 ///@param n: x,y,z direction of the normal. 64 ///@param n: x,y,z direction of the normal.
68 - CUDA_CALLABLE rect(vec<T> c, vec<T> n = vec<T>(0, 0, 1)) 65 + CUDA_CALLABLE rect(vec3<T> c, vec3<T> n = vec3<T>(0, 0, 1))
69 : plane<T>() 66 : plane<T>()
70 { 67 {
71 init(); //start with the default setting 68 init(); //start with the default setting
@@ -76,7 +73,7 @@ public: @@ -76,7 +73,7 @@ public:
76 ///@param c: x,y,z location of the center. 73 ///@param c: x,y,z location of the center.
77 ///@param s: size of the rectangle. 74 ///@param s: size of the rectangle.
78 ///@param n: x,y,z direction of the normal. 75 ///@param n: x,y,z direction of the normal.
79 - CUDA_CALLABLE rect(vec<T> c, T s, vec<T> n = vec<T>(0, 0, 1)) 76 + CUDA_CALLABLE rect(vec3<T> c, T s, vec3<T> n = vec3<T>(0, 0, 1))
80 : plane<T>() 77 : plane<T>()
81 { 78 {
82 init(); //start with the default setting 79 init(); //start with the default setting
@@ -89,7 +86,7 @@ public: @@ -89,7 +86,7 @@ public:
89 ///@param center: x,y,z location of the center. 86 ///@param center: x,y,z location of the center.
90 ///@param directionX: u,v,w direction of the X vector. 87 ///@param directionX: u,v,w direction of the X vector.
91 ///@param directionY: u,v,w direction of the Y vector. 88 ///@param directionY: u,v,w direction of the Y vector.
92 - CUDA_CALLABLE rect(vec<T> center, vec<T> directionX, vec<T> directionY ) 89 + CUDA_CALLABLE rect(vec3<T> center, vec3<T> directionX, vec3<T> directionY )
93 : plane<T>((directionX.cross(directionY)).norm(),center) 90 : plane<T>((directionX.cross(directionY)).norm(),center)
94 { 91 {
95 X = directionX; 92 X = directionX;
@@ -101,7 +98,7 @@ public: @@ -101,7 +98,7 @@ public:
101 ///@param center: x,y,z location of the center. 98 ///@param center: x,y,z location of the center.
102 ///@param directionX: u,v,w direction of the X vector. 99 ///@param directionX: u,v,w direction of the X vector.
103 ///@param directionY: u,v,w direction of the Y vector. 100 ///@param directionY: u,v,w direction of the Y vector.
104 - CUDA_CALLABLE rect(T size, vec<T> center, vec<T> directionX, vec<T> directionY ) 101 + CUDA_CALLABLE rect(T size, vec3<T> center, vec3<T> directionX, vec3<T> directionY )
105 : plane<T>((directionX.cross(directionY)).norm(),center) 102 : plane<T>((directionX.cross(directionY)).norm(),center)
106 { 103 {
107 X = directionX; 104 X = directionX;
@@ -114,7 +111,7 @@ public: @@ -114,7 +111,7 @@ public:
114 ///@param center: x,y,z location of the center. 111 ///@param center: x,y,z location of the center.
115 ///@param directionX: u,v,w direction of the X vector. 112 ///@param directionX: u,v,w direction of the X vector.
116 ///@param directionY: u,v,w direction of the Y vector. 113 ///@param directionY: u,v,w direction of the Y vector.
117 - CUDA_CALLABLE rect(vec<T> size, vec<T> center, vec<T> directionX, vec<T> directionY) 114 + CUDA_CALLABLE rect(vec3<T> size, vec3<T> center, vec3<T> directionX, vec3<T> directionY)
118 : plane<T>((directionX.cross(directionY)).norm(), center) 115 : plane<T>((directionX.cross(directionY)).norm(), center)
119 { 116 {
120 X = directionX; 117 X = directionX;
@@ -138,7 +135,7 @@ public: @@ -138,7 +135,7 @@ public:
138 135
139 ///@param n; vector with the normal. 136 ///@param n; vector with the normal.
140 ///Orients the rectangle along the normal n. 137 ///Orients the rectangle along the normal n.
141 - CUDA_CALLABLE void normal(vec<T> n) 138 + CUDA_CALLABLE void normal(vec3<T> n)
142 { 139 {
143 //orient the rectangle along the specified normal 140 //orient the rectangle along the specified normal
144 rotate(n, X, Y); 141 rotate(n, X, Y);
@@ -147,8 +144,8 @@ public: @@ -147,8 +144,8 @@ public:
147 ///general init method that sets a general rectangle. 144 ///general init method that sets a general rectangle.
148 CUDA_CALLABLE void init() 145 CUDA_CALLABLE void init()
149 { 146 {
150 - X = vec<T>(1, 0, 0);  
151 - Y = vec<T>(0, 1, 0); 147 + X = vec3<T>(1, 0, 0);
  148 + Y = vec3<T>(0, 1, 0);
152 } 149 }
153 150
154 //boolean comparison 151 //boolean comparison
@@ -162,18 +159,18 @@ public: @@ -162,18 +159,18 @@ public:
162 159
163 160
164 //get the world space value given the planar coordinates a, b in [0, 1] 161 //get the world space value given the planar coordinates a, b in [0, 1]
165 - CUDA_CALLABLE stim::vec<T> p(T a, T b) 162 + CUDA_CALLABLE stim::vec3<T> p(T a, T b)
166 { 163 {
167 - stim::vec<T> result; 164 + stim::vec3<T> result;
168 //given the two parameters a, b = [0 1], returns the position in world space 165 //given the two parameters a, b = [0 1], returns the position in world space
169 - vec<T> A = this->P - X * (T)0.5 - Y * (T)0.5; 166 + vec3<T> A = this->P - X * (T)0.5 - Y * (T)0.5;
170 result = A + X * a + Y * b; 167 result = A + X * a + Y * b;
171 168
172 return result; 169 return result;
173 } 170 }
174 171
175 //parenthesis operator returns the world space given rectangular coordinates a and b in [0 1] 172 //parenthesis operator returns the world space given rectangular coordinates a and b in [0 1]
176 - CUDA_CALLABLE stim::vec<T> operator()(T a, T b) 173 + CUDA_CALLABLE stim::vec3<T> operator()(T a, T b)
177 { 174 {
178 return p(a, b); 175 return p(a, b);
179 } 176 }
@@ -181,12 +178,12 @@ public: @@ -181,12 +178,12 @@ public:
181 std::string str() 178 std::string str()
182 { 179 {
183 std::stringstream ss; 180 std::stringstream ss;
184 - vec<T> A = P - X * (T)0.5 - Y * (T)0.5; 181 + vec3<T> A = P - X * (T)0.5 - Y * (T)0.5;
185 ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl; 182 ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
186 ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl; 183 ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl;
187 ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X; 184 ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X;
188 185
189 - return ss.str(); 186 + return ss.str();
190 187
191 } 188 }
192 189
@@ -205,11 +202,11 @@ public: @@ -205,11 +202,11 @@ public:
205 202
206 ///computes the distance between the specified point and this rectangle. 203 ///computes the distance between the specified point and this rectangle.
207 ///@param p: x, y, z coordinates of the point to calculate distance to. 204 ///@param p: x, y, z coordinates of the point to calculate distance to.
208 - CUDA_CALLABLE T dist(vec<T> p) 205 + CUDA_CALLABLE T dist(vec3<T> p)
209 { 206 {
210 //compute the distance between a point and this rect 207 //compute the distance between a point and this rect
211 208
212 - vec<T> A = P - X * (T)0.5 - Y * (T)0.5; 209 + vec3<T> A = P - X * (T)0.5 - Y * (T)0.5;
213 210
214 //first break the rect up into two triangles 211 //first break the rect up into two triangles
215 triangle<T> T0(A, A+X, A+Y); 212 triangle<T> T0(A, A+X, A+Y);
@@ -225,16 +222,16 @@ public: @@ -225,16 +222,16 @@ public:
225 return d1; 222 return d1;
226 } 223 }
227 224
228 - CUDA_CALLABLE T center(vec<T> p) 225 + CUDA_CALLABLE T center(vec3<T> p)
229 { 226 {
230 this->P = p; 227 this->P = p;
231 } 228 }
232 229
233 ///Returns the maximum distance of the rectangle from a point p to the sides of the rectangle. 230 ///Returns the maximum distance of the rectangle from a point p to the sides of the rectangle.
234 ///@param p: x, y, z point. 231 ///@param p: x, y, z point.
235 - CUDA_CALLABLE T dist_max(vec<T> p) 232 + CUDA_CALLABLE T dist_max(vec3<T> p)
236 { 233 {
237 - vec<T> A = P - X * (T)0.5 - Y * (T)0.5; 234 + vec3<T> A = P - X * (T)0.5 - Y * (T)0.5;
238 T da = (A - p).len(); 235 T da = (A - p).len();
239 T db = (A+X - p).len(); 236 T db = (A+X - p).len();
240 T dc = (A+Y - p).len(); 237 T dc = (A+Y - p).len();
stim/math/vec3.h 0 → 100644
  1 +#ifndef STIM_VEC3_H
  2 +#define STIM_VEC3_H
  3 +
  4 +
#include <cmath>
#include <ostream>
#include <sstream>
#include <string>

#include <stim/cuda/cudatools/callable.h>
  6 +
  7 +
  8 +namespace stim{
  9 +
  10 +
  11 +/// A class designed to act as a 3D vector with CUDA compatibility
  12 +template<typename T>
  13 +class vec3{
  14 +
  15 +protected:
  16 + T ptr[3];
  17 +
  18 +public:
  19 +
  20 + CUDA_CALLABLE vec3(){}
  21 +
  22 + CUDA_CALLABLE vec3(T v){
  23 + ptr[0] = ptr[1] = ptr[2] = v;
  24 + }
  25 +
  26 + CUDA_CALLABLE vec3(T x, T y, T z){
  27 + ptr[0] = x;
  28 + ptr[1] = y;
  29 + ptr[2] = z;
  30 + }
  31 +
  32 + //copy constructor
  33 + CUDA_CALLABLE vec3( const vec3<T>& other){
  34 + ptr[0] = other.ptr[0];
  35 + ptr[1] = other.ptr[1];
  36 + ptr[2] = other.ptr[2];
  37 + }
  38 +
  39 + //access an element using an index
  40 + CUDA_CALLABLE T& operator[](int idx){
  41 + return ptr[idx];
  42 + }
  43 +
  44 +/// Casting operator. Creates a new vector with a new type U.
  45 + template< typename U >
  46 + CUDA_CALLABLE operator vec3<U>(){
  47 + vec3<U> result;
  48 + result.ptr[0] = (U)ptr[0];
  49 + result.ptr[1] = (U)ptr[1];
  50 + result.ptr[2] = (U)ptr[2];
  51 +
  52 + return result;
  53 + }
  54 +
  55 + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
  56 + CUDA_CALLABLE T len_sq() const{
  57 + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
  58 + }
  59 +
  60 + /// computes the Euclidean length of the vector
  61 + CUDA_CALLABLE T len() const{
  62 + return sqrt(len_sq());
  63 + }
  64 +
  65 +
  66 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  67 + CUDA_CALLABLE vec3<T> cart2sph() const{
  68 + vec3<T> sph;
  69 + sph.ptr[0] = len();
  70 + sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
  71 + if(sph.ptr[0] == 0)
  72 + sph.ptr[2] = 0;
  73 + else
  74 + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
  75 + return sph;
  76 + }
  77 +
  78 + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi])
  79 + CUDA_CALLABLE vec3<T> sph2cart() const{
  80 + vec3<T> cart;
  81 + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
  82 + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
  83 + cart.ptr[2] = ptr[0] * std::cos(ptr[2]);
  84 +
  85 + return cart;
  86 + }
  87 +
  88 + /// Computes the normalized vector (where each coordinate is divided by the L2 norm)
  89 + CUDA_CALLABLE vec3<T> norm() const{
  90 + vec3<T> result;
  91 + T l = len(); //compute the vector length
  92 + return (*this) / l;
  93 + }
  94 +
  95 + /// Computes the cross product of a 3-dimensional vector
  96 + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{
  97 +
  98 + vec3<T> result;
  99 +
  100 + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
  101 + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
  102 + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
  103 +
  104 + return result;
  105 + }
  106 +
  107 + /// Compute the Euclidean inner (dot) product
  108 + CUDA_CALLABLE T dot(vec3<T> rhs) const{
  109 + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
  110 + }
  111 +
  112 + /// Arithmetic addition operator
  113 +
  114 + /// @param rhs is the right-hand-side operator for the addition
  115 + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
  116 + vec3<T> result;
  117 + result.ptr[0] = ptr[0] + rhs[0];
  118 + result.ptr[1] = ptr[1] + rhs[1];
  119 + result.ptr[2] = ptr[2] + rhs[2];
  120 + return result;
  121 + }
  122 +
  123 + /// Arithmetic addition to a scalar
  124 +
  125 + /// @param rhs is the right-hand-side operator for the addition
  126 + CUDA_CALLABLE vec3<T> operator+(T rhs) const{
  127 + vec3<T> result;
  128 + result.ptr[0] = ptr[0] + rhs;
  129 + result.ptr[1] = ptr[1] + rhs;
  130 + result.ptr[2] = ptr[2] + rhs;
  131 + return result;
  132 + }
  133 +
  134 + /// Arithmetic subtraction operator
  135 +
  136 + /// @param rhs is the right-hand-side operator for the subtraction
  137 + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
  138 + vec3<T> result;
  139 + result.ptr[0] = ptr[0] - rhs[0];
  140 + result.ptr[1] = ptr[1] - rhs[1];
  141 + result.ptr[2] = ptr[2] - rhs[2];
  142 + return result;
  143 + }
  144 + /// Arithmetic subtraction to a scalar
  145 +
  146 + /// @param rhs is the right-hand-side operator for the addition
  147 + CUDA_CALLABLE vec3<T> operator-(T rhs) const{
  148 + vec3<T> result;
  149 + result.ptr[0] = ptr[0] - rhs;
  150 + result.ptr[1] = ptr[1] - rhs;
  151 + result.ptr[2] = ptr[2] - rhs;
  152 + return result;
  153 + }
  154 +
  155 + /// Arithmetic scalar multiplication operator
  156 +
  157 + /// @param rhs is the right-hand-side operator for the subtraction
  158 + CUDA_CALLABLE vec3<T> operator*(T rhs) const{
  159 + vec3<T> result;
  160 + result.ptr[0] = ptr[0] * rhs;
  161 + result.ptr[1] = ptr[1] * rhs;
  162 + result.ptr[2] = ptr[2] * rhs;
  163 + return result;
  164 + }
  165 +
  166 + /// Arithmetic scalar division operator
  167 +
  168 + /// @param rhs is the right-hand-side operator for the subtraction
  169 + CUDA_CALLABLE vec3<T> operator/(T rhs) const{
  170 + return (*this) * ((T)1.0/rhs);
  171 + }
  172 +
  173 + /// Multiplication by a scalar, followed by assignment
  174 + CUDA_CALLABLE vec3<T> operator*=(T rhs){
  175 + ptr[0] = ptr[0] * rhs;
  176 + ptr[1] = ptr[1] * rhs;
  177 + ptr[2] = ptr[2] * rhs;
  178 + return *this;
  179 + }
  180 +
  181 + /// Addition and assignment
  182 + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
  183 + ptr[0] = ptr[0] + rhs;
  184 + ptr[1] = ptr[1] + rhs;
  185 + ptr[2] = ptr[2] + rhs;
  186 + return *this;
  187 + }
  188 +
  189 + /// Assign a scalar to all values
  190 + CUDA_CALLABLE vec3<T> & operator=(T rhs){
  191 + ptr[0] = ptr[0] = rhs;
  192 + ptr[1] = ptr[1] = rhs;
  193 + ptr[2] = ptr[2] = rhs;
  194 + return *this;
  195 + }
  196 +
  197 + /// Casting and assignment
  198 + template<typename Y>
  199 + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
  200 + ptr[0] = (T)rhs.ptr[0];
  201 + ptr[1] = (T)rhs.ptr[1];
  202 + ptr[2] = (T)rhs.ptr[2];
  203 + return *this;
  204 + }
  205 +
  206 + /// Unary minus (returns the negative of the vector)
  207 + CUDA_CALLABLE vec3<T> operator-() const{
  208 + vec3<T> result;
  209 + result.ptr[0] = -ptr[0];
  210 + result.ptr[1] = -ptr[1];
  211 + result.ptr[2] = -ptr[2];
  212 + return result;
  213 + }
  214 +
  215 +
  216 + /// Outputs the vector as a string
  217 + std::string str() const{
  218 + std::stringstream ss;
  219 +
  220 + const size_t N = 3;
  221 +
  222 + ss<<"[";
  223 + for(size_t i=0; i<N; i++)
  224 + {
  225 + ss<<ptr[i];
  226 + if(i != N-1)
  227 + ss<<", ";
  228 + }
  229 + ss<<"]";
  230 +
  231 + return ss.str();
  232 + }
  233 +
  234 + size_t size(){ return 3; }
  235 +
  236 + }; //end class vec3
  237 +} //end namespace stim
  238 +
  239 +/// Multiply a vector by a constant when the vector is on the right hand side
  240 +template <typename T>
  241 +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){
  242 + return rhs * lhs;
  243 +}
  244 +
  245 +//stream operator
  246 +template<typename T>
  247 +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){
  248 + os<<rhs.str();
  249 + return os;
  250 +}
  251 +
  252 +#endif
0 \ No newline at end of file 253 \ No newline at end of file
stim/math/vector.h
1 -#ifndef RTS_VECTOR_H  
2 -#define RTS_VECTOR_H 1 +#ifndef STIM_VECTOR_H
  2 +#define STIM_VECTOR_H
3 3
4 #include <iostream> 4 #include <iostream>
5 #include <cmath> 5 #include <cmath>
6 #include <sstream> 6 #include <sstream>
7 #include <vector> 7 #include <vector>
8 - 8 +
9 #include <stim/cuda/cudatools/callable.h> 9 #include <stim/cuda/cudatools/callable.h>
  10 +#include <stim/math/vec3.h>
10 11
11 namespace stim 12 namespace stim
12 { 13 {
13 14
14 -  
15 -  
16 template <class T> 15 template <class T>
17 struct vec : public std::vector<T> 16 struct vec : public std::vector<T>
18 { 17 {
@@ -72,8 +71,8 @@ struct vec : public std::vector<T> @@ -72,8 +71,8 @@ struct vec : public std::vector<T>
72 size_t N = other.size(); 71 size_t N = other.size();
73 resize(N); //resize the current vector to match the copy 72 resize(N); //resize the current vector to match the copy
74 for(size_t i=0; i<N; i++){ //copy each element 73 for(size_t i=0; i<N; i++){ //copy each element
75 - at(i) = other[i];  
76 - } 74 + at(i) = other[i];
  75 + }
77 } 76 }
78 77
79 //I'm not sure what these were doing here. 78 //I'm not sure what these were doing here.
@@ -329,6 +328,15 @@ struct vec : public std::vector<T> @@ -329,6 +328,15 @@ struct vec : public std::vector<T>
329 return *this; 328 return *this;
330 } 329 }
331 330
  331 + /// Cast to a vec3
  332 + operator stim::vec3<T>(){
  333 + stim::vec3<T> r;
  334 + size_t N = std::min<size_t>(size(), 3);
  335 + for(size_t i = 0; i < N; i++)
  336 + r[i] = at(i);
  337 + return r;
  338 + }
  339 +
332 /// Casting and assignment 340 /// Casting and assignment
333 template<typename Y> 341 template<typename Y>
334 vec<T> & operator=(vec<Y> rhs){ 342 vec<T> & operator=(vec<Y> rhs){
stim/optics/lens.h 0 → 100644
  1 +#ifndef STIM_LENS_H
  2 +#define STIM_LENS_H
  3 +
  4 +#include "scalarwave.h"
  5 +#include "../math/bessel.h"
  6 +#include "../cuda/cudatools/devices.h"
  7 +#include "../visualization/colormap.h"
  8 +#include "../math/fft.h"
  9 +
  10 +#include "cufft.h"
  11 +
  12 +#include <cmath>
  13 +
  14 +namespace stim{
  15 +
  16 + /// Perform a k-space transform of a scalar field (FFT). The given field has a width of x and the calculated momentum space has a
  17 + /// width of kx (in radians).
  18 + /// @param K is a pointer to the output array of all plane waves in the field
  19 + /// @param kx is the width of the frame in momentum space
  20 + /// @param ky is the height of the frame in momentum space
  21 + /// @param E is the field to be transformed
  22 + /// @param x is the width of the field in the spatial domain
  23 + /// @param y is the height of the field in the spatial domain
  24 + /// @param nx is the number of pixels representing the field in the x (and kx) direction
  25 + /// @param ny is the number of pixels representing the field in the y (and ky) direction
  26 + template<typename T>
  27 + void cpu_scalar_to_kspace(stim::complex<T>* K, T& kx, T& ky, stim::complex<T>* E, T x, T y, size_t nx, size_t ny){
  28 +
  29 + kx = stim::TAU * nx / x; //calculate the width of the momentum space
  30 + ky = stim::TAU * ny / y;
  31 +
  32 + stim::complex<T>* dev_FFT;
  33 + HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array
  34 +
  35 + stim::complex<T>* dev_E;
  36 + HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field
  37 + HANDLE_ERROR( cudaMemcpy(dev_E, E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory
  38 +
  39 + cufftResult result;
  40 + cufftHandle plan;
  41 + result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C);
  42 + if(result != CUFFT_SUCCESS){
  43 + std::cout<<"Error creating cuFFT plan."<<std::endl;
  44 + exit(1);
  45 + }
  46 +
  47 + result = cufftExecC2C(plan, (cufftComplex*)dev_E, (cufftComplex*)dev_FFT, CUFFT_FORWARD);
  48 + if(result != CUFFT_SUCCESS){
  49 + std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl;
  50 + exit(1);
  51 + }
  52 +
  53 + stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny);
  54 + HANDLE_ERROR( cudaMemcpy(fft, dev_FFT, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) );
  55 +
  56 + stim::cpu_fftshift(K, fft, nx, ny);
  57 + }
  58 +
  59 + template<typename T>
  60 + void cpu_scalar_from_kspace(stim::complex<T>* E, T& x, T& y, stim::complex<T>* K, T kx, T ky, size_t nx, size_t ny){
  61 +
  62 + x = stim::TAU * nx / kx; //calculate the width of the momentum space
  63 + y = stim::TAU * ny / ky;
  64 +
  65 + stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny);
  66 + stim::cpu_ifftshift(fft, K, nx, ny);
  67 +
  68 + stim::complex<T>* dev_FFT;
  69 + HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array
  70 + HANDLE_ERROR( cudaMemcpy(dev_FFT, fft, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory
  71 +
  72 + stim::complex<T>* dev_E;
  73 + HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field
  74 +
  75 + cufftResult result;
  76 + cufftHandle plan;
  77 + result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C);
  78 + if(result != CUFFT_SUCCESS){
  79 + std::cout<<"Error creating cuFFT plan."<<std::endl;
  80 + exit(1);
  81 + }
  82 +
  83 + result = cufftExecC2C(plan, (cufftComplex*)dev_FFT, (cufftComplex*)dev_E, CUFFT_FORWARD);
  84 + if(result != CUFFT_SUCCESS){
  85 + std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl;
  86 + exit(1);
  87 + }
  88 +
  89 + HANDLE_ERROR( cudaMemcpy(E, dev_E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) );
  90 +
  91 +
  92 + }
  93 +
  94 + /// Propagate a field slice along its orthogonal direction by a given distance z
  95 + /// @param Enew is the resulting propogated field
  96 + template<typename T>
  97 + void cpu_scalar_propagate(stim::complex<T>* Enew, stim::complex<T>* E, T sx, T sy, T z, T k, size_t nx, size_t ny){
  98 +
  99 + stim::complex<T>* K = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * nx * ny );
  100 +
  101 + T Kx, Ky; //width and height in k space
  102 + cpu_scalar_to_kspace(K, Kx, Ky, E ,sx, sy, nx, ny);
  103 +
  104 + T* mag = (T*) malloc( sizeof(T) * nx * ny );
  105 + stim::abs(mag, K, nx * ny);
  106 + stim::cpu2image<float>(mag, "kspace_pre_shift.bmp", nx, ny, stim::cmBrewer);
  107 +
  108 + size_t kxi, kyi;
  109 + size_t i;
  110 + T kx, kx_sq, ky, ky_sq, k_sq;
  111 + T kz;
  112 + stim::complex<T> shift;
  113 + T min_kx = -Kx / 2;
  114 + T dkx = Kx / (nx);
  115 + T min_ky = -Ky / 2;
  116 + T dky = Ky / (ny);
  117 + for(kyi = 0; kyi < ny; kyi++){ //for each plane wave in the ky direction
  118 + for(kxi = 0; kxi < nx; kxi++){ //for each plane wave in the ky direction
  119 + i = kyi * nx + kxi;
  120 +
  121 + kx = min_kx + kxi * dkx; //calculate the position of the current plane wave
  122 + ky = min_ky + kyi * dky;
  123 +
  124 + kx_sq = kx * kx;
  125 + ky_sq = ky * ky;
  126 + k_sq = k*k;
  127 +
  128 + if(kx_sq + ky_sq < k_sq){
  129 + kz = sqrt(k*k - kx * kx - ky * ky); //estimate kz using the Fresnel approximation
  130 + shift = -exp(stim::complex<T>(0, kz * z));
  131 + K[i] *= shift;
  132 + }
  133 + else{
  134 + K[i] = 0;
  135 + }
  136 + }
  137 + }
  138 +
  139 + stim::abs(mag, K, nx * ny);
  140 + stim::cpu2image<float>(mag, "kspace_post_shift.bmp", nx, ny, stim::cmBrewer);
  141 +
  142 + cpu_scalar_from_kspace(Enew, sx, sy, K, Kx, Ky, nx, ny);
  143 + }
  144 +
  145 +}
  146 +
  147 +
  148 +#endif
0 \ No newline at end of file 149 \ No newline at end of file
stim/optics/mie.h 0 → 100644
  1 +#ifndef STIM_MIE_H
  2 +#define STIM_MIE_H
  3 +#include <boost/math/special_functions/bessel.hpp>
  4 +
  5 +#include "scalarwave.h"
  6 +#include "../math/bessel.h"
  7 +#include "../cuda/cudatools/devices.h"
  8 +#include <cmath>
  9 +
  10 +namespace stim{
  11 +
  12 +
  13 +/// Calculate the scattering coefficients for a spherical scatterer
  14 +template<typename T>
  15 +void B_coefficients(stim::complex<T>* B, T a, T k, stim::complex<T> n, int Nl){
  16 +
  17 + //temporary variables
  18 + double vm; //allocate space to store the return values for the bessel function calculation
  19 + double* j_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
  20 + double* y_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
  21 + double* dj_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
  22 + double* dy_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
  23 +
  24 + stim::complex<double>* j_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  25 + stim::complex<double>* y_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  26 + stim::complex<double>* dj_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  27 + stim::complex<double>* dy_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  28 +
  29 + double ka = k * a; //store k*a (argument for spherical bessel and Hankel functions)
  30 + stim::complex<double> kna = k * n * a; //store k*n*a (argument for spherical bessel functions and derivatives)
  31 +
  32 + stim::bessjyv_sph<double>(Nl, ka, vm, j_ka, y_ka, dj_ka, dy_ka); //calculate bessel functions and derivatives for k*a
  33 + stim::cbessjyva_sph<double>(Nl, kna, vm, j_kna, y_kna, dj_kna, dy_kna); //calculate complex bessel functions for k*n*a
  34 +
  35 + stim::complex<double> h_ka, dh_ka;
  36 + stim::complex<double> numerator, denominator;
  37 + stim::complex<double> i(0, 1);
  38 + for(int l = 0; l <= Nl; l++){
  39 + h_ka.r = j_ka[l];
  40 + h_ka.i = y_ka[l];
  41 + dh_ka.r = dj_ka[l];
  42 + dh_ka.i = dy_ka[l];
  43 +
  44 + numerator = j_ka[l] * dj_kna[l] * (stim::complex<double>)n - j_kna[l] * dj_ka[l];
  45 + denominator = j_kna[l] * dh_ka - h_ka * dj_kna[l] * (stim::complex<double>)n;
  46 + B[l] = (2 * l + 1) * pow(i, l) * numerator / denominator;
  47 + }
  48 +}
  49 +
  50 +template<typename T>
  51 +void A_coefficients(stim::complex<T>* A, T a, T k, stim::complex<T> n, int Nl){
  52 + //temporary variables
  53 + double vm; //allocate space to store the return values for the bessel function calculation
  54 + double* j_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
  55 + double* y_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
  56 + double* dj_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
  57 + double* dy_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
  58 +
  59 + stim::complex<double>* j_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  60 + stim::complex<double>* y_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  61 + stim::complex<double>* dj_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  62 + stim::complex<double>* dy_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  63 +
  64 + double ka = k * a; //store k*a (argument for spherical bessel and Hankel functions)
  65 + stim::complex<double> kna = k * n * a; //store k*n*a (argument for spherical bessel functions and derivatives)
  66 +
  67 + stim::bessjyv_sph<double>(Nl, ka, vm, j_ka, y_ka, dj_ka, dy_ka); //calculate bessel functions and derivatives for k*a
  68 + stim::cbessjyva_sph<double>(Nl, kna, vm, j_kna, y_kna, dj_kna, dy_kna); //calculate complex bessel functions for k*n*a
  69 +
  70 + stim::complex<double> h_ka, dh_ka;
  71 + stim::complex<double> numerator, denominator;
  72 + stim::complex<double> i(0, 1);
  73 + for(size_t l = 0; l <= Nl; l++){
  74 + h_ka.r = j_ka[l];
  75 + h_ka.i = y_ka[l];
  76 + dh_ka.r = dj_ka[l];
  77 + dh_ka.i = dy_ka[l];
  78 +
  79 + numerator = j_ka[l] * dh_ka - dj_ka[l] * h_ka;
  80 + denominator = j_kna[l] * dh_ka - h_ka * dj_kna[l] * (stim::complex<double>)n;
  81 + A[l] = (2 * l + 1) * pow(i, l) * numerator / denominator;
  82 + }
  83 +}
  84 +
  85 +#define LOCAL_NL 16
  86 +template<typename T>
  87 +__global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* hB, T r_min, T dr, size_t N_hB, int Nl){
  88 + extern __shared__ stim::complex<T> shared_hB[]; //declare the list of waves in shared memory
  89 +
  90 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  91 + if(i >= N) return; //exit if this thread is outside the array
  92 + stim::vec3<T> p;
  93 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  94 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  95 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  96 +
  97 + T r = p.len(); //calculate the distance from the sphere
  98 + if(r < a) return; //exit if the point is inside the sphere (we only calculate the internal field)
  99 + T fij = (r - r_min)/dr; //FP index into the spherical bessel LUT
  100 + size_t ij = (size_t) fij; //convert to an integral index
  101 + T alpha = fij - ij; //calculate the fractional portion of the index
  102 + size_t n0j = ij * (Nl + 1); //start of the first entry in the LUT
  103 + size_t n1j = (ij+1) * (Nl + 1); //start of the second entry in the LUT
  104 +
  105 + T cos_phi;
  106 + T Pl_2, Pl_1, Pl; //declare registers to store the previous two Legendre polynomials
  107 +
  108 + stim::complex<T> hBl;
  109 + stim::complex<T> Ei = 0; //create a register to store the result
  110 + int l;
  111 +
  112 + stim::complex<T> hlBl[LOCAL_NL+1]; //the first LOCAL_NL components are stored in registers for speed
  113 + int shared_start = threadIdx.x * (Nl - LOCAL_NL); //wrap up some operations so that they aren't done in the main loops
  114 +
  115 + #pragma unroll LOCAL_NL+1 //copy the first LOCAL_NL+1 h_l * B_l components to registers
  116 + for(l = 0; l <= LOCAL_NL; l++)
  117 + hlBl[l] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha );
  118 +
  119 + for(l = LOCAL_NL+1; l <= Nl; l++) //copy any additional h_l * B_l components to shared memory
  120 + shared_hB[shared_start + (l - (LOCAL_NL+1))] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha );
  121 +
  122 + for(size_t w = 0; w < nW; w++){ //for each plane wave
  123 + cos_phi = p.norm().dot(W[w].kvec().norm()); //calculate the cosine of the angle between the k vector and the direction from the sphere
  124 + Pl_2 = 1; //the Legendre polynomials will be calculated recursively, initialize the first two steps of the recursive relation
  125 + Pl_1 = cos_phi;
  126 + Ei += W[w].E() * hlBl[0] * Pl_2; //unroll the first two orders using the initial steps of the Legendre recursive relation
  127 + Ei += W[w].E() * hlBl[1] * Pl_1;
  128 +
  129 + #pragma unroll LOCAL_NL-1 //unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file)
  130 + for(l = 2; l <= LOCAL_NL; l++){
  131 + Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //calculate the next step in the Legendre polynomial recursive relation (this is where most of the computation occurs)
  132 + Ei += W[w].E() * hlBl[l] * Pl; //calculate and sum the current field order
  133 + Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1
  134 + Pl_1 = Pl;
  135 + }
  136 +
  137 + for(l = LOCAL_NL+1; l <= Nl; l++){ //do the same as above, except for any additional orders that are stored in shared memory (not registers)
  138 + Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //again, this is where most computation in the kernel occurs
  139 + Ei += W[w].E() * shared_hB[shared_start + l - LOCAL_NL - 1] * Pl;
  140 + Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1
  141 + Pl_1 = Pl;
  142 + }
  143 + }
  144 + E[i] += Ei; //copy the result to device memory
  145 +}
  146 +
  147 +template<typename T>
  148 +void gpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* hB, T kr_min, T dkr, size_t N_hB, size_t Nl){
  149 +
  150 + size_t max_shared_mem = stim::sharedMemPerBlock();
  151 + size_t hBl_array = sizeof(stim::complex<T>) * (Nl + 1);
  152 + std::cout<<"hl*Bl array size: "<<hBl_array<<std::endl;
  153 + std::cout<<"shared memory: "<<max_shared_mem<<std::endl;
  154 + int threads = (int)((max_shared_mem / hBl_array) / 32 * 32);
  155 + std::cout<<"threads per block: "<<threads<<std::endl;
  156 + dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  157 +
  158 + size_t shared_mem;
  159 + if(Nl <= LOCAL_NL) shared_mem = 0;
  160 + else shared_mem = threads * sizeof(stim::complex<T>) * (Nl - LOCAL_NL); //amount of shared memory to allocate
  161 + std::cout<<"shared memory allocated: "<<shared_mem<<std::endl;
  162 + cuda_scalar_mie_scatter<T><<< blocks, threads, shared_mem >>>(E, N, x, y, z, W, nW, a, n, hB, kr_min, dkr, N_hB, (int)Nl); //call the kernel
  163 +}
  164 +
  165 +template<typename T>
  166 +__global__ void cuda_dist(T* r, T* x, T* y, T* z, size_t N){
  167 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  168 + if(i >= N) return; //exit if this thread is outside the array
  169 +
  170 + stim::vec3<T> p;
  171 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  172 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  173 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  174 +
  175 + r[i] = p.len();
  176 +}
  177 +/// Calculate the scalar Mie solution for the scattered field produced by a single plane wave
  178 +
  179 +/// @param E is a pointer to the destination field values
  180 +/// @param N is the number of points used to calculate the field
  181 +/// @param x is an array of x coordinates for each point, specified relative to the sphere (x = NULL assumes all zeros)
  182 +/// @param y is an array of y coordinates for each point, specified relative to the sphere (y = NULL assumes all zeros)
  183 +/// @param z is an array of z coordinates for each point, specified relative to the sphere (z = NULL assumes all zeros)
  184 +/// @param W is an array of planewaves that will be scattered
  185 +/// @param a is the radius of the sphere
  186 +/// @param n is the complex refractive index of the sphere
  187 +template<typename T>
  188 +void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, std::vector<stim::scalarwave<T>> W, T a, stim::complex<T> n, T r_spacing = 0.1){
  189 + //calculate the necessary number of orders required to represent the scattered field
  190 + T k = W[0].kmag();
  191 +
  192 + int Nl = (int)ceil(k*a + 4 * cbrt( k * a ) + 2);
  193 + if(Nl < LOCAL_NL) Nl = LOCAL_NL; //always do at least the minimum number of local operations (kernel optimization)
  194 + std::cout<<"Nl: "<<Nl<<std::endl;
  195 +
  196 + //calculate the scattering coefficients for the sphere
  197 + stim::complex<T>* B = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * (Nl + 1) ); //allocate space for the scattering coefficients
  198 + B_coefficients(B, a, k, n, Nl);
  199 +
  200 +#ifdef CUDA_FOUND
  201 + stim::complex<T>* dev_E; //allocate space for the field
  202 + cudaMalloc(&dev_E, N * sizeof(stim::complex<T>));
  203 + cudaMemcpy(dev_E, E, N * sizeof(stim::complex<T>), cudaMemcpyHostToDevice);
  204 + //cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>)); //set the field to zero (necessary because a sum is used)
  205 +
  206 + // COORDINATES
  207 + T* dev_x = NULL; //allocate space and copy the X coordinate (if specified)
  208 + if(x != NULL){
  209 + HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T)));
  210 + HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice));
  211 + }
  212 + T* dev_y = NULL; //allocate space and copy the Y coordinate (if specified)
  213 + if(y != NULL){
  214 + HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T)));
  215 + HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice));
  216 + }
  217 + T* dev_z = NULL; //allocate space and copy the Z coordinate (if specified)
  218 + if(z != NULL){
  219 + HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T)));
  220 + HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice));
  221 + }
  222 +
  223 + // PLANE WAVES
  224 + stim::scalarwave<T>* dev_W; //allocate space and copy plane waves
  225 + HANDLE_ERROR( cudaMalloc(&dev_W, sizeof(stim::scalarwave<T>) * W.size()) );
  226 + HANDLE_ERROR( cudaMemcpy(dev_W, &W[0], sizeof(stim::scalarwave<T>) * W.size(), cudaMemcpyHostToDevice) );
  227 +
  228 + // BESSEL FUNCTION LOOK-UP TABLE
  229 + //calculate the distance from the sphere center
  230 + T* dev_r;
  231 + HANDLE_ERROR( cudaMalloc(&dev_r, sizeof(T) * N) );
  232 +
  233 + int threads = stim::maxThreadsPerBlock();
  234 + dim3 blocks((unsigned)(N / threads + 1));
  235 + cuda_dist<T> <<< blocks, threads >>>(dev_r, dev_x, dev_y, dev_z, N);
  236 +
  237 + //Find the minimum and maximum values of r
  238 + cublasStatus_t stat;
  239 + cublasHandle_t handle;
  240 +
  241 + stat = cublasCreate(&handle); //create a cuBLAS handle
  242 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  243 + printf ("CUBLAS initialization failed\n");
  244 + exit(1);
  245 + }
  246 +
  247 + int i_min, i_max;
  248 + stat = cublasIsamin(handle, (int)N, dev_r, 1, &i_min);
  249 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  250 + printf ("CUBLAS Error: failed to calculate minimum r value.\n");
  251 + exit(1);
  252 + }
  253 + stat = cublasIsamax(handle, (int)N, dev_r, 1, &i_max);
  254 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  255 + printf ("CUBLAS Error: failed to calculate maximum r value.\n");
  256 + exit(1);
  257 + }
  258 +
  259 + i_min--; //cuBLAS uses 1-based indexing for Fortran compatibility
  260 + i_max--;
  261 + T r_min, r_max; //allocate space to store the minimum and maximum values
  262 + HANDLE_ERROR( cudaMemcpy(&r_min, dev_r + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU
  263 + HANDLE_ERROR( cudaMemcpy(&r_max, dev_r + i_max, sizeof(T), cudaMemcpyDeviceToHost) );
  264 +
  265 + r_min = max(r_min, a); //if the radius of the sphere is larger than r_min, change r_min to a (the scattered field doesn't exist inside the sphere)
  266 +
  267 + //size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1); //number of values in the look-up table based on the user-specified spacing along r
  268 + size_t N_hB_lut = (size_t)((r_max - r_min) / r_spacing + 1);
  269 +
  270 + //T kr_min = k * r_min;
  271 + //T kr_max = k * r_max;
  272 +
  273 + //temporary variables
  274 + double vm; //allocate space to store the return values for the bessel function calculation
  275 + double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
  276 + double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
  277 + double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
  278 + double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
  279 +
  280 + size_t hB_bytes = sizeof(stim::complex<T>) * (Nl+1) * N_hB_lut;
  281 + stim::complex<T>* hB_lut = (stim::complex<T>*) malloc(hB_bytes); //pointer to the look-up table
  282 + T dr = (r_max - r_min) / (N_hB_lut-1); //distance between values in the LUT
  283 + std::cout<<"LUT jl bytes: "<<hB_bytes<<std::endl;
  284 + stim::complex<T> hl;
  285 + for(size_t ri = 0; ri < N_hB_lut; ri++){ //for each value in the LUT
  286 + stim::bessjyv_sph<double>(Nl, k * (r_min + ri * dr), vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl]
  287 + for(size_t l = 0; l <= Nl; l++){ //for each order
  288 + hl.r = (T)jv[l];
  289 + hl.i = (T)yv[l];
  290 +
  291 + hB_lut[ri * (Nl + 1) + l] = hl * B[l]; //store the bessel function result
  292 + //std::cout<<hB_lut[ri * (Nl + 1) + l]<<std::endl;
  293 + }
  294 + }
  295 + T* real_lut = (T*) malloc(hB_bytes/2);
  296 + stim::real(real_lut, hB_lut, N_hB_lut);
  297 + stim::cpu2image<T>(real_lut, "hankel_B.bmp", Nl+1, N_hB_lut, stim::cmBrewer);
  298 +
  299 + //Allocate device memory and copy everything to the GPU
  300 + stim::complex<T>* dev_hB_lut;
  301 + HANDLE_ERROR( cudaMalloc(&dev_hB_lut, hB_bytes) );
  302 + HANDLE_ERROR( cudaMemcpy(dev_hB_lut, hB_lut, hB_bytes, cudaMemcpyHostToDevice) );
  303 +
  304 + gpu_scalar_mie_scatter<T>(dev_E, N, dev_x, dev_y, dev_z, dev_W, W.size(), a, n, dev_hB_lut, r_min, dr, N_hB_lut, Nl);
  305 +
  306 + cudaMemcpy(E, dev_E, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost); //copy the field from device memory
  307 +
  308 + if(x != NULL) cudaFree(dev_x); //free everything
  309 + if(y != NULL) cudaFree(dev_y);
  310 + if(z != NULL) cudaFree(dev_z);
  311 + cudaFree(dev_E);
  312 +#else
  313 +
  314 +
  315 + //allocate space to store the bessel function call results
  316 + double vm;
  317 + double* j_kr = (double*) malloc( (Nl + 1) * sizeof(double) );
  318 + double* y_kr = (double*) malloc( (Nl + 1) * sizeof(double) );
  319 + double* dj_kr= (double*) malloc( (Nl + 1) * sizeof(double) );
  320 + double* dy_kr= (double*) malloc( (Nl + 1) * sizeof(double) );
  321 +
  322 + T* P = (T*) malloc( (Nl + 1) * sizeof(T) );
  323 +
  324 + T r, kr, cos_phi;
  325 + stim::complex<T> h;
  326 + for(size_t i = 0; i < N; i++){
  327 + stim::vec3<T> p; //declare a 3D point
  328 +
  329 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  330 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  331 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  332 + r = p.len();
  333 + if(r >= a){
  334 + for(size_t w = 0; w < W.size(); w++){
  335 + kr = p.len() * W[w].kmag(); //calculate k*r
  336 + stim::bessjyv_sph<double>(Nl, kr, vm, j_kr, y_kr, dj_kr, dy_kr);
  337 + cos_phi = p.norm().dot(W[w].kvec().norm()); //calculate the cosine of the angle from the propagating direction
  338 + stim::legendre<T>(Nl, cos_phi, P);
  339 +
  340 + for(size_t l = 0; l <= Nl; l++){
  341 + h.r = j_kr[l];
  342 + h.i = y_kr[l];
  343 + E[i] += W[w].E() * B[l] * h * P[l];
  344 + }
  345 + }
  346 + }
  347 + }
  348 +#endif
  349 +}
  350 +
  351 +template<typename T>
  352 +void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w, T a, stim::complex<T> n, T r_spacing = 0.1){
  353 + std::vector< stim::scalarwave<T> > W(1, w);
  354 + cpu_scalar_mie_scatter(E, N, x, y, z, W, a, n, r_spacing);
  355 +}
  356 +
  357 +template<typename T>
  358 +__global__ void cuda_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* jA, T r_min, T dr, size_t N_jA, int Nl){
  359 + extern __shared__ stim::complex<T> shared_jA[]; //declare the list of waves in shared memory
  360 +
  361 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  362 + if(i >= N) return; //exit if this thread is outside the array
  363 + stim::vec3<T> p;
  364 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  365 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  366 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  367 +
  368 + T r = p.len(); //calculate the distance from the sphere
  369 + if(r > a) return; //exit if the point is inside the sphere (we only calculate the internal field)
  370 + T fij = (r - r_min)/dr; //FP index into the spherical bessel LUT
  371 + size_t ij = (size_t) fij; //convert to an integral index
  372 + T alpha = fij - ij; //calculate the fractional portion of the index
  373 + size_t n0j = ij * (Nl + 1); //start of the first entry in the LUT
  374 + size_t n1j = (ij+1) * (Nl + 1); //start of the second entry in the LUT
  375 +
  376 + T cos_phi;
  377 + T Pl_2, Pl_1, Pl; //declare registers to store the previous two Legendre polynomials
  378 +
  379 + stim::complex<T> jAl;
  380 + stim::complex<T> Ei = 0; //create a register to store the result
  381 + int l;
  382 +
  383 + stim::complex<T> jlAl[LOCAL_NL+1]; //the first LOCAL_NL components are stored in registers for speed
  384 + int shared_start = threadIdx.x * (Nl - LOCAL_NL); //wrap up some operations so that they aren't done in the main loops
  385 +
  386 + #pragma unroll LOCAL_NL+1 //copy the first LOCAL_NL+1 h_l * B_l components to registers
  387 + for(l = 0; l <= LOCAL_NL; l++)
  388 + jlAl[l] = clerp<T>( jA[n0j + l], jA[n1j + l], alpha );
  389 +
  390 + for(l = LOCAL_NL+1; l <= Nl; l++) //copy any additional h_l * B_l components to shared memory
  391 + shared_jA[shared_start + (l - (LOCAL_NL+1))] = clerp<T>( jA[n0j + l], jA[n1j + l], alpha );
  392 +
  393 + for(size_t w = 0; w < nW; w++){ //for each plane wave
  394 + if(r == 0) cos_phi = 0;
  395 + else
  396 + cos_phi = p.norm().dot(W[w].kvec().norm()); //calculate the cosine of the angle between the k vector and the direction from the sphere
  397 + Pl_2 = 1; //the Legendre polynomials will be calculated recursively, initialize the first two steps of the recursive relation
  398 + Pl_1 = cos_phi;
  399 + Ei += W[w].E() * jlAl[0] * Pl_2; //unroll the first two orders using the initial steps of the Legendre recursive relation
  400 + Ei += W[w].E() * jlAl[1] * Pl_1;
  401 +
  402 + #pragma unroll LOCAL_NL-1 //unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file)
  403 + for(l = 2; l <= LOCAL_NL; l++){
  404 + Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //calculate the next step in the Legendre polynomial recursive relation (this is where most of the computation occurs)
  405 + Ei += W[w].E() * jlAl[l] * Pl; //calculate and sum the current field order
  406 + Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1
  407 + Pl_1 = Pl;
  408 + }
  409 +
  410 + for(l = LOCAL_NL+1; l <= Nl; l++){ //do the same as above, except for any additional orders that are stored in shared memory (not registers)
  411 + Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //again, this is where most computation in the kernel occurs
  412 + Ei += W[w].E() * shared_jA[shared_start + l - LOCAL_NL - 1] * Pl;
  413 + Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1
  414 + Pl_1 = Pl;
  415 + }
  416 + }
  417 + E[i] = Ei; //copy the result to device memory
  418 +}
  419 +
  420 +template<typename T>
  421 +void gpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* jA, T r_min, T dr, size_t N_jA, size_t Nl){
  422 +
  423 + size_t max_shared_mem = stim::sharedMemPerBlock();
  424 + size_t hBl_array = sizeof(stim::complex<T>) * (Nl + 1);
  425 + std::cout<<"hl*Bl array size: "<<hBl_array<<std::endl;
  426 + std::cout<<"shared memory: "<<max_shared_mem<<std::endl;
  427 + int threads = (int)((max_shared_mem / hBl_array) / 32 * 32);
  428 + std::cout<<"threads per block: "<<threads<<std::endl;
  429 + dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  430 +
  431 + size_t shared_mem;
  432 + if(Nl <= LOCAL_NL) shared_mem = 0;
  433 + else shared_mem = threads * sizeof(stim::complex<T>) * (Nl - LOCAL_NL); //amount of shared memory to allocate
  434 + std::cout<<"shared memory allocated: "<<shared_mem<<std::endl;
  435 + cuda_scalar_mie_internal<T><<< blocks, threads, shared_mem >>>(E, N, x, y, z, W, nW, a, n, jA, r_min, dr, N_jA, (int)Nl); //call the kernel
  436 +}
  437 +
  438 +/// Calculate the scalar Mie solution for the internal field produced by a single plane wave scattered by a sphere
  439 +
  440 +/// @param E is a pointer to the destination field values
  441 +/// @param N is the number of points used to calculate the field
  442 +/// @param x is an array of x coordinates for each point, specified relative to the sphere (x = NULL assumes all zeros)
  443 +/// @param y is an array of y coordinates for each point, specified relative to the sphere (y = NULL assumes all zeros)
  444 +/// @param z is an array of z coordinates for each point, specified relative to the sphere (z = NULL assumes all zeros)
  445 +/// @param w is a planewave that will be scattered
  446 +/// @param a is the radius of the sphere
  447 +/// @param n is the complex refractive index of the sphere
  448 +template<typename T>
  449 +void cpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > W, T a, stim::complex<T> n, T r_spacing = 0.1){
  450 +//calculate the necessary number of orders required to represent the scattered field
  451 + T k = W[0].kmag();
  452 +
  453 + int Nl = (int)ceil(k*a + 4 * cbrt( k * a ) + 2);
  454 + if(Nl < LOCAL_NL) Nl = LOCAL_NL; //always do at least the minimum number of local operations (kernel optimization)
  455 + std::cout<<"Nl: "<<Nl<<std::endl;
  456 +
  457 + //calculate the scattering coefficients for the sphere
  458 + stim::complex<T>* A = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * (Nl + 1) ); //allocate space for the scattering coefficients
  459 + A_coefficients(A, a, k, n, Nl);
  460 +
  461 +#ifdef CUDA_FOUND
  462 + stim::complex<T>* dev_E; //allocate space for the field
  463 + cudaMalloc(&dev_E, N * sizeof(stim::complex<T>));
  464 + cudaMemcpy(dev_E, E, N * sizeof(stim::complex<T>), cudaMemcpyHostToDevice);
  465 + //cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>)); //set the field to zero (necessary because a sum is used)
  466 +
  467 + // COORDINATES
  468 + T* dev_x = NULL; //allocate space and copy the X coordinate (if specified)
  469 + if(x != NULL){
  470 + HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T)));
  471 + HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice));
  472 + }
  473 + T* dev_y = NULL; //allocate space and copy the Y coordinate (if specified)
  474 + if(y != NULL){
  475 + HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T)));
  476 + HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice));
  477 + }
  478 + T* dev_z = NULL; //allocate space and copy the Z coordinate (if specified)
  479 + if(z != NULL){
  480 + HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T)));
  481 + HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice));
  482 + }
  483 +
  484 + // PLANE WAVES
  485 + stim::scalarwave<T>* dev_W; //allocate space and copy plane waves
  486 + HANDLE_ERROR( cudaMalloc(&dev_W, sizeof(stim::scalarwave<T>) * W.size()) );
  487 + HANDLE_ERROR( cudaMemcpy(dev_W, &W[0], sizeof(stim::scalarwave<T>) * W.size(), cudaMemcpyHostToDevice) );
  488 +
  489 + // BESSEL FUNCTION LOOK-UP TABLE
  490 + //calculate the distance from the sphere center
  491 + T* dev_r;
  492 + HANDLE_ERROR( cudaMalloc(&dev_r, sizeof(T) * N) );
  493 +
  494 + int threads = stim::maxThreadsPerBlock();
  495 + dim3 blocks((unsigned)(N / threads + 1));
  496 + cuda_dist<T> <<< blocks, threads >>>(dev_r, dev_x, dev_y, dev_z, N);
  497 +
  498 + //Find the minimum and maximum values of r
  499 + cublasStatus_t stat;
  500 + cublasHandle_t handle;
  501 +
  502 + stat = cublasCreate(&handle); //create a cuBLAS handle
  503 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  504 + printf ("CUBLAS initialization failed\n");
  505 + exit(1);
  506 + }
  507 +
  508 + int i_min, i_max;
  509 + stat = cublasIsamin(handle, (int)N, dev_r, 1, &i_min);
  510 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  511 + printf ("CUBLAS Error: failed to calculate minimum r value.\n");
  512 + exit(1);
  513 + }
  514 + stat = cublasIsamax(handle, (int)N, dev_r, 1, &i_max);
  515 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  516 + printf ("CUBLAS Error: failed to calculate maximum r value.\n");
  517 + exit(1);
  518 + }
  519 +
  520 + i_min--; //cuBLAS uses 1-based indexing for Fortran compatibility
  521 + i_max--;
  522 + T r_min, r_max; //allocate space to store the minimum and maximum values
  523 + HANDLE_ERROR( cudaMemcpy(&r_min, dev_r + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU
  524 + HANDLE_ERROR( cudaMemcpy(&r_max, dev_r + i_max, sizeof(T), cudaMemcpyDeviceToHost) );
  525 +
  526 + r_max = min(r_max, a); //the internal field doesn't exist outside of the sphere
  527 +
  528 + size_t N_jA_lut = (size_t)((r_max - r_min) / r_spacing + 1);
  529 +
  530 + //temporary variables
  531 + double vm; //allocate space to store the return values for the bessel function calculation
  532 + stim::complex<double>* jv = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  533 + stim::complex<double>* yv = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  534 + stim::complex<double>* djv= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  535 + stim::complex<double>* dyv= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  536 +
  537 + size_t jA_bytes = sizeof(stim::complex<T>) * (Nl+1) * N_jA_lut;
  538 + stim::complex<T>* jA_lut = (stim::complex<T>*) malloc(jA_bytes); //pointer to the look-up table
  539 + T dr = (r_max - r_min) / (N_jA_lut-1); //distance between values in the LUT
  540 + std::cout<<"LUT jl bytes: "<<jA_bytes<<std::endl;
  541 + stim::complex<T> hl;
  542 + stim::complex<double> nd = (stim::complex<double>)n;
  543 + for(size_t ri = 0; ri < N_jA_lut; ri++){ //for each value in the LUT
  544 + stim::cbessjyva_sph<double>(Nl, nd * k * (r_min + ri * dr), vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl]
  545 + for(size_t l = 0; l <= Nl; l++){ //for each order
  546 + jA_lut[ri * (Nl + 1) + l] = (stim::complex<T>)(jv[l] * (stim::complex<double>)A[l]); //store the bessel function result
  547 + }
  548 + }
  549 +
  550 + //Allocate device memory and copy everything to the GPU
  551 + stim::complex<T>* dev_jA_lut;
  552 + HANDLE_ERROR( cudaMalloc(&dev_jA_lut, jA_bytes) );
  553 + HANDLE_ERROR( cudaMemcpy(dev_jA_lut, jA_lut, jA_bytes, cudaMemcpyHostToDevice) );
  554 +
  555 + gpu_scalar_mie_internal<T>(dev_E, N, dev_x, dev_y, dev_z, dev_W, W.size(), a, n, dev_jA_lut, r_min, dr, N_jA_lut, Nl);
  556 +
  557 + cudaMemcpy(E, dev_E, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost); //copy the field from device memory
  558 +
  559 + if(x != NULL) cudaFree(dev_x); //free everything
  560 + if(y != NULL) cudaFree(dev_y);
  561 + if(z != NULL) cudaFree(dev_z);
  562 + cudaFree(dev_E);
  563 +#else
  564 +
  565 + //allocate space to store the bessel function call results
  566 + double vm;
  567 + stim::complex<double>* j_knr = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  568 + stim::complex<double>* y_knr = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  569 + stim::complex<double>* dj_knr= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  570 + stim::complex<double>* dy_knr= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  571 +
  572 + T* P = (T*) malloc( (Nl + 1) * sizeof(T) );
  573 +
  574 + T r, cos_phi;
  575 + stim::complex<double> knr;
  576 + stim::complex<T> h;
  577 + for(size_t i = 0; i < N; i++){
  578 + stim::vec3<T> p; //declare a 3D point
  579 +
  580 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  581 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  582 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  583 + r = p.len();
  584 + if(r < a){
  585 + E[i] = 0;
  586 + for(size_t w = 0; w < W.size(); w++){
  587 + knr = (stim::complex<double>)n * p.len() * W[w].kmag(); //calculate k*n*r
  588 +
  589 + stim::cbessjyva_sph<double>(Nl, knr, vm, j_knr, y_knr, dj_knr, dy_knr);
  590 + if(r == 0)
  591 + cos_phi = 0;
  592 + else
  593 + cos_phi = p.norm().dot(W[w].kvec().norm()); //calculate the cosine of the angle from the propagating direction
  594 + stim::legendre<T>(Nl, cos_phi, P);
  595 +
  596 + for(size_t l = 0; l <= Nl; l++){
  597 + E[i] += W[w].E() * A[l] * (stim::complex<T>)j_knr[l] * P[l];
  598 + }
  599 + }
  600 + }
  601 + }
  602 +#endif
  603 +}
  604 +
  605 +template<typename T>
  606 +void cpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w, T a, stim::complex<T> n, T r_spacing = 0.1){
  607 + std::vector< stim::scalarwave<T> > W(1, w);
  608 + cpu_scalar_mie_internal(E, N, x, y, z, W, a, n, r_spacing);
  609 +}
  610 +
  611 +}
  612 +
  613 +#endif
0 \ No newline at end of file 614 \ No newline at end of file
stim/optics/planewave.h
1 -#ifndef RTS_PLANEWAVE  
2 -#define RTS_PLANEWAVE 1 +#ifndef STIM_PLANEWAVE_H
  2 +#define STIM_PLANEWAVE_H
3 3
4 #include <string> 4 #include <string>
5 #include <sstream> 5 #include <sstream>
  6 +#include <cmath>
6 7
7 #include "../math/vector.h" 8 #include "../math/vector.h"
8 #include "../math/quaternion.h" 9 #include "../math/quaternion.h"
9 #include "../math/constants.h" 10 #include "../math/constants.h"
10 #include "../math/plane.h" 11 #include "../math/plane.h"
11 -#include "../cuda/callable.h"  
12 -  
13 -/*Basic conversions used here (assuming a vacuum)  
14 - lambda =  
15 -*/ 12 +#include "../math/complex.h"
16 13
17 namespace stim{ 14 namespace stim{
  15 + namespace optics{
  16 +
  17 + /// evaluate the scalar field produced by a plane wave at a point (x, y, z)
  18 +
  19 + /// @param x is the x-coordinate of the point
  20 + /// @param y is the y-coordinate of the point
  21 + /// @param z is the z-coordinate of the point
  22 + /// @param A is the amplitude of the plane wave, specifically the field at (0, 0, 0)
  23 + /// @param kx is the k-vector component in the x direction
  24 + /// @param ky is the k-vector component in the y direction
  25 + /// @param kz is the k-vector component in the z direction
  26 + template<typename T>
  27 + stim::complex<T> planewave_scalar(T x, T y, T z, stim::complex<T> A, T kx, T ky, T kz){
  28 + T d = x * kx + y * ky + z * kz; //calculate the dot product between k and p = (x, y, z) to find the distance p is along the propagation direction
  29 + stim::complex<T> di = stim::complex<T>(0, d); //calculate the phase shift that will have to be applied to propagate the wave distance d
  30 + return A * exp(di); //multiply the phase term by the amplitude at (0, 0, 0) to propagate the wave to p
  31 + }
  32 +
  33 + /// evaluate the scalar field produced by a plane wave at several positions
  34 +
  35 + /// @param field is a pre-allocated block of memory that will store the complex field at all points
  36 + /// @param N is the number of field values to be evaluated
  37 + /// @param x is a set of x coordinates defining positions within the field (NULL implies that all values are zero)
  38 + /// @param y is a set of y coordinates defining positions within the field (NULL implies that all values are zero)
  39 + /// @param z is a set of z coordinates defining positions within the field (NULL implies that all values are zero)
  40 + /// @param A is the amplitude of the plane wave, specifically the field at (0, 0, 0)
  41 + /// @param kx is the k-vector component in the x direction
  42 + /// @param ky is the k-vector component in the y direction
  43 + /// @param kz is the k-vector component in the z direction
  44 + template<typename T>
  45 + void cpu_planewave_scalar(stim::complex<T>* field, size_t N, T* x, T* y = NULL, T* z = NULL, stim::complex<T> A = 1.0, T kx = 0.0, T ky = 0.0, T kz = 0.0){
  46 + T px, py, pz;
  47 + for(size_t i = 0; i < N; i++){ // for each element in the array
  48 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values
  49 + (y == NULL) ? py = 0 : py = y[i];
  50 + (z == NULL) ? pz = 0 : pz = z[i];
  51 +
  52 + field[i] = planewave_scalar(px, py, pz, A, kx, ky, kz); // call the single-value plane wave function
  53 + }
  54 + }
18 55
19 template<typename T> 56 template<typename T>
20 class planewave{ 57 class planewave{
21 58
22 protected: 59 protected:
23 60
24 - vec<T> k; //k = tau / lambda  
25 - vec< complex<T> > E0; //amplitude  
26 - //T phi;  
27 -  
28 - CUDA_CALLABLE planewave<T> bend(rts::vec<T> kn) const{ 61 + stim::vec<T> k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda
  62 + stim::vec< stim::complex<T> > E0; //amplitude (for a scalar plane wave, only E0[0] is used)
29 63
30 - vec<T> kn_hat = kn.norm(); //normalize the new k  
31 - vec<T> k_hat = k.norm(); //normalize the current k 64 + /// Bend a plane wave via refraction, given that the new propagation direction is known
  65 + CUDA_CALLABLE planewave<T> bend(stim::vec<T> kn) const{
32 66
33 - //std::cout<<"PLANE WAVE BENDING------------------"<<std::endl;  
34 - //std::cout<<"kn_hat: "<<kn_hat<<" k_hat: "<<k_hat<<std::endl; 67 + stim::vec<T> kn_hat = kn.norm(); //normalize the new k
  68 + stim::vec<T> k_hat = k.norm(); //normalize the current k
35 69
36 - planewave<T> new_p; //create a new plane wave 70 + planewave<T> new_p; //create a new plane wave
37 71
38 - //if kn is equal to k or -k, handle the degenerate case  
39 - T k_dot_kn = k_hat.dot(kn_hat); 72 + T k_dot_kn = k_hat.dot(kn_hat); //if kn is equal to k or -k, handle the degenerate case
40 73
41 //if k . n < 0, then the bend is a reflection 74 //if k . n < 0, then the bend is a reflection
42 - //flip k_hat  
43 - if(k_dot_kn < 0) k_hat = -k_hat; 75 + if(k_dot_kn < 0) k_hat = -k_hat; //flip k_hat
44 76
45 - //std::cout<<"k dot kn: "<<k_dot_kn<<std::endl;  
46 -  
47 - //std::cout<<"k_dot_kn: "<<k_dot_kn<<std::endl;  
48 if(k_dot_kn == -1){ 77 if(k_dot_kn == -1){
49 new_p.k = -k; 78 new_p.k = -k;
50 new_p.E0 = E0; 79 new_p.E0 = E0;
@@ -56,28 +85,11 @@ protected: @@ -56,28 +85,11 @@ protected:
56 return new_p; 85 return new_p;
57 } 86 }
58 87
59 - vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector  
60 -  
61 - //std::cout<<"r: "<<r<<std::endl;  
62 -  
63 - T theta = asin(r.len()); //compute the angle of the rotation about r  
64 -  
65 -  
66 -  
67 - //deal with a zero vector (both k and kn point in the same direction)  
68 - //if(theta == (T)0)  
69 - //{  
70 - // new_p = *this;  
71 - // return new_p;  
72 - //}  
73 -  
74 - //create a quaternion to capture the rotation  
75 - quaternion<T> q;  
76 - q.CreateRotation(theta, r.norm());  
77 -  
78 - //apply the rotation to E0  
79 - vec< complex<T> > E0n = q.toMatrix3() * E0;  
80 - 88 + vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector
  89 + T theta = asin(r.len()); //compute the angle of the rotation about r
  90 + quaternion<T> q; //create a quaternion to capture the rotation
  91 + q.CreateRotation(theta, r.norm());
  92 + vec< complex<T> > E0n = q.toMatrix3() * E0; //apply the rotation to E0
81 new_p.k = kn_hat * kmag(); 93 new_p.k = kn_hat * kmag();
82 new_p.E0 = E0n; 94 new_p.E0 = E0n;
83 95
@@ -86,16 +98,9 @@ protected: @@ -86,16 +98,9 @@ protected:
86 98
87 public: 99 public:
88 100
89 -  
90 - ///constructor: create a plane wave propagating along z, polarized along x  
91 - /*planewave(T lambda = (T)1)  
92 - {  
93 - k = rts::vec<T>(0, 0, 1) * (TAU/lambda);  
94 - E0 = rts::vec<T>(1, 0, 0);  
95 - }*/  
96 - ///constructor: create a plane wave propagating along k, polarized along _E0, at frequency _omega  
97 - CUDA_CALLABLE planewave(vec<T> kvec = rts::vec<T>(0, 0, rtsTAU),  
98 - vec< complex<T> > E = rts::vec<T>(1, 0, 0), T phase = 0) 101 + ///constructor: create a plane wave propagating along k
  102 + CUDA_CALLABLE planewave(vec<T> kvec = stim::vec<T>(0, 0, stim::TAU),
  103 + vec< complex<T> > E = stim::vec<T>(1, 0, 0))
99 { 104 {
100 //phi = phase; 105 //phi = phase;
101 106
@@ -107,27 +112,23 @@ public: @@ -107,27 +112,23 @@ public:
107 else{ 112 else{
108 vec< complex<T> > s = (k_hat.cross(E)).norm(); //compute an orthogonal side vector 113 vec< complex<T> > s = (k_hat.cross(E)).norm(); //compute an orthogonal side vector
109 vec< complex<T> > E_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector 114 vec< complex<T> > E_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector
110 - E0 = E_hat * E_hat.dot(E); //compute the projection of _E0 onto E0_hat 115 + E0 = E_hat;// * E_hat.dot(E); //compute the projection of _E0 onto E0_hat
111 } 116 }
112 117
113 E0 = E0 * exp( complex<T>(0, phase) ); 118 E0 = E0 * exp( complex<T>(0, phase) );
114 } 119 }
115 120
116 ///multiplication operator: scale E0 121 ///multiplication operator: scale E0
117 - CUDA_CALLABLE planewave<T> & operator* (const T & rhs)  
118 - {  
119 - 122 + CUDA_CALLABLE planewave<T> & operator* (const T & rhs){
120 E0 = E0 * rhs; 123 E0 = E0 * rhs;
121 return *this; 124 return *this;
122 } 125 }
123 126
124 - CUDA_CALLABLE T lambda() const  
125 - {  
126 - return rtsTAU / k.len(); 127 + CUDA_CALLABLE T lambda() const{
  128 + return stim::TAU / k.len();
127 } 129 }
128 130
129 - CUDA_CALLABLE T kmag() const  
130 - { 131 + CUDA_CALLABLE T kmag() const{
131 return k.len(); 132 return k.len();
132 } 133 }
133 134
@@ -139,14 +140,11 @@ public: @@ -139,14 +140,11 @@ public:
139 return k; 140 return k;
140 } 141 }
141 142
142 - /*CUDA_CALLABLE T phase(){  
143 - return phi; 143 + /// calculate the value of the field produced by the plane wave given a three-dimensional position
  144 + CUDA_CALLABLE vec< complex<T> > pos(T x, T y, T z){
  145 + return pos( stim::vec<T>(x, y, z) );
144 } 146 }
145 147
146 - CUDA_CALLABLE void phase(T p){  
147 - phi = p;  
148 - }*/  
149 -  
150 CUDA_CALLABLE vec< complex<T> > pos(vec<T> p = vec<T>(0, 0, 0)){ 148 CUDA_CALLABLE vec< complex<T> > pos(vec<T> p = vec<T>(0, 0, 0)){
151 vec< complex<T> > result; 149 vec< complex<T> > result;
152 150
@@ -166,18 +164,32 @@ public: @@ -166,18 +164,32 @@ public:
166 return planewave<T>(k * (nt / ni), E0); 164 return planewave<T>(k * (nt / ni), E0);
167 } 165 }
168 166
169 - CUDA_CALLABLE planewave<T> refract(rts::vec<T> kn) const  
170 - { 167 + CUDA_CALLABLE planewave<T> refract(stim::vec<T> kn) const{
171 return bend(kn); 168 return bend(kn);
172 } 169 }
173 170
174 - void scatter(rts::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){ 171 + /// Calculate the result of a plane wave hitting an interface between two refractive indices
  172 +
  173 + /// @param P is a plane representing the position and orientation of the surface
  174 + /// @param n0 is the refractive index outside of the surface (in the direction of the normal)
  175 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  176 + /// @param r is the reflected component of the plane wave
  177 + /// @param t is the transmitted component of the plane wave
  178 + void scatter(stim::plane<T> P, T n0, T n1, planewave<T> &r, planewave<T> &t){
  179 + scatter(P, n1/n0, r, t);
  180 + }
  181 +
  182 + /// Calculate the scattering result when nr = n1/n0
  183 +
  184 + /// @param P is a plane representing the position and orientation of the surface
  185 + /// @param nr is the ratio n1/n0
  186 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  187 + /// @param r is the reflected component of the plane wave
  188 + /// @param t is the transmitted component of the plane wave
  189 + void scatter(stim::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){
175 190
176 int facing = P.face(k); //determine which direction the plane wave is coming in 191 int facing = P.face(k); //determine which direction the plane wave is coming in
177 192
178 - //if(facing == 0) //if the wave is tangent to the plane, return an identical wave  
179 - // return *this;  
180 - //else  
181 if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr 193 if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr
182 P = P.flip(); //flip the plane 194 P = P.flip(); //flip the plane
183 nr = 1/nr; //invert the refractive index (now nr = n0/n1) 195 nr = 1/nr; //invert the refractive index (now nr = n0/n1)
@@ -192,7 +204,7 @@ public: @@ -192,7 +204,7 @@ public:
192 bool tir = false; //flag for total internal reflection 204 bool tir = false; //flag for total internal reflection
193 if(theta_t != theta_t){ 205 if(theta_t != theta_t){
194 tir = true; 206 tir = true;
195 - theta_t = rtsPI / (T)2; 207 + theta_t = stim::PI / (T)2;
196 } 208 }
197 209
198 //handle the degenerate case where theta_i is 0 (the plane wave hits head-on) 210 //handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
@@ -205,17 +217,10 @@ public: @@ -205,17 +217,10 @@ public:
205 vec< complex<T> > Et = E0 * tp; 217 vec< complex<T> > Et = E0 * tp;
206 T phase_t = P.p().dot(k - kt); //compute the phase offset 218 T phase_t = P.p().dot(k - kt); //compute the phase offset
207 T phase_r = P.p().dot(k - kr); 219 T phase_r = P.p().dot(k - kr);
208 - //std::cout<<"Degeneracy: Head-On"<<std::endl;  
209 - //std::cout<<"rs: "<<rp<<" rp: "<<rp<<" ts: "<<tp<<" tp: "<<tp<<std::endl;  
210 - //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;  
211 220
212 //create the plane waves 221 //create the plane waves
213 r = planewave<T>(kr, Er, phase_r); 222 r = planewave<T>(kr, Er, phase_r);
214 t = planewave<T>(kt, Et, phase_t); 223 t = planewave<T>(kt, Et, phase_t);
215 -  
216 - //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;  
217 - //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;  
218 - //std::cout<<"--------------------------------"<<std::endl;  
219 return; 224 return;
220 } 225 }
221 226
@@ -245,11 +250,9 @@ public: @@ -245,11 +250,9 @@ public:
245 250
246 //compute the magnitude of the p- and s-polarized components of the incident E vector 251 //compute the magnitude of the p- and s-polarized components of the incident E vector
247 complex<T> Ei_s = E0.dot(x_hat); 252 complex<T> Ei_s = E0.dot(x_hat);
248 - //int sgn = (0 < E0.dot(y_hat)) - (E0.dot(y_hat) < 0);  
249 int sgn = E0.dot(y_hat).sgn(); 253 int sgn = E0.dot(y_hat).sgn();
250 vec< complex<T> > cx_hat = x_hat; 254 vec< complex<T> > cx_hat = x_hat;
251 complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn; 255 complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
252 - //T Ei_p = ( E0 - x_hat * Ei_s ).len();  
253 //compute the magnitude of the p- and s-polarized components of the reflected E vector 256 //compute the magnitude of the p- and s-polarized components of the reflected E vector
254 complex<T> Er_s = Ei_s * rs; 257 complex<T> Er_s = Ei_s * rs;
255 complex<T> Er_p = Ei_p * rp; 258 complex<T> Er_p = Ei_p * rp;
@@ -257,14 +260,6 @@ public: @@ -257,14 +260,6 @@ public:
257 complex<T> Et_s = Ei_s * ts; 260 complex<T> Et_s = Ei_s * ts;
258 complex<T> Et_p = Ei_p * tp; 261 complex<T> Et_p = Ei_p * tp;
259 262
260 - //std::cout<<"E0: "<<E0<<std::endl;  
261 - //std::cout<<"E0 dot y_hat: "<<E0.dot(y_hat)<<std::endl;  
262 - //std::cout<<"theta i: "<<theta_i<<" theta t: "<<theta_t<<std::endl;  
263 - //std::cout<<"x_hat: "<<x_hat<<" y_hat: "<<y_hat<<" z_hat: "<<z_hat<<std::endl;  
264 - //std::cout<<"Ei_s: "<<Ei_s<<" Ei_p: "<<Ei_p<<" Er_s: "<<Er_s<<" Er_p: "<<Er_p<<" Et_s: "<<Et_s<<" Et_p: "<<Et_p<<std::endl;  
265 - //std::cout<<"rs: "<<rs<<" rp: "<<rp<<" ts: "<<ts<<" tp: "<<tp<<std::endl;  
266 -  
267 -  
268 //compute the reflected E vector 263 //compute the reflected E vector
269 vec< complex<T> > Er = vec< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s; 264 vec< complex<T> > Er = vec< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
270 //compute the transmitted E vector 265 //compute the transmitted E vector
@@ -273,29 +268,12 @@ public: @@ -273,29 +268,12 @@ public:
273 T phase_t = P.p().dot(k - kt); 268 T phase_t = P.p().dot(k - kt);
274 T phase_r = P.p().dot(k - kr); 269 T phase_r = P.p().dot(k - kr);
275 270
276 - //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;  
277 -  
278 - //std::cout<<"phase: "<<phase<<std::endl;  
279 -  
280 //create the plane waves 271 //create the plane waves
281 r.k = kr; 272 r.k = kr;
282 r.E0 = Er * exp( complex<T>(0, phase_r) ); 273 r.E0 = Er * exp( complex<T>(0, phase_r) );
283 - //r.phi = phase_r;  
284 -  
285 - //t = bend(kt);  
286 - //t.k = t.k * nr;  
287 274
288 t.k = kt; 275 t.k = kt;
289 t.E0 = Et * exp( complex<T>(0, phase_t) ); 276 t.E0 = Et * exp( complex<T>(0, phase_t) );
290 - //t.phi = phase_t;  
291 - //std::cout<<"i: "<<str()<<std::endl;  
292 - //std::cout<<"r: "<<r.str()<<std::endl;  
293 - //std::cout<<"t: "<<t.str()<<std::endl;  
294 -  
295 - //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;  
296 - //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;  
297 - //std::cout<<"--------------------------------"<<std::endl;  
298 -  
299 } 277 }
300 278
301 std::string str() 279 std::string str()
@@ -305,14 +283,15 @@ public: @@ -305,14 +283,15 @@ public:
305 ss<<" "<<E0<<" e^i ( "<<k<<" . r )"; 283 ss<<" "<<E0<<" e^i ( "<<k<<" . r )";
306 return ss.str(); 284 return ss.str();
307 } 285 }
308 -};  
309 -} 286 +}; //end planewave class
  287 +} //end namespace optics
  288 +} //end namespace stim
310 289
311 template <typename T> 290 template <typename T>
312 -std::ostream& operator<<(std::ostream& os, rts::planewave<T> p) 291 +std::ostream& operator<<(std::ostream& os, stim::optics::planewave<T> p)
313 { 292 {
314 os<<p.str(); 293 os<<p.str();
315 return os; 294 return os;
316 } 295 }
317 296
318 -#endif 297 -#endif
  298 +#endif
319 \ No newline at end of file 299 \ No newline at end of file
stim/optics/scalarbeam.h 0 → 100644
  1 +#ifndef RTS_BEAM
  2 +#define RTS_BEAM
  3 +#include <boost/math/special_functions/bessel.hpp>
  4 +
  5 +#include "../math/vec3.h"
  6 +#include "../optics/scalarwave.h"
  7 +#include "../math/bessel.h"
  8 +#include "../math/legendre.h"
  9 +#include "../cuda/cudatools/devices.h"
  10 +#include "../cuda/cudatools/timer.h"
  11 +#include "../optics/scalarfield.h"
  12 +#include <cublas_v2.h>
  13 +#include <math_constants.h>
  14 +#include <vector>
  15 +#include <stdlib.h>
  16 +
  17 +
  18 +
  19 +namespace stim{
  20 +
  21 +/// Function returns N randomly sampled focusing direction vectors for a beam directed along d, limited by the numerical aperture NA and the optional central obscuration NA_in
  22 +
  23 +template<typename T>
  24 +std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d, T NA, T NA_in = 0){
  25 +
  26 + std::vector< stim::vec3<T> > dirs(N); //allocate an array to store the focusing vectors
  27 +
  28 + ///compute the rotation operator to transform (0, 0, 1) to d
  29 + T cos_angle = d.dot(vec3<T>(0, 0, 1));
  30 + stim::matrix<T, 3> rotation;
  31 +
  32 + //if the cosine of the angle is -1, the rotation is just a flip across the z axis
  33 + if(cos_angle == -1){
  34 + rotation(2, 2) = -1;
  35 + }
  36 + else if(cos_angle != 1.0)
  37 + {
  38 + vec3<T> r_axis = vec3<T>(0, 0, 1).cross(d).norm(); //compute the axis of rotation
  39 + T angle = acos(cos_angle); //compute the angle of rotation
  40 + quaternion<T> quat; //create a quaternion describing the rotation
  41 + quat.CreateRotation(angle, r_axis);
  42 + rotation = quat.toMatrix3(); //compute the rotation matrix
  43 + }
  44 +
  45 + //find the phi values associated with the cassegrain ring
  46 + T PHI[2];
  47 + PHI[0] = (T)asin(NA);
  48 + PHI[1] = (T)asin(NA_in);
  49 +
  50 + //calculate the z-axis cylinder coordinates associated with these angles
  51 + T Z[2];
  52 + Z[0] = cos(PHI[0]);
  53 + Z[1] = cos(PHI[1]);
  54 + T range = Z[0] - Z[1];
  55 +
  56 + //draw a distribution of random phi, z values
  57 + T z, phi, theta;
  58 + //T kmag = stim::TAU / lambda;
  59 + for(int i=0; i<N; i++){ //for each sample
  60 + z = (T)((double)rand() / (double)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder
  61 + theta = (T)(((double)rand() / (double)RAND_MAX) * stim::TAU);
  62 + phi = acos(z); //project onto the sphere, computing phi in spherical coordinates
  63 +
  64 + //compute and store cartesian coordinates
  65 + vec3<T> spherical(1, theta, phi); //convert from spherical to cartesian coordinates
  66 + vec3<T> cart = spherical.sph2cart();
  67 + dirs[i] = rotation * cart; //create a sample vector
  68 + }
  69 + return dirs;
  70 +}
  71 +
  72 +
  73 +/// Calculate the [0 Nl] terms for the aperture integral based on the given numerical aperture and center obscuration (optional)
  74 +/// @param C is a pointer to Nl + 1 values where the terms will be stored
  75 +template<typename T>
  76 +CUDA_CALLABLE void cpu_aperture_integral(T* C, int Nl, T NA, T NA_in = 0){
  77 +
  78 + size_t table_bytes = (Nl + 1) * sizeof(T); //calculate the number of bytes required to store the terms
  79 + T cos_alpha_1 = cos(asin(NA_in)); //calculate the cosine of the angle subtended by the central obscuration
  80 + T cos_alpha_2 = cos(asin(NA)); //calculate the cosine of the angle subtended by the aperture
  81 +
  82 + // the aperture integral is computed using four individual Legendre polynomials, each a function of the angles subtended
  83 + // by the objective and central obscuration
  84 + T* Pln_a1 = (T*) malloc(table_bytes);
  85 + stim::legendre<T>(Nl-1, cos_alpha_1, &Pln_a1[1]);
  86 + Pln_a1[0] = 1;
  87 +
  88 + T* Pln_a2 = (T*) malloc(table_bytes);
  89 + stim::legendre<T>(Nl-1, cos_alpha_2, &Pln_a2[1]);
  90 + Pln_a2[0] = 1;
  91 +
  92 + T* Plp_a1 = (T*) malloc(table_bytes+sizeof(T));
  93 + stim::legendre<T>(Nl+1, cos_alpha_1, Plp_a1);
  94 +
  95 + T* Plp_a2 = (T*) malloc(table_bytes+sizeof(T));
  96 + stim::legendre<T>(Nl+1, cos_alpha_2, Plp_a2);
  97 +
  98 + for(size_t l = 0; l <= Nl; l++){
  99 + C[l] = Plp_a1[l+1] - Plp_a2[l+1] - Pln_a1[l] + Pln_a2[l];
  100 + }
  101 +
  102 + free(Pln_a1);
  103 + free(Pln_a2);
  104 + free(Plp_a1);
  105 + free(Plp_a2);
  106 +}
  107 +
  108 +/// performs linear interpolation into a look-up table
  109 +template<typename T>
  110 +CUDA_CALLABLE void lut_lookup(T* lut_values, T* lut, T val, size_t N, T min_val, T delta, size_t n_vals){
  111 + T idx = ((val - min_val) / delta);
  112 + size_t i = (size_t) idx;
  113 + T a1 = idx - i;
  114 + T a0 = 1 - a1;
  115 + size_t n0 = i * n_vals;
  116 + size_t n1 = (i+1) * n_vals;
  117 + for(size_t n = 0; n < n_vals; n++){
  118 + lut_values[n] = lut[n0 + n] * a0 + lut[n1 + n] * a1;
  119 + }
  120 +}
  121 +
  122 +template <typename T>
  123 +CUDA_CALLABLE stim::complex<T> clerp(stim::complex<T> v0, stim::complex<T> v1, T t) {
  124 + return stim::complex<T>( fma(t, v1.r, fma(-t, v0.r, v0.r)), fma(t, v1.i, fma(-t, v0.i, v0.i)) );
  125 +}
  126 +
  127 +template <typename T>
  128 +CUDA_CALLABLE T lerp(T v0, T v1, T t) {
  129 + return fma(t, v1, fma(-t, v0, v0));
  130 +}
  131 +
  132 +#ifdef CUDA_FOUND
  133 +template<typename T>
  134 +__global__ void cuda_scalar_psf(stim::complex<T>* E, size_t N, T* r, T* phi, T A, size_t Nl,
  135 + T* C,
  136 + T* lut_j, size_t Nj, T min_r, T dr){
  137 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  138 + if(i >= N) return; //exit if this thread is outside the array
  139 +
  140 + T cos_phi = cos(phi[i]); //calculate the thread value for cos(phi)
  141 + stim::complex<T> Ei = 0; //initialize the value of the field to zero
  142 + size_t NC = Nl + 1; //calculate the number of coefficients to be used
  143 +
  144 + T fij = (r[i] - min_r)/dr; //FP index into the spherical bessel LUT
  145 + size_t ij = (size_t) fij; //convert to an integral index
  146 + T a = fij - ij; //calculate the fractional portion of the index
  147 + size_t n0j = ij * (NC); //start of the first entry in the LUT
  148 + size_t n1j = (ij+1) * (NC); //start of the second entry in the LUT
  149 +
  150 + T jl; //declare register to store the spherical bessel function
  151 + T Pl_2, Pl_1; //declare registers to store the previous two Legendre polynomials
  152 + T Pl = 1; //initialize the current value for the Legendre polynomial
  153 + stim::complex<T> im(0, 1); //declare i (imaginary 1)
  154 + stim::complex<T> i_pow(1, 0); //i_pow stores the current value of i^l so it doesn't have to be re-computed every iteration
  155 + for(int l = 0; l <= Nl; l++){ //for each order
  156 + jl = lerp<T>( lut_j[n0j + l], lut_j[n1j + l], a ); //read jl from the LUT and interpolate the result
  157 + Ei += i_pow * jl * Pl * C[l]; //calculate the value for the field and sum
  158 + i_pow *= im; //multiply i^l * i for the next iteration
  159 + Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1
  160 + Pl_1 = Pl;
  161 + if(l == 0){ //computing Pl is done recursively, where the recursive relation
  162 + Pl = cos_phi; // requires the first two orders. This defines the second.
  163 + }
  164 + else{ //if this is not the first iteration, use the recursive relation to calculate Pl
  165 + Pl = ( (2 * (l+1) - 1) * cos_phi * Pl_1 - (l) * Pl_2 ) / (l+1);
  166 + }
  167 +
  168 + }
  169 + E[i] = Ei * A * 2 * CUDART_PI_F; //scale the integral by the amplitude
  170 +}
  171 +
  172 +template<typename T>
  173 +void gpu_scalar_psf_local(stim::complex<T>* E, size_t N, T* r, T* phi, T lambda, T A, T NA, T NA_in, int Nl, T r_spacing){
  174 +
  175 + //Find the minimum and maximum values of r
  176 + cublasStatus_t stat;
  177 + cublasHandle_t handle;
  178 +
  179 + stat = cublasCreate(&handle); //create a cuBLAS handle
  180 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  181 + printf ("CUBLAS initialization failed\n");
  182 + exit(1);
  183 + }
  184 +
  185 + int i_min, i_max;
  186 + stat = cublasIsamin(handle, (int)N, r, 1, &i_min);
  187 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  188 + printf ("CUBLAS Error: failed to calculate minimum r value.\n");
  189 + exit(1);
  190 + }
  191 + stat = cublasIsamax(handle, (int)N, r, 1, &i_max);
  192 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  193 + printf ("CUBLAS Error: failed to calculate maximum r value.\n");
  194 + exit(1);
  195 + }
  196 +
  197 + i_min--; //cuBLAS uses 1-based indexing for Fortran compatibility
  198 + i_max--;
  199 + T r_min, r_max; //allocate space to store the minimum and maximum values
  200 + HANDLE_ERROR( cudaMemcpy(&r_min, r + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU
  201 + HANDLE_ERROR( cudaMemcpy(&r_max, r + i_max, sizeof(T), cudaMemcpyDeviceToHost) );
  202 +
  203 + T k = (T)stim::TAU / lambda; //calculate the wavenumber from lambda
  204 + size_t C_bytes = (Nl + 1) * sizeof(T);
  205 + T* C = (T*) malloc( C_bytes ); //allocate space for the aperture integral terms
  206 + cpu_aperture_integral(C, Nl, NA, NA_in); //calculate the aperture integral terms
  207 +
  208 + size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1); //number of values in the look-up table based on the user-specified spacing along r
  209 +
  210 +
  211 + size_t lutj_bytes = sizeof(T) * (Nl+1) * Nlut_j;
  212 + T* j_lut = (T*) malloc(lutj_bytes); //pointer to the look-up table
  213 + T dr = (r_max - r_min) / (Nlut_j-1); //distance between values in the LUT
  214 + T jl;
  215 + for(size_t ri = 0; ri < Nlut_j; ri++){ //for each value in the LUT
  216 + for(size_t l = 0; l <= Nl; l++){ //for each order
  217 + jl = boost::math::sph_bessel<T>(l, k*(r_min + ri * dr)); //use boost to calculate the spherical bessel function
  218 + j_lut[ri * (Nl + 1) + l] = jl; //store the bessel function result
  219 + }
  220 + }
  221 +
  222 + stim::cpu2image<T>(j_lut, "j_lut.bmp", Nl+1, Nlut_j, stim::cmBrewer);
  223 + //Allocate device memory and copy everything to the GPU
  224 +
  225 + T* gpu_C;
  226 + HANDLE_ERROR( cudaMalloc(&gpu_C, C_bytes) );
  227 + HANDLE_ERROR( cudaMemcpy(gpu_C, C, C_bytes, cudaMemcpyHostToDevice) );
  228 + T* gpu_j_lut;
  229 + HANDLE_ERROR( cudaMalloc(&gpu_j_lut, lutj_bytes) );
  230 + HANDLE_ERROR( cudaMemcpy(gpu_j_lut, j_lut, lutj_bytes, cudaMemcpyHostToDevice) );
  231 +
  232 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  233 + dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  234 +
  235 + cuda_scalar_psf<T><<< blocks, threads >>>(E, N, r, phi, A, Nl, gpu_C, gpu_j_lut, Nlut_j, r_min, dr);
  236 +
  237 + //free the LUT and condenser tables
  238 + HANDLE_ERROR( cudaFree(gpu_C) );
  239 + HANDLE_ERROR( cudaFree(gpu_j_lut) );
  240 +}
  241 +#endif
  242 +
  243 +/// Calculate the analytical solution to a scalar point spread function given a set of spherical coordinates about the PSF (beam propagation along phi = theta = 0)
  244 +template<typename T>
  245 +void cpu_scalar_psf_local(stim::complex<T>* F, size_t N, T* r, T* phi, T lambda, T A, T NA, T NA_in, int Nl){
  246 + T k = (T)stim::TAU / lambda;
  247 + size_t C_bytes = (Nl + 1) * sizeof(T);
  248 + T* C = (T*) malloc( C_bytes ); //allocate space for the aperture integral terms
  249 + cpu_aperture_integral(C, Nl, NA, NA_in); //calculate the aperture integral terms
  250 + memset(F, 0, N * sizeof(stim::complex<T>));
  251 + T jl, Pl, kr, cos_phi;
  252 +
  253 + double vm;
  254 + double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
  255 + double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
  256 + double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
  257 + double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
  258 +
  259 + T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T));
  260 +
  261 + for(size_t n = 0; n < N; n++){ //for each point in the field
  262 + kr = k * r[n]; //calculate kr (the optical distance between the focal point and p)
  263 + cos_phi = std::cos(phi[n]); //calculate the cosine of phi
  264 + stim::bessjyv_sph<double>(Nl, kr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl]
  265 + stim::legendre<T>(Nl, cos_phi, Pl_cos_phi); //calculate the [0 Nl] legendre polynomials for this point
  266 +
  267 + for(int l = 0; l <= Nl; l++){
  268 + jl = (T)jv[l];
  269 + Pl = Pl_cos_phi[l];
  270 + F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C[l];
  271 + }
  272 + F[n] *= A * stim::TAU;
  273 + }
  274 +
  275 + free(C);
  276 + free(Pl_cos_phi);
  277 +}
  278 +
#ifdef CUDA_FOUND
/// Converts a set of cartesian points into spherical coordinates surrounding a point spread function (PSF)
/// @param r is the output distance from the PSF
/// @param phi is the non-symmetric direction about the PSF
/// @param N is the number of points
/// @param x (x, y, z) are the cartesian coordinates in world space; a NULL array is treated as all zeros
/// @param f is the focal point of the PSF in cartesian coordinates
/// @param q is the rotation applied to each point after it is shifted by f
///
/// The kernel is only compiled when CUDA_FOUND is defined, matching the guard on gpu_scalar_psf_cart,
/// so that builds without CUDA never see the __global__ qualifier.
template<typename T>
__global__ void cuda_cart2psf(T* r, T* phi, size_t N, T* x, T* y, T* z, stim::vec3<T> f, stim::quaternion<T> q){

	size_t i = blockIdx.x * blockDim.x + threadIdx.x;		//get the index into the array
	if(i >= N) return;										//exit if this thread is outside the array

	stim::vec3<T> p;										//declare a 3D point

	p[0] = (x == NULL) ? (T)0 : x[i];						//test for NULL values and set positions
	p[1] = (y == NULL) ? (T)0 : y[i];
	p[2] = (z == NULL) ? (T)0 : z[i];

	p = p - f;												//shift the point to the center of the PSF (focal point)
	p = q.toMatrix3() * p;									//rotate the point to align with the propagation direction

	stim::vec3<T> ps = p.cart2sph();						//convert from cartesian to spherical coordinates
	r[i] = ps[0];											//store r
	phi[i] = ps[2];											//phi = [0 pi]
}
#endif
  304 +
  305 +#ifdef CUDA_FOUND
  306 +/// Calculate the analytical solution to a point spread function given a set of points in cartesian coordinates
  307 +template<typename T>
  308 +void gpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){
  309 +
  310 + T* gpu_r; //allocate space for the coordinates in r
  311 + HANDLE_ERROR( cudaMalloc(&gpu_r, sizeof(T) * N) );
  312 + T* gpu_phi;
  313 + HANDLE_ERROR( cudaMalloc(&gpu_phi, sizeof(T) * N) );
  314 + //stim::complex<T>* gpu_E;
  315 + //HANDLE_ERROR( cudaMalloc(&gpu_E, sizeof(stim::complex<T>) * N) );
  316 +
  317 + stim::quaternion<T> q; //create a quaternion
  318 + q.CreateRotation(d, stim::vec3<T>(0, 0, 1)); //create a mapping from the propagation direction to the PSF space
  319 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  320 + dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  321 + cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q); //call the CUDA kernel to move the cartesian coordinates to PSF space
  322 +
  323 + gpu_scalar_psf_local(E, N, gpu_r, gpu_phi, lambda, A, NA, NA_in, Nl, r_spacing);
  324 +
  325 +}
  326 +#endif
  327 +
  328 +template<typename T>
  329 +void cpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){
  330 +
  331 +// If CUDA is available, copy the cartesian points to the GPU and evaluate them in a kernel
  332 +#ifdef CUDA_FOUND
  333 +
  334 + T* gpu_x = NULL;
  335 + if(x != NULL){
  336 + HANDLE_ERROR( cudaMalloc(&gpu_x, sizeof(T) * N) );
  337 + HANDLE_ERROR( cudaMemcpy(gpu_x, x, sizeof(T) * N, cudaMemcpyHostToDevice) );
  338 + }
  339 + T* gpu_y = NULL;
  340 + if(y != NULL){
  341 + HANDLE_ERROR( cudaMalloc(&gpu_y, sizeof(T) * N) );
  342 + HANDLE_ERROR( cudaMemcpy(gpu_y, y, sizeof(T) * N, cudaMemcpyHostToDevice) );
  343 + }
  344 + T* gpu_z = NULL;
  345 + if(z != NULL){
  346 + HANDLE_ERROR( cudaMalloc(&gpu_z, sizeof(T) * N) );
  347 + HANDLE_ERROR( cudaMemcpy(gpu_z, z, sizeof(T) * N, cudaMemcpyHostToDevice) );
  348 + }
  349 +
  350 + stim::complex<T>* gpu_E;
  351 + HANDLE_ERROR( cudaMalloc(&gpu_E, sizeof(stim::complex<T>) * N) );
  352 + HANDLE_ERROR( cudaMemcpy(gpu_E, E, sizeof(stim::complex<T>) * N, cudaMemcpyHostToDevice) );
  353 + gpu_scalar_psf_cart<T>(gpu_E, N, gpu_x, gpu_y, gpu_z, lambda, A, f, d, NA, NA_in, Nl, r_spacing);
  354 + HANDLE_ERROR( cudaMemcpy(E, gpu_E, sizeof(stim::complex<T>) * N, cudaMemcpyDeviceToHost) );
  355 +
  356 + HANDLE_ERROR( cudaFree(gpu_x) );
  357 + HANDLE_ERROR( cudaFree(gpu_y) );
  358 + HANDLE_ERROR( cudaFree(gpu_z) );
  359 + HANDLE_ERROR( cudaFree(gpu_E) );
  360 +
  361 +#else
  362 + T* r = (T*) malloc(N * sizeof(T)); //allocate space for p in spherical coordinates
  363 + T* phi = (T*) malloc(N * sizeof(T)); // only r and phi are necessary (the scalar PSF is symmetric about theta)
  364 +
  365 + stim::quaternion<T> q;
  366 + q.CreateRotation(d, stim::vec3<T>(0, 0, 1));
  367 + stim::matrix<T, 3> R = q.toMatrix3();
  368 + stim::vec3<T> p, ps, ds;
  369 + for(size_t i = 0; i < N; i++){
  370 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  371 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  372 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  373 +
  374 + p = p - f;
  375 +
  376 + p = R * p; //rotate the cartesian point
  377 +
  378 + ps = p.cart2sph(); //convert from cartesian to spherical coordinates
  379 + r[i] = ps[0]; //store r
  380 + phi[i] = ps[2]; //phi = [0 pi]
  381 + }
  382 +
  383 + cpu_scalar_psf_local(E, N, r, phi, lambda, A, NA, NA_in, Nl); //call the spherical coordinate CPU function
  384 +
  385 + free(r);
  386 + free(phi);
  387 +#endif
  388 +}
  389 +
  390 +/// Class stim::beam represents a beam of light focused at a point and composed of several plane waves
  391 +template<typename T>
  392 +class scalarbeam
  393 +{
  394 +public:
  395 + //enum beam_type {Uniform, Bartlett, Hamming, Hanning};
  396 +
  397 +private:
  398 +
  399 + T NA[2]; //numerical aperature of the focusing optics
  400 + vec3<T> f; //focal point
  401 + vec3<T> d; //propagation direction
  402 + T A; //beam amplitude
  403 + T lambda; //beam wavelength
  404 +public:
  405 +
  406 + ///constructor: build a default beam (NA=1.0)
  407 + scalarbeam(T wavelength = 1, T amplitude = 1, vec3<T> focal_point = vec3<T>(0, 0, 0), vec3<T> direction = vec3<T>(0, 0, 1), T numerical_aperture = 1, T center_obsc = 0){
  408 + lambda = wavelength;
  409 + A = amplitude;
  410 + f = focal_point;
  411 + d = direction.norm(); //make sure that the direction vector is normalized (makes calculations more efficient later on)
  412 + NA[0] = numerical_aperture;
  413 + NA[1] = center_obsc;
  414 + }
  415 +
  416 + ///Numerical Aperature functions
  417 + void setNA(T na)
  418 + {
  419 + NA[0] = (T)0;
  420 + NA[1] = na;
  421 + }
  422 + void setNA(T na0, T na1)
  423 + {
  424 + NA[0] = na0;
  425 + NA[1] = na1;
  426 + }
  427 +
  428 + //Monte-Carlo decomposition into plane waves
  429 + std::vector< scalarwave<T> > mc(size_t N = 100000) const{
  430 +
  431 + std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]); //generate a random set of N vectors forming a focus
  432 + std::vector< scalarwave<T> > samples(N); //create a vector of plane waves
  433 + T kmag = (T)stim::TAU / lambda; //calculate the wavenumber
  434 + stim::complex<T> apw; //allocate space for the amplitude at the focal point
  435 + T a = (T)(stim::TAU * ( (1 - cos(asin(NA[0]))) - (1 - cos(asin(NA[1])))) / (double)N); //constant value weights plane waves based on the aperture and number of samples (N)
  436 + stim::vec3<T> kpw; //declare the new k-vector based on the focused plane wave direction
  437 + for(size_t i=0; i<N; i++){ //for each sample
  438 + kpw = dirs[i] * kmag; //calculate the k-vector for the new plane wave
  439 + apw = a * exp(stim::complex<T>(0, kpw.dot(-f))); //calculate the amplitude for the new plane wave
  440 + samples[i] = scalarwave<T>(kpw, apw); //create a plane wave based on the direction
  441 + }
  442 + return samples;
  443 + }
  444 +
  445 + /// Evaluate the beam to a scalar field using Debye focusing
  446 + void eval(stim::scalarfield<T>& E, size_t order = 500){
  447 + size_t array_size = E.grid_bytes();
  448 + T* X = (T*) malloc( array_size ); //allocate space for the coordinate meshes
  449 + T* Y = (T*) malloc( array_size );
  450 + T* Z = (T*) malloc( array_size );
  451 +
  452 + E.meshgrid(X, Y, Z, stim::CPUmem); //calculate the coordinate meshes
  453 + cpu_scalar_psf_cart<T>(E.ptr(), E.size(), X, Y, Z, lambda, A, f, d, NA[0], NA[1], order, E.spacing());
  454 +
  455 + free(X); //free the coordinate meshes
  456 + free(Y);
  457 + free(Z);
  458 + }
  459 +
  460 + /// Calculate the field at a given point
  461 + /// @param x is the x-coordinate of the field point
  462 + /// @O is the approximation accuracy
  463 + stim::complex<T> field(T x, T y, T z, size_t O){
  464 + std::vector< scalarwave<T> > W = mc(O);
  465 + T result = 0; //initialize the result to zero (0)
  466 + for(size_t i = 0; i < O; i++){ //for each plane wave
  467 + result += W[i].pos(x, y, z);
  468 + }
  469 + return result;
  470 + }
  471 +
  472 + std::string str()
  473 + {
  474 + std::stringstream ss;
  475 + ss<<"Beam:"<<std::endl;
  476 + //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
  477 + ss<<" Beam Direction: "<<d<<std::endl;
  478 + if(NA[0] == 0)
  479 + ss<<" NA: "<<NA[1];
  480 + else
  481 + ss<<" NA: "<<NA[0]<<" -- "<<NA[1];
  482 +
  483 + return ss.str();
  484 + }
  485 +
  486 +
  487 +
  488 +}; //end beam
  489 +} //end namespace stim
  490 +
  491 +#endif
stim/optics/scalarfield.h 0 → 100644
  1 +#ifndef STIM_SCALARFIELD_H
  2 +#define STIM_SCALARFIELD_H
  3 +
  4 +#include "../math/rect.h"
  5 +#include "../math/complex.h"
  6 +
  7 +namespace stim{
  8 +
  9 + enum locationType {CPUmem, GPUmem};
  10 +
  11 + /// Class represents a scalar optical field.
  12 +
  13 + /// In general, this class is designed to operate between the CPU and GPU. So, make sure all functions have an option to create the output on either.
  14 + /// The field is stored *either* on the GPU or host memory, but not both. This enforces that there can't be different copies of the same field.
  15 + /// This class is designed to be included in all of the other scalar optics classes, allowing them to render output data so make sure to keep it general and compatible.
  16 +
  17 +template<typename T>
  18 +class scalarfield : public rect<T>{
  19 +
  20 +protected:
  21 + stim::complex<T>* E;
  22 + size_t R[2];
  23 + locationType loc;
  24 +
  25 +
  26 +
  27 +public:
  28 +
  29 + CUDA_CALLABLE scalarfield(size_t X, size_t Y, T size = 1, T z_pos = 0) : rect<T>::rect(size, z_pos){
  30 + R[0] = X; //set the field resolution
  31 + R[1] = Y;
  32 +
  33 + E = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * R[0] * R[1]); //allocate in CPU memory
  34 + loc = CPUmem;
  35 + }
  36 +
  37 + CUDA_CALLABLE ~scalarfield(){
  38 + if(loc == CPUmem) free(E);
  39 + else cudaFree(E);
  40 + }
  41 +
  42 + /// Returns the number of values in the field
  43 + CUDA_CALLABLE size_t size(){
  44 + return R[0] * R[1];
  45 + }
  46 +
  47 + CUDA_CALLABLE size_t grid_bytes(){
  48 + return sizeof(stim::complex<T>) * R[0] * R[1];
  49 + }
  50 +
  51 + /// Calculates the distance between points on the grid
  52 + T spacing(){
  53 + T du = rect<T>::X.len() / R[0];
  54 + T dv = rect<T>::Y.len() / R[1];
  55 + return min<T>(du, dv);
  56 + }
  57 +
  58 + /// Copy the field array to the GPU, if it isn't already there
  59 + void to_gpu(){
  60 + if(loc == GPUmem) return;
  61 + else{
  62 + stim::complex<T>* dev_E;
  63 + HANDLE_ERROR( cudaMalloc(&dev_E, e_bytes()) ); //allocate GPU memory
  64 + HANDLE_ERROR( cudaMemcpy(dev_E, E, e_bytes(), cudaMemcpyHostToDevice) ); //copy the field to the GPU
  65 + free(E); //free the CPU memory
  66 + E = dev_E; //swap pointers
  67 + }
  68 + }
  69 +
  70 + /// Copy the field array to the CPU, if it isn't already there
  71 + void to_cpu(){
  72 + if(loc == CPUmem) return;
  73 + else{
  74 + stim::complex<T>* host_E = (stim::complex<T>*) malloc(e_bytes()); //allocate space in main memory
  75 + HANDLE_ERROR( cudaMemcpy(host_E, E, e_bytes(), cudaMemcpyDeviceToHost) ); //copy from GPU to CPU
  76 + HANDLE_ERROR( cudaFree(E) ); //free device memory
  77 + E = host_E; //swap pointers
  78 + }
  79 + }
  80 +
  81 + std::string str(){
  82 + std::stringstream ss;
  83 + ss<<rect<T>::str()<<std::endl;
  84 + ss<<"[ "<<R[0]<<" x "<<R[1]<<" ]"<<std::endl;
  85 + ss<<"location: ";
  86 + if(loc == CPUmem) ss<<"CPU";
  87 + else ss<<"GPU";
  88 +
  89 + ss<<endl;
  90 + return ss.str();
  91 + }
  92 +
  93 + stim::complex<T>* ptr(){
  94 + return E;
  95 + }
  96 +
  97 + /// Evaluate the cartesian coordinates of each point in the field. The resulting arrays are allocated in the same memory where the field is stored.
  98 + void meshgrid(T* X, T* Y, T* Z, locationType location){
  99 + size_t array_size = sizeof(T) * R[0] * R[1];
  100 + if(location == CPUmem){
  101 +
  102 + T du = 1.0 / (R[0] - 1); //calculate the spacing between points in the grid
  103 + T dv = 1.0 / (R[1] - 1);
  104 +
  105 + size_t ui, vi, i;
  106 + stim::vec3<T> p;
  107 + for(vi = 0; vi < R[1]; vi++){
  108 + i = vi * R[0];
  109 + for(ui = 0; ui < R[0]; ui++){
  110 + p = rect<T>::p(ui * du, vi * dv);
  111 + X[i] = p[0];
  112 + Y[i] = p[1];
  113 + Z[i] = p[2];
  114 + i++;
  115 + }
  116 + }
  117 + stim::cpu2image(X, "X.bmp", R[0], R[1], stim::cmBrewer);
  118 + stim::cpu2image(Y, "Y.bmp", R[0], R[1], stim::cmBrewer);
  119 + stim::cpu2image(Z, "Z.bmp", R[0], R[1], stim::cmBrewer);
  120 + }
  121 + else{
  122 + std::cout<<"GPU allocation of a meshgrid isn't supported yet. You'll have to write kernels to do the calculation.";
  123 + exit(1);
  124 + }
  125 + }
  126 +
  127 + void image(std::string filename, stim::complexComponentType type = complexMag, stim::colormapType cmap = stim::cmBrewer){
  128 +
  129 + if(loc == GPUmem) to_cpu(); //if the field is in the GPU, move it to the CPU
  130 + T* image = (T*) malloc( sizeof(T) * size() ); //allocate space for the real image
  131 +
  132 + switch(type){ //get the specified component from the complex value
  133 + case complexMag:
  134 + stim::abs(image, E, size());
  135 + break;
  136 + case complexReal:
  137 + stim::real(image, E, size());
  138 + break;
  139 + case complexImaginary:
  140 + stim::imag(image, E, size());
  141 + }
  142 + stim::cpu2image(image, filename, R[0], R[1], cmap); //save the resulting image
  143 + free(image); //free the real image
  144 + }
  145 +
  146 +}; //end class scalarfield
  147 +}
  148 +
  149 +//stream insertion operator
  150 +template<typename T>
  151 +std::ostream& operator<<(std::ostream& os, stim::scalarfield<T>& rhs){
  152 + os<<rhs.str();
  153 + return os;
  154 +}
  155 +
  156 +
  157 +#endif
0 \ No newline at end of file 158 \ No newline at end of file
stim/optics/scalarwave.h 0 → 100644
  1 +#ifndef STIM_SCALARWAVE_H
  2 +#define STIM_SCALARWAVE_H
  3 +
  4 +
  5 +#include <string>
  6 +#include <sstream>
  7 +#include <cmath>
  8 +
  9 +//#include "../math/vector.h"
  10 +#include "../math/vec3.h"
  11 +#include "../math/quaternion.h"
  12 +#include "../math/constants.h"
  13 +#include "../math/plane.h"
  14 +#include "../math/complex.h"
  15 +
  16 +//CUDA
  17 +#include "../cuda/cudatools/devices.h"
  18 +#include "../cuda/cudatools/error.h"
  19 +#include "../cuda/sharedmem.cuh"
  20 +
  21 +namespace stim{
  22 +
  23 +template<typename T>
  24 +class scalarwave{
  25 +
  26 +public:
  27 +
  28 + stim::vec3<T> k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda
  29 + stim::complex<T> E0; //amplitude
  30 +
  31 + /// Bend a plane wave via refraction, given that the new propagation direction is known
  32 + CUDA_CALLABLE scalarwave<T> bend(stim::vec3<T> kn) const{
  33 + return scalarwave<T>(kn.norm() * kmag(), E0);
  34 + }
  35 +
  36 +public:
  37 +
  38 + ///constructor: create a plane wave propagating along k
  39 + CUDA_CALLABLE scalarwave(vec3<T> kvec = stim::vec3<T>(0, 0, (T)stim::TAU), complex<T> E = 1){
  40 + k = kvec;
  41 + E0 = E;
  42 + }
  43 +
  44 + CUDA_CALLABLE scalarwave(T kx, T ky, T kz, complex<T> E = 1){
  45 + k = vec3<T>(kx, ky, kz);
  46 + E0 = E;
  47 + }
  48 +
  49 + ///multiplication operator: scale E0
  50 + CUDA_CALLABLE scalarwave<T> & operator* (const T & rhs){
  51 + E0 = E0 * rhs;
  52 + return *this;
  53 + }
  54 +
  55 + CUDA_CALLABLE T lambda() const{
  56 + return stim::TAU / k.len();
  57 + }
  58 +
  59 + CUDA_CALLABLE T kmag() const{
  60 + return k.len();
  61 + }
  62 +
  63 + CUDA_CALLABLE complex<T> E(){
  64 + return E0;
  65 + }
  66 +
  67 + CUDA_CALLABLE vec3<T> kvec(){
  68 + return k;
  69 + }
  70 +
  71 + /// calculate the value of the field produced by the plane wave given a three-dimensional position
  72 + CUDA_CALLABLE complex<T> pos(T x, T y, T z){
  73 + return pos( stim::vec3<T>(x, y, z) );
  74 + }
  75 +
  76 + CUDA_CALLABLE complex<T> pos(vec3<T> p = vec3<T>(0, 0, 0)){
  77 + return E0 * exp(complex<T>(0, k.dot(p)));
  78 + }
  79 +
  80 + //scales k based on a transition from material ni to material nt
  81 + CUDA_CALLABLE scalarwave<T> n(T ni, T nt){
  82 + return scalarwave<T>(k * (nt / ni), E0);
  83 + }
  84 +
  85 + CUDA_CALLABLE scalarwave<T> refract(stim::vec3<T> kn) const{
  86 + return bend(kn);
  87 + }
  88 +
  89 + /// Calculate the result of a plane wave hitting an interface between two refractive indices
  90 +
  91 + /// @param P is a plane representing the position and orientation of the surface
  92 + /// @param n0 is the refractive index outside of the surface (in the direction of the normal)
  93 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  94 + /// @param r is the reflected component of the plane wave
  95 + /// @param t is the transmitted component of the plane wave
  96 + void scatter(stim::plane<T> P, T n0, T n1, scalarwave<T> &r, scalarwave<T> &t){
  97 + scatter(P, n1/n0, r, t);
  98 + }
  99 +
  100 + /// Calculate the scattering result when nr = n1/n0
  101 +
  102 + /// @param P is a plane representing the position and orientation of the surface
  103 + /// @param r is the ration n1/n0
  104 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  105 + /// @param r is the reflected component of the plane wave
  106 + /// @param t is the transmitted component of the plane wave
  107 + void scatter(stim::plane<T> P, T nr, scalarwave<T> &r, scalarwave<T> &t){
  108 + /*
  109 + int facing = P.face(k); //determine which direction the plane wave is coming in
  110 +
  111 + if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr
  112 + P = P.flip(); //flip the plane
  113 + nr = 1/nr; //invert the refractive index (now nr = n0/n1)
  114 + }
  115 +
  116 + //use Snell's Law to calculate the transmitted angle
  117 + T cos_theta_i = k.norm().dot(-P.norm()); //compute the cosine of theta_i
  118 + T theta_i = acos(cos_theta_i); //compute theta_i
  119 + T sin_theta_t = (1/nr) * sin(theta_i); //compute the sine of theta_t using Snell's law
  120 + T theta_t = asin(sin_theta_t); //compute the cosine of theta_t
  121 +
  122 + bool tir = false; //flag for total internal reflection
  123 + if(theta_t != theta_t){
  124 + tir = true;
  125 + theta_t = stim::PI / (T)2;
  126 + }
  127 +
  128 + //handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
  129 + if(theta_i == 0){
  130 + T rp = (1 - nr) / (1 + nr); //compute the Fresnel coefficients
  131 + T tp = 2 / (1 + nr);
  132 + vec3<T> kr = -k;
  133 + vec3<T> kt = k * nr; //set the k vectors for theta_i = 0
  134 + vec3< complex<T> > Er = E0 * rp; //compute the E vectors
  135 + vec3< complex<T> > Et = E0 * tp;
  136 + T phase_t = P.p().dot(k - kt); //compute the phase offset
  137 + T phase_r = P.p().dot(k - kr);
  138 +
  139 + //create the plane waves
  140 + r = planewave<T>(kr, Er, phase_r);
  141 + t = planewave<T>(kt, Et, phase_t);
  142 + return;
  143 + }
  144 +
  145 +
  146 + //compute the Fresnel coefficients
  147 + T rp, rs, tp, ts;
  148 + rp = tan(theta_t - theta_i) / tan(theta_t + theta_i);
  149 + rs = sin(theta_t - theta_i) / sin(theta_t + theta_i);
  150 +
  151 + if(tir){
  152 + tp = ts = 0;
  153 + }
  154 + else{
  155 + tp = ( 2 * sin(theta_t) * cos(theta_i) ) / ( sin(theta_t + theta_i) * cos(theta_t - theta_i) );
  156 + ts = ( 2 * sin(theta_t) * cos(theta_i) ) / sin(theta_t + theta_i);
  157 + }
  158 +
  159 + //compute the coordinate space for the plane of incidence
  160 + vec3<T> z_hat = -P.norm();
  161 + vec3<T> y_hat = P.parallel(k).norm();
  162 + vec3<T> x_hat = y_hat.cross(z_hat).norm();
  163 +
  164 + //compute the k vectors for r and t
  165 + vec3<T> kr, kt;
  166 + kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag();
  167 + kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr;
  168 +
  169 + //compute the magnitude of the p- and s-polarized components of the incident E vector
  170 + complex<T> Ei_s = E0.dot(x_hat);
  171 + int sgn = E0.dot(y_hat).sgn();
  172 + vec3< complex<T> > cx_hat = x_hat;
  173 + complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
  174 + //compute the magnitude of the p- and s-polarized components of the reflected E vector
  175 + complex<T> Er_s = Ei_s * rs;
  176 + complex<T> Er_p = Ei_p * rp;
  177 + //compute the magnitude of the p- and s-polarized components of the transmitted E vector
  178 + complex<T> Et_s = Ei_s * ts;
  179 + complex<T> Et_p = Ei_p * tp;
  180 +
  181 + //compute the reflected E vector
  182 + vec3< complex<T> > Er = vec3< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
  183 + //compute the transmitted E vector
  184 + vec3< complex<T> > Et = vec3< complex<T> >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s;
  185 +
  186 + T phase_t = P.p().dot(k - kt);
  187 + T phase_r = P.p().dot(k - kr);
  188 +
  189 + //create the plane waves
  190 + r.k = kr;
  191 + r.E0 = Er * exp( complex<T>(0, phase_r) );
  192 +
  193 + t.k = kt;
  194 + t.E0 = Et * exp( complex<T>(0, phase_t) );
  195 + */
  196 + }
  197 +
  198 + std::string str()
  199 + {
  200 + std::stringstream ss;
  201 + ss<<"Plane Wave:"<<std::endl;
  202 + ss<<" "<<E0<<" e^i ( "<<k<<" . r )";
  203 + return ss.str();
  204 + }
  205 +}; //end planewave class
  206 +
  207 +
  208 +/// CUDA kernel for computing the field produced by a batch of plane waves at an array of locations
  209 +template<typename T>
  210 +__global__ void cuda_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t n_waves){
  211 + extern __shared__ stim::scalarwave<T> shared_W[]; //declare the list of waves in shared memory
  212 +
  213 + stim::cuda::sharedMemcpy(shared_W, W, n_waves, threadIdx.x, blockDim.x); //copy the plane waves into shared memory for faster access
  214 + __syncthreads(); //synchronize threads to insure all data is copied
  215 +
  216 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  217 + if(i >= N) return; //exit if this thread is outside the array
  218 + T px, py, pz;
  219 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values and set positions
  220 + (y == NULL) ? py = 0 : py = y[i];
  221 + (z == NULL) ? pz = 0 : pz = z[i];
  222 +
  223 + stim::complex<T> f = 0; //create a register to store the result
  224 + for(size_t w = 0; w < n_waves; w++)
  225 + f += shared_W[w].pos(px, py, pz); //evaluate the plane wave
  226 + F[i] += f; //copy the result to device memory
  227 +}
  228 +
  229 +/// evaluate a scalar wave at several points, where all arrays are on the GPU
  230 +template<typename T>
  231 +void gpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  232 +
  233 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  234 + dim3 blocks(N / threads + 1); //calculate the optimal number of blocks
  235 + cuda_scalarwave<T><<< blocks, threads >>>(F, N, x, y, z, w); //call the kernel
  236 +}
  237 +
  238 +template<typename T>
  239 +void gpu_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW){
  240 +
  241 + size_t wave_bytes = sizeof(stim::scalarwave<T>);
  242 + size_t shared_bytes = stim::sharedMemPerBlock(); //calculate the maximum amount of shared memory available
  243 + size_t max_batch = shared_bytes / wave_bytes; //calculate number of plane waves that will fit into shared memory
  244 + size_t batch_bytes = min(nW, max_batch) * wave_bytes; //initialize the batch size (in bytes) to the maximum batch required
  245 +
  246 + stim::scalarwave<T>* batch_W;
  247 + HANDLE_ERROR(cudaMalloc(&batch_W, batch_bytes)); //allocate memory for a single batch of plane waves
  248 +
  249 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  250 + dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  251 +
  252 + size_t batch_size; //declare a variable to store the size of the current batch
  253 + size_t waves_processed = 0; //initialize the number of waves processed to zero
  254 + while(waves_processed < nW){ //while there are still waves to be processed
  255 + batch_size = min<size_t>(max_batch, nW - waves_processed); //process either a whole batch, or whatever is left
  256 + batch_bytes = batch_size * sizeof(stim::scalarwave<T>);
  257 + HANDLE_ERROR(cudaMemcpy(batch_W, W + waves_processed, batch_bytes, cudaMemcpyDeviceToDevice)); //copy the plane waves into global memory
  258 + cuda_scalarwave<T><<< blocks, threads, batch_bytes >>>(F, N, x, y, z, batch_W, batch_size); //call the kernel
  259 + waves_processed += batch_size; //increment the counter indicating how many waves have been processed
  260 + }
  261 + cudaFree(batch_W);
  262 +}
  263 +
  264 +/// Sums a series of coherent plane waves at a specified point
  265 +/// @param field is the output array of field values corresponding to each input point
  266 +/// @param x is an array of x coordinates for the field point
  267 +/// @param y is an array of y coordinates for the field point
  268 +/// @param z is an array of z coordinates for the field point
  269 +/// @param N is the number of points in the input and output arrays
  270 +/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
  271 +/// @param A is the list of amplitudes for each wave
  272 +/// @param S is the list of propagation directions for each wave
  273 +template<typename T>
  274 +void cpu_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > W){
  275 + size_t S = W.size(); //store the number of waves
  276 +#ifdef __CUDACC__
  277 + stim::complex<T>* dev_F; //allocate space for the field
  278 + cudaMalloc(&dev_F, N * sizeof(stim::complex<T>));
  279 + cudaMemcpy(dev_F, F, N * sizeof(stim::complex<T>), cudaMemcpyHostToDevice);
  280 + //cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>)); //set the field to zero (necessary because a sum is used)
  281 +
  282 + T* dev_x = NULL; //allocate space and copy the X coordinate (if specified)
  283 + if(x != NULL){
  284 + HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T)));
  285 + HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice));
  286 + }
  287 +
  288 + T* dev_y = NULL; //allocate space and copy the Y coordinate (if specified)
  289 + if(y != NULL){
  290 + HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T)));
  291 + HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice));
  292 + }
  293 +
  294 + T* dev_z = NULL; //allocate space and copy the Z coordinate (if specified)
  295 + if(z != NULL){
  296 + HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T)));
  297 + HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice));
  298 + }
  299 +
  300 + stim::scalarwave<T>* dev_W;
  301 + HANDLE_ERROR( cudaMalloc(&dev_W, sizeof(stim::scalarwave<T>) * W.size()) );
  302 + HANDLE_ERROR( cudaMemcpy(dev_W, &W[0], sizeof(stim::scalarwave<T>) * W.size(), cudaMemcpyHostToDevice) );
  303 +
  304 + gpu_scalarwaves(dev_F, N, dev_x, dev_y, dev_z, dev_W, W.size());
  305 +
  306 + cudaMemcpy(F, dev_F, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost); //copy the field from device memory
  307 +
  308 + if(x != NULL) cudaFree(dev_x); //free everything
  309 + if(y != NULL) cudaFree(dev_y);
  310 + if(z != NULL) cudaFree(dev_z);
  311 + cudaFree(dev_F);
  312 +#else
  313 + memset(F, 0, N * sizeof(stim::complex<T>));
  314 + T px, py, pz;
  315 + for(size_t i = 0; i < N; i++){ // for each element in the array
  316 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values
  317 + (y == NULL) ? py = 0 : py = y[i];
  318 + (z == NULL) ? pz = 0 : pz = z[i];
  319 +
  320 + for(size_t s = 0; s < S; s++){
  321 + F[i] += w_array[s].pos(px, py, pz); //sum all plane waves at this point
  322 + }
  323 + }
  324 +#endif
  325 +}
  326 +
  327 +template<typename T>
  328 +void cpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  329 + std::vector< stim::scalarwave<T> > w_array(1, w);
  330 + cpu_scalarwaves(F, N, x, y, z, w_array);
  331 +}
  332 +
  333 +template<typename T>
  334 +void cpu_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  335 + std::vector< stim::scalarwave<T> > w_array(1, w);
  336 + cpu_scalarwaves(F, N, x, y, z, w_array);
  337 +}
  338 +
  339 +
  340 +/// Sums a series of coherent plane waves at a specified point
  341 +/// @param x is the x coordinate of the field point
  342 +/// @param y is the y coordinate of the field point
  343 +/// @param z is the z coordinate of the field point
  344 +/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
  345 +/// @param A is the list of amplitudes for each wave
  346 +/// @param S is the list of propagation directions for each wave
  347 +template<typename T>
  348 +CUDA_CALLABLE stim::complex<T> cpu_scalarwaves(T x, T y, T z, std::vector< stim::scalarwave<T> > W){
  349 + size_t N = W.size(); //get the number of plane wave samples
  350 + stim::complex<T> field(0, 0); //initialize the field to zero (0)
  351 + stim::vec3<T> k; //allocate space for the direction vector
  352 + for(size_t i = 0; i < N; i++){
  353 + field += W[i].pos(x, y, z);
  354 + }
  355 + return field;
  356 +}
  357 +
  358 +} //end namespace stim
  359 +
  360 +template <typename T>
  361 +std::ostream& operator<<(std::ostream& os, stim::scalarwave<T> p)
  362 +{
  363 + os<<p.str();
  364 + return os;
  365 +}
  366 +
  367 +#endif
0 \ No newline at end of file 368 \ No newline at end of file
stim/optics/beam.h renamed to stim/optics_old/beam.h
1 -#ifndef RTS_BEAM  
2 -#define RTS_BEAM  
3 -  
4 -#include "../math/vector.h"  
5 -#include "../math/function.h"  
6 -#include "../optics/planewave.h"  
7 -#include <vector>  
8 -  
9 -namespace stim{  
10 -  
11 -template<typename P>  
12 -class beam : public planewave<P>  
13 -{  
14 -public:  
15 - enum beam_type {Uniform, Bartlett, Hamming, Hanning};  
16 -  
17 -private:  
18 -  
19 - P _na[2]; //numerical aperature of the focusing optics  
20 - vec<P> f; //focal point  
21 - function<P, P> apod; //apodization function  
22 - unsigned int apod_res; //resolution of apodization filter functions  
23 -  
24 - void apod_uniform()  
25 - {  
26 - apod = (P)1;  
27 - }  
28 - void apod_bartlett()  
29 - {  
30 - apod = (P)1;  
31 - apod.insert((P)1, (P)0);  
32 - }  
33 - void apod_hanning()  
34 - {  
35 - apod = (P)0;  
36 - P x, y;  
37 - for(unsigned int n=0; n<apod_res; n++)  
38 - {  
39 - x = (P)n/(P)apod_res;  
40 - y = pow( cos( ((P)3.14159 * x) / 2 ), 2);  
41 - apod.insert(x, y);  
42 - }  
43 - }  
44 - void apod_hamming()  
45 - {  
46 - apod = (P)0;  
47 - P x, y;  
48 - for(unsigned int n=0; n<apod_res; n++)  
49 - {  
50 - x = (P)n/(P)apod_res;  
51 - y = (P)27/(P)50 + ( (P)23/(P)50 ) * cos((P)3.14159 * x);  
52 - apod.insert(x, y);  
53 - }  
54 - }  
55 -  
56 - void set_apod(beam_type type)  
57 - {  
58 - if(type == Uniform)  
59 - apod_uniform();  
60 - if(type == Bartlett)  
61 - apod_bartlett();  
62 - if(type == Hanning)  
63 - apod_hanning();  
64 - if(type == Hamming)  
65 - apod_hamming();  
66 - }  
67 -  
68 -public:  
69 -  
70 - ///constructor: build a default beam (NA=1.0)  
71 - beam(  
72 - vec<P> k = rts::vec<P>(0, 0, rtsTAU),  
73 - vec<P> _E0 = rts::vec<P>(1, 0, 0),  
74 - beam_type _apod = Uniform)  
75 - : planewave<P>(k, _E0)  
76 - {  
77 - _na[0] = (P)0.0;  
78 - _na[1] = (P)1.0;  
79 - f = vec<P>( (P)0, (P)0, (P)0 );  
80 - apod_res = 256; //set the default resolution for apodization filters  
81 - set_apod(_apod); //set the apodization function type  
82 - }  
83 -  
84 - beam<P> refract(rts::vec<P> kn) const{  
85 -  
86 - beam<P> new_beam;  
87 - new_beam._na[0] = _na[0];  
88 - new_beam._na[1] = _na[1];  
89 -  
90 -  
91 - rts::planewave<P> pw = planewave<P>::bend(kn);  
92 - //std::cout<<pw.str()<<std::endl;  
93 -  
94 - new_beam.k = pw.kvec();  
95 - new_beam.E0 = pw.E();  
96 -  
97 - return new_beam;  
98 - }  
99 -  
100 - ///Numerical Aperature functions  
101 - void NA(P na)  
102 - {  
103 - _na[0] = (P)0;  
104 - _na[1] = na;  
105 - }  
106 - void NA(P na0, P na1)  
107 - {  
108 - _na[0] = na0;  
109 - _na[1] = na1;  
110 - }  
111 -  
112 - /*string str() :  
113 - {  
114 - stringstream ss;  
115 - ss<<"Beam Center: "<<k<<std::endl;  
116 -  
117 - return ss.str();  
118 - }*/  
119 -  
120 - //Monte-Carlo decomposition into plane waves  
121 - std::vector< planewave<P> > mc(unsigned int N = 100000, unsigned int seed = 0) const  
122 - {  
123 - /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling  
124 - of a sphere and projecting these samples onto an inscribed sphere.  
125 -  
126 - seed = seed for the random number generator  
127 - */  
128 - srand(seed); //seed the random number generator  
129 -  
130 - vec<P> k_hat = beam::k.norm();  
131 -  
132 - ///compute the rotation operator to transform (0, 0, 1) to k  
133 - P cos_angle = k_hat.dot(rts::vec<P>(0, 0, 1));  
134 - rts::matrix<P, 3> rotation;  
135 -  
136 - //if the cosine of the angle is -1, the rotation is just a flip across the z axis  
137 - if(cos_angle == -1){  
138 - rotation(2, 2) = -1;  
139 - }  
140 - else if(cos_angle != 1.0)  
141 - {  
142 - rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k_hat).norm(); //compute the axis of rotation  
143 - P angle = acos(cos_angle); //compute the angle of rotation  
144 - rts::quaternion<P> quat; //create a quaternion describing the rotation  
145 - quat.CreateRotation(angle, r_axis);  
146 - rotation = quat.toMatrix3(); //compute the rotation matrix  
147 - }  
148 -  
149 - //find the phi values associated with the cassegrain ring  
150 - P PHI[2];  
151 - PHI[0] = (P)asin(_na[0]);  
152 - PHI[1] = (P)asin(_na[1]);  
153 -  
154 - //calculate the z-axis cylinder coordinates associated with these angles  
155 - P Z[2];  
156 - Z[0] = cos(PHI[0]);  
157 - Z[1] = cos(PHI[1]);  
158 - P range = Z[0] - Z[1];  
159 -  
160 - std::vector< planewave<P> > samples; //create a vector of plane waves  
161 -  
162 - //draw a distribution of random phi, z values  
163 - P z, phi, theta;  
164 - for(int i=0; i<N; i++) //for each sample  
165 - {  
166 - z = ((P)rand() / (P)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder  
167 - theta = ((P)rand() / (P)RAND_MAX) * 2 * (P)3.14159;  
168 - phi = acos(z); //project onto the sphere, computing phi in spherical coordinates  
169 -  
170 - //compute and store cartesian coordinates  
171 - rts::vec<P> spherical(1, theta, phi); //convert from spherical to cartesian coordinates  
172 - rts::vec<P> cart = spherical.sph2cart();  
173 - vec<P> k_prime = rotation * cart; //create a sample vector  
174 -  
175 - //store a wave refracted along the given direction  
176 - //std::cout<<"k prime: "<<rotation<<std::endl;  
177 - samples.push_back(planewave<P>::refract(k_prime) * apod(phi/PHI[1]));  
178 - }  
179 -  
180 - return samples;  
181 - }  
182 -  
183 - std::string str()  
184 - {  
185 - std::stringstream ss;  
186 - ss<<"Beam:"<<std::endl;  
187 - //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;  
188 - ss<<" Central Plane Wave: "<<beam::k<<std::endl;  
189 - if(_na[0] == 0)  
190 - ss<<" NA: "<<_na[1];  
191 - else  
192 - ss<<" NA: "<<_na[0]<<" -- "<<_na[1];  
193 -  
194 - return ss.str();  
195 - }  
196 -  
197 -  
198 -  
199 -};  
200 -  
201 -}  
202 -  
203 -#endif 1 +#ifndef RTS_BEAM
  2 +#define RTS_BEAM
  3 +
  4 +#include "../math/vector.h"
  5 +#include "../math/function.h"
  6 +#include "../optics/planewave.h"
  7 +#include <vector>
  8 +
  9 +namespace stim{
  10 +
  11 +template<typename P>
  12 +class beam : public planewave<P>
  13 +{
  14 +public:
  15 + enum beam_type {Uniform, Bartlett, Hamming, Hanning};
  16 +
  17 +private:
  18 +
  19 + P _na[2]; //numerical aperature of the focusing optics
  20 + vec<P> f; //focal point
  21 + function<P, P> apod; //apodization function
  22 + unsigned int apod_res; //resolution of apodization filter functions
  23 +
  24 + void apod_uniform()
  25 + {
  26 + apod = (P)1;
  27 + }
  28 + void apod_bartlett()
  29 + {
  30 + apod = (P)1;
  31 + apod.insert((P)1, (P)0);
  32 + }
  33 + void apod_hanning()
  34 + {
  35 + apod = (P)0;
  36 + P x, y;
  37 + for(unsigned int n=0; n<apod_res; n++)
  38 + {
  39 + x = (P)n/(P)apod_res;
  40 + y = pow( cos( ((P)3.14159 * x) / 2 ), 2);
  41 + apod.insert(x, y);
  42 + }
  43 + }
  44 + void apod_hamming()
  45 + {
  46 + apod = (P)0;
  47 + P x, y;
  48 + for(unsigned int n=0; n<apod_res; n++)
  49 + {
  50 + x = (P)n/(P)apod_res;
  51 + y = (P)27/(P)50 + ( (P)23/(P)50 ) * cos((P)3.14159 * x);
  52 + apod.insert(x, y);
  53 + }
  54 + }
  55 +
  56 + void set_apod(beam_type type)
  57 + {
  58 + if(type == Uniform)
  59 + apod_uniform();
  60 + if(type == Bartlett)
  61 + apod_bartlett();
  62 + if(type == Hanning)
  63 + apod_hanning();
  64 + if(type == Hamming)
  65 + apod_hamming();
  66 + }
  67 +
  68 +public:
  69 +
  70 + ///constructor: build a default beam (NA=1.0)
  71 + beam(
  72 + vec<P> k = rts::vec<P>(0, 0, rtsTAU),
  73 + vec<P> _E0 = rts::vec<P>(1, 0, 0),
  74 + beam_type _apod = Uniform)
  75 + : planewave<P>(k, _E0)
  76 + {
  77 + _na[0] = (P)0.0;
  78 + _na[1] = (P)1.0;
  79 + f = vec<P>( (P)0, (P)0, (P)0 );
  80 + apod_res = 256; //set the default resolution for apodization filters
  81 + set_apod(_apod); //set the apodization function type
  82 + }
  83 +
  84 + beam<P> refract(rts::vec<P> kn) const{
  85 +
  86 + beam<P> new_beam;
  87 + new_beam._na[0] = _na[0];
  88 + new_beam._na[1] = _na[1];
  89 +
  90 +
  91 + rts::planewave<P> pw = planewave<P>::bend(kn);
  92 + //std::cout<<pw.str()<<std::endl;
  93 +
  94 + new_beam.k = pw.kvec();
  95 + new_beam.E0 = pw.E();
  96 +
  97 + return new_beam;
  98 + }
  99 +
  100 + ///Numerical Aperature functions
  101 + void NA(P na)
  102 + {
  103 + _na[0] = (P)0;
  104 + _na[1] = na;
  105 + }
  106 + void NA(P na0, P na1)
  107 + {
  108 + _na[0] = na0;
  109 + _na[1] = na1;
  110 + }
  111 +
  112 + /*string str() :
  113 + {
  114 + stringstream ss;
  115 + ss<<"Beam Center: "<<k<<std::endl;
  116 +
  117 + return ss.str();
  118 + }*/
  119 +
  120 + //Monte-Carlo decomposition into plane waves
  121 + std::vector< planewave<P> > mc(unsigned int N = 100000, unsigned int seed = 0) const
  122 + {
  123 + /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling
  124 + of a sphere and projecting these samples onto an inscribed sphere.
  125 +
  126 + seed = seed for the random number generator
  127 + */
  128 + srand(seed); //seed the random number generator
  129 +
  130 + vec<P> k_hat = beam::k.norm();
  131 +
  132 + ///compute the rotation operator to transform (0, 0, 1) to k
  133 + P cos_angle = k_hat.dot(rts::vec<P>(0, 0, 1));
  134 + rts::matrix<P, 3> rotation;
  135 +
  136 + //if the cosine of the angle is -1, the rotation is just a flip across the z axis
  137 + if(cos_angle == -1){
  138 + rotation(2, 2) = -1;
  139 + }
  140 + else if(cos_angle != 1.0)
  141 + {
  142 + rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k_hat).norm(); //compute the axis of rotation
  143 + P angle = acos(cos_angle); //compute the angle of rotation
  144 + rts::quaternion<P> quat; //create a quaternion describing the rotation
  145 + quat.CreateRotation(angle, r_axis);
  146 + rotation = quat.toMatrix3(); //compute the rotation matrix
  147 + }
  148 +
  149 + //find the phi values associated with the cassegrain ring
  150 + P PHI[2];
  151 + PHI[0] = (P)asin(_na[0]);
  152 + PHI[1] = (P)asin(_na[1]);
  153 +
  154 + //calculate the z-axis cylinder coordinates associated with these angles
  155 + P Z[2];
  156 + Z[0] = cos(PHI[0]);
  157 + Z[1] = cos(PHI[1]);
  158 + P range = Z[0] - Z[1];
  159 +
  160 + std::vector< planewave<P> > samples; //create a vector of plane waves
  161 +
  162 + //draw a distribution of random phi, z values
  163 + P z, phi, theta;
  164 + for(int i=0; i<N; i++) //for each sample
  165 + {
  166 + z = ((P)rand() / (P)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder
  167 + theta = ((P)rand() / (P)RAND_MAX) * 2 * (P)3.14159;
  168 + phi = acos(z); //project onto the sphere, computing phi in spherical coordinates
  169 +
  170 + //compute and store cartesian coordinates
  171 + rts::vec<P> spherical(1, theta, phi); //convert from spherical to cartesian coordinates
  172 + rts::vec<P> cart = spherical.sph2cart();
  173 + vec<P> k_prime = rotation * cart; //create a sample vector
  174 +
  175 + //store a wave refracted along the given direction
  176 + //std::cout<<"k prime: "<<rotation<<std::endl;
  177 + samples.push_back(planewave<P>::refract(k_prime) * apod(phi/PHI[1]));
  178 + }
  179 +
  180 + return samples;
  181 + }
  182 +
  183 + std::string str()
  184 + {
  185 + std::stringstream ss;
  186 + ss<<"Beam:"<<std::endl;
  187 + //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
  188 + ss<<" Central Plane Wave: "<<beam::k<<std::endl;
  189 + if(_na[0] == 0)
  190 + ss<<" NA: "<<_na[1];
  191 + else
  192 + ss<<" NA: "<<_na[0]<<" -- "<<_na[1];
  193 +
  194 + return ss.str();
  195 + }
  196 +
  197 +
  198 +
  199 +};
  200 +
  201 +}
  202 +
  203 +#endif
stim/optics/efield.cuh renamed to stim/optics_old/efield.cuh
stim/optics/esphere.cuh renamed to stim/optics_old/esphere.cuh
stim/optics/halfspace.cuh renamed to stim/optics_old/halfspace.cuh
stim/optics/material.h renamed to stim/optics_old/material.h
1 -#ifndef RTS_MATERIAL_H  
2 -#define RTS_MATERIAL_H  
3 -  
4 -#include <vector>  
5 -#include <ostream>  
6 -#include <iostream>  
7 -#include <fstream>  
8 -#include <complex>  
9 -#include <algorithm>  
10 -#include <sstream>  
11 -#include "../math/complex.h"  
12 -#include "../math/constants.h"  
13 -#include "../math/function.h"  
14 -  
15 -namespace stim{  
16 -  
17 -//Material class - default representation for the material property is the refractive index (RI)  
18 -template<typename T>  
19 -class material : public function< T, complex<T> >{  
20 -  
21 -public:  
22 - enum wave_property{microns, inverse_cm};  
23 - enum material_property{ri, absorbance};  
24 -  
25 -private:  
26 -  
27 - using function< T, complex<T> >::X;  
28 - using function< T, complex<T> >::Y;  
29 - using function< T, complex<T> >::insert;  
30 - using function< T, complex<T> >::bounding;  
31 -  
32 - std::string name; //name for the material (defaults to file name)  
33 -  
34 - void process_header(std::string str, wave_property& wp, material_property& mp){  
35 -  
36 - std::stringstream ss(str); //create a stream from the data string  
37 - std::string line;  
38 - std::getline(ss, line); //get the first line as a string  
39 - while(line[0] == '#'){ //continue looping while the line is a comment  
40 -  
41 - std::stringstream lstream(line); //create a stream from the line  
42 - lstream.ignore(); //ignore the first character ('#')  
43 -  
44 - std::string prop; //get the property name  
45 - lstream>>prop;  
46 -  
47 - if(prop == "X"){  
48 - std::string wp_name;  
49 - lstream>>wp_name;  
50 - if(wp_name == "microns") wp = microns;  
51 - else if(wp_name == "inverse_cm") wp = inverse_cm;  
52 - }  
53 - else if(prop == "Y"){  
54 - std::string mp_name;  
55 - lstream>>mp_name;  
56 - if(mp_name == "ri") mp = ri;  
57 - else if(mp_name == "absorbance") mp = absorbance;  
58 - }  
59 -  
60 - std::getline(ss, line); //get the next line  
61 - }  
62 -  
63 - function< T, stim::complex<T> >::process_string(str);  
64 - }  
65 -  
66 - void from_inverse_cm(){  
67 - //convert inverse centimeters to wavelength (in microns)  
68 - for(unsigned int i=0; i<X.size(); i++)  
69 - X[i] = 10000 / X[i];  
70 -  
71 - //reverse the function array  
72 - std::reverse(X.begin(), X.end());  
73 - std::reverse(Y.begin(), Y.end());  
74 -  
75 - }  
76 -  
77 - void init(){  
78 - bounding[0] = bounding[1] = stim::complex<T>(1, 0);  
79 - }  
80 -  
81 -  
82 -public:  
83 -  
84 - material(std::string filename, wave_property wp, material_property mp){  
85 - name = filename;  
86 - load(filename, wp, mp);  
87 - }  
88 -  
89 - material(std::string filename){  
90 - name = filename;  
91 - load(filename);  
92 - }  
93 -  
94 - material(){  
95 - init();  
96 - }  
97 -  
98 - complex<T> getN(T lambda){  
99 - return function< T, complex<T> >::linear(lambda);  
100 - }  
101 -  
102 - void load(std::string filename, wave_property wp, material_property mp){  
103 -  
104 - //load the file as a function  
105 - function< T, complex<T> >::load(filename);  
106 - }  
107 -  
108 - void load(std::string filename){  
109 -  
110 - wave_property wp = inverse_cm;  
111 - material_property mp = ri;  
112 - //turn the file into a string  
113 - std::ifstream t(filename.c_str()); //open the file as a stream  
114 -  
115 - if(!t){  
116 - std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl;  
117 - exit(1);  
118 - }  
119 - std::string str((std::istreambuf_iterator<char>(t)),  
120 - std::istreambuf_iterator<char>());  
121 -  
122 - //process the header information  
123 - process_header(str, wp, mp);  
124 -  
125 - //convert units  
126 - if(wp == inverse_cm)  
127 - from_inverse_cm();  
128 - //set the bounding values  
129 - bounding[0] = Y[0];  
130 - bounding[1] = Y.back();  
131 - }  
132 - std::string str(){  
133 - std::stringstream ss;  
134 - ss<<name<<std::endl;  
135 - ss<<function< T, complex<T> >::str();  
136 - return ss.str();  
137 - }  
138 - std::string get_name(){  
139 - return name;  
140 - }  
141 -  
142 - void set_name(std::string str){  
143 - name = str;  
144 - }  
145 -  
146 -};  
147 -  
148 -}  
149 -  
150 -  
151 -  
152 -  
153 -#endif 1 +#ifndef RTS_MATERIAL_H
  2 +#define RTS_MATERIAL_H
  3 +
  4 +#include <vector>
  5 +#include <ostream>
  6 +#include <iostream>
  7 +#include <fstream>
  8 +#include <complex>
  9 +#include <algorithm>
  10 +#include <sstream>
  11 +#include "../math/complex.h"
  12 +#include "../math/constants.h"
  13 +#include "../math/function.h"
  14 +
  15 +namespace stim{
  16 +
  17 +//Material class - default representation for the material property is the refractive index (RI)
  18 +template<typename T>
  19 +class material : public function< T, complex<T> >{
  20 +
  21 +public:
  22 + enum wave_property{microns, inverse_cm};
  23 + enum material_property{ri, absorbance};
  24 +
  25 +private:
  26 +
  27 + using function< T, complex<T> >::X;
  28 + using function< T, complex<T> >::Y;
  29 + using function< T, complex<T> >::insert;
  30 + using function< T, complex<T> >::bounding;
  31 +
  32 + std::string name; //name for the material (defaults to file name)
  33 +
  34 + void process_header(std::string str, wave_property& wp, material_property& mp){
  35 +
  36 + std::stringstream ss(str); //create a stream from the data string
  37 + std::string line;
  38 + std::getline(ss, line); //get the first line as a string
  39 + while(line[0] == '#'){ //continue looping while the line is a comment
  40 +
  41 + std::stringstream lstream(line); //create a stream from the line
  42 + lstream.ignore(); //ignore the first character ('#')
  43 +
  44 + std::string prop; //get the property name
  45 + lstream>>prop;
  46 +
  47 + if(prop == "X"){
  48 + std::string wp_name;
  49 + lstream>>wp_name;
  50 + if(wp_name == "microns") wp = microns;
  51 + else if(wp_name == "inverse_cm") wp = inverse_cm;
  52 + }
  53 + else if(prop == "Y"){
  54 + std::string mp_name;
  55 + lstream>>mp_name;
  56 + if(mp_name == "ri") mp = ri;
  57 + else if(mp_name == "absorbance") mp = absorbance;
  58 + }
  59 +
  60 + std::getline(ss, line); //get the next line
  61 + }
  62 +
  63 + function< T, stim::complex<T> >::process_string(str);
  64 + }
  65 +
  66 + void from_inverse_cm(){
  67 + //convert inverse centimeters to wavelength (in microns)
  68 + for(unsigned int i=0; i<X.size(); i++)
  69 + X[i] = 10000 / X[i];
  70 +
  71 + //reverse the function array
  72 + std::reverse(X.begin(), X.end());
  73 + std::reverse(Y.begin(), Y.end());
  74 +
  75 + }
  76 +
  77 + void init(){
  78 + bounding[0] = bounding[1] = stim::complex<T>(1, 0);
  79 + }
  80 +
  81 +
  82 +public:
  83 +
  84 + material(std::string filename, wave_property wp, material_property mp){
  85 + name = filename;
  86 + load(filename, wp, mp);
  87 + }
  88 +
  89 + material(std::string filename){
  90 + name = filename;
  91 + load(filename);
  92 + }
  93 +
  94 + material(){
  95 + init();
  96 + }
  97 +
  98 + complex<T> getN(T lambda){
  99 + return function< T, complex<T> >::linear(lambda);
  100 + }
  101 +
  102 + void load(std::string filename, wave_property wp, material_property mp){
  103 +
  104 + //load the file as a function
  105 + function< T, complex<T> >::load(filename);
  106 + }
  107 +
  108 + void load(std::string filename){
  109 +
  110 + wave_property wp = inverse_cm;
  111 + material_property mp = ri;
  112 + //turn the file into a string
  113 + std::ifstream t(filename.c_str()); //open the file as a stream
  114 +
  115 + if(!t){
  116 + std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl;
  117 + exit(1);
  118 + }
  119 + std::string str((std::istreambuf_iterator<char>(t)),
  120 + std::istreambuf_iterator<char>());
  121 +
  122 + //process the header information
  123 + process_header(str, wp, mp);
  124 +
  125 + //convert units
  126 + if(wp == inverse_cm)
  127 + from_inverse_cm();
  128 + //set the bounding values
  129 + bounding[0] = Y[0];
  130 + bounding[1] = Y.back();
  131 + }
  132 + std::string str(){
  133 + std::stringstream ss;
  134 + ss<<name<<std::endl;
  135 + ss<<function< T, complex<T> >::str();
  136 + return ss.str();
  137 + }
  138 + std::string get_name(){
  139 + return name;
  140 + }
  141 +
  142 + void set_name(std::string str){
  143 + name = str;
  144 + }
  145 +
  146 +};
  147 +
  148 +}
  149 +
  150 +
  151 +
  152 +
  153 +#endif
stim/optics/mirst-1d.cuh renamed to stim/optics_old/mirst-1d.cuh
1 -#include "../optics/material.h"  
2 -#include "../math/complexfield.cuh"  
3 -#include "../math/constants.h"  
4 -//#include "../envi/bil.h"  
5 -  
6 -#include "cufft.h"  
7 -  
8 -#include <vector>  
9 -#include <sstream>  
10 -  
11 -namespace stim{  
12 -  
//this function adds a sinc function to "dest" such that an iFFT produces a slab
//each thread handles one (frequency sample, wavelength) pair and accumulates into dest
template<typename T>
__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri,
									  T* src, T* zf,
									  T w, unsigned int zR, unsigned int nuR){
	//dest = complex field representing the sample
	//ri = refractive indices for each wavelength
	//src = intensity of the light source for each wavelength
	//zf = z position of the slab interface for each wavelength (accounting for optical path length)
	//w = width of the slab (in pixels)
	//zR = number of z-axis samples
	//nuR = number of wavelengths

	//get the current coordinate in the plane slice
	int ifz = blockIdx.x * blockDim.x + threadIdx.x;
	int inu = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(inu >= nuR || ifz >= zR) return;

	int i = inu * zR + ifz;

	//map the sample index to a normalized frequency: the upper half of the array holds negative frequencies
	T fz;
	if(ifz < zR/2)
		fz = ifz / (T)zR;
	else
		fz = -(T)(zR - ifz) / (T)zR;		//convert before negating: (zR - ifz) is unsigned, so negating it first would wrap to a huge positive value

	//if the slab starts outside of the simulation domain, just return
	if(zf[inu] >= zR) return;

	//fill the array along z with a sinc function representing the Fourier transform of the layer

	T opl = w * ri[inu].real();		//optical path length

	//handle the case where the slab goes outside the simulation domain
	if(zf[inu] + opl >= zR)
		opl = zR - zf[inu];

	if(opl == 0) return;

	//T l = w * ri[inu].real();
	//complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0));
	complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));		//phase term using the center of the slab (zf + opl/2)

	complex<T> eta = ri[inu] * ri[inu] - 1;

	//dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz);
	if(ifz == 0)
		dest[i] += opl * exp(e) * eta * src[inu];				//fz = 0: the sinc factor is 1
	else
		dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
}
66 -  
//advances the depth stored for each wavelength by one layer
template<typename T>
__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){
	//zf = current z depth (optical path length) in pixels
	//ri = refractive index of the material (NULL adds the plain width)
	//w = actual width of the layer (in pixels)
	//S = number of entries in zf (and in ri, when given)

	//one thread per entry; threads past the end of the array do nothing
	int idx = blockIdx.x * blockDim.x + threadIdx.x;
	if(idx >= S) return;

	if(ri != NULL)
		zf[idx] += ri[idx].real() * w;		//scale the width by the real part of the refractive index
	else
		zf[idx] += w;						//no refractive index given: add the width as-is
}
83 -  
//apply the 1D MIRST filter to an existing sample (overwriting the sample)
//each thread scales one (frequency sample, wavelength) entry of the sample transform
template<typename T>
__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda,
										 T dFz,
										 T inNA, T outNA,
										 unsigned int lambdaR, unsigned int zR,
										 T sigma = 0){
	//sampleFFT = the sample in the Fourier domain (will be overwritten)
	//lambda = list of wavelengths
	//dFz = delta along the Fz axis in the frequency domain
	//inNA = NA of the internal obscuration
	//outNA = NA of the objective
	//lambdaR = number of wavelengths
	//zR = number of pixels along the Fz axis (same as the z-axis)
	//sigma = width of the Gaussian source
	int ifz = blockIdx.x * blockDim.x + threadIdx.x;
	int inu = blockIdx.y * blockDim.y + threadIdx.y;

	//discard threads that fall outside of the array
	if(inu >= lambdaR || ifz >= zR) return;

	//calculate the index into the sample FT
	int idx = inu * zR + ifz;

	//the upper half of the array holds the negative spatial frequencies: zero them and exit
	if(!(ifz < zR / 2)){
		sampleFFT[idx] = 0;
		return;
	}

	//compute the spatial frequency of this sample
	T fz = ifz * dFz;

	//compute the frequency in inverse microns
	T nu = 1/lambda[inu];

	//determine the radius of the integration circle
	T nu_sq = nu * nu;
	T fz_sq = (fz * fz) / 4;

	//cut off frequencies above the diffraction limit
	T r = 0;
	if(fz_sq < nu_sq)
		r = sqrt(nu_sq - fz_sq);

	//account for the optics: only radii strictly between the two NA limits pass
	T Q = 0;
	if(r > nu * inNA && r < nu * outNA)
		Q = 1;

	//account for the source (Gaussian weighting; equal to 1 when sigma is 0)
	T s = exp( - (r*r * sigma*sigma) / 2 );

	//compute the final filter (left at zero for the zero-frequency sample to avoid dividing by fz = 0)
	T mirst = 0;
	if(fz != 0)
		mirst = 2 * stimPI * r * s * Q * (1/fz);

	sampleFFT[idx] *= mirst;

}
149 -  
150 -/*This object performs a 1-dimensional (layered) MIRST simulation  
151 -*/  
152 -template<typename T>  
153 -class mirst1d{  
154 -  
155 -private:  
156 - unsigned int Z; //z-axis resolution  
157 - unsigned int pad; //pixel padding on either side of the sample  
158 -  
159 - std::vector< material<T> > matlist; //list of materials  
160 - std::vector< T > layers; //list of layer thicknesses  
161 -  
162 - std::vector< T > lambdas; //list of wavelengths that are being simulated  
163 - unsigned int S; //number of wavelengths (size of "lambdas")  
164 -  
165 - T NA[2]; //numerical aperature (central obscuration and outer diameter)  
166 -  
167 - function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters)  
168 -  
169 - complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc.  
170 -  
171 - void fft(int direction = CUFFT_FORWARD){  
172 -  
173 - unsigned padZ = Z + pad;  
174 -  
175 - //create cuFFT handles  
176 - cufftHandle plan;  
177 - cufftResult result;  
178 -  
179 - if(sizeof(T) == 4)  
180 - result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision  
181 - else  
182 - result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision  
183 -  
184 - //check for Plan 1D errors  
185 - if(result != CUFFT_SUCCESS){  
186 - std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;  
187 - CufftError(result);  
188 - exit(1);  
189 - }  
190 -  
191 - if(sizeof(T) == 4)  
192 - result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);  
193 - else  
194 - result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);  
195 -  
196 - //check for FFT errors  
197 - if(result != CUFFT_SUCCESS){  
198 - std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;  
199 - CufftError(result);  
200 - exit(1);  
201 - }  
202 -  
203 - cufftDestroy(plan);  
204 - }  
205 -  
206 -  
207 - //initialize the scratch memory  
208 - void init_scratch(){  
209 - scratch = complexfield<T, 1>(Z + pad , lambdas.size());  
210 - scratch = 0;  
211 - }  
212 -  
213 - //get the list of scattering efficiency (eta) values for a specified layer  
214 - std::vector< complex<T> > layer_etas(unsigned int l){  
215 -  
216 - std::vector< complex<T> > etas;  
217 -  
218 - //fill the list of etas  
219 - for(unsigned int i=0; i<lambdas.size(); i++)  
220 - etas.push_back( matlist[l].eta(lambdas[i]) );  
221 - return etas;  
222 - }  
223 -  
224 - //calculates the optimal block and grid sizes using information from the GPU  
225 - void cuda_params(dim3& grids, dim3& blocks){  
226 - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size  
227 - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);  
228 -  
229 - //create one thread for each detector pixel  
230 - blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);  
231 - grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);  
232 - }  
233 -  
234 - //add the fourier transform of layer n to the scratch space  
235 - void build_layer_fft(unsigned int n, T* zf){  
236 - unsigned int paddedZ = Z + pad;  
237 -  
238 - T wpx = layers[n] / dz(); //calculate the width of the layer in pixels  
239 -  
240 - //allocate memory for the refractive index  
241 - complex<T>* gpuRi;  
242 - HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));  
243 -  
244 - //allocate memory for the source profile  
245 - T* gpuSrc;  
246 - HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));  
247 -  
248 - complex<T> ri;  
249 - T source;  
250 - //store the refractive index and source profile in a CPU array  
251 - for(int inu=0; inu<S; inu++){  
252 - //save the refractive index to the GPU  
253 - ri = matlist[n].getN(lambdas[inu]);  
254 - HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice ));  
255 -  
256 - //save the source profile to the GPU  
257 - source = source_profile(10000 / lambdas[inu]);  
258 - HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice ));  
259 -  
260 - }  
261 -  
262 - //create one thread for each pixel of the field slice  
263 - dim3 gridDim, blockDim;  
264 - cuda_params(gridDim, blockDim);  
265 - stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);  
266 -  
267 - int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size  
268 - int linGrid = S / linBlock + 1;  
269 - stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);  
270 -  
271 - //free memory  
272 - HANDLE_ERROR(cudaFree(gpuRi));  
273 - HANDLE_ERROR(cudaFree(gpuSrc));  
274 - }  
275 -  
276 - void build_sample(){  
277 - init_scratch(); //initialize the GPU scratch space  
278 - //build_layer(1);  
279 -  
280 - T* zf;  
281 - HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));  
282 - HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));  
283 -  
284 - //render each layer of the sample  
285 - for(unsigned int l=0; l<layers.size(); l++){  
286 - build_layer_fft(l, zf);  
287 - }  
288 -  
289 - HANDLE_ERROR(cudaFree(zf));  
290 - }  
291 -  
292 - void apply_filter(){  
293 - dim3 gridDim, blockDim;  
294 - cuda_params(gridDim, blockDim);  
295 -  
296 - unsigned int Zpad = Z + pad;  
297 -  
298 - T sim_range = dz() * Zpad;  
299 - T dFz = 1 / sim_range;  
300 -  
301 - //copy the array of wavelengths to the GPU  
302 - T* gpuLambdas;  
303 - HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad));  
304 - HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice));  
305 - stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas,  
306 - dFz,  
307 - NA[0], NA[1],  
308 - S, Zpad);  
309 - }  
310 -  
311 - //crop the image to the sample thickness - keep in mind that sample thickness != optical path length  
312 - void crop(){  
313 -  
314 - scratch = scratch.crop(Z, S);  
315 - }  
316 -  
317 - //save the scratch field as a binary file  
318 - void to_binary(std::string filename){  
319 -  
320 - }  
321 -  
322 -  
323 -public:  
324 -  
325 - //constructor  
326 - mirst1d(unsigned int rZ = 100,  
327 - unsigned int padding = 0){  
328 - Z = rZ;  
329 - pad = padding;  
330 - NA[0] = 0;  
331 - NA[1] = 0.8;  
332 - S = 0;  
333 - source_profile = 1;  
334 - }  
335 -  
336 - //add a layer, thickness = microns  
337 - void add_layer(material<T> mat, T thickness){  
338 - matlist.push_back(mat);  
339 - layers.push_back(thickness);  
340 - }  
341 -  
342 - void add_layer(std::string filename, T thickness){  
343 - add_layer(material<T>(filename), thickness);  
344 - }  
345 -  
346 - //adds a profile spectrum for the light source  
347 - void set_source(std::string filename){  
348 - source_profile.load(filename);  
349 - }  
350 -  
351 - //adds a block of wavenumbers (cm^-1) to the simulation parameters  
352 - void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){  
353 - unsigned int nu = start;  
354 - while(nu <= stop){  
355 - lambdas.push_back((T)10000 / nu);  
356 - nu += step;  
357 - }  
358 - S = lambdas.size(); //increment the number of wavelengths (shorthand for later)  
359 - }  
360 -  
361 - T thickness(){  
362 - T t = 0;  
363 - for(unsigned int l=0; l<layers.size(); l++)  
364 - t += layers[l];  
365 - return t;  
366 - }  
367 -  
368 - void padding(unsigned int padding = 0){  
369 - pad = padding;  
370 - }  
371 -  
372 - T dz(){  
373 - return thickness() / Z; //calculate the z-axis step size  
374 - }  
375 -  
376 - void na(T in, T out){  
377 - NA[0] = in;  
378 - NA[1] = out;  
379 - }  
380 -  
381 - void na(T out){  
382 - na(0, out);  
383 - }  
384 -  
385 - stim::function<T, T> get_source(){  
386 - return source_profile;  
387 - }  
388 -  
389 - void save_sample(std::string filename){  
390 - //create a sample and save the magnitude as an image  
391 - build_sample();  
392 - fft(CUFFT_INVERSE);  
393 - scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);  
394 - }  
395 -  
396 - void save_mirst(std::string filename, bool binary = true){  
397 - //apply the MIRST filter to a sample and save the image  
398 -  
399 - //build the sample in the Fourier domain  
400 - build_sample();  
401 -  
402 - //apply the MIRST filter  
403 - apply_filter();  
404 -  
405 - //apply an inverse FFT to bring the results back into the spatial domain  
406 - fft(CUFFT_INVERSE);  
407 -  
408 - crop();  
409 -  
410 - //save the image  
411 - if(binary)  
412 - to_binary(filename);  
413 - else  
414 - scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);  
415 - }  
416 -  
417 -  
418 -  
419 -  
420 - std::string str(){  
421 -  
422 - stringstream ss;  
423 - ss<<"1D MIRST Simulation========================="<<std::endl;  
424 - ss<<"z-axis resolution: "<<Z<<std::endl;  
425 - ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;  
426 - ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;  
427 - ss<<"padding: "<<pad<<std::endl;  
428 - ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;  
429 - ss<<"dz: "<<dz()<<" um"<<std::endl;  
430 - ss<<std::endl;  
431 - ss<<layers.size()<<" layers-------------"<<std::endl;  
432 - for(unsigned int l=0; l<layers.size(); l++)  
433 - ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;  
434 -  
435 - ss<<"source profile-----------"<<std::endl;  
436 - ss<<get_source().str()<<std::endl;  
437 -  
438 - return ss.str();  
439 -  
440 -  
441 - }  
442 -  
443 -  
444 -  
445 -};  
446 -  
447 -} 1 +#include "../optics/material.h"
  2 +#include "../math/complexfield.cuh"
  3 +#include "../math/constants.h"
  4 +//#include "../envi/bil.h"
  5 +
  6 +#include "cufft.h"
  7 +
  8 +#include <vector>
  9 +#include <sstream>
  10 +
  11 +namespace stim{
  12 +
  13 +//this function writes a sinc function to "dest" such that an iFFT produces a slab
  14 +template<typename T>
  15 +__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri,
  16 + T* src, T* zf,
  17 + T w, unsigned int zR, unsigned int nuR){
  18 + //dest = complex field representing the sample
  19 + //ri = refractive indices for each wavelength
  20 + //src = intensity of the light source for each wavelength
  21 + //zf = z position of the slab interface for each wavelength (accounting for optical path length)
  22 + //w = width of the slab (in pixels)
  23 + //zR = number of z-axis samples
  24 + //nuR = number of wavelengths
  25 +
  26 + //get the current coordinate in the plane slice
  27 + int ifz = blockIdx.x * blockDim.x + threadIdx.x;
  28 + int inu = blockIdx.y * blockDim.y + threadIdx.y;
  29 +
  30 + //make sure that the thread indices are in-bounds
  31 + if(inu >= nuR || ifz >= zR) return;
  32 +
  33 + int i = inu * zR + ifz;
  34 +
  35 + T fz;
  36 + if(ifz < zR/2)
  37 + fz = ifz / (T)zR;
  38 + else
  39 + fz = -(zR - ifz) / (T)zR;
  40 +
  41 + //if the slab starts outside of the simulation domain, just return
  42 + if(zf[inu] >= zR) return;
  43 +
  44 + //fill the array along z with a sinc function representing the Fourier transform of the layer
  45 +
  46 + T opl = w * ri[inu].real(); //optical path length
  47 +
  48 + //handle the case where the slab goes outside the simulation domain
  49 + if(zf[inu] + opl >= zR)
  50 + opl = zR - zf[inu];
  51 +
  52 + if(opl == 0) return;
  53 +
  54 + //T l = w * ri[inu].real();
  55 + //complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0));
  56 + complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));
  57 +
  58 + complex<T> eta = ri[inu] * ri[inu] - 1;
  59 +
  60 + //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz);
  61 + if(ifz == 0)
  62 + dest[i] += opl * exp(e) * eta * src[inu];
  63 + else
  64 + dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
  65 +}
  66 +
  67 +template<typename T>
  68 +__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){
  69 + //zf = current z depth (optical path length) in pixels
  70 + //ri = refractive index of the material
  71 + //w = actual width of the layer (in pixels)
  72 +
  73 +
  74 + //compute the index for this thread
  75 + int i = blockIdx.x * blockDim.x + threadIdx.x;
  76 + if(i >= S) return;
  77 +
  78 + if(ri == NULL)
  79 + zf[i] += w;
  80 + else
  81 + zf[i] += ri[i].real() * w;
  82 +}
  83 +
  84 +//apply the 1D MIRST filter to an existing sample (overwriting the sample)
  85 +template<typename T>
  86 +__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda,
  87 + T dFz,
  88 + T inNA, T outNA,
  89 + unsigned int lambdaR, unsigned int zR,
  90 + T sigma = 0){
  91 + //sampleFFT = the sample in the Fourier domain (will be overwritten)
  92 + //lambda = list of wavelengths
  93 + //dFz = delta along the Fz axis in the frequency domain
  94 + //inNA = NA of the internal obscuration
  95 + //outNA = NA of the objective
  96 + //zR = number of pixels along the Fz axis (same as the z-axis)
  97 + //lambdaR = number of wavelengths
  98 + //sigma = width of the Gaussian source
  99 + int ifz = blockIdx.x * blockDim.x + threadIdx.x;
  100 + int inu = blockIdx.y * blockDim.y + threadIdx.y;
  101 +
  102 + if(inu >= lambdaR || ifz >= zR) return;
  103 +
  104 + //calculate the index into the sample FT
  105 + int i = inu * zR + ifz;
  106 +
  107 + //compute the frequency (and set all negative spatial frequencies to zero)
  108 + T fz;
  109 + if(ifz < zR / 2)
  110 + fz = ifz * dFz;
  111 + //if the spatial frequency is negative, set it to zero and exit
  112 + else{
  113 + sampleFFT[i] = 0;
  114 + return;
  115 + }
  116 +
  117 + //compute the frequency in inverse microns
  118 + T nu = 1/lambda[inu];
  119 +
  120 + //determine the radius of the integration circle
  121 + T nu_sq = nu * nu;
  122 + T fz_sq = (fz * fz) / 4;
  123 +
  124 + //cut off frequencies above the diffraction limit
  125 + T r;
  126 + if(fz_sq < nu_sq)
  127 + r = sqrt(nu_sq - fz_sq);
  128 + else
  129 + r = 0;
  130 +
  131 + //account for the optics
  132 + T Q = 0;
  133 + if(r > nu * inNA && r < nu * outNA)
  134 + Q = 1;
  135 +
  136 + //account for the source
  137 + //T sigma = 30.0;
  138 + T s = exp( - (r*r * sigma*sigma) / 2 );
  139 + //T s=1;
  140 +
  141 + //compute the final filter
  142 + T mirst = 0;
  143 + if(fz != 0)
  144 + mirst = 2 * stimPI * r * s * Q * (1/fz);
  145 +
  146 + sampleFFT[i] *= mirst;
  147 +
  148 +}
  149 +
  150 +/*This object performs a 1-dimensional (layered) MIRST simulation
  151 +*/
  152 +template<typename T>
  153 +class mirst1d{
  154 +
  155 +private:
  156 + unsigned int Z; //z-axis resolution
  157 + unsigned int pad; //pixel padding on either side of the sample
  158 +
  159 + std::vector< material<T> > matlist; //list of materials
  160 + std::vector< T > layers; //list of layer thicknesses
  161 +
  162 + std::vector< T > lambdas; //list of wavelengths that are being simulated
  163 + unsigned int S; //number of wavelengths (size of "lambdas")
  164 +
  165 + T NA[2]; //numerical aperature (central obscuration and outer diameter)
  166 +
  167 + function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters)
  168 +
  169 + complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc.
  170 +
  171 + void fft(int direction = CUFFT_FORWARD){
  172 +
  173 + unsigned padZ = Z + pad;
  174 +
  175 + //create cuFFT handles
  176 + cufftHandle plan;
  177 + cufftResult result;
  178 +
  179 + if(sizeof(T) == 4)
  180 + result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision
  181 + else
  182 + result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision
  183 +
  184 + //check for Plan 1D errors
  185 + if(result != CUFFT_SUCCESS){
  186 + std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;
  187 + CufftError(result);
  188 + exit(1);
  189 + }
  190 +
  191 + if(sizeof(T) == 4)
  192 + result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);
  193 + else
  194 + result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);
  195 +
  196 + //check for FFT errors
  197 + if(result != CUFFT_SUCCESS){
  198 + std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;
  199 + CufftError(result);
  200 + exit(1);
  201 + }
  202 +
  203 + cufftDestroy(plan);
  204 + }
  205 +
  206 +
  207 + //initialize the scratch memory
  208 + void init_scratch(){
  209 + scratch = complexfield<T, 1>(Z + pad , lambdas.size());
  210 + scratch = 0;
  211 + }
  212 +
  213 + //get the list of scattering efficiency (eta) values for a specified layer
  214 + std::vector< complex<T> > layer_etas(unsigned int l){
  215 +
  216 + std::vector< complex<T> > etas;
  217 +
  218 + //fill the list of etas
  219 + for(unsigned int i=0; i<lambdas.size(); i++)
  220 + etas.push_back( matlist[l].eta(lambdas[i]) );
  221 + return etas;
  222 + }
  223 +
  224 + //calculates the optimal block and grid sizes using information from the GPU
  225 + void cuda_params(dim3& grids, dim3& blocks){
  226 + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
  227 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  228 +
  229 + //create one thread for each detector pixel
  230 + blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
  231 + grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);
  232 + }
  233 +
  234 + //add the fourier transform of layer n to the scratch space
  235 + void build_layer_fft(unsigned int n, T* zf){
  236 + unsigned int paddedZ = Z + pad;
  237 +
  238 + T wpx = layers[n] / dz(); //calculate the width of the layer in pixels
  239 +
  240 + //allocate memory for the refractive index
  241 + complex<T>* gpuRi;
  242 + HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));
  243 +
  244 + //allocate memory for the source profile
  245 + T* gpuSrc;
  246 + HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));
  247 +
  248 + complex<T> ri;
  249 + T source;
  250 + //store the refractive index and source profile in a CPU array
  251 + for(int inu=0; inu<S; inu++){
  252 + //save the refractive index to the GPU
  253 + ri = matlist[n].getN(lambdas[inu]);
  254 + HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice ));
  255 +
  256 + //save the source profile to the GPU
  257 + source = source_profile(10000 / lambdas[inu]);
  258 + HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice ));
  259 +
  260 + }
  261 +
  262 + //create one thread for each pixel of the field slice
  263 + dim3 gridDim, blockDim;
  264 + cuda_params(gridDim, blockDim);
  265 + stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);
  266 +
  267 + int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size
  268 + int linGrid = S / linBlock + 1;
  269 + stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);
  270 +
  271 + //free memory
  272 + HANDLE_ERROR(cudaFree(gpuRi));
  273 + HANDLE_ERROR(cudaFree(gpuSrc));
  274 + }
  275 +
  276 + void build_sample(){
  277 + init_scratch(); //initialize the GPU scratch space
  278 + //build_layer(1);
  279 +
  280 + T* zf;
  281 + HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));
  282 + HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));
  283 +
  284 + //render each layer of the sample
  285 + for(unsigned int l=0; l<layers.size(); l++){
  286 + build_layer_fft(l, zf);
  287 + }
  288 +
  289 + HANDLE_ERROR(cudaFree(zf));
  290 + }
  291 +
  292 + void apply_filter(){
  293 + dim3 gridDim, blockDim;
  294 + cuda_params(gridDim, blockDim);
  295 +
  296 + unsigned int Zpad = Z + pad;
  297 +
  298 + T sim_range = dz() * Zpad;
  299 + T dFz = 1 / sim_range;
  300 +
  301 + //copy the array of wavelengths to the GPU
  302 + T* gpuLambdas;
  303 + HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad));
  304 + HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice));
  305 + stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas,
  306 + dFz,
  307 + NA[0], NA[1],
  308 + S, Zpad);
  309 + }
  310 +
  311 + //crop the image to the sample thickness - keep in mind that sample thickness != optical path length
  312 + void crop(){
  313 +
  314 + scratch = scratch.crop(Z, S);
  315 + }
  316 +
  317 + //save the scratch field as a binary file
  318 + void to_binary(std::string filename){
  319 +
  320 + }
  321 +
  322 +
  323 +public:
  324 +
  325 + //constructor
  326 + mirst1d(unsigned int rZ = 100,
  327 + unsigned int padding = 0){
  328 + Z = rZ;
  329 + pad = padding;
  330 + NA[0] = 0;
  331 + NA[1] = 0.8;
  332 + S = 0;
  333 + source_profile = 1;
  334 + }
  335 +
  336 + //add a layer, thickness = microns
  337 + void add_layer(material<T> mat, T thickness){
  338 + matlist.push_back(mat);
  339 + layers.push_back(thickness);
  340 + }
  341 +
  342 + void add_layer(std::string filename, T thickness){
  343 + add_layer(material<T>(filename), thickness);
  344 + }
  345 +
  346 + //adds a profile spectrum for the light source
  347 + void set_source(std::string filename){
  348 + source_profile.load(filename);
  349 + }
  350 +
  351 + //adds a block of wavenumbers (cm^-1) to the simulation parameters
  352 + void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){
  353 + unsigned int nu = start;
  354 + while(nu <= stop){
  355 + lambdas.push_back((T)10000 / nu);
  356 + nu += step;
  357 + }
  358 + S = lambdas.size(); //increment the number of wavelengths (shorthand for later)
  359 + }
  360 +
  361 + T thickness(){
  362 + T t = 0;
  363 + for(unsigned int l=0; l<layers.size(); l++)
  364 + t += layers[l];
  365 + return t;
  366 + }
  367 +
  368 + void padding(unsigned int padding = 0){
  369 + pad = padding;
  370 + }
  371 +
  372 + T dz(){
  373 + return thickness() / Z; //calculate the z-axis step size
  374 + }
  375 +
  376 + void na(T in, T out){
  377 + NA[0] = in;
  378 + NA[1] = out;
  379 + }
  380 +
  381 + void na(T out){
  382 + na(0, out);
  383 + }
  384 +
  385 + stim::function<T, T> get_source(){
  386 + return source_profile;
  387 + }
  388 +
  389 + void save_sample(std::string filename){
  390 + //create a sample and save the magnitude as an image
  391 + build_sample();
  392 + fft(CUFFT_INVERSE);
  393 + scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
  394 + }
  395 +
  396 + void save_mirst(std::string filename, bool binary = true){
  397 + //apply the MIRST filter to a sample and save the image
  398 +
  399 + //build the sample in the Fourier domain
  400 + build_sample();
  401 +
  402 + //apply the MIRST filter
  403 + apply_filter();
  404 +
  405 + //apply an inverse FFT to bring the results back into the spatial domain
  406 + fft(CUFFT_INVERSE);
  407 +
  408 + crop();
  409 +
  410 + //save the image
  411 + if(binary)
  412 + to_binary(filename);
  413 + else
  414 + scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
  415 + }
  416 +
  417 +
  418 +
  419 +
  420 + std::string str(){
  421 +
  422 + stringstream ss;
  423 + ss<<"1D MIRST Simulation========================="<<std::endl;
  424 + ss<<"z-axis resolution: "<<Z<<std::endl;
  425 + ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;
  426 + ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;
  427 + ss<<"padding: "<<pad<<std::endl;
  428 + ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;
  429 + ss<<"dz: "<<dz()<<" um"<<std::endl;
  430 + ss<<std::endl;
  431 + ss<<layers.size()<<" layers-------------"<<std::endl;
  432 + for(unsigned int l=0; l<layers.size(); l++)
  433 + ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;
  434 +
  435 + ss<<"source profile-----------"<<std::endl;
  436 + ss<<get_source().str()<<std::endl;
  437 +
  438 + return ss.str();
  439 +
  440 +
  441 + }
  442 +
  443 +
  444 +
  445 +};
  446 +
  447 +}
stim/optics_old/planewave.h 0 โ†’ 100644
  1 +#ifndef RTS_PLANEWAVE
  2 +#define RTS_PLANEWAVE
  3 +
  4 +#include <string>
  5 +#include <sstream>
  6 +
  7 +#include "../math/vector.h"
  8 +#include "../math/quaternion.h"
  9 +#include "../math/constants.h"
  10 +#include "../math/plane.h"
  11 +#include "../cuda/callable.h"
  12 +
  13 +/*Basic conversions used here (assuming a vacuum)
  14 + lambda = tau / |k| (equivalently |k| = tau / lambda)
  15 +*/
  16 +
  17 +namespace stim{
  18 + namespace optics{
  19 +
  20 +template<typename T>
  21 +class planewave{
  22 +
  23 +protected:
  24 +
  25 + vec<T> k; //k = tau / lambda
  26 + vec< complex<T> > E0; //amplitude
  27 + //T phi;
  28 +
  29 + CUDA_CALLABLE planewave<T> bend(rts::vec<T> kn) const{
  30 +
  31 + vec<T> kn_hat = kn.norm(); //normalize the new k
  32 + vec<T> k_hat = k.norm(); //normalize the current k
  33 +
  34 + //std::cout<<"PLANE WAVE BENDING------------------"<<std::endl;
  35 + //std::cout<<"kn_hat: "<<kn_hat<<" k_hat: "<<k_hat<<std::endl;
  36 +
  37 + planewave<T> new_p; //create a new plane wave
  38 +
  39 + //if kn is equal to k or -k, handle the degenerate case
  40 + T k_dot_kn = k_hat.dot(kn_hat);
  41 +
  42 + //if k . n < 0, then the bend is a reflection
  43 + //flip k_hat
  44 + if(k_dot_kn < 0) k_hat = -k_hat;
  45 +
  46 + //std::cout<<"k dot kn: "<<k_dot_kn<<std::endl;
  47 +
  48 + //std::cout<<"k_dot_kn: "<<k_dot_kn<<std::endl;
  49 + if(k_dot_kn == -1){
  50 + new_p.k = -k;
  51 + new_p.E0 = E0;
  52 + return new_p;
  53 + }
  54 + else if(k_dot_kn == 1){
  55 + new_p.k = k;
  56 + new_p.E0 = E0;
  57 + return new_p;
  58 + }
  59 +
  60 + vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector
  61 +
  62 + //std::cout<<"r: "<<r<<std::endl;
  63 +
  64 + T theta = asin(r.len()); //compute the angle of the rotation about r
  65 +
  66 +
  67 +
  68 + //deal with a zero vector (both k and kn point in the same direction)
  69 + //if(theta == (T)0)
  70 + //{
  71 + // new_p = *this;
  72 + // return new_p;
  73 + //}
  74 +
  75 + //create a quaternion to capture the rotation
  76 + quaternion<T> q;
  77 + q.CreateRotation(theta, r.norm());
  78 +
  79 + //apply the rotation to E0
  80 + vec< complex<T> > E0n = q.toMatrix3() * E0;
  81 +
  82 + new_p.k = kn_hat * kmag();
  83 + new_p.E0 = E0n;
  84 +
  85 + return new_p;
  86 + }
  87 +
  88 +public:
  89 +
  90 +
  91 + ///constructor: create a plane wave propagating along z, polarized along x
  92 + /*planewave(T lambda = (T)1)
  93 + {
  94 + k = rts::vec<T>(0, 0, 1) * (TAU/lambda);
  95 + E0 = rts::vec<T>(1, 0, 0);
  96 + }*/
  97 + ///constructor: create a plane wave propagating along k, polarized along _E0, at frequency _omega
  98 + CUDA_CALLABLE planewave(vec<T> kvec = rts::vec<T>(0, 0, rtsTAU),
  99 + vec< complex<T> > E = rts::vec<T>(1, 0, 0), T phase = 0)
  100 + {
  101 + //phi = phase;
  102 +
  103 + k = kvec;
  104 + vec< complex<T> > k_hat = k.norm();
  105 +
  106 + if(E.len() == 0) //if the plane wave has an amplitude of 0
  107 + E0 = vec<T>(0); //just return it
  108 + else{
  109 + vec< complex<T> > s = (k_hat.cross(E)).norm(); //compute an orthogonal side vector
  110 + vec< complex<T> > E_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector
  111 + E0 = E_hat * E_hat.dot(E); //compute the projection of _E0 onto E0_hat
  112 + }
  113 +
  114 + E0 = E0 * exp( complex<T>(0, phase) );
  115 + }
  116 +
  117 + ///multiplication operator: scale E0
  118 + CUDA_CALLABLE planewave<T> & operator* (const T & rhs)
  119 + {
  120 +
  121 + E0 = E0 * rhs;
  122 + return *this;
  123 + }
  124 +
  125 + CUDA_CALLABLE T lambda() const
  126 + {
  127 + return rtsTAU / k.len();
  128 + }
  129 +
  130 + CUDA_CALLABLE T kmag() const
  131 + {
  132 + return k.len();
  133 + }
  134 +
  135 + CUDA_CALLABLE vec< complex<T> > E(){
  136 + return E0;
  137 + }
  138 +
  139 + CUDA_CALLABLE vec<T> kvec(){
  140 + return k;
  141 + }
  142 +
  143 + /*CUDA_CALLABLE T phase(){
  144 + return phi;
  145 + }
  146 +
  147 + CUDA_CALLABLE void phase(T p){
  148 + phi = p;
  149 + }*/
  150 +
  151 + CUDA_CALLABLE vec< complex<T> > pos(vec<T> p = vec<T>(0, 0, 0)){
  152 + vec< complex<T> > result;
  153 +
  154 + T kdp = k.dot(p);
  155 + complex<T> x = complex<T>(0, kdp);
  156 + complex<T> expx = exp(x);
  157 +
  158 + result[0] = E0[0] * expx;
  159 + result[1] = E0[1] * expx;
  160 + result[2] = E0[2] * expx;
  161 +
  162 + return result;
  163 + }
  164 +
  165 + //scales k based on a transition from material ni to material nt
  166 + CUDA_CALLABLE planewave<T> n(T ni, T nt){
  167 + return planewave<T>(k * (nt / ni), E0);
  168 + }
  169 +
  170 + CUDA_CALLABLE planewave<T> refract(rts::vec<T> kn) const
  171 + {
  172 + return bend(kn);
  173 + }
  174 +
  175 + void scatter(rts::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){
  176 +
  177 + int facing = P.face(k); //determine which direction the plane wave is coming in
  178 +
  179 + //if(facing == 0) //if the wave is tangent to the plane, return an identical wave
  180 + // return *this;
  181 + //else
  182 + if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr
  183 + P = P.flip(); //flip the plane
  184 + nr = 1/nr; //invert the refractive index (now nr = n0/n1)
  185 + }
  186 +
  187 + //use Snell's Law to calculate the transmitted angle
  188 + T cos_theta_i = k.norm().dot(-P.norm()); //compute the cosine of theta_i
  189 + T theta_i = acos(cos_theta_i); //compute theta_i
  190 + T sin_theta_t = (1/nr) * sin(theta_i); //compute the sine of theta_t using Snell's law
  191 + T theta_t = asin(sin_theta_t); //compute the cosine of theta_t
  192 +
  193 + bool tir = false; //flag for total internal reflection
  194 + if(theta_t != theta_t){
  195 + tir = true;
  196 + theta_t = rtsPI / (T)2;
  197 + }
  198 +
  199 + //handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
  200 + if(theta_i == 0){
  201 + T rp = (1 - nr) / (1 + nr); //compute the Fresnel coefficients
  202 + T tp = 2 / (1 + nr);
  203 + vec<T> kr = -k;
  204 + vec<T> kt = k * nr; //set the k vectors for theta_i = 0
  205 + vec< complex<T> > Er = E0 * rp; //compute the E vectors
  206 + vec< complex<T> > Et = E0 * tp;
  207 + T phase_t = P.p().dot(k - kt); //compute the phase offset
  208 + T phase_r = P.p().dot(k - kr);
  209 + //std::cout<<"Degeneracy: Head-On"<<std::endl;
  210 + //std::cout<<"rs: "<<rp<<" rp: "<<rp<<" ts: "<<tp<<" tp: "<<tp<<std::endl;
  211 + //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;
  212 +
  213 + //create the plane waves
  214 + r = planewave<T>(kr, Er, phase_r);
  215 + t = planewave<T>(kt, Et, phase_t);
  216 +
  217 + //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;
  218 + //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;
  219 + //std::cout<<"--------------------------------"<<std::endl;
  220 + return;
  221 + }
  222 +
  223 +
  224 + //compute the Fresnel coefficients
  225 + T rp, rs, tp, ts;
  226 + rp = tan(theta_t - theta_i) / tan(theta_t + theta_i);
  227 + rs = sin(theta_t - theta_i) / sin(theta_t + theta_i);
  228 +
  229 + if(tir){
  230 + tp = ts = 0;
  231 + }
  232 + else{
  233 + tp = ( 2 * sin(theta_t) * cos(theta_i) ) / ( sin(theta_t + theta_i) * cos(theta_t - theta_i) );
  234 + ts = ( 2 * sin(theta_t) * cos(theta_i) ) / sin(theta_t + theta_i);
  235 + }
  236 +
  237 + //compute the coordinate space for the plane of incidence
  238 + vec<T> z_hat = -P.norm();
  239 + vec<T> y_hat = P.parallel(k).norm();
  240 + vec<T> x_hat = y_hat.cross(z_hat).norm();
  241 +
  242 + //compute the k vectors for r and t
  243 + vec<T> kr, kt;
  244 + kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag();
  245 + kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr;
  246 +
  247 + //compute the magnitude of the p- and s-polarized components of the incident E vector
  248 + complex<T> Ei_s = E0.dot(x_hat);
  249 + //int sgn = (0 < E0.dot(y_hat)) - (E0.dot(y_hat) < 0);
  250 + int sgn = E0.dot(y_hat).sgn();
  251 + vec< complex<T> > cx_hat = x_hat;
  252 + complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
  253 + //T Ei_p = ( E0 - x_hat * Ei_s ).len();
  254 + //compute the magnitude of the p- and s-polarized components of the reflected E vector
  255 + complex<T> Er_s = Ei_s * rs;
  256 + complex<T> Er_p = Ei_p * rp;
  257 + //compute the magnitude of the p- and s-polarized components of the transmitted E vector
  258 + complex<T> Et_s = Ei_s * ts;
  259 + complex<T> Et_p = Ei_p * tp;
  260 +
  261 + //std::cout<<"E0: "<<E0<<std::endl;
  262 + //std::cout<<"E0 dot y_hat: "<<E0.dot(y_hat)<<std::endl;
  263 + //std::cout<<"theta i: "<<theta_i<<" theta t: "<<theta_t<<std::endl;
  264 + //std::cout<<"x_hat: "<<x_hat<<" y_hat: "<<y_hat<<" z_hat: "<<z_hat<<std::endl;
  265 + //std::cout<<"Ei_s: "<<Ei_s<<" Ei_p: "<<Ei_p<<" Er_s: "<<Er_s<<" Er_p: "<<Er_p<<" Et_s: "<<Et_s<<" Et_p: "<<Et_p<<std::endl;
  266 + //std::cout<<"rs: "<<rs<<" rp: "<<rp<<" ts: "<<ts<<" tp: "<<tp<<std::endl;
  267 +
  268 +
  269 + //compute the reflected E vector
  270 + vec< complex<T> > Er = vec< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
  271 + //compute the transmitted E vector
  272 + vec< complex<T> > Et = vec< complex<T> >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s;
  273 +
  274 + T phase_t = P.p().dot(k - kt);
  275 + T phase_r = P.p().dot(k - kr);
  276 +
  277 + //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;
  278 +
  279 + //std::cout<<"phase: "<<phase<<std::endl;
  280 +
  281 + //create the plane waves
  282 + r.k = kr;
  283 + r.E0 = Er * exp( complex<T>(0, phase_r) );
  284 + //r.phi = phase_r;
  285 +
  286 + //t = bend(kt);
  287 + //t.k = t.k * nr;
  288 +
  289 + t.k = kt;
  290 + t.E0 = Et * exp( complex<T>(0, phase_t) );
  291 + //t.phi = phase_t;
  292 + //std::cout<<"i: "<<str()<<std::endl;
  293 + //std::cout<<"r: "<<r.str()<<std::endl;
  294 + //std::cout<<"t: "<<t.str()<<std::endl;
  295 +
  296 + //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;
  297 + //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;
  298 + //std::cout<<"--------------------------------"<<std::endl;
  299 +
  300 + }
  301 +
  302 + std::string str()
  303 + {
  304 + std::stringstream ss;
  305 + ss<<"Plane Wave:"<<std::endl;
  306 + ss<<" "<<E0<<" e^i ( "<<k<<" . r )";
  307 + return ss.str();
  308 + }
  309 +}; //end planewave class
  310 +} //end namespace optics
  311 +} //end namespace stim
  312 +
  313 +template <typename T>
  314 +std::ostream& operator<<(std::ostream& os, rts::planewave<T> p)
  315 +{
  316 + os<<p.str();
  317 + return os;
  318 +}
  319 +
  320 +#endif
stim/visualization/aaboundingbox.h
@@ -10,8 +10,8 @@ class aaboundingbox{ @@ -10,8 +10,8 @@ class aaboundingbox{
10 10
11 public: 11 public:
12 bool set; //has the bounding box been set to include any points? 12 bool set; //has the bounding box been set to include any points?
13 - stim::vec<T> A; //minimum point in the bounding box  
14 - stim::vec<T> B; //maximum point in the bounding box 13 + stim::vec3<T> A; //minimum point in the bounding box
  14 + stim::vec3<T> B; //maximum point in the bounding box
15 15
16 aaboundingbox(){ //constructor generates an empty bounding box 16 aaboundingbox(){ //constructor generates an empty bounding box
17 set = false; 17 set = false;
@@ -21,7 +21,7 @@ public: @@ -21,7 +21,7 @@ public:
21 /// Test if a point is inside of the bounding box and returns true if it is. 21 /// Test if a point is inside of the bounding box and returns true if it is.
22 22
23 /// @param p is the point to be tested 23 /// @param p is the point to be tested
24 - bool test(stim::vec<T> p){ 24 + bool test(stim::vec3<T> p){
25 25
26 for(unsigned d = 0; d < p.size(); p++){ //for each dimension 26 for(unsigned d = 0; d < p.size(); p++){ //for each dimension
27 if(p[d] < A[d]) return false; //if the point is less than the minimum bound, return false 27 if(p[d] < A[d]) return false; //if the point is less than the minimum bound, return false
@@ -33,7 +33,7 @@ public: @@ -33,7 +33,7 @@ public:
33 /// Expand the bounding box to include the specified point. 33 /// Expand the bounding box to include the specified point.
34 34
35 /// @param p is the point to be included 35 /// @param p is the point to be included
36 - void expand(stim::vec<T> p){ 36 + void expand(stim::vec3<T> p){
37 37
38 if(!set){ //if the bounding box is empty, fill it with the current point 38 if(!set){ //if the bounding box is empty, fill it with the current point
39 A = B = p; 39 A = B = p;
@@ -47,12 +47,12 @@ public: @@ -47,12 +47,12 @@ public:
47 } 47 }
48 48
49 /// Return the center point of the bounding box as a stim::vec 49 /// Return the center point of the bounding box as a stim::vec
50 - stim::vec<T> center(){ 50 + stim::vec3<T> center(){
51 return (B + A) * 0.5; 51 return (B + A) * 0.5;
52 } 52 }
53 53
54 /// Return the size of the bounding box as a stim::vec 54 /// Return the size of the bounding box as a stim::vec
55 - stim::vec<T> size(){ 55 + stim::vec3<T> size(){
56 return (B - A); 56 return (B - A);
57 } 57 }
58 58
stim/visualization/camera.h
@@ -11,32 +11,32 @@ namespace stim{ @@ -11,32 +11,32 @@ namespace stim{
11 11
12 class camera 12 class camera
13 { 13 {
14 - vec<float> d; //direction that the camera is pointing  
15 - vec<float> p; //position of the camera  
16 - vec<float> up; //"up" direction 14 + vec3<float> d; //direction that the camera is pointing
  15 + vec3<float> p; //position of the camera
  16 + vec3<float> up; //"up" direction
17 float focus; //focal length of the camera 17 float focus; //focal length of the camera
18 float fov; 18 float fov;
19 19
20 //private function makes sure that the up vector is orthogonal to the direction vector and both are normalized 20 //private function makes sure that the up vector is orthogonal to the direction vector and both are normalized
21 void stabalize() 21 void stabalize()
22 { 22 {
23 - vec<float> side = up.cross(d); 23 + vec3<float> side = up.cross(d);
24 up = d.cross(side); 24 up = d.cross(side);
25 up = up.norm(); 25 up = up.norm();
26 d = d.norm(); 26 d = d.norm();
27 } 27 }
28 28
29 public: 29 public:
30 - void setPosition(vec<float> pos) 30 + void setPosition(vec3<float> pos)
31 { 31 {
32 p = pos; 32 p = pos;
33 } 33 }
34 - void setPosition(float x, float y, float z){setPosition(vec<float>(x, y, z));} 34 + void setPosition(float x, float y, float z){setPosition(vec3<float>(x, y, z));}
35 35
36 void setFocalDistance(float distance){focus = distance;} 36 void setFocalDistance(float distance){focus = distance;}
37 void setFOV(float field_of_view){fov = field_of_view;} 37 void setFOV(float field_of_view){fov = field_of_view;}
38 38
39 - void LookAt(vec<float> pos) 39 + void LookAt(vec3<float> pos)
40 { 40 {
41 //find the new direction 41 //find the new direction
42 d = pos - p; 42 d = pos - p;
@@ -47,22 +47,22 @@ public: @@ -47,22 +47,22 @@ public:
47 //stabalize the camera 47 //stabalize the camera
48 stabalize(); 48 stabalize();
49 } 49 }
50 - void LookAt(float px, float py, float pz){LookAt(vec<float>(px, py, pz));}  
51 - void LookAt(vec<float> pos, vec<float> new_up){up = new_up; LookAt(pos);}  
52 - void LookAt(float px, float py, float pz, float ux, float uy, float uz){LookAt(vec<float>(px, py, pz), vec<float>(ux, uy, uz));} 50 + void LookAt(float px, float py, float pz){LookAt(vec3<float>(px, py, pz));}
  51 + void LookAt(vec3<float> pos, vec3<float> new_up){up = new_up; LookAt(pos);}
  52 + void LookAt(float px, float py, float pz, float ux, float uy, float uz){LookAt(vec3<float>(px, py, pz), vec3<float>(ux, uy, uz));}
53 void LookAtDolly(float lx, float ly, float lz) 53 void LookAtDolly(float lx, float ly, float lz)
54 { 54 {
55 //find the current focus point 55 //find the current focus point
56 - vec<float> f = p + focus*d;  
57 - vec<float> T = vec<float>(lx, ly, lz) - f; 56 + vec3<float> f = p + focus*d;
  57 + vec3<float> T = vec3<float>(lx, ly, lz) - f;
58 p = p + T; 58 p = p + T;
59 } 59 }
60 60
61 - void Dolly(vec<float> direction) 61 + void Dolly(vec3<float> direction)
62 { 62 {
63 p = p+direction; 63 p = p+direction;
64 } 64 }
65 - void Dolly(float x, float y, float z){Dolly(vec<float>(x, y, z));} 65 + void Dolly(float x, float y, float z){Dolly(vec3<float>(x, y, z));}
66 void Push(float delta) 66 void Push(float delta)
67 { 67 {
68 if(delta > focus) 68 if(delta > focus)
@@ -80,7 +80,7 @@ public: @@ -80,7 +80,7 @@ public:
80 qx.CreateRotation(theta_x, up[0], up[1], up[2]); 80 qx.CreateRotation(theta_x, up[0], up[1], up[2]);
81 81
82 //y rotation is around the side axis 82 //y rotation is around the side axis
83 - vec<float> side = up.cross(d); 83 + vec3<float> side = up.cross(d);
84 quaternion<float> qy; 84 quaternion<float> qy;
85 qy.CreateRotation(theta_y, side[0], side[1], side[2]); 85 qy.CreateRotation(theta_y, side[0], side[1], side[2]);
86 86
@@ -118,28 +118,28 @@ public: @@ -118,28 +118,28 @@ public:
118 void OrbitFocus(float theta_x, float theta_y) 118 void OrbitFocus(float theta_x, float theta_y)
119 { 119 {
120 //find the focal point 120 //find the focal point
121 - vec<float> focal_point = p + focus*d; 121 + vec3<float> focal_point = p + focus*d;
122 122
123 //center the coordinate system on the focal point 123 //center the coordinate system on the focal point
124 - vec<float> centered = p - (focal_point - vec<float>(0, 0, 0)); 124 + vec3<float> centered = p - (focal_point - vec3<float>(0, 0, 0));
125 125
126 //create the x rotation (around the up vector) 126 //create the x rotation (around the up vector)
127 quaternion<float> qx; 127 quaternion<float> qx;
128 qx.CreateRotation(theta_x, up[0], up[1], up[2]); 128 qx.CreateRotation(theta_x, up[0], up[1], up[2]);
129 - centered = vec<float>(0, 0, 0) + qx.toMatrix3()*(centered - vec<float>(0, 0, 0)); 129 + centered = vec3<float>(0, 0, 0) + qx.toMatrix3()*(centered - vec3<float>(0, 0, 0));
130 130
131 //get a side vector for theta_y rotation 131 //get a side vector for theta_y rotation
132 - vec<float> side = up.cross((vec<float>(0, 0, 0) - centered).norm()); 132 + vec3<float> side = up.cross((vec3<float>(0, 0, 0) - centered).norm());
133 133
134 quaternion<float> qy; 134 quaternion<float> qy;
135 qy.CreateRotation(theta_y, side[0], side[1], side[2]); 135 qy.CreateRotation(theta_y, side[0], side[1], side[2]);
136 - centered = vec<float>(0, 0, 0) + qy.toMatrix3()*(centered - vec<float>(0, 0, 0)); 136 + centered = vec3<float>(0, 0, 0) + qy.toMatrix3()*(centered - vec3<float>(0, 0, 0));
137 137
138 //perform the rotation on the centered camera position 138 //perform the rotation on the centered camera position
139 //centered = final.toMatrix()*centered; 139 //centered = final.toMatrix()*centered;
140 140
141 //re-position the camera 141 //re-position the camera
142 - p = centered + (focal_point - vec<float>(0, 0, 0)); 142 + p = centered + (focal_point - vec3<float>(0, 0, 0));
143 143
144 //make sure we are looking at the focal point 144 //make sure we are looking at the focal point
145 LookAt(focal_point); 145 LookAt(focal_point);
@@ -151,17 +151,17 @@ public: @@ -151,17 +151,17 @@ public:
151 151
152 void Slide(float u, float v) 152 void Slide(float u, float v)
153 { 153 {
154 - vec<float> V = up.norm();  
155 - vec<float> U = up.cross(d).norm(); 154 + vec3<float> V = up.norm();
  155 + vec3<float> U = up.cross(d).norm();
156 156
157 p = p + (V * v) + (U * u); 157 p = p + (V * v) + (U * u);
158 } 158 }
159 159
160 //accessor methods 160 //accessor methods
161 - vec<float> getPosition(){return p;}  
162 - vec<float> getUp(){return up;}  
163 - vec<float> getDirection(){return d;}  
164 - vec<float> getLookAt(){return p + focus*d;} 161 + vec3<float> getPosition(){return p;}
  162 + vec3<float> getUp(){return up;}
  163 + vec3<float> getDirection(){return d;}
  164 + vec3<float> getLookAt(){return p + focus*d;}
165 float getFOV(){return fov;} 165 float getFOV(){return fov;}
166 166
167 //output the camera settings 167 //output the camera settings
@@ -182,9 +182,9 @@ public: @@ -182,9 +182,9 @@ public:
182 //constructor 182 //constructor
183 camera() 183 camera()
184 { 184 {
185 - p = vec<float>(0, 0, 0);  
186 - d = vec<float>(0, 0, 1);  
187 - up = vec<float>(0, 1, 0); 185 + p = vec3<float>(0, 0, 0);
  186 + d = vec3<float>(0, 0, 1);
  187 + up = vec3<float>(0, 1, 0);
188 focus = 1; 188 focus = 1;
189 189
190 } 190 }
stim/visualization/cylinder.h
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 #define STIM_CYLINDER_H 2 #define STIM_CYLINDER_H
3 #include <iostream> 3 #include <iostream>
4 #include <stim/math/circle.h> 4 #include <stim/math/circle.h>
5 -#include <stim/math/vector.h> 5 +#include <stim/math/vec3.h>
6 6
7 7
8 namespace stim 8 namespace stim
@@ -25,11 +25,11 @@ class cylinder @@ -25,11 +25,11 @@ class cylinder
25 25
26 ///inits the cylinder from a list of points (inP) and radii (inM) 26 ///inits the cylinder from a list of points (inP) and radii (inM)
27 void 27 void
28 - init(std::vector<stim::vec<T> > inP, std::vector<stim::vec<T> > inM) 28 + init(std::vector<stim::vec3<T> > inP, std::vector<stim::vec<T> > inM)
29 { 29 {
30 mags = inM; 30 mags = inM;
31 - stim::vec<float> v1;  
32 - stim::vec<float> v2; 31 + stim::vec3<float> v1;
  32 + stim::vec3<float> v2;
33 e.resize(inP.size()); 33 e.resize(inP.size());
34 if(inP.size() < 2) 34 if(inP.size() < 2)
35 return; 35 return;
@@ -38,16 +38,16 @@ class cylinder @@ -38,16 +38,16 @@ class cylinder
38 L.resize(inP.size()); 38 L.resize(inP.size());
39 T temp = (T)0; 39 T temp = (T)0;
40 L[0] = 0; 40 L[0] = 0;
41 - for(int i = 1; i < L.size(); i++) 41 + for(size_t i = 1; i < L.size(); i++)
42 { 42 {
43 temp += (inP[i-1] - inP[i]).len(); 43 temp += (inP[i-1] - inP[i]).len();
44 L[i] = temp; 44 L[i] = temp;
45 } 45 }
46 46
47 - stim::vec<T> dr = (inP[1] - inP[0]).norm();  
48 - s = stim::circle<T>(inP[0], inM[0][0], dr, stim::vec<T>(1,0,0)); 47 + stim::vec3<T> dr = (inP[1] - inP[0]).norm();
  48 + s = stim::circle<T>(inP[0], inM[0][0], dr, stim::vec3<T>(1,0,0));
49 e[0] = s; 49 e[0] = s;
50 - for(int i = 1; i < inP.size()-1; i++) 50 + for(size_t i = 1; i < inP.size()-1; i++)
51 { 51 {
52 s.center(inP[i]); 52 s.center(inP[i]);
53 v1 = (inP[i] - inP[i-1]).norm(); 53 v1 = (inP[i] - inP[i-1]).norm();
@@ -67,7 +67,7 @@ class cylinder @@ -67,7 +67,7 @@ class cylinder
67 } 67 }
68 68
69 ///returns the direction vector at point idx. 69 ///returns the direction vector at point idx.
70 - stim::vec<T> 70 + stim::vec3<T>
71 d(int idx) 71 d(int idx)
72 { 72 {
73 if(idx == 0) 73 if(idx == 0)
@@ -81,15 +81,15 @@ class cylinder @@ -81,15 +81,15 @@ class cylinder
81 else 81 else
82 { 82 {
83 // return (e[idx+1].P - e[idx].P).norm(); 83 // return (e[idx+1].P - e[idx].P).norm();
84 - stim::vec<float> v1 = (e[idx].P-e[idx-1].P).norm();  
85 - stim::vec<float> v2 = (e[idx+1].P-e[idx].P).norm(); 84 + stim::vec3<float> v1 = (e[idx].P-e[idx-1].P).norm();
  85 + stim::vec3<float> v2 = (e[idx+1].P-e[idx].P).norm();
86 return (v1+v2).norm(); 86 return (v1+v2).norm();
87 } 87 }
88 // return e[idx].N; 88 // return e[idx].N;
89 89
90 } 90 }
91 91
92 - stim::vec<T> 92 + stim::vec3<T>
93 d(T l, int idx) 93 d(T l, int idx)
94 { 94 {
95 if(idx == 0 || idx == e.size()-1) 95 if(idx == 0 || idx == e.size()-1)
@@ -144,13 +144,13 @@ class cylinder @@ -144,13 +144,13 @@ class cylinder
144 ///constructor to create a cylinder from a set of points, radii, and the number of sides for the cylinder. 144 ///constructor to create a cylinder from a set of points, radii, and the number of sides for the cylinder.
145 ///@param inP: Vector of stim vecs composing the points of the centerline. 145 ///@param inP: Vector of stim vecs composing the points of the centerline.
146 ///@param inM: Vector of stim vecs composing the radii of the centerline. 146 ///@param inM: Vector of stim vecs composing the radii of the centerline.
147 - cylinder(std::vector<stim::vec<T> > inP, std::vector<stim::vec<T> > inM){ 147 + cylinder(std::vector<stim::vec3<T> > inP, std::vector<stim::vec3<T> > inM){
148 init(inP, inM); 148 init(inP, inM);
149 } 149 }
150 150
151 ///Constructor defines a cylinder with centerline inP and magnitudes of zero 151 ///Constructor defines a cylinder with centerline inP and magnitudes of zero
152 ///@param inP: Vector of stim vecs composing the points of the centerline 152 ///@param inP: Vector of stim vecs composing the points of the centerline
153 - cylinder(std::vector< stim::vec<T> > inP){ 153 + cylinder(std::vector< stim::vec3<T> > inP){
154 std::vector< stim::vec<T> > inM; //create an array of arbitrary magnitudes 154 std::vector< stim::vec<T> > inM; //create an array of arbitrary magnitudes
155 155
156 stim::vec<T> zero; 156 stim::vec<T> zero;
@@ -171,12 +171,12 @@ class cylinder @@ -171,12 +171,12 @@ class cylinder
171 ///Returns a position vector at the given p-value (p value ranges from 0 to 1). 171 ///Returns a position vector at the given p-value (p value ranges from 0 to 1).
172 ///interpolates the position along the line. 172 ///interpolates the position along the line.
173 ///@param pvalue: the location of the in the cylinder, from 0 (beginning to 1). 173 ///@param pvalue: the location of the in the cylinder, from 0 (beginning to 1).
174 - stim::vec<T> 174 + stim::vec3<T>
175 p(T pvalue) 175 p(T pvalue)
176 { 176 {
177 if(pvalue < 0.0 || pvalue > 1.0) 177 if(pvalue < 0.0 || pvalue > 1.0)
178 { 178 {
179 - return stim::vec<float>(-1,-1,-1); 179 + return stim::vec3<float>(-1,-1,-1);
180 } 180 }
181 T l = pvalue*L[L.size()-1]; 181 T l = pvalue*L[L.size()-1];
182 int idx = findIdx(l); 182 int idx = findIdx(l);
@@ -188,7 +188,7 @@ class cylinder @@ -188,7 +188,7 @@ class cylinder
188 ///Interpolates the radius along the line. 188 ///Interpolates the radius along the line.
189 ///@param l: the location of the in the cylinder. 189 ///@param l: the location of the in the cylinder.
190 ///@param idx: integer location of the point closest to l but prior to it. 190 ///@param idx: integer location of the point closest to l but prior to it.
191 - stim::vec<T> 191 + stim::vec3<T>
192 p(T l, int idx) 192 p(T l, int idx)
193 { 193 {
194 T rat = (l-L[idx])/(L[idx+1]-L[idx]); 194 T rat = (l-L[idx])/(L[idx+1]-L[idx]);
@@ -252,16 +252,16 @@ class cylinder @@ -252,16 +252,16 @@ class cylinder
252 ///in x, y, z coordinates. Theta is in degrees from 0 to 360. 252 ///in x, y, z coordinates. Theta is in degrees from 0 to 360.
253 ///@param pvalue: the location of the in the cylinder, from 0 (beginning to 1). 253 ///@param pvalue: the location of the in the cylinder, from 0 (beginning to 1).
254 ///@param theta: the angle to the point of a circle. 254 ///@param theta: the angle to the point of a circle.
255 - stim::vec<T> 255 + stim::vec3<T>
256 surf(T pvalue, T theta) 256 surf(T pvalue, T theta)
257 { 257 {
258 if(pvalue < 0.0 || pvalue > 1.0) 258 if(pvalue < 0.0 || pvalue > 1.0)
259 { 259 {
260 - return stim::vec<float>(-1,-1,-1); 260 + return stim::vec3<float>(-1,-1,-1);
261 } else { 261 } else {
262 T l = pvalue*L[L.size()-1]; 262 T l = pvalue*L[L.size()-1];
263 int idx = findIdx(l); 263 int idx = findIdx(l);
264 - stim::vec<T> ps = p(l, idx); 264 + stim::vec3<T> ps = p(l, idx);
265 T m = r(l, idx); 265 T m = r(l, idx);
266 s = e[idx]; 266 s = e[idx];
267 s.center(ps); 267 s.center(ps);
@@ -273,10 +273,10 @@ class cylinder @@ -273,10 +273,10 @@ class cylinder
273 273
274 ///returns a vector of points necessary to create a circle at every position in the fiber. 274 ///returns a vector of points necessary to create a circle at every position in the fiber.
275 ///@param sides: the number of sides of each circle. 275 ///@param sides: the number of sides of each circle.
276 - std::vector<std::vector<vec<T> > > 276 + std::vector<std::vector<vec3<T> > >
277 getPoints(int sides) 277 getPoints(int sides)
278 { 278 {
279 - std::vector<std::vector <vec<T> > > points; 279 + std::vector<std::vector <vec3<T> > > points;
280 points.resize(e.size()); 280 points.resize(e.size());
281 for(int i = 0; i < e.size(); i++) 281 for(int i = 0; i < e.size(); i++)
282 { 282 {
@@ -293,7 +293,7 @@ class cylinder @@ -293,7 +293,7 @@ class cylinder
293 } 293 }
294 /// Allows a point on the centerline to be accessed using bracket notation 294 /// Allows a point on the centerline to be accessed using bracket notation
295 295
296 - vec<T> operator[](unsigned int i){ 296 + vec3<T> operator[](unsigned int i){
297 return e[i].P; 297 return e[i].P;
298 } 298 }
299 299
@@ -309,7 +309,7 @@ class cylinder @@ -309,7 +309,7 @@ class cylinder
309 T M = 0; //initialize the integral to zero 309 T M = 0; //initialize the integral to zero
310 T m0, m1; //allocate space for both magnitudes in a single segment 310 T m0, m1; //allocate space for both magnitudes in a single segment
311 311
312 - //vec<T> p0, p1; //allocate space for both points in a single segment 312 + //vec3<T> p0, p1; //allocate space for both points in a single segment
313 313
314 m0 = mags[0][m]; //initialize the first point and magnitude to the first point in the cylinder 314 m0 = mags[0][m]; //initialize the first point and magnitude to the first point in the cylinder
315 //p0 = pos[0]; 315 //p0 = pos[0];
@@ -325,7 +325,7 @@ class cylinder @@ -325,7 +325,7 @@ class cylinder
325 if(p > 1) len = (L[p-1] - L[p-2]); //calculate the segment length using the L array 325 if(p > 1) len = (L[p-1] - L[p-2]); //calculate the segment length using the L array
326 326
327 //add the average magnitude, weighted by the segment length 327 //add the average magnitude, weighted by the segment length
328 - M += (m0 + m1)/2.0 * len; 328 + M += (m0 + m1)/(T)2.0 * len;
329 329
330 m0 = m1; //move to the next segment by shifting points 330 m0 = m1; //move to the next segment by shifting points
331 } 331 }
@@ -345,21 +345,21 @@ class cylinder @@ -345,21 +345,21 @@ class cylinder
345 /// @param spacing is the maximum spacing allowed between sample points 345 /// @param spacing is the maximum spacing allowed between sample points
346 cylinder<T> resample(T spacing){ 346 cylinder<T> resample(T spacing){
347 347
348 - std::vector< vec<T> > result; 348 + std::vector< vec3<T> > result;
349 349
350 - vec<T> p0 = e[0].P; //initialize p0 to the first point on the centerline  
351 - vec<T> p1; 350 + vec3<T> p0 = e[0].P; //initialize p0 to the first point on the centerline
  351 + vec3<T> p1;
352 unsigned N = size(); //number of points in the current centerline 352 unsigned N = size(); //number of points in the current centerline
353 353
354 //for each line segment on the centerline 354 //for each line segment on the centerline
355 for(unsigned int i = 1; i < N; i++){ 355 for(unsigned int i = 1; i < N; i++){
356 p1 = e[i].P; //get the second point in the line segment 356 p1 = e[i].P; //get the second point in the line segment
357 357
358 - vec<T> v = p1 - p0; //calculate the vector between these two points 358 + vec3<T> v = p1 - p0; //calculate the vector between these two points
359 T d = v.len(); //calculate the distance between these two points (length of the line segment) 359 T d = v.len(); //calculate the distance between these two points (length of the line segment)
360 360
361 - unsigned nsteps = d / spacing+1; //calculate the number of steps to take along the segment to meet the spacing criteria  
362 - T stepsize = 1.0 / nsteps; //calculate the parametric step size between new centerline points 361 + size_t nsteps = (size_t)std::ceil(d / spacing); //calculate the number of steps to take along the segment to meet the spacing criteria
  362 + T stepsize = (T)1.0 / nsteps; //calculate the parametric step size between new centerline points
363 363
364 //for each step along the line segment 364 //for each step along the line segment
365 for(unsigned s = 0; s < nsteps; s++){ 365 for(unsigned s = 0; s < nsteps; s++){