Commit 03c403facb572c7df53e36dd20ef6b73ea1290e3

Authored by Pavel Govyadinov
2 parents c0e09133 ad2123e6

Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib

stim/biomodels/network.h
... ... @@ -8,7 +8,7 @@
8 8 #include <algorithm>
9 9 #include <string.h>
10 10 #include <math.h>
11   -#include <stim/math/vector.h>
  11 +#include <stim/math/vec3.h>
12 12 #include <stim/visualization/obj.h>
13 13 #include <stim/visualization/cylinder.h>
14 14 #include <ANN/ANN.h>
... ... @@ -37,7 +37,7 @@ class network{
37 37 /// Constructor - creates an edge from a list of points by calling the stim::fiber constructor
38 38  
39 39 ///@param p is an array of positions in space
40   - edge(std::vector< stim::vec<T> > p) : cylinder<T>(p){}
  40 + edge(std::vector< stim::vec3<T> > p) : cylinder<T>(p){}
41 41  
42 42 /// Copy constructor creates an edge from a fiber
43 43 edge(stim::cylinder<T> f) : cylinder<T>(f) {}
... ... @@ -61,20 +61,20 @@ class network{
61 61 };
62 62  
63 63 ///Node class that stores the physical position of the node as well as the edges it is connected to (edges that connect to it), As well as any additional data necessary.
64   - class vertex : public stim::vec<T>
  64 + class vertex : public stim::vec3<T>
65 65 {
66 66 public:
67 67 //std::vector<unsigned int> edges; //indices of edges connected to this node.
68 68 std::vector<unsigned int> e[2]; //indices of edges going out (e[0]) and coming in (e[1])
69   - //stim::vec<T> p; //position of this node in physical space.
  69 + //stim::vec3<T> p; //position of this node in physical space.
70 70  
71 71 //constructor takes a stim::vec
72   - vertex(stim::vec<T> p) : stim::vec<T>(p){}
  72 + vertex(stim::vec3<T> p) : stim::vec3<T>(p){}
73 73  
74 74 /// Output the vertex information as a string
75 75 std::string str(){
76 76 std::stringstream ss;
77   - ss<<"\t(x, y, z) = "<<stim::vec<T>::str();
  77 + ss<<"\t(x, y, z) = "<<stim::vec3<T>::str();
78 78  
79 79 if(e[0].size() > 0){
80 80 ss<<"\t> ";
... ... @@ -129,7 +129,11 @@ public:
129 129 std::vector< stim::vec<T> > c; //allocate an array of points for the vessel centerline
130 130 O.getLine(l, c); //get the fiber centerline
131 131  
132   - edge new_edge = c; //create an edge from the given centerline
  132 + std::vector< stim::vec3<T> > c3(c.size());
  133 + for(size_t j = 0; j < c.size(); j++)
  134 + c3[j] = c[j];
  135 +
  136 + edge new_edge = c3; //create an edge from the given centerline
133 137 unsigned int I = new_edge.size(); //calculate the number of points on the centerline
134 138  
135 139 //get the first and last vertex IDs for the line
... ... @@ -222,7 +226,7 @@ public:
222 226 float gaussianFunction(float x, float std=25){ return exp(-x/(2*std*std));} // by default std = 25
223 227  
224 228 // stim 3d vector to annpoint of 3 dimensions
225   - void stim2ann(ANNpoint &a, stim::vec<T> b){
  229 + void stim2ann(ANNpoint &a, stim::vec3<T> b){
226 230 a[0] = b[0];
227 231 a[1] = b[1];
228 232 a[2] = b[2];
... ... @@ -278,10 +282,9 @@ public:
278 282 ANNdistArray dists = new ANNdist[1]; // near neighbor distances
279 283 ANNidxArray nnIdx = new ANNidx[1]; // near neighbor indices // allocate near neigh indices
280 284  
281   - stim::vec<T> p0, p1;
282   - float m0, m1;
  285 + stim::vec3<T> p0, p1;
  286 + float m1;
283 287 float M = 0; //stores the total metric value
284   - float l; //stores the segment length
285 288 float L = 0; //stores the total network length
286 289 ANNpoint queryPt = annAllocPt(3);
287 290 for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A
... ... @@ -292,7 +295,7 @@ public:
292 295 p1 = R.E[e][p]; //get the next point in the edge
293 296 stim2ann(queryPt, p1);
294 297 kdt->annkSearch( queryPt, 1, nnIdx, dists, eps); //find the distance between A and the current network
295   - m1 = 1.0f - gaussianFunction(dists[0], sigma); //calculate the metric value based on the distance
  298 + m1 = 1.0f - gaussianFunction((float)dists[0], sigma); //calculate the metric value based on the distance
296 299 R.E[e].set_mag(m1, p, 1); //set the error for the second point in the segment
297 300  
298 301 }
... ...
stim/cuda/cudatools/callable.h
... ... @@ -2,7 +2,7 @@
2 2  
3 3 //define the CUDA_CALLABLE macro (will prefix all members)
4 4 #ifdef __CUDACC__
5   -#define CUDA_CALLABLE __host__ __device__
  5 +#define CUDA_CALLABLE __host__ __device__ inline
6 6 #else
7 7 #define CUDA_CALLABLE
8 8 #endif
... ...
stim/cuda/cudatools/devices.h
... ... @@ -15,7 +15,7 @@ int maxThreadsPerBlock()
15 15 }
16 16  
17 17 extern "C"
18   -int sharedMemPerBlock()
  18 +size_t sharedMemPerBlock()
19 19 {
20 20 int device;
21 21 cudaGetDevice(&device); //get the id of the current device
... ... @@ -23,6 +23,16 @@ int sharedMemPerBlock()
23 23 cudaGetDeviceProperties(&props, device);
24 24 return props.sharedMemPerBlock;
25 25 }
  26 +
  27 +extern "C"
  28 +size_t constMem()
  29 +{
  30 + int device;
  31 + cudaGetDevice(&device); //get the id of the current device
  32 + cudaDeviceProp props; //device property structure
  33 + cudaGetDeviceProperties(&props, device);
  34 + return props.totalConstMem;
  35 +}
26 36 } //end namespace rts
27 37  
28 38 #endif
... ...
stim/cuda/sharedmem.cuh
... ... @@ -5,7 +5,7 @@
5 5 namespace stim{
6 6 namespace cuda{
7 7  
8   - // Copies values from global memory to shared memory, optimizing threads
  8 + // Copies values from texture memory to shared memory, optimizing threads
9 9 template<typename T>
10 10 __device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,
11 11 unsigned int x, unsigned int y, unsigned int X, unsigned int Y,
... ... @@ -35,6 +35,19 @@ namespace stim{
35 35 }
36 36 }
37 37  
  38 + // Copies values from global memory to shared memory, optimizing threads
  39 + template<typename T>
  40 + __device__ void sharedMemcpy(T* dest, T* src, size_t N, size_t tid, size_t nt){
  41 +
  42 + size_t I = N / nt + 1; //calculate the number of iterations required to make the copy
  43 + size_t xi = tid; //initialize the source and destination index to the thread ID
  44 + for(size_t i = 0; i < I; i++){ //for each iteration
  45 + if(xi < N) //if the index is within the copy region
  46 + dest[xi] = src[xi]; //perform the copy
  47 + xi += nt;
  48 + }
  49 + }
  50 +
38 51  
39 52 }
40 53 }
... ...
stim/envi/bil.h
... ... @@ -884,7 +884,7 @@ public:
884 884 /// using the following indexing: i = p*B + b
885 885 /// @param matrix is the destination for the pixel data
886 886 /// @param mask is the mask
887   - bool sift(T* matrix, unsigned char* mask = NULL){
  887 + bool sift(T* matrix, unsigned char* mask = NULL, bool PROGRESS = false){
888 888 size_t Lbytes = sizeof(T) * X();
889 889 T* line = (T*) malloc( Lbytes ); //allocate space for a line
890 890  
... ... @@ -903,6 +903,7 @@ public:
903 903 pl++; //increment the pixel pointer
904 904 }
905 905 }
  906 + if(PROGRESS) progress = (double)( (y+1)*Z() + 1) / (double)(Y() * Z()) * 100;
906 907 }
907 908 p += pl; //add the line increment to the running pixel index
908 909 }
... ...
stim/envi/bip.h
... ... @@ -817,7 +817,7 @@ public:
817 817 /// using the following indexing: i = p*B + b
818 818 /// @param matrix is the destination for the pixel data
819 819 /// @param mask is the mask
820   - bool sift(T* matrix, unsigned char* mask = NULL){
  820 + bool sift(T* matrix, unsigned char* mask = NULL, bool PROGRESS = false){
821 821 size_t Bbytes = sizeof(T) * Z();
822 822 size_t XY = X() * Y();
823 823 T* band = (T*) malloc( Bbytes ); //allocate space for a line
... ... @@ -836,6 +836,7 @@ public:
836 836 }
837 837 else
838 838 file.seekg(Bbytes, std::ios::cur); //otherwise skip this band
  839 + if(PROGRESS) progress = (double)(xy+1) / (double)XY * 100;
839 840 }
840 841 return true;
841 842 }
... ...
stim/envi/bsq.h
... ... @@ -809,7 +809,7 @@ public:
809 809 /// using the following indexing: i = p*B + b
810 810 /// @param matrix is the destination for the pixel data
811 811 /// @param mask is the mask
812   - bool sift(T* matrix, unsigned char* mask = NULL){
  812 + bool sift(T* matrix, unsigned char* mask = NULL, bool PROGRESS = false){
813 813 unsigned long long XY = X() * Y(); //Number of XY pixels
814 814 unsigned long long L = XY * sizeof(T); //size of XY plane (in bytes)
815 815  
... ... @@ -827,9 +827,8 @@ public:
827 827 if(mask == NULL || mask[xy] != 0){ //if the pixel is valid
828 828 matrix[i*Z() + b] = band_image[xy]; //copy it to the appropriate point in the values[] array
829 829 i++;
830   - //std::cout<<i<<std::endl;
831 830 }
832   -
  831 + if(PROGRESS) progress = (double)(xy+1) / (double)XY * 100;
833 832 }
834 833 }
835 834  
... ...
stim/envi/envi.h
... ... @@ -670,13 +670,13 @@ public:
670 670 /// using the following indexing: i = b*P + p
671 671 /// @param matrix is the destination for the pixel data
672 672 /// @param p is the mask
673   - bool sift(void* matrix, unsigned char* p = NULL){
  673 + bool sift(void* matrix, unsigned char* p = NULL, bool PROGRESS = false){
674 674  
675 675 if (header.interleave == envi_header::BSQ){ //if the infile is bsq file
676 676 if (header.data_type == envi_header::float32)
677   - return ((bsq<float>*)file)->sift((float*)matrix, p);
  677 + return ((bsq<float>*)file)->sift((float*)matrix, p, PROGRESS);
678 678 else if (header.data_type == envi_header::float64)
679   - return ((bsq<double>*)file)->sift((double*)matrix, p);
  679 + return ((bsq<double>*)file)->sift((double*)matrix, p, PROGRESS);
680 680 else{
681 681 std::cout << "ERROR: unidentified data type" << std::endl;
682 682 exit(1);
... ... @@ -685,9 +685,9 @@ public:
685 685  
686 686 if (header.interleave == envi_header::BIP){
687 687 if (header.data_type == envi_header::float32)
688   - return ((bip<float>*)file)->sift((float*)matrix, p);
  688 + return ((bip<float>*)file)->sift((float*)matrix, p, PROGRESS);
689 689 else if (header.data_type == envi_header::float64)
690   - return ((bip<double>*)file)->sift((double*)matrix, p);
  690 + return ((bip<double>*)file)->sift((double*)matrix, p, PROGRESS);
691 691 else{
692 692 std::cout << "ERROR: unidentified data type" << std::endl;
693 693 exit(1);
... ... @@ -695,9 +695,9 @@ public:
695 695 }
696 696 if (header.interleave == envi_header::BIL){
697 697 if (header.data_type == envi_header::float32)
698   - return ((bil<float>*)file)->sift((float*)matrix, p);
  698 + return ((bil<float>*)file)->sift((float*)matrix, p, PROGRESS);
699 699 else if (header.data_type == envi_header::float64)
700   - return ((bil<double>*)file)->sift((double*)matrix, p);
  700 + return ((bil<double>*)file)->sift((double*)matrix, p, PROGRESS);
701 701 else{
702 702 std::cout << "ERROR: unidentified data type" << std::endl;
703 703 exit(1);
... ...
stim/image/image.h
... ... @@ -6,6 +6,7 @@
6 6 #include <vector>
7 7 #include <iostream>
8 8 #include <limits>
  9 +#include <typeinfo>
9 10  
10 11 namespace stim{
11 12 /// This static class provides the STIM interface for loading, saving, and storing 2D images.
... ... @@ -24,8 +25,6 @@ class image{
24 25 size_t Y() const { return R[2]; }
25 26 size_t C() const { return R[0]; }
26 27  
27   - size_t bytes(){ return size() * sizeof(T); }
28   -
29 28 void init(){ //initializes all variables, assumes no memory is allocated
30 29 memset(R, 0, sizeof(size_t) * 3); //set the resolution and number of channels to zero
31 30 img = NULL;
... ... @@ -33,7 +32,6 @@ class image{
33 32  
34 33 void unalloc(){ //frees any resources associated with the image
35 34 if(img) free(img); //if memory has been allocated, free it
36   - img=NULL;
37 35 }
38 36  
39 37  
... ... @@ -44,16 +42,15 @@ class image{
44 42  
45 43 void allocate(){
46 44 unalloc();
47   - img = (T*) malloc( bytes() ); //allocate memory
48   - memset(img, 0, bytes());
  45 + img = (T*) malloc( sizeof(T) * R[0] * R[1] * R[2] ); //allocate memory
49 46 }
50 47  
51 48 void allocate(size_t x, size_t y, size_t c){ //allocate memory based on the resolution
52   - unalloc();
53 49 R[0] = c; R[1] = x; R[2] = y; //set the resolution
54 50 allocate(); //allocate memory
55 51 }
56 52  
  53 + size_t bytes(){ return size() * sizeof(T); }
57 54  
58 55 size_t idx(size_t x, size_t y, size_t c = 0){
59 56 return y * C() * X() + x * C() + c;
... ... @@ -61,13 +58,23 @@ class image{
61 58  
62 59  
63 60 int cv_type(){
64   - if(std::is_same<T, unsigned char>::value) return CV_MAKETYPE(CV_8U, (int)C());
65   - if(std::is_same<T, char>::value) return CV_MAKETYPE(CV_8S, (int)C());
66   - if(std::is_same<T, unsigned short>::value) return CV_MAKETYPE(CV_16U, (int)C());
67   - if(std::is_same<T, short>::value) return CV_MAKETYPE(CV_16S, (int)C());
68   - if(std::is_same<T, int>::value) return CV_MAKETYPE(CV_32S, (int)C());
69   - if(std::is_same<T, float>::value) return CV_MAKETYPE(CV_32F, (int)C());
70   - if(std::is_same<T, double>::value) return CV_MAKETYPE(CV_64F, (int)C());
  61 + // The following is C++ 11 code, but causes problems on some compilers (ex. nvcc). Below is my best approximation to a solution
  62 +
  63 + //if(std::is_same<T, unsigned char>::value) return CV_MAKETYPE(CV_8U, (int)C());
  64 + //if(std::is_same<T, char>::value) return CV_MAKETYPE(CV_8S, (int)C());
  65 + //if(std::is_same<T, unsigned short>::value) return CV_MAKETYPE(CV_16U, (int)C());
  66 + //if(std::is_same<T, short>::value) return CV_MAKETYPE(CV_16S, (int)C());
  67 + //if(std::is_same<T, int>::value) return CV_MAKETYPE(CV_32S, (int)C());
  68 + //if(std::is_same<T, float>::value) return CV_MAKETYPE(CV_32F, (int)C());
  69 + //if(std::is_same<T, double>::value) return CV_MAKETYPE(CV_64F, (int)C());
  70 +
  71 + if(typeid(T) == typeid(unsigned char)) return CV_MAKETYPE(CV_8U, (int)C());
  72 + if(typeid(T) == typeid(char)) return CV_MAKETYPE(CV_8S, (int)C());
  73 + if(typeid(T) == typeid(unsigned short)) return CV_MAKETYPE(CV_16U, (int)C());
  74 + if(typeid(T) == typeid(short)) return CV_MAKETYPE(CV_16S, (int)C());
  75 + if(typeid(T) == typeid(int)) return CV_MAKETYPE(CV_32S, (int)C());
  76 + if(typeid(T) == typeid(float)) return CV_MAKETYPE(CV_32F, (int)C());
  77 + if(typeid(T) == typeid(double)) return CV_MAKETYPE(CV_64F, (int)C());
71 78  
72 79 std::cout<<"ERROR in stim::image::cv_type - no valid data type found"<<std::endl;
73 80 exit(1);
... ... @@ -75,15 +82,26 @@ class image{
75 82  
76 83 /// Returns the value for "white" based on the dynamic range (assumes white is 1.0 for floating point images)
77 84 T white(){
78   - if(std::is_same<T, unsigned char>::value) return UCHAR_MAX;
79   - if(std::is_same<T, unsigned short>::value) return SHRT_MAX;
80   - if(std::is_same<T, unsigned>::value) return UINT_MAX;
81   - if(std::is_same<T, unsigned long>::value) return ULONG_MAX;
82   - if(std::is_same<T, unsigned long long>::value) return ULLONG_MAX;
83   - if(std::is_same<T, float>::value) return 1.0f;
84   - if(std::is_same<T, double>::value) return 1.0;
  85 + // The following is C++ 11 code, but causes problems on some compilers (ex. nvcc). Below is my best approximation to a solution
  86 +
  87 + //if(std::is_same<T, unsigned char>::value) return UCHAR_MAX;
  88 + //if(std::is_same<T, unsigned short>::value) return SHRT_MAX;
  89 + //if(std::is_same<T, unsigned>::value) return UINT_MAX;
  90 + //if(std::is_same<T, unsigned long>::value) return ULONG_MAX;
  91 + //if(std::is_same<T, unsigned long long>::value) return ULLONG_MAX;
  92 + //if(std::is_same<T, float>::value) return 1.0f;
  93 + //if(std::is_same<T, double>::value) return 1.0;
  94 +
  95 + if(typeid(T) == typeid(unsigned char)) return UCHAR_MAX;
  96 + if(typeid(T) == typeid(unsigned short)) return SHRT_MAX;
  97 + if(typeid(T) == typeid(unsigned)) return UINT_MAX;
  98 + if(typeid(T) == typeid(unsigned long)) return ULONG_MAX;
  99 + if(typeid(T) == typeid(unsigned long long)) return ULLONG_MAX;
  100 + if(typeid(T) == typeid(float)) return 1.0f;
  101 + if(typeid(T) == typeid(double)) return 1.0;
85 102  
86 103 std::cout<<"ERROR in stim::image::white - no white value known for this data type"<<std::endl;
  104 + exit(1);
87 105  
88 106 }
89 107  
... ... @@ -91,9 +109,7 @@ class image{
91 109 public:
92 110  
93 111 /// Default constructor - creates an empty image object
94   - image(){
95   - init(); //initialize all variables to zero, don't allocate any memory
96   - }
  112 + image(){ init(); } //initialize all variables to zero, don't allocate any memory
97 113  
98 114 /// Constructor with a filename - loads the specified file
99 115 image(std::string filename){ //constructor initialize the image with an image file
... ... @@ -115,7 +131,7 @@ public:
115 131 }
116 132  
117 133 /// Copy constructor - duplicates an image object
118   - image(const stim::image<T> &I){
  134 + image(const stim::image<T>& I){
119 135 init();
120 136 allocate(I.X(), I.Y(), I.C());
121 137 memcpy(img, I.img, bytes());
... ... @@ -127,6 +143,7 @@ public:
127 143 }
128 144  
129 145 stim::image<T>& operator=(const stim::image<T>& I){
  146 + init();
130 147 if(&I == this) //handle self-assignment
131 148 return *this;
132 149 allocate(I.X(), I.Y(), I.C());
... ... @@ -139,22 +156,15 @@ public:
139 156  
140 157 cv::Mat cvImage = cv::imread(filename, CV_LOAD_IMAGE_UNCHANGED); //use OpenCV to open the image file
141 158 if(!cvImage.data){
142   - std::cout<<"ERROR stim::image::load() - unable to find image "<<filename<<" ["<<__FILE__<<" (line "<<__LINE__<<")]"<<std::endl;
  159 + std::cout<<"ERROR stim::image::load() - unable to find image "<<filename<<std::endl;
143 160 exit(1);
144 161 }
145 162 allocate(cvImage.cols, cvImage.rows, cvImage.channels()); //allocate space for the image
146   - T* cv_ptr = (T*) cvImage.data;
147   - if(C() == 1)
148   - {
149   - //if this is a single-color image, just copy the data
150   - memcpy(img, cv_ptr, bytes());
151   - }
152   - if(C() == 3)
153   - { //if this is a 3-color image, OpenCV uses BGR interleaving
  163 + T* cv_ptr = (T*)cvImage.data;
  164 + if(C() == 1) //if this is a single-color image, just copy the data
  165 + memcpy(img, cv_ptr, bytes());
  166 + if(C() == 3) //if this is a 3-color image, OpenCV uses BGR interleaving
154 167 set_interleaved_bgr(cv_ptr, X(), Y());
155   - }
156   -
157   - cvImage.release();
158 168 }
159 169  
160 170 //save a file
... ... @@ -168,18 +178,16 @@ public:
168 178 get_interleaved_bgr(buffer);
169 179 cv::Mat cvImage((int)Y(), (int)X(), cv_type(), buffer);
170 180 cv::imwrite(filename, cvImage);
171   - cvImage.release();
172   - free(buffer);
173 181 }
174 182  
175 183 //create an image from an interleaved buffer
176   - void set_interleaved_rgb(T* buffer, size_t width, size_t height, size_t channels = 3){
177   - allocate(width, height, channels);
  184 + void set_interleaved_rgb(T* buffer, size_t width, size_t height){
  185 + allocate(width, height, 3);
178 186 memcpy(img, buffer, bytes());
179 187 }
180 188  
181   - void set_interleaved_bgr(T* buffer, size_t width, size_t height, size_t channels = 3){
182   - allocate(width, height, channels);
  189 + void set_interleaved_bgr(T* buffer, size_t width, size_t height){
  190 + allocate(width, height, 3);
183 191 for(size_t c = 0; c < C(); c++){ //copy directly
184 192 for(size_t y = 0; y < Y(); y++){
185 193 for(size_t x = 0; x < X(); x++){
... ... @@ -359,34 +367,6 @@ public:
359 367  
360 368 return r; //return the inverted image
361 369 }
362   -
363   - /// Invert an image by calculating I1 = alpha - I0, where alpha is the maximum image value
364   - image<T> invert(){
365   - size_t N = size(); //calculate the total number of values in the image
366   - image<T> r(X(), Y(), C()); //allocate space for the resulting image
367   - T white_val = maxv();
368   - for(size_t n = 0; n < N; n++)
369   - r.img[n] = white_val - img[n]; //perform the inversion
370   -
371   - return r; //return the inverted image
372   - }
373   -
374   - ///crops the image from x1 to x0 and y1 to y0 and returns a new (smaller) image.
375   - image<T> crop(int x0, int x1, int y0, int y1)
376   - {
377   -
378   - image<T> ret(x1-x0, y1-y0, C());
379   - int newWidth = x1-x0;
380   - int destidx, srcidx;
381   - ///for each row, cut what amount of data from the original and put it into the new copy.
382   - for(int i = 0; i < (y1-y0); i++)
383   - {
384   - destidx = i*newWidth*C(); ///destination index one per each row
385   - srcidx = ((i+(y0))*X()+x0)*C(); ///source index, one per each row.
386   - memcpy(&ret.img[destidx], &img[srcidx], sizeof(T)*newWidth*C());
387   - }
388   - return ret;
389   - }
390 370  
391 371 image<T> srgb2lab(){
392 372 std::cout<<"ERROR stim::image::srgb2lab - function has been broken, re-implement."<<std::endl;
... ... @@ -405,7 +385,6 @@ public:
405 385 exit(1);
406 386 }
407 387  
408   -
409 388 // leila's code for non_interleaving data in 3D
410 389 //create an data set from an interleaved buffer
411 390 void set_interleaved3(T* buffer, size_t width, size_t height, size_t depth, size_t channels = 3){
... ...
stim/math/bessel.h
... ... @@ -17,6 +17,11 @@ static complex<double> czero(0.0,0.0);
17 17 template< typename P >
18 18 P gamma(P x)
19 19 {
  20 + const P EPS = numeric_limits<P>::epsilon();
  21 + const P FPMIN_MAG = numeric_limits<P>::min();
  22 + const P FPMIN = numeric_limits<P>::lowest();
  23 + const P FPMAX = numeric_limits<P>::max();
  24 +
20 25 int i,k,m;
21 26 P ga,gr,r,z;
22 27  
... ... @@ -47,7 +52,7 @@ P gamma(P x)
47 52 -0.54e-14,
48 53 0.14e-14};
49 54  
50   - if (x > 171.0) return 1e308; // This value is an overflow flag.
  55 + if (x > 171.0) return FPMAX; // This value is an overflow flag.
51 56 if (x == (int)x) {
52 57 if (x > 0.0) {
53 58 ga = 1.0; // use factorial
... ... @@ -56,7 +61,7 @@ P gamma(P x)
56 61 }
57 62 }
58 63 else
59   - ga = 1e308;
  64 + ga = FPMAX;
60 65 }
61 66 else {
62 67 if (fabs(x) > 1.0) {
... ... @@ -89,6 +94,11 @@ template<typename P>
89 94 int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1,
90 95 P &j0p,P &j1p,P &y0p,P &y1p)
91 96 {
  97 + const P EPS = numeric_limits<P>::epsilon();
  98 + const P FPMIN_MAG = numeric_limits<P>::min();
  99 + const P FPMIN = numeric_limits<P>::lowest();
  100 + const P FPMAX = numeric_limits<P>::max();
  101 +
92 102 P x2,r,ec,w0,w1,r0,r1,cs0,cs1;
93 103 P cu,p0,q0,p1,q1,t1,t2;
94 104 int k,kz;
... ... @@ -157,12 +167,12 @@ int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1,
157 167 if (x == 0.0) {
158 168 j0 = 1.0;
159 169 j1 = 0.0;
160   - y0 = -1e308;
161   - y1 = -1e308;
  170 + y0 = -FPMIN;
  171 + y1 = -FPMIN;
162 172 j0p = 0.0;
163 173 j1p = 0.5;
164   - y0p = 1e308;
165   - y1p = 1e308;
  174 + y0p = FPMAX;
  175 + y1p = FPMAX;
166 176 return 0;
167 177 }
168 178 x2 = x*x;
... ... @@ -329,7 +339,7 @@ int msta1(P x,int mp)
329 339 for (i=0;i<20;i++) {
330 340 nn = (int)(n1-(n1-n0)/(1.0-f0/f1));
331 341 f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-mp;
332   - if (abs(nn-n1) < 1) break;
  342 + if (std::abs(nn-n1) < 1) break;
333 343 n0 = n1;
334 344 f0 = f1;
335 345 n1 = nn;
... ... @@ -361,7 +371,7 @@ int msta2(P x,int n,int mp)
361 371 for (i=0;i<20;i++) {
362 372 nn = (int)(n1-(n1-n0)/(1.0-f0/f1));
363 373 f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-obj;
364   - if (abs(nn-n1) < 1) break;
  374 + if (std::abs(nn-n1) < 1) break;
365 375 n0 = n1;
366 376 f0 = f1;
367 377 n1 = nn;
... ... @@ -596,21 +606,26 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv,
596 606 P b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk;
597 607 int j,k,l,m,n,kz;
598 608  
  609 + const P EPS = numeric_limits<P>::epsilon();
  610 + const P FPMIN_MAG = numeric_limits<P>::min();
  611 + const P FPMIN = numeric_limits<P>::lowest();
  612 + const P FPMAX = numeric_limits<P>::max();
  613 +
599 614 x2 = x*x;
600 615 n = (int)v;
601 616 v0 = v-n;
602 617 if ((x < 0.0) || (v < 0.0)) return 1;
603   - if (x < 1e-15) {
  618 + if (x < EPS) {
604 619 for (k=0;k<=n;k++) {
605 620 jv[k] = 0.0;
606   - yv[k] = -1e308;
  621 + yv[k] = FPMIN;
607 622 djv[k] = 0.0;
608   - dyv[k] = 1e308;
  623 + dyv[k] = FPMAX;
609 624 if (v0 == 0.0) {
610 625 jv[0] = 1.0;
611 626 djv[1] = 0.5;
612 627 }
613   - else djv[0] = 1e308;
  628 + else djv[0] = FPMAX;
614 629 }
615 630 vm = v;
616 631 return 0;
... ... @@ -623,7 +638,7 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv,
623 638 for (k=1;k<=40;k++) {
624 639 r *= -0.25*x2/(k*(k+vl));
625 640 bjvl += r;
626   - if (fabs(r) < fabs(bjvl)*1e-15) break;
  641 + if (fabs(r) < fabs(bjvl)*EPS) break;
627 642 }
628 643 vg = 1.0 + vl;
629 644 a = pow(0.5*x,vl)/gamma(vg);
... ... @@ -686,7 +701,7 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv,
686 701 if (m < n) n = m;
687 702 else m = msta2(x,n,15);
688 703 f2 = 0.0;
689   - f1 = 1.0e-100;
  704 + f1 = FPMIN_MAG;
690 705 for (k=m;k>=0;k--) {
691 706 f = 2.0*(v0+k+1.0)*f1/x-f2;
692 707 if (k <= n) jv[k] = f;
... ... @@ -763,20 +778,26 @@ int bessjyv(P v,P x,P &vm,P *jv,P *yv,
763 778  
764 779 template<typename P>
765 780 int bessjyv_sph(int v, P z, P &vm, P* cjv,
766   - P* cyv, P* cjvp, P* cyvp)
767   -{
  781 + P* cyv, P* cjvp, P* cyvp){
  782 +
768 783 //first, compute the bessel functions of fractional order
769   - bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
  784 + bessjyv<P>(v + (P)0.5, z, vm, cjv, cyv, cjvp, cyvp);
  785 +
  786 + if(z == 0){ //handle degenerate case of z = 0
  787 + memset(cjv, 0, sizeof(P) * (v+1));
  788 + cjv[0] = 1;
  789 + }
770 790  
771 791 //iterate through each and scale
772   - for(int n = 0; n<=v; n++)
773   - {
  792 + for(int n = 0; n<=v; n++){
774 793  
775   - cjv[n] = cjv[n] * sqrt(rtsPI/(z * 2.0));
776   - cyv[n] = cyv[n] * sqrt(rtsPI/(z * 2.0));
  794 + if(z != 0){ //handle degenerate case of z = 0
  795 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  796 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
  797 + }
777 798  
778   - cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(rtsPI / (z * 2.0));
779   - cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(rtsPI / (z * 2.0));
  799 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
  800 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
780 801 }
781 802  
782 803 return 0;
... ... @@ -1237,7 +1258,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1237 1258 P a0,v0,pv0,pv1,vl,ga,gb,vg,vv,w0,w1,ya0,yak,ya1,wa;
1238 1259 int j,n,k,kz,l,lb,lb0,m;
1239 1260  
1240   - a0 = abs(z);
  1261 + a0 = ::abs(z);
1241 1262 z1 = z;
1242 1263 z2 = z*z;
1243 1264 n = (int)v;
... ... @@ -1265,7 +1286,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1265 1286 vm = v;
1266 1287 return 0;
1267 1288 }
1268   - if (real(z1) < 0.0) z1 = -z;
  1289 + if (::real(z1) < 0.0) z1 = -z;
1269 1290 if (a0 <= 12.0) {
1270 1291 for (l=0;l<2;l++) {
1271 1292 vl = v0+l;
... ... @@ -1274,7 +1295,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1274 1295 for (k=1;k<=40;k++) {
1275 1296 cr *= -0.25*z2/(k*(k+vl));
1276 1297 cjvl += cr;
1277   - if (abs(cr) < abs(cjvl)*eps) break;
  1298 + if (::abs(cr) < ::abs(cjvl)*eps) break;
1278 1299 }
1279 1300 vg = 1.0 + vl;
1280 1301 ga = gamma(vg);
... ... @@ -1327,7 +1348,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1327 1348 for (k=1;k<=40;k++) {
1328 1349 cr *= -0.25*z2/(k*(k-vl));
1329 1350 cjvl += cr;
1330   - if (abs(cr) < abs(cjvl)*eps) break;
  1351 + if (::abs(cr) < ::abs(cjvl)*eps) break;
1331 1352 }
1332 1353 vg = 1.0-vl;
1333 1354 gb = gamma(vg);
... ... @@ -1360,16 +1381,16 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1360 1381 cyv1 = M_2_PI*(cec*cjv1-1.0/z1-0.25*z1*cs1);
1361 1382 }
1362 1383 }
1363   - if (real(z) < 0.0) {
  1384 + if (::real(z) < 0.0) {
1364 1385 cfac0 = exp(pv0*cii);
1365 1386 cfac1 = exp(pv1*cii);
1366   - if (imag(z) < 0.0) {
  1387 + if (::imag(z) < 0.0) {
1367 1388 cyv0 = cfac0*cyv0-(P)2.0*(complex<P>)cii*cos(pv0)*cjv0;
1368 1389 cyv1 = cfac1*cyv1-(P)2.0*(complex<P>)cii*cos(pv1)*cjv1;
1369 1390 cjv0 /= cfac0;
1370 1391 cjv1 /= cfac1;
1371 1392 }
1372   - else if (imag(z) > 0.0) {
  1393 + else if (::imag(z) > 0.0) {
1373 1394 cyv0 = cyv0/cfac0+(P)2.0*(complex<P>)cii*cos(pv0)*cjv0;
1374 1395 cyv1 = cyv1/cfac1+(P)2.0*(complex<P>)cii*cos(pv1)*cjv1;
1375 1396 cjv0 *= cfac0;
... ... @@ -1400,7 +1421,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1400 1421 cf2 = cf1;
1401 1422 cf1 = cf;
1402 1423 }
1403   - if (abs(cjv0) > abs(cjv1)) cs = cjv0/cf;
  1424 + if (::abs(cjv0) > ::abs(cjv1)) cs = cjv0/cf;
1404 1425 else cs = cjv1/cf2;
1405 1426 for (k=0;k<=n;k++) {
1406 1427 cjv[k] *= cs;
... ... @@ -1412,21 +1433,21 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1412 1433 }
1413 1434 cyv[0] = cyv0;
1414 1435 cyv[1] = cyv1;
1415   - ya0 = abs(cyv0);
  1436 + ya0 = ::abs(cyv0);
1416 1437 lb = 0;
1417 1438 cg0 = cyv0;
1418 1439 cg1 = cyv1;
1419 1440 for (k=2;k<=n;k++) {
1420 1441 cyk = 2.0*(v0+k-1.0)*cg1/z-cg0;
1421   - yak = abs(cyk);
1422   - ya1 = abs(cg0);
  1442 + yak = ::abs(cyk);
  1443 + ya1 = ::abs(cg0);
1423 1444 if ((yak < ya0) && (yak< ya1)) lb = k;
1424 1445 cyv[k] = cyk;
1425 1446 cg0 = cg1;
1426 1447 cg1 = cyk;
1427 1448 }
1428 1449 lb0 = 0;
1429   - if ((lb > 4) && (imag(z) != 0.0)) {
  1450 + if ((lb > 4) && (::imag(z) != 0.0)) {
1430 1451 while(lb != lb0) {
1431 1452 ch2 = cone;
1432 1453 ch1 = czero;
... ... @@ -1449,7 +1470,7 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1449 1470 cp21 = ch2;
1450 1471 if (lb == n)
1451 1472 cjv[lb+1] = 2.0*(lb+v0)*cjv[lb]/z-cjv[lb-1];
1452   - if (abs(cjv[0]) > abs(cjv[1])) {
  1473 + if (::abs(cjv[0]) > ::abs(cjv[1])) {
1453 1474 cyv[lb+1] = (cjv[lb+1]*cyv0-2.0*cp11/(M_PI*z))/cjv[0];
1454 1475 cyv[lb] = (cjv[lb]*cyv0+2.0*cp12/(M_PI*z))/cjv[0];
1455 1476 }
... ... @@ -1474,8 +1495,8 @@ int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
1474 1495 cyl2 = cylk;
1475 1496 }
1476 1497 for (k=2;k<=n;k++) {
1477   - wa = abs(cyv[k]);
1478   - if (wa < abs(cyv[k-1])) lb = k;
  1498 + wa = ::abs(cyv[k]);
  1499 + if (wa < ::abs(cyv[k-1])) lb = k;
1479 1500 }
1480 1501 }
1481 1502 }
... ... @@ -1494,15 +1515,21 @@ int cbessjyva_sph(int v,complex<P> z,P &vm,complex<P>*cjv,
1494 1515 //first, compute the bessel functions of fractional order
1495 1516 cbessjyva<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
1496 1517  
  1518 + if(z == 0){ //handle degenerate case of z = 0
  1519 + memset(cjv, 0, sizeof(P) * (v+1));
  1520 + cjv[0] = 1;
  1521 + }
  1522 +
1497 1523 //iterate through each and scale
1498 1524 for(int n = 0; n<=v; n++)
1499 1525 {
  1526 + if(z != 0){ //handle degenerate case of z = 0
  1527 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  1528 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
  1529 + }
1500 1530  
1501   - cjv[n] = cjv[n] * sqrt(rtsPI/(z * 2.0));
1502   - cyv[n] = cyv[n] * sqrt(rtsPI/(z * 2.0));
1503   -
1504   - cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(rtsPI / (z * 2.0));
1505   - cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(rtsPI / (z * 2.0));
  1531 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
  1532 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
1506 1533 }
1507 1534  
1508 1535 return 0;
... ...
stim/math/circle.h
... ... @@ -17,7 +17,7 @@ class circle : plane<T>
17 17  
18 18 private:
19 19  
20   - stim::vec<T> Y;
  20 + stim::vec3<T> Y;
21 21  
22 22 CUDA_CALLABLE void
23 23 init()
... ... @@ -48,7 +48,7 @@ public:
48 48 circle(T size, T z_pos = (T)0) : plane<T>()
49 49 {
50 50 init();
51   - center(stim::vec<T>(0,0,z_pos));
  51 + center(stim::vec3<T>(0,0,z_pos));
52 52 scale(size);
53 53 }
54 54  
... ... @@ -56,7 +56,7 @@ public:
56 56 ///@param c: x,y,z location of the center.
57 57 ///@param n: x,y,z direction of the normal.
58 58 CUDA_CALLABLE
59   - circle(vec<T> c, vec<T> n = vec<T>(0,0,1)) : plane<T>()
  59 + circle(vec3<T> c, vec3<T> n = vec3<T>(0,0,1)) : plane<T>()
60 60 {
61 61 center(c);
62 62 normal(n);
... ... @@ -68,7 +68,7 @@ public:
68 68 ///@param s: size of the rectangle.
69 69 ///@param n: x,y,z direction of the normal.
70 70 CUDA_CALLABLE
71   - circle(vec<T> c, T s, vec<T> n = vec<T>(0,0,1)) : plane<T>()
  71 + circle(vec3<T> c, T s, vec3<T> n = vec3<T>(0,0,1)) : plane<T>()
72 72 {
73 73 init();
74 74 center(c);
... ... @@ -82,7 +82,7 @@ public:
82 82 ///@param n: x,y,z direction of the normal.
83 83 ///@param u: x,y,z direction for the zero vector (from where the rotation starts)
84 84 CUDA_CALLABLE
85   - circle(vec<T> c, T s, vec<T> n = vec<T>(0,0,1), vec<T> u = vec<T>(1, 0, 0)) : plane<T>()
  85 + circle(vec3<T> c, T s, vec3<T> n = vec3<T>(0,0,1), vec3<T> u = vec3<T>(1, 0, 0)) : plane<T>()
86 86 {
87 87 init();
88 88 setU(u);
... ... @@ -103,16 +103,15 @@ public:
103 103 ///sets the normal for the cirlce
104 104 ///@param n: x,y,z direction of the normal.
105 105 CUDA_CALLABLE void
106   - normal(vec<T> n)
  106 + normal(vec3<T> n)
107 107 {
108 108 rotate(n, Y);
109 109 }
110 110  
111 111 ///sets the center of the circle.
112 112 ///@param n: x,y,z location of the center.
113   - CUDA_CALLABLE T
114   - center(vec<T> p)
115   - {
  113 + CUDA_CALLABLE void
  114 + center(vec3<T> p){
116 115 this->P = p;
117 116 }
118 117  
... ... @@ -127,17 +126,17 @@ public:
127 126 }
128 127  
129 128 ///get the world space value given the planar coordinates a, b in [0, 1]
130   - CUDA_CALLABLE stim::vec<T> p(T a, T b)
  129 + CUDA_CALLABLE stim::vec3<T> p(T a, T b)
131 130 {
132   - stim::vec<T> result;
  131 + stim::vec3<T> result;
133 132  
134   - vec<T> A = this->P - this->U * (T)0.5 - Y * (T)0.5;
  133 + vec3<T> A = this->P - this->U * (T)0.5 - Y * (T)0.5;
135 134 result = A + this->U * a + Y * b;
136 135 return result;
137 136 }
138 137  
139 138 ///parenthesis operator returns the world space given rectangular coordinates a and b in [0 1]
140   - CUDA_CALLABLE stim::vec<T> operator()(T a, T b)
  139 + CUDA_CALLABLE stim::vec3<T> operator()(T a, T b)
141 140 {
142 141 return p(a,b);
143 142 }
... ... @@ -145,11 +144,11 @@ public:
145 144 ///returns a vector with the points on the initialized circle.
146 145 ///connecting the points results in a circle.
147 146 ///@param n: integer for the number of points representing the circle.
148   - std::vector<stim::vec<T> >
  147 + std::vector<stim::vec3<T> >
149 148 getPoints(int n)
150 149 {
151   - std::vector<stim::vec<T> > result;
152   - stim::vec<T> point;
  150 + std::vector<stim::vec3<T> > result;
  151 + stim::vec3<T> point;
153 152 T x,y;
154 153 float step = 360.0/(float) n;
155 154 for(float j = 0; j <= 360.0; j += step)
... ... @@ -164,7 +163,7 @@ public:
164 163 ///returns a vector with the points on the initialized circle.
165 164 ///connecting the points results in a circle.
166 165 ///@param n: integer for the number of points representing the circle.
167   - stim::vec<T>
  166 + stim::vec3<T>
168 167 p(T theta)
169 168 {
170 169 T x,y;
... ...
stim/math/complex.h
1   -/*RTS Complex number class. This class is CUDA compatible,
2   -and can therefore be used in CUDA code and on CUDA devices.
3   -*/
  1 +/// CUDA compatible complex number class
4 2  
5   -#ifndef RTS_COMPLEX
6   -#define RTS_COMPLEX
  3 +#ifndef STIM_COMPLEX
  4 +#define STIM_COMPLEX
7 5  
8   -#include "../cuda/callable.h"
  6 +#include "../cuda/cudatools/callable.h"
9 7 #include <cmath>
10 8 #include <string>
11 9 #include <sstream>
... ... @@ -13,6 +11,7 @@ and can therefore be used in CUDA code and on CUDA devices.
13 11  
14 12 namespace stim
15 13 {
  14 + enum complexComponentType {complexReal, complexImaginary, complexMag};
16 15  
17 16 template <class T>
18 17 struct complex
... ... @@ -230,12 +229,6 @@ struct complex
230 229 return result;
231 230 }
232 231  
233   - /*CUDA_CALLABLE complex<T> pow(int y)
234   - {
235   -
236   - return pow((double)y);
237   - }*/
238   -
239 232 CUDA_CALLABLE complex<T> pow(T y)
240 233 {
241 234 complex<T> result;
... ... @@ -328,8 +321,31 @@ struct complex
328 321 return *this;
329 322 }
330 323  
  324 +
  325 +
331 326 };
332 327  
  328 +/// Cast an array of complex values to an array of real values
  329 +template<typename T>
  330 +static void real(T* r, complex<T>* c, size_t n){
  331 + for(size_t i = 0; i < n; i++)
  332 + r[i] = c[i].real();
  333 +}
  334 +
  335 +/// Cast an array of complex values to an array of real values
  336 +template<typename T>
  337 +static void imag(T* r, complex<T>* c, size_t n){
  338 + for(size_t i = 0; i < n; i++)
  339 + r[i] = c[i].imag();
  340 +}
  341 +
  342 +/// Calculate the magnitude of an array of complex values
  343 +template<typename T>
  344 +static void abs(T* m, complex<T>* c, size_t n){
  345 + for(size_t i = 0; i < n; i++)
  346 + m[i] = c[i].abs();
  347 +}
  348 +
333 349 } //end RTS namespace
334 350  
335 351 //addition
... ... @@ -432,17 +448,6 @@ CUDA_CALLABLE static T imag(stim::complex<T> a)
432 448 return a.i;
433 449 }
434 450  
435   -//trigonometric functions
436   -//template<class A>
437   -/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x)
438   -{
439   - stim::complex<float> result;
440   - result.r = sinf(x.r) * coshf(x.i);
441   - result.i = cosf(x.r) * sinhf(x.i);
442   -
443   - return result;
444   -}*/
445   -
446 451 template<class A>
447 452 CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)
448 453 {
... ... @@ -453,17 +458,6 @@ CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)
453 458 return result;
454 459 }
455 460  
456   -//floating point template
457   -//template<class A>
458   -/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x)
459   -{
460   - stim::complex<float> result;
461   - result.r = cosf(x.r) * coshf(x.i);
462   - result.i = -(sinf(x.r) * sinhf(x.i));
463   -
464   - return result;
465   -}*/
466   -
467 461 template<class A>
468 462 CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x)
469 463 {
... ... @@ -496,10 +490,4 @@ std::istream& operator>>(std::istream& is, stim::complex<A>& x)
496 490 return is; //return the stream
497 491 }
498 492  
499   -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
500   -//template<class T> using rtsComplex = stim::complex<T>;
501   -//#endif
502   -
503   -
504   -
505 493 #endif
... ...
stim/math/constants.h
1   -#ifndef RTS_CONSTANTS_H
2   -#define RTS_CONSTANTS_H
  1 +#ifndef STIM_CONSTANTS_H
  2 +#define STIM_CONSTANTS_H
3 3  
4   -#define stimPI 3.14159
5   -#define stimTAU 2 * rtsPI
  4 +#include "stim/cuda/cudatools/callable.h"
  5 +namespace stim{
  6 + const double PI = 3.1415926535897932384626433832795028841971693993751058209749445923078164062862;
  7 + const double TAU = 2 * stim::PI;
  8 +}
6 9  
7 10 #endif
... ...
stim/math/fft.h 0 → 100644
  1 +#ifndef STIM_FFT_H
  2 +#define STIM_FFT_H
  3 +
  4 +namespace stim{
  5 +
  6 + template<class T>
  7 + void circshift(T *out, const T *in, size_t xdim, size_t ydim, size_t xshift, size_t yshift){
  8 + size_t i, j, ii, jj;
  9 + for (i =0; i < xdim; i++) {
  10 + ii = (i + xshift) % xdim;
  11 + for (j = 0; j < ydim; j++) {
  12 + jj = (j + yshift) % ydim;
  13 + out[ii * ydim + jj] = in[i * ydim + j];
  14 + }
  15 + }
  16 + }
  17 +
  18 + template<typename T>
  19 + void cpu_fftshift(T* out, T* in, size_t xdim, size_t ydim){
  20 + circshift(out, in, xdim, ydim, xdim/2, ydim/2);
  21 + }
  22 +
  23 + template<typename T>
  24 + void cpu_ifftshift(T* out, T* in, size_t xdim, size_t ydim){
  25 + circshift(out, in, xdim, ydim, xdim/2, ydim/2);
  26 + }
  27 +
  28 +
  29 +}
  30 +
  31 +#endif
0 32 \ No newline at end of file
... ...
stim/math/legendre.h
1 1 #ifndef RTS_LEGENDRE_H
2 2 #define RTS_LEGENDRE_H
3 3  
4   -#include "rts/cuda/callable.h"
  4 +#include "../cuda/cudatools/callable.h"
5 5  
6 6 namespace stim{
7 7  
... ... @@ -24,9 +24,11 @@ CUDA_CALLABLE void shift_legendre(int n, T x, T& P0, T& P1)
24 24 P1 = Pnew;
25 25 }
26 26  
  27 +/// Iteratively evaluates the Legendre polynomials for orders l = [0 n]
27 28 template <typename T>
28 29 CUDA_CALLABLE void legendre(int n, T x, T* P)
29 30 {
  31 + if(n < 0) return;
30 32 P[0] = 1;
31 33  
32 34 if(n >= 1)
... ...
stim/math/matrix.h
... ... @@ -5,6 +5,7 @@
5 5 #include <string.h>
6 6 #include <iostream>
7 7 #include <stim/math/vector.h>
  8 +#include <stim/math/vec3.h>
8 9 #include <stim/cuda/cudatools/callable.h>
9 10  
10 11 namespace stim{
... ... @@ -50,10 +51,8 @@ struct matrix
50 51 return *this;
51 52 }
52 53  
53   -
54 54 template<typename Y>
55   - CUDA_CALLABLE vec<Y> operator*(vec<Y> rhs)
56   - {
  55 + vec<Y> operator*(vec<Y> rhs){
57 56 unsigned int N = rhs.size();
58 57  
59 58 vec<Y> result;
... ... @@ -66,6 +65,16 @@ struct matrix
66 65 return result;
67 66 }
68 67  
  68 + template<typename Y>
  69 + CUDA_CALLABLE vec3<Y> operator*(vec3<Y> rhs){
  70 + vec3<Y> result = 0;
  71 + for(int r=0; r<3; r++)
  72 + for(int c=0; c<3; c++)
  73 + result[r] += (*this)(r, c) * rhs[c];
  74 +
  75 + return result;
  76 + }
  77 +
69 78 std::string toStr()
70 79 {
71 80 std::stringstream ss;
... ... @@ -82,10 +91,6 @@ struct matrix
82 91  
83 92 return ss.str();
84 93 }
85   -
86   -
87   -
88   -
89 94 };
90 95  
91 96 } //end namespace rts
... ...
stim/math/meshgrid.h 0 → 100644
  1 +#ifndef STIM_MESHGRID_H
  2 +#define STIM_MESHGRID_H
  3 +
  4 +namespace stim{
  5 +
  6 + /// Create a 2D grid based on a pair of vectors representing the grid spacing (see Matlab)
  7 + /// @param X is an [nx x ny] array that will store the X coordinates for each 2D point
  8 + /// @param Y is an [nx x ny] array that will store the Y coordinates for each 2D point
  9 + /// @param x is an [nx] array that provides the positions of grid points in the x direction
  10 + /// @param nx is the number of grid points in the x direction
  11 + /// @param y is an [ny] array that provides the positions of grid points in the y direction
  12 + /// @param ny is the number of grid points in the y direction
  13 + template<typename T>
  14 + void meshgrid(T* X, T* Y, T* x, size_t nx, T* y, size_t ny){
  15 + size_t xi, yi; //allocate index variables
  16 + for(yi = 0; yi < ny; yi++){ //iterate through each column
  17 + for(xi = 0; xi < nx; xi++){ //iterate through each row
  18 + X[yi * nx + xi] = x[xi];
  19 + Y[yi * nx + xi] = y[yi];
  20 + }
  21 + }
  22 + }
  23 +
  24 + /// Creates an array of n equally spaced values in the range [xmin xmax]
  25 + /// @param X is an array of length n that stores the values
  26 + /// @param xmin is the start point of the array
  27 + /// @param xmax is the end point of the array
  28 + /// @param n is the number of points in the array
  29 + template<typename T>
  30 + void linspace(T* X, T xmin, T xmax, size_t n){
  31 + T alpha;
  32 + for(size_t i = 0; i < n; i++){
  33 + alpha = (T)i / (T)n;
  34 + X[i] = (1 - alpha) * xmin + alpha * xmax;
  35 + }
  36 + }
  37 +
  38 +
  39 +}
  40 +
  41 +
  42 +#endif
0 43 \ No newline at end of file
... ...
stim/math/plane.h
... ... @@ -2,7 +2,7 @@
2 2 #define STIM_PLANE_H
3 3  
4 4 #include <iostream>
5   -#include <stim/math/vector.h>
  5 +#include <stim/math/vec3.h>
6 6 #include <stim/cuda/cudatools/callable.h>
7 7 #include <stim/math/quaternion.h>
8 8  
... ... @@ -22,17 +22,17 @@ template <typename T>
22 22 class plane
23 23 {
24 24 protected:
25   - stim::vec<T> P;
26   - stim::vec<T> N;
27   - stim::vec<T> U;
  25 + stim::vec3<T> P;
  26 + stim::vec3<T> N;
  27 + stim::vec3<T> U;
28 28  
29 29 ///Initializes the plane with standard coordinates.
30 30 ///
31 31 CUDA_CALLABLE void init()
32 32 {
33   - P = stim::vec<T>(0, 0, 0);
34   - N = stim::vec<T>(0, 0, 1);
35   - U = stim::vec<T>(1, 0, 0);
  33 + P = stim::vec3<T>(0, 0, 0);
  34 + N = stim::vec3<T>(0, 0, 1);
  35 + U = stim::vec3<T>(1, 0, 0);
36 36 }
37 37  
38 38 public:
... ... @@ -42,7 +42,7 @@ class plane
42 42 init();
43 43 }
44 44  
45   - CUDA_CALLABLE plane(vec<T> n, vec<T> p = vec<T>(0, 0, 0))
  45 + CUDA_CALLABLE plane(vec3<T> n, vec3<T> p = vec3<T>(0, 0, 0))
46 46 {
47 47 init();
48 48 P = p;
... ... @@ -56,11 +56,11 @@ class plane
56 56 }
57 57  
58 58 //create a plane from three points (a triangle)
59   - CUDA_CALLABLE plane(vec<T> a, vec<T> b, vec<T> c)
  59 + CUDA_CALLABLE plane(vec3<T> a, vec3<T> b, vec3<T> c)
60 60 {
61 61 init();
62 62 P = c;
63   - stim::vec<T> n = (c - a).cross(b - a);
  63 + stim::vec3<T> n = (c - a).cross(b - a);
64 64 try
65 65 {
66 66 if(n.len() != 0)
... ... @@ -84,17 +84,17 @@ class plane
84 84  
85 85 }
86 86  
87   - CUDA_CALLABLE vec<T> n()
  87 + CUDA_CALLABLE vec3<T> n()
88 88 {
89 89 return N;
90 90 }
91 91  
92   - CUDA_CALLABLE vec<T> p()
  92 + CUDA_CALLABLE vec3<T> p()
93 93 {
94 94 return P;
95 95 }
96 96  
97   - CUDA_CALLABLE vec<T> u()
  97 + CUDA_CALLABLE vec3<T> u()
98 98 {
99 99 return U;
100 100 }
... ... @@ -107,7 +107,7 @@ class plane
107 107 }
108 108  
109 109 //determines how a vector v intersects the plane (1 = intersects front, 0 = within plane, -1 = intersects back)
110   - CUDA_CALLABLE int face(vec<T> v){
  110 + CUDA_CALLABLE int face(vec3<T> v){
111 111  
112 112 T dprod = v.dot(N); //get the dot product between v and N
113 113  
... ... @@ -121,46 +121,46 @@ class plane
121 121 }
122 122  
123 123 //determine on which side of the plane a point lies (1 = front, 0 = on the plane, -1 = bac k)
124   - CUDA_CALLABLE int side(vec<T> p){
  124 + CUDA_CALLABLE int side(vec3<T> p){
125 125  
126   - vec<T> v = p - P; //get the vector from P to the query point p
  126 + vec3<T> v = p - P; //get the vector from P to the query point p
127 127  
128 128 return face(v);
129 129 }
130 130  
131 131 //compute the component of v that is perpendicular to the plane
132   - CUDA_CALLABLE vec<T> perpendicular(vec<T> v){
  132 + CUDA_CALLABLE vec3<T> perpendicular(vec3<T> v){
133 133 return N * v.dot(N);
134 134 }
135 135  
136 136 //compute the projection of v in the plane
137   - CUDA_CALLABLE vec<T> parallel(vec<T> v){
  137 + CUDA_CALLABLE vec3<T> parallel(vec3<T> v){
138 138 return v - perpendicular(v);
139 139 }
140 140  
141   - CUDA_CALLABLE void setU(vec<T> v)
  141 + CUDA_CALLABLE void setU(vec3<T> v)
142 142 {
143 143 U = (parallel(v.norm())).norm();
144 144 }
145 145  
146   - CUDA_CALLABLE void decompose(vec<T> v, vec<T>& para, vec<T>& perp){
  146 + CUDA_CALLABLE void decompose(vec3<T> v, vec3<T>& para, vec3<T>& perp){
147 147 perp = N * v.dot(N);
148 148 para = v - perp;
149 149 }
150 150  
151 151 //get both the parallel and perpendicular components of a vector v w.r.t. the plane
152   - CUDA_CALLABLE void project(vec<T> v, vec<T> &v_par, vec<T> &v_perp){
  152 + CUDA_CALLABLE void project(vec3<T> v, vec3<T> &v_par, vec3<T> &v_perp){
153 153  
154 154 v_perp = v.dot(N);
155 155 v_par = v - v_perp;
156 156 }
157 157  
158 158 //compute the reflection of v off of the plane
159   - CUDA_CALLABLE vec<T> reflect(vec<T> v){
  159 + CUDA_CALLABLE vec3<T> reflect(vec3<T> v){
160 160  
161 161 //compute the reflection using N_prime as the plane normal
162   - vec<T> par = parallel(v);
163   - vec<T> r = (-v) + par * 2;
  162 + vec3<T> par = parallel(v);
  163 + vec3<T> r = (-v) + par * 2;
164 164 return r;
165 165  
166 166 }
... ... @@ -184,7 +184,7 @@ class plane
184 184 }
185 185  
186 186  
187   - CUDA_CALLABLE void rotate(vec<T> n)
  187 + CUDA_CALLABLE void rotate(vec3<T> n)
188 188 {
189 189 quaternion<T> q;
190 190 q.CreateRotation(N, n);
... ... @@ -194,7 +194,7 @@ class plane
194 194  
195 195 }
196 196  
197   - CUDA_CALLABLE void rotate(vec<T> n, vec<T> &Y)
  197 + CUDA_CALLABLE void rotate(vec3<T> n, vec3<T> &Y)
198 198 {
199 199 quaternion<T> q;
200 200 q.CreateRotation(N, n);
... ... @@ -205,7 +205,7 @@ class plane
205 205  
206 206 }
207 207  
208   - CUDA_CALLABLE void rotate(vec<T> n, vec<T> &X, vec<T> &Y)
  208 + CUDA_CALLABLE void rotate(vec3<T> n, vec3<T> &X, vec3<T> &Y)
209 209 {
210 210 quaternion<T> q;
211 211 q.CreateRotation(N, n);
... ...
stim/math/plane_old.h deleted
1   -#ifndef RTS_PLANE_H
2   -#define RTS_PLANE_H
3   -
4   -#include <iostream>
5   -#include <stim/math/vector.h>
6   -#include "rts/cuda/callable.h"
7   -
8   -
9   -namespace stim{
10   -template <typename T, int D> class plane;
11   -}
12   -
13   -template <typename T, int D>
14   -CUDA_CALLABLE stim::plane<T, D> operator-(stim::plane<T, D> v);
15   -
16   -namespace stim{
17   -
18   -template <class T, int D = 3>
19   -class plane{
20   -
21   - //a plane is defined by a point and a normal
22   -
23   -private:
24   -
25   - vec<T, D> P; //point on the plane
26   - vec<T, D> N; //plane normal
27   -
28   - CUDA_CALLABLE void init(){
29   - P = vec<T, D>(0, 0, 0);
30   - N = vec<T, D>(0, 0, 1);
31   - }
32   -
33   -
34   -public:
35   -
36   - //default constructor
37   - CUDA_CALLABLE plane(){
38   - init();
39   - }
40   -
41   - CUDA_CALLABLE plane(vec<T, D> n, vec<T, D> p = vec<T, D>(0, 0, 0)){
42   - P = p;
43   - N = n.norm();
44   - }
45   -
46   - CUDA_CALLABLE plane(T z_pos){
47   - init();
48   - P[2] = z_pos;
49   - }
50   -
51   - //create a plane from three points (a triangle)
52   - CUDA_CALLABLE plane(vec<T, D> a, vec<T, D> b, vec<T, D> c){
53   - P = c;
54   - N = (c - a).cross(b - a);
55   - if(N.len() == 0) //handle the degenerate case when two vectors are the same, N = 0
56   - N = 0;
57   - else
58   - N = N.norm();
59   - }
60   -
61   - template< typename U >
62   - CUDA_CALLABLE operator plane<U, D>(){
63   -
64   - plane<U, D> result(N, P);
65   - return result;
66   - }
67   -
68   - CUDA_CALLABLE vec<T, D> norm(){
69   - return N;
70   - }
71   -
72   - CUDA_CALLABLE vec<T, D> p(){
73   - return P;
74   - }
75   -
76   - //flip the plane front-to-back
77   - CUDA_CALLABLE plane<T, D> flip(){
78   - plane<T, D> result = *this;
79   - result.N = -result.N;
80   - return result;
81   - }
82   -
83   - //determines how a vector v intersects the plane (1 = intersects front, 0 = within plane, -1 = intersects back)
84   - CUDA_CALLABLE int face(vec<T, D> v){
85   -
86   - T dprod = v.dot(N); //get the dot product between v and N
87   -
88   - //conditional returns the appropriate value
89   - if(dprod < 0)
90   - return 1;
91   - else if(dprod > 0)
92   - return -1;
93   - else
94   - return 0;
95   - }
96   -
97   - //determine on which side of the plane a point lies (1 = front, 0 = on the plane, -1 = back)
98   - CUDA_CALLABLE int side(vec<T, D> p){
99   -
100   - vec<T, D> v = p - P; //get the vector from P to the query point p
101   -
102   - return face(v);
103   - }
104   -
105   - //compute the component of v that is perpendicular to the plane
106   - CUDA_CALLABLE vec<T, D> perpendicular(vec<T, D> v){
107   - return N * v.dot(N);
108   - }
109   -
110   - //compute the projection of v in the plane
111   - CUDA_CALLABLE vec<T, D> parallel(vec<T, D> v){
112   - return v - perpendicular(v);
113   - }
114   -
115   - CUDA_CALLABLE void decompose(vec<T, D> v, vec<T, D>& para, vec<T, D>& perp){
116   - perp = N * v.dot(N);
117   - para = v - perp;
118   - }
119   -
120   - //get both the parallel and perpendicular components of a vector v w.r.t. the plane
121   - CUDA_CALLABLE void project(vec<T, D> v, vec<T, D> &v_par, vec<T, D> &v_perp){
122   -
123   - v_perp = v.dot(N);
124   - v_par = v - v_perp;
125   - }
126   -
127   - //compute the reflection of v off of the plane
128   - CUDA_CALLABLE vec<T, D> reflect(vec<T, D> v){
129   -
130   - //compute the reflection using N_prime as the plane normal
131   - vec<T, D> par = parallel(v);
132   - vec<T, D> r = (-v) + par * 2;
133   -
134   - /*std::cout<<"----------------REFLECT-----------------------------"<<std::endl;
135   - std::cout<<str()<<std::endl;
136   - std::cout<<"v: "<<v<<std::endl;
137   - std::cout<<"r: "<<r<<std::endl;
138   - std::cout<<"Perpendicular: "<<perpendicular(v)<<std::endl;
139   - std::cout<<"Parallel: "<<par<<std::endl;*/
140   - return r;
141   -
142   - }
143   -
144   - CUDA_CALLABLE rts::plane<T, D> operator-()
145   - {
146   - rts::plane<T, D> p = *this;
147   -
148   - //negate the normal vector
149   - p.N = -p.N;
150   -
151   - return p;
152   - }
153   -
154   - //output a string
155   - std::string str(){
156   - std::stringstream ss;
157   - ss<<"P: "<<P<<std::endl;
158   - ss<<"N: "<<N;
159   - return ss.str();
160   - }
161   -
162   - ///////Friendship
163   - //friend CUDA_CALLABLE rts::plane<T, D> operator- <> (rts::plane<T, D> v);
164   -
165   -
166   -
167   -};
168   -
169   -}
170   -
171   -//arithmetic operators
172   -
173   -//negative operator flips the plane (front to back)
174   -//template <typename T, int D>
175   -
176   -
177   -
178   -
179   -#endif
stim/math/quad.h deleted
1   -#ifndef RTS_QUAD_H
2   -#define RTS_QUAD_H
3   -
4   -//enable CUDA_CALLABLE macro
5   -#include <stim/cuda/callable.h>
6   -#include <stim/math/vector.h>
7   -#include <stim/math/triangle.h>
8   -#include <stim/math/quaternion.h>
9   -#include <iostream>
10   -#include <iomanip>
11   -#include <algorithm>
12   -
13   -namespace stim{
14   -
15   -//template for a quadangle class in ND space
16   -template <class T, int N = 3>
17   -struct quad
18   -{
19   - /*
20   - B------------------>C
21   - ^ ^
22   - | |
23   - Y |
24   - | |
25   - | |
26   - A---------X-------->O
27   - */
28   -
29   - /*T A[N];
30   - T B[N];
31   - T C[N];*/
32   -
33   - rts::vec<T, N> A;
34   - rts::vec<T, N> X;
35   - rts::vec<T, N> Y;
36   -
37   -
38   - CUDA_CALLABLE quad()
39   - {
40   -
41   - }
42   -
43   - CUDA_CALLABLE quad(vec<T, N> a, vec<T, N> b, vec<T, N> c)
44   - {
45   -
46   - A = a;
47   - Y = b - a;
48   - X = c - a - Y;
49   -
50   - }
51   -
52   - /*******************************************************************
53   - Constructor - create a quad from a position, normal, and rotation
54   - *******************************************************************/
55   - CUDA_CALLABLE quad(rts::vec<T, N> c, rts::vec<T, N> normal, T width, T height, T theta)
56   - {
57   -
58   - //compute the X direction - start along world-space X
59   - Y = rts::vec<T, N>(0, 1, 0);
60   - if(Y == normal)
61   - Y = rts::vec<T, N>(0, 0, 1);
62   -
63   - X = Y.cross(normal).norm();
64   -
65   - std::cout<<X<<std::endl;
66   -
67   - //rotate the X axis by theta radians
68   - rts::quaternion<T> q;
69   - q.CreateRotation(theta, normal);
70   - X = q.toMatrix3() * X;
71   - Y = normal.cross(X);
72   -
73   - //normalize everything
74   - X = X.norm();
75   - Y = Y.norm();
76   -
77   - //scale to match the quad width and height
78   - X = X * width;
79   - Y = Y * height;
80   -
81   - //set the corner of the plane
82   - A = c - X * 0.5f - Y * 0.5f;
83   -
84   - std::cout<<X<<std::endl;
85   - }
86   -
87   - //boolean comparison
88   - bool operator==(const quad<T, N> & rhs)
89   - {
90   - if(A == rhs.A && X == rhs.X && Y == rhs.Y)
91   - return true;
92   - else
93   - return false;
94   - }
95   -
96   - /*******************************************
97   - Return the normal for the quad
98   - *******************************************/
99   - CUDA_CALLABLE rts::vec<T, N> n()
100   - {
101   - return (X.cross(Y)).norm();
102   - }
103   -
104   - CUDA_CALLABLE rts::vec<T, N> p(T a, T b)
105   - {
106   - rts::vec<T, N> result;
107   - //given the two parameters a, b = [0 1], returns the position in world space
108   - result = A + X * a + Y * b;
109   -
110   - return result;
111   - }
112   -
113   - CUDA_CALLABLE rts::vec<T, N> operator()(T a, T b)
114   - {
115   - return p(a, b);
116   - }
117   -
118   - std::string str()
119   - {
120   - std::stringstream ss;
121   -
122   - ss<<std::left<<"B="<<setfill('-')<<setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
123   - ss<<setfill(' ')<<setw(23)<<"|"<<"|"<<std::endl<<setw(23)<<"|"<<"|"<<std::endl;
124   - ss<<std::left<<"A="<<setfill('-')<<setw(20)<<A<<">"<<"D="<<A + X;
125   -
126   - return ss.str();
127   -
128   - }
129   -
130   - CUDA_CALLABLE quad<T, N> operator*(T rhs)
131   - {
132   - //scales the plane by a scalar value
133   -
134   - //compute the center point
135   - rts::vec<T, N> c = A + X*0.5f + Y*0.5f;
136   -
137   - //create the new quadangle
138   - quad<T, N> result;
139   - result.X = X * rhs;
140   - result.Y = Y * rhs;
141   - result.A = c - result.X*0.5f - result.Y*0.5f;
142   -
143   - return result;
144   -
145   - }
146   -
147   - CUDA_CALLABLE T dist(vec<T, N> p)
148   - {
149   - //compute the distance between a point and this quad
150   -
151   - //first break the quad up into two triangles
152   - triangle<T, N> T0(A, A+X, A+Y);
153   - triangle<T, N> T1(A+X+Y, A+X, A+Y);
154   -
155   -
156   - T d0 = T0.dist(p);
157   - T d1 = T1.dist(p);
158   -
159   - if(d0 < d1)
160   - return d0;
161   - else
162   - return d1;
163   - }
164   -
165   - CUDA_CALLABLE T dist_max(vec<T, N> p)
166   - {
167   - T da = (A - p).len();
168   - T db = (A+X - p).len();
169   - T dc = (A+Y - p).len();
170   - T dd = (A+X+Y - p).len();
171   -
172   - return std::max( da, std::max(db, std::max(dc, dd) ) );
173   - }
174   -};
175   -
176   -} //end namespace rts
177   -
178   -template <typename T, int N>
179   -std::ostream& operator<<(std::ostream& os, rts::quad<T, N> R)
180   -{
181   - os<<R.str();
182   - return os;
183   -}
184   -
185   -
186   -#endif
stim/math/quaternion.h
... ... @@ -26,13 +26,13 @@ public:
26 26  
27 27 CUDA_CALLABLE void CreateRotation(T theta, T ux, T uy, T uz){
28 28  
29   - vec<T> u(ux, uy, uz);
  29 + vec3<T> u(ux, uy, uz);
30 30 CreateRotation(theta, u);
31 31 }
32 32  
33   - CUDA_CALLABLE void CreateRotation(T theta, vec<T> u){
  33 + CUDA_CALLABLE void CreateRotation(T theta, vec3<T> u){
34 34  
35   - vec<T> u_hat = u.norm();
  35 + vec3<T> u_hat = u.norm();
36 36  
37 37 //assign the given Euler rotation to this quaternion
38 38 w = (T)cos(theta/2);
... ... @@ -41,9 +41,11 @@ public:
41 41 z = u_hat[2]*(T)sin(theta/2);
42 42 }
43 43  
44   - void CreateRotation(vec<T> from, vec<T> to){
  44 + CUDA_CALLABLE void CreateRotation(vec3<T> from, vec3<T> to){
45 45  
46   - vec<T> r = from.cross(to); //compute the rotation vector
  46 + from = from.norm();
  47 + to = to.norm();
  48 + vec3<T> r = from.cross(to); //compute the rotation vector
47 49 T theta = asin(r.len()); //compute the angle of the rotation about r
48 50 //deal with a zero vector (both k and kn point in the same direction)
49 51 if(theta == (T)0){
... ...
stim/math/rect.h
... ... @@ -28,13 +28,10 @@ class rect : plane <T>
28 28 O---------X--------->
29 29 */
30 30  
31   -private:
32   -
33   - stim::vec<T> X;
34   - stim::vec<T> Y;
35   -
36   -
  31 +protected:
37 32  
  33 + stim::vec3<T> X;
  34 + stim::vec3<T> Y;
38 35  
39 36 public:
40 37  
... ... @@ -65,7 +62,7 @@ public:
65 62 ///create a rectangle from a center point, normal
66 63 ///@param c: x,y,z location of the center.
67 64 ///@param n: x,y,z direction of the normal.
68   - CUDA_CALLABLE rect(vec<T> c, vec<T> n = vec<T>(0, 0, 1))
  65 + CUDA_CALLABLE rect(vec3<T> c, vec3<T> n = vec3<T>(0, 0, 1))
69 66 : plane<T>()
70 67 {
71 68 init(); //start with the default setting
... ... @@ -76,7 +73,7 @@ public:
76 73 ///@param c: x,y,z location of the center.
77 74 ///@param s: size of the rectangle.
78 75 ///@param n: x,y,z direction of the normal.
79   - CUDA_CALLABLE rect(vec<T> c, T s, vec<T> n = vec<T>(0, 0, 1))
  76 + CUDA_CALLABLE rect(vec3<T> c, T s, vec3<T> n = vec3<T>(0, 0, 1))
80 77 : plane<T>()
81 78 {
82 79 init(); //start with the default setting
... ... @@ -89,7 +86,7 @@ public:
89 86 ///@param center: x,y,z location of the center.
90 87 ///@param directionX: u,v,w direction of the X vector.
91 88 ///@param directionY: u,v,w direction of the Y vector.
92   - CUDA_CALLABLE rect(vec<T> center, vec<T> directionX, vec<T> directionY )
  89 + CUDA_CALLABLE rect(vec3<T> center, vec3<T> directionX, vec3<T> directionY )
93 90 : plane<T>((directionX.cross(directionY)).norm(),center)
94 91 {
95 92 X = directionX;
... ... @@ -101,7 +98,7 @@ public:
101 98 ///@param center: x,y,z location of the center.
102 99 ///@param directionX: u,v,w direction of the X vector.
103 100 ///@param directionY: u,v,w direction of the Y vector.
104   - CUDA_CALLABLE rect(T size, vec<T> center, vec<T> directionX, vec<T> directionY )
  101 + CUDA_CALLABLE rect(T size, vec3<T> center, vec3<T> directionX, vec3<T> directionY )
105 102 : plane<T>((directionX.cross(directionY)).norm(),center)
106 103 {
107 104 X = directionX;
... ... @@ -114,7 +111,7 @@ public:
114 111 ///@param center: x,y,z location of the center.
115 112 ///@param directionX: u,v,w direction of the X vector.
116 113 ///@param directionY: u,v,w direction of the Y vector.
117   - CUDA_CALLABLE rect(vec<T> size, vec<T> center, vec<T> directionX, vec<T> directionY)
  114 + CUDA_CALLABLE rect(vec3<T> size, vec3<T> center, vec3<T> directionX, vec3<T> directionY)
118 115 : plane<T>((directionX.cross(directionY)).norm(), center)
119 116 {
120 117 X = directionX;
... ... @@ -138,7 +135,7 @@ public:
138 135  
139 136 ///@param n; vector with the normal.
140 137 ///Orients the rectangle along the normal n.
141   - CUDA_CALLABLE void normal(vec<T> n)
  138 + CUDA_CALLABLE void normal(vec3<T> n)
142 139 {
143 140 //orient the rectangle along the specified normal
144 141 rotate(n, X, Y);
... ... @@ -147,8 +144,8 @@ public:
147 144 ///general init method that sets a general rectangle.
148 145 CUDA_CALLABLE void init()
149 146 {
150   - X = vec<T>(1, 0, 0);
151   - Y = vec<T>(0, 1, 0);
  147 + X = vec3<T>(1, 0, 0);
  148 + Y = vec3<T>(0, 1, 0);
152 149 }
153 150  
154 151 //boolean comparison
... ... @@ -162,18 +159,18 @@ public:
162 159  
163 160  
164 161 //get the world space value given the planar coordinates a, b in [0, 1]
165   - CUDA_CALLABLE stim::vec<T> p(T a, T b)
  162 + CUDA_CALLABLE stim::vec3<T> p(T a, T b)
166 163 {
167   - stim::vec<T> result;
  164 + stim::vec3<T> result;
168 165 //given the two parameters a, b = [0 1], returns the position in world space
169   - vec<T> A = this->P - X * (T)0.5 - Y * (T)0.5;
  166 + vec3<T> A = this->P - X * (T)0.5 - Y * (T)0.5;
170 167 result = A + X * a + Y * b;
171 168  
172 169 return result;
173 170 }
174 171  
175 172 //parenthesis operator returns the world space given rectangular coordinates a and b in [0 1]
176   - CUDA_CALLABLE stim::vec<T> operator()(T a, T b)
  173 + CUDA_CALLABLE stim::vec3<T> operator()(T a, T b)
177 174 {
178 175 return p(a, b);
179 176 }
... ... @@ -181,12 +178,12 @@ public:
181 178 std::string str()
182 179 {
183 180 std::stringstream ss;
184   - vec<T> A = P - X * (T)0.5 - Y * (T)0.5;
  181 + vec3<T> A = P - X * (T)0.5 - Y * (T)0.5;
185 182 ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
186 183 ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl;
187 184 ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X;
188 185  
189   - return ss.str();
  186 + return ss.str();
190 187  
191 188 }
192 189  
... ... @@ -205,11 +202,11 @@ public:
205 202  
206 203 ///computes the distance between the specified point and this rectangle.
207 204 ///@param p: x, y, z coordinates of the point to calculate distance to.
208   - CUDA_CALLABLE T dist(vec<T> p)
  205 + CUDA_CALLABLE T dist(vec3<T> p)
209 206 {
210 207 //compute the distance between a point and this rect
211 208  
212   - vec<T> A = P - X * (T)0.5 - Y * (T)0.5;
  209 + vec3<T> A = P - X * (T)0.5 - Y * (T)0.5;
213 210  
214 211 //first break the rect up into two triangles
215 212 triangle<T> T0(A, A+X, A+Y);
... ... @@ -225,16 +222,16 @@ public:
225 222 return d1;
226 223 }
227 224  
228   - CUDA_CALLABLE T center(vec<T> p)
  225 + CUDA_CALLABLE T center(vec3<T> p)
229 226 {
230 227 this->P = p;
231 228 }
232 229  
233 230 ///Returns the maximum distance of the rectangle from a point p to the sides of the rectangle.
234 231 ///@param p: x, y, z point.
235   - CUDA_CALLABLE T dist_max(vec<T> p)
  232 + CUDA_CALLABLE T dist_max(vec3<T> p)
236 233 {
237   - vec<T> A = P - X * (T)0.5 - Y * (T)0.5;
  234 + vec3<T> A = P - X * (T)0.5 - Y * (T)0.5;
238 235 T da = (A - p).len();
239 236 T db = (A+X - p).len();
240 237 T dc = (A+Y - p).len();
... ...
stim/math/vec3.h 0 → 100644
  1 +#ifndef STIM_VEC3_H
  2 +#define STIM_VEC3_H
  3 +
  4 +
#include <cmath>
#include <cstddef>
#include <sstream>
#include <string>

#include <stim/cuda/cudatools/callable.h>
  6 +
  7 +
  8 +namespace stim{
  9 +
  10 +
  11 +/// A class designed to act as a 3D vector with CUDA compatibility
  12 +template<typename T>
  13 +class vec3{
  14 +
  15 +protected:
  16 + T ptr[3];
  17 +
  18 +public:
  19 +
  20 + CUDA_CALLABLE vec3(){}
  21 +
  22 + CUDA_CALLABLE vec3(T v){
  23 + ptr[0] = ptr[1] = ptr[2] = v;
  24 + }
  25 +
  26 + CUDA_CALLABLE vec3(T x, T y, T z){
  27 + ptr[0] = x;
  28 + ptr[1] = y;
  29 + ptr[2] = z;
  30 + }
  31 +
  32 + //copy constructor
  33 + CUDA_CALLABLE vec3( const vec3<T>& other){
  34 + ptr[0] = other.ptr[0];
  35 + ptr[1] = other.ptr[1];
  36 + ptr[2] = other.ptr[2];
  37 + }
  38 +
  39 + //access an element using an index
  40 + CUDA_CALLABLE T& operator[](int idx){
  41 + return ptr[idx];
  42 + }
  43 +
  44 +/// Casting operator. Creates a new vector with a new type U.
  45 + template< typename U >
  46 + CUDA_CALLABLE operator vec3<U>(){
  47 + vec3<U> result;
  48 + result.ptr[0] = (U)ptr[0];
  49 + result.ptr[1] = (U)ptr[1];
  50 + result.ptr[2] = (U)ptr[2];
  51 +
  52 + return result;
  53 + }
  54 +
  55 + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
  56 + CUDA_CALLABLE T len_sq() const{
  57 + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
  58 + }
  59 +
  60 + /// computes the Euclidean length of the vector
  61 + CUDA_CALLABLE T len() const{
  62 + return sqrt(len_sq());
  63 + }
  64 +
  65 +
  66 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  67 + CUDA_CALLABLE vec3<T> cart2sph() const{
  68 + vec3<T> sph;
  69 + sph.ptr[0] = len();
  70 + sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
  71 + if(sph.ptr[0] == 0)
  72 + sph.ptr[2] = 0;
  73 + else
  74 + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
  75 + return sph;
  76 + }
  77 +
  78 + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi])
  79 + CUDA_CALLABLE vec3<T> sph2cart() const{
  80 + vec3<T> cart;
  81 + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
  82 + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
  83 + cart.ptr[2] = ptr[0] * std::cos(ptr[2]);
  84 +
  85 + return cart;
  86 + }
  87 +
  88 + /// Computes the normalized vector (where each coordinate is divided by the L2 norm)
  89 + CUDA_CALLABLE vec3<T> norm() const{
  90 + vec3<T> result;
  91 + T l = len(); //compute the vector length
  92 + return (*this) / l;
  93 + }
  94 +
  95 + /// Computes the cross product of a 3-dimensional vector
  96 + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{
  97 +
  98 + vec3<T> result;
  99 +
  100 + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
  101 + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
  102 + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
  103 +
  104 + return result;
  105 + }
  106 +
  107 + /// Compute the Euclidean inner (dot) product
  108 + CUDA_CALLABLE T dot(vec3<T> rhs) const{
  109 + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
  110 + }
  111 +
  112 + /// Arithmetic addition operator
  113 +
  114 + /// @param rhs is the right-hand-side operator for the addition
  115 + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
  116 + vec3<T> result;
  117 + result.ptr[0] = ptr[0] + rhs[0];
  118 + result.ptr[1] = ptr[1] + rhs[1];
  119 + result.ptr[2] = ptr[2] + rhs[2];
  120 + return result;
  121 + }
  122 +
  123 + /// Arithmetic addition to a scalar
  124 +
  125 + /// @param rhs is the right-hand-side operator for the addition
  126 + CUDA_CALLABLE vec3<T> operator+(T rhs) const{
  127 + vec3<T> result;
  128 + result.ptr[0] = ptr[0] + rhs;
  129 + result.ptr[1] = ptr[1] + rhs;
  130 + result.ptr[2] = ptr[2] + rhs;
  131 + return result;
  132 + }
  133 +
  134 + /// Arithmetic subtraction operator
  135 +
  136 + /// @param rhs is the right-hand-side operator for the subtraction
  137 + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
  138 + vec3<T> result;
  139 + result.ptr[0] = ptr[0] - rhs[0];
  140 + result.ptr[1] = ptr[1] - rhs[1];
  141 + result.ptr[2] = ptr[2] - rhs[2];
  142 + return result;
  143 + }
  144 + /// Arithmetic subtraction to a scalar
  145 +
  146 + /// @param rhs is the right-hand-side operator for the addition
  147 + CUDA_CALLABLE vec3<T> operator-(T rhs) const{
  148 + vec3<T> result;
  149 + result.ptr[0] = ptr[0] - rhs;
  150 + result.ptr[1] = ptr[1] - rhs;
  151 + result.ptr[2] = ptr[2] - rhs;
  152 + return result;
  153 + }
  154 +
  155 + /// Arithmetic scalar multiplication operator
  156 +
  157 + /// @param rhs is the right-hand-side operator for the subtraction
  158 + CUDA_CALLABLE vec3<T> operator*(T rhs) const{
  159 + vec3<T> result;
  160 + result.ptr[0] = ptr[0] * rhs;
  161 + result.ptr[1] = ptr[1] * rhs;
  162 + result.ptr[2] = ptr[2] * rhs;
  163 + return result;
  164 + }
  165 +
  166 + /// Arithmetic scalar division operator
  167 +
  168 + /// @param rhs is the right-hand-side operator for the subtraction
  169 + CUDA_CALLABLE vec3<T> operator/(T rhs) const{
  170 + return (*this) * ((T)1.0/rhs);
  171 + }
  172 +
  173 + /// Multiplication by a scalar, followed by assignment
  174 + CUDA_CALLABLE vec3<T> operator*=(T rhs){
  175 + ptr[0] = ptr[0] * rhs;
  176 + ptr[1] = ptr[1] * rhs;
  177 + ptr[2] = ptr[2] * rhs;
  178 + return *this;
  179 + }
  180 +
  181 + /// Addition and assignment
  182 + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
  183 + ptr[0] = ptr[0] + rhs;
  184 + ptr[1] = ptr[1] + rhs;
  185 + ptr[2] = ptr[2] + rhs;
  186 + return *this;
  187 + }
  188 +
  189 + /// Assign a scalar to all values
  190 + CUDA_CALLABLE vec3<T> & operator=(T rhs){
  191 + ptr[0] = ptr[0] = rhs;
  192 + ptr[1] = ptr[1] = rhs;
  193 + ptr[2] = ptr[2] = rhs;
  194 + return *this;
  195 + }
  196 +
  197 + /// Casting and assignment
  198 + template<typename Y>
  199 + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
  200 + ptr[0] = (T)rhs.ptr[0];
  201 + ptr[1] = (T)rhs.ptr[1];
  202 + ptr[2] = (T)rhs.ptr[2];
  203 + return *this;
  204 + }
  205 +
  206 + /// Unary minus (returns the negative of the vector)
  207 + CUDA_CALLABLE vec3<T> operator-() const{
  208 + vec3<T> result;
  209 + result.ptr[0] = -ptr[0];
  210 + result.ptr[1] = -ptr[1];
  211 + result.ptr[2] = -ptr[2];
  212 + return result;
  213 + }
  214 +
  215 +
  216 + /// Outputs the vector as a string
  217 + std::string str() const{
  218 + std::stringstream ss;
  219 +
  220 + const size_t N = 3;
  221 +
  222 + ss<<"[";
  223 + for(size_t i=0; i<N; i++)
  224 + {
  225 + ss<<ptr[i];
  226 + if(i != N-1)
  227 + ss<<", ";
  228 + }
  229 + ss<<"]";
  230 +
  231 + return ss.str();
  232 + }
  233 +
  234 + size_t size(){ return 3; }
  235 +
  236 + }; //end class vec3
  237 +} //end namespace stim
  238 +
  239 +/// Multiply a vector by a constant when the vector is on the right hand side
  240 +template <typename T>
  241 +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){
  242 + return rhs * lhs;
  243 +}
  244 +
  245 +//stream operator
  246 +template<typename T>
  247 +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){
  248 + os<<rhs.str();
  249 + return os;
  250 +}
  251 +
  252 +#endif
0 253 \ No newline at end of file
... ...
stim/math/vector.h
1   -#ifndef RTS_VECTOR_H
2   -#define RTS_VECTOR_H
  1 +#ifndef STIM_VECTOR_H
  2 +#define STIM_VECTOR_H
3 3  
4 4 #include <iostream>
5 5 #include <cmath>
6 6 #include <sstream>
7 7 #include <vector>
8   -
  8 +
9 9 #include <stim/cuda/cudatools/callable.h>
  10 +#include <stim/math/vec3.h>
10 11  
11 12 namespace stim
12 13 {
13 14  
14   -
15   -
16 15 template <class T>
17 16 struct vec : public std::vector<T>
18 17 {
... ... @@ -72,8 +71,8 @@ struct vec : public std::vector<T>
72 71 size_t N = other.size();
73 72 resize(N); //resize the current vector to match the copy
74 73 for(size_t i=0; i<N; i++){ //copy each element
75   - at(i) = other[i];
76   - }
  74 + at(i) = other[i];
  75 + }
77 76 }
78 77  
79 78 //I'm not sure what these were doing here.
... ... @@ -329,6 +328,15 @@ struct vec : public std::vector<T>
329 328 return *this;
330 329 }
331 330  
  331 + /// Cast to a vec3
  332 + operator stim::vec3<T>(){
  333 + stim::vec3<T> r;
  334 + size_t N = std::min<size_t>(size(), 3);
  335 + for(size_t i = 0; i < N; i++)
  336 + r[i] = at(i);
  337 + return r;
  338 + }
  339 +
332 340 /// Casting and assignment
333 341 template<typename Y>
334 342 vec<T> & operator=(vec<Y> rhs){
... ...
stim/optics/lens.h 0 → 100644
  1 +#ifndef STIM_LENS_H
  2 +#define STIM_LENS_H
  3 +
  4 +#include "scalarwave.h"
  5 +#include "../math/bessel.h"
  6 +#include "../cuda/cudatools/devices.h"
  7 +#include "../visualization/colormap.h"
  8 +#include "../math/fft.h"
  9 +
  10 +#include "cufft.h"
  11 +
  12 +#include <cmath>
  13 +
  14 +namespace stim{
  15 +
  16 + /// Perform a k-space transform of a scalar field (FFT). The given field has a width of x and the calculated momentum space has a
  17 + /// width of kx (in radians).
  18 + /// @param K is a pointer to the output array of all plane waves in the field
  19 + /// @param kx is the width of the frame in momentum space
  20 + /// @param ky is the height of the frame in momentum space
  21 + /// @param E is the field to be transformed
  22 + /// @param x is the width of the field in the spatial domain
  23 + /// @param y is the height of the field in the spatial domain
  24 + /// @param nx is the number of pixels representing the field in the x (and kx) direction
  25 + /// @param ny is the number of pixels representing the field in the y (and ky) direction
  26 + template<typename T>
  27 + void cpu_scalar_to_kspace(stim::complex<T>* K, T& kx, T& ky, stim::complex<T>* E, T x, T y, size_t nx, size_t ny){
  28 +
  29 + kx = stim::TAU * nx / x; //calculate the width of the momentum space
  30 + ky = stim::TAU * ny / y;
  31 +
  32 + stim::complex<T>* dev_FFT;
  33 + HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array
  34 +
  35 + stim::complex<T>* dev_E;
  36 + HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field
  37 + HANDLE_ERROR( cudaMemcpy(dev_E, E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory
  38 +
  39 + cufftResult result;
  40 + cufftHandle plan;
  41 + result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C);
  42 + if(result != CUFFT_SUCCESS){
  43 + std::cout<<"Error creating cuFFT plan."<<std::endl;
  44 + exit(1);
  45 + }
  46 +
  47 + result = cufftExecC2C(plan, (cufftComplex*)dev_E, (cufftComplex*)dev_FFT, CUFFT_FORWARD);
  48 + if(result != CUFFT_SUCCESS){
  49 + std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl;
  50 + exit(1);
  51 + }
  52 +
  53 + stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny);
  54 + HANDLE_ERROR( cudaMemcpy(fft, dev_FFT, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) );
  55 +
  56 + stim::cpu_fftshift(K, fft, nx, ny);
  57 + }
  58 +
  59 + template<typename T>
  60 + void cpu_scalar_from_kspace(stim::complex<T>* E, T& x, T& y, stim::complex<T>* K, T kx, T ky, size_t nx, size_t ny){
  61 +
  62 + x = stim::TAU * nx / kx; //calculate the width of the momentum space
  63 + y = stim::TAU * ny / ky;
  64 +
  65 + stim::complex<T>* fft = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * nx * ny);
  66 + stim::cpu_ifftshift(fft, K, nx, ny);
  67 +
  68 + stim::complex<T>* dev_FFT;
  69 + HANDLE_ERROR( cudaMalloc(&dev_FFT, sizeof(stim::complex<T>) * nx * ny) ); //allocate space on the CUDA device for the output array
  70 + HANDLE_ERROR( cudaMemcpy(dev_FFT, fft, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyHostToDevice) ); //copy the field to GPU memory
  71 +
  72 + stim::complex<T>* dev_E;
  73 + HANDLE_ERROR( cudaMalloc(&dev_E, sizeof(stim::complex<T>) * nx * ny) ); //allocate space for the field
  74 +
  75 + cufftResult result;
  76 + cufftHandle plan;
  77 + result = cufftPlan2d(&plan, nx, ny, CUFFT_C2C);
  78 + if(result != CUFFT_SUCCESS){
  79 + std::cout<<"Error creating cuFFT plan."<<std::endl;
  80 + exit(1);
  81 + }
  82 +
  83 + result = cufftExecC2C(plan, (cufftComplex*)dev_FFT, (cufftComplex*)dev_E, CUFFT_FORWARD);
  84 + if(result != CUFFT_SUCCESS){
  85 + std::cout<<"Error using cuFFT to perform a forward Fourier transform of the field."<<std::endl;
  86 + exit(1);
  87 + }
  88 +
  89 + HANDLE_ERROR( cudaMemcpy(E, dev_E, sizeof(stim::complex<T>) * nx * ny, cudaMemcpyDeviceToHost) );
  90 +
  91 +
  92 + }
  93 +
  94 + /// Propagate a field slice along its orthogonal direction by a given distance z
  95 + /// @param Enew is the resulting propogated field
  96 + template<typename T>
  97 + void cpu_scalar_propagate(stim::complex<T>* Enew, stim::complex<T>* E, T sx, T sy, T z, T k, size_t nx, size_t ny){
  98 +
  99 + stim::complex<T>* K = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * nx * ny );
  100 +
  101 + T Kx, Ky; //width and height in k space
  102 + cpu_scalar_to_kspace(K, Kx, Ky, E ,sx, sy, nx, ny);
  103 +
  104 + T* mag = (T*) malloc( sizeof(T) * nx * ny );
  105 + stim::abs(mag, K, nx * ny);
  106 + stim::cpu2image<float>(mag, "kspace_pre_shift.bmp", nx, ny, stim::cmBrewer);
  107 +
  108 + size_t kxi, kyi;
  109 + size_t i;
  110 + T kx, kx_sq, ky, ky_sq, k_sq;
  111 + T kz;
  112 + stim::complex<T> shift;
  113 + T min_kx = -Kx / 2;
  114 + T dkx = Kx / (nx);
  115 + T min_ky = -Ky / 2;
  116 + T dky = Ky / (ny);
  117 + for(kyi = 0; kyi < ny; kyi++){ //for each plane wave in the ky direction
  118 + for(kxi = 0; kxi < nx; kxi++){ //for each plane wave in the ky direction
  119 + i = kyi * nx + kxi;
  120 +
  121 + kx = min_kx + kxi * dkx; //calculate the position of the current plane wave
  122 + ky = min_ky + kyi * dky;
  123 +
  124 + kx_sq = kx * kx;
  125 + ky_sq = ky * ky;
  126 + k_sq = k*k;
  127 +
  128 + if(kx_sq + ky_sq < k_sq){
  129 + kz = sqrt(k*k - kx * kx - ky * ky); //estimate kz using the Fresnel approximation
  130 + shift = -exp(stim::complex<T>(0, kz * z));
  131 + K[i] *= shift;
  132 + }
  133 + else{
  134 + K[i] = 0;
  135 + }
  136 + }
  137 + }
  138 +
  139 + stim::abs(mag, K, nx * ny);
  140 + stim::cpu2image<float>(mag, "kspace_post_shift.bmp", nx, ny, stim::cmBrewer);
  141 +
  142 + cpu_scalar_from_kspace(Enew, sx, sy, K, Kx, Ky, nx, ny);
  143 + }
  144 +
  145 +}
  146 +
  147 +
  148 +#endif
0 149 \ No newline at end of file
... ...
stim/optics/mie.h 0 → 100644
  1 +#ifndef STIM_MIE_H
  2 +#define STIM_MIE_H
  3 +#include <boost/math/special_functions/bessel.hpp>
  4 +
  5 +#include "scalarwave.h"
  6 +#include "../math/bessel.h"
  7 +#include "../cuda/cudatools/devices.h"
  8 +#include <cmath>
  9 +
  10 +namespace stim{
  11 +
  12 +
  13 +/// Calculate the scattering coefficients for a spherical scatterer
  14 +template<typename T>
  15 +void B_coefficients(stim::complex<T>* B, T a, T k, stim::complex<T> n, int Nl){
  16 +
  17 + //temporary variables
  18 + double vm; //allocate space to store the return values for the bessel function calculation
  19 + double* j_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
  20 + double* y_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
  21 + double* dj_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
  22 + double* dy_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
  23 +
  24 + stim::complex<double>* j_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  25 + stim::complex<double>* y_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  26 + stim::complex<double>* dj_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  27 + stim::complex<double>* dy_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  28 +
  29 + double ka = k * a; //store k*a (argument for spherical bessel and Hankel functions)
  30 + stim::complex<double> kna = k * n * a; //store k*n*a (argument for spherical bessel functions and derivatives)
  31 +
  32 + stim::bessjyv_sph<double>(Nl, ka, vm, j_ka, y_ka, dj_ka, dy_ka); //calculate bessel functions and derivatives for k*a
  33 + stim::cbessjyva_sph<double>(Nl, kna, vm, j_kna, y_kna, dj_kna, dy_kna); //calculate complex bessel functions for k*n*a
  34 +
  35 + stim::complex<double> h_ka, dh_ka;
  36 + stim::complex<double> numerator, denominator;
  37 + stim::complex<double> i(0, 1);
  38 + for(int l = 0; l <= Nl; l++){
  39 + h_ka.r = j_ka[l];
  40 + h_ka.i = y_ka[l];
  41 + dh_ka.r = dj_ka[l];
  42 + dh_ka.i = dy_ka[l];
  43 +
  44 + numerator = j_ka[l] * dj_kna[l] * (stim::complex<double>)n - j_kna[l] * dj_ka[l];
  45 + denominator = j_kna[l] * dh_ka - h_ka * dj_kna[l] * (stim::complex<double>)n;
  46 + B[l] = (2 * l + 1) * pow(i, l) * numerator / denominator;
  47 + }
  48 +}
  49 +
  50 +template<typename T>
  51 +void A_coefficients(stim::complex<T>* A, T a, T k, stim::complex<T> n, int Nl){
  52 + //temporary variables
  53 + double vm; //allocate space to store the return values for the bessel function calculation
  54 + double* j_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
  55 + double* y_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
  56 + double* dj_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
  57 + double* dy_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
  58 +
  59 + stim::complex<double>* j_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  60 + stim::complex<double>* y_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  61 + stim::complex<double>* dj_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  62 + stim::complex<double>* dy_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  63 +
  64 + double ka = k * a; //store k*a (argument for spherical bessel and Hankel functions)
  65 + stim::complex<double> kna = k * n * a; //store k*n*a (argument for spherical bessel functions and derivatives)
  66 +
  67 + stim::bessjyv_sph<double>(Nl, ka, vm, j_ka, y_ka, dj_ka, dy_ka); //calculate bessel functions and derivatives for k*a
  68 + stim::cbessjyva_sph<double>(Nl, kna, vm, j_kna, y_kna, dj_kna, dy_kna); //calculate complex bessel functions for k*n*a
  69 +
  70 + stim::complex<double> h_ka, dh_ka;
  71 + stim::complex<double> numerator, denominator;
  72 + stim::complex<double> i(0, 1);
  73 + for(size_t l = 0; l <= Nl; l++){
  74 + h_ka.r = j_ka[l];
  75 + h_ka.i = y_ka[l];
  76 + dh_ka.r = dj_ka[l];
  77 + dh_ka.i = dy_ka[l];
  78 +
  79 + numerator = j_ka[l] * dh_ka - dj_ka[l] * h_ka;
  80 + denominator = j_kna[l] * dh_ka - h_ka * dj_kna[l] * (stim::complex<double>)n;
  81 + A[l] = (2 * l + 1) * pow(i, l) * numerator / denominator;
  82 + }
  83 +}
  84 +
  85 +#define LOCAL_NL 16
  86 +template<typename T>
  87 +__global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* hB, T r_min, T dr, size_t N_hB, int Nl){
  88 + extern __shared__ stim::complex<T> shared_hB[]; //declare the list of waves in shared memory
  89 +
  90 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  91 + if(i >= N) return; //exit if this thread is outside the array
  92 + stim::vec3<T> p;
  93 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  94 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  95 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  96 +
  97 + T r = p.len(); //calculate the distance from the sphere
  98 + if(r < a) return; //exit if the point is inside the sphere (we only calculate the internal field)
  99 + T fij = (r - r_min)/dr; //FP index into the spherical bessel LUT
  100 + size_t ij = (size_t) fij; //convert to an integral index
  101 + T alpha = fij - ij; //calculate the fractional portion of the index
  102 + size_t n0j = ij * (Nl + 1); //start of the first entry in the LUT
  103 + size_t n1j = (ij+1) * (Nl + 1); //start of the second entry in the LUT
  104 +
  105 + T cos_phi;
  106 + T Pl_2, Pl_1, Pl; //declare registers to store the previous two Legendre polynomials
  107 +
  108 + stim::complex<T> hBl;
  109 + stim::complex<T> Ei = 0; //create a register to store the result
  110 + int l;
  111 +
  112 + stim::complex<T> hlBl[LOCAL_NL+1]; //the first LOCAL_NL components are stored in registers for speed
  113 + int shared_start = threadIdx.x * (Nl - LOCAL_NL); //wrap up some operations so that they aren't done in the main loops
  114 +
  115 + #pragma unroll LOCAL_NL+1 //copy the first LOCAL_NL+1 h_l * B_l components to registers
  116 + for(l = 0; l <= LOCAL_NL; l++)
  117 + hlBl[l] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha );
  118 +
  119 + for(l = LOCAL_NL+1; l <= Nl; l++) //copy any additional h_l * B_l components to shared memory
  120 + shared_hB[shared_start + (l - (LOCAL_NL+1))] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha );
  121 +
  122 + for(size_t w = 0; w < nW; w++){ //for each plane wave
  123 + cos_phi = p.norm().dot(W[w].kvec().norm()); //calculate the cosine of the angle between the k vector and the direction from the sphere
  124 + Pl_2 = 1; //the Legendre polynomials will be calculated recursively, initialize the first two steps of the recursive relation
  125 + Pl_1 = cos_phi;
  126 + Ei += W[w].E() * hlBl[0] * Pl_2; //unroll the first two orders using the initial steps of the Legendre recursive relation
  127 + Ei += W[w].E() * hlBl[1] * Pl_1;
  128 +
  129 + #pragma unroll LOCAL_NL-1 //unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file)
  130 + for(l = 2; l <= LOCAL_NL; l++){
  131 + Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //calculate the next step in the Legendre polynomial recursive relation (this is where most of the computation occurs)
  132 + Ei += W[w].E() * hlBl[l] * Pl; //calculate and sum the current field order
  133 + Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1
  134 + Pl_1 = Pl;
  135 + }
  136 +
  137 + for(l = LOCAL_NL+1; l <= Nl; l++){ //do the same as above, except for any additional orders that are stored in shared memory (not registers)
  138 + Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //again, this is where most computation in the kernel occurs
  139 + Ei += W[w].E() * shared_hB[shared_start + l - LOCAL_NL - 1] * Pl;
  140 + Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1
  141 + Pl_1 = Pl;
  142 + }
  143 + }
  144 + E[i] += Ei; //copy the result to device memory
  145 +}
  146 +
  147 +template<typename T>
  148 +void gpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* hB, T kr_min, T dkr, size_t N_hB, size_t Nl){
  149 +
  150 + size_t max_shared_mem = stim::sharedMemPerBlock();
  151 + size_t hBl_array = sizeof(stim::complex<T>) * (Nl + 1);
  152 + std::cout<<"hl*Bl array size: "<<hBl_array<<std::endl;
  153 + std::cout<<"shared memory: "<<max_shared_mem<<std::endl;
  154 + int threads = (int)((max_shared_mem / hBl_array) / 32 * 32);
  155 + std::cout<<"threads per block: "<<threads<<std::endl;
  156 + dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  157 +
  158 + size_t shared_mem;
  159 + if(Nl <= LOCAL_NL) shared_mem = 0;
  160 + else shared_mem = threads * sizeof(stim::complex<T>) * (Nl - LOCAL_NL); //amount of shared memory to allocate
  161 + std::cout<<"shared memory allocated: "<<shared_mem<<std::endl;
  162 + cuda_scalar_mie_scatter<T><<< blocks, threads, shared_mem >>>(E, N, x, y, z, W, nW, a, n, hB, kr_min, dkr, N_hB, (int)Nl); //call the kernel
  163 +}
  164 +
  165 +template<typename T>
  166 +__global__ void cuda_dist(T* r, T* x, T* y, T* z, size_t N){
  167 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  168 + if(i >= N) return; //exit if this thread is outside the array
  169 +
  170 + stim::vec3<T> p;
  171 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  172 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  173 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  174 +
  175 + r[i] = p.len();
  176 +}
  177 +/// Calculate the scalar Mie solution for the scattered field produced by a single plane wave
  178 +
  179 +/// @param E is a pointer to the destination field values
  180 +/// @param N is the number of points used to calculate the field
  181 +/// @param x is an array of x coordinates for each point, specified relative to the sphere (x = NULL assumes all zeros)
  182 +/// @param y is an array of y coordinates for each point, specified relative to the sphere (y = NULL assumes all zeros)
  183 +/// @param z is an array of z coordinates for each point, specified relative to the sphere (z = NULL assumes all zeros)
  184 +/// @param W is an array of planewaves that will be scattered
  185 +/// @param a is the radius of the sphere
  186 +/// @param n is the complex refractive index of the sphere
  187 +template<typename T>
  188 +void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, std::vector<stim::scalarwave<T>> W, T a, stim::complex<T> n, T r_spacing = 0.1){
  189 + //calculate the necessary number of orders required to represent the scattered field
  190 + T k = W[0].kmag();
  191 +
  192 + int Nl = (int)ceil(k*a + 4 * cbrt( k * a ) + 2);
  193 + if(Nl < LOCAL_NL) Nl = LOCAL_NL; //always do at least the minimum number of local operations (kernel optimization)
  194 + std::cout<<"Nl: "<<Nl<<std::endl;
  195 +
  196 + //calculate the scattering coefficients for the sphere
  197 + stim::complex<T>* B = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * (Nl + 1) ); //allocate space for the scattering coefficients
  198 + B_coefficients(B, a, k, n, Nl);
  199 +
  200 +#ifdef CUDA_FOUND
  201 + stim::complex<T>* dev_E; //allocate space for the field
  202 + cudaMalloc(&dev_E, N * sizeof(stim::complex<T>));
  203 + cudaMemcpy(dev_E, E, N * sizeof(stim::complex<T>), cudaMemcpyHostToDevice);
  204 + //cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>)); //set the field to zero (necessary because a sum is used)
  205 +
  206 + // COORDINATES
  207 + T* dev_x = NULL; //allocate space and copy the X coordinate (if specified)
  208 + if(x != NULL){
  209 + HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T)));
  210 + HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice));
  211 + }
  212 + T* dev_y = NULL; //allocate space and copy the Y coordinate (if specified)
  213 + if(y != NULL){
  214 + HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T)));
  215 + HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice));
  216 + }
  217 + T* dev_z = NULL; //allocate space and copy the Z coordinate (if specified)
  218 + if(z != NULL){
  219 + HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T)));
  220 + HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice));
  221 + }
  222 +
  223 + // PLANE WAVES
  224 + stim::scalarwave<T>* dev_W; //allocate space and copy plane waves
  225 + HANDLE_ERROR( cudaMalloc(&dev_W, sizeof(stim::scalarwave<T>) * W.size()) );
  226 + HANDLE_ERROR( cudaMemcpy(dev_W, &W[0], sizeof(stim::scalarwave<T>) * W.size(), cudaMemcpyHostToDevice) );
  227 +
  228 + // BESSEL FUNCTION LOOK-UP TABLE
  229 + //calculate the distance from the sphere center
  230 + T* dev_r;
  231 + HANDLE_ERROR( cudaMalloc(&dev_r, sizeof(T) * N) );
  232 +
  233 + int threads = stim::maxThreadsPerBlock();
  234 + dim3 blocks((unsigned)(N / threads + 1));
  235 + cuda_dist<T> <<< blocks, threads >>>(dev_r, dev_x, dev_y, dev_z, N);
  236 +
  237 + //Find the minimum and maximum values of r
  238 + cublasStatus_t stat;
  239 + cublasHandle_t handle;
  240 +
  241 + stat = cublasCreate(&handle); //create a cuBLAS handle
  242 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  243 + printf ("CUBLAS initialization failed\n");
  244 + exit(1);
  245 + }
  246 +
  247 + int i_min, i_max;
  248 + stat = cublasIsamin(handle, (int)N, dev_r, 1, &i_min);
  249 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  250 + printf ("CUBLAS Error: failed to calculate minimum r value.\n");
  251 + exit(1);
  252 + }
  253 + stat = cublasIsamax(handle, (int)N, dev_r, 1, &i_max);
  254 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  255 + printf ("CUBLAS Error: failed to calculate maximum r value.\n");
  256 + exit(1);
  257 + }
  258 +
  259 + i_min--; //cuBLAS uses 1-based indexing for Fortran compatibility
  260 + i_max--;
  261 + T r_min, r_max; //allocate space to store the minimum and maximum values
  262 + HANDLE_ERROR( cudaMemcpy(&r_min, dev_r + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU
  263 + HANDLE_ERROR( cudaMemcpy(&r_max, dev_r + i_max, sizeof(T), cudaMemcpyDeviceToHost) );
  264 +
  265 + r_min = max(r_min, a); //if the radius of the sphere is larger than r_min, change r_min to a (the scattered field doesn't exist inside the sphere)
  266 +
  267 + //size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1); //number of values in the look-up table based on the user-specified spacing along r
  268 + size_t N_hB_lut = (size_t)((r_max - r_min) / r_spacing + 1);
  269 +
  270 + //T kr_min = k * r_min;
  271 + //T kr_max = k * r_max;
  272 +
  273 + //temporary variables
  274 + double vm; //allocate space to store the return values for the bessel function calculation
  275 + double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
  276 + double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
  277 + double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
  278 + double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
  279 +
  280 + size_t hB_bytes = sizeof(stim::complex<T>) * (Nl+1) * N_hB_lut;
  281 + stim::complex<T>* hB_lut = (stim::complex<T>*) malloc(hB_bytes); //pointer to the look-up table
  282 + T dr = (r_max - r_min) / (N_hB_lut-1); //distance between values in the LUT
  283 + std::cout<<"LUT jl bytes: "<<hB_bytes<<std::endl;
  284 + stim::complex<T> hl;
  285 + for(size_t ri = 0; ri < N_hB_lut; ri++){ //for each value in the LUT
  286 + stim::bessjyv_sph<double>(Nl, k * (r_min + ri * dr), vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl]
  287 + for(size_t l = 0; l <= Nl; l++){ //for each order
  288 + hl.r = (T)jv[l];
  289 + hl.i = (T)yv[l];
  290 +
  291 + hB_lut[ri * (Nl + 1) + l] = hl * B[l]; //store the bessel function result
  292 + //std::cout<<hB_lut[ri * (Nl + 1) + l]<<std::endl;
  293 + }
  294 + }
  295 + T* real_lut = (T*) malloc(hB_bytes/2);
  296 + stim::real(real_lut, hB_lut, N_hB_lut);
  297 + stim::cpu2image<T>(real_lut, "hankel_B.bmp", Nl+1, N_hB_lut, stim::cmBrewer);
  298 +
  299 + //Allocate device memory and copy everything to the GPU
  300 + stim::complex<T>* dev_hB_lut;
  301 + HANDLE_ERROR( cudaMalloc(&dev_hB_lut, hB_bytes) );
  302 + HANDLE_ERROR( cudaMemcpy(dev_hB_lut, hB_lut, hB_bytes, cudaMemcpyHostToDevice) );
  303 +
  304 + gpu_scalar_mie_scatter<T>(dev_E, N, dev_x, dev_y, dev_z, dev_W, W.size(), a, n, dev_hB_lut, r_min, dr, N_hB_lut, Nl);
  305 +
  306 + cudaMemcpy(E, dev_E, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost); //copy the field from device memory
  307 +
  308 + if(x != NULL) cudaFree(dev_x); //free everything
  309 + if(y != NULL) cudaFree(dev_y);
  310 + if(z != NULL) cudaFree(dev_z);
  311 + cudaFree(dev_E);
  312 +#else
  313 +
  314 +
  315 + //allocate space to store the bessel function call results
  316 + double vm;
  317 + double* j_kr = (double*) malloc( (Nl + 1) * sizeof(double) );
  318 + double* y_kr = (double*) malloc( (Nl + 1) * sizeof(double) );
  319 + double* dj_kr= (double*) malloc( (Nl + 1) * sizeof(double) );
  320 + double* dy_kr= (double*) malloc( (Nl + 1) * sizeof(double) );
  321 +
  322 + T* P = (T*) malloc( (Nl + 1) * sizeof(T) );
  323 +
  324 + T r, kr, cos_phi;
  325 + stim::complex<T> h;
  326 + for(size_t i = 0; i < N; i++){
  327 + stim::vec3<T> p; //declare a 3D point
  328 +
  329 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  330 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  331 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  332 + r = p.len();
  333 + if(r >= a){
  334 + for(size_t w = 0; w < W.size(); w++){
  335 + kr = p.len() * W[w].kmag(); //calculate k*r
  336 + stim::bessjyv_sph<double>(Nl, kr, vm, j_kr, y_kr, dj_kr, dy_kr);
  337 + cos_phi = p.norm().dot(W[w].kvec().norm()); //calculate the cosine of the angle from the propagating direction
  338 + stim::legendre<T>(Nl, cos_phi, P);
  339 +
  340 + for(size_t l = 0; l <= Nl; l++){
  341 + h.r = j_kr[l];
  342 + h.i = y_kr[l];
  343 + E[i] += W[w].E() * B[l] * h * P[l];
  344 + }
  345 + }
  346 + }
  347 + }
  348 +#endif
  349 +}
  350 +
  351 +template<typename T>
  352 +void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w, T a, stim::complex<T> n, T r_spacing = 0.1){
  353 + std::vector< stim::scalarwave<T> > W(1, w);
  354 + cpu_scalar_mie_scatter(E, N, x, y, z, W, a, n, r_spacing);
  355 +}
  356 +
/// CUDA kernel: evaluates the internal scalar field at every sample point that lies within the sphere.

/// One thread processes one point (1D launch, thread index = blockIdx.x * blockDim.x + threadIdx.x).
/// Points with r > a are skipped and their entry in E is left untouched.
///
/// Launch requirements (derived from the indexing below):
///  - dynamic shared memory of at least blockDim.x * (Nl - LOCAL_NL) * sizeof(stim::complex<T>) bytes
///  - Nl >= LOCAL_NL, because orders [0, LOCAL_NL] are always read from each LUT row (row stride is Nl + 1)
///  - LOCAL_NL >= 1, because orders 0 and 1 are unrolled by hand
///
/// @param E is the destination field (device memory, N values); written only for points with r <= a
/// @param N is the number of points
/// @param x, y, z are device arrays of point coordinates relative to the sphere (NULL is treated as all zeros)
/// @param W is a device array of plane waves
/// @param nW is the number of plane waves in W
/// @param a is the radius of the sphere
/// @param n is the refractive index of the sphere (not used inside this kernel)
/// @param jA is the look-up table of pre-multiplied j_l * A_l values, N_jA rows of (Nl + 1) orders each
/// @param r_min is the radius that corresponds to the first row of the look-up table
/// @param dr is the radial distance between consecutive rows of the look-up table
/// @param N_jA is the number of rows in the look-up table
/// @param Nl is the maximum order stored in each row
template<typename T>
__global__ void cuda_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* jA, T r_min, T dr, size_t N_jA, int Nl){
	extern __shared__ stim::complex<T> shared_jA[];		//per-thread storage for the orders that do not fit in registers

	size_t i = blockIdx.x * blockDim.x + threadIdx.x;		//get the index into the array
	if(i >= N) return;										//exit if this thread is outside the array
	stim::vec3<T> p;
	(x == NULL) ? p[0] = 0 : p[0] = x[i];					// test for NULL values and set positions
	(y == NULL) ? p[1] = 0 : p[1] = y[i];
	(z == NULL) ? p[2] = 0 : p[2] = z[i];

	T r = p.len();											//calculate the distance from the center of the sphere
	if(r > a) return;										//exit if the point is outside the sphere (we only calculate the internal field)

	//find the two LUT rows that bracket r, making sure that neither row index leaves the table
	size_t ij = 0;											//lower row
	size_t ij1 = 0;											//upper row
	T alpha = 0;											//interpolation weight between the two rows
	if(N_jA > 1 && dr > 0){									//a single-row table (or an invalid spacing) always uses row 0
		T fij = (r - r_min)/dr;								//FP index into the spherical bessel LUT
		if(!(fij > 0)) fij = 0;								//clamp negative (and NaN) indices to the first row
		if(fij >= (T)(N_jA - 1)){							//r is at (or beyond) the last row: there is no row above to blend with
			ij = N_jA - 1;
			ij1 = ij;
		}
		else{
			ij = (size_t) fij;								//convert to an integral index
			ij1 = ij + 1;
			alpha = fij - ij;								//calculate the fractional portion of the index
		}
	}
	size_t n0j = ij * (Nl + 1);								//start of the first entry in the LUT
	size_t n1j = ij1 * (Nl + 1);							//start of the second entry in the LUT

	T cos_phi;
	T Pl_2, Pl_1, Pl;										//declare registers to store the previous two Legendre polynomials

	stim::complex<T> jAl;
	stim::complex<T> Ei = 0;								//create a register to store the result
	int l;

	stim::complex<T> jlAl[LOCAL_NL+1];						//the first LOCAL_NL components are stored in registers for speed
	int shared_start = threadIdx.x * (Nl - LOCAL_NL);		//wrap up some operations so that they aren't done in the main loops

	#pragma unroll LOCAL_NL+1								//copy the first LOCAL_NL+1 j_l * A_l components to registers
	for(l = 0; l <= LOCAL_NL; l++)
		jlAl[l] = clerp<T>( jA[n0j + l], jA[n1j + l], alpha );

	for(l = LOCAL_NL+1; l <= Nl; l++)						//copy any additional j_l * A_l components to shared memory
		shared_jA[shared_start + (l - (LOCAL_NL+1))] = clerp<T>( jA[n0j + l], jA[n1j + l], alpha );

	for(size_t w = 0; w < nW; w++){							//for each plane wave
		if(r == 0) cos_phi = 0;								//the direction is undefined at the center, so avoid normalizing a zero vector
		else
			cos_phi = p.norm().dot(W[w].kvec().norm());		//calculate the cosine of the angle between the k vector and the direction from the sphere
		Pl_2 = 1;											//the Legendre polynomials will be calculated recursively, initialize the first two steps of the recursive relation
		Pl_1 = cos_phi;
		Ei += W[w].E() * jlAl[0] * Pl_2;					//unroll the first two orders using the initial steps of the Legendre recursive relation
		Ei += W[w].E() * jlAl[1] * Pl_1;

		#pragma unroll LOCAL_NL-1							//unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file)
		for(l = 2; l <= LOCAL_NL; l++){
			Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l);	//calculate the next step in the Legendre polynomial recursive relation (this is where most of the computation occurs)
			Ei += W[w].E() * jlAl[l] * Pl;					//calculate and sum the current field order
			Pl_2 = Pl_1;									//shift Pl_1 -> Pl_2 and Pl -> Pl_1
			Pl_1 = Pl;
		}

		for(l = LOCAL_NL+1; l <= Nl; l++){					//do the same as above, except for any additional orders that are stored in shared memory (not registers)
			Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l);	//again, this is where most computation in the kernel occurs
			Ei += W[w].E() * shared_jA[shared_start + l - LOCAL_NL - 1] * Pl;
			Pl_2 = Pl_1;									//shift Pl_1 -> Pl_2 and Pl -> Pl_1
			Pl_1 = Pl;
		}
	}
	E[i] = Ei;												//copy the result to device memory
}
  419 +
/// Host-side launcher for the cuda_scalar_mie_internal kernel.

/// Chooses a block size from the shared memory available per block, clamps it to what the device
/// can launch, sizes the dynamic shared memory, launches the kernel and checks the launch for errors.
/// All pointer arguments are forwarded to the kernel as-is and must therefore be device pointers.
///
/// @param E is the destination field (N values)
/// @param N is the number of points
/// @param x, y, z are the point coordinates (NULL is allowed and handled by the kernel)
/// @param W is the array of plane waves, nW is the number of waves
/// @param a is the radius of the sphere
/// @param n is the refractive index of the sphere
/// @param jA is the j_l * A_l look-up table (N_jA rows of Nl + 1 orders), starting at r_min with row spacing dr
/// @param Nl is the maximum order
template<typename T>
void gpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* jA, T r_min, T dr, size_t N_jA, size_t Nl){

	size_t max_shared_mem = stim::sharedMemPerBlock();
	size_t hBl_array = sizeof(stim::complex<T>) * (Nl + 1);
	std::cout<<"hl*Bl array size: "<<hBl_array<<std::endl;
	std::cout<<"shared memory: "<<max_shared_mem<<std::endl;

	//number of threads whose coefficient arrays fit in shared memory, rounded down to a whole number of warps
	size_t fit = (max_shared_mem / hBl_array) / 32 * 32;
	size_t max_threads = (size_t)stim::maxThreadsPerBlock() / 32 * 32;	//the device limit, also in whole warps
	if(max_threads != 0 && fit > max_threads) fit = max_threads;		//small Nl would otherwise request more threads than can be launched
	if(fit == 0) fit = 32;												//never launch (or divide by) zero threads; fall back to a single warp
	int threads = (int)fit;
	std::cout<<"threads per block: "<<threads<<std::endl;
	dim3 blocks((unsigned)(N / threads + 1));							//enough blocks to cover all N points (the kernel bounds-checks the tail)

	size_t shared_mem;
	if(Nl <= LOCAL_NL) shared_mem = 0;									//every order fits in registers
	else shared_mem = threads * sizeof(stim::complex<T>) * (Nl - LOCAL_NL);	//amount of shared memory to allocate
	std::cout<<"shared memory allocated: "<<shared_mem<<std::endl;
	if(shared_mem > max_shared_mem){									//even a single warp does not fit: report it instead of failing silently
		std::cout<<"ERROR: the internal Mie kernel requires more shared memory than is available per block"<<std::endl;
		exit(1);
	}
	cuda_scalar_mie_internal<T><<< blocks, threads, shared_mem >>>(E, N, x, y, z, W, nW, a, n, jA, r_min, dr, N_jA, (int)Nl);	//call the kernel
	HANDLE_ERROR( cudaGetLastError() );									//kernel launches do not return errors directly, so query the launch status
}
  437 +
/// Calculate the scalar Mie solution for the internal field produced by a set of plane waves scattered by a sphere

/// Only points inside the sphere are evaluated; all other entries of E are left unchanged.
/// The number of orders and the A coefficients are computed from the k-vector magnitude of the
/// first wave in W. When CUDA_FOUND is defined the field is evaluated on the GPU using a radial
/// look-up table of j_l * A_l values, otherwise it is evaluated point-by-point on the CPU.
/// NOTE(review): the GPU path passes T* directly to cublasIsamin/cublasIsamax, which presumably restricts it to T = float - confirm.

/// @param E is a pointer to the destination field values
/// @param N is the number of points used to calculate the field
/// @param x is an array of x coordinates for each point, specified relative to the sphere (x = NULL assumes all zeros)
/// @param y is an array of y coordinates for each point, specified relative to the sphere (y = NULL assumes all zeros)
/// @param z is an array of z coordinates for each point, specified relative to the sphere (z = NULL assumes all zeros)
/// @param W is the list of plane waves that will be scattered (nothing is computed if it is empty)
/// @param a is the radius of the sphere
/// @param n is the complex refractive index of the sphere
/// @param r_spacing is the desired radial distance between entries of the look-up table (used by the GPU path only)
template<typename T>
void cpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > W, T a, stim::complex<T> n, T r_spacing = 0.1){
	if(N == 0 || W.empty()) return;											//nothing to evaluate (and W[0] would be invalid)

	//calculate the necessary number of orders required to represent the scattered field
	T k = W[0].kmag();

	int Nl = (int)ceil(k*a + 4 * cbrt( k * a ) + 2);
	if(Nl < LOCAL_NL) Nl = LOCAL_NL;										//always do at least the minimum number of local operations (kernel optimization)
	std::cout<<"Nl: "<<Nl<<std::endl;

	//calculate the scattering coefficients for the sphere
	stim::complex<T>* A = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * (Nl + 1) );	//allocate space for the scattering coefficients
	A_coefficients(A, a, k, n, Nl);

#ifdef CUDA_FOUND
	stim::complex<T>* dev_E;												//allocate space for the field
	HANDLE_ERROR( cudaMalloc(&dev_E, N * sizeof(stim::complex<T>)) );
	HANDLE_ERROR( cudaMemcpy(dev_E, E, N * sizeof(stim::complex<T>), cudaMemcpyHostToDevice) );	//existing values are kept for points outside the sphere

	// COORDINATES
	T* dev_x = NULL;														//allocate space and copy the X coordinate (if specified)
	if(x != NULL){
		HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T)));
		HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice));
	}
	T* dev_y = NULL;														//allocate space and copy the Y coordinate (if specified)
	if(y != NULL){
		HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T)));
		HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice));
	}
	T* dev_z = NULL;														//allocate space and copy the Z coordinate (if specified)
	if(z != NULL){
		HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T)));
		HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice));
	}

	// PLANE WAVES
	stim::scalarwave<T>* dev_W;												//allocate space and copy plane waves
	HANDLE_ERROR( cudaMalloc(&dev_W, sizeof(stim::scalarwave<T>) * W.size()) );
	HANDLE_ERROR( cudaMemcpy(dev_W, &W[0], sizeof(stim::scalarwave<T>) * W.size(), cudaMemcpyHostToDevice) );

	// BESSEL FUNCTION LOOK-UP TABLE
	//calculate the distance from the sphere center
	T* dev_r;
	HANDLE_ERROR( cudaMalloc(&dev_r, sizeof(T) * N) );

	int threads = stim::maxThreadsPerBlock();
	dim3 blocks((unsigned)(N / threads + 1));
	cuda_dist<T> <<< blocks, threads >>>(dev_r, dev_x, dev_y, dev_z, N);
	HANDLE_ERROR( cudaGetLastError() );										//catch launch-configuration errors

	//Find the minimum and maximum values of r
	cublasStatus_t stat;
	cublasHandle_t handle;

	stat = cublasCreate(&handle);											//create a cuBLAS handle
	if (stat != CUBLAS_STATUS_SUCCESS){										//test for failure
		printf ("CUBLAS initialization failed\n");
		exit(1);
	}

	int i_min, i_max;
	stat = cublasIsamin(handle, (int)N, dev_r, 1, &i_min);
	if (stat != CUBLAS_STATUS_SUCCESS){										//test for failure
		printf ("CUBLAS Error: failed to calculate minimum r value.\n");
		exit(1);
	}
	stat = cublasIsamax(handle, (int)N, dev_r, 1, &i_max);
	if (stat != CUBLAS_STATUS_SUCCESS){										//test for failure
		printf ("CUBLAS Error: failed to calculate maximum r value.\n");
		exit(1);
	}
	cublasDestroy(handle);													//the handle is only needed for the two searches above

	i_min--;																//cuBLAS uses 1-based indexing for Fortran compatibility
	i_max--;
	T r_min, r_max;															//allocate space to store the minimum and maximum values
	HANDLE_ERROR( cudaMemcpy(&r_min, dev_r + i_min, sizeof(T), cudaMemcpyDeviceToHost) );	//copy the min and max values from the device to the CPU
	HANDLE_ERROR( cudaMemcpy(&r_max, dev_r + i_max, sizeof(T), cudaMemcpyDeviceToHost) );

	if(r_min <= a){															//if even the closest point is outside the sphere there is no internal field to compute
		r_max = min(r_max, a);												//the internal field doesn't exist outside of the sphere

		size_t N_jA_lut = (size_t)((r_max - r_min) / r_spacing + 1);
		if(N_jA_lut < 2) N_jA_lut = 2;										//interpolation needs two rows, and (N_jA_lut - 1) is used as a divisor below
		T dr = (r_max - r_min) / (N_jA_lut-1);								//distance between values in the LUT
		if(!(dr > 0)) dr = r_spacing;										//all interior points share one radius: any positive spacing is valid

		//temporary variables
		double vm;															//allocate space to store the return values for the bessel function calculation
		stim::complex<double>* jv = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
		stim::complex<double>* yv = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
		stim::complex<double>* djv= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
		stim::complex<double>* dyv= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );

		size_t jA_bytes = sizeof(stim::complex<T>) * (Nl+1) * N_jA_lut;
		stim::complex<T>* jA_lut = (stim::complex<T>*) malloc(jA_bytes);	//pointer to the look-up table
		std::cout<<"LUT jl bytes: "<<jA_bytes<<std::endl;
		stim::complex<double> nd = (stim::complex<double>)n;
		for(size_t ri = 0; ri < N_jA_lut; ri++){							//for each value in the LUT
			stim::cbessjyva_sph<double>(Nl, nd * k * (r_min + ri * dr), vm, jv, yv, djv, dyv);	//compute the list of spherical bessel functions from [0 Nl]
			for(int l = 0; l <= Nl; l++){									//for each order
				jA_lut[ri * (Nl + 1) + l] = (stim::complex<T>)(jv[l] * (stim::complex<double>)A[l]);	//store the bessel function result
			}
		}

		//Allocate device memory and copy everything to the GPU
		stim::complex<T>* dev_jA_lut;
		HANDLE_ERROR( cudaMalloc(&dev_jA_lut, jA_bytes) );
		HANDLE_ERROR( cudaMemcpy(dev_jA_lut, jA_lut, jA_bytes, cudaMemcpyHostToDevice) );

		gpu_scalar_mie_internal<T>(dev_E, N, dev_x, dev_y, dev_z, dev_W, W.size(), a, n, dev_jA_lut, r_min, dr, N_jA_lut, Nl);

		HANDLE_ERROR( cudaMemcpy(E, dev_E, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost) );	//copy the field from device memory (also surfaces kernel execution errors)

		cudaFree(dev_jA_lut);												//release the look-up table and the scratch buffers used to build it
		free(jA_lut);
		free(jv);
		free(yv);
		free(djv);
		free(dyv);
	}

	if(x != NULL) cudaFree(dev_x);											//free everything
	if(y != NULL) cudaFree(dev_y);
	if(z != NULL) cudaFree(dev_z);
	cudaFree(dev_W);
	cudaFree(dev_r);
	cudaFree(dev_E);
#else

	//allocate space to store the bessel function call results
	double vm;
	stim::complex<double>* j_knr = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
	stim::complex<double>* y_knr = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
	stim::complex<double>* dj_knr= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
	stim::complex<double>* dy_knr= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );

	T* P = (T*) malloc( (Nl + 1) * sizeof(T) );								//Legendre polynomials of orders [0 Nl]

	T r, cos_phi;
	stim::complex<double> knr;
	for(size_t i = 0; i < N; i++){
		stim::vec3<T> p;													//declare a 3D point

		(x == NULL) ? p[0] = 0 : p[0] = x[i];								// test for NULL values and set positions
		(y == NULL) ? p[1] = 0 : p[1] = y[i];
		(z == NULL) ? p[2] = 0 : p[2] = z[i];
		r = p.len();
		if(r < a){															//only points inside the sphere are evaluated
			E[i] = 0;
			for(size_t w = 0; w < W.size(); w++){
				knr = (stim::complex<double>)n * p.len() * W[w].kmag();		//calculate k*n*r

				stim::cbessjyva_sph<double>(Nl, knr, vm, j_knr, y_knr, dj_knr, dy_knr);
				if(r == 0)
					cos_phi = 0;											//the direction is undefined at the center, so avoid normalizing a zero vector
				else
					cos_phi = p.norm().dot(W[w].kvec().norm());				//calculate the cosine of the angle from the propagating direction
				stim::legendre<T>(Nl, cos_phi, P);

				for(int l = 0; l <= Nl; l++){
					E[i] += W[w].E() * A[l] * (stim::complex<T>)j_knr[l] * P[l];
				}
			}
		}
	}

	free(j_knr);															//release the scratch buffers
	free(y_knr);
	free(dj_knr);
	free(dy_knr);
	free(P);
#endif
	free(A);																//release the scattering coefficients
}
  604 +
  605 +template<typename T>
  606 +void cpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w, T a, stim::complex<T> n, T r_spacing = 0.1){
  607 + std::vector< stim::scalarwave<T> > W(1, w);
  608 + cpu_scalar_mie_internal(E, N, x, y, z, W, a, n, r_spacing);
  609 +}
  610 +
  611 +}
  612 +
  613 +#endif
0 614 \ No newline at end of file
... ...
stim/optics/planewave.h
1   -#ifndef RTS_PLANEWAVE
2   -#define RTS_PLANEWAVE
  1 +#ifndef STIM_PLANEWAVE_H
  2 +#define STIM_PLANEWAVE_H
3 3  
4 4 #include <string>
5 5 #include <sstream>
  6 +#include <cmath>
6 7  
7 8 #include "../math/vector.h"
8 9 #include "../math/quaternion.h"
9 10 #include "../math/constants.h"
10 11 #include "../math/plane.h"
11   -#include "../cuda/callable.h"
12   -
13   -/*Basic conversions used here (assuming a vacuum)
14   - lambda =
15   -*/
  12 +#include "../math/complex.h"
16 13  
17 14 namespace stim{
  15 + namespace optics{
  16 +
  17 + /// evaluate the scalar field produced by a plane wave at a point (x, y, z)
  18 +
  19 + /// @param x is the x-coordinate of the point
  20 + /// @param y is the y-coordinate of the point
  21 + /// @param z is the z-coordinate of the point
  22 + /// @param A is the amplitude of the plane wave, specifically the field at (0, 0, 0)
  23 + /// @param kx is the k-vector component in the x direction
  24 + /// @param ky is the k-vector component in the y direction
  25 + /// @param kz is the k-vector component in the z direction
  26 + template<typename T>
  27 + stim::complex<T> planewave_scalar(T x, T y, T z, stim::complex<T> A, T kx, T ky, T kz){
  28 + T d = x * kx + y * ky + z * kz; //calculate the dot product between k and p = (x, y, z) to find the distance p is along the propagation direction
  29 + stim::complex<T> di = stim::complex<T>(0, d); //calculate the phase shift that will have to be applied to propagate the wave distance d
  30 + return A * exp(di); //multiply the phase term by the amplitude at (0, 0, 0) to propagate the wave to p
  31 + }
  32 +
  33 + /// evaluate the scalar field produced by a plane wave at several positions
  34 +
  35 + /// @param field is a pre-allocated block of memory that will store the complex field at all points
  36 + /// @param N is the number of field values to be evaluated
  37 + /// @param x is a set of x coordinates defining positions within the field (NULL implies that all values are zero)
  38 + /// @param y is a set of y coordinates defining positions within the field (NULL implies that all values are zero)
  39 + /// @param z is a set of z coordinates defining positions within the field (NULL implies that all values are zero)
  40 + /// @param A is the amplitude of the plane wave, specifically the field at (0, 0, 0)
  41 + /// @param kx is the k-vector component in the x direction
  42 + /// @param ky is the k-vector component in the y direction
  43 + /// @param kz is the k-vector component in the z direction
  44 + template<typename T>
  45 + void cpu_planewave_scalar(stim::complex<T>* field, size_t N, T* x, T* y = NULL, T* z = NULL, stim::complex<T> A = 1.0, T kx = 0.0, T ky = 0.0, T kz = 0.0){
  46 + T px, py, pz;
  47 + for(size_t i = 0; i < N; i++){ // for each element in the array
  48 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values
  49 + (y == NULL) ? py = 0 : py = y[i];
  50 + (z == NULL) ? pz = 0 : pz = z[i];
  51 +
  52 + field[i] = planewave_scalar(px, py, pz, A, kx, ky, kz); // call the single-value plane wave function
  53 + }
  54 + }
18 55  
19 56 template<typename T>
20 57 class planewave{
21 58  
22 59 protected:
23 60  
24   - vec<T> k; //k = tau / lambda
25   - vec< complex<T> > E0; //amplitude
26   - //T phi;
27   -
28   - CUDA_CALLABLE planewave<T> bend(rts::vec<T> kn) const{
  61 + stim::vec<T> k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda
  62 + stim::vec< stim::complex<T> > E0; //amplitude (for a scalar plane wave, only E0[0] is used)
29 63  
30   - vec<T> kn_hat = kn.norm(); //normalize the new k
31   - vec<T> k_hat = k.norm(); //normalize the current k
  64 + /// Bend a plane wave via refraction, given that the new propagation direction is known
  65 + CUDA_CALLABLE planewave<T> bend(stim::vec<T> kn) const{
32 66  
33   - //std::cout<<"PLANE WAVE BENDING------------------"<<std::endl;
34   - //std::cout<<"kn_hat: "<<kn_hat<<" k_hat: "<<k_hat<<std::endl;
  67 + stim::vec<T> kn_hat = kn.norm(); //normalize the new k
  68 + stim::vec<T> k_hat = k.norm(); //normalize the current k
35 69  
36   - planewave<T> new_p; //create a new plane wave
  70 + planewave<T> new_p; //create a new plane wave
37 71  
38   - //if kn is equal to k or -k, handle the degenerate case
39   - T k_dot_kn = k_hat.dot(kn_hat);
  72 + T k_dot_kn = k_hat.dot(kn_hat); //if kn is equal to k or -k, handle the degenerate case
40 73  
41 74 //if k . n < 0, then the bend is a reflection
42   - //flip k_hat
43   - if(k_dot_kn < 0) k_hat = -k_hat;
  75 + if(k_dot_kn < 0) k_hat = -k_hat; //flip k_hat
44 76  
45   - //std::cout<<"k dot kn: "<<k_dot_kn<<std::endl;
46   -
47   - //std::cout<<"k_dot_kn: "<<k_dot_kn<<std::endl;
48 77 if(k_dot_kn == -1){
49 78 new_p.k = -k;
50 79 new_p.E0 = E0;
... ... @@ -56,28 +85,11 @@ protected:
56 85 return new_p;
57 86 }
58 87  
59   - vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector
60   -
61   - //std::cout<<"r: "<<r<<std::endl;
62   -
63   - T theta = asin(r.len()); //compute the angle of the rotation about r
64   -
65   -
66   -
67   - //deal with a zero vector (both k and kn point in the same direction)
68   - //if(theta == (T)0)
69   - //{
70   - // new_p = *this;
71   - // return new_p;
72   - //}
73   -
74   - //create a quaternion to capture the rotation
75   - quaternion<T> q;
76   - q.CreateRotation(theta, r.norm());
77   -
78   - //apply the rotation to E0
79   - vec< complex<T> > E0n = q.toMatrix3() * E0;
80   -
  88 + vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector
  89 + T theta = asin(r.len()); //compute the angle of the rotation about r
  90 + quaternion<T> q; //create a quaternion to capture the rotation
  91 + q.CreateRotation(theta, r.norm());
  92 + vec< complex<T> > E0n = q.toMatrix3() * E0; //apply the rotation to E0
81 93 new_p.k = kn_hat * kmag();
82 94 new_p.E0 = E0n;
83 95  
... ... @@ -86,16 +98,9 @@ protected:
86 98  
87 99 public:
88 100  
89   -
90   - ///constructor: create a plane wave propagating along z, polarized along x
91   - /*planewave(T lambda = (T)1)
92   - {
93   - k = rts::vec<T>(0, 0, 1) * (TAU/lambda);
94   - E0 = rts::vec<T>(1, 0, 0);
95   - }*/
96   - ///constructor: create a plane wave propagating along k, polarized along _E0, at frequency _omega
97   - CUDA_CALLABLE planewave(vec<T> kvec = rts::vec<T>(0, 0, rtsTAU),
98   - vec< complex<T> > E = rts::vec<T>(1, 0, 0), T phase = 0)
  101 + ///constructor: create a plane wave propagating along k
  102 + CUDA_CALLABLE planewave(vec<T> kvec = stim::vec<T>(0, 0, stim::TAU),
  103 + vec< complex<T> > E = stim::vec<T>(1, 0, 0))
99 104 {
100 105 //phi = phase;
101 106  
... ... @@ -107,27 +112,23 @@ public:
107 112 else{
108 113 vec< complex<T> > s = (k_hat.cross(E)).norm(); //compute an orthogonal side vector
109 114 vec< complex<T> > E_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector
110   - E0 = E_hat * E_hat.dot(E); //compute the projection of _E0 onto E0_hat
  115 + E0 = E_hat;// * E_hat.dot(E); //compute the projection of _E0 onto E0_hat
111 116 }
112 117  
113 118 E0 = E0 * exp( complex<T>(0, phase) );
114 119 }
115 120  
116 121 ///multiplication operator: scale E0
117   - CUDA_CALLABLE planewave<T> & operator* (const T & rhs)
118   - {
119   -
  122 + CUDA_CALLABLE planewave<T> & operator* (const T & rhs){
120 123 E0 = E0 * rhs;
121 124 return *this;
122 125 }
123 126  
124   - CUDA_CALLABLE T lambda() const
125   - {
126   - return rtsTAU / k.len();
  127 + CUDA_CALLABLE T lambda() const{
  128 + return stim::TAU / k.len();
127 129 }
128 130  
129   - CUDA_CALLABLE T kmag() const
130   - {
  131 + CUDA_CALLABLE T kmag() const{
131 132 return k.len();
132 133 }
133 134  
... ... @@ -139,14 +140,11 @@ public:
139 140 return k;
140 141 }
141 142  
142   - /*CUDA_CALLABLE T phase(){
143   - return phi;
  143 + /// calculate the value of the field produced by the plane wave given a three-dimensional position
  144 + CUDA_CALLABLE vec< complex<T> > pos(T x, T y, T z){
  145 + return pos( stim::vec<T>(x, y, z) );
144 146 }
145 147  
146   - CUDA_CALLABLE void phase(T p){
147   - phi = p;
148   - }*/
149   -
150 148 CUDA_CALLABLE vec< complex<T> > pos(vec<T> p = vec<T>(0, 0, 0)){
151 149 vec< complex<T> > result;
152 150  
... ... @@ -166,18 +164,32 @@ public:
166 164 return planewave<T>(k * (nt / ni), E0);
167 165 }
168 166  
169   - CUDA_CALLABLE planewave<T> refract(rts::vec<T> kn) const
170   - {
  167 + CUDA_CALLABLE planewave<T> refract(stim::vec<T> kn) const{
171 168 return bend(kn);
172 169 }
173 170  
174   - void scatter(rts::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){
  171 + /// Calculate the result of a plane wave hitting an interface between two refractive indices
  172 +
  173 + /// @param P is a plane representing the position and orientation of the surface
  174 + /// @param n0 is the refractive index outside of the surface (in the direction of the normal)
  175 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  176 + /// @param r is the reflected component of the plane wave
  177 + /// @param t is the transmitted component of the plane wave
  178 + void scatter(stim::plane<T> P, T n0, T n1, planewave<T> &r, planewave<T> &t){
  179 + scatter(P, n1/n0, r, t);
  180 + }
  181 +
  182 + /// Calculate the scattering result when nr = n1/n0
  183 +
  184 + /// @param P is a plane representing the position and orientation of the surface
  185 + /// @param nr is the ratio n1/n0, where n0 is the refractive index outside of the surface (in the direction of the normal)
  186 + /// and n1 is the refractive index inside the surface (in the direction away from the normal)
  187 + /// @param r is the reflected component of the plane wave
  188 + /// @param t is the transmitted component of the plane wave
  189 + void scatter(stim::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){
175 190  
176 191 int facing = P.face(k); //determine which direction the plane wave is coming in
177 192  
178   - //if(facing == 0) //if the wave is tangent to the plane, return an identical wave
179   - // return *this;
180   - //else
181 193 if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr
182 194 P = P.flip(); //flip the plane
183 195 nr = 1/nr; //invert the refractive index (now nr = n0/n1)
... ... @@ -192,7 +204,7 @@ public:
192 204 bool tir = false; //flag for total internal reflection
193 205 if(theta_t != theta_t){
194 206 tir = true;
195   - theta_t = rtsPI / (T)2;
  207 + theta_t = stim::PI / (T)2;
196 208 }
197 209  
198 210 //handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
... ... @@ -205,17 +217,10 @@ public:
205 217 vec< complex<T> > Et = E0 * tp;
206 218 T phase_t = P.p().dot(k - kt); //compute the phase offset
207 219 T phase_r = P.p().dot(k - kr);
208   - //std::cout<<"Degeneracy: Head-On"<<std::endl;
209   - //std::cout<<"rs: "<<rp<<" rp: "<<rp<<" ts: "<<tp<<" tp: "<<tp<<std::endl;
210   - //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;
211 220  
212 221 //create the plane waves
213 222 r = planewave<T>(kr, Er, phase_r);
214 223 t = planewave<T>(kt, Et, phase_t);
215   -
216   - //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;
217   - //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;
218   - //std::cout<<"--------------------------------"<<std::endl;
219 224 return;
220 225 }
221 226  
... ... @@ -245,11 +250,9 @@ public:
245 250  
246 251 //compute the magnitude of the p- and s-polarized components of the incident E vector
247 252 complex<T> Ei_s = E0.dot(x_hat);
248   - //int sgn = (0 < E0.dot(y_hat)) - (E0.dot(y_hat) < 0);
249 253 int sgn = E0.dot(y_hat).sgn();
250 254 vec< complex<T> > cx_hat = x_hat;
251 255 complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
252   - //T Ei_p = ( E0 - x_hat * Ei_s ).len();
253 256 //compute the magnitude of the p- and s-polarized components of the reflected E vector
254 257 complex<T> Er_s = Ei_s * rs;
255 258 complex<T> Er_p = Ei_p * rp;
... ... @@ -257,14 +260,6 @@ public:
257 260 complex<T> Et_s = Ei_s * ts;
258 261 complex<T> Et_p = Ei_p * tp;
259 262  
260   - //std::cout<<"E0: "<<E0<<std::endl;
261   - //std::cout<<"E0 dot y_hat: "<<E0.dot(y_hat)<<std::endl;
262   - //std::cout<<"theta i: "<<theta_i<<" theta t: "<<theta_t<<std::endl;
263   - //std::cout<<"x_hat: "<<x_hat<<" y_hat: "<<y_hat<<" z_hat: "<<z_hat<<std::endl;
264   - //std::cout<<"Ei_s: "<<Ei_s<<" Ei_p: "<<Ei_p<<" Er_s: "<<Er_s<<" Er_p: "<<Er_p<<" Et_s: "<<Et_s<<" Et_p: "<<Et_p<<std::endl;
265   - //std::cout<<"rs: "<<rs<<" rp: "<<rp<<" ts: "<<ts<<" tp: "<<tp<<std::endl;
266   -
267   -
268 263 //compute the reflected E vector
269 264 vec< complex<T> > Er = vec< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
270 265 //compute the transmitted E vector
... ... @@ -273,29 +268,12 @@ public:
273 268 T phase_t = P.p().dot(k - kt);
274 269 T phase_r = P.p().dot(k - kr);
275 270  
276   - //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;
277   -
278   - //std::cout<<"phase: "<<phase<<std::endl;
279   -
280 271 //create the plane waves
281 272 r.k = kr;
282 273 r.E0 = Er * exp( complex<T>(0, phase_r) );
283   - //r.phi = phase_r;
284   -
285   - //t = bend(kt);
286   - //t.k = t.k * nr;
287 274  
288 275 t.k = kt;
289 276 t.E0 = Et * exp( complex<T>(0, phase_t) );
290   - //t.phi = phase_t;
291   - //std::cout<<"i: "<<str()<<std::endl;
292   - //std::cout<<"r: "<<r.str()<<std::endl;
293   - //std::cout<<"t: "<<t.str()<<std::endl;
294   -
295   - //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;
296   - //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;
297   - //std::cout<<"--------------------------------"<<std::endl;
298   -
299 277 }
300 278  
301 279 std::string str()
... ... @@ -305,14 +283,15 @@ public:
305 283 ss<<" "<<E0<<" e^i ( "<<k<<" . r )";
306 284 return ss.str();
307 285 }
308   -};
309   -}
  286 +}; //end planewave class
  287 +} //end namespace optics
  288 +} //end namespace stim
310 289  
311 290 template <typename T>
312   -std::ostream& operator<<(std::ostream& os, rts::planewave<T> p)
  291 +std::ostream& operator<<(std::ostream& os, stim::optics::planewave<T> p)
313 292 {
314 293 os<<p.str();
315 294 return os;
316 295 }
317 296  
318 297 -#endif
  298 +#endif
319 299 \ No newline at end of file
... ...
stim/optics/scalarbeam.h 0 → 100644
  1 +#ifndef RTS_BEAM
  2 +#define RTS_BEAM
  3 +#include <boost/math/special_functions/bessel.hpp>
  4 +
  5 +#include "../math/vec3.h"
  6 +#include "../optics/scalarwave.h"
  7 +#include "../math/bessel.h"
  8 +#include "../math/legendre.h"
  9 +#include "../cuda/cudatools/devices.h"
  10 +#include "../cuda/cudatools/timer.h"
  11 +#include "../optics/scalarfield.h"
  12 +#include <cublas_v2.h>
  13 +#include <math_constants.h>
  14 +#include <vector>
  15 +#include <stdlib.h>
  16 +
  17 +
  18 +
  19 +namespace stim{
  20 +
  21 +/// Function returns the value of the scalar field produced by a beam with the specified parameters
  22 +
  23 +template<typename T>
  24 +std::vector< stim::vec3<T> > generate_focusing_vectors(size_t N, stim::vec3<T> d, T NA, T NA_in = 0){
  25 +
  26 + std::vector< stim::vec3<T> > dirs(N); //allocate an array to store the focusing vectors
  27 +
  28 + ///compute the rotation operator to transform (0, 0, 1) to k
  29 + T cos_angle = d.dot(vec3<T>(0, 0, 1));
  30 + stim::matrix<T, 3> rotation;
  31 +
  32 + //if the cosine of the angle is -1, the rotation is just a flip across the z axis
  33 + if(cos_angle == -1){
  34 + rotation(2, 2) = -1;
  35 + }
  36 + else if(cos_angle != 1.0)
  37 + {
  38 + vec3<T> r_axis = vec3<T>(0, 0, 1).cross(d).norm(); //compute the axis of rotation
  39 + T angle = acos(cos_angle); //compute the angle of rotation
  40 + quaternion<T> quat; //create a quaternion describing the rotation
  41 + quat.CreateRotation(angle, r_axis);
  42 + rotation = quat.toMatrix3(); //compute the rotation matrix
  43 + }
  44 +
  45 + //find the phi values associated with the cassegrain ring
  46 + T PHI[2];
  47 + PHI[0] = (T)asin(NA);
  48 + PHI[1] = (T)asin(NA_in);
  49 +
  50 + //calculate the z-axis cylinder coordinates associated with these angles
  51 + T Z[2];
  52 + Z[0] = cos(PHI[0]);
  53 + Z[1] = cos(PHI[1]);
  54 + T range = Z[0] - Z[1];
  55 +
  56 + //draw a distribution of random phi, z values
  57 + T z, phi, theta;
  58 + //T kmag = stim::TAU / lambda;
  59 + for(int i=0; i<N; i++){ //for each sample
  60 + z = (T)((double)rand() / (double)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder
  61 + theta = (T)(((double)rand() / (double)RAND_MAX) * stim::TAU);
  62 + phi = acos(z); //project onto the sphere, computing phi in spherical coordinates
  63 +
  64 + //compute and store cartesian coordinates
  65 + vec3<T> spherical(1, theta, phi); //convert from spherical to cartesian coordinates
  66 + vec3<T> cart = spherical.sph2cart();
  67 + dirs[i] = rotation * cart; //create a sample vector
  68 + }
  69 + return dirs;
  70 +}
  71 +
  72 +
  73 +/// Calculate the [0 Nl] terms for the aperture integral based on the give numerical aperture and center obscuration (optional)
  74 +/// @param C is a pointer to Nl + 1 values where the terms will be stored
  75 +template<typename T>
  76 +CUDA_CALLABLE void cpu_aperture_integral(T* C, int Nl, T NA, T NA_in = 0){
  77 +
  78 + size_t table_bytes = (Nl + 1) * sizeof(T); //calculate the number of bytes required to store the terms
  79 + T cos_alpha_1 = cos(asin(NA_in)); //calculate the cosine of the angle subtended by the central obscuration
  80 + T cos_alpha_2 = cos(asin(NA)); //calculate the cosine of the angle subtended by the aperture
  81 +
  82 + // the aperture integral is computed using four individual Legendre polynomials, each a function of the angles subtended
  83 + // by the objective and central obscuration
  84 + T* Pln_a1 = (T*) malloc(table_bytes);
  85 + stim::legendre<T>(Nl-1, cos_alpha_1, &Pln_a1[1]);
  86 + Pln_a1[0] = 1;
  87 +
  88 + T* Pln_a2 = (T*) malloc(table_bytes);
  89 + stim::legendre<T>(Nl-1, cos_alpha_2, &Pln_a2[1]);
  90 + Pln_a2[0] = 1;
  91 +
  92 + T* Plp_a1 = (T*) malloc(table_bytes+sizeof(T));
  93 + stim::legendre<T>(Nl+1, cos_alpha_1, Plp_a1);
  94 +
  95 + T* Plp_a2 = (T*) malloc(table_bytes+sizeof(T));
  96 + stim::legendre<T>(Nl+1, cos_alpha_2, Plp_a2);
  97 +
  98 + for(size_t l = 0; l <= Nl; l++){
  99 + C[l] = Plp_a1[l+1] - Plp_a2[l+1] - Pln_a1[l] + Pln_a2[l];
  100 + }
  101 +
  102 + free(Pln_a1);
  103 + free(Pln_a2);
  104 + free(Plp_a1);
  105 + free(Plp_a2);
  106 +}
  107 +
  108 +/// performs linear interpolation into a look-up table
  109 +template<typename T>
  110 +CUDA_CALLABLE void lut_lookup(T* lut_values, T* lut, T val, size_t N, T min_val, T delta, size_t n_vals){
  111 + T idx = ((val - min_val) / delta);
  112 + size_t i = (size_t) idx;
  113 + T a1 = idx - i;
  114 + T a0 = 1 - a1;
  115 + size_t n0 = i * n_vals;
  116 + size_t n1 = (i+1) * n_vals;
  117 + for(size_t n = 0; n < n_vals; n++){
  118 + lut_values[n] = lut[n0 + n] * a0 + lut[n1 + n] * a1;
  119 + }
  120 +}
  121 +
  122 +template <typename T>
  123 +CUDA_CALLABLE stim::complex<T> clerp(stim::complex<T> v0, stim::complex<T> v1, T t) {
  124 + return stim::complex<T>( fma(t, v1.r, fma(-t, v0.r, v0.r)), fma(t, v1.i, fma(-t, v0.i, v0.i)) );
  125 +}
  126 +
  127 +template <typename T>
  128 +CUDA_CALLABLE T lerp(T v0, T v1, T t) {
  129 + return fma(t, v1, fma(-t, v0, v0));
  130 +}
  131 +
  132 +#ifdef CUDA_FOUND
  133 +template<typename T>
  134 +__global__ void cuda_scalar_psf(stim::complex<T>* E, size_t N, T* r, T* phi, T A, size_t Nl,
  135 + T* C,
  136 + T* lut_j, size_t Nj, T min_r, T dr){
  137 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  138 + if(i >= N) return; //exit if this thread is outside the array
  139 +
  140 + T cos_phi = cos(phi[i]); //calculate the thread value for cos(phi)
  141 + stim::complex<T> Ei = 0; //initialize the value of the field to zero
  142 + size_t NC = Nl + 1; //calculate the number of coefficients to be used
  143 +
  144 + T fij = (r[i] - min_r)/dr; //FP index into the spherical bessel LUT
  145 + size_t ij = (size_t) fij; //convert to an integral index
  146 + T a = fij - ij; //calculate the fractional portion of the index
  147 + size_t n0j = ij * (NC); //start of the first entry in the LUT
  148 + size_t n1j = (ij+1) * (NC); //start of the second entry in the LUT
  149 +
  150 + T jl; //declare register to store the spherical bessel function
  151 + T Pl_2, Pl_1; //declare registers to store the previous two Legendre polynomials
  152 + T Pl = 1; //initialize the current value for the Legendre polynomial
  153 + stim::complex<T> im(0, 1); //declare i (imaginary 1)
  154 + stim::complex<T> i_pow(1, 0); //i_pow stores the current value of i^l so it doesn't have to be re-computed every iteration
  155 + for(int l = 0; l <= Nl; l++){ //for each order
  156 + jl = lerp<T>( lut_j[n0j + l], lut_j[n1j + l], a ); //read jl from the LUT and interpolate the result
  157 + Ei += i_pow * jl * Pl * C[l]; //calculate the value for the field and sum
  158 + i_pow *= im; //multiply i^l * i for the next iteration
  159 + Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1
  160 + Pl_1 = Pl;
  161 + if(l == 0){ //computing Pl is done recursively, where the recursive relation
  162 + Pl = cos_phi; // requires the first two orders. This defines the second.
  163 + }
  164 + else{ //if this is not the first iteration, use the recursive relation to calculate Pl
  165 + Pl = ( (2 * (l+1) - 1) * cos_phi * Pl_1 - (l) * Pl_2 ) / (l+1);
  166 + }
  167 +
  168 + }
  169 + E[i] = Ei * A * 2 * CUDART_PI_F; //scale the integral by the amplitude
  170 +}
  171 +
  172 +template<typename T>
  173 +void gpu_scalar_psf_local(stim::complex<T>* E, size_t N, T* r, T* phi, T lambda, T A, T NA, T NA_in, int Nl, T r_spacing){
  174 +
  175 + //Find the minimum and maximum values of r
  176 + cublasStatus_t stat;
  177 + cublasHandle_t handle;
  178 +
  179 + stat = cublasCreate(&handle); //create a cuBLAS handle
  180 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  181 + printf ("CUBLAS initialization failed\n");
  182 + exit(1);
  183 + }
  184 +
  185 + int i_min, i_max;
  186 + stat = cublasIsamin(handle, (int)N, r, 1, &i_min);
  187 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  188 + printf ("CUBLAS Error: failed to calculate minimum r value.\n");
  189 + exit(1);
  190 + }
  191 + stat = cublasIsamax(handle, (int)N, r, 1, &i_max);
  192 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  193 + printf ("CUBLAS Error: failed to calculate maximum r value.\n");
  194 + exit(1);
  195 + }
  196 +
  197 + i_min--; //cuBLAS uses 1-based indexing for Fortran compatibility
  198 + i_max--;
  199 + T r_min, r_max; //allocate space to store the minimum and maximum values
  200 + HANDLE_ERROR( cudaMemcpy(&r_min, r + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU
  201 + HANDLE_ERROR( cudaMemcpy(&r_max, r + i_max, sizeof(T), cudaMemcpyDeviceToHost) );
  202 +
  203 + T k = (T)stim::TAU / lambda; //calculate the wavenumber from lambda
  204 + size_t C_bytes = (Nl + 1) * sizeof(T);
  205 + T* C = (T*) malloc( C_bytes ); //allocate space for the aperture integral terms
  206 + cpu_aperture_integral(C, Nl, NA, NA_in); //calculate the aperture integral terms
  207 +
  208 + size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1); //number of values in the look-up table based on the user-specified spacing along r
  209 +
  210 +
  211 + size_t lutj_bytes = sizeof(T) * (Nl+1) * Nlut_j;
  212 + T* j_lut = (T*) malloc(lutj_bytes); //pointer to the look-up table
  213 + T dr = (r_max - r_min) / (Nlut_j-1); //distance between values in the LUT
  214 + T jl;
  215 + for(size_t ri = 0; ri < Nlut_j; ri++){ //for each value in the LUT
  216 + for(size_t l = 0; l <= Nl; l++){ //for each order
  217 + jl = boost::math::sph_bessel<T>(l, k*(r_min + ri * dr)); //use boost to calculate the spherical bessel function
  218 + j_lut[ri * (Nl + 1) + l] = jl; //store the bessel function result
  219 + }
  220 + }
  221 +
  222 + stim::cpu2image<T>(j_lut, "j_lut.bmp", Nl+1, Nlut_j, stim::cmBrewer);
  223 + //Allocate device memory and copy everything to the GPU
  224 +
  225 + T* gpu_C;
  226 + HANDLE_ERROR( cudaMalloc(&gpu_C, C_bytes) );
  227 + HANDLE_ERROR( cudaMemcpy(gpu_C, C, C_bytes, cudaMemcpyHostToDevice) );
  228 + T* gpu_j_lut;
  229 + HANDLE_ERROR( cudaMalloc(&gpu_j_lut, lutj_bytes) );
  230 + HANDLE_ERROR( cudaMemcpy(gpu_j_lut, j_lut, lutj_bytes, cudaMemcpyHostToDevice) );
  231 +
  232 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  233 + dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  234 +
  235 + cuda_scalar_psf<T><<< blocks, threads >>>(E, N, r, phi, A, Nl, gpu_C, gpu_j_lut, Nlut_j, r_min, dr);
  236 +
  237 + //free the LUT and condenser tables
  238 + HANDLE_ERROR( cudaFree(gpu_C) );
  239 + HANDLE_ERROR( cudaFree(gpu_j_lut) );
  240 +}
  241 +#endif
  242 +
  243 +/// Calculate the analytical solution to a scalar point spread function given a set of spherical coordinates about the PSF (beam propagation along phi = theta = 0)
  244 +template<typename T>
  245 +void cpu_scalar_psf_local(stim::complex<T>* F, size_t N, T* r, T* phi, T lambda, T A, T NA, T NA_in, int Nl){
  246 + T k = (T)stim::TAU / lambda;
  247 + size_t C_bytes = (Nl + 1) * sizeof(T);
  248 + T* C = (T*) malloc( C_bytes ); //allocate space for the aperture integral terms
  249 + cpu_aperture_integral(C, Nl, NA, NA_in); //calculate the aperture integral terms
  250 + memset(F, 0, N * sizeof(stim::complex<T>));
  251 + T jl, Pl, kr, cos_phi;
  252 +
  253 + double vm;
  254 + double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
  255 + double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
  256 + double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
  257 + double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
  258 +
  259 + T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T));
  260 +
  261 + for(size_t n = 0; n < N; n++){ //for each point in the field
  262 + kr = k * r[n]; //calculate kr (the optical distance between the focal point and p)
  263 + cos_phi = std::cos(phi[n]); //calculate the cosine of phi
  264 + stim::bessjyv_sph<double>(Nl, kr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl]
  265 + stim::legendre<T>(Nl, cos_phi, Pl_cos_phi); //calculate the [0 Nl] legendre polynomials for this point
  266 +
  267 + for(int l = 0; l <= Nl; l++){
  268 + jl = (T)jv[l];
  269 + Pl = Pl_cos_phi[l];
  270 + F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C[l];
  271 + }
  272 + F[n] *= A * stim::TAU;
  273 + }
  274 +
  275 + free(C);
  276 + free(Pl_cos_phi);
  277 +}
  278 +
  279 +/// Converts a set of cartesian points into spherical coordinates surrounding a point spread function (PSF)
  280 +/// @param r is the output distance from the PSF
  281 +/// @param phi is the non-symmetric direction about the PSF
  282 +/// @param x (x, y, z) are the cartesian coordinates in world space
  283 +/// @f is the focal point of the PSF in cartesian coordinates
  284 +/// @d is the propagation direction of the PSF in cartesian coordinates
  285 +template<typename T>
  286 +__global__ void cuda_cart2psf(T* r, T* phi, size_t N, T* x, T* y, T* z, stim::vec3<T> f, stim::quaternion<T> q){
  287 +
  288 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  289 + if(i >= N) return; //exit if this thread is outside the array
  290 +
  291 + stim::vec3<T> p; //declare a 3D point
  292 +
  293 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  294 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  295 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  296 +
  297 + p = p - f; //shift the point to the center of the PSF (focal point)
  298 + p = q.toMatrix3() * p; //rotate the point to align with the propagation direction
  299 +
  300 + stim::vec3<T> ps = p.cart2sph(); //convert from cartesian to spherical coordinates
  301 + r[i] = ps[0]; //store r
  302 + phi[i] = ps[2]; //phi = [0 pi]
  303 +}
  304 +
  305 +#ifdef CUDA_FOUND
  306 +/// Calculate the analytical solution to a point spread function given a set of points in cartesian coordinates
  307 +template<typename T>
  308 +void gpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){
  309 +
  310 + T* gpu_r; //allocate space for the coordinates in r
  311 + HANDLE_ERROR( cudaMalloc(&gpu_r, sizeof(T) * N) );
  312 + T* gpu_phi;
  313 + HANDLE_ERROR( cudaMalloc(&gpu_phi, sizeof(T) * N) );
  314 + //stim::complex<T>* gpu_E;
  315 + //HANDLE_ERROR( cudaMalloc(&gpu_E, sizeof(stim::complex<T>) * N) );
  316 +
  317 + stim::quaternion<T> q; //create a quaternion
  318 + q.CreateRotation(d, stim::vec3<T>(0, 0, 1)); //create a mapping from the propagation direction to the PSF space
  319 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  320 + dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  321 + cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q); //call the CUDA kernel to move the cartesian coordinates to PSF space
  322 +
  323 + gpu_scalar_psf_local(E, N, gpu_r, gpu_phi, lambda, A, NA, NA_in, Nl, r_spacing);
  324 +
  325 +}
  326 +#endif
  327 +
  328 +template<typename T>
  329 +void cpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){
  330 +
  331 +// If CUDA is available, copy the cartesian points to the GPU and evaluate them in a kernel
  332 +#ifdef CUDA_FOUND
  333 +
  334 + T* gpu_x = NULL;
  335 + if(x != NULL){
  336 + HANDLE_ERROR( cudaMalloc(&gpu_x, sizeof(T) * N) );
  337 + HANDLE_ERROR( cudaMemcpy(gpu_x, x, sizeof(T) * N, cudaMemcpyHostToDevice) );
  338 + }
  339 + T* gpu_y = NULL;
  340 + if(y != NULL){
  341 + HANDLE_ERROR( cudaMalloc(&gpu_y, sizeof(T) * N) );
  342 + HANDLE_ERROR( cudaMemcpy(gpu_y, y, sizeof(T) * N, cudaMemcpyHostToDevice) );
  343 + }
  344 + T* gpu_z = NULL;
  345 + if(z != NULL){
  346 + HANDLE_ERROR( cudaMalloc(&gpu_z, sizeof(T) * N) );
  347 + HANDLE_ERROR( cudaMemcpy(gpu_z, z, sizeof(T) * N, cudaMemcpyHostToDevice) );
  348 + }
  349 +
  350 + stim::complex<T>* gpu_E;
  351 + HANDLE_ERROR( cudaMalloc(&gpu_E, sizeof(stim::complex<T>) * N) );
  352 + HANDLE_ERROR( cudaMemcpy(gpu_E, E, sizeof(stim::complex<T>) * N, cudaMemcpyHostToDevice) );
  353 + gpu_scalar_psf_cart<T>(gpu_E, N, gpu_x, gpu_y, gpu_z, lambda, A, f, d, NA, NA_in, Nl, r_spacing);
  354 + HANDLE_ERROR( cudaMemcpy(E, gpu_E, sizeof(stim::complex<T>) * N, cudaMemcpyDeviceToHost) );
  355 +
  356 + HANDLE_ERROR( cudaFree(gpu_x) );
  357 + HANDLE_ERROR( cudaFree(gpu_y) );
  358 + HANDLE_ERROR( cudaFree(gpu_z) );
  359 + HANDLE_ERROR( cudaFree(gpu_E) );
  360 +
  361 +#else
  362 + T* r = (T*) malloc(N * sizeof(T)); //allocate space for p in spherical coordinates
  363 + T* phi = (T*) malloc(N * sizeof(T)); // only r and phi are necessary (the scalar PSF is symmetric about theta)
  364 +
  365 + stim::quaternion<T> q;
  366 + q.CreateRotation(d, stim::vec3<T>(0, 0, 1));
  367 + stim::matrix<T, 3> R = q.toMatrix3();
  368 + stim::vec3<T> p, ps, ds;
  369 + for(size_t i = 0; i < N; i++){
  370 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  371 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  372 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  373 +
  374 + p = p - f;
  375 +
  376 + p = R * p; //rotate the cartesian point
  377 +
  378 + ps = p.cart2sph(); //convert from cartesian to spherical coordinates
  379 + r[i] = ps[0]; //store r
  380 + phi[i] = ps[2]; //phi = [0 pi]
  381 + }
  382 +
  383 + cpu_scalar_psf_local(E, N, r, phi, lambda, A, NA, NA_in, Nl); //call the spherical coordinate CPU function
  384 +
  385 + free(r);
  386 + free(phi);
  387 +#endif
  388 +}
  389 +
  390 +/// Class stim::beam represents a beam of light focused at a point and composed of several plane waves
  391 +template<typename T>
  392 +class scalarbeam
  393 +{
  394 +public:
  395 + //enum beam_type {Uniform, Bartlett, Hamming, Hanning};
  396 +
  397 +private:
  398 +
  399 + T NA[2]; //numerical aperature of the focusing optics
  400 + vec3<T> f; //focal point
  401 + vec3<T> d; //propagation direction
  402 + T A; //beam amplitude
  403 + T lambda; //beam wavelength
  404 +public:
  405 +
  406 + ///constructor: build a default beam (NA=1.0)
  407 + scalarbeam(T wavelength = 1, T amplitude = 1, vec3<T> focal_point = vec3<T>(0, 0, 0), vec3<T> direction = vec3<T>(0, 0, 1), T numerical_aperture = 1, T center_obsc = 0){
  408 + lambda = wavelength;
  409 + A = amplitude;
  410 + f = focal_point;
  411 + d = direction.norm(); //make sure that the direction vector is normalized (makes calculations more efficient later on)
  412 + NA[0] = numerical_aperture;
  413 + NA[1] = center_obsc;
  414 + }
  415 +
  416 + ///Numerical Aperature functions
  417 + void setNA(T na)
  418 + {
  419 + NA[0] = (T)0;
  420 + NA[1] = na;
  421 + }
  422 + void setNA(T na0, T na1)
  423 + {
  424 + NA[0] = na0;
  425 + NA[1] = na1;
  426 + }
  427 +
  428 + //Monte-Carlo decomposition into plane waves
  429 + std::vector< scalarwave<T> > mc(size_t N = 100000) const{
  430 +
  431 + std::vector< stim::vec3<T> > dirs = generate_focusing_vectors(N, d, NA[0], NA[1]); //generate a random set of N vectors forming a focus
  432 + std::vector< scalarwave<T> > samples(N); //create a vector of plane waves
  433 + T kmag = (T)stim::TAU / lambda; //calculate the wavenumber
  434 + stim::complex<T> apw; //allocate space for the amplitude at the focal point
  435 + T a = (T)(stim::TAU * ( (1 - cos(asin(NA[0]))) - (1 - cos(asin(NA[1])))) / (double)N); //constant value weights plane waves based on the aperture and number of samples (N)
  436 + stim::vec3<T> kpw; //declare the new k-vector based on the focused plane wave direction
  437 + for(size_t i=0; i<N; i++){ //for each sample
  438 + kpw = dirs[i] * kmag; //calculate the k-vector for the new plane wave
  439 + apw = a * exp(stim::complex<T>(0, kpw.dot(-f))); //calculate the amplitude for the new plane wave
  440 + samples[i] = scalarwave<T>(kpw, apw); //create a plane wave based on the direction
  441 + }
  442 + return samples;
  443 + }
  444 +
  445 + /// Evaluate the beam to a scalar field using Debye focusing
  446 + void eval(stim::scalarfield<T>& E, size_t order = 500){
  447 + size_t array_size = E.grid_bytes();
  448 + T* X = (T*) malloc( array_size ); //allocate space for the coordinate meshes
  449 + T* Y = (T*) malloc( array_size );
  450 + T* Z = (T*) malloc( array_size );
  451 +
  452 + E.meshgrid(X, Y, Z, stim::CPUmem); //calculate the coordinate meshes
  453 + cpu_scalar_psf_cart<T>(E.ptr(), E.size(), X, Y, Z, lambda, A, f, d, NA[0], NA[1], order, E.spacing());
  454 +
  455 + free(X); //free the coordinate meshes
  456 + free(Y);
  457 + free(Z);
  458 + }
  459 +
  460 + /// Calculate the field at a given point
  461 + /// @param x is the x-coordinate of the field point
  462 + /// @O is the approximation accuracy
  463 + stim::complex<T> field(T x, T y, T z, size_t O){
  464 + std::vector< scalarwave<T> > W = mc(O);
  465 + T result = 0; //initialize the result to zero (0)
  466 + for(size_t i = 0; i < O; i++){ //for each plane wave
  467 + result += W[i].pos(x, y, z);
  468 + }
  469 + return result;
  470 + }
  471 +
  472 + std::string str()
  473 + {
  474 + std::stringstream ss;
  475 + ss<<"Beam:"<<std::endl;
  476 + //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
  477 + ss<<" Beam Direction: "<<d<<std::endl;
  478 + if(NA[0] == 0)
  479 + ss<<" NA: "<<NA[1];
  480 + else
  481 + ss<<" NA: "<<NA[0]<<" -- "<<NA[1];
  482 +
  483 + return ss.str();
  484 + }
  485 +
  486 +
  487 +
  488 +}; //end beam
  489 +} //end namespace stim
  490 +
  491 +#endif
... ...
stim/optics/scalarfield.h 0 → 100644
  1 +#ifndef STIM_SCALARFIELD_H
  2 +#define STIM_SCALARFIELD_H
  3 +
  4 +#include "../math/rect.h"
  5 +#include "../math/complex.h"
  6 +
  7 +namespace stim{
  8 +
  9 + enum locationType {CPUmem, GPUmem};
  10 +
  11 + /// Class represents a scalar optical field.
  12 +
  13 + /// In general, this class is designed to operate between the CPU and GPU. So, make sure all functions have an option to create the output on either.
  14 + /// The field is stored *either* on the GPU or host memory, but not both. This enforces that there can't be different copies of the same field.
  15 + /// This class is designed to be included in all of the other scalar optics classes, allowing them to render output data so make sure to keep it general and compatible.
  16 +
  17 +template<typename T>
  18 +class scalarfield : public rect<T>{
  19 +
  20 +protected:
  21 + stim::complex<T>* E;
  22 + size_t R[2];
  23 + locationType loc;
  24 +
  25 +
  26 +
  27 +public:
  28 +
  29 + CUDA_CALLABLE scalarfield(size_t X, size_t Y, T size = 1, T z_pos = 0) : rect<T>::rect(size, z_pos){
  30 + R[0] = X; //set the field resolution
  31 + R[1] = Y;
  32 +
  33 + E = (stim::complex<T>*) malloc(sizeof(stim::complex<T>) * R[0] * R[1]); //allocate in CPU memory
  34 + loc = CPUmem;
  35 + }
  36 +
  37 + CUDA_CALLABLE ~scalarfield(){
  38 + if(loc == CPUmem) free(E);
  39 + else cudaFree(E);
  40 + }
  41 +
  42 + /// Returns the number of values in the field
  43 + CUDA_CALLABLE size_t size(){
  44 + return R[0] * R[1];
  45 + }
  46 +
  47 + CUDA_CALLABLE size_t grid_bytes(){
  48 + return sizeof(stim::complex<T>) * R[0] * R[1];
  49 + }
  50 +
  51 + /// Calculates the distance between points on the grid
  52 + T spacing(){
  53 + T du = rect<T>::X.len() / R[0];
  54 + T dv = rect<T>::Y.len() / R[1];
  55 + return min<T>(du, dv);
  56 + }
  57 +
  58 + /// Copy the field array to the GPU, if it isn't already there
  59 + void to_gpu(){
  60 + if(loc == GPUmem) return;
  61 + else{
  62 + stim::complex<T>* dev_E;
  63 + HANDLE_ERROR( cudaMalloc(&dev_E, e_bytes()) ); //allocate GPU memory
  64 + HANDLE_ERROR( cudaMemcpy(dev_E, E, e_bytes(), cudaMemcpyHostToDevice) ); //copy the field to the GPU
  65 + free(E); //free the CPU memory
  66 + E = dev_E; //swap pointers
  67 + }
  68 + }
  69 +
  70 + /// Copy the field array to the CPU, if it isn't already there
  71 + void to_cpu(){
  72 + if(loc == CPUmem) return;
  73 + else{
  74 + stim::complex<T>* host_E = (stim::complex<T>*) malloc(e_bytes()); //allocate space in main memory
  75 + HANDLE_ERROR( cudaMemcpy(host_E, E, e_bytes(), cudaMemcpyDeviceToHost) ); //copy from GPU to CPU
  76 + HANDLE_ERROR( cudaFree(E) ); //free device memory
  77 + E = host_E; //swap pointers
  78 + }
  79 + }
  80 +
  81 + std::string str(){
  82 + std::stringstream ss;
  83 + ss<<rect<T>::str()<<std::endl;
  84 + ss<<"[ "<<R[0]<<" x "<<R[1]<<" ]"<<std::endl;
  85 + ss<<"location: ";
  86 + if(loc == CPUmem) ss<<"CPU";
  87 + else ss<<"GPU";
  88 +
  89 + ss<<endl;
  90 + return ss.str();
  91 + }
  92 +
  93 + stim::complex<T>* ptr(){
  94 + return E;
  95 + }
  96 +
  97 + /// Evaluate the cartesian coordinates of each point in the field. The resulting arrays are allocated in the same memory where the field is stored.
  98 + void meshgrid(T* X, T* Y, T* Z, locationType location){
  99 + size_t array_size = sizeof(T) * R[0] * R[1];
  100 + if(location == CPUmem){
  101 +
  102 + T du = 1.0 / (R[0] - 1); //calculate the spacing between points in the grid
  103 + T dv = 1.0 / (R[1] - 1);
  104 +
  105 + size_t ui, vi, i;
  106 + stim::vec3<T> p;
  107 + for(vi = 0; vi < R[1]; vi++){
  108 + i = vi * R[0];
  109 + for(ui = 0; ui < R[0]; ui++){
  110 + p = rect<T>::p(ui * du, vi * dv);
  111 + X[i] = p[0];
  112 + Y[i] = p[1];
  113 + Z[i] = p[2];
  114 + i++;
  115 + }
  116 + }
  117 + stim::cpu2image(X, "X.bmp", R[0], R[1], stim::cmBrewer);
  118 + stim::cpu2image(Y, "Y.bmp", R[0], R[1], stim::cmBrewer);
  119 + stim::cpu2image(Z, "Z.bmp", R[0], R[1], stim::cmBrewer);
  120 + }
  121 + else{
  122 + std::cout<<"GPU allocation of a meshgrid isn't supported yet. You'll have to write kernels to do the calculation.";
  123 + exit(1);
  124 + }
  125 + }
  126 +
  127 + void image(std::string filename, stim::complexComponentType type = complexMag, stim::colormapType cmap = stim::cmBrewer){
  128 +
  129 + if(loc == GPUmem) to_cpu(); //if the field is in the GPU, move it to the CPU
  130 + T* image = (T*) malloc( sizeof(T) * size() ); //allocate space for the real image
  131 +
  132 + switch(type){ //get the specified component from the complex value
  133 + case complexMag:
  134 + stim::abs(image, E, size());
  135 + break;
  136 + case complexReal:
  137 + stim::real(image, E, size());
  138 + break;
  139 + case complexImaginary:
  140 + stim::imag(image, E, size());
  141 + }
  142 + stim::cpu2image(image, filename, R[0], R[1], cmap); //save the resulting image
  143 + free(image); //free the real image
  144 + }
  145 +
  146 +}; //end class scalarfield
  147 +}
  148 +
  149 +//stream insertion operator
  150 +template<typename T>
  151 +std::ostream& operator<<(std::ostream& os, stim::scalarfield<T>& rhs){
  152 + os<<rhs.str();
  153 + return os;
  154 +}
  155 +
  156 +
  157 +#endif
0 158 \ No newline at end of file
... ...
stim/optics/scalarwave.h 0 → 100644
  1 +#ifndef STIM_SCALARWAVE_H
  2 +#define STIM_SCALARWAVE_H
  3 +
  4 +
  5 +#include <string>
  6 +#include <sstream>
  7 +#include <cmath>
  8 +
  9 +//#include "../math/vector.h"
  10 +#include "../math/vec3.h"
  11 +#include "../math/quaternion.h"
  12 +#include "../math/constants.h"
  13 +#include "../math/plane.h"
  14 +#include "../math/complex.h"
  15 +
  16 +//CUDA
  17 +#include "../cuda/cudatools/devices.h"
  18 +#include "../cuda/cudatools/error.h"
  19 +#include "../cuda/sharedmem.cuh"
  20 +
  21 +namespace stim{
  22 +
  23 +template<typename T>
  24 +class scalarwave{
  25 +
  26 +public:
  27 +
  28 + stim::vec3<T> k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda
  29 + stim::complex<T> E0; //amplitude
  30 +
  31 + /// Bend a plane wave via refraction, given that the new propagation direction is known
  32 + CUDA_CALLABLE scalarwave<T> bend(stim::vec3<T> kn) const{
  33 + return scalarwave<T>(kn.norm() * kmag(), E0);
  34 + }
  35 +
  36 +public:
  37 +
  38 + ///constructor: create a plane wave propagating along k
  39 + CUDA_CALLABLE scalarwave(vec3<T> kvec = stim::vec3<T>(0, 0, (T)stim::TAU), complex<T> E = 1){
  40 + k = kvec;
  41 + E0 = E;
  42 + }
  43 +
  44 + CUDA_CALLABLE scalarwave(T kx, T ky, T kz, complex<T> E = 1){
  45 + k = vec3<T>(kx, ky, kz);
  46 + E0 = E;
  47 + }
  48 +
  49 + ///multiplication operator: scale E0
  50 + CUDA_CALLABLE scalarwave<T> & operator* (const T & rhs){
  51 + E0 = E0 * rhs;
  52 + return *this;
  53 + }
  54 +
  55 + CUDA_CALLABLE T lambda() const{
  56 + return stim::TAU / k.len();
  57 + }
  58 +
  59 + CUDA_CALLABLE T kmag() const{
  60 + return k.len();
  61 + }
  62 +
  63 + CUDA_CALLABLE complex<T> E(){
  64 + return E0;
  65 + }
  66 +
  67 + CUDA_CALLABLE vec3<T> kvec(){
  68 + return k;
  69 + }
  70 +
  71 + /// calculate the value of the field produced by the plane wave given a three-dimensional position
  72 + CUDA_CALLABLE complex<T> pos(T x, T y, T z){
  73 + return pos( stim::vec3<T>(x, y, z) );
  74 + }
  75 +
  76 + CUDA_CALLABLE complex<T> pos(vec3<T> p = vec3<T>(0, 0, 0)){
  77 + return E0 * exp(complex<T>(0, k.dot(p)));
  78 + }
  79 +
  80 + //scales k based on a transition from material ni to material nt
  81 + CUDA_CALLABLE scalarwave<T> n(T ni, T nt){
  82 + return scalarwave<T>(k * (nt / ni), E0);
  83 + }
  84 +
  85 + CUDA_CALLABLE scalarwave<T> refract(stim::vec3<T> kn) const{
  86 + return bend(kn);
  87 + }
  88 +
  89 + /// Calculate the result of a plane wave hitting an interface between two refractive indices
  90 +
  91 + /// @param P is a plane representing the position and orientation of the surface
  92 + /// @param n0 is the refractive index outside of the surface (in the direction of the normal)
  93 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  94 + /// @param r is the reflected component of the plane wave
  95 + /// @param t is the transmitted component of the plane wave
  96 + void scatter(stim::plane<T> P, T n0, T n1, scalarwave<T> &r, scalarwave<T> &t){
  97 + scatter(P, n1/n0, r, t);
  98 + }
  99 +
  100 + /// Calculate the scattering result when nr = n1/n0
  101 +
  102 + /// @param P is a plane representing the position and orientation of the surface
  103 + /// @param r is the ration n1/n0
  104 + /// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  105 + /// @param r is the reflected component of the plane wave
  106 + /// @param t is the transmitted component of the plane wave
  107 + void scatter(stim::plane<T> P, T nr, scalarwave<T> &r, scalarwave<T> &t){
  108 + /*
  109 + int facing = P.face(k); //determine which direction the plane wave is coming in
  110 +
  111 + if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr
  112 + P = P.flip(); //flip the plane
  113 + nr = 1/nr; //invert the refractive index (now nr = n0/n1)
  114 + }
  115 +
  116 + //use Snell's Law to calculate the transmitted angle
  117 + T cos_theta_i = k.norm().dot(-P.norm()); //compute the cosine of theta_i
  118 + T theta_i = acos(cos_theta_i); //compute theta_i
  119 + T sin_theta_t = (1/nr) * sin(theta_i); //compute the sine of theta_t using Snell's law
  120 + T theta_t = asin(sin_theta_t); //compute the cosine of theta_t
  121 +
  122 + bool tir = false; //flag for total internal reflection
  123 + if(theta_t != theta_t){
  124 + tir = true;
  125 + theta_t = stim::PI / (T)2;
  126 + }
  127 +
  128 + //handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
  129 + if(theta_i == 0){
  130 + T rp = (1 - nr) / (1 + nr); //compute the Fresnel coefficients
  131 + T tp = 2 / (1 + nr);
  132 + vec3<T> kr = -k;
  133 + vec3<T> kt = k * nr; //set the k vectors for theta_i = 0
  134 + vec3< complex<T> > Er = E0 * rp; //compute the E vectors
  135 + vec3< complex<T> > Et = E0 * tp;
  136 + T phase_t = P.p().dot(k - kt); //compute the phase offset
  137 + T phase_r = P.p().dot(k - kr);
  138 +
  139 + //create the plane waves
  140 + r = planewave<T>(kr, Er, phase_r);
  141 + t = planewave<T>(kt, Et, phase_t);
  142 + return;
  143 + }
  144 +
  145 +
  146 + //compute the Fresnel coefficients
  147 + T rp, rs, tp, ts;
  148 + rp = tan(theta_t - theta_i) / tan(theta_t + theta_i);
  149 + rs = sin(theta_t - theta_i) / sin(theta_t + theta_i);
  150 +
  151 + if(tir){
  152 + tp = ts = 0;
  153 + }
  154 + else{
  155 + tp = ( 2 * sin(theta_t) * cos(theta_i) ) / ( sin(theta_t + theta_i) * cos(theta_t - theta_i) );
  156 + ts = ( 2 * sin(theta_t) * cos(theta_i) ) / sin(theta_t + theta_i);
  157 + }
  158 +
  159 + //compute the coordinate space for the plane of incidence
  160 + vec3<T> z_hat = -P.norm();
  161 + vec3<T> y_hat = P.parallel(k).norm();
  162 + vec3<T> x_hat = y_hat.cross(z_hat).norm();
  163 +
  164 + //compute the k vectors for r and t
  165 + vec3<T> kr, kt;
  166 + kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag();
  167 + kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr;
  168 +
  169 + //compute the magnitude of the p- and s-polarized components of the incident E vector
  170 + complex<T> Ei_s = E0.dot(x_hat);
  171 + int sgn = E0.dot(y_hat).sgn();
  172 + vec3< complex<T> > cx_hat = x_hat;
  173 + complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
  174 + //compute the magnitude of the p- and s-polarized components of the reflected E vector
  175 + complex<T> Er_s = Ei_s * rs;
  176 + complex<T> Er_p = Ei_p * rp;
  177 + //compute the magnitude of the p- and s-polarized components of the transmitted E vector
  178 + complex<T> Et_s = Ei_s * ts;
  179 + complex<T> Et_p = Ei_p * tp;
  180 +
  181 + //compute the reflected E vector
  182 + vec3< complex<T> > Er = vec3< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
  183 + //compute the transmitted E vector
  184 + vec3< complex<T> > Et = vec3< complex<T> >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s;
  185 +
  186 + T phase_t = P.p().dot(k - kt);
  187 + T phase_r = P.p().dot(k - kr);
  188 +
  189 + //create the plane waves
  190 + r.k = kr;
  191 + r.E0 = Er * exp( complex<T>(0, phase_r) );
  192 +
  193 + t.k = kt;
  194 + t.E0 = Et * exp( complex<T>(0, phase_t) );
  195 + */
  196 + }
  197 +
  198 + std::string str()
  199 + {
  200 + std::stringstream ss;
  201 + ss<<"Plane Wave:"<<std::endl;
  202 + ss<<" "<<E0<<" e^i ( "<<k<<" . r )";
  203 + return ss.str();
  204 + }
  205 +}; //end planewave class
  206 +
  207 +
  208 +/// CUDA kernel for computing the field produced by a batch of plane waves at an array of locations
  209 +template<typename T>
  210 +__global__ void cuda_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t n_waves){
  211 + extern __shared__ stim::scalarwave<T> shared_W[]; //declare the list of waves in shared memory
  212 +
  213 + stim::cuda::sharedMemcpy(shared_W, W, n_waves, threadIdx.x, blockDim.x); //copy the plane waves into shared memory for faster access
  214 + __syncthreads(); //synchronize threads to insure all data is copied
  215 +
  216 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  217 + if(i >= N) return; //exit if this thread is outside the array
  218 + T px, py, pz;
  219 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values and set positions
  220 + (y == NULL) ? py = 0 : py = y[i];
  221 + (z == NULL) ? pz = 0 : pz = z[i];
  222 +
  223 + stim::complex<T> f = 0; //create a register to store the result
  224 + for(size_t w = 0; w < n_waves; w++)
  225 + f += shared_W[w].pos(px, py, pz); //evaluate the plane wave
  226 + F[i] += f; //copy the result to device memory
  227 +}
  228 +
  229 +/// evaluate a scalar wave at several points, where all arrays are on the GPU
  230 +template<typename T>
  231 +void gpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  232 +
  233 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  234 + dim3 blocks(N / threads + 1); //calculate the optimal number of blocks
  235 + cuda_scalarwave<T><<< blocks, threads >>>(F, N, x, y, z, w); //call the kernel
  236 +}
  237 +
  238 +template<typename T>
  239 +void gpu_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW){
  240 +
  241 + size_t wave_bytes = sizeof(stim::scalarwave<T>);
  242 + size_t shared_bytes = stim::sharedMemPerBlock(); //calculate the maximum amount of shared memory available
  243 + size_t max_batch = shared_bytes / wave_bytes; //calculate number of plane waves that will fit into shared memory
  244 + size_t batch_bytes = min(nW, max_batch) * wave_bytes; //initialize the batch size (in bytes) to the maximum batch required
  245 +
  246 + stim::scalarwave<T>* batch_W;
  247 + HANDLE_ERROR(cudaMalloc(&batch_W, batch_bytes)); //allocate memory for a single batch of plane waves
  248 +
  249 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  250 + dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  251 +
  252 + size_t batch_size; //declare a variable to store the size of the current batch
  253 + size_t waves_processed = 0; //initialize the number of waves processed to zero
  254 + while(waves_processed < nW){ //while there are still waves to be processed
  255 + batch_size = min<size_t>(max_batch, nW - waves_processed); //process either a whole batch, or whatever is left
  256 + batch_bytes = batch_size * sizeof(stim::scalarwave<T>);
  257 + HANDLE_ERROR(cudaMemcpy(batch_W, W + waves_processed, batch_bytes, cudaMemcpyDeviceToDevice)); //copy the plane waves into global memory
  258 + cuda_scalarwave<T><<< blocks, threads, batch_bytes >>>(F, N, x, y, z, batch_W, batch_size); //call the kernel
  259 + waves_processed += batch_size; //increment the counter indicating how many waves have been processed
  260 + }
  261 + cudaFree(batch_W);
  262 +}
  263 +
  264 +/// Sums a series of coherent plane waves at a specified point
  265 +/// @param field is the output array of field values corresponding to each input point
  266 +/// @param x is an array of x coordinates for the field point
  267 +/// @param y is an array of y coordinates for the field point
  268 +/// @param z is an array of z coordinates for the field point
  269 +/// @param N is the number of points in the input and output arrays
  270 +/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
  271 +/// @param A is the list of amplitudes for each wave
  272 +/// @param S is the list of propagation directions for each wave
  273 +template<typename T>
  274 +void cpu_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > W){
  275 + size_t S = W.size(); //store the number of waves
  276 +#ifdef __CUDACC__
  277 + stim::complex<T>* dev_F; //allocate space for the field
  278 + cudaMalloc(&dev_F, N * sizeof(stim::complex<T>));
  279 + cudaMemcpy(dev_F, F, N * sizeof(stim::complex<T>), cudaMemcpyHostToDevice);
  280 + //cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>)); //set the field to zero (necessary because a sum is used)
  281 +
  282 + T* dev_x = NULL; //allocate space and copy the X coordinate (if specified)
  283 + if(x != NULL){
  284 + HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T)));
  285 + HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice));
  286 + }
  287 +
  288 + T* dev_y = NULL; //allocate space and copy the Y coordinate (if specified)
  289 + if(y != NULL){
  290 + HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T)));
  291 + HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice));
  292 + }
  293 +
  294 + T* dev_z = NULL; //allocate space and copy the Z coordinate (if specified)
  295 + if(z != NULL){
  296 + HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T)));
  297 + HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice));
  298 + }
  299 +
  300 + stim::scalarwave<T>* dev_W;
  301 + HANDLE_ERROR( cudaMalloc(&dev_W, sizeof(stim::scalarwave<T>) * W.size()) );
  302 + HANDLE_ERROR( cudaMemcpy(dev_W, &W[0], sizeof(stim::scalarwave<T>) * W.size(), cudaMemcpyHostToDevice) );
  303 +
  304 + gpu_scalarwaves(dev_F, N, dev_x, dev_y, dev_z, dev_W, W.size());
  305 +
  306 + cudaMemcpy(F, dev_F, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost); //copy the field from device memory
  307 +
  308 + if(x != NULL) cudaFree(dev_x); //free everything
  309 + if(y != NULL) cudaFree(dev_y);
  310 + if(z != NULL) cudaFree(dev_z);
  311 + cudaFree(dev_F);
  312 +#else
  313 + memset(F, 0, N * sizeof(stim::complex<T>));
  314 + T px, py, pz;
  315 + for(size_t i = 0; i < N; i++){ // for each element in the array
  316 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values
  317 + (y == NULL) ? py = 0 : py = y[i];
  318 + (z == NULL) ? pz = 0 : pz = z[i];
  319 +
  320 + for(size_t s = 0; s < S; s++){
  321 + F[i] += w_array[s].pos(px, py, pz); //sum all plane waves at this point
  322 + }
  323 + }
  324 +#endif
  325 +}
  326 +
  327 +template<typename T>
  328 +void cpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  329 + std::vector< stim::scalarwave<T> > w_array(1, w);
  330 + cpu_scalarwaves(F, N, x, y, z, w_array);
  331 +}
  332 +
  333 +template<typename T>
  334 +void cpu_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  335 + std::vector< stim::scalarwave<T> > w_array(1, w);
  336 + cpu_scalarwaves(F, N, x, y, z, w_array);
  337 +}
  338 +
  339 +
  340 +/// Sums a series of coherent plane waves at a specified point
  341 +/// @param x is the x coordinate of the field point
  342 +/// @param y is the y coordinate of the field point
  343 +/// @param z is the z coordinate of the field point
  344 +/// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
  345 +/// @param A is the list of amplitudes for each wave
  346 +/// @param S is the list of propagation directions for each wave
  347 +template<typename T>
  348 +CUDA_CALLABLE stim::complex<T> cpu_scalarwaves(T x, T y, T z, std::vector< stim::scalarwave<T> > W){
  349 + size_t N = W.size(); //get the number of plane wave samples
  350 + stim::complex<T> field(0, 0); //initialize the field to zero (0)
  351 + stim::vec3<T> k; //allocate space for the direction vector
  352 + for(size_t i = 0; i < N; i++){
  353 + field += W[i].pos(x, y, z);
  354 + }
  355 + return field;
  356 +}
  357 +
  358 +} //end namespace stim
  359 +
  360 +template <typename T>
  361 +std::ostream& operator<<(std::ostream& os, stim::scalarwave<T> p)
  362 +{
  363 + os<<p.str();
  364 + return os;
  365 +}
  366 +
  367 +#endif
0 368 \ No newline at end of file
... ...
stim/optics/beam.h renamed to stim/optics_old/beam.h
1   -#ifndef RTS_BEAM
2   -#define RTS_BEAM
3   -
4   -#include "../math/vector.h"
5   -#include "../math/function.h"
6   -#include "../optics/planewave.h"
7   -#include <vector>
8   -
9   -namespace stim{
10   -
11   -template<typename P>
12   -class beam : public planewave<P>
13   -{
14   -public:
15   - enum beam_type {Uniform, Bartlett, Hamming, Hanning};
16   -
17   -private:
18   -
19   - P _na[2]; //numerical aperature of the focusing optics
20   - vec<P> f; //focal point
21   - function<P, P> apod; //apodization function
22   - unsigned int apod_res; //resolution of apodization filter functions
23   -
24   - void apod_uniform()
25   - {
26   - apod = (P)1;
27   - }
28   - void apod_bartlett()
29   - {
30   - apod = (P)1;
31   - apod.insert((P)1, (P)0);
32   - }
33   - void apod_hanning()
34   - {
35   - apod = (P)0;
36   - P x, y;
37   - for(unsigned int n=0; n<apod_res; n++)
38   - {
39   - x = (P)n/(P)apod_res;
40   - y = pow( cos( ((P)3.14159 * x) / 2 ), 2);
41   - apod.insert(x, y);
42   - }
43   - }
44   - void apod_hamming()
45   - {
46   - apod = (P)0;
47   - P x, y;
48   - for(unsigned int n=0; n<apod_res; n++)
49   - {
50   - x = (P)n/(P)apod_res;
51   - y = (P)27/(P)50 + ( (P)23/(P)50 ) * cos((P)3.14159 * x);
52   - apod.insert(x, y);
53   - }
54   - }
55   -
56   - void set_apod(beam_type type)
57   - {
58   - if(type == Uniform)
59   - apod_uniform();
60   - if(type == Bartlett)
61   - apod_bartlett();
62   - if(type == Hanning)
63   - apod_hanning();
64   - if(type == Hamming)
65   - apod_hamming();
66   - }
67   -
68   -public:
69   -
70   - ///constructor: build a default beam (NA=1.0)
71   - beam(
72   - vec<P> k = rts::vec<P>(0, 0, rtsTAU),
73   - vec<P> _E0 = rts::vec<P>(1, 0, 0),
74   - beam_type _apod = Uniform)
75   - : planewave<P>(k, _E0)
76   - {
77   - _na[0] = (P)0.0;
78   - _na[1] = (P)1.0;
79   - f = vec<P>( (P)0, (P)0, (P)0 );
80   - apod_res = 256; //set the default resolution for apodization filters
81   - set_apod(_apod); //set the apodization function type
82   - }
83   -
84   - beam<P> refract(rts::vec<P> kn) const{
85   -
86   - beam<P> new_beam;
87   - new_beam._na[0] = _na[0];
88   - new_beam._na[1] = _na[1];
89   -
90   -
91   - rts::planewave<P> pw = planewave<P>::bend(kn);
92   - //std::cout<<pw.str()<<std::endl;
93   -
94   - new_beam.k = pw.kvec();
95   - new_beam.E0 = pw.E();
96   -
97   - return new_beam;
98   - }
99   -
100   - ///Numerical Aperature functions
101   - void NA(P na)
102   - {
103   - _na[0] = (P)0;
104   - _na[1] = na;
105   - }
106   - void NA(P na0, P na1)
107   - {
108   - _na[0] = na0;
109   - _na[1] = na1;
110   - }
111   -
112   - /*string str() :
113   - {
114   - stringstream ss;
115   - ss<<"Beam Center: "<<k<<std::endl;
116   -
117   - return ss.str();
118   - }*/
119   -
120   - //Monte-Carlo decomposition into plane waves
121   - std::vector< planewave<P> > mc(unsigned int N = 100000, unsigned int seed = 0) const
122   - {
123   - /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling
124   - of a sphere and projecting these samples onto an inscribed sphere.
125   -
126   - seed = seed for the random number generator
127   - */
128   - srand(seed); //seed the random number generator
129   -
130   - vec<P> k_hat = beam::k.norm();
131   -
132   - ///compute the rotation operator to transform (0, 0, 1) to k
133   - P cos_angle = k_hat.dot(rts::vec<P>(0, 0, 1));
134   - rts::matrix<P, 3> rotation;
135   -
136   - //if the cosine of the angle is -1, the rotation is just a flip across the z axis
137   - if(cos_angle == -1){
138   - rotation(2, 2) = -1;
139   - }
140   - else if(cos_angle != 1.0)
141   - {
142   - rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k_hat).norm(); //compute the axis of rotation
143   - P angle = acos(cos_angle); //compute the angle of rotation
144   - rts::quaternion<P> quat; //create a quaternion describing the rotation
145   - quat.CreateRotation(angle, r_axis);
146   - rotation = quat.toMatrix3(); //compute the rotation matrix
147   - }
148   -
149   - //find the phi values associated with the cassegrain ring
150   - P PHI[2];
151   - PHI[0] = (P)asin(_na[0]);
152   - PHI[1] = (P)asin(_na[1]);
153   -
154   - //calculate the z-axis cylinder coordinates associated with these angles
155   - P Z[2];
156   - Z[0] = cos(PHI[0]);
157   - Z[1] = cos(PHI[1]);
158   - P range = Z[0] - Z[1];
159   -
160   - std::vector< planewave<P> > samples; //create a vector of plane waves
161   -
162   - //draw a distribution of random phi, z values
163   - P z, phi, theta;
164   - for(int i=0; i<N; i++) //for each sample
165   - {
166   - z = ((P)rand() / (P)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder
167   - theta = ((P)rand() / (P)RAND_MAX) * 2 * (P)3.14159;
168   - phi = acos(z); //project onto the sphere, computing phi in spherical coordinates
169   -
170   - //compute and store cartesian coordinates
171   - rts::vec<P> spherical(1, theta, phi); //convert from spherical to cartesian coordinates
172   - rts::vec<P> cart = spherical.sph2cart();
173   - vec<P> k_prime = rotation * cart; //create a sample vector
174   -
175   - //store a wave refracted along the given direction
176   - //std::cout<<"k prime: "<<rotation<<std::endl;
177   - samples.push_back(planewave<P>::refract(k_prime) * apod(phi/PHI[1]));
178   - }
179   -
180   - return samples;
181   - }
182   -
183   - std::string str()
184   - {
185   - std::stringstream ss;
186   - ss<<"Beam:"<<std::endl;
187   - //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
188   - ss<<" Central Plane Wave: "<<beam::k<<std::endl;
189   - if(_na[0] == 0)
190   - ss<<" NA: "<<_na[1];
191   - else
192   - ss<<" NA: "<<_na[0]<<" -- "<<_na[1];
193   -
194   - return ss.str();
195   - }
196   -
197   -
198   -
199   -};
200   -
201   -}
202   -
203   -#endif
  1 +#ifndef RTS_BEAM
  2 +#define RTS_BEAM
  3 +
  4 +#include "../math/vector.h"
  5 +#include "../math/function.h"
  6 +#include "../optics/planewave.h"
  7 +#include <vector>
  8 +
  9 +namespace stim{
  10 +
  11 +template<typename P>
  12 +class beam : public planewave<P>
  13 +{
  14 +public:
  15 + enum beam_type {Uniform, Bartlett, Hamming, Hanning};
  16 +
  17 +private:
  18 +
  19 + P _na[2]; //numerical aperature of the focusing optics
  20 + vec<P> f; //focal point
  21 + function<P, P> apod; //apodization function
  22 + unsigned int apod_res; //resolution of apodization filter functions
  23 +
  24 + void apod_uniform()
  25 + {
  26 + apod = (P)1;
  27 + }
  28 + void apod_bartlett()
  29 + {
  30 + apod = (P)1;
  31 + apod.insert((P)1, (P)0);
  32 + }
  33 + void apod_hanning()
  34 + {
  35 + apod = (P)0;
  36 + P x, y;
  37 + for(unsigned int n=0; n<apod_res; n++)
  38 + {
  39 + x = (P)n/(P)apod_res;
  40 + y = pow( cos( ((P)3.14159 * x) / 2 ), 2);
  41 + apod.insert(x, y);
  42 + }
  43 + }
  44 + void apod_hamming()
  45 + {
  46 + apod = (P)0;
  47 + P x, y;
  48 + for(unsigned int n=0; n<apod_res; n++)
  49 + {
  50 + x = (P)n/(P)apod_res;
  51 + y = (P)27/(P)50 + ( (P)23/(P)50 ) * cos((P)3.14159 * x);
  52 + apod.insert(x, y);
  53 + }
  54 + }
  55 +
  56 + void set_apod(beam_type type)
  57 + {
  58 + if(type == Uniform)
  59 + apod_uniform();
  60 + if(type == Bartlett)
  61 + apod_bartlett();
  62 + if(type == Hanning)
  63 + apod_hanning();
  64 + if(type == Hamming)
  65 + apod_hamming();
  66 + }
  67 +
  68 +public:
  69 +
  70 + ///constructor: build a default beam (NA=1.0)
  71 + beam(
  72 + vec<P> k = rts::vec<P>(0, 0, rtsTAU),
  73 + vec<P> _E0 = rts::vec<P>(1, 0, 0),
  74 + beam_type _apod = Uniform)
  75 + : planewave<P>(k, _E0)
  76 + {
  77 + _na[0] = (P)0.0;
  78 + _na[1] = (P)1.0;
  79 + f = vec<P>( (P)0, (P)0, (P)0 );
  80 + apod_res = 256; //set the default resolution for apodization filters
  81 + set_apod(_apod); //set the apodization function type
  82 + }
  83 +
  84 + beam<P> refract(rts::vec<P> kn) const{
  85 +
  86 + beam<P> new_beam;
  87 + new_beam._na[0] = _na[0];
  88 + new_beam._na[1] = _na[1];
  89 +
  90 +
  91 + rts::planewave<P> pw = planewave<P>::bend(kn);
  92 + //std::cout<<pw.str()<<std::endl;
  93 +
  94 + new_beam.k = pw.kvec();
  95 + new_beam.E0 = pw.E();
  96 +
  97 + return new_beam;
  98 + }
  99 +
  100 + ///Numerical Aperature functions
  101 + void NA(P na)
  102 + {
  103 + _na[0] = (P)0;
  104 + _na[1] = na;
  105 + }
  106 + void NA(P na0, P na1)
  107 + {
  108 + _na[0] = na0;
  109 + _na[1] = na1;
  110 + }
  111 +
  112 + /*string str() :
  113 + {
  114 + stringstream ss;
  115 + ss<<"Beam Center: "<<k<<std::endl;
  116 +
  117 + return ss.str();
  118 + }*/
  119 +
  120 + //Monte-Carlo decomposition into plane waves
  121 + std::vector< planewave<P> > mc(unsigned int N = 100000, unsigned int seed = 0) const
  122 + {
  123 + /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling
  124 + of a sphere and projecting these samples onto an inscribed sphere.
  125 +
  126 + seed = seed for the random number generator
  127 + */
  128 + srand(seed); //seed the random number generator
  129 +
  130 + vec<P> k_hat = beam::k.norm();
  131 +
  132 + ///compute the rotation operator to transform (0, 0, 1) to k
  133 + P cos_angle = k_hat.dot(rts::vec<P>(0, 0, 1));
  134 + rts::matrix<P, 3> rotation;
  135 +
  136 + //if the cosine of the angle is -1, the rotation is just a flip across the z axis
  137 + if(cos_angle == -1){
  138 + rotation(2, 2) = -1;
  139 + }
  140 + else if(cos_angle != 1.0)
  141 + {
  142 + rts::vec<P> r_axis = rts::vec<P>(0, 0, 1).cross(k_hat).norm(); //compute the axis of rotation
  143 + P angle = acos(cos_angle); //compute the angle of rotation
  144 + rts::quaternion<P> quat; //create a quaternion describing the rotation
  145 + quat.CreateRotation(angle, r_axis);
  146 + rotation = quat.toMatrix3(); //compute the rotation matrix
  147 + }
  148 +
  149 + //find the phi values associated with the cassegrain ring
  150 + P PHI[2];
  151 + PHI[0] = (P)asin(_na[0]);
  152 + PHI[1] = (P)asin(_na[1]);
  153 +
  154 + //calculate the z-axis cylinder coordinates associated with these angles
  155 + P Z[2];
  156 + Z[0] = cos(PHI[0]);
  157 + Z[1] = cos(PHI[1]);
  158 + P range = Z[0] - Z[1];
  159 +
  160 + std::vector< planewave<P> > samples; //create a vector of plane waves
  161 +
  162 + //draw a distribution of random phi, z values
  163 + P z, phi, theta;
  164 + for(int i=0; i<N; i++) //for each sample
  165 + {
  166 + z = ((P)rand() / (P)RAND_MAX) * range + Z[1]; //find a random position on the surface of a cylinder
  167 + theta = ((P)rand() / (P)RAND_MAX) * 2 * (P)3.14159;
  168 + phi = acos(z); //project onto the sphere, computing phi in spherical coordinates
  169 +
  170 + //compute and store cartesian coordinates
  171 + rts::vec<P> spherical(1, theta, phi); //convert from spherical to cartesian coordinates
  172 + rts::vec<P> cart = spherical.sph2cart();
  173 + vec<P> k_prime = rotation * cart; //create a sample vector
  174 +
  175 + //store a wave refracted along the given direction
  176 + //std::cout<<"k prime: "<<rotation<<std::endl;
  177 + samples.push_back(planewave<P>::refract(k_prime) * apod(phi/PHI[1]));
  178 + }
  179 +
  180 + return samples;
  181 + }
  182 +
  183 + std::string str()
  184 + {
  185 + std::stringstream ss;
  186 + ss<<"Beam:"<<std::endl;
  187 + //ss<<" Central Plane Wave: "<<beam::E0<<" e^i ( "<<beam::k<<" . r )"<<std::endl;
  188 + ss<<" Central Plane Wave: "<<beam::k<<std::endl;
  189 + if(_na[0] == 0)
  190 + ss<<" NA: "<<_na[1];
  191 + else
  192 + ss<<" NA: "<<_na[0]<<" -- "<<_na[1];
  193 +
  194 + return ss.str();
  195 + }
  196 +
  197 +
  198 +
  199 +};
  200 +
  201 +}
  202 +
  203 +#endif
... ...
stim/optics/efield.cuh renamed to stim/optics_old/efield.cuh
stim/optics/esphere.cuh renamed to stim/optics_old/esphere.cuh
stim/optics/halfspace.cuh renamed to stim/optics_old/halfspace.cuh
stim/optics/material.h renamed to stim/optics_old/material.h
1   -#ifndef RTS_MATERIAL_H
2   -#define RTS_MATERIAL_H
3   -
4   -#include <vector>
5   -#include <ostream>
6   -#include <iostream>
7   -#include <fstream>
8   -#include <complex>
9   -#include <algorithm>
10   -#include <sstream>
11   -#include "../math/complex.h"
12   -#include "../math/constants.h"
13   -#include "../math/function.h"
14   -
15   -namespace stim{
16   -
17   -//Material class - default representation for the material property is the refractive index (RI)
18   -template<typename T>
19   -class material : public function< T, complex<T> >{
20   -
21   -public:
22   - enum wave_property{microns, inverse_cm};
23   - enum material_property{ri, absorbance};
24   -
25   -private:
26   -
27   - using function< T, complex<T> >::X;
28   - using function< T, complex<T> >::Y;
29   - using function< T, complex<T> >::insert;
30   - using function< T, complex<T> >::bounding;
31   -
32   - std::string name; //name for the material (defaults to file name)
33   -
34   - void process_header(std::string str, wave_property& wp, material_property& mp){
35   -
36   - std::stringstream ss(str); //create a stream from the data string
37   - std::string line;
38   - std::getline(ss, line); //get the first line as a string
39   - while(line[0] == '#'){ //continue looping while the line is a comment
40   -
41   - std::stringstream lstream(line); //create a stream from the line
42   - lstream.ignore(); //ignore the first character ('#')
43   -
44   - std::string prop; //get the property name
45   - lstream>>prop;
46   -
47   - if(prop == "X"){
48   - std::string wp_name;
49   - lstream>>wp_name;
50   - if(wp_name == "microns") wp = microns;
51   - else if(wp_name == "inverse_cm") wp = inverse_cm;
52   - }
53   - else if(prop == "Y"){
54   - std::string mp_name;
55   - lstream>>mp_name;
56   - if(mp_name == "ri") mp = ri;
57   - else if(mp_name == "absorbance") mp = absorbance;
58   - }
59   -
60   - std::getline(ss, line); //get the next line
61   - }
62   -
63   - function< T, stim::complex<T> >::process_string(str);
64   - }
65   -
66   - void from_inverse_cm(){
67   - //convert inverse centimeters to wavelength (in microns)
68   - for(unsigned int i=0; i<X.size(); i++)
69   - X[i] = 10000 / X[i];
70   -
71   - //reverse the function array
72   - std::reverse(X.begin(), X.end());
73   - std::reverse(Y.begin(), Y.end());
74   -
75   - }
76   -
77   - void init(){
78   - bounding[0] = bounding[1] = stim::complex<T>(1, 0);
79   - }
80   -
81   -
82   -public:
83   -
84   - material(std::string filename, wave_property wp, material_property mp){
85   - name = filename;
86   - load(filename, wp, mp);
87   - }
88   -
89   - material(std::string filename){
90   - name = filename;
91   - load(filename);
92   - }
93   -
94   - material(){
95   - init();
96   - }
97   -
98   - complex<T> getN(T lambda){
99   - return function< T, complex<T> >::linear(lambda);
100   - }
101   -
102   - void load(std::string filename, wave_property wp, material_property mp){
103   -
104   - //load the file as a function
105   - function< T, complex<T> >::load(filename);
106   - }
107   -
108   - void load(std::string filename){
109   -
110   - wave_property wp = inverse_cm;
111   - material_property mp = ri;
112   - //turn the file into a string
113   - std::ifstream t(filename.c_str()); //open the file as a stream
114   -
115   - if(!t){
116   - std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl;
117   - exit(1);
118   - }
119   - std::string str((std::istreambuf_iterator<char>(t)),
120   - std::istreambuf_iterator<char>());
121   -
122   - //process the header information
123   - process_header(str, wp, mp);
124   -
125   - //convert units
126   - if(wp == inverse_cm)
127   - from_inverse_cm();
128   - //set the bounding values
129   - bounding[0] = Y[0];
130   - bounding[1] = Y.back();
131   - }
132   - std::string str(){
133   - std::stringstream ss;
134   - ss<<name<<std::endl;
135   - ss<<function< T, complex<T> >::str();
136   - return ss.str();
137   - }
138   - std::string get_name(){
139   - return name;
140   - }
141   -
142   - void set_name(std::string str){
143   - name = str;
144   - }
145   -
146   -};
147   -
148   -}
149   -
150   -
151   -
152   -
153   -#endif
  1 +#ifndef RTS_MATERIAL_H
  2 +#define RTS_MATERIAL_H
  3 +
  4 +#include <vector>
  5 +#include <ostream>
  6 +#include <iostream>
  7 +#include <fstream>
  8 +#include <complex>
  9 +#include <algorithm>
  10 +#include <sstream>
  11 +#include "../math/complex.h"
  12 +#include "../math/constants.h"
  13 +#include "../math/function.h"
  14 +
  15 +namespace stim{
  16 +
  17 +//Material class - default representation for the material property is the refractive index (RI)
  18 +template<typename T>
  19 +class material : public function< T, complex<T> >{
  20 +
  21 +public:
  22 + enum wave_property{microns, inverse_cm};
  23 + enum material_property{ri, absorbance};
  24 +
  25 +private:
  26 +
  27 + using function< T, complex<T> >::X;
  28 + using function< T, complex<T> >::Y;
  29 + using function< T, complex<T> >::insert;
  30 + using function< T, complex<T> >::bounding;
  31 +
  32 + std::string name; //name for the material (defaults to file name)
  33 +
  34 + void process_header(std::string str, wave_property& wp, material_property& mp){
  35 +
  36 + std::stringstream ss(str); //create a stream from the data string
  37 + std::string line;
  38 + std::getline(ss, line); //get the first line as a string
  39 + while(line[0] == '#'){ //continue looping while the line is a comment
  40 +
  41 + std::stringstream lstream(line); //create a stream from the line
  42 + lstream.ignore(); //ignore the first character ('#')
  43 +
  44 + std::string prop; //get the property name
  45 + lstream>>prop;
  46 +
  47 + if(prop == "X"){
  48 + std::string wp_name;
  49 + lstream>>wp_name;
  50 + if(wp_name == "microns") wp = microns;
  51 + else if(wp_name == "inverse_cm") wp = inverse_cm;
  52 + }
  53 + else if(prop == "Y"){
  54 + std::string mp_name;
  55 + lstream>>mp_name;
  56 + if(mp_name == "ri") mp = ri;
  57 + else if(mp_name == "absorbance") mp = absorbance;
  58 + }
  59 +
  60 + std::getline(ss, line); //get the next line
  61 + }
  62 +
  63 + function< T, stim::complex<T> >::process_string(str);
  64 + }
  65 +
  66 + void from_inverse_cm(){
  67 + //convert inverse centimeters to wavelength (in microns)
  68 + for(unsigned int i=0; i<X.size(); i++)
  69 + X[i] = 10000 / X[i];
  70 +
  71 + //reverse the function array
  72 + std::reverse(X.begin(), X.end());
  73 + std::reverse(Y.begin(), Y.end());
  74 +
  75 + }
  76 +
  77 + void init(){
  78 + bounding[0] = bounding[1] = stim::complex<T>(1, 0);
  79 + }
  80 +
  81 +
  82 +public:
  83 +
  84 + material(std::string filename, wave_property wp, material_property mp){
  85 + name = filename;
  86 + load(filename, wp, mp);
  87 + }
  88 +
  89 + material(std::string filename){
  90 + name = filename;
  91 + load(filename);
  92 + }
  93 +
  94 + material(){
  95 + init();
  96 + }
  97 +
  98 + complex<T> getN(T lambda){
  99 + return function< T, complex<T> >::linear(lambda);
  100 + }
  101 +
  102 + void load(std::string filename, wave_property wp, material_property mp){
  103 +
  104 + //load the file as a function
  105 + function< T, complex<T> >::load(filename);
  106 + }
  107 +
  108 + void load(std::string filename){
  109 +
  110 + wave_property wp = inverse_cm;
  111 + material_property mp = ri;
  112 + //turn the file into a string
  113 + std::ifstream t(filename.c_str()); //open the file as a stream
  114 +
  115 + if(!t){
  116 + std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl;
  117 + exit(1);
  118 + }
  119 + std::string str((std::istreambuf_iterator<char>(t)),
  120 + std::istreambuf_iterator<char>());
  121 +
  122 + //process the header information
  123 + process_header(str, wp, mp);
  124 +
  125 + //convert units
  126 + if(wp == inverse_cm)
  127 + from_inverse_cm();
  128 + //set the bounding values
  129 + bounding[0] = Y[0];
  130 + bounding[1] = Y.back();
  131 + }
  132 + std::string str(){
  133 + std::stringstream ss;
  134 + ss<<name<<std::endl;
  135 + ss<<function< T, complex<T> >::str();
  136 + return ss.str();
  137 + }
  138 + std::string get_name(){
  139 + return name;
  140 + }
  141 +
  142 + void set_name(std::string str){
  143 + name = str;
  144 + }
  145 +
  146 +};
  147 +
  148 +}
  149 +
  150 +
  151 +
  152 +
  153 +#endif
... ...
stim/optics/mirst-1d.cuh renamed to stim/optics_old/mirst-1d.cuh
1   -#include "../optics/material.h"
2   -#include "../math/complexfield.cuh"
3   -#include "../math/constants.h"
4   -//#include "../envi/bil.h"
5   -
6   -#include "cufft.h"
7   -
8   -#include <vector>
9   -#include <sstream>
10   -
11   -namespace stim{
12   -
/// Accumulates the Fourier transform of a single slab (a phase-shifted sinc along the Fz axis)
/// into "dest", such that an inverse FFT along z produces the slab.
///
/// The kernel reads a 2D thread index: x selects the z-frequency sample and y selects the wavelength.
/// Threads that fall outside of zR x nuR exit without writing.
///
/// @param dest is the complex field representing the sample (indexed as inu * zR + ifz, accumulated in place)
/// @param ri is the refractive index for each wavelength
/// @param src is the intensity of the light source for each wavelength
/// @param zf is the z position of the slab interface for each wavelength (accounting for optical path length)
/// @param w is the width of the slab (in pixels)
/// @param zR is the number of z-axis samples
/// @param nuR is the number of wavelengths
template<typename T>
__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri,
									   T* src, T* zf,
									   T w, unsigned int zR, unsigned int nuR){

	//get the current coordinate in the plane slice
	int ifz = blockIdx.x * blockDim.x + threadIdx.x;
	int inu = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(inu >= nuR || ifz >= zR) return;

	int i = inu * zR + ifz;

	//spatial frequency for this sample: the upper half of the array stores the negative frequencies
	T fz;
	if(ifz < zR/2)
		fz = ifz / (T)zR;
	else
		//convert to T BEFORE negating: (zR - ifz) is unsigned, so negating it directly
		//	wraps around to a huge positive value instead of producing a negative frequency
		fz = -(T)(zR - ifz) / (T)zR;

	//if the slab starts outside of the simulation domain, just return
	if(zf[inu] >= zR) return;

	//fill the array along z with a sinc function representing the Fourier transform of the layer

	T opl = w * ri[inu].real();		//optical path length

	//handle the case where the slab goes outside the simulation domain
	if(zf[inu] + opl >= zR)
		opl = zR - zf[inu];

	if(opl == 0) return;

	//phase ramp that shifts the slab so that it is centered at (zf + opl/2)
	complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));

	complex<T> eta = ri[inu] * ri[inu] - 1;

	//the sinc function is evaluated by its limit (1) at fz = 0 to avoid a division by zero
	if(ifz == 0)
		dest[i] += opl * exp(e) * eta * src[inu];
	else
		dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
}
66   -
/// Advances the current z depth of every wavelength by one layer.
///
/// The kernel reads a 1D thread index; threads with an index of S or more do nothing.
///
/// @param zf is the current z depth (optical path length) in pixels, one value per wavelength (updated in place)
/// @param ri is the refractive index of the material for each wavelength (if NULL, the depth advances by w alone)
/// @param w is the actual width of the layer (in pixels)
/// @param S is the number of entries in zf
template<typename T>
__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){

	//each thread handles a single wavelength
	int idx = blockIdx.x * blockDim.x + threadIdx.x;

	if(idx < S){
		if(ri != NULL)
			zf[idx] += ri[idx].real() * w;		//scale the width by the real part of the refractive index
		else
			zf[idx] += w;
	}
}
83   -
/// Applies the 1D MIRST filter to an existing sample in the Fourier domain (overwriting the sample).
///
/// The kernel reads a 2D thread index: x selects the z-frequency sample and y selects the wavelength.
/// Samples in the upper half of the Fz axis (the negative spatial frequencies) are set to zero.
///
/// @param sampleFFT is the sample in the Fourier domain (will be overwritten)
/// @param lambda is the list of wavelengths
/// @param dFz is the delta along the Fz axis in the frequency domain
/// @param inNA is the NA of the internal obscuration
/// @param outNA is the NA of the objective
/// @param lambdaR is the number of wavelengths
/// @param zR is the number of pixels along the Fz axis (same as the z-axis)
/// @param sigma is the width of the Gaussian source
template<typename T>
__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda,
										  T dFz,
										  T inNA, T outNA,
										  unsigned int lambdaR, unsigned int zR,
										  T sigma = 0){

	int ifz = blockIdx.x * blockDim.x + threadIdx.x;
	int inu = blockIdx.y * blockDim.y + threadIdx.y;

	if(inu >= lambdaR || ifz >= zR) return;

	//calculate the index into the sample FT
	int i = inu * zR + ifz;

	//negative spatial frequencies are removed outright
	if(ifz >= zR / 2){
		sampleFFT[i] = 0;
		return;
	}

	//spatial frequency of this sample
	T fz = ifz * dFz;

	//compute the frequency in inverse microns
	T nu = 1/lambda[inu];

	//determine the radius of the integration circle
	T nu_sq = nu * nu;
	T fz_sq = (fz * fz) / 4;

	//frequencies above the diffraction limit get a radius of zero
	T r = 0;
	if(fz_sq < nu_sq)
		r = sqrt(nu_sq - fz_sq);

	//account for the optics: only radii between the inner and outer NA pass
	T Q = (r > nu * inNA && r < nu * outNA) ? 1 : 0;

	//account for the source
	T s = exp( - (r*r * sigma*sigma) / 2 );

	//compute the final filter (zero at fz = 0, where 1/fz is undefined)
	T mirst = 0;
	if(fz != 0)
		mirst = 2 * stimPI * r * s * Q * (1/fz);

	sampleFFT[i] *= mirst;
}
149   -
/*This object performs a 1-dimensional (layered) MIRST simulation.

  The sample is a stack of layers, each described by a material and a thickness. It is assembled
  in the Fourier domain on the GPU (build_sample), filtered (apply_filter), and brought back into
  the spatial domain with an inverse FFT.
*/
template<typename T>
class mirst1d{

private:
	unsigned int Z;								//z-axis resolution
	unsigned int pad;							//pixel padding on either side of the sample

	std::vector< material<T> > matlist;			//list of materials
	std::vector< T > layers;					//list of layer thicknesses

	std::vector< T > lambdas;					//list of wavelengths that are being simulated
	unsigned int S;								//number of wavelengths (size of "lambdas")

	T NA[2];									//numerical aperture (central obscuration and outer diameter)

	function<T, T> source_profile;				//profile (spectrum) of the source (expressed in inverse centimeters)

	complexfield<T, 1> scratch;					//scratch GPU memory used to build samples, transforms, etc.

	/// Runs an in-place batched 1D FFT over the scratch field (one transform per wavelength).
	/// @param direction is CUFFT_FORWARD or CUFFT_INVERSE
	void fft(int direction = CUFFT_FORWARD){

		unsigned padZ = Z + pad;

		//create cuFFT handles
		cufftHandle plan;
		cufftResult result;

		//the transform type is selected by the size of T
		if(sizeof(T) == 4)
			result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size());	//single precision
		else
			result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size());	//double precision

		//check for Plan 1D errors
		if(result != CUFFT_SUCCESS){
			std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;
			CufftError(result);
			exit(1);
		}

		if(sizeof(T) == 4)
			result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);
		else
			result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);

		//check for FFT errors
		if(result != CUFFT_SUCCESS){
			std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;
			CufftError(result);
			exit(1);
		}

		cufftDestroy(plan);
	}


	/// Allocates the scratch field ((Z + pad) samples for each wavelength) and sets it to zero.
	void init_scratch(){
		scratch = complexfield<T, 1>(Z + pad , lambdas.size());
		scratch = 0;
	}

	/// Returns the scattering efficiency (eta) of a layer at every simulated wavelength.
	/// @param l is the index of the layer
	std::vector< complex<T> > layer_etas(unsigned int l){

		std::vector< complex<T> > etas;
		etas.reserve(lambdas.size());

		//fill the list of etas
		for(unsigned int i=0; i<lambdas.size(); i++)
			etas.push_back( matlist[l].eta(lambdas[i]) );
		return etas;
	}

	/// Calculates the block and grid sizes for the 2D kernels using information from the GPU.
	/// @param grids receives the grid dimensions
	/// @param blocks receives the block dimensions
	void cuda_params(dim3& grids, dim3& blocks){
		int maxThreads = stim::maxThreadsPerBlock();				//largest block supported by the device
		int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);

		//create one thread for each detector pixel
		//NOTE(review): the grid spans (Z + 2 * pad) samples while the scratch field holds (Z + pad);
		//	the surplus threads are rejected by the bounds checks inside the kernels - confirm which is intended
		blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
		grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);
	}

	/// Adds the Fourier transform of layer n to the scratch space and advances the z depth.
	/// @param n is the index of the layer
	/// @param zf is a GPU array (one value per wavelength) storing the current z depth in pixels
	void build_layer_fft(unsigned int n, T* zf){

		//there is nothing to render without wavelengths (an empty grid cannot be launched)
		if(S == 0) return;

		unsigned int paddedZ = Z + pad;

		T wpx = layers[n] / dz();									//calculate the width of the layer in pixels

		//evaluate the refractive index and source profile on the host for every wavelength
		std::vector< complex<T> > hostRi(S);
		std::vector< T > hostSrc(S);
		for(unsigned int inu = 0; inu < S; inu++){
			hostRi[inu] = matlist[n].getN(lambdas[inu]);
			hostSrc[inu] = source_profile(10000 / lambdas[inu]);	//the profile is sampled by wavenumber
		}

		//upload both arrays with a single copy each (instead of one transfer per wavelength)
		complex<T>* gpuRi;
		HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));
		HANDLE_ERROR(cudaMemcpy( gpuRi, &hostRi[0], sizeof(complex<T>) * S, cudaMemcpyHostToDevice ));

		T* gpuSrc;
		HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));
		HANDLE_ERROR(cudaMemcpy( gpuSrc, &hostSrc[0], sizeof(T) * S, cudaMemcpyHostToDevice ));

		//create one thread for each pixel of the field slice
		dim3 grid, block;
		cuda_params(grid, block);
		stim::gpu_mirst1d_layer_fft<<<grid, block>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);
		HANDLE_ERROR(cudaGetLastError());							//launch errors are not reported by the launch itself

		int linBlock = stim::maxThreadsPerBlock();
		int linGrid = S / linBlock + 1;
		stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);
		HANDLE_ERROR(cudaGetLastError());

		//free memory
		HANDLE_ERROR(cudaFree(gpuRi));
		HANDLE_ERROR(cudaFree(gpuSrc));
	}

	/// Builds the entire sample (in the Fourier domain) in the scratch space.
	void build_sample(){
		init_scratch();												//initialize the GPU scratch space

		//the z depth of every wavelength starts at zero
		T* zf;
		HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));
		HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));

		//render each layer of the sample
		for(unsigned int l=0; l<layers.size(); l++){
			build_layer_fft(l, zf);
		}

		HANDLE_ERROR(cudaFree(zf));
	}

	/// Applies the MIRST filter to the sample stored in the scratch space.
	void apply_filter(){

		//there is nothing to filter without wavelengths
		if(S == 0) return;

		dim3 grid, block;
		cuda_params(grid, block);

		unsigned int Zpad = Z + pad;

		T sim_range = dz() * Zpad;
		T dFz = 1 / sim_range;

		//copy the array of wavelengths to the GPU
		//	"lambdas" stores S values, so S (not the z resolution) is the number of elements to transfer
		T* gpuLambdas;
		HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * S));
		HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * S, cudaMemcpyHostToDevice));
		stim::gpu_mirst1d_apply_filter <<<grid, block>>>(scratch.ptr(), gpuLambdas,
														 dFz,
														 NA[0], NA[1],
														 S, Zpad);
		HANDLE_ERROR(cudaGetLastError());

		//release the wavelength list
		HANDLE_ERROR(cudaFree(gpuLambdas));
	}

	/// Crops the image to the sample thickness - keep in mind that sample thickness != optical path length.
	void crop(){

		scratch = scratch.crop(Z, S);
	}

	/// Saves the scratch field as a binary file.
	/// NOTE(review): this is an empty stub, so nothing is written - save_mirst() calls it by default.
	void to_binary(std::string filename){

	}


public:

	/// Constructor
	/// @param rZ is the z-axis resolution
	/// @param padding is the number of padding pixels
	mirst1d(unsigned int rZ = 100,
			unsigned int padding = 0){
		Z = rZ;
		pad = padding;
		NA[0] = 0;
		NA[1] = 0.8;
		S = 0;
		source_profile = 1;
	}

	/// Adds a layer to the sample.
	/// @param mat is the material of the layer
	/// @param thickness is the thickness of the layer in microns
	void add_layer(material<T> mat, T thickness){
		matlist.push_back(mat);
		layers.push_back(thickness);
	}

	/// Adds a layer to the sample, building the material from a file name.
	/// @param filename is passed to the material constructor
	/// @param thickness is the thickness of the layer in microns
	void add_layer(std::string filename, T thickness){
		add_layer(material<T>(filename), thickness);
	}

	/// Loads a profile spectrum for the light source.
	void set_source(std::string filename){
		source_profile.load(filename);
	}

	/// Adds a block of wavenumbers (cm^-1) to the simulation parameters.
	/// @param start is the first wavenumber
	/// @param stop is the last wavenumber (inclusive)
	/// @param step is the increment between wavenumbers (a step of 0 adds a single wavenumber)
	void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){
		for(unsigned int nu = start; nu <= stop; nu += step){
			lambdas.push_back((T)10000 / nu);

			//stop when the next value would pass "stop": this also ends the loop for a zero step
			//	and when (nu + step) would wrap around
			if(step == 0 || stop - nu < step) break;
		}
		S = lambdas.size();						//update the number of wavelengths (shorthand for later)
	}

	/// Returns the total thickness of the sample (the sum of all layer thicknesses).
	T thickness(){
		T t = 0;
		for(unsigned int l=0; l<layers.size(); l++)
			t += layers[l];
		return t;
	}

	/// Sets the pixel padding.
	void padding(unsigned int padding = 0){
		pad = padding;
	}

	/// Returns the z-axis step size.
	T dz(){
		return thickness() / Z;
	}

	/// Sets the numerical aperture.
	/// @param in is the NA of the central obscuration
	/// @param out is the NA of the outer diameter
	void na(T in, T out){
		NA[0] = in;
		NA[1] = out;
	}

	/// Sets the outer numerical aperture (no central obscuration).
	void na(T out){
		na(0, out);
	}

	/// Returns a copy of the source profile.
	stim::function<T, T> get_source(){
		return source_profile;
	}

	/// Creates a sample and saves the magnitude as an image.
	void save_sample(std::string filename){
		build_sample();
		fft(CUFFT_INVERSE);
		scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
	}

	/// Applies the MIRST filter to a sample and saves the result.
	/// @param filename is the name of the output file
	/// @param binary selects between to_binary() and a magnitude image
	void save_mirst(std::string filename, bool binary = true){

		//build the sample in the Fourier domain
		build_sample();

		//apply the MIRST filter
		apply_filter();

		//apply an inverse FFT to bring the results back into the spatial domain
		fft(CUFFT_INVERSE);

		crop();

		//save the image
		if(binary)
			to_binary(filename);
		else
			scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
	}

	/// Returns a string describing the simulation parameters, layers, and source profile.
	std::string str(){

		std::stringstream ss;
		ss<<"1D MIRST Simulation========================="<<std::endl;
		ss<<"z-axis resolution: "<<Z<<std::endl;

		//the wavelength list can be empty if no wavenumbers have been added yet
		if(lambdas.empty())
			ss<<"simulation domain: []"<<std::endl;
		else
			ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;

		ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;
		ss<<"padding: "<<pad<<std::endl;
		ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;
		ss<<"dz: "<<dz()<<" um"<<std::endl;
		ss<<std::endl;
		ss<<layers.size()<<" layers-------------"<<std::endl;
		for(unsigned int l=0; l<layers.size(); l++)
			ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;

		ss<<"source profile-----------"<<std::endl;
		ss<<get_source().str()<<std::endl;

		return ss.str();
	}

};
446   -
447   -}
  1 +#include "../optics/material.h"
  2 +#include "../math/complexfield.cuh"
  3 +#include "../math/constants.h"
  4 +//#include "../envi/bil.h"
  5 +
  6 +#include "cufft.h"
  7 +
  8 +#include <vector>
  9 +#include <sstream>
  10 +
  11 +namespace stim{
  12 +
/// Accumulates the Fourier transform of a single slab (a phase-shifted sinc along the Fz axis)
/// into "dest", such that an inverse FFT along z produces the slab.
///
/// The kernel reads a 2D thread index: x selects the z-frequency sample and y selects the wavelength.
/// Threads that fall outside of zR x nuR exit without writing.
///
/// @param dest is the complex field representing the sample (indexed as inu * zR + ifz, accumulated in place)
/// @param ri is the refractive index for each wavelength
/// @param src is the intensity of the light source for each wavelength
/// @param zf is the z position of the slab interface for each wavelength (accounting for optical path length)
/// @param w is the width of the slab (in pixels)
/// @param zR is the number of z-axis samples
/// @param nuR is the number of wavelengths
template<typename T>
__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri,
									   T* src, T* zf,
									   T w, unsigned int zR, unsigned int nuR){

	//get the current coordinate in the plane slice
	int ifz = blockIdx.x * blockDim.x + threadIdx.x;
	int inu = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(inu >= nuR || ifz >= zR) return;

	int i = inu * zR + ifz;

	//spatial frequency for this sample: the upper half of the array stores the negative frequencies
	T fz;
	if(ifz < zR/2)
		fz = ifz / (T)zR;
	else
		//convert to T BEFORE negating: (zR - ifz) is unsigned, so negating it directly
		//	wraps around to a huge positive value instead of producing a negative frequency
		fz = -(T)(zR - ifz) / (T)zR;

	//if the slab starts outside of the simulation domain, just return
	if(zf[inu] >= zR) return;

	//fill the array along z with a sinc function representing the Fourier transform of the layer

	T opl = w * ri[inu].real();		//optical path length

	//handle the case where the slab goes outside the simulation domain
	if(zf[inu] + opl >= zR)
		opl = zR - zf[inu];

	if(opl == 0) return;

	//phase ramp that shifts the slab so that it is centered at (zf + opl/2)
	complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));

	complex<T> eta = ri[inu] * ri[inu] - 1;

	//the sinc function is evaluated by its limit (1) at fz = 0 to avoid a division by zero
	if(ifz == 0)
		dest[i] += opl * exp(e) * eta * src[inu];
	else
		dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
}
  66 +
/// Advances the current z depth of every wavelength by one layer.
///
/// The kernel reads a 1D thread index; threads with an index of S or more do nothing.
///
/// @param zf is the current z depth (optical path length) in pixels, one value per wavelength (updated in place)
/// @param ri is the refractive index of the material for each wavelength (if NULL, the depth advances by w alone)
/// @param w is the actual width of the layer (in pixels)
/// @param S is the number of entries in zf
template<typename T>
__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){

	//each thread handles a single wavelength
	int idx = blockIdx.x * blockDim.x + threadIdx.x;

	if(idx < S){
		if(ri != NULL)
			zf[idx] += ri[idx].real() * w;		//scale the width by the real part of the refractive index
		else
			zf[idx] += w;
	}
}
  83 +
/// Applies the 1D MIRST filter to an existing sample in the Fourier domain (overwriting the sample).
///
/// The kernel reads a 2D thread index: x selects the z-frequency sample and y selects the wavelength.
/// Samples in the upper half of the Fz axis (the negative spatial frequencies) are set to zero.
///
/// @param sampleFFT is the sample in the Fourier domain (will be overwritten)
/// @param lambda is the list of wavelengths
/// @param dFz is the delta along the Fz axis in the frequency domain
/// @param inNA is the NA of the internal obscuration
/// @param outNA is the NA of the objective
/// @param lambdaR is the number of wavelengths
/// @param zR is the number of pixels along the Fz axis (same as the z-axis)
/// @param sigma is the width of the Gaussian source
template<typename T>
__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda,
										  T dFz,
										  T inNA, T outNA,
										  unsigned int lambdaR, unsigned int zR,
										  T sigma = 0){

	int ifz = blockIdx.x * blockDim.x + threadIdx.x;
	int inu = blockIdx.y * blockDim.y + threadIdx.y;

	if(inu >= lambdaR || ifz >= zR) return;

	//calculate the index into the sample FT
	int i = inu * zR + ifz;

	//negative spatial frequencies are removed outright
	if(ifz >= zR / 2){
		sampleFFT[i] = 0;
		return;
	}

	//spatial frequency of this sample
	T fz = ifz * dFz;

	//compute the frequency in inverse microns
	T nu = 1/lambda[inu];

	//determine the radius of the integration circle
	T nu_sq = nu * nu;
	T fz_sq = (fz * fz) / 4;

	//frequencies above the diffraction limit get a radius of zero
	T r = 0;
	if(fz_sq < nu_sq)
		r = sqrt(nu_sq - fz_sq);

	//account for the optics: only radii between the inner and outer NA pass
	T Q = (r > nu * inNA && r < nu * outNA) ? 1 : 0;

	//account for the source
	T s = exp( - (r*r * sigma*sigma) / 2 );

	//compute the final filter (zero at fz = 0, where 1/fz is undefined)
	T mirst = 0;
	if(fz != 0)
		mirst = 2 * stimPI * r * s * Q * (1/fz);

	sampleFFT[i] *= mirst;
}
  149 +
/*This object performs a 1-dimensional (layered) MIRST simulation.

  The sample is a stack of layers, each described by a material and a thickness. It is assembled
  in the Fourier domain on the GPU (build_sample), filtered (apply_filter), and brought back into
  the spatial domain with an inverse FFT.
*/
template<typename T>
class mirst1d{

private:
	unsigned int Z;								//z-axis resolution
	unsigned int pad;							//pixel padding on either side of the sample

	std::vector< material<T> > matlist;			//list of materials
	std::vector< T > layers;					//list of layer thicknesses

	std::vector< T > lambdas;					//list of wavelengths that are being simulated
	unsigned int S;								//number of wavelengths (size of "lambdas")

	T NA[2];									//numerical aperture (central obscuration and outer diameter)

	function<T, T> source_profile;				//profile (spectrum) of the source (expressed in inverse centimeters)

	complexfield<T, 1> scratch;					//scratch GPU memory used to build samples, transforms, etc.

	/// Runs an in-place batched 1D FFT over the scratch field (one transform per wavelength).
	/// @param direction is CUFFT_FORWARD or CUFFT_INVERSE
	void fft(int direction = CUFFT_FORWARD){

		unsigned padZ = Z + pad;

		//create cuFFT handles
		cufftHandle plan;
		cufftResult result;

		//the transform type is selected by the size of T
		if(sizeof(T) == 4)
			result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size());	//single precision
		else
			result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size());	//double precision

		//check for Plan 1D errors
		if(result != CUFFT_SUCCESS){
			std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;
			CufftError(result);
			exit(1);
		}

		if(sizeof(T) == 4)
			result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);
		else
			result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);

		//check for FFT errors
		if(result != CUFFT_SUCCESS){
			std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;
			CufftError(result);
			exit(1);
		}

		cufftDestroy(plan);
	}


	/// Allocates the scratch field ((Z + pad) samples for each wavelength) and sets it to zero.
	void init_scratch(){
		scratch = complexfield<T, 1>(Z + pad , lambdas.size());
		scratch = 0;
	}

	/// Returns the scattering efficiency (eta) of a layer at every simulated wavelength.
	/// @param l is the index of the layer
	std::vector< complex<T> > layer_etas(unsigned int l){

		std::vector< complex<T> > etas;
		etas.reserve(lambdas.size());

		//fill the list of etas
		for(unsigned int i=0; i<lambdas.size(); i++)
			etas.push_back( matlist[l].eta(lambdas[i]) );
		return etas;
	}

	/// Calculates the block and grid sizes for the 2D kernels using information from the GPU.
	/// @param grids receives the grid dimensions
	/// @param blocks receives the block dimensions
	void cuda_params(dim3& grids, dim3& blocks){
		int maxThreads = stim::maxThreadsPerBlock();				//largest block supported by the device
		int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);

		//create one thread for each detector pixel
		//NOTE(review): the grid spans (Z + 2 * pad) samples while the scratch field holds (Z + pad);
		//	the surplus threads are rejected by the bounds checks inside the kernels - confirm which is intended
		blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
		grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);
	}

	/// Adds the Fourier transform of layer n to the scratch space and advances the z depth.
	/// @param n is the index of the layer
	/// @param zf is a GPU array (one value per wavelength) storing the current z depth in pixels
	void build_layer_fft(unsigned int n, T* zf){

		//there is nothing to render without wavelengths (an empty grid cannot be launched)
		if(S == 0) return;

		unsigned int paddedZ = Z + pad;

		T wpx = layers[n] / dz();									//calculate the width of the layer in pixels

		//evaluate the refractive index and source profile on the host for every wavelength
		std::vector< complex<T> > hostRi(S);
		std::vector< T > hostSrc(S);
		for(unsigned int inu = 0; inu < S; inu++){
			hostRi[inu] = matlist[n].getN(lambdas[inu]);
			hostSrc[inu] = source_profile(10000 / lambdas[inu]);	//the profile is sampled by wavenumber
		}

		//upload both arrays with a single copy each (instead of one transfer per wavelength)
		complex<T>* gpuRi;
		HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));
		HANDLE_ERROR(cudaMemcpy( gpuRi, &hostRi[0], sizeof(complex<T>) * S, cudaMemcpyHostToDevice ));

		T* gpuSrc;
		HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));
		HANDLE_ERROR(cudaMemcpy( gpuSrc, &hostSrc[0], sizeof(T) * S, cudaMemcpyHostToDevice ));

		//create one thread for each pixel of the field slice
		dim3 grid, block;
		cuda_params(grid, block);
		stim::gpu_mirst1d_layer_fft<<<grid, block>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);
		HANDLE_ERROR(cudaGetLastError());							//launch errors are not reported by the launch itself

		int linBlock = stim::maxThreadsPerBlock();
		int linGrid = S / linBlock + 1;
		stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);
		HANDLE_ERROR(cudaGetLastError());

		//free memory
		HANDLE_ERROR(cudaFree(gpuRi));
		HANDLE_ERROR(cudaFree(gpuSrc));
	}

	/// Builds the entire sample (in the Fourier domain) in the scratch space.
	void build_sample(){
		init_scratch();												//initialize the GPU scratch space

		//the z depth of every wavelength starts at zero
		T* zf;
		HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));
		HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));

		//render each layer of the sample
		for(unsigned int l=0; l<layers.size(); l++){
			build_layer_fft(l, zf);
		}

		HANDLE_ERROR(cudaFree(zf));
	}

	/// Applies the MIRST filter to the sample stored in the scratch space.
	void apply_filter(){

		//there is nothing to filter without wavelengths
		if(S == 0) return;

		dim3 grid, block;
		cuda_params(grid, block);

		unsigned int Zpad = Z + pad;

		T sim_range = dz() * Zpad;
		T dFz = 1 / sim_range;

		//copy the array of wavelengths to the GPU
		//	"lambdas" stores S values, so S (not the z resolution) is the number of elements to transfer
		T* gpuLambdas;
		HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * S));
		HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * S, cudaMemcpyHostToDevice));
		stim::gpu_mirst1d_apply_filter <<<grid, block>>>(scratch.ptr(), gpuLambdas,
														 dFz,
														 NA[0], NA[1],
														 S, Zpad);
		HANDLE_ERROR(cudaGetLastError());

		//release the wavelength list
		HANDLE_ERROR(cudaFree(gpuLambdas));
	}

	/// Crops the image to the sample thickness - keep in mind that sample thickness != optical path length.
	void crop(){

		scratch = scratch.crop(Z, S);
	}

	/// Saves the scratch field as a binary file.
	/// NOTE(review): this is an empty stub, so nothing is written - save_mirst() calls it by default.
	void to_binary(std::string filename){

	}


public:

	/// Constructor
	/// @param rZ is the z-axis resolution
	/// @param padding is the number of padding pixels
	mirst1d(unsigned int rZ = 100,
			unsigned int padding = 0){
		Z = rZ;
		pad = padding;
		NA[0] = 0;
		NA[1] = 0.8;
		S = 0;
		source_profile = 1;
	}

	/// Adds a layer to the sample.
	/// @param mat is the material of the layer
	/// @param thickness is the thickness of the layer in microns
	void add_layer(material<T> mat, T thickness){
		matlist.push_back(mat);
		layers.push_back(thickness);
	}

	/// Adds a layer to the sample, building the material from a file name.
	/// @param filename is passed to the material constructor
	/// @param thickness is the thickness of the layer in microns
	void add_layer(std::string filename, T thickness){
		add_layer(material<T>(filename), thickness);
	}

	/// Loads a profile spectrum for the light source.
	void set_source(std::string filename){
		source_profile.load(filename);
	}

	/// Adds a block of wavenumbers (cm^-1) to the simulation parameters.
	/// @param start is the first wavenumber
	/// @param stop is the last wavenumber (inclusive)
	/// @param step is the increment between wavenumbers (a step of 0 adds a single wavenumber)
	void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){
		for(unsigned int nu = start; nu <= stop; nu += step){
			lambdas.push_back((T)10000 / nu);

			//stop when the next value would pass "stop": this also ends the loop for a zero step
			//	and when (nu + step) would wrap around
			if(step == 0 || stop - nu < step) break;
		}
		S = lambdas.size();						//update the number of wavelengths (shorthand for later)
	}

	/// Returns the total thickness of the sample (the sum of all layer thicknesses).
	T thickness(){
		T t = 0;
		for(unsigned int l=0; l<layers.size(); l++)
			t += layers[l];
		return t;
	}

	/// Sets the pixel padding.
	void padding(unsigned int padding = 0){
		pad = padding;
	}

	/// Returns the z-axis step size.
	T dz(){
		return thickness() / Z;
	}

	/// Sets the numerical aperture.
	/// @param in is the NA of the central obscuration
	/// @param out is the NA of the outer diameter
	void na(T in, T out){
		NA[0] = in;
		NA[1] = out;
	}

	/// Sets the outer numerical aperture (no central obscuration).
	void na(T out){
		na(0, out);
	}

	/// Returns a copy of the source profile.
	stim::function<T, T> get_source(){
		return source_profile;
	}

	/// Creates a sample and saves the magnitude as an image.
	void save_sample(std::string filename){
		build_sample();
		fft(CUFFT_INVERSE);
		scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
	}

	/// Applies the MIRST filter to a sample and saves the result.
	/// @param filename is the name of the output file
	/// @param binary selects between to_binary() and a magnitude image
	void save_mirst(std::string filename, bool binary = true){

		//build the sample in the Fourier domain
		build_sample();

		//apply the MIRST filter
		apply_filter();

		//apply an inverse FFT to bring the results back into the spatial domain
		fft(CUFFT_INVERSE);

		crop();

		//save the image
		if(binary)
			to_binary(filename);
		else
			scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
	}

	/// Returns a string describing the simulation parameters, layers, and source profile.
	std::string str(){

		std::stringstream ss;
		ss<<"1D MIRST Simulation========================="<<std::endl;
		ss<<"z-axis resolution: "<<Z<<std::endl;

		//the wavelength list can be empty if no wavenumbers have been added yet
		if(lambdas.empty())
			ss<<"simulation domain: []"<<std::endl;
		else
			ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;

		ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;
		ss<<"padding: "<<pad<<std::endl;
		ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;
		ss<<"dz: "<<dz()<<" um"<<std::endl;
		ss<<std::endl;
		ss<<layers.size()<<" layers-------------"<<std::endl;
		for(unsigned int l=0; l<layers.size(); l++)
			ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;

		ss<<"source profile-----------"<<std::endl;
		ss<<get_source().str()<<std::endl;

		return ss.str();
	}

};
  446 +
  447 +}
... ...
stim/optics_old/planewave.h 0 → 100644
  1 +#ifndef RTS_PLANEWAVE
  2 +#define RTS_PLANEWAVE
  3 +
  4 +#include <string>
  5 +#include <sstream>
  6 +
  7 +#include "../math/vector.h"
  8 +#include "../math/quaternion.h"
  9 +#include "../math/constants.h"
  10 +#include "../math/plane.h"
  11 +#include "../cuda/callable.h"
  12 +
  13 +/*Basic conversions used here (assuming a vacuum)
  14 + lambda =
  15 +*/
  16 +
  17 +namespace stim{
  18 + namespace optics{
  19 +
  20 +template<typename T>
  21 +class planewave{
  22 +
  23 +protected:
  24 +
  25 + vec<T> k; //k = tau / lambda
  26 + vec< complex<T> > E0; //amplitude
  27 + //T phi;
  28 +
  29 + CUDA_CALLABLE planewave<T> bend(rts::vec<T> kn) const{
  30 +
  31 + vec<T> kn_hat = kn.norm(); //normalize the new k
  32 + vec<T> k_hat = k.norm(); //normalize the current k
  33 +
  34 + //std::cout<<"PLANE WAVE BENDING------------------"<<std::endl;
  35 + //std::cout<<"kn_hat: "<<kn_hat<<" k_hat: "<<k_hat<<std::endl;
  36 +
  37 + planewave<T> new_p; //create a new plane wave
  38 +
  39 + //if kn is equal to k or -k, handle the degenerate case
  40 + T k_dot_kn = k_hat.dot(kn_hat);
  41 +
  42 + //if k . n < 0, then the bend is a reflection
  43 + //flip k_hat
  44 + if(k_dot_kn < 0) k_hat = -k_hat;
  45 +
  46 + //std::cout<<"k dot kn: "<<k_dot_kn<<std::endl;
  47 +
  48 + //std::cout<<"k_dot_kn: "<<k_dot_kn<<std::endl;
  49 + if(k_dot_kn == -1){
  50 + new_p.k = -k;
  51 + new_p.E0 = E0;
  52 + return new_p;
  53 + }
  54 + else if(k_dot_kn == 1){
  55 + new_p.k = k;
  56 + new_p.E0 = E0;
  57 + return new_p;
  58 + }
  59 +
  60 + vec<T> r = k_hat.cross(kn_hat); //compute the rotation vector
  61 +
  62 + //std::cout<<"r: "<<r<<std::endl;
  63 +
  64 + T theta = asin(r.len()); //compute the angle of the rotation about r
  65 +
  66 +
  67 +
  68 + //deal with a zero vector (both k and kn point in the same direction)
  69 + //if(theta == (T)0)
  70 + //{
  71 + // new_p = *this;
  72 + // return new_p;
  73 + //}
  74 +
  75 + //create a quaternion to capture the rotation
  76 + quaternion<T> q;
  77 + q.CreateRotation(theta, r.norm());
  78 +
  79 + //apply the rotation to E0
  80 + vec< complex<T> > E0n = q.toMatrix3() * E0;
  81 +
  82 + new_p.k = kn_hat * kmag();
  83 + new_p.E0 = E0n;
  84 +
  85 + return new_p;
  86 + }
  87 +
  88 +public:
  89 +
  90 +
  91 + ///constructor: create a plane wave propagating along z, polarized along x
  92 + /*planewave(T lambda = (T)1)
  93 + {
  94 + k = rts::vec<T>(0, 0, 1) * (TAU/lambda);
  95 + E0 = rts::vec<T>(1, 0, 0);
  96 + }*/
  97 + ///constructor: create a plane wave propagating along k, polarized along _E0, at frequency _omega
  98 + CUDA_CALLABLE planewave(vec<T> kvec = rts::vec<T>(0, 0, rtsTAU),
  99 + vec< complex<T> > E = rts::vec<T>(1, 0, 0), T phase = 0)
  100 + {
  101 + //phi = phase;
  102 +
  103 + k = kvec;
  104 + vec< complex<T> > k_hat = k.norm();
  105 +
  106 + if(E.len() == 0) //if the plane wave has an amplitude of 0
  107 + E0 = vec<T>(0); //just return it
  108 + else{
  109 + vec< complex<T> > s = (k_hat.cross(E)).norm(); //compute an orthogonal side vector
  110 + vec< complex<T> > E_hat = (s.cross(k)).norm(); //compute a normalized E0 direction vector
  111 + E0 = E_hat * E_hat.dot(E); //compute the projection of _E0 onto E0_hat
  112 + }
  113 +
  114 + E0 = E0 * exp( complex<T>(0, phase) );
  115 + }
  116 +
  117 + ///multiplication operator: scale E0
  118 + CUDA_CALLABLE planewave<T> & operator* (const T & rhs)
  119 + {
  120 +
  121 + E0 = E0 * rhs;
  122 + return *this;
  123 + }
  124 +
  125 + CUDA_CALLABLE T lambda() const
  126 + {
  127 + return rtsTAU / k.len();
  128 + }
  129 +
  130 + CUDA_CALLABLE T kmag() const
  131 + {
  132 + return k.len();
  133 + }
  134 +
  135 + CUDA_CALLABLE vec< complex<T> > E(){
  136 + return E0;
  137 + }
  138 +
  139 + CUDA_CALLABLE vec<T> kvec(){
  140 + return k;
  141 + }
  142 +
  143 + /*CUDA_CALLABLE T phase(){
  144 + return phi;
  145 + }
  146 +
  147 + CUDA_CALLABLE void phase(T p){
  148 + phi = p;
  149 + }*/
  150 +
  151 + CUDA_CALLABLE vec< complex<T> > pos(vec<T> p = vec<T>(0, 0, 0)){
  152 + vec< complex<T> > result;
  153 +
  154 + T kdp = k.dot(p);
  155 + complex<T> x = complex<T>(0, kdp);
  156 + complex<T> expx = exp(x);
  157 +
  158 + result[0] = E0[0] * expx;
  159 + result[1] = E0[1] * expx;
  160 + result[2] = E0[2] * expx;
  161 +
  162 + return result;
  163 + }
  164 +
  165 + //scales k based on a transition from material ni to material nt
  166 + CUDA_CALLABLE planewave<T> n(T ni, T nt){
  167 + return planewave<T>(k * (nt / ni), E0);
  168 + }
  169 +
  170 + CUDA_CALLABLE planewave<T> refract(rts::vec<T> kn) const
  171 + {
  172 + return bend(kn);
  173 + }
  174 +
  175 + void scatter(rts::plane<T> P, T nr, planewave<T> &r, planewave<T> &t){
  176 +
  177 + int facing = P.face(k); //determine which direction the plane wave is coming in
  178 +
  179 + //if(facing == 0) //if the wave is tangent to the plane, return an identical wave
  180 + // return *this;
  181 + //else
  182 + if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr
  183 + P = P.flip(); //flip the plane
  184 + nr = 1/nr; //invert the refractive index (now nr = n0/n1)
  185 + }
  186 +
  187 + //use Snell's Law to calculate the transmitted angle
  188 + T cos_theta_i = k.norm().dot(-P.norm()); //compute the cosine of theta_i
  189 + T theta_i = acos(cos_theta_i); //compute theta_i
  190 + T sin_theta_t = (1/nr) * sin(theta_i); //compute the sine of theta_t using Snell's law
  191 + T theta_t = asin(sin_theta_t); //compute the cosine of theta_t
  192 +
  193 + bool tir = false; //flag for total internal reflection
  194 + if(theta_t != theta_t){
  195 + tir = true;
  196 + theta_t = rtsPI / (T)2;
  197 + }
  198 +
  199 + //handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
  200 + if(theta_i == 0){
  201 + T rp = (1 - nr) / (1 + nr); //compute the Fresnel coefficients
  202 + T tp = 2 / (1 + nr);
  203 + vec<T> kr = -k;
  204 + vec<T> kt = k * nr; //set the k vectors for theta_i = 0
  205 + vec< complex<T> > Er = E0 * rp; //compute the E vectors
  206 + vec< complex<T> > Et = E0 * tp;
  207 + T phase_t = P.p().dot(k - kt); //compute the phase offset
  208 + T phase_r = P.p().dot(k - kr);
  209 + //std::cout<<"Degeneracy: Head-On"<<std::endl;
  210 + //std::cout<<"rs: "<<rp<<" rp: "<<rp<<" ts: "<<tp<<" tp: "<<tp<<std::endl;
  211 + //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;
  212 +
  213 + //create the plane waves
  214 + r = planewave<T>(kr, Er, phase_r);
  215 + t = planewave<T>(kt, Et, phase_t);
  216 +
  217 + //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;
  218 + //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;
  219 + //std::cout<<"--------------------------------"<<std::endl;
  220 + return;
  221 + }
  222 +
  223 +
  224 + //compute the Fresnel coefficients
  225 + T rp, rs, tp, ts;
  226 + rp = tan(theta_t - theta_i) / tan(theta_t + theta_i);
  227 + rs = sin(theta_t - theta_i) / sin(theta_t + theta_i);
  228 +
  229 + if(tir){
  230 + tp = ts = 0;
  231 + }
  232 + else{
  233 + tp = ( 2 * sin(theta_t) * cos(theta_i) ) / ( sin(theta_t + theta_i) * cos(theta_t - theta_i) );
  234 + ts = ( 2 * sin(theta_t) * cos(theta_i) ) / sin(theta_t + theta_i);
  235 + }
  236 +
  237 + //compute the coordinate space for the plane of incidence
  238 + vec<T> z_hat = -P.norm();
  239 + vec<T> y_hat = P.parallel(k).norm();
  240 + vec<T> x_hat = y_hat.cross(z_hat).norm();
  241 +
  242 + //compute the k vectors for r and t
  243 + vec<T> kr, kt;
  244 + kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag();
  245 + kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr;
  246 +
  247 + //compute the magnitude of the p- and s-polarized components of the incident E vector
  248 + complex<T> Ei_s = E0.dot(x_hat);
  249 + //int sgn = (0 < E0.dot(y_hat)) - (E0.dot(y_hat) < 0);
  250 + int sgn = E0.dot(y_hat).sgn();
  251 + vec< complex<T> > cx_hat = x_hat;
  252 + complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
  253 + //T Ei_p = ( E0 - x_hat * Ei_s ).len();
  254 + //compute the magnitude of the p- and s-polarized components of the reflected E vector
  255 + complex<T> Er_s = Ei_s * rs;
  256 + complex<T> Er_p = Ei_p * rp;
  257 + //compute the magnitude of the p- and s-polarized components of the transmitted E vector
  258 + complex<T> Et_s = Ei_s * ts;
  259 + complex<T> Et_p = Ei_p * tp;
  260 +
  261 + //std::cout<<"E0: "<<E0<<std::endl;
  262 + //std::cout<<"E0 dot y_hat: "<<E0.dot(y_hat)<<std::endl;
  263 + //std::cout<<"theta i: "<<theta_i<<" theta t: "<<theta_t<<std::endl;
  264 + //std::cout<<"x_hat: "<<x_hat<<" y_hat: "<<y_hat<<" z_hat: "<<z_hat<<std::endl;
  265 + //std::cout<<"Ei_s: "<<Ei_s<<" Ei_p: "<<Ei_p<<" Er_s: "<<Er_s<<" Er_p: "<<Er_p<<" Et_s: "<<Et_s<<" Et_p: "<<Et_p<<std::endl;
  266 + //std::cout<<"rs: "<<rs<<" rp: "<<rp<<" ts: "<<ts<<" tp: "<<tp<<std::endl;
  267 +
  268 +
  269 + //compute the reflected E vector
  270 + vec< complex<T> > Er = vec< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
  271 + //compute the transmitted E vector
  272 + vec< complex<T> > Et = vec< complex<T> >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s;
  273 +
  274 + T phase_t = P.p().dot(k - kt);
  275 + T phase_r = P.p().dot(k - kr);
  276 +
  277 + //std::cout<<"phase r: "<<phase_r<<" phase t: "<<phase_t<<std::endl;
  278 +
  279 + //std::cout<<"phase: "<<phase<<std::endl;
  280 +
  281 + //create the plane waves
  282 + r.k = kr;
  283 + r.E0 = Er * exp( complex<T>(0, phase_r) );
  284 + //r.phi = phase_r;
  285 +
  286 + //t = bend(kt);
  287 + //t.k = t.k * nr;
  288 +
  289 + t.k = kt;
  290 + t.E0 = Et * exp( complex<T>(0, phase_t) );
  291 + //t.phi = phase_t;
  292 + //std::cout<<"i: "<<str()<<std::endl;
  293 + //std::cout<<"r: "<<r.str()<<std::endl;
  294 + //std::cout<<"t: "<<t.str()<<std::endl;
  295 +
  296 + //std::cout<<"i + r: "<<pos()[0] + r.pos()[0]<<pos()[1] + r.pos()[1]<<pos()[2] + r.pos()[2]<<std::endl;
  297 + //std::cout<<"t: "<<t.pos()[0]<<t.pos()[1]<<t.pos()[2]<<std::endl;
  298 + //std::cout<<"--------------------------------"<<std::endl;
  299 +
  300 + }
  301 +
  302 + std::string str()
  303 + {
  304 + std::stringstream ss;
  305 + ss<<"Plane Wave:"<<std::endl;
  306 + ss<<" "<<E0<<" e^i ( "<<k<<" . r )";
  307 + return ss.str();
  308 + }
  309 +}; //end planewave class
  310 +} //end namespace optics
  311 +} //end namespace stim
  312 +
  313 +template <typename T>
  314 +std::ostream& operator<<(std::ostream& os, rts::planewave<T> p)
  315 +{
  316 + os<<p.str();
  317 + return os;
  318 +}
  319 +
  320 +#endif
... ...
stim/visualization/aaboundingbox.h
... ... @@ -10,8 +10,8 @@ class aaboundingbox{
10 10  
11 11 public:
12 12 bool set; //has the bounding box been set to include any points?
13   - stim::vec<T> A; //minimum point in the bounding box
14   - stim::vec<T> B; //maximum point in the bounding box
  13 + stim::vec3<T> A; //minimum point in the bounding box
  14 + stim::vec3<T> B; //maximum point in the bounding box
15 15  
16 16 aaboundingbox(){ //constructor generates an empty bounding box
17 17 set = false;
... ... @@ -21,7 +21,7 @@ public:
21 21 /// Test if a point is inside of the bounding box and returns true if it is.
22 22  
23 23 /// @param p is the point to be tested
24   - bool test(stim::vec<T> p){
  24 + bool test(stim::vec3<T> p){
25 25  
26 26 for(unsigned d = 0; d < p.size(); p++){ //for each dimension
27 27 if(p[d] < A[d]) return false; //if the point is less than the minimum bound, return false
... ... @@ -33,7 +33,7 @@ public:
33 33 /// Expand the bounding box to include the specified point.
34 34  
35 35 /// @param p is the point to be included
36   - void expand(stim::vec<T> p){
  36 + void expand(stim::vec3<T> p){
37 37  
38 38 if(!set){ //if the bounding box is empty, fill it with the current point
39 39 A = B = p;
... ... @@ -47,12 +47,12 @@ public:
47 47 }
48 48  
49 49 /// Return the center point of the bounding box as a stim::vec
50   - stim::vec<T> center(){
  50 + stim::vec3<T> center(){
51 51 return (B + A) * 0.5;
52 52 }
53 53  
54 54 /// Return the size of the bounding box as a stim::vec
55   - stim::vec<T> size(){
  55 + stim::vec3<T> size(){
56 56 return (B - A);
57 57 }
58 58  
... ...
stim/visualization/camera.h
... ... @@ -11,32 +11,32 @@ namespace stim{
11 11  
12 12 class camera
13 13 {
14   - vec<float> d; //direction that the camera is pointing
15   - vec<float> p; //position of the camera
16   - vec<float> up; //"up" direction
  14 + vec3<float> d; //direction that the camera is pointing
  15 + vec3<float> p; //position of the camera
  16 + vec3<float> up; //"up" direction
17 17 float focus; //focal length of the camera
18 18 float fov;
19 19  
20 20 //private function makes sure that the up vector is orthogonal to the direction vector and both are normalized
21 21 void stabalize()
22 22 {
23   - vec<float> side = up.cross(d);
  23 + vec3<float> side = up.cross(d);
24 24 up = d.cross(side);
25 25 up = up.norm();
26 26 d = d.norm();
27 27 }
28 28  
29 29 public:
30   - void setPosition(vec<float> pos)
  30 + void setPosition(vec3<float> pos)
31 31 {
32 32 p = pos;
33 33 }
34   - void setPosition(float x, float y, float z){setPosition(vec<float>(x, y, z));}
  34 + void setPosition(float x, float y, float z){setPosition(vec3<float>(x, y, z));}
35 35  
36 36 void setFocalDistance(float distance){focus = distance;}
37 37 void setFOV(float field_of_view){fov = field_of_view;}
38 38  
39   - void LookAt(vec<float> pos)
  39 + void LookAt(vec3<float> pos)
40 40 {
41 41 //find the new direction
42 42 d = pos - p;
... ... @@ -47,22 +47,22 @@ public:
47 47 //stabalize the camera
48 48 stabalize();
49 49 }
50   - void LookAt(float px, float py, float pz){LookAt(vec<float>(px, py, pz));}
51   - void LookAt(vec<float> pos, vec<float> new_up){up = new_up; LookAt(pos);}
52   - void LookAt(float px, float py, float pz, float ux, float uy, float uz){LookAt(vec<float>(px, py, pz), vec<float>(ux, uy, uz));}
  50 + void LookAt(float px, float py, float pz){LookAt(vec3<float>(px, py, pz));}
  51 + void LookAt(vec3<float> pos, vec3<float> new_up){up = new_up; LookAt(pos);}
  52 + void LookAt(float px, float py, float pz, float ux, float uy, float uz){LookAt(vec3<float>(px, py, pz), vec3<float>(ux, uy, uz));}
53 53 void LookAtDolly(float lx, float ly, float lz)
54 54 {
55 55 //find the current focus point
56   - vec<float> f = p + focus*d;
57   - vec<float> T = vec<float>(lx, ly, lz) - f;
  56 + vec3<float> f = p + focus*d;
  57 + vec3<float> T = vec3<float>(lx, ly, lz) - f;
58 58 p = p + T;
59 59 }
60 60  
61   - void Dolly(vec<float> direction)
  61 + void Dolly(vec3<float> direction)
62 62 {
63 63 p = p+direction;
64 64 }
65   - void Dolly(float x, float y, float z){Dolly(vec<float>(x, y, z));}
  65 + void Dolly(float x, float y, float z){Dolly(vec3<float>(x, y, z));}
66 66 void Push(float delta)
67 67 {
68 68 if(delta > focus)
... ... @@ -80,7 +80,7 @@ public:
80 80 qx.CreateRotation(theta_x, up[0], up[1], up[2]);
81 81  
82 82 //y rotation is around the side axis
83   - vec<float> side = up.cross(d);
  83 + vec3<float> side = up.cross(d);
84 84 quaternion<float> qy;
85 85 qy.CreateRotation(theta_y, side[0], side[1], side[2]);
86 86  
... ... @@ -118,28 +118,28 @@ public:
118 118 void OrbitFocus(float theta_x, float theta_y)
119 119 {
120 120 //find the focal point
121   - vec<float> focal_point = p + focus*d;
  121 + vec3<float> focal_point = p + focus*d;
122 122  
123 123 //center the coordinate system on the focal point
124   - vec<float> centered = p - (focal_point - vec<float>(0, 0, 0));
  124 + vec3<float> centered = p - (focal_point - vec3<float>(0, 0, 0));
125 125  
126 126 //create the x rotation (around the up vector)
127 127 quaternion<float> qx;
128 128 qx.CreateRotation(theta_x, up[0], up[1], up[2]);
129   - centered = vec<float>(0, 0, 0) + qx.toMatrix3()*(centered - vec<float>(0, 0, 0));
  129 + centered = vec3<float>(0, 0, 0) + qx.toMatrix3()*(centered - vec3<float>(0, 0, 0));
130 130  
131 131 //get a side vector for theta_y rotation
132   - vec<float> side = up.cross((vec<float>(0, 0, 0) - centered).norm());
  132 + vec3<float> side = up.cross((vec3<float>(0, 0, 0) - centered).norm());
133 133  
134 134 quaternion<float> qy;
135 135 qy.CreateRotation(theta_y, side[0], side[1], side[2]);
136   - centered = vec<float>(0, 0, 0) + qy.toMatrix3()*(centered - vec<float>(0, 0, 0));
  136 + centered = vec3<float>(0, 0, 0) + qy.toMatrix3()*(centered - vec3<float>(0, 0, 0));
137 137  
138 138 //perform the rotation on the centered camera position
139 139 //centered = final.toMatrix()*centered;
140 140  
141 141 //re-position the camera
142   - p = centered + (focal_point - vec<float>(0, 0, 0));
  142 + p = centered + (focal_point - vec3<float>(0, 0, 0));
143 143  
144 144 //make sure we are looking at the focal point
145 145 LookAt(focal_point);
... ... @@ -151,17 +151,17 @@ public:
151 151  
152 152 void Slide(float u, float v)
153 153 {
154   - vec<float> V = up.norm();
155   - vec<float> U = up.cross(d).norm();
  154 + vec3<float> V = up.norm();
  155 + vec3<float> U = up.cross(d).norm();
156 156  
157 157 p = p + (V * v) + (U * u);
158 158 }
159 159  
160 160 //accessor methods
161   - vec<float> getPosition(){return p;}
162   - vec<float> getUp(){return up;}
163   - vec<float> getDirection(){return d;}
164   - vec<float> getLookAt(){return p + focus*d;}
  161 + vec3<float> getPosition(){return p;}
  162 + vec3<float> getUp(){return up;}
  163 + vec3<float> getDirection(){return d;}
  164 + vec3<float> getLookAt(){return p + focus*d;}
165 165 float getFOV(){return fov;}
166 166  
167 167 //output the camera settings
... ... @@ -182,9 +182,9 @@ public:
182 182 //constructor
183 183 camera()
184 184 {
185   - p = vec<float>(0, 0, 0);
186   - d = vec<float>(0, 0, 1);
187   - up = vec<float>(0, 1, 0);
  185 + p = vec3<float>(0, 0, 0);
  186 + d = vec3<float>(0, 0, 1);
  187 + up = vec3<float>(0, 1, 0);
188 188 focus = 1;
189 189  
190 190 }
... ...
stim/visualization/cylinder.h
... ... @@ -2,7 +2,7 @@
2 2 #define STIM_CYLINDER_H
3 3 #include <iostream>
4 4 #include <stim/math/circle.h>
5   -#include <stim/math/vector.h>
  5 +#include <stim/math/vec3.h>
6 6  
7 7  
8 8 namespace stim
... ... @@ -25,11 +25,11 @@ class cylinder
25 25  
26 26 ///inits the cylinder from a list of points (inP) and radii (inM)
27 27 void
28   - init(std::vector<stim::vec<T> > inP, std::vector<stim::vec<T> > inM)
  28 + init(std::vector<stim::vec3<T> > inP, std::vector<stim::vec<T> > inM)
29 29 {
30 30 mags = inM;
31   - stim::vec<float> v1;
32   - stim::vec<float> v2;
  31 + stim::vec3<float> v1;
  32 + stim::vec3<float> v2;
33 33 e.resize(inP.size());
34 34 if(inP.size() < 2)
35 35 return;
... ... @@ -38,16 +38,16 @@ class cylinder
38 38 L.resize(inP.size());
39 39 T temp = (T)0;
40 40 L[0] = 0;
41   - for(int i = 1; i < L.size(); i++)
  41 + for(size_t i = 1; i < L.size(); i++)
42 42 {
43 43 temp += (inP[i-1] - inP[i]).len();
44 44 L[i] = temp;
45 45 }
46 46  
47   - stim::vec<T> dr = (inP[1] - inP[0]).norm();
48   - s = stim::circle<T>(inP[0], inM[0][0], dr, stim::vec<T>(1,0,0));
  47 + stim::vec3<T> dr = (inP[1] - inP[0]).norm();
  48 + s = stim::circle<T>(inP[0], inM[0][0], dr, stim::vec3<T>(1,0,0));
49 49 e[0] = s;
50   - for(int i = 1; i < inP.size()-1; i++)
  50 + for(size_t i = 1; i < inP.size()-1; i++)
51 51 {
52 52 s.center(inP[i]);
53 53 v1 = (inP[i] - inP[i-1]).norm();
... ... @@ -67,7 +67,7 @@ class cylinder
67 67 }
68 68  
69 69 ///returns the direction vector at point idx.
70   - stim::vec<T>
  70 + stim::vec3<T>
71 71 d(int idx)
72 72 {
73 73 if(idx == 0)
... ... @@ -81,15 +81,15 @@ class cylinder
81 81 else
82 82 {
83 83 // return (e[idx+1].P - e[idx].P).norm();
84   - stim::vec<float> v1 = (e[idx].P-e[idx-1].P).norm();
85   - stim::vec<float> v2 = (e[idx+1].P-e[idx].P).norm();
  84 + stim::vec3<float> v1 = (e[idx].P-e[idx-1].P).norm();
  85 + stim::vec3<float> v2 = (e[idx+1].P-e[idx].P).norm();
86 86 return (v1+v2).norm();
87 87 }
88 88 // return e[idx].N;
89 89  
90 90 }
91 91  
92   - stim::vec<T>
  92 + stim::vec3<T>
93 93 d(T l, int idx)
94 94 {
95 95 if(idx == 0 || idx == e.size()-1)
... ... @@ -144,13 +144,13 @@ class cylinder
144 144 ///constructor to create a cylinder from a set of points, radii, and the number of sides for the cylinder.
145 145 ///@param inP: Vector of stim vecs composing the points of the centerline.
146 146 ///@param inM: Vector of stim vecs composing the radii of the centerline.
147   - cylinder(std::vector<stim::vec<T> > inP, std::vector<stim::vec<T> > inM){
  147 + cylinder(std::vector<stim::vec3<T> > inP, std::vector<stim::vec3<T> > inM){
148 148 init(inP, inM);
149 149 }
150 150  
151 151 ///Constructor defines a cylinder with centerline inP and magnitudes of zero
152 152 ///@param inP: Vector of stim vecs composing the points of the centerline
153   - cylinder(std::vector< stim::vec<T> > inP){
  153 + cylinder(std::vector< stim::vec3<T> > inP){
154 154 std::vector< stim::vec<T> > inM; //create an array of arbitrary magnitudes
155 155  
156 156 stim::vec<T> zero;
... ... @@ -171,12 +171,12 @@ class cylinder
171 171 ///Returns a position vector at the given p-value (p value ranges from 0 to 1).
172 172 ///interpolates the position along the line.
173 173 ///@param pvalue: the location of the in the cylinder, from 0 (beginning to 1).
174   - stim::vec<T>
  174 + stim::vec3<T>
175 175 p(T pvalue)
176 176 {
177 177 if(pvalue < 0.0 || pvalue > 1.0)
178 178 {
179   - return stim::vec<float>(-1,-1,-1);
  179 + return stim::vec3<float>(-1,-1,-1);
180 180 }
181 181 T l = pvalue*L[L.size()-1];
182 182 int idx = findIdx(l);
... ... @@ -188,7 +188,7 @@ class cylinder
188 188 ///Interpolates the radius along the line.
189 189 ///@param l: the location of the in the cylinder.
190 190 ///@param idx: integer location of the point closest to l but prior to it.
191   - stim::vec<T>
  191 + stim::vec3<T>
192 192 p(T l, int idx)
193 193 {
194 194 T rat = (l-L[idx])/(L[idx+1]-L[idx]);
... ... @@ -252,16 +252,16 @@ class cylinder
252 252 ///in x, y, z coordinates. Theta is in degrees from 0 to 360.
253 253 ///@param pvalue: the location of the in the cylinder, from 0 (beginning to 1).
254 254 ///@param theta: the angle to the point of a circle.
255   - stim::vec<T>
  255 + stim::vec3<T>
256 256 surf(T pvalue, T theta)
257 257 {
258 258 if(pvalue < 0.0 || pvalue > 1.0)
259 259 {
260   - return stim::vec<float>(-1,-1,-1);
  260 + return stim::vec3<float>(-1,-1,-1);
261 261 } else {
262 262 T l = pvalue*L[L.size()-1];
263 263 int idx = findIdx(l);
264   - stim::vec<T> ps = p(l, idx);
  264 + stim::vec3<T> ps = p(l, idx);
265 265 T m = r(l, idx);
266 266 s = e[idx];
267 267 s.center(ps);
... ... @@ -273,10 +273,10 @@ class cylinder
273 273  
274 274 ///returns a vector of points necessary to create a circle at every position in the fiber.
275 275 ///@param sides: the number of sides of each circle.
276   - std::vector<std::vector<vec<T> > >
  276 + std::vector<std::vector<vec3<T> > >
277 277 getPoints(int sides)
278 278 {
279   - std::vector<std::vector <vec<T> > > points;
  279 + std::vector<std::vector <vec3<T> > > points;
280 280 points.resize(e.size());
281 281 for(int i = 0; i < e.size(); i++)
282 282 {
... ... @@ -293,7 +293,7 @@ class cylinder
293 293 }
294 294 /// Allows a point on the centerline to be accessed using bracket notation
295 295  
296   - vec<T> operator[](unsigned int i){
  296 + vec3<T> operator[](unsigned int i){
297 297 return e[i].P;
298 298 }
299 299  
... ... @@ -309,7 +309,7 @@ class cylinder
309 309 T M = 0; //initialize the integral to zero
310 310 T m0, m1; //allocate space for both magnitudes in a single segment
311 311  
312   - //vec<T> p0, p1; //allocate space for both points in a single segment
  312 + //vec3<T> p0, p1; //allocate space for both points in a single segment
313 313  
314 314 m0 = mags[0][m]; //initialize the first point and magnitude to the first point in the cylinder
315 315 //p0 = pos[0];
... ... @@ -325,7 +325,7 @@ class cylinder
325 325 if(p > 1) len = (L[p-1] - L[p-2]); //calculate the segment length using the L array
326 326  
327 327 //add the average magnitude, weighted by the segment length
328   - M += (m0 + m1)/2.0 * len;
  328 + M += (m0 + m1)/(T)2.0 * len;
329 329  
330 330 m0 = m1; //move to the next segment by shifting points
331 331 }
... ... @@ -345,21 +345,21 @@ class cylinder
345 345 /// @param spacing is the maximum spacing allowed between sample points
346 346 cylinder<T> resample(T spacing){
347 347  
348   - std::vector< vec<T> > result;
  348 + std::vector< vec3<T> > result;
349 349  
350   - vec<T> p0 = e[0].P; //initialize p0 to the first point on the centerline
351   - vec<T> p1;
  350 + vec3<T> p0 = e[0].P; //initialize p0 to the first point on the centerline
  351 + vec3<T> p1;
352 352 unsigned N = size(); //number of points in the current centerline
353 353  
354 354 //for each line segment on the centerline
355 355 for(unsigned int i = 1; i < N; i++){
356 356 p1 = e[i].P; //get the second point in the line segment
357 357  
358   - vec<T> v = p1 - p0; //calculate the vector between these two points
  358 + vec3<T> v = p1 - p0; //calculate the vector between these two points
359 359 T d = v.len(); //calculate the distance between these two points (length of the line segment)
360 360  
361   - unsigned nsteps = d / spacing+1; //calculate the number of steps to take along the segment to meet the spacing criteria
362   - T stepsize = 1.0 / nsteps; //calculate the parametric step size between new centerline points
  361 + size_t nsteps = (size_t)std::ceil(d / spacing); //calculate the number of steps to take along the segment to meet the spacing criteria
  362 + T stepsize = (T)1.0 / nsteps; //calculate the parametric step size between new centerline points
363 363  
364 364 //for each step along the line segment
365 365 for(unsigned s = 0; s < nsteps; s++){
... ...