Commit 2bafc56990f49d0822f03098b9bbd5bc14f22c69

Authored by David Mayerich
2 parents d31038b3 31262e83

Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib

stim/biomodels/network.h
@@ -8,7 +8,7 @@ @@ -8,7 +8,7 @@
8 #include <algorithm> 8 #include <algorithm>
9 #include <string.h> 9 #include <string.h>
10 #include <math.h> 10 #include <math.h>
11 -#include <stim/math/vector.h> 11 +#include <stim/math/vec3.h>
12 #include <stim/visualization/obj.h> 12 #include <stim/visualization/obj.h>
13 #include <stim/visualization/cylinder.h> 13 #include <stim/visualization/cylinder.h>
14 #include <ANN/ANN.h> 14 #include <ANN/ANN.h>
@@ -37,7 +37,7 @@ class network{ @@ -37,7 +37,7 @@ class network{
37 /// Constructor - creates an edge from a list of points by calling the stim::fiber constructor 37 /// Constructor - creates an edge from a list of points by calling the stim::fiber constructor
38 38
39 ///@param p is an array of positions in space 39 ///@param p is an array of positions in space
40 - edge(std::vector< stim::vec<T> > p) : cylinder<T>(p){} 40 + edge(std::vector< stim::vec3<T> > p) : cylinder<T>(p){}
41 41
42 /// Copy constructor creates an edge from a fiber 42 /// Copy constructor creates an edge from a fiber
43 edge(stim::cylinder<T> f) : cylinder<T>(f) {} 43 edge(stim::cylinder<T> f) : cylinder<T>(f) {}
@@ -61,20 +61,20 @@ class network{ @@ -61,20 +61,20 @@ class network{
61 }; 61 };
62 62
63 ///Node class that stores the physical position of the node as well as the edges it is connected to (edges that connect to it), As well as any additional data necessary. 63 ///Node class that stores the physical position of the node as well as the edges it is connected to (edges that connect to it), As well as any additional data necessary.
64 - class vertex : public stim::vec<T> 64 + class vertex : public stim::vec3<T>
65 { 65 {
66 public: 66 public:
67 //std::vector<unsigned int> edges; //indices of edges connected to this node. 67 //std::vector<unsigned int> edges; //indices of edges connected to this node.
68 std::vector<unsigned int> e[2]; //indices of edges going out (e[0]) and coming in (e[1]) 68 std::vector<unsigned int> e[2]; //indices of edges going out (e[0]) and coming in (e[1])
69 - //stim::vec<T> p; //position of this node in physical space. 69 + //stim::vec3<T> p; //position of this node in physical space.
70 70
71 //constructor takes a stim::vec 71 //constructor takes a stim::vec
72 - vertex(stim::vec<T> p) : stim::vec<T>(p){} 72 + vertex(stim::vec3<T> p) : stim::vec3<T>(p){}
73 73
74 /// Output the vertex information as a string 74 /// Output the vertex information as a string
75 std::string str(){ 75 std::string str(){
76 std::stringstream ss; 76 std::stringstream ss;
77 - ss<<"\t(x, y, z) = "<<stim::vec<T>::str(); 77 + ss<<"\t(x, y, z) = "<<stim::vec3<T>::str();
78 78
79 if(e[0].size() > 0){ 79 if(e[0].size() > 0){
80 ss<<"\t> "; 80 ss<<"\t> ";
@@ -129,7 +129,11 @@ public: @@ -129,7 +129,11 @@ public:
129 std::vector< stim::vec<T> > c; //allocate an array of points for the vessel centerline 129 std::vector< stim::vec<T> > c; //allocate an array of points for the vessel centerline
130 O.getLine(l, c); //get the fiber centerline 130 O.getLine(l, c); //get the fiber centerline
131 131
132 - edge new_edge = c; //create an edge from the given centerline 132 + std::vector< stim::vec3<T> > c3(c.size());
  133 + for(size_t j = 0; j < c.size(); j++)
  134 + c3[j] = c[j];
  135 +
  136 + edge new_edge = c3; //create an edge from the given centerline
133 unsigned int I = new_edge.size(); //calculate the number of points on the centerline 137 unsigned int I = new_edge.size(); //calculate the number of points on the centerline
134 138
135 //get the first and last vertex IDs for the line 139 //get the first and last vertex IDs for the line
@@ -222,7 +226,7 @@ public: @@ -222,7 +226,7 @@ public:
222 float gaussianFunction(float x, float std=25){ return exp(-x/(2*std*std));} // by default std = 25 226 float gaussianFunction(float x, float std=25){ return exp(-x/(2*std*std));} // by default std = 25
223 227
224 // stim 3d vector to annpoint of 3 dimensions 228 // stim 3d vector to annpoint of 3 dimensions
225 - void stim2ann(ANNpoint &a, stim::vec<T> b){ 229 + void stim2ann(ANNpoint &a, stim::vec3<T> b){
226 a[0] = b[0]; 230 a[0] = b[0];
227 a[1] = b[1]; 231 a[1] = b[1];
228 a[2] = b[2]; 232 a[2] = b[2];
@@ -278,10 +282,9 @@ public: @@ -278,10 +282,9 @@ public:
278 ANNdistArray dists = new ANNdist[1]; // near neighbor distances 282 ANNdistArray dists = new ANNdist[1]; // near neighbor distances
279 ANNidxArray nnIdx = new ANNidx[1]; // near neighbor indices // allocate near neigh indices 283 ANNidxArray nnIdx = new ANNidx[1]; // near neighbor indices // allocate near neigh indices
280 284
281 - stim::vec<T> p0, p1;  
282 - float m0, m1; 285 + stim::vec3<T> p0, p1;
  286 + float m1;
283 float M = 0; //stores the total metric value 287 float M = 0; //stores the total metric value
284 - float l; //stores the segment length  
285 float L = 0; //stores the total network length 288 float L = 0; //stores the total network length
286 ANNpoint queryPt = annAllocPt(3); 289 ANNpoint queryPt = annAllocPt(3);
287 for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A 290 for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A
@@ -292,7 +295,7 @@ public: @@ -292,7 +295,7 @@ public:
292 p1 = R.E[e][p]; //get the next point in the edge 295 p1 = R.E[e][p]; //get the next point in the edge
293 stim2ann(queryPt, p1); 296 stim2ann(queryPt, p1);
294 kdt->annkSearch( queryPt, 1, nnIdx, dists, eps); //find the distance between A and the current network 297 kdt->annkSearch( queryPt, 1, nnIdx, dists, eps); //find the distance between A and the current network
295 - m1 = 1.0f - gaussianFunction(dists[0], sigma); //calculate the metric value based on the distance 298 + m1 = 1.0f - gaussianFunction((float)dists[0], sigma); //calculate the metric value based on the distance
296 R.E[e].set_mag(m1, p, 1); //set the error for the second point in the segment 299 R.E[e].set_mag(m1, p, 1); //set the error for the second point in the segment
297 300
298 } 301 }
stim/cuda/cudatools/callable.h
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 2
3 //define the CUDA_CALLABLE macro (will prefix all members) 3 //define the CUDA_CALLABLE macro (will prefix all members)
4 #ifdef __CUDACC__ 4 #ifdef __CUDACC__
5 -#define CUDA_CALLABLE __host__ __device__ 5 +#define CUDA_CALLABLE __host__ __device__ inline
6 #else 6 #else
7 #define CUDA_CALLABLE 7 #define CUDA_CALLABLE
8 #endif 8 #endif
@@ -884,7 +884,7 @@ public: @@ -884,7 +884,7 @@ public:
884 /// using the following indexing: i = p*B + b 884 /// using the following indexing: i = p*B + b
885 /// @param matrix is the destination for the pixel data 885 /// @param matrix is the destination for the pixel data
886 /// @param mask is the mask 886 /// @param mask is the mask
887 - bool sift(T* matrix, unsigned char* mask = NULL){ 887 + bool sift(T* matrix, unsigned char* mask = NULL, bool PROGRESS = false){
888 size_t Lbytes = sizeof(T) * X(); 888 size_t Lbytes = sizeof(T) * X();
889 T* line = (T*) malloc( Lbytes ); //allocate space for a line 889 T* line = (T*) malloc( Lbytes ); //allocate space for a line
890 890
@@ -903,6 +903,7 @@ public: @@ -903,6 +903,7 @@ public:
903 pl++; //increment the pixel pointer 903 pl++; //increment the pixel pointer
904 } 904 }
905 } 905 }
  906 + if(PROGRESS) progress = (double)( (y+1)*Z() + 1) / (double)(Y() * Z()) * 100;
906 } 907 }
907 p += pl; //add the line increment to the running pixel index 908 p += pl; //add the line increment to the running pixel index
908 } 909 }
@@ -817,7 +817,7 @@ public: @@ -817,7 +817,7 @@ public:
817 /// using the following indexing: i = p*B + b 817 /// using the following indexing: i = p*B + b
818 /// @param matrix is the destination for the pixel data 818 /// @param matrix is the destination for the pixel data
819 /// @param mask is the mask 819 /// @param mask is the mask
820 - bool sift(T* matrix, unsigned char* mask = NULL){ 820 + bool sift(T* matrix, unsigned char* mask = NULL, bool PROGRESS = false){
821 size_t Bbytes = sizeof(T) * Z(); 821 size_t Bbytes = sizeof(T) * Z();
822 size_t XY = X() * Y(); 822 size_t XY = X() * Y();
823 T* band = (T*) malloc( Bbytes ); //allocate space for a line 823 T* band = (T*) malloc( Bbytes ); //allocate space for a line
@@ -836,6 +836,7 @@ public: @@ -836,6 +836,7 @@ public:
836 } 836 }
837 else 837 else
838 file.seekg(Bbytes, std::ios::cur); //otherwise skip this band 838 file.seekg(Bbytes, std::ios::cur); //otherwise skip this band
  839 + if(PROGRESS) progress = (double)(xy+1) / (double)XY * 100;
839 } 840 }
840 return true; 841 return true;
841 } 842 }
@@ -809,7 +809,7 @@ public: @@ -809,7 +809,7 @@ public:
809 /// using the following indexing: i = p*B + b 809 /// using the following indexing: i = p*B + b
810 /// @param matrix is the destination for the pixel data 810 /// @param matrix is the destination for the pixel data
811 /// @param mask is the mask 811 /// @param mask is the mask
812 - bool sift(T* matrix, unsigned char* mask = NULL){ 812 + bool sift(T* matrix, unsigned char* mask = NULL, bool PROGRESS = false){
813 unsigned long long XY = X() * Y(); //Number of XY pixels 813 unsigned long long XY = X() * Y(); //Number of XY pixels
814 unsigned long long L = XY * sizeof(T); //size of XY plane (in bytes) 814 unsigned long long L = XY * sizeof(T); //size of XY plane (in bytes)
815 815
@@ -827,9 +827,8 @@ public: @@ -827,9 +827,8 @@ public:
827 if(mask == NULL || mask[xy] != 0){ //if the pixel is valid 827 if(mask == NULL || mask[xy] != 0){ //if the pixel is valid
828 matrix[i*Z() + b] = band_image[xy]; //copy it to the appropriate point in the values[] array 828 matrix[i*Z() + b] = band_image[xy]; //copy it to the appropriate point in the values[] array
829 i++; 829 i++;
830 - //std::cout<<i<<std::endl;  
831 } 830 }
832 - 831 + if(PROGRESS) progress = (double)(xy+1) / (double)XY * 100;
833 } 832 }
834 } 833 }
835 834
@@ -670,13 +670,13 @@ public: @@ -670,13 +670,13 @@ public:
670 /// using the following indexing: i = b*P + p 670 /// using the following indexing: i = b*P + p
671 /// @param matrix is the destination for the pixel data 671 /// @param matrix is the destination for the pixel data
672 /// @param p is the mask 672 /// @param p is the mask
673 - bool sift(void* matrix, unsigned char* p = NULL){ 673 + bool sift(void* matrix, unsigned char* p = NULL, bool PROGRESS = false){
674 674
675 if (header.interleave == envi_header::BSQ){ //if the infile is bsq file 675 if (header.interleave == envi_header::BSQ){ //if the infile is bsq file
676 if (header.data_type == envi_header::float32) 676 if (header.data_type == envi_header::float32)
677 - return ((bsq<float>*)file)->sift((float*)matrix, p); 677 + return ((bsq<float>*)file)->sift((float*)matrix, p, PROGRESS);
678 else if (header.data_type == envi_header::float64) 678 else if (header.data_type == envi_header::float64)
679 - return ((bsq<double>*)file)->sift((double*)matrix, p); 679 + return ((bsq<double>*)file)->sift((double*)matrix, p, PROGRESS);
680 else{ 680 else{
681 std::cout << "ERROR: unidentified data type" << std::endl; 681 std::cout << "ERROR: unidentified data type" << std::endl;
682 exit(1); 682 exit(1);
@@ -685,9 +685,9 @@ public: @@ -685,9 +685,9 @@ public:
685 685
686 if (header.interleave == envi_header::BIP){ 686 if (header.interleave == envi_header::BIP){
687 if (header.data_type == envi_header::float32) 687 if (header.data_type == envi_header::float32)
688 - return ((bip<float>*)file)->sift((float*)matrix, p); 688 + return ((bip<float>*)file)->sift((float*)matrix, p, PROGRESS);
689 else if (header.data_type == envi_header::float64) 689 else if (header.data_type == envi_header::float64)
690 - return ((bip<double>*)file)->sift((double*)matrix, p); 690 + return ((bip<double>*)file)->sift((double*)matrix, p, PROGRESS);
691 else{ 691 else{
692 std::cout << "ERROR: unidentified data type" << std::endl; 692 std::cout << "ERROR: unidentified data type" << std::endl;
693 exit(1); 693 exit(1);
@@ -695,9 +695,9 @@ public: @@ -695,9 +695,9 @@ public:
695 } 695 }
696 if (header.interleave == envi_header::BIL){ 696 if (header.interleave == envi_header::BIL){
697 if (header.data_type == envi_header::float32) 697 if (header.data_type == envi_header::float32)
698 - return ((bil<float>*)file)->sift((float*)matrix, p); 698 + return ((bil<float>*)file)->sift((float*)matrix, p, PROGRESS);
699 else if (header.data_type == envi_header::float64) 699 else if (header.data_type == envi_header::float64)
700 - return ((bil<double>*)file)->sift((double*)matrix, p); 700 + return ((bil<double>*)file)->sift((double*)matrix, p, PROGRESS);
701 else{ 701 else{
702 std::cout << "ERROR: unidentified data type" << std::endl; 702 std::cout << "ERROR: unidentified data type" << std::endl;
703 exit(1); 703 exit(1);
stim/image/image.h
@@ -25,8 +25,6 @@ class image{ @@ -25,8 +25,6 @@ class image{
25 size_t Y() const { return R[2]; } 25 size_t Y() const { return R[2]; }
26 size_t C() const { return R[0]; } 26 size_t C() const { return R[0]; }
27 27
28 - size_t bytes(){ return size() * sizeof(T); }  
29 -  
30 void init(){ //initializes all variables, assumes no memory is allocated 28 void init(){ //initializes all variables, assumes no memory is allocated
31 memset(R, 0, sizeof(size_t) * 3); //set the resolution and number of channels to zero 29 memset(R, 0, sizeof(size_t) * 3); //set the resolution and number of channels to zero
32 img = NULL; 30 img = NULL;
@@ -34,7 +32,6 @@ class image{ @@ -34,7 +32,6 @@ class image{
34 32
35 void unalloc(){ //frees any resources associated with the image 33 void unalloc(){ //frees any resources associated with the image
36 if(img) free(img); //if memory has been allocated, free it 34 if(img) free(img); //if memory has been allocated, free it
37 - img=NULL;  
38 } 35 }
39 36
40 37
@@ -45,16 +42,15 @@ class image{ @@ -45,16 +42,15 @@ class image{
45 42
46 void allocate(){ 43 void allocate(){
47 unalloc(); 44 unalloc();
48 - img = (T*) malloc( bytes() ); //allocate memory  
49 - memset(img, 0, bytes()); 45 + img = (T*) malloc( sizeof(T) * R[0] * R[1] * R[2] ); //allocate memory
50 } 46 }
51 47
52 void allocate(size_t x, size_t y, size_t c){ //allocate memory based on the resolution 48 void allocate(size_t x, size_t y, size_t c){ //allocate memory based on the resolution
53 - unalloc();  
54 R[0] = c; R[1] = x; R[2] = y; //set the resolution 49 R[0] = c; R[1] = x; R[2] = y; //set the resolution
55 allocate(); //allocate memory 50 allocate(); //allocate memory
56 } 51 }
57 52
  53 + size_t bytes(){ return size() * sizeof(T); }
58 54
59 size_t idx(size_t x, size_t y, size_t c = 0){ 55 size_t idx(size_t x, size_t y, size_t c = 0){
60 return y * C() * X() + x * C() + c; 56 return y * C() * X() + x * C() + c;
@@ -106,15 +102,14 @@ class image{ @@ -106,15 +102,14 @@ class image{
106 102
107 std::cout<<"ERROR in stim::image::white - no white value known for this data type"<<std::endl; 103 std::cout<<"ERROR in stim::image::white - no white value known for this data type"<<std::endl;
108 exit(1); 104 exit(1);
  105 +
109 } 106 }
110 107
111 108
112 public: 109 public:
113 110
114 /// Default constructor - creates an empty image object 111 /// Default constructor - creates an empty image object
115 - image(){  
116 - init(); //initialize all variables to zero, don't allocate any memory  
117 - } 112 + image(){ init(); } //initialize all variables to zero, don't allocate any memory
118 113
119 /// Constructor with a filename - loads the specified file 114 /// Constructor with a filename - loads the specified file
120 image(std::string filename){ //constructor initialize the image with an image file 115 image(std::string filename){ //constructor initialize the image with an image file
@@ -136,7 +131,7 @@ public: @@ -136,7 +131,7 @@ public:
136 } 131 }
137 132
138 /// Copy constructor - duplicates an image object 133 /// Copy constructor - duplicates an image object
139 - image(const stim::image<T> &I){ 134 + image(const stim::image<T>& I){
140 init(); 135 init();
141 allocate(I.X(), I.Y(), I.C()); 136 allocate(I.X(), I.Y(), I.C());
142 memcpy(img, I.img, bytes()); 137 memcpy(img, I.img, bytes());
@@ -148,6 +143,7 @@ public: @@ -148,6 +143,7 @@ public:
148 } 143 }
149 144
150 stim::image<T>& operator=(const stim::image<T>& I){ 145 stim::image<T>& operator=(const stim::image<T>& I){
  146 + init();
151 if(&I == this) //handle self-assignment 147 if(&I == this) //handle self-assignment
152 return *this; 148 return *this;
153 allocate(I.X(), I.Y(), I.C()); 149 allocate(I.X(), I.Y(), I.C());
@@ -160,22 +156,15 @@ public: @@ -160,22 +156,15 @@ public:
160 156
161 cv::Mat cvImage = cv::imread(filename, CV_LOAD_IMAGE_UNCHANGED); //use OpenCV to open the image file 157 cv::Mat cvImage = cv::imread(filename, CV_LOAD_IMAGE_UNCHANGED); //use OpenCV to open the image file
162 if(!cvImage.data){ 158 if(!cvImage.data){
163 - std::cout<<"ERROR stim::image::load() - unable to find image "<<filename<<" ["<<__FILE__<<" (line "<<__LINE__<<")]"<<std::endl; 159 + std::cout<<"ERROR stim::image::load() - unable to find image "<<filename<<std::endl;
164 exit(1); 160 exit(1);
165 } 161 }
166 allocate(cvImage.cols, cvImage.rows, cvImage.channels()); //allocate space for the image 162 allocate(cvImage.cols, cvImage.rows, cvImage.channels()); //allocate space for the image
167 - T* cv_ptr = (T*) cvImage.data;  
168 - if(C() == 1)  
169 - {  
170 - //if this is a single-color image, just copy the data  
171 - memcpy(img, cv_ptr, bytes());  
172 - }  
173 - if(C() == 3)  
174 - { //if this is a 3-color image, OpenCV uses BGR interleaving 163 + T* cv_ptr = (T*)cvImage.data;
  164 + if(C() == 1) //if this is a single-color image, just copy the data
  165 + memcpy(img, cv_ptr, bytes());
  166 + if(C() == 3) //if this is a 3-color image, OpenCV uses BGR interleaving
175 set_interleaved_bgr(cv_ptr, X(), Y()); 167 set_interleaved_bgr(cv_ptr, X(), Y());
176 - }  
177 -  
178 - cvImage.release();  
179 } 168 }
180 169
181 //save a file 170 //save a file
@@ -189,18 +178,16 @@ public: @@ -189,18 +178,16 @@ public:
189 get_interleaved_bgr(buffer); 178 get_interleaved_bgr(buffer);
190 cv::Mat cvImage((int)Y(), (int)X(), cv_type(), buffer); 179 cv::Mat cvImage((int)Y(), (int)X(), cv_type(), buffer);
191 cv::imwrite(filename, cvImage); 180 cv::imwrite(filename, cvImage);
192 - cvImage.release();  
193 - free(buffer);  
194 } 181 }
195 182
196 //create an image from an interleaved buffer 183 //create an image from an interleaved buffer
197 - void set_interleaved_rgb(T* buffer, size_t width, size_t height, size_t channels = 3){  
198 - allocate(width, height, channels); 184 + void set_interleaved_rgb(T* buffer, size_t width, size_t height){
  185 + allocate(width, height, 3);
199 memcpy(img, buffer, bytes()); 186 memcpy(img, buffer, bytes());
200 } 187 }
201 188
202 - void set_interleaved_bgr(T* buffer, size_t width, size_t height, size_t channels = 3){  
203 - allocate(width, height, channels); 189 + void set_interleaved_bgr(T* buffer, size_t width, size_t height){
  190 + allocate(width, height, 3);
204 for(size_t c = 0; c < C(); c++){ //copy directly 191 for(size_t c = 0; c < C(); c++){ //copy directly
205 for(size_t y = 0; y < Y(); y++){ 192 for(size_t y = 0; y < Y(); y++){
206 for(size_t x = 0; x < X(); x++){ 193 for(size_t x = 0; x < X(); x++){
@@ -380,34 +367,6 @@ public: @@ -380,34 +367,6 @@ public:
380 367
381 return r; //return the inverted image 368 return r; //return the inverted image
382 } 369 }
383 -  
384 - /// Invert an image by calculating I1 = alpha - I0, where alpha is the maximum image value  
385 - image<T> invert(){  
386 - size_t N = size(); //calculate the total number of values in the image  
387 - image<T> r(X(), Y(), C()); //allocate space for the resulting image  
388 - T white_val = maxv();  
389 - for(size_t n = 0; n < N; n++)  
390 - r.img[n] = white_val - img[n]; //perform the inversion  
391 -  
392 - return r; //return the inverted image  
393 - }  
394 -  
395 - ///crops the image from x1 to x0 and y1 to y0 and returns a new (smaller) image.  
396 - image<T> crop(int x0, int x1, int y0, int y1)  
397 - {  
398 -  
399 - image<T> ret(x1-x0, y1-y0, C());  
400 - int newWidth = x1-x0;  
401 - int destidx, srcidx;  
402 - ///for each row, cut what amount of data from the original and put it into the new copy.  
403 - for(int i = 0; i < (y1-y0); i++)  
404 - {  
405 - destidx = i*newWidth*C(); ///destination index one per each row  
406 - srcidx = ((i+(y0))*X()+x0)*C(); ///source index, one per each row.  
407 - memcpy(&ret.img[destidx], &img[srcidx], sizeof(T)*newWidth*C());  
408 - }  
409 - return ret;  
410 - }  
411 370
412 image<T> srgb2lab(){ 371 image<T> srgb2lab(){
413 std::cout<<"ERROR stim::image::srgb2lab - function has been broken, re-implement."<<std::endl; 372 std::cout<<"ERROR stim::image::srgb2lab - function has been broken, re-implement."<<std::endl;
@@ -426,7 +385,6 @@ public: @@ -426,7 +385,6 @@ public:
426 exit(1); 385 exit(1);
427 } 386 }
428 387
429 -  
430 // leila's code for non_interleaving data in 3D 388 // leila's code for non_interleaving data in 3D
431 //create an data set from an interleaved buffer 389 //create an data set from an interleaved buffer
432 void set_interleaved3(T* buffer, size_t width, size_t height, size_t depth, size_t channels = 3){ 390 void set_interleaved3(T* buffer, size_t width, size_t height, size_t depth, size_t channels = 3){
stim/math/bessel.h
@@ -1258,7 +1258,7 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv, @@ -1258,7 +1258,7 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv,
1258 P a0,v0,pv0,pv1,vl,ga,gb,vg,vv,w0,w1,ya0,yak,ya1,wa; 1258 P a0,v0,pv0,pv1,vl,ga,gb,vg,vv,w0,w1,ya0,yak,ya1,wa;
1259 int j,n,k,kz,l,lb,lb0,m; 1259 int j,n,k,kz,l,lb,lb0,m;
1260 1260
1261 - a0 = abs(z); 1261 + a0 = ::abs(z);
1262 z1 = z; 1262 z1 = z;
1263 z2 = z*z; 1263 z2 = z*z;
1264 n = (int)v; 1264 n = (int)v;
@@ -1286,7 +1286,7 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv, @@ -1286,7 +1286,7 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv,
1286 vm = v; 1286 vm = v;
1287 return 0; 1287 return 0;
1288 } 1288 }
1289 - if (real(z1) < 0.0) z1 = -z; 1289 + if (::real(z1) < 0.0) z1 = -z;
1290 if (a0 <= 12.0) { 1290 if (a0 <= 12.0) {
1291 for (l=0;l<2;l++) { 1291 for (l=0;l<2;l++) {
1292 vl = v0+l; 1292 vl = v0+l;
@@ -1295,7 +1295,7 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv, @@ -1295,7 +1295,7 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv,
1295 for (k=1;k<=40;k++) { 1295 for (k=1;k<=40;k++) {
1296 cr *= -0.25*z2/(k*(k+vl)); 1296 cr *= -0.25*z2/(k*(k+vl));
1297 cjvl += cr; 1297 cjvl += cr;
1298 - if (abs(cr) < abs(cjvl)*eps) break; 1298 + if (::abs(cr) < ::abs(cjvl)*eps) break;
1299 } 1299 }
1300 vg = 1.0 + vl; 1300 vg = 1.0 + vl;
1301 ga = gamma(vg); 1301 ga = gamma(vg);
@@ -1348,7 +1348,7 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv, @@ -1348,7 +1348,7 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv,
1348 for (k=1;k<=40;k++) { 1348 for (k=1;k<=40;k++) {
1349 cr *= -0.25*z2/(k*(k-vl)); 1349 cr *= -0.25*z2/(k*(k-vl));
1350 cjvl += cr; 1350 cjvl += cr;
1351 - if (abs(cr) < abs(cjvl)*eps) break; 1351 + if (::abs(cr) < ::abs(cjvl)*eps) break;
1352 } 1352 }
1353 vg = 1.0-vl; 1353 vg = 1.0-vl;
1354 gb = gamma(vg); 1354 gb = gamma(vg);
@@ -1381,16 +1381,16 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv, @@ -1381,16 +1381,16 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv,
1381 cyv1 = M_2_PI*(cec*cjv1-1.0/z1-0.25*z1*cs1); 1381 cyv1 = M_2_PI*(cec*cjv1-1.0/z1-0.25*z1*cs1);
1382 } 1382 }
1383 } 1383 }
1384 - if (real(z) < 0.0) { 1384 + if (::real(z) < 0.0) {
1385 cfac0 = exp(pv0*cii); 1385 cfac0 = exp(pv0*cii);
1386 cfac1 = exp(pv1*cii); 1386 cfac1 = exp(pv1*cii);
1387 - if (imag(z) < 0.0) { 1387 + if (::imag(z) < 0.0) {
1388 cyv0 = cfac0*cyv0-(P)2.0*(complex<P>)cii*cos(pv0)*cjv0; 1388 cyv0 = cfac0*cyv0-(P)2.0*(complex<P>)cii*cos(pv0)*cjv0;
1389 cyv1 = cfac1*cyv1-(P)2.0*(complex<P>)cii*cos(pv1)*cjv1; 1389 cyv1 = cfac1*cyv1-(P)2.0*(complex<P>)cii*cos(pv1)*cjv1;
1390 cjv0 /= cfac0; 1390 cjv0 /= cfac0;
1391 cjv1 /= cfac1; 1391 cjv1 /= cfac1;
1392 } 1392 }
1393 - else if (imag(z) > 0.0) { 1393 + else if (::imag(z) > 0.0) {
1394 cyv0 = cyv0/cfac0+(P)2.0*(complex<P>)cii*cos(pv0)*cjv0; 1394 cyv0 = cyv0/cfac0+(P)2.0*(complex<P>)cii*cos(pv0)*cjv0;
1395 cyv1 = cyv1/cfac1+(P)2.0*(complex<P>)cii*cos(pv1)*cjv1; 1395 cyv1 = cyv1/cfac1+(P)2.0*(complex<P>)cii*cos(pv1)*cjv1;
1396 cjv0 *= cfac0; 1396 cjv0 *= cfac0;
@@ -1421,7 +1421,7 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv, @@ -1421,7 +1421,7 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv,
1421 cf2 = cf1; 1421 cf2 = cf1;
1422 cf1 = cf; 1422 cf1 = cf;
1423 } 1423 }
1424 - if (abs(cjv0) > abs(cjv1)) cs = cjv0/cf; 1424 + if (::abs(cjv0) > ::abs(cjv1)) cs = cjv0/cf;
1425 else cs = cjv1/cf2; 1425 else cs = cjv1/cf2;
1426 for (k=0;k<=n;k++) { 1426 for (k=0;k<=n;k++) {
1427 cjv[k] *= cs; 1427 cjv[k] *= cs;
@@ -1433,21 +1433,21 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv, @@ -1433,21 +1433,21 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv,
1433 } 1433 }
1434 cyv[0] = cyv0; 1434 cyv[0] = cyv0;
1435 cyv[1] = cyv1; 1435 cyv[1] = cyv1;
1436 - ya0 = abs(cyv0); 1436 + ya0 = ::abs(cyv0);
1437 lb = 0; 1437 lb = 0;
1438 cg0 = cyv0; 1438 cg0 = cyv0;
1439 cg1 = cyv1; 1439 cg1 = cyv1;
1440 for (k=2;k<=n;k++) { 1440 for (k=2;k<=n;k++) {
1441 cyk = 2.0*(v0+k-1.0)*cg1/z-cg0; 1441 cyk = 2.0*(v0+k-1.0)*cg1/z-cg0;
1442 - yak = abs(cyk);  
1443 - ya1 = abs(cg0); 1442 + yak = ::abs(cyk);
  1443 + ya1 = ::abs(cg0);
1444 if ((yak < ya0) && (yak< ya1)) lb = k; 1444 if ((yak < ya0) && (yak< ya1)) lb = k;
1445 cyv[k] = cyk; 1445 cyv[k] = cyk;
1446 cg0 = cg1; 1446 cg0 = cg1;
1447 cg1 = cyk; 1447 cg1 = cyk;
1448 } 1448 }
1449 lb0 = 0; 1449 lb0 = 0;
1450 - if ((lb > 4) && (imag(z) != 0.0)) { 1450 + if ((lb > 4) && (::imag(z) != 0.0)) {
1451 while(lb != lb0) { 1451 while(lb != lb0) {
1452 ch2 = cone; 1452 ch2 = cone;
1453 ch1 = czero; 1453 ch1 = czero;
@@ -1470,7 +1470,7 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv, @@ -1470,7 +1470,7 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv,
1470 cp21 = ch2; 1470 cp21 = ch2;
1471 if (lb == n) 1471 if (lb == n)
1472 cjv[lb+1] = 2.0*(lb+v0)*cjv[lb]/z-cjv[lb-1]; 1472 cjv[lb+1] = 2.0*(lb+v0)*cjv[lb]/z-cjv[lb-1];
1473 - if (abs(cjv[0]) > abs(cjv[1])) { 1473 + if (::abs(cjv[0]) > ::abs(cjv[1])) {
1474 cyv[lb+1] = (cjv[lb+1]*cyv0-2.0*cp11/(M_PI*z))/cjv[0]; 1474 cyv[lb+1] = (cjv[lb+1]*cyv0-2.0*cp11/(M_PI*z))/cjv[0];
1475 cyv[lb] = (cjv[lb]*cyv0+2.0*cp12/(M_PI*z))/cjv[0]; 1475 cyv[lb] = (cjv[lb]*cyv0+2.0*cp12/(M_PI*z))/cjv[0];
1476 } 1476 }
@@ -1495,8 +1495,8 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv, @@ -1495,8 +1495,8 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv,
1495 cyl2 = cylk; 1495 cyl2 = cylk;
1496 } 1496 }
1497 for (k=2;k<=n;k++) { 1497 for (k=2;k<=n;k++) {
1498 - wa = abs(cyv[k]);  
1499 - if (wa < abs(cyv[k-1])) lb = k; 1498 + wa = ::abs(cyv[k]);
  1499 + if (wa < ::abs(cyv[k-1])) lb = k;
1500 } 1500 }
1501 } 1501 }
1502 } 1502 }
@@ -1515,12 +1515,18 @@ int cbessjyva_sph(int v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv, @@ -1515,12 +1515,18 @@ int cbessjyva_sph(int v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv,
1515 //first, compute the bessel functions of fractional order 1515 //first, compute the bessel functions of fractional order
1516 cbessjyva<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); 1516 cbessjyva<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
1517 1517
  1518 + if(z == 0){ //handle degenerate case of z = 0
  1519 + memset(cjv, 0, sizeof(P) * (v+1));
  1520 + cjv[0] = 1;
  1521 + }
  1522 +
1518 //iterate through each and scale 1523 //iterate through each and scale
1519 for(int n = 0; n<=v; n++) 1524 for(int n = 0; n<=v; n++)
1520 { 1525 {
1521 -  
1522 - cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));  
1523 - cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); 1526 + if(z != 0){ //handle degenerate case of z = 0
  1527 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  1528 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
  1529 + }
1524 1530
1525 cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); 1531 cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
1526 cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); 1532 cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
stim/math/circle.h
@@ -17,7 +17,7 @@ class circle : plane&lt;T&gt; @@ -17,7 +17,7 @@ class circle : plane&lt;T&gt;
17 17
18 private: 18 private:
19 19
20 - stim::vec<T> Y; 20 + stim::vec3<T> Y;
21 21
22 CUDA_CALLABLE void 22 CUDA_CALLABLE void
23 init() 23 init()
@@ -48,7 +48,7 @@ public: @@ -48,7 +48,7 @@ public:
48 circle(T size, T z_pos = (T)0) : plane<T>() 48 circle(T size, T z_pos = (T)0) : plane<T>()
49 { 49 {
50 init(); 50 init();
51 - center(stim::vec<T>(0,0,z_pos)); 51 + center(stim::vec3<T>(0,0,z_pos));
52 scale(size); 52 scale(size);
53 } 53 }
54 54
@@ -56,7 +56,7 @@ public: @@ -56,7 +56,7 @@ public:
56 ///@param c: x,y,z location of the center. 56 ///@param c: x,y,z location of the center.
57 ///@param n: x,y,z direction of the normal. 57 ///@param n: x,y,z direction of the normal.
58 CUDA_CALLABLE 58 CUDA_CALLABLE
59 - circle(vec<T> c, vec<T> n = vec<T>(0,0,1)) : plane<T>() 59 + circle(vec3<T> c, vec3<T> n = vec3<T>(0,0,1)) : plane<T>()
60 { 60 {
61 center(c); 61 center(c);
62 normal(n); 62 normal(n);
@@ -68,7 +68,7 @@ public: @@ -68,7 +68,7 @@ public:
68 ///@param s: size of the rectangle. 68 ///@param s: size of the rectangle.
69 ///@param n: x,y,z direction of the normal. 69 ///@param n: x,y,z direction of the normal.
70 CUDA_CALLABLE 70 CUDA_CALLABLE
71 - circle(vec<T> c, T s, vec<T> n = vec<T>(0,0,1)) : plane<T>() 71 + circle(vec3<T> c, T s, vec3<T> n = vec3<T>(0,0,1)) : plane<T>()
72 { 72 {
73 init(); 73 init();
74 center(c); 74 center(c);
@@ -82,7 +82,7 @@ public: @@ -82,7 +82,7 @@ public:
82 ///@param n: x,y,z direction of the normal. 82 ///@param n: x,y,z direction of the normal.
83 ///@param u: x,y,z direction for the zero vector (from where the rotation starts) 83 ///@param u: x,y,z direction for the zero vector (from where the rotation starts)
84 CUDA_CALLABLE 84 CUDA_CALLABLE
85 - circle(vec<T> c, T s, vec<T> n = vec<T>(0,0,1), vec<T> u = vec<T>(1, 0, 0)) : plane<T>() 85 + circle(vec3<T> c, T s, vec3<T> n = vec3<T>(0,0,1), vec3<T> u = vec3<T>(1, 0, 0)) : plane<T>()
86 { 86 {
87 init(); 87 init();
88 setU(u); 88 setU(u);
@@ -103,16 +103,15 @@ public: @@ -103,16 +103,15 @@ public:
103 ///sets the normal for the cirlce 103 ///sets the normal for the cirlce
104 ///@param n: x,y,z direction of the normal. 104 ///@param n: x,y,z direction of the normal.
105 CUDA_CALLABLE void 105 CUDA_CALLABLE void
106 - normal(vec<T> n) 106 + normal(vec3<T> n)
107 { 107 {
108 rotate(n, Y); 108 rotate(n, Y);
109 } 109 }
110 110
111 ///sets the center of the circle. 111 ///sets the center of the circle.
112 ///@param n: x,y,z location of the center. 112 ///@param n: x,y,z location of the center.
113 - CUDA_CALLABLE T  
114 - center(vec<T> p)  
115 - { 113 + CUDA_CALLABLE void
  114 + center(vec3<T> p){
116 this->P = p; 115 this->P = p;
117 } 116 }
118 117
@@ -127,17 +126,17 @@ public: @@ -127,17 +126,17 @@ public:
127 } 126 }
128 127
129 ///get the world space value given the planar coordinates a, b in [0, 1] 128 ///get the world space value given the planar coordinates a, b in [0, 1]
130 - CUDA_CALLABLE stim::vec<T> p(T a, T b) 129 + CUDA_CALLABLE stim::vec3<T> p(T a, T b)
131 { 130 {
132 - stim::vec<T> result; 131 + stim::vec3<T> result;
133 132
134 - vec<T> A = this->P - this->U * (T)0.5 - Y * (T)0.5; 133 + vec3<T> A = this->P - this->U * (T)0.5 - Y * (T)0.5;
135 result = A + this->U * a + Y * b; 134 result = A + this->U * a + Y * b;
136 return result; 135 return result;
137 } 136 }
138 137
139 ///parenthesis operator returns the world space given rectangular coordinates a and b in [0 1] 138 ///parenthesis operator returns the world space given rectangular coordinates a and b in [0 1]
140 - CUDA_CALLABLE stim::vec<T> operator()(T a, T b) 139 + CUDA_CALLABLE stim::vec3<T> operator()(T a, T b)
141 { 140 {
142 return p(a,b); 141 return p(a,b);
143 } 142 }
@@ -145,11 +144,11 @@ public: @@ -145,11 +144,11 @@ public:
145 ///returns a vector with the points on the initialized circle. 144 ///returns a vector with the points on the initialized circle.
146 ///connecting the points results in a circle. 145 ///connecting the points results in a circle.
147 ///@param n: integer for the number of points representing the circle. 146 ///@param n: integer for the number of points representing the circle.
148 - std::vector<stim::vec<T> > 147 + std::vector<stim::vec3<T> >
149 getPoints(int n) 148 getPoints(int n)
150 { 149 {
151 - std::vector<stim::vec<T> > result;  
152 - stim::vec<T> point; 150 + std::vector<stim::vec3<T> > result;
  151 + stim::vec3<T> point;
153 T x,y; 152 T x,y;
154 float step = 360.0/(float) n; 153 float step = 360.0/(float) n;
155 for(float j = 0; j <= 360.0; j += step) 154 for(float j = 0; j <= 360.0; j += step)
@@ -164,7 +163,7 @@ public: @@ -164,7 +163,7 @@ public:
164 ///returns a vector with the points on the initialized circle. 163 ///returns a vector with the points on the initialized circle.
165 ///connecting the points results in a circle. 164 ///connecting the points results in a circle.
166 ///@param n: integer for the number of points representing the circle. 165 ///@param n: integer for the number of points representing the circle.
167 - stim::vec<T> 166 + stim::vec3<T>
168 p(T theta) 167 p(T theta)
169 { 168 {
170 T x,y; 169 T x,y;
stim/math/constants.h
1 #ifndef STIM_CONSTANTS_H 1 #ifndef STIM_CONSTANTS_H
2 #define STIM_CONSTANTS_H 2 #define STIM_CONSTANTS_H
3 3
  4 +#include "stim/cuda/cudatools/callable.h"
4 namespace stim{ 5 namespace stim{
5 const double PI = 3.1415926535897932384626433832795028841971693993751058209749445923078164062862; 6 const double PI = 3.1415926535897932384626433832795028841971693993751058209749445923078164062862;
6 const double TAU = 2 * stim::PI; 7 const double TAU = 2 * stim::PI;
stim/math/matrix.h
@@ -5,6 +5,7 @@ @@ -5,6 +5,7 @@
5 #include <string.h> 5 #include <string.h>
6 #include <iostream> 6 #include <iostream>
7 #include <stim/math/vector.h> 7 #include <stim/math/vector.h>
  8 +#include <stim/math/vec3.h>
8 #include <stim/cuda/cudatools/callable.h> 9 #include <stim/cuda/cudatools/callable.h>
9 10
10 namespace stim{ 11 namespace stim{
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 #define STIM_PLANE_H 2 #define STIM_PLANE_H
3 3
4 #include <iostream> 4 #include <iostream>
5 -#include <stim/math/vector.h> 5 +#include <stim/math/vec3.h>
6 #include <stim/cuda/cudatools/callable.h> 6 #include <stim/cuda/cudatools/callable.h>
7 #include <stim/math/quaternion.h> 7 #include <stim/math/quaternion.h>
8 8
@@ -22,17 +22,17 @@ template &lt;typename T&gt; @@ -22,17 +22,17 @@ template &lt;typename T&gt;
22 class plane 22 class plane
23 { 23 {
24 protected: 24 protected:
25 - stim::vec<T> P;  
26 - stim::vec<T> N;  
27 - stim::vec<T> U; 25 + stim::vec3<T> P;
  26 + stim::vec3<T> N;
  27 + stim::vec3<T> U;
28 28
29 ///Initializes the plane with standard coordinates. 29 ///Initializes the plane with standard coordinates.
30 /// 30 ///
31 CUDA_CALLABLE void init() 31 CUDA_CALLABLE void init()
32 { 32 {
33 - P = stim::vec<T>(0, 0, 0);  
34 - N = stim::vec<T>(0, 0, 1);  
35 - U = stim::vec<T>(1, 0, 0); 33 + P = stim::vec3<T>(0, 0, 0);
  34 + N = stim::vec3<T>(0, 0, 1);
  35 + U = stim::vec3<T>(1, 0, 0);
36 } 36 }
37 37
38 public: 38 public:
@@ -42,7 +42,7 @@ class plane @@ -42,7 +42,7 @@ class plane
42 init(); 42 init();
43 } 43 }
44 44
45 - CUDA_CALLABLE plane(vec<T> n, vec<T> p = vec<T>(0, 0, 0)) 45 + CUDA_CALLABLE plane(vec3<T> n, vec3<T> p = vec3<T>(0, 0, 0))
46 { 46 {
47 init(); 47 init();
48 P = p; 48 P = p;
@@ -56,11 +56,11 @@ class plane @@ -56,11 +56,11 @@ class plane
56 } 56 }
57 57
58 //create a plane from three points (a triangle) 58 //create a plane from three points (a triangle)
59 - CUDA_CALLABLE plane(vec<T> a, vec<T> b, vec<T> c) 59 + CUDA_CALLABLE plane(vec3<T> a, vec3<T> b, vec3<T> c)
60 { 60 {
61 init(); 61 init();
62 P = c; 62 P = c;
63 - stim::vec<T> n = (c - a).cross(b - a); 63 + stim::vec3<T> n = (c - a).cross(b - a);
64 try 64 try
65 { 65 {
66 if(n.len() != 0) 66 if(n.len() != 0)
@@ -84,17 +84,17 @@ class plane @@ -84,17 +84,17 @@ class plane
84 84
85 } 85 }
86 86
87 - CUDA_CALLABLE vec<T> n() 87 + CUDA_CALLABLE vec3<T> n()
88 { 88 {
89 return N; 89 return N;
90 } 90 }
91 91
92 - CUDA_CALLABLE vec<T> p() 92 + CUDA_CALLABLE vec3<T> p()
93 { 93 {
94 return P; 94 return P;
95 } 95 }
96 96
97 - CUDA_CALLABLE vec<T> u() 97 + CUDA_CALLABLE vec3<T> u()
98 { 98 {
99 return U; 99 return U;
100 } 100 }
@@ -107,7 +107,7 @@ class plane @@ -107,7 +107,7 @@ class plane
107 } 107 }
108 108
109 //determines how a vector v intersects the plane (1 = intersects front, 0 = within plane, -1 = intersects back) 109 //determines how a vector v intersects the plane (1 = intersects front, 0 = within plane, -1 = intersects back)
110 - CUDA_CALLABLE int face(vec<T> v){ 110 + CUDA_CALLABLE int face(vec3<T> v){
111 111
112 T dprod = v.dot(N); //get the dot product between v and N 112 T dprod = v.dot(N); //get the dot product between v and N
113 113
@@ -121,46 +121,46 @@ class plane @@ -121,46 +121,46 @@ class plane
121 } 121 }
122 122
123 //determine on which side of the plane a point lies (1 = front, 0 = on the plane, -1 = bac k) 123 //determine on which side of the plane a point lies (1 = front, 0 = on the plane, -1 = bac k)
124 - CUDA_CALLABLE int side(vec<T> p){ 124 + CUDA_CALLABLE int side(vec3<T> p){
125 125
126 - vec<T> v = p - P; //get the vector from P to the query point p 126 + vec3<T> v = p - P; //get the vector from P to the query point p
127 127
128 return face(v); 128 return face(v);
129 } 129 }
130 130
131 //compute the component of v that is perpendicular to the plane 131 //compute the component of v that is perpendicular to the plane
132 - CUDA_CALLABLE vec<T> perpendicular(vec<T> v){ 132 + CUDA_CALLABLE vec3<T> perpendicular(vec3<T> v){
133 return N * v.dot(N); 133 return N * v.dot(N);
134 } 134 }
135 135
136 //compute the projection of v in the plane 136 //compute the projection of v in the plane
137 - CUDA_CALLABLE vec<T> parallel(vec<T> v){ 137 + CUDA_CALLABLE vec3<T> parallel(vec3<T> v){
138 return v - perpendicular(v); 138 return v - perpendicular(v);
139 } 139 }
140 140
141 - CUDA_CALLABLE void setU(vec<T> v) 141 + CUDA_CALLABLE void setU(vec3<T> v)
142 { 142 {
143 U = (parallel(v.norm())).norm(); 143 U = (parallel(v.norm())).norm();
144 } 144 }
145 145
146 - CUDA_CALLABLE void decompose(vec<T> v, vec<T>& para, vec<T>& perp){ 146 + CUDA_CALLABLE void decompose(vec3<T> v, vec3<T>& para, vec3<T>& perp){
147 perp = N * v.dot(N); 147 perp = N * v.dot(N);
148 para = v - perp; 148 para = v - perp;
149 } 149 }
150 150
151 //get both the parallel and perpendicular components of a vector v w.r.t. the plane 151 //get both the parallel and perpendicular components of a vector v w.r.t. the plane
152 - CUDA_CALLABLE void project(vec<T> v, vec<T> &v_par, vec<T> &v_perp){ 152 + CUDA_CALLABLE void project(vec3<T> v, vec3<T> &v_par, vec3<T> &v_perp){
153 153
154 v_perp = v.dot(N); 154 v_perp = v.dot(N);
155 v_par = v - v_perp; 155 v_par = v - v_perp;
156 } 156 }
157 157
158 //compute the reflection of v off of the plane 158 //compute the reflection of v off of the plane
159 - CUDA_CALLABLE vec<T> reflect(vec<T> v){ 159 + CUDA_CALLABLE vec3<T> reflect(vec3<T> v){
160 160
161 //compute the reflection using N_prime as the plane normal 161 //compute the reflection using N_prime as the plane normal
162 - vec<T> par = parallel(v);  
163 - vec<T> r = (-v) + par * 2; 162 + vec3<T> par = parallel(v);
  163 + vec3<T> r = (-v) + par * 2;
164 return r; 164 return r;
165 165
166 } 166 }
@@ -184,7 +184,7 @@ class plane @@ -184,7 +184,7 @@ class plane
184 } 184 }
185 185
186 186
187 - CUDA_CALLABLE void rotate(vec<T> n) 187 + CUDA_CALLABLE void rotate(vec3<T> n)
188 { 188 {
189 quaternion<T> q; 189 quaternion<T> q;
190 q.CreateRotation(N, n); 190 q.CreateRotation(N, n);
@@ -194,7 +194,7 @@ class plane @@ -194,7 +194,7 @@ class plane
194 194
195 } 195 }
196 196
197 - CUDA_CALLABLE void rotate(vec<T> n, vec<T> &Y) 197 + CUDA_CALLABLE void rotate(vec3<T> n, vec3<T> &Y)
198 { 198 {
199 quaternion<T> q; 199 quaternion<T> q;
200 q.CreateRotation(N, n); 200 q.CreateRotation(N, n);
@@ -205,7 +205,7 @@ class plane @@ -205,7 +205,7 @@ class plane
205 205
206 } 206 }
207 207
208 - CUDA_CALLABLE void rotate(vec<T> n, vec<T> &X, vec<T> &Y) 208 + CUDA_CALLABLE void rotate(vec3<T> n, vec3<T> &X, vec3<T> &Y)
209 { 209 {
210 quaternion<T> q; 210 quaternion<T> q;
211 q.CreateRotation(N, n); 211 q.CreateRotation(N, n);
stim/math/quaternion.h
@@ -43,6 +43,8 @@ public: @@ -43,6 +43,8 @@ public:
43 43
44 CUDA_CALLABLE void CreateRotation(vec3<T> from, vec3<T> to){ 44 CUDA_CALLABLE void CreateRotation(vec3<T> from, vec3<T> to){
45 45
  46 + from = from.norm();
  47 + to = to.norm();
46 vec3<T> r = from.cross(to); //compute the rotation vector 48 vec3<T> r = from.cross(to); //compute the rotation vector
47 T theta = asin(r.len()); //compute the angle of the rotation about r 49 T theta = asin(r.len()); //compute the angle of the rotation about r
48 //deal with a zero vector (both k and kn point in the same direction) 50 //deal with a zero vector (both k and kn point in the same direction)
@@ -217,12 +217,12 @@ public: @@ -217,12 +217,12 @@ public:
217 std::string str() const{ 217 std::string str() const{
218 std::stringstream ss; 218 std::stringstream ss;
219 219
220 - size_t N = size(); 220 + const size_t N = 3;
221 221
222 ss<<"["; 222 ss<<"[";
223 for(size_t i=0; i<N; i++) 223 for(size_t i=0; i<N; i++)
224 { 224 {
225 - ss<<at(i); 225 + ss<<ptr[i];
226 if(i != N-1) 226 if(i != N-1)
227 ss<<", "; 227 ss<<", ";
228 } 228 }
@@ -230,7 +230,10 @@ public: @@ -230,7 +230,10 @@ public:
230 230
231 return ss.str(); 231 return ss.str();
232 } 232 }
233 - }; //end class triple 233 +
  234 + size_t size(){ return 3; }
  235 +
  236 + }; //end class vec3
234 } //end namespace stim 237 } //end namespace stim
235 238
236 /// Multiply a vector by a constant when the vector is on the right hand side 239 /// Multiply a vector by a constant when the vector is on the right hand side
stim/math/vector.h
@@ -317,6 +317,15 @@ struct vec : public std::vector&lt;T&gt; @@ -317,6 +317,15 @@ struct vec : public std::vector&lt;T&gt;
317 return *this; 317 return *this;
318 } 318 }
319 319
  320 + /// Cast to a vec3
  321 + operator vec3<T>(){
  322 + vec3<T> r;
  323 + size_t N = std::min<size_t>(size(), 3);
  324 + for(size_t i = 0; i < N; i++)
  325 + r[i] = at(i);
  326 + return r;
  327 + }
  328 +
320 /// Casting and assignment 329 /// Casting and assignment
321 template<typename Y> 330 template<typename Y>
322 vec<T> & operator=(vec<Y> rhs){ 331 vec<T> & operator=(vec<Y> rhs){
stim/optics/mie.h 0 โ†’ 100644
  1 +#ifndef STIM_MIE_H
  2 +#define STIM_MIE_H
  3 +
  4 +#include "scalarwave.h"
  5 +#include "../math/bessel.h"
  6 +#include "../cuda/cudatools/devices.h"
  7 +#include <cmath>
  8 +
  9 +namespace stim{
  10 +
  11 +
  12 +/// Calculate the scattering coefficients for a spherical scatterer
  13 +template<typename T>
  14 +void B_coefficients(stim::complex<T>* B, T a, T k, stim::complex<T> n, int Nl){
  15 +
  16 + //temporary variables
  17 + double vm; //allocate space to store the return values for the bessel function calculation
  18 + double* j_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
  19 + double* y_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
  20 + double* dj_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
  21 + double* dy_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
  22 +
  23 + stim::complex<double>* j_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  24 + stim::complex<double>* y_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  25 + stim::complex<double>* dj_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  26 + stim::complex<double>* dy_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  27 +
  28 + double ka = k * a; //store k*a (argument for spherical bessel and Hankel functions)
  29 + stim::complex<double> kna = k * n * a; //store k*n*a (argument for spherical bessel functions and derivatives)
  30 +
  31 + stim::bessjyv_sph<double>(Nl, ka, vm, j_ka, y_ka, dj_ka, dy_ka); //calculate bessel functions and derivatives for k*a
  32 + stim::cbessjyva_sph<double>(Nl, kna, vm, j_kna, y_kna, dj_kna, dy_kna); //calculate complex bessel functions for k*n*a
  33 +
  34 + stim::complex<double> h_ka, dh_ka;
  35 + stim::complex<double> numerator, denominator;
  36 + stim::complex<double> i(0, 1);
  37 + for(int l = 0; l <= Nl; l++){
  38 + h_ka.r = j_ka[l];
  39 + h_ka.i = y_ka[l];
  40 + dh_ka.r = dj_ka[l];
  41 + dh_ka.i = dy_ka[l];
  42 +
  43 + numerator = j_ka[l] * dj_kna[l] * (stim::complex<double>)n - j_kna[l] * dj_ka[l];
  44 + denominator = j_kna[l] * dh_ka - h_ka * dj_kna[l] * (stim::complex<double>)n;
  45 + B[l] = (2 * l + 1) * pow(i, l) * numerator / denominator;
  46 + std::cout<<B[l]<<std::endl;
  47 + }
  48 +}
  49 +
  50 +template<typename T>
  51 +void A_coefficients(stim::complex<T>* A, T a, T k, stim::complex<T> n, int Nl){
  52 + //temporary variables
  53 + double vm; //allocate space to store the return values for the bessel function calculation
  54 + double* j_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
  55 + double* y_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
  56 + double* dj_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
  57 + double* dy_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
  58 +
  59 + stim::complex<double>* j_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  60 + stim::complex<double>* y_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  61 + stim::complex<double>* dj_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  62 + stim::complex<double>* dy_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  63 +
  64 + double ka = k * a; //store k*a (argument for spherical bessel and Hankel functions)
  65 + stim::complex<double> kna = k * n * a; //store k*n*a (argument for spherical bessel functions and derivatives)
  66 +
  67 + stim::bessjyv_sph<double>(Nl, ka, vm, j_ka, y_ka, dj_ka, dy_ka); //calculate bessel functions and derivatives for k*a
  68 + stim::cbessjyva_sph<double>(Nl, kna, vm, j_kna, y_kna, dj_kna, dy_kna); //calculate complex bessel functions for k*n*a
  69 +
  70 + stim::complex<double> h_ka, dh_ka;
  71 + stim::complex<double> numerator, denominator;
  72 + stim::complex<double> i(0, 1);
  73 + for(size_t l = 0; l <= Nl; l++){
  74 + h_ka.r = j_ka[l];
  75 + h_ka.i = y_ka[l];
  76 + dh_ka.r = dj_ka[l];
  77 + dh_ka.i = dy_ka[l];
  78 +
  79 + numerator = j_ka[l] * dh_ka - dj_ka[l] * h_ka;
  80 + denominator = j_kna[l] * dh_ka - h_ka * dj_kna[l] * (stim::complex<double>)n;
  81 + A[l] = (2 * l + 1) * pow(i, l) * numerator / denominator;
  82 + }
  83 +}
  84 +
  85 +#define LOCAL_NL 16
  86 +template<typename T>
  87 +__global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* hB, T kr_min, T dkr, size_t N_hB, int Nl){
  88 + extern __shared__ stim::complex<T> shared_hB[]; //declare the list of waves in shared memory
  89 +
  90 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  91 + if(i >= N) return; //exit if this thread is outside the array
  92 + stim::vec3<T> p;
  93 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  94 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  95 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  96 +
  97 + T r = p.len(); //calculate the distance from the sphere
  98 + if(r < a) return; //exit if the point is inside the sphere (we only calculate the internal field)
  99 + T k = W[0].kmag();
  100 + size_t NC = Nl + 1; //calculate the number of coefficients to be used
  101 + T kr = r * k; //calculate the thread value for k*r
  102 + T fij = (kr - kr_min)/dkr; //FP index into the spherical bessel LUT
  103 + size_t ij = (size_t) fij; //convert to an integral index
  104 + T alpha = fij - ij; //calculate the fractional portion of the index
  105 + size_t n0j = ij * (NC); //start of the first entry in the LUT
  106 + size_t n1j = (ij+1) * (NC); //start of the second entry in the LUT
  107 +
  108 + T cos_phi;
  109 + T Pl_2, Pl_1, Pl; //declare registers to store the previous two Legendre polynomials
  110 +
  111 + stim::complex<T> hBl;
  112 + stim::complex<T> Ei = 0; //create a register to store the result
  113 + int l;
  114 +
  115 + stim::complex<T> hlBl[LOCAL_NL+1];
  116 + int shared_start = threadIdx.x * (Nl - LOCAL_NL);
  117 +
  118 + #pragma unroll LOCAL_NL+1
  119 + for(l = 0; l <= LOCAL_NL; l++)
  120 + hlBl[l] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha );
  121 +
  122 + for(l = LOCAL_NL+1; l <= Nl; l++)
  123 + shared_hB[shared_start + (l - (LOCAL_NL+1))] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha );
  124 +
  125 + for(size_t w = 0; w < nW; w++){
  126 + cos_phi = p.norm().dot(W[w].kvec().norm()); //calculate the cosine of the angle between the k vector and the direction from the sphere
  127 + Pl_2 = 1;
  128 + Pl_1 = cos_phi;
  129 + Ei += W[w].E() * hlBl[0] * Pl_2;
  130 + Ei += W[w].E() * hlBl[1] * Pl_1;
  131 +
  132 + #pragma unroll LOCAL_NL-1
  133 + for(l = 2; l <= LOCAL_NL; l++){
  134 + Pl = ( (2 * l + 1) * cos_phi * Pl_1 - (l) * Pl_2 ) / (l+1);
  135 + Ei += W[w].E() * hlBl[l] * Pl;
  136 + Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1
  137 + Pl_1 = Pl;
  138 + }
  139 +
  140 + for(l = LOCAL_NL+1; l <= Nl; l++){
  141 + Pl = ( (2 * l + 1) * cos_phi * Pl_1 - (l) * Pl_2 ) / (l+1);
  142 + Ei += W[w].E() * shared_hB[shared_start + (l - (LOCAL_NL+1))] * Pl;
  143 + Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1
  144 + Pl_1 = Pl;
  145 +
  146 + }
  147 + }
  148 + E[i] += Ei; //copy the result to device memory
  149 +}
  150 +
  151 +template<typename T>
  152 +void gpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW, T a, stim::complex<T> n, stim::complex<T>* hB, T kr_min, T dkr, size_t N_hB, size_t Nl){
  153 +
  154 + size_t max_shared_mem = stim::sharedMemPerBlock();
  155 + int hBl_array = sizeof(stim::complex<T>) * (Nl + 1);
  156 + std::cout<<"hl*Bl array size: "<<hBl_array<<std::endl;
  157 + std::cout<<"shared memory: "<<max_shared_mem<<std::endl;
  158 + int threads = (max_shared_mem / hBl_array) / 32 * 32;
  159 + std::cout<<"threads per block: "<<threads<<std::endl;
  160 + dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  161 +
  162 + size_t shared_mem;
  163 + if(Nl <= LOCAL_NL) shared_mem = 0;
  164 + else shared_mem = threads * sizeof(stim::complex<T>) * (Nl - LOCAL_NL); //amount of shared memory to allocate
  165 + std::cout<<"shared memory allocated: "<<shared_mem<<std::endl;
  166 + cuda_scalar_mie_scatter<T><<< blocks, threads, shared_mem >>>(E, N, x, y, z, W, nW, a, n, hB, kr_min, dkr, N_hB, (int)Nl); //call the kernel
  167 +
  168 +}
  169 +
  170 +template<typename T>
  171 +__global__ void cuda_dist(T* r, T* x, T* y, T* z, size_t N){
  172 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  173 + if(i >= N) return; //exit if this thread is outside the array
  174 +
  175 + stim::vec3<T> p;
  176 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  177 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  178 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  179 +
  180 + r[i] = p.len();
  181 +}
  182 +/// Calculate the scalar Mie solution for the scattered field produced by a single plane wave
  183 +
  184 +/// @param E is a pointer to the destination field values
  185 +/// @param N is the number of points used to calculate the field
  186 +/// @param x is an array of x coordinates for each point, specified relative to the sphere (x = NULL assumes all zeros)
  187 +/// @param y is an array of y coordinates for each point, specified relative to the sphere (y = NULL assumes all zeros)
  188 +/// @param z is an array of z coordinates for each point, specified relative to the sphere (z = NULL assumes all zeros)
  189 +/// @param W is an array of planewaves that will be scattered
  190 +/// @param a is the radius of the sphere
  191 +/// @param n is the complex refractive index of the sphere
  192 +template<typename T>
  193 +void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, std::vector<stim::scalarwave<T>> W, T a, stim::complex<T> n, T r_spacing = 0.1){
  194 + //calculate the necessary number of orders required to represent the scattered field
  195 + T k = W[0].kmag();
  196 +
  197 + int Nl = (int)ceil(k*a + 4 * cbrt( k * a ) + 2);
  198 + if(Nl < LOCAL_NL) Nl = LOCAL_NL; //always do at least the minimum number of local operations (kernel optimization)
  199 + std::cout<<"Nl: "<<Nl<<std::endl;
  200 +
  201 + //calculate the scattering coefficients for the sphere
  202 + stim::complex<T>* B = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * (Nl + 1) ); //allocate space for the scattering coefficients
  203 + B_coefficients(B, a, k, n, Nl);
  204 +
  205 +#ifdef CUDA_FOUND
  206 + stim::complex<T>* dev_E; //allocate space for the field
  207 + cudaMalloc(&dev_E, N * sizeof(stim::complex<T>));
  208 + cudaMemcpy(dev_E, E, N * sizeof(stim::complex<T>), cudaMemcpyHostToDevice);
  209 + //cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>)); //set the field to zero (necessary because a sum is used)
  210 +
  211 + // COORDINATES
  212 + T* dev_x = NULL; //allocate space and copy the X coordinate (if specified)
  213 + if(x != NULL){
  214 + HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T)));
  215 + HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice));
  216 + }
  217 + T* dev_y = NULL; //allocate space and copy the Y coordinate (if specified)
  218 + if(y != NULL){
  219 + HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T)));
  220 + HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice));
  221 + }
  222 + T* dev_z = NULL; //allocate space and copy the Z coordinate (if specified)
  223 + if(z != NULL){
  224 + HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T)));
  225 + HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice));
  226 + }
  227 +
  228 + // PLANE WAVES
  229 + stim::scalarwave<T>* dev_W; //allocate space and copy plane waves
  230 + HANDLE_ERROR( cudaMalloc(&dev_W, sizeof(stim::scalarwave<T>) * W.size()) );
  231 + HANDLE_ERROR( cudaMemcpy(dev_W, &W[0], sizeof(stim::scalarwave<T>) * W.size(), cudaMemcpyHostToDevice) );
  232 +
  233 + // BESSEL FUNCTION LOOK-UP TABLE
  234 + //calculate the distance from the sphere center
  235 + T* dev_r;
  236 + HANDLE_ERROR( cudaMalloc(&dev_r, sizeof(T) * N) );
  237 +
  238 + int threads = stim::maxThreadsPerBlock();
  239 + dim3 blocks((unsigned)(N / threads + 1));
  240 + cuda_dist<T> <<< blocks, threads >>>(dev_r, dev_x, dev_y, dev_z, N);
  241 +
  242 + //Find the minimum and maximum values of r
  243 + cublasStatus_t stat;
  244 + cublasHandle_t handle;
  245 +
  246 + stat = cublasCreate(&handle); //create a cuBLAS handle
  247 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  248 + printf ("CUBLAS initialization failed\n");
  249 + exit(1);
  250 + }
  251 +
  252 + int i_min, i_max;
  253 + stat = cublasIsamin(handle, (int)N, dev_r, 1, &i_min);
  254 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  255 + printf ("CUBLAS Error: failed to calculate minimum r value.\n");
  256 + exit(1);
  257 + }
  258 + stat = cublasIsamax(handle, (int)N, dev_r, 1, &i_max);
  259 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  260 + printf ("CUBLAS Error: failed to calculate maximum r value.\n");
  261 + exit(1);
  262 + }
  263 +
  264 + T r_min, r_max; //allocate space to store the minimum and maximum values
  265 + HANDLE_ERROR( cudaMemcpy(&r_min, dev_r + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU
  266 + HANDLE_ERROR( cudaMemcpy(&r_max, dev_r + i_max, sizeof(T), cudaMemcpyDeviceToHost) );
  267 +
  268 +
  269 + //size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1); //number of values in the look-up table based on the user-specified spacing along r
  270 + size_t N_hB_lut = (size_t)((r_max - r_min) / r_spacing + 1);
  271 +
  272 + T kr_min = k * r_min;
  273 + T kr_max = k * r_max;
  274 +
  275 + //temporary variables
  276 + double vm; //allocate space to store the return values for the bessel function calculation
  277 + double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
  278 + double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
  279 + double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
  280 + double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
  281 +
  282 + size_t hB_bytes = sizeof(stim::complex<T>) * (Nl+1) * N_hB_lut;
  283 + stim::complex<T>* hB_lut = (stim::complex<T>*) malloc(hB_bytes); //pointer to the look-up table
  284 + T dkr = (kr_max - kr_min) / (N_hB_lut-1); //distance between values in the LUT
  285 + std::cout<<"LUT jl bytes: "<<hB_bytes<<std::endl;
  286 + stim::complex<T> hl;
  287 + for(size_t kri = 0; kri < N_hB_lut; kri++){ //for each value in the LUT
  288 + stim::bessjyv_sph<double>(Nl, kr_min + kri * dkr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl]
  289 + for(size_t l = 0; l <= Nl; l++){ //for each order
  290 + hl.r = (T)jv[l];
  291 + hl.i = (T)yv[l];
  292 +
  293 + hB_lut[kri * (Nl + 1) + l] = hl * B[l]; //store the bessel function result
  294 + }
  295 + }
  296 +
  297 + //stim::cpu2image<T>(hankel_lut, "hankel.bmp", Nl+1, Nlut_j, stim::cmBrewer);
  298 +
  299 + //Allocate device memory and copy everything to the GPU
  300 + stim::complex<T>* dev_hB_lut;
  301 + HANDLE_ERROR( cudaMalloc(&dev_hB_lut, hB_bytes) );
  302 + HANDLE_ERROR( cudaMemcpy(dev_hB_lut, hB_lut, hB_bytes, cudaMemcpyHostToDevice) );
  303 +
  304 + gpu_scalar_mie_scatter<T>(dev_E, N, dev_x, dev_y, dev_z, dev_W, W.size(), a, n, dev_hB_lut, kr_min, dkr, N_hB_lut, Nl);
  305 +
  306 + cudaMemcpy(E, dev_E, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost); //copy the field from device memory
  307 +
  308 + if(x != NULL) cudaFree(dev_x); //free everything
  309 + if(y != NULL) cudaFree(dev_y);
  310 + if(z != NULL) cudaFree(dev_z);
  311 + cudaFree(dev_E);
  312 +#else
  313 +
  314 +
  315 + //allocate space to store the bessel function call results
  316 + double vm;
  317 + double* j_kr = (double*) malloc( (Nl + 1) * sizeof(double) );
  318 + double* y_kr = (double*) malloc( (Nl + 1) * sizeof(double) );
  319 + double* dj_kr= (double*) malloc( (Nl + 1) * sizeof(double) );
  320 + double* dy_kr= (double*) malloc( (Nl + 1) * sizeof(double) );
  321 +
  322 + T* P = (T*) malloc( (Nl + 1) * sizeof(T) );
  323 +
  324 + T r, kr, cos_phi;
  325 + stim::complex<T> h;
  326 + for(size_t i = 0; i < N; i++){
  327 + stim::vec3<T> p; //declare a 3D point
  328 +
  329 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  330 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  331 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  332 + r = p.len();
  333 + if(r >= a){
  334 + for(size_t w = 0; w < W.size(); w++){
  335 + kr = p.len() * W[w].kmag(); //calculate k*r
  336 + stim::bessjyv_sph<double>(Nl, kr, vm, j_kr, y_kr, dj_kr, dy_kr);
  337 + cos_phi = p.norm().dot(W[w].kvec().norm()); //calculate the cosine of the angle from the propagating direction
  338 + stim::legendre<T>(Nl, cos_phi, P);
  339 +
  340 + for(size_t l = 0; l <= Nl; l++){
  341 + h.r = j_kr[l];
  342 + h.i = y_kr[l];
  343 + E[i] += W[w].E() * B[l] * h * P[l];
  344 + }
  345 + }
  346 + }
  347 + }
  348 +#endif
  349 +}
  350 +
  351 +template<typename T>
  352 +void cpu_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w, T a, stim::complex<T> n){
  353 + std::vector< stim::scalarwave<T> > W(1, w);
  354 + cpu_scalar_mie_scatter(E, N, x, y, z, W, a, n);
  355 +}
  356 +
  357 +/// Calculate the scalar Mie solution for the internal field produced by a single plane wave scattered by a sphere
  358 +
  359 +/// @param E is a pointer to the destination field values
  360 +/// @param N is the number of points used to calculate the field
  361 +/// @param x is an array of x coordinates for each point, specified relative to the sphere (x = NULL assumes all zeros)
  362 +/// @param y is an array of y coordinates for each point, specified relative to the sphere (y = NULL assumes all zeros)
  363 +/// @param z is an array of z coordinates for each point, specified relative to the sphere (z = NULL assumes all zeros)
  364 +/// @param w is a planewave that will be scattered
  365 +/// @param a is the radius of the sphere
  366 +/// @param n is the complex refractive index of the sphere
  367 +template<typename T>
  368 +void cpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > W, T a, stim::complex<T> n){
  369 +
  370 + //calculate the necessary number of orders required to represent the scattered field
  371 + T k = W[0].kmag();
  372 +
  373 + size_t Nl = ceil(k*a + 4 * cbrt( k * a ) + 2);
  374 + std::cout<<"Nl: "<<Nl<<std::endl;
  375 +
  376 + //calculate the scattering coefficients for the sphere
  377 + stim::complex<T>* A = (stim::complex<T>*) malloc( sizeof(stim::complex<T>) * (Nl + 1) ); //allocate space for the scattering coefficients
  378 + A_coefficients(A, a, k, n, Nl);
  379 +
  380 + //allocate space to store the bessel function call results
  381 + double vm;
  382 + stim::complex<double>* j_knr = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  383 + stim::complex<double>* y_knr = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  384 + stim::complex<double>* dj_knr= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  385 + stim::complex<double>* dy_knr= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  386 +
  387 + T* P = (T*) malloc( (Nl + 1) * sizeof(T) );
  388 +
  389 + T r, cos_phi;
  390 + stim::complex<double> knr;
  391 + stim::complex<T> h;
  392 + for(size_t i = 0; i < N; i++){
  393 + stim::vec3<T> p; //declare a 3D point
  394 +
  395 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  396 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  397 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
  398 + r = p.len();
  399 + if(r < a){
  400 + E[i] = 0;
  401 + for(size_t w = 0; w < W.size(); w++){
  402 + knr = (stim::complex<double>)n * p.len() * W[w].kmag(); //calculate k*n*r
  403 +
  404 + stim::cbessjyva_sph<double>(Nl, knr, vm, j_knr, y_knr, dj_knr, dy_knr);
  405 + if(r == 0)
  406 + cos_phi = 0;
  407 + else
  408 + cos_phi = p.norm().dot(W[w].kvec().norm()); //calculate the cosine of the angle from the propagating direction
  409 + stim::legendre<T>(Nl, cos_phi, P);
  410 +
  411 + for(size_t l = 0; l <= Nl; l++){
  412 + E[i] += W[w].E() * A[l] * (stim::complex<T>)j_knr[l] * P[l];
  413 + }
  414 + }
  415 + }
  416 + }
  417 +}
  418 +
  419 +template<typename T>
  420 +void cpu_scalar_mie_internal(stim::complex<T>* E, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w, T a, stim::complex<T> n){
  421 + std::vector< stim::scalarwave<T> > W(1, w);
  422 + cpu_scalar_mie_internal(E, N, x, y, z, W, a, n);
  423 +}
  424 +
  425 +}
  426 +
  427 +#endif
0 \ No newline at end of file 428 \ No newline at end of file
stim/optics/scalarbeam.h
@@ -5,7 +5,12 @@ @@ -5,7 +5,12 @@
5 #include "../optics/scalarwave.h" 5 #include "../optics/scalarwave.h"
6 #include "../math/bessel.h" 6 #include "../math/bessel.h"
7 #include "../math/legendre.h" 7 #include "../math/legendre.h"
  8 +#include "../cuda/cudatools/devices.h"
  9 +#include "../cuda/cudatools/timer.h"
  10 +#include <cublas_v2.h>
  11 +#include <math_constants.h>
8 #include <vector> 12 #include <vector>
  13 +#include <stdlib.h>
9 14
10 namespace stim{ 15 namespace stim{
11 16
@@ -105,10 +110,11 @@ public: @@ -105,10 +110,11 @@ public:
105 std::vector< scalarwave<T> > samples(N); //create a vector of plane waves 110 std::vector< scalarwave<T> > samples(N); //create a vector of plane waves
106 T kmag = (T)stim::TAU / lambda; //calculate the wavenumber 111 T kmag = (T)stim::TAU / lambda; //calculate the wavenumber
107 stim::complex<T> apw; //allocate space for the amplitude at the focal point 112 stim::complex<T> apw; //allocate space for the amplitude at the focal point
  113 + T a = (T)(stim::TAU * (1 - cos(asin(NA[0]))) / (double)N);
108 stim::vec3<T> kpw; //declare the new k-vector based on the focused plane wave direction 114 stim::vec3<T> kpw; //declare the new k-vector based on the focused plane wave direction
109 for(size_t i=0; i<N; i++){ //for each sample 115 for(size_t i=0; i<N; i++){ //for each sample
110 kpw = dirs[i] * kmag; //calculate the k-vector for the new plane wave 116 kpw = dirs[i] * kmag; //calculate the k-vector for the new plane wave
111 - apw = exp(stim::complex<T>(0, kpw.dot(-f))); //calculate the amplitude for the new plane wave 117 + apw = a * exp(stim::complex<T>(0, kpw.dot(-f))); //calculate the amplitude for the new plane wave
112 samples[i] = scalarwave<T>(kpw, apw); //create a plane wave based on the direction 118 samples[i] = scalarwave<T>(kpw, apw); //create a plane wave based on the direction
113 } 119 }
114 120
@@ -148,7 +154,7 @@ public: @@ -148,7 +154,7 @@ public:
148 /// Calculate the [0 Nl] terms for the aperture integral based on the give numerical aperture and center obscuration (optional) 154 /// Calculate the [0 Nl] terms for the aperture integral based on the give numerical aperture and center obscuration (optional)
149 /// @param C is a pointer to Nl + 1 values where the terms will be stored 155 /// @param C is a pointer to Nl + 1 values where the terms will be stored
150 template<typename T> 156 template<typename T>
151 -CUDA_CALLABLE void cpu_aperture_integral(T* C, size_t Nl, T NA, T NA_in = 0){ 157 +CUDA_CALLABLE void cpu_aperture_integral(T* C, int Nl, T NA, T NA_in = 0){
152 158
153 size_t table_bytes = (Nl + 1) * sizeof(T); //calculate the number of bytes required to store the terms 159 size_t table_bytes = (Nl + 1) * sizeof(T); //calculate the number of bytes required to store the terms
154 T cos_alpha_1 = cos(asin(NA_in)); //calculate the cosine of the angle subtended by the central obscuration 160 T cos_alpha_1 = cos(asin(NA_in)); //calculate the cosine of the angle subtended by the central obscuration
@@ -182,23 +188,156 @@ CUDA_CALLABLE void cpu_aperture_integral(T* C, size_t Nl, T NA, T NA_in = 0){ @@ -182,23 +188,156 @@ CUDA_CALLABLE void cpu_aperture_integral(T* C, size_t Nl, T NA, T NA_in = 0){
182 188
183 /// performs linear interpolation into a look-up table 189 /// performs linear interpolation into a look-up table
184 template<typename T> 190 template<typename T>
185 -T lut_lookup(T* lut, T val, size_t N, T min_val, T delta, size_t stride = 0){  
186 - size_t idx = (size_t)((val - min_val) / delta);  
187 - T alpha = val - idx * delta + min_val; 191 +CUDA_CALLABLE void lut_lookup(T* lut_values, T* lut, T val, size_t N, T min_val, T delta, size_t n_vals){
  192 + T idx = ((val - min_val) / delta);
  193 + size_t i = (size_t) idx;
  194 + T a1 = idx - i;
  195 + T a0 = 1 - a1;
  196 + size_t n0 = i * n_vals;
  197 + size_t n1 = (i+1) * n_vals;
  198 + for(size_t n = 0; n < n_vals; n++){
  199 + lut_values[n] = lut[n0 + n] * a0 + lut[n1 + n] * a1;
  200 + }
  201 +}
  202 +
  203 +template <typename T>
  204 +CUDA_CALLABLE stim::complex<T> clerp(stim::complex<T> v0, stim::complex<T> v1, T t) {
  205 + return stim::complex<T>( fma(t, v1.r, fma(-t, v0.r, v0.r)), fma(t, v1.i, fma(-t, v0.i, v0.i)) );
  206 +}
  207 +
  208 +template <typename T>
  209 +CUDA_CALLABLE T lerp(T v0, T v1, T t) {
  210 + return fma(t, v1, fma(-t, v0, v0));
  211 +}
188 212
189 - if(alpha == 0) return lut[idx];  
190 - else return lut[idx * stride] * (1 - alpha) + lut[ (idx+1) * stride] * alpha; 213 +#ifdef __CUDACC__
  214 +template<typename T>
  215 +__global__ void cuda_scalar_psf(stim::complex<T>* E, size_t N, T* r, T* phi, T k, T A, size_t Nl,
  216 + T* C,
  217 + T* lut_j, size_t Nj, T min_kr, T dkr){
  218 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  219 + if(i >= N) return; //exit if this thread is outside the array
  220 +
  221 + T cos_phi = cos(phi[i]); //calculate the thread value for cos(phi)
  222 + T kr = r[i] * k; //calculate the thread value for k*r
  223 + stim::complex<T> Ei = 0; //initialize the value of the field to zero
  224 + size_t NC = Nl + 1; //calculate the number of coefficients to be used
  225 +
  226 + T fij = (kr - min_kr)/dkr; //FP index into the spherical bessel LUT
  227 + size_t ij = (size_t) fij; //convert to an integral index
  228 + T a = fij - ij; //calculate the fractional portion of the index
  229 + size_t n0j = ij * (NC); //start of the first entry in the LUT
  230 + size_t n1j = (ij+1) * (NC); //start of the second entry in the LUT
  231 +
  232 + T jl; //declare register to store the spherical bessel function
  233 + T Pl_2, Pl_1; //declare registers to store the previous two Legendre polynomials
  234 + T Pl = 1; //initialize the current value for the Legendre polynomial
  235 + stim::complex<T> im(0, 1); //declare i (imaginary 1)
  236 + stim::complex<T> i_pow(1, 0); //i_pow stores the current value of i^l so it doesn't have to be re-computed every iteration
  237 + for(int l = 0; l <= Nl; l++){ //for each order
  238 + jl = lerp<T>( lut_j[n0j + l], lut_j[n1j + l], a ); //read jl from the LUT and interpolate the result
  239 + Ei += i_pow * jl * Pl * C[l]; //calculate the value for the field and sum
  240 + i_pow *= im; //multiply i^l * i for the next iteration
  241 + Pl_2 = Pl_1; //shift Pl_1 -> Pl_2 and Pl -> Pl_1
  242 + Pl_1 = Pl;
  243 + if(l == 0){ //computing Pl is done recursively, where the recursive relation
  244 + Pl = cos_phi; // requires the first two orders. This defines the second.
  245 + }
  246 + else{ //if this is not the first iteration, use the recursive relation to calculate Pl
  247 + Pl = ( (2 * (l+1) - 1) * cos_phi * Pl_1 - (l) * Pl_2 ) / (l+1);
  248 + }
  249 +
  250 + }
  251 + E[i] = Ei * A * 2 * CUDART_PI_F; //scale the integral by the amplitude
191 } 252 }
192 253
193 template<typename T> 254 template<typename T>
194 -void cpu_scalar_psf(stim::complex<T>* F, size_t N, T* r, T* phi, T lambda, T A, stim::vec3<T> f, T NA, T NA_in, int Nl){  
195 - T k = stim::TAU / lambda; 255 +void gpu_scalar_psf_local(stim::complex<T>* E, size_t N, T* r, T* phi, T lambda, T A, T NA, T NA_in, int Nl, T r_spacing){
196 256
197 - T* C = (T*) malloc( (Nl + 1) * sizeof(T) ); //allocate space for the aperture integral terms 257 + //Find the minimum and maximum values of r
  258 + cublasStatus_t stat;
  259 + cublasHandle_t handle;
  260 +
  261 + stat = cublasCreate(&handle); //create a cuBLAS handle
  262 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  263 + printf ("CUBLAS initialization failed\n");
  264 + exit(1);
  265 + }
  266 +
  267 + int i_min, i_max;
  268 + stat = cublasIsamin(handle, (int)N, r, 1, &i_min);
  269 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  270 + printf ("CUBLAS Error: failed to calculate minimum r value.\n");
  271 + exit(1);
  272 + }
  273 + stat = cublasIsamax(handle, (int)N, r, 1, &i_max);
  274 + if (stat != CUBLAS_STATUS_SUCCESS){ //test for failure
  275 + printf ("CUBLAS Error: failed to calculate maximum r value.\n");
  276 + exit(1);
  277 + }
  278 +
  279 + T r_min, r_max; //allocate space to store the minimum and maximum values
  280 + HANDLE_ERROR( cudaMemcpy(&r_min, r + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU
  281 + HANDLE_ERROR( cudaMemcpy(&r_max, r + i_max, sizeof(T), cudaMemcpyDeviceToHost) );
  282 +
  283 + T k = (T)stim::TAU / lambda; //calculate the wavenumber from lambda
  284 + size_t C_bytes = (Nl + 1) * sizeof(T);
  285 + T* C = (T*) malloc( C_bytes ); //allocate space for the aperture integral terms
  286 + cpu_aperture_integral(C, Nl, NA, NA_in); //calculate the aperture integral terms
  287 +
  288 + size_t Nlut_j = (size_t)((r_max - r_min) / r_spacing + 1); //number of values in the look-up table based on the user-specified spacing along r
  289 +
  290 + T kr_min = k * r_min;
  291 + T kr_max = k * r_max;
  292 +
  293 + //temporary variables
  294 + double vm; //allocate space to store the return values for the bessel function calculation
  295 + double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );
  296 + double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );
  297 + double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );
  298 + double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) );
  299 +
  300 + size_t lutj_bytes = sizeof(T) * (Nl+1) * Nlut_j;
  301 + T* bessel_lut = (T*) malloc(lutj_bytes); //pointer to the look-up table
  302 + T delta_kr = (kr_max - kr_min) / (Nlut_j-1); //distance between values in the LUT
  303 + std::cout<<"LUT jl bytes: "<<lutj_bytes<<std::endl;
  304 + for(size_t kri = 0; kri < Nlut_j; kri++){ //for each value in the LUT
  305 + stim::bessjyv_sph<double>(Nl, kr_min + kri * delta_kr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl]
  306 + for(size_t l = 0; l <= Nl; l++){ //for each order
  307 + bessel_lut[kri * (Nl + 1) + l] = (T)jv[l]; //store the bessel function result
  308 + }
  309 + }
  310 +
  311 + stim::cpu2image<T>(bessel_lut, "lut.bmp", Nl+1, Nlut_j, stim::cmBrewer);
  312 +
  313 + //Allocate device memory and copy everything to the GPU
  314 +
  315 + T* gpu_C;
  316 + HANDLE_ERROR( cudaMalloc(&gpu_C, C_bytes) );
  317 + HANDLE_ERROR( cudaMemcpy(gpu_C, C, C_bytes, cudaMemcpyHostToDevice) );
  318 + T* gpu_j_lut;
  319 + HANDLE_ERROR( cudaMalloc(&gpu_j_lut, lutj_bytes) );
  320 + HANDLE_ERROR( cudaMemcpy(gpu_j_lut, bessel_lut, lutj_bytes, cudaMemcpyHostToDevice) );
  321 +
  322 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  323 + dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  324 +
  325 + cuda_scalar_psf<T><<< blocks, threads >>>(E, N, r, phi, (T)stim::TAU/lambda, A, Nl, gpu_C, gpu_j_lut, Nlut_j, kr_min, delta_kr);
  326 +
  327 + //free the LUT and condenser tables
  328 + HANDLE_ERROR( cudaFree(gpu_C) );
  329 + HANDLE_ERROR( cudaFree(gpu_j_lut) );
  330 +}
  331 +#endif
  332 +
  333 +/// Calculate the analytical solution to a scalar point spread function given a set of spherical coordinates about the PSF (beam propagation along phi = theta = 0)
  334 +template<typename T>
  335 +void cpu_scalar_psf_local(stim::complex<T>* F, size_t N, T* r, T* phi, T lambda, T A, T NA, T NA_in, int Nl){
  336 + T k = (T)stim::TAU / lambda;
  337 + size_t C_bytes = (Nl + 1) * sizeof(T);
  338 + T* C = (T*) malloc( C_bytes ); //allocate space for the aperture integral terms
198 cpu_aperture_integral(C, Nl, NA, NA_in); //calculate the aperture integral terms 339 cpu_aperture_integral(C, Nl, NA, NA_in); //calculate the aperture integral terms
199 memset(F, 0, N * sizeof(stim::complex<T>)); 340 memset(F, 0, N * sizeof(stim::complex<T>));
200 -#ifdef NO_CUDA  
201 - memset(F, 0, N * sizeof(stim::complex<T>));  
202 T jl, Pl, kr, cos_phi; 341 T jl, Pl, kr, cos_phi;
203 342
204 double vm; 343 double vm;
@@ -225,71 +364,117 @@ void cpu_scalar_psf(stim::complex&lt;T&gt;* F, size_t N, T* r, T* phi, T lambda, T A, @@ -225,71 +364,117 @@ void cpu_scalar_psf(stim::complex&lt;T&gt;* F, size_t N, T* r, T* phi, T lambda, T A,
225 364
226 free(C); 365 free(C);
227 free(Pl_cos_phi); 366 free(Pl_cos_phi);
228 -#else  
229 - T min_r = r[0];  
230 - T max_r = r[0];  
231 - for(size_t i = 0; i < N; i++){ //find the minimum and maximum values of r (min and max distance from the focal point)  
232 - if(r[i] < min_r) min_r = r[i];  
233 - if(r[i] > max_r) max_r = r[i];  
234 - }  
235 - T min_kr = k * min_r;  
236 - T max_kr = k * max_r; 367 +}
237 368
238 - //temporary variables  
239 - double vm;  
240 - double* jv = (double*) malloc( (Nl + 1) * sizeof(double) );  
241 - double* yv = (double*) malloc( (Nl + 1) * sizeof(double) );  
242 - double* djv= (double*) malloc( (Nl + 1) * sizeof(double) );  
243 - double* dyv= (double*) malloc( (Nl + 1) * sizeof(double) ); 369 +/// Converts a set of cartesian points into spherical coordinates surrounding a point spread function (PSF)
  370 +/// @param r is the output distance from the PSF
  371 +/// @param phi is the non-symmetric direction about the PSF
  372 +/// @param x (x, y, z) are the cartesian coordinates in world space
  373 +/// @f is the focal point of the PSF in cartesian coordinates
  374 +/// @d is the propagation direction of the PSF in cartesian coordinates
  375 +template<typename T>
  376 +__global__ void cuda_cart2psf(T* r, T* phi, size_t N, T* x, T* y, T* z, stim::vec3<T> f, stim::quaternion<T> q){
244 377
245 - size_t Nlut = (size_t)sqrt(N) * 2;  
246 - T* bessel_lut = (T*) malloc(sizeof(T) * (Nl+1) * Nlut);  
247 - T delta_kr = (max_kr - min_kr) / (Nlut-1);  
248 - for(size_t kri = 0; kri < Nlut; kri++){  
249 - stim::bessjyv_sph<double>(Nl, min_kr + kri * delta_kr, vm, jv, yv, djv, dyv); //compute the list of spherical bessel functions from [0 Nl]  
250 - for(size_t l = 0; l <= Nl; l++){  
251 - bessel_lut[kri * (Nl + 1) + l] = (T)jv[l];  
252 - }  
253 - } 378 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array
  379 + if(i >= N) return; //exit if this thread is outside the array
254 380
255 - T* Pl_cos_phi = (T*) malloc((Nl + 1) * sizeof(T));  
256 - T kr, cos_phi, jl, Pl;  
257 - for(size_t n = 0; n < N; n++){ //for each point in the field  
258 - kr = k * r[n]; //calculate kr (the optical distance between the focal point and p)  
259 - cos_phi = std::cos(phi[n]); //calculate the cosine of phi  
260 - stim::legendre<T>(Nl, cos_phi, Pl_cos_phi); //calculate the [0 Nl] legendre polynomials for this point 381 + stim::vec3<T> p; //declare a 3D point
  382 +
  383 + (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
  384 + (y == NULL) ? p[1] = 0 : p[1] = y[i];
  385 + (z == NULL) ? p[2] = 0 : p[2] = z[i];
261 386
262 - for(int l = 0; l <= Nl; l++){  
263 - jl = lut_lookup<T>(&bessel_lut[l], kr, Nlut, min_kr, delta_kr, Nl+1);  
264 - Pl = Pl_cos_phi[l];  
265 - F[n] += pow(complex<T>(0, 1), l) * jl * Pl * C[l];  
266 - }  
267 - F[n] *= A * stim::TAU;  
268 - }  
269 -#endif 387 + p = p - f; //shift the point to the center of the PSF (focal point)
  388 + p = q.toMatrix3() * p; //rotate the point to align with the propagation direction
  389 +
  390 + stim::vec3<T> ps = p.cart2sph(); //convert from cartesian to spherical coordinates
  391 + r[i] = ps[0]; //store r
  392 + phi[i] = ps[2]; //phi = [0 pi]
270 } 393 }
271 394
  395 +#ifdef __CUDACC__
  396 +/// Calculate the analytical solution to a point spread function given a set of points in cartesian coordinates
  397 +template<typename T>
  398 +void gpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){
  399 +
  400 + T* gpu_r; //allocate space for the coordinates in r
  401 + HANDLE_ERROR( cudaMalloc(&gpu_r, sizeof(T) * N) );
  402 + T* gpu_phi;
  403 + HANDLE_ERROR( cudaMalloc(&gpu_phi, sizeof(T) * N) );
  404 + //stim::complex<T>* gpu_E;
  405 + //HANDLE_ERROR( cudaMalloc(&gpu_E, sizeof(stim::complex<T>) * N) );
  406 +
  407 + stim::quaternion<T> q; //create a quaternion
  408 + q.CreateRotation(d, stim::vec3<T>(0, 0, 1)); //create a mapping from the propagation direction to the PSF space
  409 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  410 + dim3 blocks( (unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  411 + cuda_cart2psf<T> <<< blocks, threads >>> (gpu_r, gpu_phi, N, x, y, z, f, q); //call the CUDA kernel to move the cartesian coordinates to PSF space
  412 +
  413 + gpu_scalar_psf_local(E, N, gpu_r, gpu_phi, lambda, A, NA, NA_in, Nl, r_spacing);
  414 +
  415 +}
  416 +#endif
272 417
273 template<typename T> 418 template<typename T>
274 -void cpu_scalar_psf(stim::complex<T>* F, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, T NA, T NA_in, int Nl){ 419 +void cpu_scalar_psf_cart(stim::complex<T>* E, size_t N, T* x, T* y, T* z, T lambda, T A, stim::vec3<T> f, stim::vec3<T> d, T NA, T NA_in, int Nl, T r_spacing = 1){
  420 +
  421 +// If CUDA is available, copy the cartesian points to the GPU and evaluate them in a kernel
  422 +#ifdef __CUDACC__
  423 +
  424 + T* gpu_x = NULL;
  425 + if(x != NULL){
  426 + HANDLE_ERROR( cudaMalloc(&gpu_x, sizeof(T) * N) );
  427 + HANDLE_ERROR( cudaMemcpy(gpu_x, x, sizeof(T) * N, cudaMemcpyHostToDevice) );
  428 + }
  429 + T* gpu_y = NULL;
  430 + if(y != NULL){
  431 + HANDLE_ERROR( cudaMalloc(&gpu_y, sizeof(T) * N) );
  432 + HANDLE_ERROR( cudaMemcpy(gpu_y, y, sizeof(T) * N, cudaMemcpyHostToDevice) );
  433 + }
  434 + T* gpu_z = NULL;
  435 + if(z != NULL){
  436 + HANDLE_ERROR( cudaMalloc(&gpu_z, sizeof(T) * N) );
  437 + HANDLE_ERROR( cudaMemcpy(gpu_z, z, sizeof(T) * N, cudaMemcpyHostToDevice) );
  438 + }
  439 +
  440 + stim::complex<T>* gpu_E;
  441 + HANDLE_ERROR( cudaMalloc(&gpu_E, sizeof(stim::complex<T>) * N) );
  442 + HANDLE_ERROR( cudaMemcpy(gpu_E, E, sizeof(stim::complex<T>) * N, cudaMemcpyHostToDevice) );
  443 + gpu_scalar_psf_cart<T>(gpu_E, N, gpu_x, gpu_y, gpu_z, lambda, A, f, d, NA, NA_in, Nl, r_spacing);
  444 + HANDLE_ERROR( cudaMemcpy(E, gpu_E, sizeof(stim::complex<T>) * N, cudaMemcpyDeviceToHost) );
  445 +
  446 + HANDLE_ERROR( cudaFree(gpu_x) );
  447 + HANDLE_ERROR( cudaFree(gpu_y) );
  448 + HANDLE_ERROR( cudaFree(gpu_z) );
  449 + HANDLE_ERROR( cudaFree(gpu_E) );
  450 +
  451 +#else
275 T* r = (T*) malloc(N * sizeof(T)); //allocate space for p in spherical coordinates 452 T* r = (T*) malloc(N * sizeof(T)); //allocate space for p in spherical coordinates
276 T* phi = (T*) malloc(N * sizeof(T)); // only r and phi are necessary (the scalar PSF is symmetric about theta) 453 T* phi = (T*) malloc(N * sizeof(T)); // only r and phi are necessary (the scalar PSF is symmetric about theta)
277 454
278 - stim::vec3<T> p, ps; 455 + stim::quaternion<T> q;
  456 + q.CreateRotation(d, stim::vec3<T>(0, 0, 1));
  457 + stim::matrix<T, 3> R = q.toMatrix3();
  458 + stim::vec3<T> p, ps, ds;
279 for(size_t i = 0; i < N; i++){ 459 for(size_t i = 0; i < N; i++){
280 (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions 460 (x == NULL) ? p[0] = 0 : p[0] = x[i]; // test for NULL values and set positions
281 (y == NULL) ? p[1] = 0 : p[1] = y[i]; 461 (y == NULL) ? p[1] = 0 : p[1] = y[i];
282 (z == NULL) ? p[2] = 0 : p[2] = z[i]; 462 (z == NULL) ? p[2] = 0 : p[2] = z[i];
283 463
  464 + p = p - f;
  465 +
  466 + p = R * p; //rotate the cartesian point
  467 +
284 ps = p.cart2sph(); //convert from cartesian to spherical coordinates 468 ps = p.cart2sph(); //convert from cartesian to spherical coordinates
285 r[i] = ps[0]; //store r 469 r[i] = ps[0]; //store r
286 phi[i] = ps[2]; //phi = [0 pi] 470 phi[i] = ps[2]; //phi = [0 pi]
287 } 471 }
288 472
289 - cpu_scalar_psf(F, N, r, phi, lambda, A, f, NA, NA_in, Nl); //call the spherical coordinate CPU function 473 + cpu_scalar_psf_local(F, N, r, phi, lambda, A, NA, NA_in, Nl); //call the spherical coordinate CPU function
290 474
291 free(r); 475 free(r);
292 free(phi); 476 free(phi);
  477 +#endif
293 } 478 }
294 479
295 } //end namespace stim 480 } //end namespace stim
stim/optics/scalarwave.h
@@ -23,7 +23,7 @@ namespace stim{ @@ -23,7 +23,7 @@ namespace stim{
23 template<typename T> 23 template<typename T>
24 class scalarwave{ 24 class scalarwave{
25 25
26 -protected: 26 +public:
27 27
28 stim::vec3<T> k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda 28 stim::vec3<T> k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda
29 stim::complex<T> E0; //amplitude 29 stim::complex<T> E0; //amplitude
@@ -60,7 +60,7 @@ public: @@ -60,7 +60,7 @@ public:
60 return k.len(); 60 return k.len();
61 } 61 }
62 62
63 - CUDA_CALLABLE vec3< complex<T> > E(){ 63 + CUDA_CALLABLE complex<T> E(){
64 return E0; 64 return E0;
65 } 65 }
66 66
@@ -235,6 +235,32 @@ void gpu_scalarwave(stim::complex&lt;T&gt;* F, size_t N, T* x, T* y, T* z, stim::scala @@ -235,6 +235,32 @@ void gpu_scalarwave(stim::complex&lt;T&gt;* F, size_t N, T* x, T* y, T* z, stim::scala
235 cuda_scalarwave<T><<< blocks, threads >>>(F, N, x, y, z, w); //call the kernel 235 cuda_scalarwave<T><<< blocks, threads >>>(F, N, x, y, z, w); //call the kernel
236 } 236 }
237 237
  238 +template<typename T>
  239 +void gpu_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW){
  240 +
  241 + size_t wave_bytes = sizeof(stim::scalarwave<T>);
  242 + size_t shared_bytes = stim::sharedMemPerBlock(); //calculate the maximum amount of shared memory available
  243 + size_t max_batch = shared_bytes / wave_bytes; //calculate number of plane waves that will fit into shared memory
  244 + size_t batch_bytes = min(nW, max_batch) * wave_bytes; //initialize the batch size (in bytes) to the maximum batch required
  245 +
  246 + stim::scalarwave<T>* batch_W;
  247 + HANDLE_ERROR(cudaMalloc(&batch_W, batch_bytes)); //allocate memory for a single batch of plane waves
  248 +
  249 + int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device
  250 + dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks
  251 +
  252 + size_t batch_size; //declare a variable to store the size of the current batch
  253 + size_t waves_processed = 0; //initialize the number of waves processed to zero
  254 + while(waves_processed < nW){ //while there are still waves to be processed
  255 + batch_size = min<size_t>(max_batch, nW - waves_processed); //process either a whole batch, or whatever is left
  256 + batch_bytes = batch_size * sizeof(stim::scalarwave<T>);
  257 + HANDLE_ERROR(cudaMemcpy(batch_W, W + waves_processed, batch_bytes, cudaMemcpyDeviceToDevice)); //copy the plane waves into global memory
  258 + cuda_scalarwave<T><<< blocks, threads, batch_bytes >>>(F, N, x, y, z, batch_W, batch_size); //call the kernel
  259 + waves_processed += batch_size; //increment the counter indicating how many waves have been processed
  260 + }
  261 + cudaFree(batch_W);
  262 +}
  263 +
238 /// Sums a series of coherent plane waves at a specified point 264 /// Sums a series of coherent plane waves at a specified point
239 /// @param field is the output array of field values corresponding to each input point 265 /// @param field is the output array of field values corresponding to each input point
240 /// @param x is an array of x coordinates for the field point 266 /// @param x is an array of x coordinates for the field point
@@ -245,24 +271,13 @@ void gpu_scalarwave(stim::complex&lt;T&gt;* F, size_t N, T* x, T* y, T* z, stim::scala @@ -245,24 +271,13 @@ void gpu_scalarwave(stim::complex&lt;T&gt;* F, size_t N, T* x, T* y, T* z, stim::scala
245 /// @param A is the list of amplitudes for each wave 271 /// @param A is the list of amplitudes for each wave
246 /// @param S is the list of propagation directions for each wave 272 /// @param S is the list of propagation directions for each wave
247 template<typename T> 273 template<typename T>
248 -void cpu_sum_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > w_array){  
249 - size_t S = w_array.size(); //store the number of waves  
250 -#ifdef NO_CUDA  
251 - memset(F, 0, N * sizeof(stim::complex<T>));  
252 - T px, py, pz;  
253 - for(size_t i = 0; i < N; i++){ // for each element in the array  
254 - (x == NULL) ? px = 0 : px = x[i]; // test for NULL values  
255 - (y == NULL) ? py = 0 : py = y[i];  
256 - (z == NULL) ? pz = 0 : pz = z[i];  
257 -  
258 - for(size_t s = 0; s < S; s++){  
259 - F[i] += w_array[s].pos(px, py, pz); //sum all plane waves at this point  
260 - }  
261 - }  
262 -#else 274 +void cpu_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > W){
  275 + size_t S = W.size(); //store the number of waves
  276 +#ifdef __CUDACC__
263 stim::complex<T>* dev_F; //allocate space for the field 277 stim::complex<T>* dev_F; //allocate space for the field
264 cudaMalloc(&dev_F, N * sizeof(stim::complex<T>)); 278 cudaMalloc(&dev_F, N * sizeof(stim::complex<T>));
265 - cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>)); //set the field to zero (necessary because a sum is used) 279 + cudaMemcpy(dev_F, F, N * sizeof(stim::complex<T>), cudaMemcpyHostToDevice);
  280 + //cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>)); //set the field to zero (necessary because a sum is used)
266 281
267 T* dev_x = NULL; //allocate space and copy the X coordinate (if specified) 282 T* dev_x = NULL; //allocate space and copy the X coordinate (if specified)
268 if(x != NULL){ 283 if(x != NULL){
@@ -282,28 +297,11 @@ void cpu_sum_scalarwaves(stim::complex&lt;T&gt;* F, size_t N, T* x, T* y, T* z, std::v @@ -282,28 +297,11 @@ void cpu_sum_scalarwaves(stim::complex&lt;T&gt;* F, size_t N, T* x, T* y, T* z, std::v
282 HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice)); 297 HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice));
283 } 298 }
284 299
285 - size_t wave_bytes = sizeof(stim::scalarwave<T>);  
286 - size_t shared_bytes = stim::sharedMemPerBlock(); //calculate the maximum amount of shared memory available  
287 - size_t array_bytes = w_array.size() * wave_bytes; //calculate the maximum number of bytes required for the planewave array  
288 - size_t max_batch = shared_bytes / wave_bytes; //calculate number of plane waves that will fit into shared memory  
289 - size_t num_batches = w_array.size() / max_batch + 1; //calculate the number of batches required to process all plane waves  
290 - size_t batch_bytes = min(w_array.size(), max_batch) * wave_bytes; //initialize the batch size (in bytes) to the maximum batch required  
291 -  
292 - stim::scalarwave<T>* dev_w;  
293 - HANDLE_ERROR(cudaMalloc(&dev_w, batch_bytes)); //allocate memory for a single batch of plane waves  
294 -  
295 - int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device  
296 - dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks 300 + stim::scalarwave<T>* dev_W;
  301 + HANDLE_ERROR( cudaMalloc(&dev_W, sizeof(stim::scalarwave<T>) * W.size()) );
  302 + HANDLE_ERROR( cudaMemcpy(dev_W, &W[0], sizeof(stim::scalarwave<T>) * W.size(), cudaMemcpyHostToDevice) );
297 303
298 - size_t batch_size; //declare a variable to store the size of the current batch  
299 - size_t waves_processed = 0; //initialize the number of waves processed to zero  
300 - while(waves_processed < w_array.size()){ //while there are still waves to be processed  
301 - batch_size = min<size_t>(max_batch, w_array.size() - waves_processed); //process either a whole batch, or whatever is left  
302 - batch_bytes = batch_size * sizeof(stim::scalarwave<T>);  
303 - HANDLE_ERROR(cudaMemcpy(dev_w, &w_array[waves_processed], batch_bytes, cudaMemcpyHostToDevice)); //copy the plane waves into global memory  
304 - cuda_scalarwave<T><<< blocks, threads, batch_bytes >>>(dev_F, N, dev_x, dev_y, dev_z, dev_w, batch_size); //call the kernel  
305 - waves_processed += batch_size; //increment the counter indicating how many waves have been processed  
306 - } 304 + gpu_scalarwaves(dev_F, N, dev_x, dev_y, dev_z, dev_W, W.size());
307 305
308 cudaMemcpy(F, dev_F, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost); //copy the field from device memory 306 cudaMemcpy(F, dev_F, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost); //copy the field from device memory
309 307
@@ -311,15 +309,31 @@ void cpu_sum_scalarwaves(stim::complex&lt;T&gt;* F, size_t N, T* x, T* y, T* z, std::v @@ -311,15 +309,31 @@ void cpu_sum_scalarwaves(stim::complex&lt;T&gt;* F, size_t N, T* x, T* y, T* z, std::v
311 if(y != NULL) cudaFree(dev_y); 309 if(y != NULL) cudaFree(dev_y);
312 if(z != NULL) cudaFree(dev_z); 310 if(z != NULL) cudaFree(dev_z);
313 cudaFree(dev_F); 311 cudaFree(dev_F);
314 - cudaFree(dev_w); 312 +#else
  313 + memset(F, 0, N * sizeof(stim::complex<T>));
  314 + T px, py, pz;
  315 + for(size_t i = 0; i < N; i++){ // for each element in the array
  316 + (x == NULL) ? px = 0 : px = x[i]; // test for NULL values
  317 + (y == NULL) ? py = 0 : py = y[i];
  318 + (z == NULL) ? pz = 0 : pz = z[i];
315 319
  320 + for(size_t s = 0; s < S; s++){
  321 + F[i] += w_array[s].pos(px, py, pz); //sum all plane waves at this point
  322 + }
  323 + }
316 #endif 324 #endif
317 } 325 }
318 326
319 template<typename T> 327 template<typename T>
320 void cpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){ 328 void cpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
321 std::vector< stim::scalarwave<T> > w_array(1, w); 329 std::vector< stim::scalarwave<T> > w_array(1, w);
322 - cpu_sum_scalarwaves(F, N, x, y, z, w_array); 330 + cpu_scalarwaves(F, N, x, y, z, w_array);
  331 +}
  332 +
  333 +template<typename T>
  334 +void cpu_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  335 + std::vector< stim::scalarwave<T> > w_array(1, w);
  336 + cpu_scalarwaves(F, N, x, y, z, w_array);
323 } 337 }
324 338
325 339
@@ -331,7 +345,7 @@ void cpu_scalarwave(stim::complex&lt;T&gt;* F, size_t N, T* x, T* y, T* z, stim::scala @@ -331,7 +345,7 @@ void cpu_scalarwave(stim::complex&lt;T&gt;* F, size_t N, T* x, T* y, T* z, stim::scala
331 /// @param A is the list of amplitudes for each wave 345 /// @param A is the list of amplitudes for each wave
332 /// @param S is the list of propagation directions for each wave 346 /// @param S is the list of propagation directions for each wave
333 template<typename T> 347 template<typename T>
334 -CUDA_CALLABLE stim::complex<T> sum_scalarwaves(T x, T y, T z, std::vector< stim::scalarwave<T> > W){ 348 +CUDA_CALLABLE stim::complex<T> cpu_scalarwaves(T x, T y, T z, std::vector< stim::scalarwave<T> > W){
335 size_t N = W.size(); //get the number of plane wave samples 349 size_t N = W.size(); //get the number of plane wave samples
336 stim::complex<T> field(0, 0); //initialize the field to zero (0) 350 stim::complex<T> field(0, 0); //initialize the field to zero (0)
337 stim::vec3<T> k; //allocate space for the direction vector 351 stim::vec3<T> k; //allocate space for the direction vector
stim/visualization/aaboundingbox.h
@@ -10,8 +10,8 @@ class aaboundingbox{ @@ -10,8 +10,8 @@ class aaboundingbox{
10 10
11 public: 11 public:
12 bool set; //has the bounding box been set to include any points? 12 bool set; //has the bounding box been set to include any points?
13 - stim::vec<T> A; //minimum point in the bounding box  
14 - stim::vec<T> B; //maximum point in the bounding box 13 + stim::vec3<T> A; //minimum point in the bounding box
  14 + stim::vec3<T> B; //maximum point in the bounding box
15 15
16 aaboundingbox(){ //constructor generates an empty bounding box 16 aaboundingbox(){ //constructor generates an empty bounding box
17 set = false; 17 set = false;
@@ -21,7 +21,7 @@ public: @@ -21,7 +21,7 @@ public:
21 /// Test if a point is inside of the bounding box and returns true if it is. 21 /// Test if a point is inside of the bounding box and returns true if it is.
22 22
23 /// @param p is the point to be tested 23 /// @param p is the point to be tested
24 - bool test(stim::vec<T> p){ 24 + bool test(stim::vec3<T> p){
25 25
26 for(unsigned d = 0; d < p.size(); p++){ //for each dimension 26 for(unsigned d = 0; d < p.size(); p++){ //for each dimension
27 if(p[d] < A[d]) return false; //if the point is less than the minimum bound, return false 27 if(p[d] < A[d]) return false; //if the point is less than the minimum bound, return false
@@ -33,7 +33,7 @@ public: @@ -33,7 +33,7 @@ public:
33 /// Expand the bounding box to include the specified point. 33 /// Expand the bounding box to include the specified point.
34 34
35 /// @param p is the point to be included 35 /// @param p is the point to be included
36 - void expand(stim::vec<T> p){ 36 + void expand(stim::vec3<T> p){
37 37
38 if(!set){ //if the bounding box is empty, fill it with the current point 38 if(!set){ //if the bounding box is empty, fill it with the current point
39 A = B = p; 39 A = B = p;
@@ -47,12 +47,12 @@ public: @@ -47,12 +47,12 @@ public:
47 } 47 }
48 48
49 /// Return the center point of the bounding box as a stim::vec 49 /// Return the center point of the bounding box as a stim::vec
50 - stim::vec<T> center(){ 50 + stim::vec3<T> center(){
51 return (B + A) * 0.5; 51 return (B + A) * 0.5;
52 } 52 }
53 53
54 /// Return the size of the bounding box as a stim::vec 54 /// Return the size of the bounding box as a stim::vec
55 - stim::vec<T> size(){ 55 + stim::vec3<T> size(){
56 return (B - A); 56 return (B - A);
57 } 57 }
58 58
stim/visualization/camera.h
@@ -11,32 +11,32 @@ namespace stim{ @@ -11,32 +11,32 @@ namespace stim{
11 11
12 class camera 12 class camera
13 { 13 {
14 - vec<float> d; //direction that the camera is pointing  
15 - vec<float> p; //position of the camera  
16 - vec<float> up; //"up" direction 14 + vec3<float> d; //direction that the camera is pointing
  15 + vec3<float> p; //position of the camera
  16 + vec3<float> up; //"up" direction
17 float focus; //focal length of the camera 17 float focus; //focal length of the camera
18 float fov; 18 float fov;
19 19
20 //private function makes sure that the up vector is orthogonal to the direction vector and both are normalized 20 //private function makes sure that the up vector is orthogonal to the direction vector and both are normalized
21 void stabalize() 21 void stabalize()
22 { 22 {
23 - vec<float> side = up.cross(d); 23 + vec3<float> side = up.cross(d);
24 up = d.cross(side); 24 up = d.cross(side);
25 up = up.norm(); 25 up = up.norm();
26 d = d.norm(); 26 d = d.norm();
27 } 27 }
28 28
29 public: 29 public:
30 - void setPosition(vec<float> pos) 30 + void setPosition(vec3<float> pos)
31 { 31 {
32 p = pos; 32 p = pos;
33 } 33 }
34 - void setPosition(float x, float y, float z){setPosition(vec<float>(x, y, z));} 34 + void setPosition(float x, float y, float z){setPosition(vec3<float>(x, y, z));}
35 35
36 void setFocalDistance(float distance){focus = distance;} 36 void setFocalDistance(float distance){focus = distance;}
37 void setFOV(float field_of_view){fov = field_of_view;} 37 void setFOV(float field_of_view){fov = field_of_view;}
38 38
39 - void LookAt(vec<float> pos) 39 + void LookAt(vec3<float> pos)
40 { 40 {
41 //find the new direction 41 //find the new direction
42 d = pos - p; 42 d = pos - p;
@@ -47,22 +47,22 @@ public: @@ -47,22 +47,22 @@ public:
47 //stabalize the camera 47 //stabalize the camera
48 stabalize(); 48 stabalize();
49 } 49 }
50 - void LookAt(float px, float py, float pz){LookAt(vec<float>(px, py, pz));}  
51 - void LookAt(vec<float> pos, vec<float> new_up){up = new_up; LookAt(pos);}  
52 - void LookAt(float px, float py, float pz, float ux, float uy, float uz){LookAt(vec<float>(px, py, pz), vec<float>(ux, uy, uz));} 50 + void LookAt(float px, float py, float pz){LookAt(vec3<float>(px, py, pz));}
  51 + void LookAt(vec3<float> pos, vec3<float> new_up){up = new_up; LookAt(pos);}
  52 + void LookAt(float px, float py, float pz, float ux, float uy, float uz){LookAt(vec3<float>(px, py, pz), vec3<float>(ux, uy, uz));}
53 void LookAtDolly(float lx, float ly, float lz) 53 void LookAtDolly(float lx, float ly, float lz)
54 { 54 {
55 //find the current focus point 55 //find the current focus point
56 - vec<float> f = p + focus*d;  
57 - vec<float> T = vec<float>(lx, ly, lz) - f; 56 + vec3<float> f = p + focus*d;
  57 + vec3<float> T = vec3<float>(lx, ly, lz) - f;
58 p = p + T; 58 p = p + T;
59 } 59 }
60 60
61 - void Dolly(vec<float> direction) 61 + void Dolly(vec3<float> direction)
62 { 62 {
63 p = p+direction; 63 p = p+direction;
64 } 64 }
65 - void Dolly(float x, float y, float z){Dolly(vec<float>(x, y, z));} 65 + void Dolly(float x, float y, float z){Dolly(vec3<float>(x, y, z));}
66 void Push(float delta) 66 void Push(float delta)
67 { 67 {
68 if(delta > focus) 68 if(delta > focus)
@@ -80,7 +80,7 @@ public: @@ -80,7 +80,7 @@ public:
80 qx.CreateRotation(theta_x, up[0], up[1], up[2]); 80 qx.CreateRotation(theta_x, up[0], up[1], up[2]);
81 81
82 //y rotation is around the side axis 82 //y rotation is around the side axis
83 - vec<float> side = up.cross(d); 83 + vec3<float> side = up.cross(d);
84 quaternion<float> qy; 84 quaternion<float> qy;
85 qy.CreateRotation(theta_y, side[0], side[1], side[2]); 85 qy.CreateRotation(theta_y, side[0], side[1], side[2]);
86 86
@@ -118,28 +118,28 @@ public: @@ -118,28 +118,28 @@ public:
118 void OrbitFocus(float theta_x, float theta_y) 118 void OrbitFocus(float theta_x, float theta_y)
119 { 119 {
120 //find the focal point 120 //find the focal point
121 - vec<float> focal_point = p + focus*d; 121 + vec3<float> focal_point = p + focus*d;
122 122
123 //center the coordinate system on the focal point 123 //center the coordinate system on the focal point
124 - vec<float> centered = p - (focal_point - vec<float>(0, 0, 0)); 124 + vec3<float> centered = p - (focal_point - vec3<float>(0, 0, 0));
125 125
126 //create the x rotation (around the up vector) 126 //create the x rotation (around the up vector)
127 quaternion<float> qx; 127 quaternion<float> qx;
128 qx.CreateRotation(theta_x, up[0], up[1], up[2]); 128 qx.CreateRotation(theta_x, up[0], up[1], up[2]);
129 - centered = vec<float>(0, 0, 0) + qx.toMatrix3()*(centered - vec<float>(0, 0, 0)); 129 + centered = vec3<float>(0, 0, 0) + qx.toMatrix3()*(centered - vec3<float>(0, 0, 0));
130 130
131 //get a side vector for theta_y rotation 131 //get a side vector for theta_y rotation
132 - vec<float> side = up.cross((vec<float>(0, 0, 0) - centered).norm()); 132 + vec3<float> side = up.cross((vec3<float>(0, 0, 0) - centered).norm());
133 133
134 quaternion<float> qy; 134 quaternion<float> qy;
135 qy.CreateRotation(theta_y, side[0], side[1], side[2]); 135 qy.CreateRotation(theta_y, side[0], side[1], side[2]);
136 - centered = vec<float>(0, 0, 0) + qy.toMatrix3()*(centered - vec<float>(0, 0, 0)); 136 + centered = vec3<float>(0, 0, 0) + qy.toMatrix3()*(centered - vec3<float>(0, 0, 0));
137 137
138 //perform the rotation on the centered camera position 138 //perform the rotation on the centered camera position
139 //centered = final.toMatrix()*centered; 139 //centered = final.toMatrix()*centered;
140 140
141 //re-position the camera 141 //re-position the camera
142 - p = centered + (focal_point - vec<float>(0, 0, 0)); 142 + p = centered + (focal_point - vec3<float>(0, 0, 0));
143 143
144 //make sure we are looking at the focal point 144 //make sure we are looking at the focal point
145 LookAt(focal_point); 145 LookAt(focal_point);
@@ -151,17 +151,17 @@ public: @@ -151,17 +151,17 @@ public:
151 151
152 void Slide(float u, float v) 152 void Slide(float u, float v)
153 { 153 {
154 - vec<float> V = up.norm();  
155 - vec<float> U = up.cross(d).norm(); 154 + vec3<float> V = up.norm();
  155 + vec3<float> U = up.cross(d).norm();
156 156
157 p = p + (V * v) + (U * u); 157 p = p + (V * v) + (U * u);
158 } 158 }
159 159
160 //accessor methods 160 //accessor methods
161 - vec<float> getPosition(){return p;}  
162 - vec<float> getUp(){return up;}  
163 - vec<float> getDirection(){return d;}  
164 - vec<float> getLookAt(){return p + focus*d;} 161 + vec3<float> getPosition(){return p;}
  162 + vec3<float> getUp(){return up;}
  163 + vec3<float> getDirection(){return d;}
  164 + vec3<float> getLookAt(){return p + focus*d;}
165 float getFOV(){return fov;} 165 float getFOV(){return fov;}
166 166
167 //output the camera settings 167 //output the camera settings
@@ -182,9 +182,9 @@ public: @@ -182,9 +182,9 @@ public:
182 //constructor 182 //constructor
183 camera() 183 camera()
184 { 184 {
185 - p = vec<float>(0, 0, 0);  
186 - d = vec<float>(0, 0, 1);  
187 - up = vec<float>(0, 1, 0); 185 + p = vec3<float>(0, 0, 0);
  186 + d = vec3<float>(0, 0, 1);
  187 + up = vec3<float>(0, 1, 0);
188 focus = 1; 188 focus = 1;
189 189
190 } 190 }
stim/visualization/cylinder.h
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 #define STIM_CYLINDER_H 2 #define STIM_CYLINDER_H
3 #include <iostream> 3 #include <iostream>
4 #include <stim/math/circle.h> 4 #include <stim/math/circle.h>
5 -#include <stim/math/vector.h> 5 +#include <stim/math/vec3.h>
6 6
7 7
8 namespace stim 8 namespace stim
@@ -25,11 +25,11 @@ class cylinder @@ -25,11 +25,11 @@ class cylinder
25 25
26 ///inits the cylinder from a list of points (inP) and radii (inM) 26 ///inits the cylinder from a list of points (inP) and radii (inM)
27 void 27 void
28 - init(std::vector<stim::vec<T> > inP, std::vector<stim::vec<T> > inM) 28 + init(std::vector<stim::vec3<T> > inP, std::vector<stim::vec<T> > inM)
29 { 29 {
30 mags = inM; 30 mags = inM;
31 - stim::vec<float> v1;  
32 - stim::vec<float> v2; 31 + stim::vec3<float> v1;
  32 + stim::vec3<float> v2;
33 e.resize(inP.size()); 33 e.resize(inP.size());
34 if(inP.size() < 2) 34 if(inP.size() < 2)
35 return; 35 return;
@@ -38,16 +38,16 @@ class cylinder @@ -38,16 +38,16 @@ class cylinder
38 L.resize(inP.size()); 38 L.resize(inP.size());
39 T temp = (T)0; 39 T temp = (T)0;
40 L[0] = 0; 40 L[0] = 0;
41 - for(int i = 1; i < L.size(); i++) 41 + for(size_t i = 1; i < L.size(); i++)
42 { 42 {
43 temp += (inP[i-1] - inP[i]).len(); 43 temp += (inP[i-1] - inP[i]).len();
44 L[i] = temp; 44 L[i] = temp;
45 } 45 }
46 46
47 - stim::vec<T> dr = (inP[1] - inP[0]).norm();  
48 - s = stim::circle<T>(inP[0], inM[0][0], dr, stim::vec<T>(1,0,0)); 47 + stim::vec3<T> dr = (inP[1] - inP[0]).norm();
  48 + s = stim::circle<T>(inP[0], inM[0][0], dr, stim::vec3<T>(1,0,0));
49 e[0] = s; 49 e[0] = s;
50 - for(int i = 1; i < inP.size()-1; i++) 50 + for(size_t i = 1; i < inP.size()-1; i++)
51 { 51 {
52 s.center(inP[i]); 52 s.center(inP[i]);
53 v1 = (inP[i] - inP[i-1]).norm(); 53 v1 = (inP[i] - inP[i-1]).norm();
@@ -67,7 +67,7 @@ class cylinder @@ -67,7 +67,7 @@ class cylinder
67 } 67 }
68 68
69 ///returns the direction vector at point idx. 69 ///returns the direction vector at point idx.
70 - stim::vec<T> 70 + stim::vec3<T>
71 d(int idx) 71 d(int idx)
72 { 72 {
73 if(idx == 0) 73 if(idx == 0)
@@ -81,15 +81,15 @@ class cylinder @@ -81,15 +81,15 @@ class cylinder
81 else 81 else
82 { 82 {
83 // return (e[idx+1].P - e[idx].P).norm(); 83 // return (e[idx+1].P - e[idx].P).norm();
84 - stim::vec<float> v1 = (e[idx].P-e[idx-1].P).norm();  
85 - stim::vec<float> v2 = (e[idx+1].P-e[idx].P).norm(); 84 + stim::vec3<float> v1 = (e[idx].P-e[idx-1].P).norm();
  85 + stim::vec3<float> v2 = (e[idx+1].P-e[idx].P).norm();
86 return (v1+v2).norm(); 86 return (v1+v2).norm();
87 } 87 }
88 // return e[idx].N; 88 // return e[idx].N;
89 89
90 } 90 }
91 91
92 - stim::vec<T> 92 + stim::vec3<T>
93 d(T l, int idx) 93 d(T l, int idx)
94 { 94 {
95 if(idx == 0 || idx == e.size()-1) 95 if(idx == 0 || idx == e.size()-1)
@@ -144,13 +144,13 @@ class cylinder @@ -144,13 +144,13 @@ class cylinder
144 ///constructor to create a cylinder from a set of points, radii, and the number of sides for the cylinder. 144 ///constructor to create a cylinder from a set of points, radii, and the number of sides for the cylinder.
145 ///@param inP: Vector of stim vecs composing the points of the centerline. 145 ///@param inP: Vector of stim vecs composing the points of the centerline.
146 ///@param inM: Vector of stim vecs composing the radii of the centerline. 146 ///@param inM: Vector of stim vecs composing the radii of the centerline.
147 - cylinder(std::vector<stim::vec<T> > inP, std::vector<stim::vec<T> > inM){ 147 + cylinder(std::vector<stim::vec3<T> > inP, std::vector<stim::vec3<T> > inM){
148 init(inP, inM); 148 init(inP, inM);
149 } 149 }
150 150
151 ///Constructor defines a cylinder with centerline inP and magnitudes of zero 151 ///Constructor defines a cylinder with centerline inP and magnitudes of zero
152 ///@param inP: Vector of stim vecs composing the points of the centerline 152 ///@param inP: Vector of stim vecs composing the points of the centerline
153 - cylinder(std::vector< stim::vec<T> > inP){ 153 + cylinder(std::vector< stim::vec3<T> > inP){
154 std::vector< stim::vec<T> > inM; //create an array of arbitrary magnitudes 154 std::vector< stim::vec<T> > inM; //create an array of arbitrary magnitudes
155 155
156 stim::vec<T> zero; 156 stim::vec<T> zero;
@@ -171,12 +171,12 @@ class cylinder @@ -171,12 +171,12 @@ class cylinder
171 ///Returns a position vector at the given p-value (p value ranges from 0 to 1). 171 ///Returns a position vector at the given p-value (p value ranges from 0 to 1).
172 ///interpolates the position along the line. 172 ///interpolates the position along the line.
173 ///@param pvalue: the location of the in the cylinder, from 0 (beginning to 1). 173 ///@param pvalue: the location of the in the cylinder, from 0 (beginning to 1).
174 - stim::vec<T> 174 + stim::vec3<T>
175 p(T pvalue) 175 p(T pvalue)
176 { 176 {
177 if(pvalue < 0.0 || pvalue > 1.0) 177 if(pvalue < 0.0 || pvalue > 1.0)
178 { 178 {
179 - return stim::vec<float>(-1,-1,-1); 179 + return stim::vec3<float>(-1,-1,-1);
180 } 180 }
181 T l = pvalue*L[L.size()-1]; 181 T l = pvalue*L[L.size()-1];
182 int idx = findIdx(l); 182 int idx = findIdx(l);
@@ -188,7 +188,7 @@ class cylinder @@ -188,7 +188,7 @@ class cylinder
188 ///Interpolates the radius along the line. 188 ///Interpolates the radius along the line.
189 ///@param l: the location of the in the cylinder. 189 ///@param l: the location of the in the cylinder.
190 ///@param idx: integer location of the point closest to l but prior to it. 190 ///@param idx: integer location of the point closest to l but prior to it.
191 - stim::vec<T> 191 + stim::vec3<T>
192 p(T l, int idx) 192 p(T l, int idx)
193 { 193 {
194 T rat = (l-L[idx])/(L[idx+1]-L[idx]); 194 T rat = (l-L[idx])/(L[idx+1]-L[idx]);
@@ -252,16 +252,16 @@ class cylinder @@ -252,16 +252,16 @@ class cylinder
252 ///in x, y, z coordinates. Theta is in degrees from 0 to 360. 252 ///in x, y, z coordinates. Theta is in degrees from 0 to 360.
253 ///@param pvalue: the location of the in the cylinder, from 0 (beginning to 1). 253 ///@param pvalue: the location of the in the cylinder, from 0 (beginning to 1).
254 ///@param theta: the angle to the point of a circle. 254 ///@param theta: the angle to the point of a circle.
255 - stim::vec<T> 255 + stim::vec3<T>
256 surf(T pvalue, T theta) 256 surf(T pvalue, T theta)
257 { 257 {
258 if(pvalue < 0.0 || pvalue > 1.0) 258 if(pvalue < 0.0 || pvalue > 1.0)
259 { 259 {
260 - return stim::vec<float>(-1,-1,-1); 260 + return stim::vec3<float>(-1,-1,-1);
261 } else { 261 } else {
262 T l = pvalue*L[L.size()-1]; 262 T l = pvalue*L[L.size()-1];
263 int idx = findIdx(l); 263 int idx = findIdx(l);
264 - stim::vec<T> ps = p(l, idx); 264 + stim::vec3<T> ps = p(l, idx);
265 T m = r(l, idx); 265 T m = r(l, idx);
266 s = e[idx]; 266 s = e[idx];
267 s.center(ps); 267 s.center(ps);
@@ -273,10 +273,10 @@ class cylinder @@ -273,10 +273,10 @@ class cylinder
273 273
274 ///returns a vector of points necessary to create a circle at every position in the fiber. 274 ///returns a vector of points necessary to create a circle at every position in the fiber.
275 ///@param sides: the number of sides of each circle. 275 ///@param sides: the number of sides of each circle.
276 - std::vector<std::vector<vec<T> > > 276 + std::vector<std::vector<vec3<T> > >
277 getPoints(int sides) 277 getPoints(int sides)
278 { 278 {
279 - std::vector<std::vector <vec<T> > > points; 279 + std::vector<std::vector <vec3<T> > > points;
280 points.resize(e.size()); 280 points.resize(e.size());
281 for(int i = 0; i < e.size(); i++) 281 for(int i = 0; i < e.size(); i++)
282 { 282 {
@@ -293,7 +293,7 @@ class cylinder @@ -293,7 +293,7 @@ class cylinder
293 } 293 }
294 /// Allows a point on the centerline to be accessed using bracket notation 294 /// Allows a point on the centerline to be accessed using bracket notation
295 295
296 - vec<T> operator[](unsigned int i){ 296 + vec3<T> operator[](unsigned int i){
297 return e[i].P; 297 return e[i].P;
298 } 298 }
299 299
@@ -309,7 +309,7 @@ class cylinder @@ -309,7 +309,7 @@ class cylinder
309 T M = 0; //initialize the integral to zero 309 T M = 0; //initialize the integral to zero
310 T m0, m1; //allocate space for both magnitudes in a single segment 310 T m0, m1; //allocate space for both magnitudes in a single segment
311 311
312 - //vec<T> p0, p1; //allocate space for both points in a single segment 312 + //vec3<T> p0, p1; //allocate space for both points in a single segment
313 313
314 m0 = mags[0][m]; //initialize the first point and magnitude to the first point in the cylinder 314 m0 = mags[0][m]; //initialize the first point and magnitude to the first point in the cylinder
315 //p0 = pos[0]; 315 //p0 = pos[0];
@@ -325,7 +325,7 @@ class cylinder @@ -325,7 +325,7 @@ class cylinder
325 if(p > 1) len = (L[p-1] - L[p-2]); //calculate the segment length using the L array 325 if(p > 1) len = (L[p-1] - L[p-2]); //calculate the segment length using the L array
326 326
327 //add the average magnitude, weighted by the segment length 327 //add the average magnitude, weighted by the segment length
328 - M += (m0 + m1)/2.0 * len; 328 + M += (m0 + m1)/(T)2.0 * len;
329 329
330 m0 = m1; //move to the next segment by shifting points 330 m0 = m1; //move to the next segment by shifting points
331 } 331 }
@@ -345,21 +345,21 @@ class cylinder @@ -345,21 +345,21 @@ class cylinder
345 /// @param spacing is the maximum spacing allowed between sample points 345 /// @param spacing is the maximum spacing allowed between sample points
346 cylinder<T> resample(T spacing){ 346 cylinder<T> resample(T spacing){
347 347
348 - std::vector< vec<T> > result; 348 + std::vector< vec3<T> > result;
349 349
350 - vec<T> p0 = e[0].P; //initialize p0 to the first point on the centerline  
351 - vec<T> p1; 350 + vec3<T> p0 = e[0].P; //initialize p0 to the first point on the centerline
  351 + vec3<T> p1;
352 unsigned N = size(); //number of points in the current centerline 352 unsigned N = size(); //number of points in the current centerline
353 353
354 //for each line segment on the centerline 354 //for each line segment on the centerline
355 for(unsigned int i = 1; i < N; i++){ 355 for(unsigned int i = 1; i < N; i++){
356 p1 = e[i].P; //get the second point in the line segment 356 p1 = e[i].P; //get the second point in the line segment
357 357
358 - vec<T> v = p1 - p0; //calculate the vector between these two points 358 + vec3<T> v = p1 - p0; //calculate the vector between these two points
359 T d = v.len(); //calculate the distance between these two points (length of the line segment) 359 T d = v.len(); //calculate the distance between these two points (length of the line segment)
360 360
361 - unsigned nsteps = d / spacing+1; //calculate the number of steps to take along the segment to meet the spacing criteria  
362 - T stepsize = 1.0 / nsteps; //calculate the parametric step size between new centerline points 361 + size_t nsteps = (size_t)std::ceil(d / spacing); //calculate the number of steps to take along the segment to meet the spacing criteria
  362 + T stepsize = (T)1.0 / nsteps; //calculate the parametric step size between new centerline points
363 363
364 //for each step along the line segment 364 //for each step along the line segment
365 for(unsigned s = 0; s < nsteps; s++){ 365 for(unsigned s = 0; s < nsteps; s++){