Commit 81e0d2215b30a17309ce7ba7615f94e2eda5c67e

Authored by David Mayerich
1 parent 7b3948ab

separated executable arguments and options in the arglist class

math/complex.h
1   -/*RTS Complex number class. This class is CUDA compatible,
2   -and can therefore be used in CUDA code and on CUDA devices.
3   -*/
4   -
5   -#ifndef RTS_COMPLEX
6   -#define RTS_COMPLEX
7   -
8   -#include "../cuda/callable.h"
9   -#include <cmath>
10   -#include <string>
11   -#include <sstream>
12   -#include <iostream>
13   -
14   -namespace stim
15   -{
16   -
17   -template <class T>
18   -struct complex
19   -{
20   - T r, i;
21   -
22   - //default constructor
23   - CUDA_CALLABLE complex()
24   - {
25   - r = 0;
26   - i = 0;
27   - }
28   -
29   - //constructor when given real and imaginary values
30   - CUDA_CALLABLE complex(T r, T i = 0)
31   - {
32   - this->r = r;
33   - this->i = i;
34   - }
35   -
36   - //access methods
37   - CUDA_CALLABLE T real()
38   - {
39   - return r;
40   - }
41   -
42   - CUDA_CALLABLE T real(T r_val)
43   - {
44   - r = r_val;
45   - return r_val;
46   - }
47   -
48   - CUDA_CALLABLE T imag()
49   - {
50   - return i;
51   - }
52   - CUDA_CALLABLE T imag(T i_val)
53   - {
54   - i = i_val;
55   - return i_val;
56   - }
57   -
58   -
59   -
60   - //return the current value multiplied by i
61   - CUDA_CALLABLE complex<T> imul()
62   - {
63   - complex<T> result;
64   - result.r = -i;
65   - result.i = r;
66   -
67   - return result;
68   - }
69   -
70   - //returns the complex signum (-1, 0, 1)
71   - CUDA_CALLABLE int sgn(){
72   - if(r > 0) return 1;
73   - else if(r < 0) return -1;
74   - else return (0 < i - i < 0);
75   - }
76   -
77   - //ARITHMETIC OPERATORS--------------------
78   -
79   - //binary + operator (returns the result of adding two complex values)
80   - CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs) const
81   - {
82   - complex<T> result;
83   - result.r = r + rhs.r;
84   - result.i = i + rhs.i;
85   - return result;
86   - }
87   -
88   - CUDA_CALLABLE complex<T> operator+ (const T rhs) const
89   - {
90   - complex<T> result;
91   - result.r = r + rhs;
92   - result.i = i;
93   - return result;
94   - }
95   -
96   - //binary - operator (returns the result of adding two complex values)
97   - CUDA_CALLABLE complex<T> operator- (const complex<T> rhs) const
98   - {
99   - complex<T> result;
100   - result.r = r - rhs.r;
101   - result.i = i - rhs.i;
102   - return result;
103   - }
104   -
105   - //binary - operator (returns the result of adding two complex values)
106   - CUDA_CALLABLE complex<T> operator- (const T rhs)
107   - {
108   - complex<T> result;
109   - result.r = r - rhs;
110   - result.i = i;
111   - return result;
112   - }
113   -
114   - //binary MULTIPLICATION operators (returns the result of multiplying complex values)
115   - CUDA_CALLABLE complex<T> operator* (const complex<T> rhs) const
116   - {
117   - complex<T> result;
118   - result.r = r * rhs.r - i * rhs.i;
119   - result.i = r * rhs.i + i * rhs.r;
120   - return result;
121   - }
122   - CUDA_CALLABLE complex<T> operator* (const T rhs)
123   - {
124   - return complex<T>(r * rhs, i * rhs);
125   - }
126   -
127   - //binary DIVISION operators (returns the result of dividing complex values)
128   - CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs) const
129   - {
130   - complex<T> result;
131   - T denom = rhs.r * rhs.r + rhs.i * rhs.i;
132   - result.r = (r * rhs.r + i * rhs.i) / denom;
133   - result.i = (- r * rhs.i + i * rhs.r) / denom;
134   -
135   - return result;
136   - }
137   - CUDA_CALLABLE complex<T> operator/ (const T rhs)
138   - {
139   - return complex<T>(r / rhs, i / rhs);
140   - }
141   -
142   - //ASSIGNMENT operators-----------------------------------
143   - CUDA_CALLABLE complex<T> & operator=(const complex<T> &rhs)
144   - {
145   - //check for self-assignment
146   - if(this != &rhs)
147   - {
148   - this->r = rhs.r;
149   - this->i = rhs.i;
150   - }
151   - return *this;
152   - }
153   - CUDA_CALLABLE complex<T> & operator=(const T &rhs)
154   - {
155   - this->r = rhs;
156   - this->i = 0;
157   -
158   - return *this;
159   - }
160   -
161   - //arithmetic assignment operators
162   - CUDA_CALLABLE complex<T> operator+=(const complex<T> &rhs)
163   - {
164   - *this = *this + rhs;
165   - return *this;
166   - }
167   - CUDA_CALLABLE complex<T> operator+=(const T &rhs)
168   - {
169   - *this = *this + rhs;
170   - return *this;
171   - }
172   -
173   - CUDA_CALLABLE complex<T> operator-=(const complex<T> &rhs)
174   - {
175   - *this = *this - rhs;
176   - return *this;
177   - }
178   - CUDA_CALLABLE complex<T> operator-=(const T &rhs)
179   - {
180   - *this = *this - rhs;
181   - return *this;
182   - }
183   -
184   - CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs)
185   - {
186   - *this = *this * rhs;
187   - return *this;
188   - }
189   - CUDA_CALLABLE complex<T> operator*=(const T &rhs)
190   - {
191   - *this = *this * rhs;
192   - return *this;
193   - }
194   - //divide and assign
195   - CUDA_CALLABLE complex<T> operator/=(const complex<T> &rhs)
196   - {
197   - *this = *this / rhs;
198   - return *this;
199   - }
200   - CUDA_CALLABLE complex<T> operator/=(const T &rhs)
201   - {
202   - *this = *this / rhs;
203   - return *this;
204   - }
205   -
206   - //absolute value operator (returns the absolute value of the complex number)
207   - CUDA_CALLABLE T abs()
208   - {
209   - return std::sqrt(r * r + i * i);
210   - }
211   -
212   - CUDA_CALLABLE complex<T> log()
213   - {
214   - complex<T> result;
215   - result.r = (T)std::log(std::sqrt(r * r + i * i));
216   - result.i = (T)std::atan2(i, r);
217   -
218   -
219   - return result;
220   - }
221   -
222   - CUDA_CALLABLE complex<T> exp()
223   - {
224   - complex<T> result;
225   -
226   - T e_r = std::exp(r);
227   - result.r = e_r * (T)std::cos(i);
228   - result.i = e_r * (T)std::sin(i);
229   -
230   - return result;
231   - }
232   -
233   - /*CUDA_CALLABLE complex<T> pow(int y)
234   - {
235   -
236   - return pow((double)y);
237   - }*/
238   -
239   - CUDA_CALLABLE complex<T> pow(T y)
240   - {
241   - complex<T> result;
242   -
243   - result = log() * y;
244   -
245   - return result.exp();
246   - }
247   -
248   - CUDA_CALLABLE complex<T> sqrt()
249   - {
250   - complex<T> result;
251   -
252   - //convert to polar coordinates
253   - T a = std::sqrt(r*r + i*i);
254   - T theta = std::atan2(i, r);
255   -
256   - //find the square root
257   - T a_p = std::sqrt(a);
258   - T theta_p = theta/2.0f;
259   -
260   - //convert back to cartesian coordinates
261   - result.r = a_p * std::cos(theta_p);
262   - result.i = a_p * std::sin(theta_p);
263   -
264   - return result;
265   - }
266   -
267   - std::string str()
268   - {
269   - std::stringstream ss;
270   - ss<<"("<<r<<","<<i<<")";
271   -
272   - return ss.str();
273   - }
274   -
275   - //COMPARISON operators
276   - CUDA_CALLABLE bool operator==(complex<T> rhs)
277   - {
278   - if(r == rhs.r && i == rhs.i)
279   - return true;
280   - return false;
281   - }
282   -
283   - CUDA_CALLABLE bool operator==(T rhs)
284   - {
285   - if(r == rhs && i == 0)
286   - return true;
287   - return false;
288   - }
289   -
290   - CUDA_CALLABLE bool operator!=(T rhs)
291   - {
292   - if(r != rhs || i != 0)
293   - return true;
294   - return false;
295   - }
296   -
297   - CUDA_CALLABLE bool operator<(complex<T> rhs){
298   - return abs() < rhs.abs();
299   - }
300   - CUDA_CALLABLE bool operator<=(complex<T> rhs){
301   - return abs() <= rhs.abs();
302   - }
303   - CUDA_CALLABLE bool operator>(complex<T> rhs){
304   - return abs() > rhs.abs();
305   - }
306   - CUDA_CALLABLE bool operator >=(complex<T> rhs){
307   - return abs() >= rhs.abs();
308   - }
309   -
310   - //CASTING operators
311   - template < typename otherT >
312   - operator complex<otherT>()
313   - {
314   - complex<otherT> result((otherT)r, (otherT)i);
315   - return result;
316   - }
317   - template< typename otherT >
318   - complex( const complex<otherT> &rhs)
319   - {
320   - r = (T)rhs.r;
321   - i = (T)rhs.i;
322   - }
323   - template< typename otherT >
324   - complex& operator=(const complex<otherT> &rhs)
325   - {
326   - r = (T)rhs.r;
327   - i = (T)rhs.i;
328   - return *this;
329   - }
330   -
331   -};
332   -
333   -} //end RTS namespace
334   -
335   -//addition
336   -template<typename T>
337   -CUDA_CALLABLE static stim::complex<T> operator+(const double a, const stim::complex<T> b)
338   -{
339   - return stim::complex<T>((T)a + b.r, b.i);
340   -}
341   -
342   -//subtraction with a real value
343   -template<typename T>
344   -CUDA_CALLABLE static stim::complex<T> operator-(const double a, const stim::complex<T> b)
345   -{
346   - return stim::complex<T>((T)a - b.r, -b.i);
347   -}
348   -
349   -//minus sign
350   -template<typename T>
351   -CUDA_CALLABLE static stim::complex<T> operator-(const stim::complex<T> &rhs)
352   -{
353   - return stim::complex<T>(-rhs.r, -rhs.i);
354   -}
355   -
356   -//multiply a T value by a complex value
357   -template<typename T>
358   -CUDA_CALLABLE static stim::complex<T> operator*(const double a, const stim::complex<T> b)
359   -{
360   - return stim::complex<T>((T)a * b.r, (T)a * b.i);
361   -}
362   -
363   -//divide a T value by a complex value
364   -template<typename T>
365   -CUDA_CALLABLE static stim::complex<T> operator/(const double a, const stim::complex<T> b)
366   -{
367   - stim::complex<T> result;
368   -
369   - T denom = b.r * b.r + b.i * b.i;
370   -
371   - result.r = ((T)a * b.r) / denom;
372   - result.i = -((T)a * b.i) / denom;
373   -
374   - return result;
375   -}
376   -
377   -
378   -template<typename T>
379   -CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, T y)
380   -{
381   - return x.pow(y);
382   -}
383   -template<typename T>
384   -CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, int y)
385   -{
386   - return x.pow(y);
387   -}
388   -
389   -//log function
390   -template<typename T>
391   -CUDA_CALLABLE static stim::complex<T> log(stim::complex<T> x)
392   -{
393   - return x.log();
394   -}
395   -
396   -//exp function
397   -template<typename T>
398   -CUDA_CALLABLE static stim::complex<T> exp(stim::complex<T> x)
399   -{
400   - return x.exp();
401   -}
402   -
403   -//sqrt function
404   -template<typename T>
405   -CUDA_CALLABLE static stim::complex<T> sqrt(stim::complex<T> x)
406   -{
407   - return x.sqrt();
408   -}
409   -
410   -
411   -template <typename T>
412   -CUDA_CALLABLE static T abs(stim::complex<T> a)
413   -{
414   - return a.abs();
415   -}
416   -
417   -template <typename T>
418   -CUDA_CALLABLE static T real(stim::complex<T> a)
419   -{
420   - return a.r;
421   -}
422   -
423   -//template <typename T>
424   -CUDA_CALLABLE static float real(float a)
425   -{
426   - return a;
427   -}
428   -
429   -template <typename T>
430   -CUDA_CALLABLE static T imag(stim::complex<T> a)
431   -{
432   - return a.i;
433   -}
434   -
435   -//trigonometric functions
436   -//template<class A>
437   -/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x)
438   -{
439   - stim::complex<float> result;
440   - result.r = sinf(x.r) * coshf(x.i);
441   - result.i = cosf(x.r) * sinhf(x.i);
442   -
443   - return result;
444   -}*/
445   -
446   -template<class A>
447   -CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)
448   -{
449   - stim::complex<A> result;
450   - result.r = (A)std::sin(x.r) * (A)std::cosh(x.i);
451   - result.i = (A)std::cos(x.r) * (A)std::sinh(x.i);
452   -
453   - return result;
454   -}
455   -
456   -//floating point template
457   -//template<class A>
458   -/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x)
459   -{
460   - stim::complex<float> result;
461   - result.r = cosf(x.r) * coshf(x.i);
462   - result.i = -(sinf(x.r) * sinhf(x.i));
463   -
464   - return result;
465   -}*/
466   -
467   -template<class A>
468   -CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x)
469   -{
470   - stim::complex<A> result;
471   - result.r = (A)std::cos(x.r) * (A)std::cosh(x.i);
472   - result.i = -((A)std::sin(x.r) * (A)std::sinh(x.i));
473   -
474   - return result;
475   -}
476   -
477   -
478   -template<class A>
479   -std::ostream& operator<<(std::ostream& os, stim::complex<A> x)
480   -{
481   - os<<x.str();
482   - return os;
483   -}
484   -
485   -template<class A>
486   -std::istream& operator>>(std::istream& is, stim::complex<A>& x)
487   -{
488   - A r, i;
489   - r = i = 0; //initialize the real and imaginary parts to zero
490   - is>>r; //parse
491   - is>>i;
492   -
493   - x.real(r); //assign the parsed values to x
494   - x.imag(i);
495   -
496   - return is; //return the stream
497   -}
498   -
499   -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
500   -//template<class T> using rtsComplex = stim::complex<T>;
501   -//#endif
502   -
503   -
504   -
505   -#endif
  1 +/*RTS Complex number class. This class is CUDA compatible,
  2 +and can therefore be used in CUDA code and on CUDA devices.
  3 +*/
  4 +
  5 +#ifndef RTS_COMPLEX
  6 +#define RTS_COMPLEX
  7 +
  8 +#include "../cuda/callable.h"
  9 +#include <cmath>
  10 +#include <string>
  11 +#include <sstream>
  12 +#include <iostream>
  13 +
  14 +namespace stim
  15 +{
  16 +
  17 +template <class T>
  18 +struct complex
  19 +{
  20 + T r, i;
  21 +
  22 + //default constructor
  23 + CUDA_CALLABLE complex()
  24 + {
  25 + r = 0;
  26 + i = 0;
  27 + }
  28 +
  29 + //constructor when given real and imaginary values
  30 + CUDA_CALLABLE complex(T r, T i = 0)
  31 + {
  32 + this->r = r;
  33 + this->i = i;
  34 + }
  35 +
  36 + //access methods
  37 + CUDA_CALLABLE T real()
  38 + {
  39 + return r;
  40 + }
  41 +
  42 + CUDA_CALLABLE T real(T r_val)
  43 + {
  44 + r = r_val;
  45 + return r_val;
  46 + }
  47 +
  48 + CUDA_CALLABLE T imag()
  49 + {
  50 + return i;
  51 + }
  52 + CUDA_CALLABLE T imag(T i_val)
  53 + {
  54 + i = i_val;
  55 + return i_val;
  56 + }
  57 +
  58 +
  59 +
  60 + //return the current value multiplied by i
  61 + CUDA_CALLABLE complex<T> imul()
  62 + {
  63 + complex<T> result;
  64 + result.r = -i;
  65 + result.i = r;
  66 +
  67 + return result;
  68 + }
  69 +
  70 + //returns the complex signum (-1, 0, 1)
  71 + CUDA_CALLABLE int sgn(){
  72 + if(r > 0) return 1;
  73 + else if(r < 0) return -1;
  74 + else return (0 < i - i < 0);
  75 + }
  76 +
  77 + //ARITHMETIC OPERATORS--------------------
  78 +
  79 + //binary + operator (returns the result of adding two complex values)
  80 + CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs) const
  81 + {
  82 + complex<T> result;
  83 + result.r = r + rhs.r;
  84 + result.i = i + rhs.i;
  85 + return result;
  86 + }
  87 +
  88 + CUDA_CALLABLE complex<T> operator+ (const T rhs) const
  89 + {
  90 + complex<T> result;
  91 + result.r = r + rhs;
  92 + result.i = i;
  93 + return result;
  94 + }
  95 +
  96 + //binary - operator (returns the result of adding two complex values)
  97 + CUDA_CALLABLE complex<T> operator- (const complex<T> rhs) const
  98 + {
  99 + complex<T> result;
  100 + result.r = r - rhs.r;
  101 + result.i = i - rhs.i;
  102 + return result;
  103 + }
  104 +
  105 + //binary - operator (returns the result of adding two complex values)
  106 + CUDA_CALLABLE complex<T> operator- (const T rhs)
  107 + {
  108 + complex<T> result;
  109 + result.r = r - rhs;
  110 + result.i = i;
  111 + return result;
  112 + }
  113 +
  114 + //binary MULTIPLICATION operators (returns the result of multiplying complex values)
  115 + CUDA_CALLABLE complex<T> operator* (const complex<T> rhs) const
  116 + {
  117 + complex<T> result;
  118 + result.r = r * rhs.r - i * rhs.i;
  119 + result.i = r * rhs.i + i * rhs.r;
  120 + return result;
  121 + }
  122 + CUDA_CALLABLE complex<T> operator* (const T rhs)
  123 + {
  124 + return complex<T>(r * rhs, i * rhs);
  125 + }
  126 +
  127 + //binary DIVISION operators (returns the result of dividing complex values)
  128 + CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs) const
  129 + {
  130 + complex<T> result;
  131 + T denom = rhs.r * rhs.r + rhs.i * rhs.i;
  132 + result.r = (r * rhs.r + i * rhs.i) / denom;
  133 + result.i = (- r * rhs.i + i * rhs.r) / denom;
  134 +
  135 + return result;
  136 + }
  137 + CUDA_CALLABLE complex<T> operator/ (const T rhs)
  138 + {
  139 + return complex<T>(r / rhs, i / rhs);
  140 + }
  141 +
  142 + //ASSIGNMENT operators-----------------------------------
  143 + CUDA_CALLABLE complex<T> & operator=(const complex<T> &rhs)
  144 + {
  145 + //check for self-assignment
  146 + if(this != &rhs)
  147 + {
  148 + this->r = rhs.r;
  149 + this->i = rhs.i;
  150 + }
  151 + return *this;
  152 + }
  153 + CUDA_CALLABLE complex<T> & operator=(const T &rhs)
  154 + {
  155 + this->r = rhs;
  156 + this->i = 0;
  157 +
  158 + return *this;
  159 + }
  160 +
  161 + //arithmetic assignment operators
  162 + CUDA_CALLABLE complex<T> operator+=(const complex<T> &rhs)
  163 + {
  164 + *this = *this + rhs;
  165 + return *this;
  166 + }
  167 + CUDA_CALLABLE complex<T> operator+=(const T &rhs)
  168 + {
  169 + *this = *this + rhs;
  170 + return *this;
  171 + }
  172 +
  173 + CUDA_CALLABLE complex<T> operator-=(const complex<T> &rhs)
  174 + {
  175 + *this = *this - rhs;
  176 + return *this;
  177 + }
  178 + CUDA_CALLABLE complex<T> operator-=(const T &rhs)
  179 + {
  180 + *this = *this - rhs;
  181 + return *this;
  182 + }
  183 +
  184 + CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs)
  185 + {
  186 + *this = *this * rhs;
  187 + return *this;
  188 + }
  189 + CUDA_CALLABLE complex<T> operator*=(const T &rhs)
  190 + {
  191 + *this = *this * rhs;
  192 + return *this;
  193 + }
  194 + //divide and assign
  195 + CUDA_CALLABLE complex<T> operator/=(const complex<T> &rhs)
  196 + {
  197 + *this = *this / rhs;
  198 + return *this;
  199 + }
  200 + CUDA_CALLABLE complex<T> operator/=(const T &rhs)
  201 + {
  202 + *this = *this / rhs;
  203 + return *this;
  204 + }
  205 +
  206 + //absolute value operator (returns the absolute value of the complex number)
  207 + CUDA_CALLABLE T abs()
  208 + {
  209 + return std::sqrt(r * r + i * i);
  210 + }
  211 +
  212 + CUDA_CALLABLE complex<T> log()
  213 + {
  214 + complex<T> result;
  215 + result.r = (T)std::log(std::sqrt(r * r + i * i));
  216 + result.i = (T)std::atan2(i, r);
  217 +
  218 +
  219 + return result;
  220 + }
  221 +
  222 + CUDA_CALLABLE complex<T> exp()
  223 + {
  224 + complex<T> result;
  225 +
  226 + T e_r = std::exp(r);
  227 + result.r = e_r * (T)std::cos(i);
  228 + result.i = e_r * (T)std::sin(i);
  229 +
  230 + return result;
  231 + }
  232 +
  233 + /*CUDA_CALLABLE complex<T> pow(int y)
  234 + {
  235 +
  236 + return pow((double)y);
  237 + }*/
  238 +
  239 + CUDA_CALLABLE complex<T> pow(T y)
  240 + {
  241 + complex<T> result;
  242 +
  243 + result = log() * y;
  244 +
  245 + return result.exp();
  246 + }
  247 +
  248 + CUDA_CALLABLE complex<T> sqrt()
  249 + {
  250 + complex<T> result;
  251 +
  252 + //convert to polar coordinates
  253 + T a = std::sqrt(r*r + i*i);
  254 + T theta = std::atan2(i, r);
  255 +
  256 + //find the square root
  257 + T a_p = std::sqrt(a);
  258 + T theta_p = theta/2.0f;
  259 +
  260 + //convert back to cartesian coordinates
  261 + result.r = a_p * std::cos(theta_p);
  262 + result.i = a_p * std::sin(theta_p);
  263 +
  264 + return result;
  265 + }
  266 +
  267 + std::string str()
  268 + {
  269 + std::stringstream ss;
  270 + ss<<"("<<r<<","<<i<<")";
  271 +
  272 + return ss.str();
  273 + }
  274 +
  275 + //COMPARISON operators
  276 + CUDA_CALLABLE bool operator==(complex<T> rhs)
  277 + {
  278 + if(r == rhs.r && i == rhs.i)
  279 + return true;
  280 + return false;
  281 + }
  282 +
  283 + CUDA_CALLABLE bool operator==(T rhs)
  284 + {
  285 + if(r == rhs && i == 0)
  286 + return true;
  287 + return false;
  288 + }
  289 +
  290 + CUDA_CALLABLE bool operator!=(T rhs)
  291 + {
  292 + if(r != rhs || i != 0)
  293 + return true;
  294 + return false;
  295 + }
  296 +
  297 + CUDA_CALLABLE bool operator<(complex<T> rhs){
  298 + return abs() < rhs.abs();
  299 + }
  300 + CUDA_CALLABLE bool operator<=(complex<T> rhs){
  301 + return abs() <= rhs.abs();
  302 + }
  303 + CUDA_CALLABLE bool operator>(complex<T> rhs){
  304 + return abs() > rhs.abs();
  305 + }
  306 + CUDA_CALLABLE bool operator >=(complex<T> rhs){
  307 + return abs() >= rhs.abs();
  308 + }
  309 +
  310 + //CASTING operators
  311 + template < typename otherT >
  312 + operator complex<otherT>()
  313 + {
  314 + complex<otherT> result((otherT)r, (otherT)i);
  315 + return result;
  316 + }
  317 + template< typename otherT >
  318 + complex( const complex<otherT> &rhs)
  319 + {
  320 + r = (T)rhs.r;
  321 + i = (T)rhs.i;
  322 + }
  323 + template< typename otherT >
  324 + complex& operator=(const complex<otherT> &rhs)
  325 + {
  326 + r = (T)rhs.r;
  327 + i = (T)rhs.i;
  328 + return *this;
  329 + }
  330 +
  331 +};
  332 +
  333 +} //end RTS namespace
  334 +
  335 +//addition
  336 +template<typename T>
  337 +CUDA_CALLABLE static stim::complex<T> operator+(const double a, const stim::complex<T> b)
  338 +{
  339 + return stim::complex<T>((T)a + b.r, b.i);
  340 +}
  341 +
  342 +//subtraction with a real value
  343 +template<typename T>
  344 +CUDA_CALLABLE static stim::complex<T> operator-(const double a, const stim::complex<T> b)
  345 +{
  346 + return stim::complex<T>((T)a - b.r, -b.i);
  347 +}
  348 +
  349 +//minus sign
  350 +template<typename T>
  351 +CUDA_CALLABLE static stim::complex<T> operator-(const stim::complex<T> &rhs)
  352 +{
  353 + return stim::complex<T>(-rhs.r, -rhs.i);
  354 +}
  355 +
  356 +//multiply a T value by a complex value
  357 +template<typename T>
  358 +CUDA_CALLABLE static stim::complex<T> operator*(const double a, const stim::complex<T> b)
  359 +{
  360 + return stim::complex<T>((T)a * b.r, (T)a * b.i);
  361 +}
  362 +
  363 +//divide a T value by a complex value
  364 +template<typename T>
  365 +CUDA_CALLABLE static stim::complex<T> operator/(const double a, const stim::complex<T> b)
  366 +{
  367 + stim::complex<T> result;
  368 +
  369 + T denom = b.r * b.r + b.i * b.i;
  370 +
  371 + result.r = ((T)a * b.r) / denom;
  372 + result.i = -((T)a * b.i) / denom;
  373 +
  374 + return result;
  375 +}
  376 +
  377 +
  378 +template<typename T>
  379 +CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, T y)
  380 +{
  381 + return x.pow(y);
  382 +}
  383 +template<typename T>
  384 +CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, int y)
  385 +{
  386 + return x.pow(y);
  387 +}
  388 +
  389 +//log function
  390 +template<typename T>
  391 +CUDA_CALLABLE static stim::complex<T> log(stim::complex<T> x)
  392 +{
  393 + return x.log();
  394 +}
  395 +
  396 +//exp function
  397 +template<typename T>
  398 +CUDA_CALLABLE static stim::complex<T> exp(stim::complex<T> x)
  399 +{
  400 + return x.exp();
  401 +}
  402 +
  403 +//sqrt function
  404 +template<typename T>
  405 +CUDA_CALLABLE static stim::complex<T> sqrt(stim::complex<T> x)
  406 +{
  407 + return x.sqrt();
  408 +}
  409 +
  410 +
  411 +template <typename T>
  412 +CUDA_CALLABLE static T abs(stim::complex<T> a)
  413 +{
  414 + return a.abs();
  415 +}
  416 +
  417 +template <typename T>
  418 +CUDA_CALLABLE static T real(stim::complex<T> a)
  419 +{
  420 + return a.r;
  421 +}
  422 +
  423 +//template <typename T>
  424 +CUDA_CALLABLE static float real(float a)
  425 +{
  426 + return a;
  427 +}
  428 +
  429 +template <typename T>
  430 +CUDA_CALLABLE static T imag(stim::complex<T> a)
  431 +{
  432 + return a.i;
  433 +}
  434 +
  435 +//trigonometric functions
  436 +//template<class A>
  437 +/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x)
  438 +{
  439 + stim::complex<float> result;
  440 + result.r = sinf(x.r) * coshf(x.i);
  441 + result.i = cosf(x.r) * sinhf(x.i);
  442 +
  443 + return result;
  444 +}*/
  445 +
  446 +template<class A>
  447 +CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)
  448 +{
  449 + stim::complex<A> result;
  450 + result.r = (A)std::sin(x.r) * (A)std::cosh(x.i);
  451 + result.i = (A)std::cos(x.r) * (A)std::sinh(x.i);
  452 +
  453 + return result;
  454 +}
  455 +
  456 +//floating point template
  457 +//template<class A>
  458 +/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x)
  459 +{
  460 + stim::complex<float> result;
  461 + result.r = cosf(x.r) * coshf(x.i);
  462 + result.i = -(sinf(x.r) * sinhf(x.i));
  463 +
  464 + return result;
  465 +}*/
  466 +
  467 +template<class A>
  468 +CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x)
  469 +{
  470 + stim::complex<A> result;
  471 + result.r = (A)std::cos(x.r) * (A)std::cosh(x.i);
  472 + result.i = -((A)std::sin(x.r) * (A)std::sinh(x.i));
  473 +
  474 + return result;
  475 +}
  476 +
  477 +
  478 +template<class A>
  479 +std::ostream& operator<<(std::ostream& os, stim::complex<A> x)
  480 +{
  481 + os<<x.str();
  482 + return os;
  483 +}
  484 +
  485 +template<class A>
  486 +std::istream& operator>>(std::istream& is, stim::complex<A>& x)
  487 +{
  488 + A r, i;
  489 + r = i = 0; //initialize the real and imaginary parts to zero
  490 + is>>r; //parse
  491 + is>>i;
  492 +
  493 + x.real(r); //assign the parsed values to x
  494 + x.imag(i);
  495 +
  496 + return is; //return the stream
  497 +}
  498 +
  499 +//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
  500 +//template<class T> using rtsComplex = stim::complex<T>;
  501 +//#endif
  502 +
  503 +
  504 +
  505 +#endif
... ...
math/complexfield.cuh
1   -#ifndef RTS_COMPLEXFIELD_H
2   -#define RTS_COMPLEXFIELD_H
3   -
4   -#include "cublas_v2.h"
5   -#include <cuda_runtime.h>
6   -
7   -#include "../math/field.cuh"
8   -#include "../math/complex.h"
9   -#include "../math/realfield.cuh"
10   -
11   -namespace stim{
12   -
13   -template<typename T>
14   -__global__ void gpu_complexfield_mag(T* dest, complex<T>* source, unsigned int r0, unsigned int r1){
15   -
16   - int iu = blockIdx.x * blockDim.x + threadIdx.x;
17   - int iv = blockIdx.y * blockDim.y + threadIdx.y;
18   -
19   - //make sure that the thread indices are in-bounds
20   - if(iu >= r0 || iv >= r1) return;
21   -
22   - //compute the index into the field
23   - int i = iv*r0 + iu;
24   -
25   - //calculate and store the result
26   - dest[i] = source[i].abs();
27   -}
28   -
29   -/*This class stores functions for saving images of complex fields
30   -*/
31   -template<typename T, unsigned int D = 1>
32   -class complexfield : public field< stim::complex<T>, D >{
33   - using field< stim::complex<T>, D >::R;
34   - using field< stim::complex<T>, D >::X;
35   - using field< stim::complex<T>, D >::shape;
36   - using field< stim::complex<T>, D >::cuda_params;
37   -
38   -
39   -
40   -public:
41   -
42   - //find the maximum value of component n
43   - stim::complex<T> find_max(unsigned int n){
44   - cublasStatus_t stat;
45   - cublasHandle_t handle;
46   -
47   - //create a CUBLAS handle
48   - stat = cublasCreate(&handle);
49   - if(stat != CUBLAS_STATUS_SUCCESS){
50   - std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
51   - exit(1);
52   - }
53   -
54   - int L = R[0] * R[1]; //compute the number of discrete points in a slice
55   - int index; //result of the max operation
56   - stim::complex<T> result;
57   -
58   - if(sizeof(T) == 8)
59   - stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index);
60   - else
61   - stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index);
62   -
63   - index -= 1; //adjust for 1-based indexing
64   -
65   - //if there was a GPU error, terminate
66   - if(stat != CUBLAS_STATUS_SUCCESS){
67   - std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
68   - exit(1);
69   - }
70   -
71   - //retrieve the maximum value for this slice and store it in the maxVal array
72   - std::cout<<X[n]<<std::endl;
73   - HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(stim::complex<T>), cudaMemcpyDeviceToHost));
74   - return result;
75   - }
76   -
77   -public:
78   -
79   - enum attribute {magnitude, real, imaginary};
80   -
81   - //constructor (no parameters)
82   - complexfield() : field<stim::complex<T>, D>(){};
83   -
84   - //constructor (resolution specified)
85   - complexfield(unsigned int r0, unsigned int r1) : field<stim::complex<T>, D>(r0, r1){};
86   -
87   - //assignment from a field of complex values
88   - complexfield & operator=(const field< stim::complex<T>, D > rhs){
89   - field< complex<T>, D >::operator=(rhs);
90   - return *this;
91   - }
92   -
93   - //assignment operator (scalar value)
94   - complexfield & operator= (const complex<T> rhs){
95   -
96   - field< complex<T>, D >::operator=(rhs);
97   - return *this;
98   - }
99   -
100   - //assignment operator (vector value)
101   - complexfield & operator= (const vec< complex<T>, D > rhs){
102   -
103   - field< complex<T>, D >::operator=(rhs);
104   - return *this;
105   - }
106   -
107   - //cropping
108   - complexfield crop(unsigned int width, unsigned int height){
109   -
110   - complexfield<T, D> result;
111   - result = field< complex<T>, D>::crop(width, height);
112   - return result;
113   - }
114   -
115   - void toImage(std::string filename, attribute type = magnitude, unsigned int n=0){
116   -
117   - field<T, 1> rf(R[0], R[1]);
118   -
119   - //get cuda parameters
120   - dim3 blocks, grids;
121   - cuda_params(grids, blocks);
122   -
123   - if(type == magnitude){
124   - gpu_complexfield_mag <<<grids, blocks>>> (rf.ptr(), X[n], R[0], R[1]);
125   - rf.toImage(filename, n, true);
126   - }
127   -
128   - }
129   -
130   -
131   -};
132   -
133   -
134   -} //end namespace rts
135   -
136   -
137   -#endif
  1 +#ifndef RTS_COMPLEXFIELD_H
  2 +#define RTS_COMPLEXFIELD_H
  3 +
  4 +#include "cublas_v2.h"
  5 +#include <cuda_runtime.h>
  6 +
  7 +#include "../math/field.cuh"
  8 +#include "../math/complex.h"
  9 +#include "../math/realfield.cuh"
  10 +
  11 +namespace stim{
  12 +
  13 +template<typename T>
  14 +__global__ void gpu_complexfield_mag(T* dest, complex<T>* source, unsigned int r0, unsigned int r1){
  15 +
  16 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  17 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  18 +
  19 + //make sure that the thread indices are in-bounds
  20 + if(iu >= r0 || iv >= r1) return;
  21 +
  22 + //compute the index into the field
  23 + int i = iv*r0 + iu;
  24 +
  25 + //calculate and store the result
  26 + dest[i] = source[i].abs();
  27 +}
  28 +
  29 +/*This class stores functions for saving images of complex fields
  30 +*/
  31 +template<typename T, unsigned int D = 1>
  32 +class complexfield : public field< stim::complex<T>, D >{
  33 + using field< stim::complex<T>, D >::R;
  34 + using field< stim::complex<T>, D >::X;
  35 + using field< stim::complex<T>, D >::shape;
  36 + using field< stim::complex<T>, D >::cuda_params;
  37 +
  38 +
  39 +
  40 +public:
  41 +
  42 + //find the maximum value of component n
  43 + stim::complex<T> find_max(unsigned int n){
  44 + cublasStatus_t stat;
  45 + cublasHandle_t handle;
  46 +
  47 + //create a CUBLAS handle
  48 + stat = cublasCreate(&handle);
  49 + if(stat != CUBLAS_STATUS_SUCCESS){
  50 + std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
  51 + exit(1);
  52 + }
  53 +
  54 + int L = R[0] * R[1]; //compute the number of discrete points in a slice
  55 + int index; //result of the max operation
  56 + stim::complex<T> result;
  57 +
  58 + if(sizeof(T) == 8)
  59 + stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index);
  60 + else
  61 + stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index);
  62 +
  63 + index -= 1; //adjust for 1-based indexing
  64 +
  65 + //if there was a GPU error, terminate
  66 + if(stat != CUBLAS_STATUS_SUCCESS){
  67 + std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
  68 + exit(1);
  69 + }
  70 +
  71 + //retrieve the maximum value for this slice and store it in the maxVal array
  72 + std::cout<<X[n]<<std::endl;
  73 + HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(stim::complex<T>), cudaMemcpyDeviceToHost));
  74 + return result;
  75 + }
  76 +
  77 +public:
  78 +
  79 + enum attribute {magnitude, real, imaginary};
  80 +
  81 + //constructor (no parameters)
  82 + complexfield() : field<stim::complex<T>, D>(){};
  83 +
  84 + //constructor (resolution specified)
  85 + complexfield(unsigned int r0, unsigned int r1) : field<stim::complex<T>, D>(r0, r1){};
  86 +
  87 + //assignment from a field of complex values
  88 + complexfield & operator=(const field< stim::complex<T>, D > rhs){
  89 + field< complex<T>, D >::operator=(rhs);
  90 + return *this;
  91 + }
  92 +
  93 + //assignment operator (scalar value)
  94 + complexfield & operator= (const complex<T> rhs){
  95 +
  96 + field< complex<T>, D >::operator=(rhs);
  97 + return *this;
  98 + }
  99 +
  100 + //assignment operator (vector value)
  101 + complexfield & operator= (const vec< complex<T>, D > rhs){
  102 +
  103 + field< complex<T>, D >::operator=(rhs);
  104 + return *this;
  105 + }
  106 +
  107 + //cropping
  108 + complexfield crop(unsigned int width, unsigned int height){
  109 +
  110 + complexfield<T, D> result;
  111 + result = field< complex<T>, D>::crop(width, height);
  112 + return result;
  113 + }
  114 +
  115 + void toImage(std::string filename, attribute type = magnitude, unsigned int n=0){
  116 +
  117 + field<T, 1> rf(R[0], R[1]);
  118 +
  119 + //get cuda parameters
  120 + dim3 blocks, grids;
  121 + cuda_params(grids, blocks);
  122 +
  123 + if(type == magnitude){
  124 + gpu_complexfield_mag <<<grids, blocks>>> (rf.ptr(), X[n], R[0], R[1]);
  125 + rf.toImage(filename, n, true);
  126 + }
  127 +
  128 + }
  129 +
  130 +
  131 +};
  132 +
  133 +
  134 +} //end namespace rts
  135 +
  136 +
  137 +#endif
... ...
math/field.cuh
1   -#ifndef RTS_FIELD_CUH
2   -#define RTS_FIELD_CUH
3   -
4   -#include <vector>
5   -#include <string>
6   -#include <sstream>
7   -
8   -#include "cublas_v2.h"
9   -#include <cuda_runtime.h>
10   -
11   -#include "../math/rect.h"
12   -#include "../cuda/threads.h"
13   -#include "../cuda/error.h"
14   -#include "../cuda/devices.h"
15   -#include "../visualization/colormap.h"
16   -
17   -
18   -namespace stim{
19   -
20   -//multiply R = X * Y
21   -template<typename T>
22   -__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){
23   -
24   - int iu = blockIdx.x * blockDim.x + threadIdx.x;
25   - int iv = blockIdx.y * blockDim.y + threadIdx.y;
26   -
27   - //make sure that the thread indices are in-bounds
28   - if(iu >= r0 || iv >= r1) return;
29   -
30   - //compute the index into the field
31   - int i = iv*r0 + iu;
32   -
33   - //calculate and store the result
34   - R[i] = X[i] * Y[i];
35   -}
36   -
37   -//assign a constant value to all points
38   -template<typename T>
39   -__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){
40   -
41   - int iu = blockIdx.x * blockDim.x + threadIdx.x;
42   - int iv = blockIdx.y * blockDim.y + threadIdx.y;
43   -
44   - //make sure that the thread indices are in-bounds
45   - if(iu >= r0 || iv >= r1) return;
46   -
47   - //compute the index into the field
48   - int i = iv*r0 + iu;
49   -
50   - //calculate and store the result
51   - ptr[i] = val;
52   -}
53   -
54   -//crop the field to the new dimensions (width x height)
55   -template<typename T>
56   -__global__ void gpu_field_crop(T* dest, T* source,
57   - unsigned int r0, unsigned int r1,
58   - unsigned int width, unsigned int height){
59   -
60   - int iu = blockIdx.x * blockDim.x + threadIdx.x;
61   - int iv = blockIdx.y * blockDim.y + threadIdx.y;
62   -
63   - //make sure that the thread indices are in-bounds
64   - if(iu >= width || iv >= height) return;
65   -
66   - //compute the index into the field
67   - int is = iv*r0 + iu;
68   - int id = iv*width + iu;
69   -
70   - //calculate and store the result
71   - dest[id] = source[is];
72   -}
73   -
74   -template<typename T, unsigned int D = 1>
75   -class field{
76   -
77   -protected:
78   -
79   - T* X[D]; //pointer to the field data
80   - unsigned int R[2]; //field resolution
81   - stim::rect<T> shape; //position and shape of the field slice
82   -
83   - //calculates the optimal block and grid sizes using information from the GPU
84   - void cuda_params(dim3& grids, dim3& blocks){
85   - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
86   - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
87   -
88   - //create one thread for each detector pixel
89   - blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
90   - grids = dim3((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
91   - }
92   -
93   - //find the maximum value of component n
94   - T find_max(unsigned int n){
95   - cublasStatus_t stat;
96   - cublasHandle_t handle;
97   -
98   - //create a CUBLAS handle
99   - stat = cublasCreate(&handle);
100   - if(stat != CUBLAS_STATUS_SUCCESS){
101   - std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
102   - exit(1);
103   - }
104   -
105   - int L = R[0] * R[1]; //compute the number of discrete points in a slice
106   - int index; //result of the max operation
107   - T result;
108   -
109   - if(sizeof(T) == 4)
110   - stat = cublasIsamax(handle, L, (const float*)X[n], 1, &index);
111   - else
112   - stat = cublasIdamax(handle, L, (const double*)X[n], 1, &index);
113   -
114   - index -= 1; //adjust for 1-based indexing
115   -
116   - //if there was a GPU error, terminate
117   - if(stat != CUBLAS_STATUS_SUCCESS){
118   - std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
119   - exit(1);
120   - }
121   -
122   - //retrieve the maximum value for this slice and store it in the maxVal array
123   - HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(T), cudaMemcpyDeviceToHost));
124   - return result;
125   - }
126   -
127   -public:
128   -
129   - //returns a list of file names given an input string with wild cards
130   - std::vector<std::string> process_filename(std::string name){
131   - std::stringstream ss(name);
132   - std::string item;
133   - std::vector<std::string> elems;
134   - while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension)
135   - {
136   - elems.push_back(item);
137   - }
138   -
139   - std::string prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters)
140   - std::string ext = elems[1]; //file extension (ex. .bmp, .png)
141   - ext = std::string(".") + ext; //add a period back into the extension
142   -
143   - size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'')
144   - size_t i1 = prefix.find_last_of("?");
145   -
146   - std::string postfix = prefix.substr(i1+1);
147   - prefix = prefix.substr(0, i0);
148   -
149   - unsigned int digits = i1 - i0 + 1; //compute the number of wildcards
150   -
151   - std::vector<std::string> flist; //create a vector of file names
152   - //fill the list
153   - for(unsigned int d=0; d<D; d++){
154   - std::stringstream ss; //assemble the file name
155   - ss<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext;
156   - flist.push_back(ss.str());
157   - }
158   -
159   - return flist;
160   - }
161   -
162   - void init(){
163   - for(unsigned int n=0; n<D; n++)
164   - X[n] = NULL;
165   - }
166   - void destroy(){
167   - for(unsigned int n=0; n<D; n++)
168   - if(X[n] != NULL)
169   - HANDLE_ERROR(cudaFree(X[n]));
170   - }
171   -
172   -public:
173   - //field constructor
174   - field(){
175   - R[0] = R[1] = 0;
176   - init();
177   - }
178   -
179   - field(unsigned int x, unsigned int y){
180   - //set the resolution
181   - R[0] = x;
182   - R[1] = y;
183   - //allocate memory on the GPU
184   - for(unsigned int n=0; n<D; n++){
185   - HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(T) * R[0] * R[1] ));
186   - }
187   - clear(); //zero the field
188   - }
189   -
190   - ///copy constructor
191   - field(const field &rhs){
192   - //first make a shallow copy
193   - R[0] = rhs.R[0];
194   - R[1] = rhs.R[1];
195   -
196   - for(unsigned int n=0; n<D; n++){
197   - //do we have to make a deep copy?
198   - if(rhs.X[n] == NULL)
199   - X[n] = NULL; //no
200   - else{
201   - //allocate the necessary memory
202   - HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1]));
203   -
204   - //copy the slice
205   - HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice));
206   - }
207   - }
208   - }
209   -
210   - ~field(){
211   - destroy();
212   - }
213   -
214   - //assignment operator
215   - field & operator= (const field & rhs){
216   -
217   - //de-allocate any existing GPU memory
218   - destroy();
219   -
220   - //copy the slice resolution
221   - R[0] = rhs.R[0];
222   - R[1] = rhs.R[1];
223   -
224   - for(unsigned int n=0; n<D; n++)
225   - {
226   - //allocate the necessary memory
227   - HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1]));
228   - //copy the slice
229   - HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice));
230   - }
231   - return *this;
232   - }
233   -
234   - field & operator= (const T rhs){
235   -
236   - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
237   - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
238   -
239   - //create one thread for each detector pixel
240   - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
241   - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
242   -
243   - //assign the constant value to all positions and dimensions
244   - for(int n=0; n<D; n++)
245   - stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]);
246   -
247   - return *this;
248   - }
249   -
250   - //assignment of vector component
251   - field & operator= (const vec<T, D> rhs){
252   -
253   - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
254   - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
255   -
256   - //create one thread for each detector pixel
257   - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
258   - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
259   -
260   - //assign the constant value to all positions and dimensions
261   - for(unsigned int n=0; n<D; n++)
262   - stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]);
263   -
264   - return *this;
265   -
266   - }
267   -
268   - //multiply two fields (element-wise multiplication)
269   - field<T, D> operator* (const field & rhs){
270   -
271   - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
272   - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
273   -
274   - //create one thread for each detector pixel
275   - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
276   - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
277   -
278   - //create a scalar field to store the result
279   - field<T, D> result(R[0], R[1]);
280   -
281   - for(int n=0; n<D; n++)
282   - stim::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]);
283   -
284   - return result;
285   - }
286   -
287   - T* ptr(unsigned int n = 0){
288   - if(n < D)
289   - return X[n];
290   - else return NULL;
291   - }
292   -
293   - //return the vector component at position (u, v)
294   - vec<T, D> get(unsigned int u, unsigned int v){
295   -
296   - vec<T, D> result;
297   - for(unsigned int d=0; d<D; d++){
298   - HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + v*R[0] + u, sizeof(T), cudaMemcpyDeviceToHost));
299   - }
300   -
301   - return result;
302   - }
303   -
304   - //set all components of the field to zero
305   - void clear(){
306   - for(unsigned int n=0; n<D; n++)
307   - if(X[n] != NULL)
308   - HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1]));
309   - }
310   -
311   - //crop the field
312   - field<T, D> crop(unsigned int width, unsigned int height){
313   - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
314   - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
315   -
316   - //create one thread for each detector pixel
317   - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
318   - dim3 dimGrid((width + SQRT_BLOCK -1)/SQRT_BLOCK, (height + SQRT_BLOCK - 1)/SQRT_BLOCK);
319   -
320   - //create a scalar field to store the result
321   - field<T, D> result(width, height);
322   -
323   - for(int n=0; n<D; n++)
324   - stim::gpu_field_crop <<<dimGrid, dimBlock>>> (result.X[n], X[n], R[0], R[1], width, height);
325   -
326   - return result;
327   - }
328   -
329   - //save an image representing component n
330   - void toImage(std::string filename, unsigned int n = 0,
331   - bool positive = false, stim::colormapType cmap = stim::cmBrewer){
332   - T max_val = find_max(n); //find the maximum value
333   -
334   - if(positive) //if the field is positive, use the range [0 max_val]
335   - stim::gpu2image<T>(X[n], filename, R[0], R[1], 0, max_val, cmap);
336   - else
337   - stim::gpu2image<T>(X[n], filename, R[0], R[1], -max_val, max_val, cmap);
338   - }
339   -
340   -};
341   -
342   -} //end namespace rts
343   -#endif
  1 +#ifndef RTS_FIELD_CUH
  2 +#define RTS_FIELD_CUH
  3 +
  4 +#include <vector>
  5 +#include <string>
  6 +#include <sstream>
  7 +
  8 +#include "cublas_v2.h"
  9 +#include <cuda_runtime.h>
  10 +
  11 +#include "../math/rect.h"
  12 +#include "../cuda/threads.h"
  13 +#include "../cuda/error.h"
  14 +#include "../cuda/devices.h"
  15 +#include "../visualization/colormap.h"
  16 +
  17 +
  18 +namespace stim{
  19 +
  20 +//multiply R = X * Y
  21 +template<typename T>
  22 +__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){
  23 +
  24 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  25 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  26 +
  27 + //make sure that the thread indices are in-bounds
  28 + if(iu >= r0 || iv >= r1) return;
  29 +
  30 + //compute the index into the field
  31 + int i = iv*r0 + iu;
  32 +
  33 + //calculate and store the result
  34 + R[i] = X[i] * Y[i];
  35 +}
  36 +
  37 +//assign a constant value to all points
  38 +template<typename T>
  39 +__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){
  40 +
  41 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  42 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  43 +
  44 + //make sure that the thread indices are in-bounds
  45 + if(iu >= r0 || iv >= r1) return;
  46 +
  47 + //compute the index into the field
  48 + int i = iv*r0 + iu;
  49 +
  50 + //calculate and store the result
  51 + ptr[i] = val;
  52 +}
  53 +
  54 +//crop the field to the new dimensions (width x height)
  55 +template<typename T>
  56 +__global__ void gpu_field_crop(T* dest, T* source,
  57 + unsigned int r0, unsigned int r1,
  58 + unsigned int width, unsigned int height){
  59 +
  60 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  61 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  62 +
  63 + //make sure that the thread indices are in-bounds
  64 + if(iu >= width || iv >= height) return;
  65 +
  66 + //compute the index into the field
  67 + int is = iv*r0 + iu;
  68 + int id = iv*width + iu;
  69 +
  70 + //calculate and store the result
  71 + dest[id] = source[is];
  72 +}
  73 +
  74 +template<typename T, unsigned int D = 1>
  75 +class field{
  76 +
  77 +protected:
  78 +
  79 + T* X[D]; //pointer to the field data
  80 + unsigned int R[2]; //field resolution
  81 + stim::rect<T> shape; //position and shape of the field slice
  82 +
  83 + //calculates the optimal block and grid sizes using information from the GPU
  84 + void cuda_params(dim3& grids, dim3& blocks){
  85 + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
  86 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  87 +
  88 + //create one thread for each detector pixel
  89 + blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
  90 + grids = dim3((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  91 + }
  92 +
  93 + //find the maximum value of component n
  94 + T find_max(unsigned int n){
  95 + cublasStatus_t stat;
  96 + cublasHandle_t handle;
  97 +
  98 + //create a CUBLAS handle
  99 + stat = cublasCreate(&handle);
  100 + if(stat != CUBLAS_STATUS_SUCCESS){
  101 + std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
  102 + exit(1);
  103 + }
  104 +
  105 + int L = R[0] * R[1]; //compute the number of discrete points in a slice
  106 + int index; //result of the max operation
  107 + T result;
  108 +
  109 + if(sizeof(T) == 4)
  110 + stat = cublasIsamax(handle, L, (const float*)X[n], 1, &index);
  111 + else
  112 + stat = cublasIdamax(handle, L, (const double*)X[n], 1, &index);
  113 +
  114 + index -= 1; //adjust for 1-based indexing
  115 +
  116 + //if there was a GPU error, terminate
  117 + if(stat != CUBLAS_STATUS_SUCCESS){
  118 + std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
  119 + exit(1);
  120 + }
  121 +
  122 + //retrieve the maximum value for this slice and store it in the maxVal array
  123 + HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(T), cudaMemcpyDeviceToHost));
  124 + return result;
  125 + }
  126 +
  127 +public:
  128 +
  129 + //returns a list of file names given an input string with wild cards
  130 + std::vector<std::string> process_filename(std::string name){
  131 + std::stringstream ss(name);
  132 + std::string item;
  133 + std::vector<std::string> elems;
  134 + while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension)
  135 + {
  136 + elems.push_back(item);
  137 + }
  138 +
  139 + std::string prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters)
  140 + std::string ext = elems[1]; //file extension (ex. .bmp, .png)
  141 + ext = std::string(".") + ext; //add a period back into the extension
  142 +
  143 + size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'')
  144 + size_t i1 = prefix.find_last_of("?");
  145 +
  146 + std::string postfix = prefix.substr(i1+1);
  147 + prefix = prefix.substr(0, i0);
  148 +
  149 + unsigned int digits = i1 - i0 + 1; //compute the number of wildcards
  150 +
  151 + std::vector<std::string> flist; //create a vector of file names
  152 + //fill the list
  153 + for(unsigned int d=0; d<D; d++){
  154 + std::stringstream ss; //assemble the file name
  155 + ss<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext;
  156 + flist.push_back(ss.str());
  157 + }
  158 +
  159 + return flist;
  160 + }
  161 +
  162 + void init(){
  163 + for(unsigned int n=0; n<D; n++)
  164 + X[n] = NULL;
  165 + }
  166 + void destroy(){
  167 + for(unsigned int n=0; n<D; n++)
  168 + if(X[n] != NULL)
  169 + HANDLE_ERROR(cudaFree(X[n]));
  170 + }
  171 +
  172 +public:
  173 + //field constructor
  174 + field(){
  175 + R[0] = R[1] = 0;
  176 + init();
  177 + }
  178 +
  179 + field(unsigned int x, unsigned int y){
  180 + //set the resolution
  181 + R[0] = x;
  182 + R[1] = y;
  183 + //allocate memory on the GPU
  184 + for(unsigned int n=0; n<D; n++){
  185 + HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(T) * R[0] * R[1] ));
  186 + }
  187 + clear(); //zero the field
  188 + }
  189 +
  190 + ///copy constructor
  191 + field(const field &rhs){
  192 + //first make a shallow copy
  193 + R[0] = rhs.R[0];
  194 + R[1] = rhs.R[1];
  195 +
  196 + for(unsigned int n=0; n<D; n++){
  197 + //do we have to make a deep copy?
  198 + if(rhs.X[n] == NULL)
  199 + X[n] = NULL; //no
  200 + else{
  201 + //allocate the necessary memory
  202 + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1]));
  203 +
  204 + //copy the slice
  205 + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice));
  206 + }
  207 + }
  208 + }
  209 +
  210 + ~field(){
  211 + destroy();
  212 + }
  213 +
  214 + //assignment operator
  215 + field & operator= (const field & rhs){
  216 +
  217 + //de-allocate any existing GPU memory
  218 + destroy();
  219 +
  220 + //copy the slice resolution
  221 + R[0] = rhs.R[0];
  222 + R[1] = rhs.R[1];
  223 +
  224 + for(unsigned int n=0; n<D; n++)
  225 + {
  226 + //allocate the necessary memory
  227 + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1]));
  228 + //copy the slice
  229 + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice));
  230 + }
  231 + return *this;
  232 + }
  233 +
  234 + field & operator= (const T rhs){
  235 +
  236 + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
  237 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  238 +
  239 + //create one thread for each detector pixel
  240 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  241 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  242 +
  243 + //assign the constant value to all positions and dimensions
  244 + for(int n=0; n<D; n++)
  245 + stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]);
  246 +
  247 + return *this;
  248 + }
  249 +
  250 + //assignment of vector component
  251 + field & operator= (const vec<T, D> rhs){
  252 +
  253 + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
  254 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  255 +
  256 + //create one thread for each detector pixel
  257 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  258 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  259 +
  260 + //assign the constant value to all positions and dimensions
  261 + for(unsigned int n=0; n<D; n++)
  262 + stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]);
  263 +
  264 + return *this;
  265 +
  266 + }
  267 +
  268 + //multiply two fields (element-wise multiplication)
  269 + field<T, D> operator* (const field & rhs){
  270 +
  271 + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
  272 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  273 +
  274 + //create one thread for each detector pixel
  275 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  276 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  277 +
  278 + //create a scalar field to store the result
  279 + field<T, D> result(R[0], R[1]);
  280 +
  281 + for(int n=0; n<D; n++)
  282 + stim::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]);
  283 +
  284 + return result;
  285 + }
  286 +
  287 + T* ptr(unsigned int n = 0){
  288 + if(n < D)
  289 + return X[n];
  290 + else return NULL;
  291 + }
  292 +
  293 + //return the vector component at position (u, v)
  294 + vec<T, D> get(unsigned int u, unsigned int v){
  295 +
  296 + vec<T, D> result;
  297 + for(unsigned int d=0; d<D; d++){
  298 + HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + v*R[0] + u, sizeof(T), cudaMemcpyDeviceToHost));
  299 + }
  300 +
  301 + return result;
  302 + }
  303 +
  304 + //set all components of the field to zero
  305 + void clear(){
  306 + for(unsigned int n=0; n<D; n++)
  307 + if(X[n] != NULL)
  308 + HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1]));
  309 + }
  310 +
  311 + //crop the field
  312 + field<T, D> crop(unsigned int width, unsigned int height){
  313 + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
  314 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  315 +
  316 + //create one thread for each detector pixel
  317 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  318 + dim3 dimGrid((width + SQRT_BLOCK -1)/SQRT_BLOCK, (height + SQRT_BLOCK - 1)/SQRT_BLOCK);
  319 +
  320 + //create a scalar field to store the result
  321 + field<T, D> result(width, height);
  322 +
  323 + for(int n=0; n<D; n++)
  324 + stim::gpu_field_crop <<<dimGrid, dimBlock>>> (result.X[n], X[n], R[0], R[1], width, height);
  325 +
  326 + return result;
  327 + }
  328 +
  329 + //save an image representing component n
  330 + void toImage(std::string filename, unsigned int n = 0,
  331 + bool positive = false, stim::colormapType cmap = stim::cmBrewer){
  332 + T max_val = find_max(n); //find the maximum value
  333 +
  334 + if(positive) //if the field is positive, use the range [0 max_val]
  335 + stim::gpu2image<T>(X[n], filename, R[0], R[1], 0, max_val, cmap);
  336 + else
  337 + stim::gpu2image<T>(X[n], filename, R[0], R[1], -max_val, max_val, cmap);
  338 + }
  339 +
  340 +};
  341 +
  342 +} //end namespace rts
  343 +#endif
... ...
math/rect.h
1   -#ifndef RTS_RECT_H
2   -#define RTS_RECT_H
3   -
4   -//enable CUDA_CALLABLE macro
5   -#include "../cuda/callable.h"
6   -#include "../math/vector.h"
7   -#include "../math/triangle.h"
8   -#include "../math/quaternion.h"
9   -#include <iostream>
10   -#include <iomanip>
11   -#include <algorithm>
12   -
13   -namespace stim{
14   -
15   -//template for a rectangle class in ND space
16   -template <class T, int N = 3>
17   -struct rect
18   -{
19   - /*
20   - ^ O
21   - |
22   - |
23   - Y C
24   - |
25   - |
26   - O---------X--------->
27   - */
28   -
29   -private:
30   -
31   - stim::vec<T, N> C;
32   - stim::vec<T, N> X;
33   - stim::vec<T, N> Y;
34   -
35   - CUDA_CALLABLE void scale(T factor){
36   - X *= factor;
37   - Y *= factor;
38   - }
39   -
40   - CUDA_CALLABLE void normal(vec<T, N> n){ //orient the rectangle along the specified normal
41   -
42   - n = n.norm(); //normalize, just in case
43   - vec<T, N> n_current = X.cross(Y).norm(); //compute the current normal
44   - quaternion<T> q; //create a quaternion
45   - q.CreateRotation(n_current, n); //initialize a rotation from n_current to n
46   -
47   - //apply the quaternion to the vectors and position
48   - X = q.toMatrix3() * X;
49   - Y = q.toMatrix3() * Y;
50   - }
51   -
52   - CUDA_CALLABLE void init(){
53   - C = vec<T, N>(0, 0, 0);
54   - X = vec<T, N>(1, 0, 0);
55   - Y = vec<T, N>(0, 1, 0);
56   - }
57   -
58   -public:
59   -
60   - CUDA_CALLABLE rect(){
61   - init();
62   - }
63   -
64   - CUDA_CALLABLE rect(T size, T z_pos = (T)0){
65   - init(); //use the default setup
66   - scale(size); //scale the rectangle
67   - C[2] = z_pos;
68   - }
69   -
70   - CUDA_CALLABLE rect(T size, vec<T, N> c, vec<T, N> n = vec<T, N>(0, 0, 1)){
71   - init(); //start with the default setting
72   - C = c;
73   - scale(size); //scale the rectangle
74   - normal(n); //orient
75   -
76   - }
77   -
78   - /*CUDA_CALLABLE rect(vec<T, N> a, vec<T, N> b, vec<T, N> c)
79   - {
80   - A = a;
81   - Y = b - a;
82   - X = c - a - Y;
83   -
84   - }*/
85   -
86   - /*******************************************************************
87   - Constructor - create a rect from a position, normal, and rotation
88   - *******************************************************************/
89   - /*CUDA_CALLABLE rect(stim::vec<T, N> c, stim::vec<T, N> normal, T width, T height, T theta)
90   - {
91   -
92   - //compute the X direction - start along world-space X
93   - Y = stim::vec<T, N>(0, 1, 0);
94   - if(Y == normal)
95   - Y = stim::vec<T, N>(0, 0, 1);
96   -
97   - X = Y.cross(normal).norm();
98   -
99   - std::cout<<X<<std::endl;
100   -
101   - //rotate the X axis by theta radians
102   - stim::quaternion<T> q;
103   - q.CreateRotation(theta, normal);
104   - X = q.toMatrix3() * X;
105   - Y = normal.cross(X);
106   -
107   - //normalize everything
108   - X = X.norm();
109   - Y = Y.norm();
110   -
111   - //scale to match the rect width and height
112   - X = X * width;
113   - Y = Y * height;
114   -
115   - //set the corner of the plane
116   - A = c - X * 0.5f - Y * 0.5f;
117   -
118   - std::cout<<X<<std::endl;
119   - }*/
120   -
121   - //boolean comparison
122   - bool operator==(const rect<T, N> & rhs)
123   - {
124   - if(C == rhs.C && X == rhs.X && Y == rhs.Y)
125   - return true;
126   - else
127   - return false;
128   - }
129   -
130   - /*******************************************
131   - Return the normal for the rect
132   - *******************************************/
133   - CUDA_CALLABLE stim::vec<T, N> n()
134   - {
135   - return (X.cross(Y)).norm();
136   - }
137   -
138   - CUDA_CALLABLE stim::vec<T, N> p(T a, T b)
139   - {
140   - stim::vec<T, N> result;
141   - //given the two parameters a, b = [0 1], returns the position in world space
142   - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
143   - result = A + X * a + Y * b;
144   -
145   - return result;
146   - }
147   -
148   - CUDA_CALLABLE stim::vec<T, N> operator()(T a, T b)
149   - {
150   - return p(a, b);
151   - }
152   -
153   - std::string str()
154   - {
155   - std::stringstream ss;
156   - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
157   - ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
158   - ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl;
159   - ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X;
160   -
161   - return ss.str();
162   -
163   - }
164   -
165   - CUDA_CALLABLE rect<T, N> operator*(T rhs)
166   - {
167   - //scales the plane by a scalar value
168   -
169   - //create the new rectangle
170   - rect<T, N> result = *this;
171   - result.scale(rhs);
172   -
173   - return result;
174   -
175   - }
176   -
177   - CUDA_CALLABLE T dist(vec<T, N> p)
178   - {
179   - //compute the distance between a point and this rect
180   -
181   - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
182   -
183   - //first break the rect up into two triangles
184   - triangle<T, N> T0(A, A+X, A+Y);
185   - triangle<T, N> T1(A+X+Y, A+X, A+Y);
186   -
187   -
188   - T d0 = T0.dist(p);
189   - T d1 = T1.dist(p);
190   -
191   - if(d0 < d1)
192   - return d0;
193   - else
194   - return d1;
195   - }
196   -
197   - CUDA_CALLABLE T dist_max(vec<T, N> p)
198   - {
199   - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
200   - T da = (A - p).len();
201   - T db = (A+X - p).len();
202   - T dc = (A+Y - p).len();
203   - T dd = (A+X+Y - p).len();
204   -
205   - return std::max( da, std::max(db, std::max(dc, dd) ) );
206   - }
207   -};
208   -
209   -} //end namespace rts
210   -
211   -template <typename T, int N>
212   -std::ostream& operator<<(std::ostream& os, stim::rect<T, N> R)
213   -{
214   - os<<R.str();
215   - return os;
216   -}
217   -
218   -
219   -#endif
  1 +#ifndef RTS_RECT_H
  2 +#define RTS_RECT_H
  3 +
  4 +//enable CUDA_CALLABLE macro
  5 +#include "../cuda/callable.h"
  6 +#include "../math/vector.h"
  7 +#include "../math/triangle.h"
  8 +#include "../math/quaternion.h"
  9 +#include <iostream>
  10 +#include <iomanip>
  11 +#include <algorithm>
  12 +
  13 +namespace stim{
  14 +
  15 +//template for a rectangle class in ND space
  16 +template <class T, int N = 3>
  17 +struct rect
  18 +{
  19 + /*
  20 + ^ O
  21 + |
  22 + |
  23 + Y C
  24 + |
  25 + |
  26 + O---------X--------->
  27 + */
  28 +
  29 +private:
  30 +
  31 + stim::vec<T, N> C;
  32 + stim::vec<T, N> X;
  33 + stim::vec<T, N> Y;
  34 +
  35 + CUDA_CALLABLE void scale(T factor){
  36 + X *= factor;
  37 + Y *= factor;
  38 + }
  39 +
  40 + CUDA_CALLABLE void normal(vec<T, N> n){ //orient the rectangle along the specified normal
  41 +
  42 + n = n.norm(); //normalize, just in case
  43 + vec<T, N> n_current = X.cross(Y).norm(); //compute the current normal
  44 + quaternion<T> q; //create a quaternion
  45 + q.CreateRotation(n_current, n); //initialize a rotation from n_current to n
  46 +
  47 + //apply the quaternion to the vectors and position
  48 + X = q.toMatrix3() * X;
  49 + Y = q.toMatrix3() * Y;
  50 + }
  51 +
  52 + CUDA_CALLABLE void init(){
  53 + C = vec<T, N>(0, 0, 0);
  54 + X = vec<T, N>(1, 0, 0);
  55 + Y = vec<T, N>(0, 1, 0);
  56 + }
  57 +
  58 +public:
  59 +
  60 + CUDA_CALLABLE rect(){
  61 + init();
  62 + }
  63 +
  64 + CUDA_CALLABLE rect(T size, T z_pos = (T)0){
  65 + init(); //use the default setup
  66 + scale(size); //scale the rectangle
  67 + C[2] = z_pos;
  68 + }
  69 +
  70 + CUDA_CALLABLE rect(T size, vec<T, N> c, vec<T, N> n = vec<T, N>(0, 0, 1)){
  71 + init(); //start with the default setting
  72 + C = c;
  73 + scale(size); //scale the rectangle
  74 + normal(n); //orient
  75 +
  76 + }
  77 +
  78 + /*CUDA_CALLABLE rect(vec<T, N> a, vec<T, N> b, vec<T, N> c)
  79 + {
  80 + A = a;
  81 + Y = b - a;
  82 + X = c - a - Y;
  83 +
  84 + }*/
  85 +
  86 + /*******************************************************************
  87 + Constructor - create a rect from a position, normal, and rotation
  88 + *******************************************************************/
  89 + /*CUDA_CALLABLE rect(stim::vec<T, N> c, stim::vec<T, N> normal, T width, T height, T theta)
  90 + {
  91 +
  92 + //compute the X direction - start along world-space X
  93 + Y = stim::vec<T, N>(0, 1, 0);
  94 + if(Y == normal)
  95 + Y = stim::vec<T, N>(0, 0, 1);
  96 +
  97 + X = Y.cross(normal).norm();
  98 +
  99 + std::cout<<X<<std::endl;
  100 +
  101 + //rotate the X axis by theta radians
  102 + stim::quaternion<T> q;
  103 + q.CreateRotation(theta, normal);
  104 + X = q.toMatrix3() * X;
  105 + Y = normal.cross(X);
  106 +
  107 + //normalize everything
  108 + X = X.norm();
  109 + Y = Y.norm();
  110 +
  111 + //scale to match the rect width and height
  112 + X = X * width;
  113 + Y = Y * height;
  114 +
  115 + //set the corner of the plane
  116 + A = c - X * 0.5f - Y * 0.5f;
  117 +
  118 + std::cout<<X<<std::endl;
  119 + }*/
  120 +
  121 + //boolean comparison
  122 + bool operator==(const rect<T, N> & rhs)
  123 + {
  124 + if(C == rhs.C && X == rhs.X && Y == rhs.Y)
  125 + return true;
  126 + else
  127 + return false;
  128 + }
  129 +
  130 + /*******************************************
  131 + Return the normal for the rect
  132 + *******************************************/
  133 + CUDA_CALLABLE stim::vec<T, N> n()
  134 + {
  135 + return (X.cross(Y)).norm();
  136 + }
  137 +
  138 + CUDA_CALLABLE stim::vec<T, N> p(T a, T b)
  139 + {
  140 + stim::vec<T, N> result;
  141 + //given the two parameters a, b = [0 1], returns the position in world space
  142 + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
  143 + result = A + X * a + Y * b;
  144 +
  145 + return result;
  146 + }
  147 +
  148 + CUDA_CALLABLE stim::vec<T, N> operator()(T a, T b)
  149 + {
  150 + return p(a, b);
  151 + }
  152 +
  153 + std::string str()
  154 + {
  155 + std::stringstream ss;
  156 + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
  157 + ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
  158 + ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl;
  159 + ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X;
  160 +
  161 + return ss.str();
  162 +
  163 + }
  164 +
  165 + CUDA_CALLABLE rect<T, N> operator*(T rhs)
  166 + {
  167 + //scales the plane by a scalar value
  168 +
  169 + //create the new rectangle
  170 + rect<T, N> result = *this;
  171 + result.scale(rhs);
  172 +
  173 + return result;
  174 +
  175 + }
  176 +
  177 + CUDA_CALLABLE T dist(vec<T, N> p)
  178 + {
  179 + //compute the distance between a point and this rect
  180 +
  181 + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
  182 +
  183 + //first break the rect up into two triangles
  184 + triangle<T, N> T0(A, A+X, A+Y);
  185 + triangle<T, N> T1(A+X+Y, A+X, A+Y);
  186 +
  187 +
  188 + T d0 = T0.dist(p);
  189 + T d1 = T1.dist(p);
  190 +
  191 + if(d0 < d1)
  192 + return d0;
  193 + else
  194 + return d1;
  195 + }
  196 +
  197 + CUDA_CALLABLE T dist_max(vec<T, N> p)
  198 + {
  199 + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
  200 + T da = (A - p).len();
  201 + T db = (A+X - p).len();
  202 + T dc = (A+Y - p).len();
  203 + T dd = (A+X+Y - p).len();
  204 +
  205 + return std::max( da, std::max(db, std::max(dc, dd) ) );
  206 + }
  207 +};
  208 +
  209 +} //end namespace rts
  210 +
  211 +template <typename T, int N>
  212 +std::ostream& operator<<(std::ostream& os, stim::rect<T, N> R)
  213 +{
  214 + os<<R.str();
  215 + return os;
  216 +}
  217 +
  218 +
  219 +#endif
... ...
math/triangle.h
1   -#ifndef RTS_TRIANGLE_H
2   -#define RTS_TRIANGLE_H
3   -
4   -//enable CUDA_CALLABLE macro
5   -#include "../cuda/callable.h"
6   -#include "../math/vector.h"
7   -#include <iostream>
8   -
9   -namespace stim{
10   -
11   -template <class T, int N=3>
12   -struct triangle
13   -{
14   - /*
15   - A------>B
16   - | /
17   - | /
18   - | /
19   - | /
20   - | /
21   - | /
22   - C
23   - */
24   - private:
25   -
26   - vec<T, N> A;
27   - vec<T, N> B;
28   - vec<T, N> C;
29   -
30   - CUDA_CALLABLE vec<T, N> _p(T s, T t)
31   - {
32   - //This function returns the point specified by p = A + s(B-A) + t(C-A)
33   - vec<T, N> E0 = B-A;
34   - vec<T, N> E1 = C-A;
35   -
36   - return A + s*E0 + t*E1;
37   - }
38   -
39   -
40   - public:
41   -
42   -
43   -
44   - CUDA_CALLABLE triangle()
45   - {
46   -
47   - }
48   -
49   - CUDA_CALLABLE triangle(vec<T, N> a, vec<T, N> b, vec<T, N> c)
50   - {
51   - A = a;
52   - B = b;
53   - C = c;
54   - }
55   -
56   - CUDA_CALLABLE stim::vec<T, N> operator()(T s, T t)
57   - {
58   - return _p(s, t);
59   - }
60   -
61   - CUDA_CALLABLE vec<T, N> nearest(vec<T, N> p)
62   - {
63   - //comptue the distance between a point and this triangle
64   - // This code is adapted from: http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf
65   -
66   - vec<T, N> E0 = B-A;
67   - vec<T, N> E1 = C-A;
68   - vec<T, N> D = A - p;
69   -
70   - T a = E0.dot(E0);
71   - T b = E0.dot(E1);
72   - T c = E1.dot(E1);
73   - T d = E0.dot(D);
74   - T e = E1.dot(D);
75   - //T f = D.dot(D);
76   -
77   - T det = a*c - b*b;
78   - T s = b*e - c*d;
79   - T t = b*d - a*e;
80   -
81   - /*std::cout<<"E0: "<<E0<<std::endl;
82   - std::cout<<"E1: "<<E1<<std::endl;
83   - std::cout<<"a: "<<a<<std::endl;
84   - std::cout<<"b: "<<b<<std::endl;
85   - std::cout<<"c: "<<c<<std::endl;
86   - std::cout<<"d: "<<d<<std::endl;
87   - std::cout<<"e: "<<e<<std::endl;
88   - std::cout<<"f: "<<f<<std::endl;
89   - std::cout<<"det: "<<det<<std::endl;
90   - std::cout<<"s: "<<s<<std::endl;
91   - std::cout<<"t: "<<t<<std::endl;*/
92   -
93   -
94   - if( s+t <= det)
95   - {
96   - if(s < 0)
97   - {
98   - if(t < 0)
99   - {
100   - //region 4
101   - //std::cout<<"Region 4"<<std::endl;
102   - s = 0;
103   - t = 0;
104   - //done?
105   - }
106   - else
107   - {
108   - //region 3
109   - //std::cout<<"Region 3"<<std::endl;
110   - s=0;
111   - t = ( e >= 0 ? 0 : ( -e >= c ? 1 : -e/c ) );
112   - //done
113   - }
114   - }
115   - else if(t < 0)
116   - {
117   - //region 5
118   - //std::cout<<"Region 5"<<std::endl;
119   - s = ( d >= 0 ? 0 : ( -d >= a ? 1 : -d/a ) );
120   - t = 0;
121   - //done
122   - }
123   - else
124   - {
125   - //region 0
126   - //std::cout<<"Region 0"<<std::endl;
127   - T invDet = (T)1.0/det;
128   - s *= invDet;
129   - t *= invDet;
130   - //done
131   - }
132   - }
133   - else
134   - {
135   - if(s < 0)
136   - {
137   - //region 2
138   - //std::cout<<"Region 2"<<std::endl;
139   - s = 0;
140   - t = 1;
141   - //done?
142   -
143   - }
144   - else if(t < 0)
145   - {
146   - //region 6
147   - //std::cout<<"Region 6"<<std::endl;
148   - s = 1;
149   - t = 0;
150   - //done?
151   - }
152   - else
153   - {
154   - //region 1
155   - //std::cout<<"Region 1"<<std::endl;
156   - T numer = c + e - b - d;
157   - if( numer <= 0 )
158   - s = 0;
159   - else
160   - {
161   - T denom = a - 2 * b + c;
162   - s = ( numer >= denom ? 1 : numer/denom );
163   - }
164   - t = 1 - s;
165   - //done
166   - }
167   - }
168   -
169   - //std::cout<<"s: "<<s<<std::endl;
170   - //std::cout<<"t: "<<t<<std::endl;
171   -
172   - //std::cout<<"p: "<<_p(s, t)<<std::endl;
173   -
174   - return _p(s, t);
175   -
176   - }
177   -
178   - CUDA_CALLABLE T dist(vec<T, N> p)
179   - {
180   - vec<T, N> n = nearest(p);
181   -
182   - return (p - n).len();
183   - }
184   -};
185   -
186   -}
187   -
188   -#endif
  1 +#ifndef RTS_TRIANGLE_H
  2 +#define RTS_TRIANGLE_H
  3 +
  4 +//enable CUDA_CALLABLE macro
  5 +#include "../cuda/callable.h"
  6 +#include "../math/vector.h"
  7 +#include <iostream>
  8 +
  9 +namespace stim{
  10 +
  11 +template <class T, int N=3>
  12 +struct triangle
  13 +{
  14 + /*
  15 + A------>B
  16 + | /
  17 + | /
  18 + | /
  19 + | /
  20 + | /
  21 + | /
  22 + C
  23 + */
  24 + private:
  25 +
  26 + vec<T, N> A;
  27 + vec<T, N> B;
  28 + vec<T, N> C;
  29 +
  30 + CUDA_CALLABLE vec<T, N> _p(T s, T t)
  31 + {
  32 + //This function returns the point specified by p = A + s(B-A) + t(C-A)
  33 + vec<T, N> E0 = B-A;
  34 + vec<T, N> E1 = C-A;
  35 +
  36 + return A + s*E0 + t*E1;
  37 + }
  38 +
  39 +
  40 + public:
  41 +
  42 +
  43 +
  44 + CUDA_CALLABLE triangle()
  45 + {
  46 +
  47 + }
  48 +
  49 + CUDA_CALLABLE triangle(vec<T, N> a, vec<T, N> b, vec<T, N> c)
  50 + {
  51 + A = a;
  52 + B = b;
  53 + C = c;
  54 + }
  55 +
  56 + CUDA_CALLABLE stim::vec<T, N> operator()(T s, T t)
  57 + {
  58 + return _p(s, t);
  59 + }
  60 +
  61 + CUDA_CALLABLE vec<T, N> nearest(vec<T, N> p)
  62 + {
  63 + //comptue the distance between a point and this triangle
  64 + // This code is adapted from: http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf
  65 +
  66 + vec<T, N> E0 = B-A;
  67 + vec<T, N> E1 = C-A;
  68 + vec<T, N> D = A - p;
  69 +
  70 + T a = E0.dot(E0);
  71 + T b = E0.dot(E1);
  72 + T c = E1.dot(E1);
  73 + T d = E0.dot(D);
  74 + T e = E1.dot(D);
  75 + //T f = D.dot(D);
  76 +
  77 + T det = a*c - b*b;
  78 + T s = b*e - c*d;
  79 + T t = b*d - a*e;
  80 +
  81 + /*std::cout<<"E0: "<<E0<<std::endl;
  82 + std::cout<<"E1: "<<E1<<std::endl;
  83 + std::cout<<"a: "<<a<<std::endl;
  84 + std::cout<<"b: "<<b<<std::endl;
  85 + std::cout<<"c: "<<c<<std::endl;
  86 + std::cout<<"d: "<<d<<std::endl;
  87 + std::cout<<"e: "<<e<<std::endl;
  88 + std::cout<<"f: "<<f<<std::endl;
  89 + std::cout<<"det: "<<det<<std::endl;
  90 + std::cout<<"s: "<<s<<std::endl;
  91 + std::cout<<"t: "<<t<<std::endl;*/
  92 +
  93 +
  94 + if( s+t <= det)
  95 + {
  96 + if(s < 0)
  97 + {
  98 + if(t < 0)
  99 + {
  100 + //region 4
  101 + //std::cout<<"Region 4"<<std::endl;
  102 + s = 0;
  103 + t = 0;
  104 + //done?
  105 + }
  106 + else
  107 + {
  108 + //region 3
  109 + //std::cout<<"Region 3"<<std::endl;
  110 + s=0;
  111 + t = ( e >= 0 ? 0 : ( -e >= c ? 1 : -e/c ) );
  112 + //done
  113 + }
  114 + }
  115 + else if(t < 0)
  116 + {
  117 + //region 5
  118 + //std::cout<<"Region 5"<<std::endl;
  119 + s = ( d >= 0 ? 0 : ( -d >= a ? 1 : -d/a ) );
  120 + t = 0;
  121 + //done
  122 + }
  123 + else
  124 + {
  125 + //region 0
  126 + //std::cout<<"Region 0"<<std::endl;
  127 + T invDet = (T)1.0/det;
  128 + s *= invDet;
  129 + t *= invDet;
  130 + //done
  131 + }
  132 + }
  133 + else
  134 + {
  135 + if(s < 0)
  136 + {
  137 + //region 2
  138 + //std::cout<<"Region 2"<<std::endl;
  139 + s = 0;
  140 + t = 1;
  141 + //done?
  142 +
  143 + }
  144 + else if(t < 0)
  145 + {
  146 + //region 6
  147 + //std::cout<<"Region 6"<<std::endl;
  148 + s = 1;
  149 + t = 0;
  150 + //done?
  151 + }
  152 + else
  153 + {
  154 + //region 1
  155 + //std::cout<<"Region 1"<<std::endl;
  156 + T numer = c + e - b - d;
  157 + if( numer <= 0 )
  158 + s = 0;
  159 + else
  160 + {
  161 + T denom = a - 2 * b + c;
  162 + s = ( numer >= denom ? 1 : numer/denom );
  163 + }
  164 + t = 1 - s;
  165 + //done
  166 + }
  167 + }
  168 +
  169 + //std::cout<<"s: "<<s<<std::endl;
  170 + //std::cout<<"t: "<<t<<std::endl;
  171 +
  172 + //std::cout<<"p: "<<_p(s, t)<<std::endl;
  173 +
  174 + return _p(s, t);
  175 +
  176 + }
  177 +
  178 + CUDA_CALLABLE T dist(vec<T, N> p)
  179 + {
  180 + vec<T, N> n = nearest(p);
  181 +
  182 + return (p - n).len();
  183 + }
  184 +};
  185 +
  186 +}
  187 +
  188 +#endif
... ...
optics/material.h
1   -#ifndef RTS_MATERIAL_H
2   -#define RTS_MATERIAL_H
3   -
4   -#include <vector>
5   -#include <ostream>
6   -#include <iostream>
7   -#include <fstream>
8   -#include <complex>
9   -#include <algorithm>
10   -#include <sstream>
11   -#include "../math/complex.h"
12   -#include "../math/constants.h"
13   -#include "../math/function.h"
14   -
15   -namespace stim{
16   -
17   -//Material class - default representation for the material property is the refractive index (RI)
18   -template<typename T>
19   -class material : public function< T, complex<T> >{
20   -
21   -public:
22   - enum wave_property{microns, inverse_cm};
23   - enum material_property{ri, absorbance};
24   -
25   -private:
26   -
27   - using function< T, complex<T> >::X;
28   - using function< T, complex<T> >::Y;
29   - using function< T, complex<T> >::insert;
30   - using function< T, complex<T> >::bounding;
31   -
32   - std::string name; //name for the material (defaults to file name)
33   -
34   - void process_header(std::string str, wave_property& wp, material_property& mp){
35   -
36   - std::stringstream ss(str); //create a stream from the data string
37   - std::string line;
38   - std::getline(ss, line); //get the first line as a string
39   - while(line[0] == '#'){ //continue looping while the line is a comment
40   -
41   - std::stringstream lstream(line); //create a stream from the line
42   - lstream.ignore(); //ignore the first character ('#')
43   -
44   - std::string prop; //get the property name
45   - lstream>>prop;
46   -
47   - if(prop == "X"){
48   - std::string wp_name;
49   - lstream>>wp_name;
50   - if(wp_name == "microns") wp = microns;
51   - else if(wp_name == "inverse_cm") wp = inverse_cm;
52   - }
53   - else if(prop == "Y"){
54   - std::string mp_name;
55   - lstream>>mp_name;
56   - if(mp_name == "ri") mp = ri;
57   - else if(mp_name == "absorbance") mp = absorbance;
58   - }
59   -
60   - std::getline(ss, line); //get the next line
61   - }
62   -
63   - function< T, stim::complex<T> >::process_string(str);
64   - }
65   -
66   - void from_inverse_cm(){
67   - //convert inverse centimeters to wavelength (in microns)
68   - for(unsigned int i=0; i<X.size(); i++)
69   - X[i] = 10000 / X[i];
70   -
71   - //reverse the function array
72   - std::reverse(X.begin(), X.end());
73   - std::reverse(Y.begin(), Y.end());
74   -
75   - }
76   -
77   - void init(){
78   - bounding[0] = bounding[1] = stim::complex<T>(1, 0);
79   - }
80   -
81   -
82   -public:
83   -
84   - material(std::string filename, wave_property wp, material_property mp){
85   - name = filename;
86   - load(filename, wp, mp);
87   - }
88   -
89   - material(std::string filename){
90   - name = filename;
91   - load(filename);
92   - }
93   -
94   - material(){
95   - init();
96   - }
97   -
98   - complex<T> getN(T lambda){
99   - return function< T, complex<T> >::linear(lambda);
100   - }
101   -
102   - void load(std::string filename, wave_property wp, material_property mp){
103   -
104   - //load the file as a function
105   - function< T, complex<T> >::load(filename);
106   - }
107   -
108   - void load(std::string filename){
109   -
110   - wave_property wp = inverse_cm;
111   - material_property mp = ri;
112   - //turn the file into a string
113   - std::ifstream t(filename.c_str()); //open the file as a stream
114   -
115   - if(!t){
116   - std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl;
117   - exit(1);
118   - }
119   - std::string str((std::istreambuf_iterator<char>(t)),
120   - std::istreambuf_iterator<char>());
121   -
122   - //process the header information
123   - process_header(str, wp, mp);
124   -
125   - //convert units
126   - if(wp == inverse_cm)
127   - from_inverse_cm();
128   - //set the bounding values
129   - bounding[0] = Y[0];
130   - bounding[1] = Y.back();
131   - }
132   - std::string str(){
133   - std::stringstream ss;
134   - ss<<name<<std::endl;
135   - ss<<function< T, complex<T> >::str();
136   - return ss.str();
137   - }
138   - std::string get_name(){
139   - return name;
140   - }
141   -
142   - void set_name(std::string str){
143   - name = str;
144   - }
145   -
146   -};
147   -
148   -}
149   -
150   -
151   -
152   -
153   -#endif
  1 +#ifndef RTS_MATERIAL_H
  2 +#define RTS_MATERIAL_H
  3 +
  4 +#include <vector>
  5 +#include <ostream>
  6 +#include <iostream>
  7 +#include <fstream>
  8 +#include <complex>
  9 +#include <algorithm>
  10 +#include <sstream>
  11 +#include "../math/complex.h"
  12 +#include "../math/constants.h"
  13 +#include "../math/function.h"
  14 +
  15 +namespace stim{
  16 +
  17 +//Material class - default representation for the material property is the refractive index (RI)
  18 +template<typename T>
  19 +class material : public function< T, complex<T> >{
  20 +
  21 +public:
  22 + enum wave_property{microns, inverse_cm};
  23 + enum material_property{ri, absorbance};
  24 +
  25 +private:
  26 +
  27 + using function< T, complex<T> >::X;
  28 + using function< T, complex<T> >::Y;
  29 + using function< T, complex<T> >::insert;
  30 + using function< T, complex<T> >::bounding;
  31 +
  32 + std::string name; //name for the material (defaults to file name)
  33 +
  34 + void process_header(std::string str, wave_property& wp, material_property& mp){
  35 +
  36 + std::stringstream ss(str); //create a stream from the data string
  37 + std::string line;
  38 + std::getline(ss, line); //get the first line as a string
  39 + while(line[0] == '#'){ //continue looping while the line is a comment
  40 +
  41 + std::stringstream lstream(line); //create a stream from the line
  42 + lstream.ignore(); //ignore the first character ('#')
  43 +
  44 + std::string prop; //get the property name
  45 + lstream>>prop;
  46 +
  47 + if(prop == "X"){
  48 + std::string wp_name;
  49 + lstream>>wp_name;
  50 + if(wp_name == "microns") wp = microns;
  51 + else if(wp_name == "inverse_cm") wp = inverse_cm;
  52 + }
  53 + else if(prop == "Y"){
  54 + std::string mp_name;
  55 + lstream>>mp_name;
  56 + if(mp_name == "ri") mp = ri;
  57 + else if(mp_name == "absorbance") mp = absorbance;
  58 + }
  59 +
  60 + std::getline(ss, line); //get the next line
  61 + }
  62 +
  63 + function< T, stim::complex<T> >::process_string(str);
  64 + }
  65 +
  66 + void from_inverse_cm(){
  67 + //convert inverse centimeters to wavelength (in microns)
  68 + for(unsigned int i=0; i<X.size(); i++)
  69 + X[i] = 10000 / X[i];
  70 +
  71 + //reverse the function array
  72 + std::reverse(X.begin(), X.end());
  73 + std::reverse(Y.begin(), Y.end());
  74 +
  75 + }
  76 +
  77 + void init(){
  78 + bounding[0] = bounding[1] = stim::complex<T>(1, 0);
  79 + }
  80 +
  81 +
  82 +public:
  83 +
  84 + material(std::string filename, wave_property wp, material_property mp){
  85 + name = filename;
  86 + load(filename, wp, mp);
  87 + }
  88 +
  89 + material(std::string filename){
  90 + name = filename;
  91 + load(filename);
  92 + }
  93 +
  94 + material(){
  95 + init();
  96 + }
  97 +
  98 + complex<T> getN(T lambda){
  99 + return function< T, complex<T> >::linear(lambda);
  100 + }
  101 +
  102 + void load(std::string filename, wave_property wp, material_property mp){
  103 +
  104 + //load the file as a function
  105 + function< T, complex<T> >::load(filename);
  106 + }
  107 +
  108 + void load(std::string filename){
  109 +
  110 + wave_property wp = inverse_cm;
  111 + material_property mp = ri;
  112 + //turn the file into a string
  113 + std::ifstream t(filename.c_str()); //open the file as a stream
  114 +
  115 + if(!t){
  116 + std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl;
  117 + exit(1);
  118 + }
  119 + std::string str((std::istreambuf_iterator<char>(t)),
  120 + std::istreambuf_iterator<char>());
  121 +
  122 + //process the header information
  123 + process_header(str, wp, mp);
  124 +
  125 + //convert units
  126 + if(wp == inverse_cm)
  127 + from_inverse_cm();
  128 + //set the bounding values
  129 + bounding[0] = Y[0];
  130 + bounding[1] = Y.back();
  131 + }
  132 + std::string str(){
  133 + std::stringstream ss;
  134 + ss<<name<<std::endl;
  135 + ss<<function< T, complex<T> >::str();
  136 + return ss.str();
  137 + }
  138 + std::string get_name(){
  139 + return name;
  140 + }
  141 +
  142 + void set_name(std::string str){
  143 + name = str;
  144 + }
  145 +
  146 +};
  147 +
  148 +}
  149 +
  150 +
  151 +
  152 +
  153 +#endif
... ...
optics/mirst-1d.cuh
1   -#include "../optics/material.h"
2   -#include "../math/complexfield.cuh"
3   -#include "../math/constants.h"
4   -//#include "../envi/bil.h"
5   -
6   -#include "cufft.h"
7   -
8   -#include <vector>
9   -#include <sstream>
10   -
11   -namespace stim{
12   -
13   -//this function writes a sinc function to "dest" such that an iFFT produces a slab
14   -template<typename T>
15   -__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri,
16   - T* src, T* zf,
17   - T w, unsigned int zR, unsigned int nuR){
18   - //dest = complex field representing the sample
19   - //ri = refractive indices for each wavelength
20   - //src = intensity of the light source for each wavelength
21   - //zf = z position of the slab interface for each wavelength (accounting for optical path length)
22   - //w = width of the slab (in pixels)
23   - //zR = number of z-axis samples
24   - //nuR = number of wavelengths
25   -
26   - //get the current coordinate in the plane slice
27   - int ifz = blockIdx.x * blockDim.x + threadIdx.x;
28   - int inu = blockIdx.y * blockDim.y + threadIdx.y;
29   -
30   - //make sure that the thread indices are in-bounds
31   - if(inu >= nuR || ifz >= zR) return;
32   -
33   - int i = inu * zR + ifz;
34   -
35   - T fz;
36   - if(ifz < zR/2)
37   - fz = ifz / (T)zR;
38   - else
39   - fz = -(zR - ifz) / (T)zR;
40   -
41   - //if the slab starts outside of the simulation domain, just return
42   - if(zf[inu] >= zR) return;
43   -
44   - //fill the array along z with a sinc function representing the Fourier transform of the layer
45   -
46   - T opl = w * ri[inu].real(); //optical path length
47   -
48   - //handle the case where the slab goes outside the simulation domain
49   - if(zf[inu] + opl >= zR)
50   - opl = zR - zf[inu];
51   -
52   - if(opl == 0) return;
53   -
54   - //T l = w * ri[inu].real();
55   - //complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0));
56   - complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));
57   -
58   - complex<T> eta = ri[inu] * ri[inu] - 1;
59   -
60   - //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz);
61   - if(ifz == 0)
62   - dest[i] += opl * exp(e) * eta * src[inu];
63   - else
64   - dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
65   -}
66   -
67   -template<typename T>
68   -__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){
69   - //zf = current z depth (optical path length) in pixels
70   - //ri = refractive index of the material
71   - //w = actual width of the layer (in pixels)
72   -
73   -
74   - //compute the index for this thread
75   - int i = blockIdx.x * blockDim.x + threadIdx.x;
76   - if(i >= S) return;
77   -
78   - if(ri == NULL)
79   - zf[i] += w;
80   - else
81   - zf[i] += ri[i].real() * w;
82   -}
83   -
84   -//apply the 1D MIRST filter to an existing sample (overwriting the sample)
85   -template<typename T>
86   -__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda,
87   - T dFz,
88   - T inNA, T outNA,
89   - unsigned int lambdaR, unsigned int zR,
90   - T sigma = 0){
91   - //sampleFFT = the sample in the Fourier domain (will be overwritten)
92   - //lambda = list of wavelengths
93   - //dFz = delta along the Fz axis in the frequency domain
94   - //inNA = NA of the internal obscuration
95   - //outNA = NA of the objective
96   - //zR = number of pixels along the Fz axis (same as the z-axis)
97   - //lambdaR = number of wavelengths
98   - //sigma = width of the Gaussian source
99   - int ifz = blockIdx.x * blockDim.x + threadIdx.x;
100   - int inu = blockIdx.y * blockDim.y + threadIdx.y;
101   -
102   - if(inu >= lambdaR || ifz >= zR) return;
103   -
104   - //calculate the index into the sample FT
105   - int i = inu * zR + ifz;
106   -
107   - //compute the frequency (and set all negative spatial frequencies to zero)
108   - T fz;
109   - if(ifz < zR / 2)
110   - fz = ifz * dFz;
111   - //if the spatial frequency is negative, set it to zero and exit
112   - else{
113   - sampleFFT[i] = 0;
114   - return;
115   - }
116   -
117   - //compute the frequency in inverse microns
118   - T nu = 1/lambda[inu];
119   -
120   - //determine the radius of the integration circle
121   - T nu_sq = nu * nu;
122   - T fz_sq = (fz * fz) / 4;
123   -
124   - //cut off frequencies above the diffraction limit
125   - T r;
126   - if(fz_sq < nu_sq)
127   - r = sqrt(nu_sq - fz_sq);
128   - else
129   - r = 0;
130   -
131   - //account for the optics
132   - T Q = 0;
133   - if(r > nu * inNA && r < nu * outNA)
134   - Q = 1;
135   -
136   - //account for the source
137   - //T sigma = 30.0;
138   - T s = exp( - (r*r * sigma*sigma) / 2 );
139   - //T s=1;
140   -
141   - //compute the final filter
142   - T mirst = 0;
143   - if(fz != 0)
144   - mirst = 2 * stimPI * r * s * Q * (1/fz);
145   -
146   - sampleFFT[i] *= mirst;
147   -
148   -}
149   -
150   -/*This object performs a 1-dimensional (layered) MIRST simulation
151   -*/
152   -template<typename T>
153   -class mirst1d{
154   -
155   -private:
156   - unsigned int Z; //z-axis resolution
157   - unsigned int pad; //pixel padding on either side of the sample
158   -
159   - std::vector< material<T> > matlist; //list of materials
160   - std::vector< T > layers; //list of layer thicknesses
161   -
162   - std::vector< T > lambdas; //list of wavelengths that are being simulated
163   - unsigned int S; //number of wavelengths (size of "lambdas")
164   -
165   - T NA[2]; //numerical aperature (central obscuration and outer diameter)
166   -
167   - function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters)
168   -
169   - complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc.
170   -
171   - void fft(int direction = CUFFT_FORWARD){
172   -
173   - unsigned padZ = Z + pad;
174   -
175   - //create cuFFT handles
176   - cufftHandle plan;
177   - cufftResult result;
178   -
179   - if(sizeof(T) == 4)
180   - result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision
181   - else
182   - result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision
183   -
184   - //check for Plan 1D errors
185   - if(result != CUFFT_SUCCESS){
186   - std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;
187   - CufftError(result);
188   - exit(1);
189   - }
190   -
191   - if(sizeof(T) == 4)
192   - result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);
193   - else
194   - result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);
195   -
196   - //check for FFT errors
197   - if(result != CUFFT_SUCCESS){
198   - std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;
199   - CufftError(result);
200   - exit(1);
201   - }
202   -
203   - cufftDestroy(plan);
204   - }
205   -
206   -
207   - //initialize the scratch memory
208   - void init_scratch(){
209   - scratch = complexfield<T, 1>(Z + pad , lambdas.size());
210   - scratch = 0;
211   - }
212   -
213   - //get the list of scattering efficiency (eta) values for a specified layer
214   - std::vector< complex<T> > layer_etas(unsigned int l){
215   -
216   - std::vector< complex<T> > etas;
217   -
218   - //fill the list of etas
219   - for(unsigned int i=0; i<lambdas.size(); i++)
220   - etas.push_back( matlist[l].eta(lambdas[i]) );
221   - return etas;
222   - }
223   -
224   - //calculates the optimal block and grid sizes using information from the GPU
225   - void cuda_params(dim3& grids, dim3& blocks){
226   - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
227   - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
228   -
229   - //create one thread for each detector pixel
230   - blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
231   - grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);
232   - }
233   -
234   - //add the fourier transform of layer n to the scratch space
235   - void build_layer_fft(unsigned int n, T* zf){
236   - unsigned int paddedZ = Z + pad;
237   -
238   - T wpx = layers[n] / dz(); //calculate the width of the layer in pixels
239   -
240   - //allocate memory for the refractive index
241   - complex<T>* gpuRi;
242   - HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));
243   -
244   - //allocate memory for the source profile
245   - T* gpuSrc;
246   - HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));
247   -
248   - complex<T> ri;
249   - T source;
250   - //store the refractive index and source profile in a CPU array
251   - for(int inu=0; inu<S; inu++){
252   - //save the refractive index to the GPU
253   - ri = matlist[n].getN(lambdas[inu]);
254   - HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice ));
255   -
256   - //save the source profile to the GPU
257   - source = source_profile(10000 / lambdas[inu]);
258   - HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice ));
259   -
260   - }
261   -
262   - //create one thread for each pixel of the field slice
263   - dim3 gridDim, blockDim;
264   - cuda_params(gridDim, blockDim);
265   - stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);
266   -
267   - int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size
268   - int linGrid = S / linBlock + 1;
269   - stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);
270   -
271   - //free memory
272   - HANDLE_ERROR(cudaFree(gpuRi));
273   - HANDLE_ERROR(cudaFree(gpuSrc));
274   - }
275   -
276   - void build_sample(){
277   - init_scratch(); //initialize the GPU scratch space
278   - //build_layer(1);
279   -
280   - T* zf;
281   - HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));
282   - HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));
283   -
284   - //render each layer of the sample
285   - for(unsigned int l=0; l<layers.size(); l++){
286   - build_layer_fft(l, zf);
287   - }
288   -
289   - HANDLE_ERROR(cudaFree(zf));
290   - }
291   -
292   - void apply_filter(){
293   - dim3 gridDim, blockDim;
294   - cuda_params(gridDim, blockDim);
295   -
296   - unsigned int Zpad = Z + pad;
297   -
298   - T sim_range = dz() * Zpad;
299   - T dFz = 1 / sim_range;
300   -
301   - //copy the array of wavelengths to the GPU
302   - T* gpuLambdas;
303   - HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad));
304   - HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice));
305   - stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas,
306   - dFz,
307   - NA[0], NA[1],
308   - S, Zpad);
309   - }
310   -
311   - //crop the image to the sample thickness - keep in mind that sample thickness != optical path length
312   - void crop(){
313   -
314   - scratch = scratch.crop(Z, S);
315   - }
316   -
317   - //save the scratch field as a binary file
318   - void to_binary(std::string filename){
319   -
320   - }
321   -
322   -
323   -public:
324   -
325   - //constructor
326   - mirst1d(unsigned int rZ = 100,
327   - unsigned int padding = 0){
328   - Z = rZ;
329   - pad = padding;
330   - NA[0] = 0;
331   - NA[1] = 0.8;
332   - S = 0;
333   - source_profile = 1;
334   - }
335   -
336   - //add a layer, thickness = microns
337   - void add_layer(material<T> mat, T thickness){
338   - matlist.push_back(mat);
339   - layers.push_back(thickness);
340   - }
341   -
342   - void add_layer(std::string filename, T thickness){
343   - add_layer(material<T>(filename), thickness);
344   - }
345   -
346   - //adds a profile spectrum for the light source
347   - void set_source(std::string filename){
348   - source_profile.load(filename);
349   - }
350   -
351   - //adds a block of wavenumbers (cm^-1) to the simulation parameters
352   - void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){
353   - unsigned int nu = start;
354   - while(nu <= stop){
355   - lambdas.push_back((T)10000 / nu);
356   - nu += step;
357   - }
358   - S = lambdas.size(); //increment the number of wavelengths (shorthand for later)
359   - }
360   -
361   - T thickness(){
362   - T t = 0;
363   - for(unsigned int l=0; l<layers.size(); l++)
364   - t += layers[l];
365   - return t;
366   - }
367   -
368   - void padding(unsigned int padding = 0){
369   - pad = padding;
370   - }
371   -
372   - T dz(){
373   - return thickness() / Z; //calculate the z-axis step size
374   - }
375   -
376   - void na(T in, T out){
377   - NA[0] = in;
378   - NA[1] = out;
379   - }
380   -
381   - void na(T out){
382   - na(0, out);
383   - }
384   -
385   - stim::function<T, T> get_source(){
386   - return source_profile;
387   - }
388   -
389   - void save_sample(std::string filename){
390   - //create a sample and save the magnitude as an image
391   - build_sample();
392   - fft(CUFFT_INVERSE);
393   - scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
394   - }
395   -
396   - void save_mirst(std::string filename, bool binary = true){
397   - //apply the MIRST filter to a sample and save the image
398   -
399   - //build the sample in the Fourier domain
400   - build_sample();
401   -
402   - //apply the MIRST filter
403   - apply_filter();
404   -
405   - //apply an inverse FFT to bring the results back into the spatial domain
406   - fft(CUFFT_INVERSE);
407   -
408   - crop();
409   -
410   - //save the image
411   - if(binary)
412   - to_binary(filename);
413   - else
414   - scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
415   - }
416   -
417   -
418   -
419   -
420   - std::string str(){
421   -
422   - stringstream ss;
423   - ss<<"1D MIRST Simulation========================="<<std::endl;
424   - ss<<"z-axis resolution: "<<Z<<std::endl;
425   - ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;
426   - ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;
427   - ss<<"padding: "<<pad<<std::endl;
428   - ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;
429   - ss<<"dz: "<<dz()<<" um"<<std::endl;
430   - ss<<std::endl;
431   - ss<<layers.size()<<" layers-------------"<<std::endl;
432   - for(unsigned int l=0; l<layers.size(); l++)
433   - ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;
434   -
435   - ss<<"source profile-----------"<<std::endl;
436   - ss<<get_source().str()<<std::endl;
437   -
438   - return ss.str();
439   -
440   -
441   - }
442   -
443   -
444   -
445   -};
446   -
447   -}
  1 +#include "../optics/material.h"
  2 +#include "../math/complexfield.cuh"
  3 +#include "../math/constants.h"
  4 +//#include "../envi/bil.h"
  5 +
  6 +#include "cufft.h"
  7 +
  8 +#include <vector>
  9 +#include <sstream>
  10 +
  11 +namespace stim{
  12 +
  13 +//this function writes a sinc function to "dest" such that an iFFT produces a slab
  14 +template<typename T>
  15 +__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri,
  16 + T* src, T* zf,
  17 + T w, unsigned int zR, unsigned int nuR){
  18 + //dest = complex field representing the sample
  19 + //ri = refractive indices for each wavelength
  20 + //src = intensity of the light source for each wavelength
  21 + //zf = z position of the slab interface for each wavelength (accounting for optical path length)
  22 + //w = width of the slab (in pixels)
  23 + //zR = number of z-axis samples
  24 + //nuR = number of wavelengths
  25 +
  26 + //get the current coordinate in the plane slice
  27 + int ifz = blockIdx.x * blockDim.x + threadIdx.x;
  28 + int inu = blockIdx.y * blockDim.y + threadIdx.y;
  29 +
  30 + //make sure that the thread indices are in-bounds
  31 + if(inu >= nuR || ifz >= zR) return;
  32 +
  33 + int i = inu * zR + ifz;
  34 +
  35 + T fz;
  36 + if(ifz < zR/2)
  37 + fz = ifz / (T)zR;
  38 + else
  39 + fz = -(zR - ifz) / (T)zR;
  40 +
  41 + //if the slab starts outside of the simulation domain, just return
  42 + if(zf[inu] >= zR) return;
  43 +
  44 + //fill the array along z with a sinc function representing the Fourier transform of the layer
  45 +
  46 + T opl = w * ri[inu].real(); //optical path length
  47 +
  48 + //handle the case where the slab goes outside the simulation domain
  49 + if(zf[inu] + opl >= zR)
  50 + opl = zR - zf[inu];
  51 +
  52 + if(opl == 0) return;
  53 +
  54 + //T l = w * ri[inu].real();
  55 + //complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0));
  56 + complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));
  57 +
  58 + complex<T> eta = ri[inu] * ri[inu] - 1;
  59 +
  60 + //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz);
  61 + if(ifz == 0)
  62 + dest[i] += opl * exp(e) * eta * src[inu];
  63 + else
  64 + dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
  65 +}
  66 +
  67 +template<typename T>
  68 +__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){
  69 + //zf = current z depth (optical path length) in pixels
  70 + //ri = refractive index of the material
  71 + //w = actual width of the layer (in pixels)
  72 +
  73 +
  74 + //compute the index for this thread
  75 + int i = blockIdx.x * blockDim.x + threadIdx.x;
  76 + if(i >= S) return;
  77 +
  78 + if(ri == NULL)
  79 + zf[i] += w;
  80 + else
  81 + zf[i] += ri[i].real() * w;
  82 +}
  83 +
  84 +//apply the 1D MIRST filter to an existing sample (overwriting the sample)
  85 +template<typename T>
  86 +__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda,
  87 + T dFz,
  88 + T inNA, T outNA,
  89 + unsigned int lambdaR, unsigned int zR,
  90 + T sigma = 0){
  91 + //sampleFFT = the sample in the Fourier domain (will be overwritten)
  92 + //lambda = list of wavelengths
  93 + //dFz = delta along the Fz axis in the frequency domain
  94 + //inNA = NA of the internal obscuration
  95 + //outNA = NA of the objective
  96 + //zR = number of pixels along the Fz axis (same as the z-axis)
  97 + //lambdaR = number of wavelengths
  98 + //sigma = width of the Gaussian source
  99 + int ifz = blockIdx.x * blockDim.x + threadIdx.x;
  100 + int inu = blockIdx.y * blockDim.y + threadIdx.y;
  101 +
  102 + if(inu >= lambdaR || ifz >= zR) return;
  103 +
  104 + //calculate the index into the sample FT
  105 + int i = inu * zR + ifz;
  106 +
  107 + //compute the frequency (and set all negative spatial frequencies to zero)
  108 + T fz;
  109 + if(ifz < zR / 2)
  110 + fz = ifz * dFz;
  111 + //if the spatial frequency is negative, set it to zero and exit
  112 + else{
  113 + sampleFFT[i] = 0;
  114 + return;
  115 + }
  116 +
  117 + //compute the frequency in inverse microns
  118 + T nu = 1/lambda[inu];
  119 +
  120 + //determine the radius of the integration circle
  121 + T nu_sq = nu * nu;
  122 + T fz_sq = (fz * fz) / 4;
  123 +
  124 + //cut off frequencies above the diffraction limit
  125 + T r;
  126 + if(fz_sq < nu_sq)
  127 + r = sqrt(nu_sq - fz_sq);
  128 + else
  129 + r = 0;
  130 +
  131 + //account for the optics
  132 + T Q = 0;
  133 + if(r > nu * inNA && r < nu * outNA)
  134 + Q = 1;
  135 +
  136 + //account for the source
  137 + //T sigma = 30.0;
  138 + T s = exp( - (r*r * sigma*sigma) / 2 );
  139 + //T s=1;
  140 +
  141 + //compute the final filter
  142 + T mirst = 0;
  143 + if(fz != 0)
  144 + mirst = 2 * stimPI * r * s * Q * (1/fz);
  145 +
  146 + sampleFFT[i] *= mirst;
  147 +
  148 +}
  149 +
  150 +/*This object performs a 1-dimensional (layered) MIRST simulation
  151 +*/
  152 +template<typename T>
  153 +class mirst1d{
  154 +
  155 +private:
  156 + unsigned int Z; //z-axis resolution
  157 + unsigned int pad; //pixel padding on either side of the sample
  158 +
  159 + std::vector< material<T> > matlist; //list of materials
  160 + std::vector< T > layers; //list of layer thicknesses
  161 +
  162 + std::vector< T > lambdas; //list of wavelengths that are being simulated
  163 + unsigned int S; //number of wavelengths (size of "lambdas")
  164 +
  165 + T NA[2]; //numerical aperature (central obscuration and outer diameter)
  166 +
  167 + function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters)
  168 +
  169 + complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc.
  170 +
  171 + void fft(int direction = CUFFT_FORWARD){
  172 +
  173 + unsigned padZ = Z + pad;
  174 +
  175 + //create cuFFT handles
  176 + cufftHandle plan;
  177 + cufftResult result;
  178 +
  179 + if(sizeof(T) == 4)
  180 + result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision
  181 + else
  182 + result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision
  183 +
  184 + //check for Plan 1D errors
  185 + if(result != CUFFT_SUCCESS){
  186 + std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;
  187 + CufftError(result);
  188 + exit(1);
  189 + }
  190 +
  191 + if(sizeof(T) == 4)
  192 + result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);
  193 + else
  194 + result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);
  195 +
  196 + //check for FFT errors
  197 + if(result != CUFFT_SUCCESS){
  198 + std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;
  199 + CufftError(result);
  200 + exit(1);
  201 + }
  202 +
  203 + cufftDestroy(plan);
  204 + }
  205 +
  206 +
  207 + //initialize the scratch memory
  208 + void init_scratch(){
  209 + scratch = complexfield<T, 1>(Z + pad , lambdas.size());
  210 + scratch = 0;
  211 + }
  212 +
  213 + //get the list of scattering efficiency (eta) values for a specified layer
  214 + std::vector< complex<T> > layer_etas(unsigned int l){
  215 +
  216 + std::vector< complex<T> > etas;
  217 +
  218 + //fill the list of etas
  219 + for(unsigned int i=0; i<lambdas.size(); i++)
  220 + etas.push_back( matlist[l].eta(lambdas[i]) );
  221 + return etas;
  222 + }
  223 +
  224 + //calculates the optimal block and grid sizes using information from the GPU
  225 + void cuda_params(dim3& grids, dim3& blocks){
  226 + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
  227 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  228 +
  229 + //create one thread for each detector pixel
  230 + blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
  231 + grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);
  232 + }
  233 +
  234 + //add the fourier transform of layer n to the scratch space
  235 + void build_layer_fft(unsigned int n, T* zf){
  236 + unsigned int paddedZ = Z + pad;
  237 +
  238 + T wpx = layers[n] / dz(); //calculate the width of the layer in pixels
  239 +
  240 + //allocate memory for the refractive index
  241 + complex<T>* gpuRi;
  242 + HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));
  243 +
  244 + //allocate memory for the source profile
  245 + T* gpuSrc;
  246 + HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));
  247 +
  248 + complex<T> ri;
  249 + T source;
  250 + //store the refractive index and source profile in a CPU array
  251 + for(int inu=0; inu<S; inu++){
  252 + //save the refractive index to the GPU
  253 + ri = matlist[n].getN(lambdas[inu]);
  254 + HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice ));
  255 +
  256 + //save the source profile to the GPU
  257 + source = source_profile(10000 / lambdas[inu]);
  258 + HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice ));
  259 +
  260 + }
  261 +
  262 + //create one thread for each pixel of the field slice
  263 + dim3 gridDim, blockDim;
  264 + cuda_params(gridDim, blockDim);
  265 + stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);
  266 +
  267 + int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size
  268 + int linGrid = S / linBlock + 1;
  269 + stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);
  270 +
  271 + //free memory
  272 + HANDLE_ERROR(cudaFree(gpuRi));
  273 + HANDLE_ERROR(cudaFree(gpuSrc));
  274 + }
  275 +
  276 + void build_sample(){
  277 + init_scratch(); //initialize the GPU scratch space
  278 + //build_layer(1);
  279 +
  280 + T* zf;
  281 + HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));
  282 + HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));
  283 +
  284 + //render each layer of the sample
  285 + for(unsigned int l=0; l<layers.size(); l++){
  286 + build_layer_fft(l, zf);
  287 + }
  288 +
  289 + HANDLE_ERROR(cudaFree(zf));
  290 + }
  291 +
  292 + void apply_filter(){
  293 + dim3 gridDim, blockDim;
  294 + cuda_params(gridDim, blockDim);
  295 +
  296 + unsigned int Zpad = Z + pad;
  297 +
  298 + T sim_range = dz() * Zpad;
  299 + T dFz = 1 / sim_range;
  300 +
  301 + //copy the array of wavelengths to the GPU
  302 + T* gpuLambdas;
  303 + HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad));
  304 + HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice));
  305 + stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas,
  306 + dFz,
  307 + NA[0], NA[1],
  308 + S, Zpad);
  309 + }
  310 +
  311 + //crop the image to the sample thickness - keep in mind that sample thickness != optical path length
  312 + void crop(){
  313 +
  314 + scratch = scratch.crop(Z, S);
  315 + }
  316 +
  317 + //save the scratch field as a binary file
  318 + void to_binary(std::string filename){
  319 +
  320 + }
  321 +
  322 +
  323 +public:
  324 +
  325 + //constructor
  326 + mirst1d(unsigned int rZ = 100,
  327 + unsigned int padding = 0){
  328 + Z = rZ;
  329 + pad = padding;
  330 + NA[0] = 0;
  331 + NA[1] = 0.8;
  332 + S = 0;
  333 + source_profile = 1;
  334 + }
  335 +
  336 + //add a layer, thickness = microns
  337 + void add_layer(material<T> mat, T thickness){
  338 + matlist.push_back(mat);
  339 + layers.push_back(thickness);
  340 + }
  341 +
  342 + void add_layer(std::string filename, T thickness){
  343 + add_layer(material<T>(filename), thickness);
  344 + }
  345 +
  346 + //adds a profile spectrum for the light source
  347 + void set_source(std::string filename){
  348 + source_profile.load(filename);
  349 + }
  350 +
  351 + //adds a block of wavenumbers (cm^-1) to the simulation parameters
  352 + void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){
  353 + unsigned int nu = start;
  354 + while(nu <= stop){
  355 + lambdas.push_back((T)10000 / nu);
  356 + nu += step;
  357 + }
  358 + S = lambdas.size(); //increment the number of wavelengths (shorthand for later)
  359 + }
  360 +
  361 + T thickness(){
  362 + T t = 0;
  363 + for(unsigned int l=0; l<layers.size(); l++)
  364 + t += layers[l];
  365 + return t;
  366 + }
  367 +
  368 + void padding(unsigned int padding = 0){
  369 + pad = padding;
  370 + }
  371 +
  372 + T dz(){
  373 + return thickness() / Z; //calculate the z-axis step size
  374 + }
  375 +
  376 + void na(T in, T out){
  377 + NA[0] = in;
  378 + NA[1] = out;
  379 + }
  380 +
  381 + void na(T out){
  382 + na(0, out);
  383 + }
  384 +
  385 + stim::function<T, T> get_source(){
  386 + return source_profile;
  387 + }
  388 +
  389 + void save_sample(std::string filename){
  390 + //create a sample and save the magnitude as an image
  391 + build_sample();
  392 + fft(CUFFT_INVERSE);
  393 + scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
  394 + }
  395 +
  396 + void save_mirst(std::string filename, bool binary = true){
  397 + //apply the MIRST filter to a sample and save the image
  398 +
  399 + //build the sample in the Fourier domain
  400 + build_sample();
  401 +
  402 + //apply the MIRST filter
  403 + apply_filter();
  404 +
  405 + //apply an inverse FFT to bring the results back into the spatial domain
  406 + fft(CUFFT_INVERSE);
  407 +
  408 + crop();
  409 +
  410 + //save the image
  411 + if(binary)
  412 + to_binary(filename);
  413 + else
  414 + scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
  415 + }
  416 +
  417 +
  418 +
  419 +
  420 + std::string str(){
  421 +
  422 + stringstream ss;
  423 + ss<<"1D MIRST Simulation========================="<<std::endl;
  424 + ss<<"z-axis resolution: "<<Z<<std::endl;
  425 + ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;
  426 + ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;
  427 + ss<<"padding: "<<pad<<std::endl;
  428 + ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;
  429 + ss<<"dz: "<<dz()<<" um"<<std::endl;
  430 + ss<<std::endl;
  431 + ss<<layers.size()<<" layers-------------"<<std::endl;
  432 + for(unsigned int l=0; l<layers.size(); l++)
  433 + ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;
  434 +
  435 + ss<<"source profile-----------"<<std::endl;
  436 + ss<<get_source().str()<<std::endl;
  437 +
  438 + return ss.str();
  439 +
  440 +
  441 + }
  442 +
  443 +
  444 +
  445 +};
  446 +
  447 +}
... ...
ui/arguments.h
... ... @@ -15,7 +15,7 @@
15 15  
16 16 namespace stim{
17 17  
18   - class argument
  18 + class cmd_option
19 19 {
20 20 private:
21 21 bool ansi;
... ... @@ -59,8 +59,8 @@ namespace stim{
59 59  
60 60 public:
61 61 void set_ansi(bool b){ ansi = b; }
62   - //create an argument with a given name, description, and default value
63   - argument(std::string _name, std::string _desc, std::string _default = "", std::string _range = "")
  62 + //create an option with a given name, description, and default value
  63 + cmd_option(std::string _name, std::string _desc, std::string _default = "", std::string _range = "")
64 64 {
65 65 name = _name;
66 66 parse_desc(_desc);
... ... @@ -81,12 +81,12 @@ namespace stim{
81 81 return vals.size();
82 82 }
83 83  
84   - //return the value of a text argument
  84 + //return the value of a text option
85 85 std::string as_string(unsigned int n = 0)
86 86 {
87 87 if(!flag)
88 88 {
89   - std::cout<<"ERROR - Argument requested without being set: "<<name<<std::endl;
  89 + std::cout<<"ERROR - Option requested without being set: "<<name<<std::endl;
90 90 exit(1);
91 91 }
92 92  
... ... @@ -96,12 +96,12 @@ namespace stim{
96 96 else return "";
97 97 }
98 98  
99   - //return the value of a floating point argument
  99 + //return the value of a floating point option
100 100 float as_float(unsigned int n = 0)
101 101 {
102 102 if(!flag)
103 103 {
104   - std::cout<<"ERROR - Argument requested without being set: "<<name<<std::endl;
  104 + std::cout<<"ERROR - option requested without being set: "<<name<<std::endl;
105 105 exit(1);
106 106 }
107 107  
... ... @@ -115,12 +115,12 @@ namespace stim{
115 115 else return 0;
116 116 }
117 117  
118   - //return the value of an integer argument
  118 + //return the value of an integer option
119 119 int as_int(unsigned int n = 0)
120 120 {
121 121 if(!flag)
122 122 {
123   - std::cout<<"ERROR - Argument requested without being set: "<<name<<std::endl;
  123 + std::cout<<"ERROR - option requested without being set: "<<name<<std::endl;
124 124 exit(1);
125 125 }
126 126  
... ... @@ -138,7 +138,7 @@ namespace stim{
138 138 int col_width()
139 139 {
140 140 int n = 3;
141   - //add the length of the argument name
  141 + //add the length of the option name
142 142 n += name.size();
143 143  
144 144 //if there are any default parameters
... ... @@ -147,7 +147,7 @@ namespace stim{
147 147 //padding (parenthesis, =, etc.)
148 148 n += 6;
149 149  
150   - //for each default argument value
  150 + //for each default option value
151 151 for(unsigned int v=0; v<vals.size(); v++)
152 152 n += vals[v].size() + 1;
153 153 }
... ... @@ -209,13 +209,13 @@ namespace stim{
209 209 return ss.str();
210 210 }
211 211  
212   - //compare the name of the argument to a string
  212 + //compare the name of the option to a string
213 213 bool operator==(std::string rhs)
214 214 {
215 215 return (name == rhs);
216 216 }
217 217  
218   - //set the argument to a given value
  218 + //set the option to a given value
219 219 void set(std::string _value)
220 220 {
221 221 parse_val(_value);
... ... @@ -242,10 +242,11 @@ namespace stim{
242 242 private:
243 243 bool ansi;
244 244  
245   - //vector of arguments
246   - std::vector<argument> args;
  245 + //vector of options
  246 + std::vector<cmd_option> opts;
  247 + std::vector<std::string> args;
247 248  
248   - //column width of the longest argument
  249 + //column width of the longest option
249 250 int col_width;
250 251  
251 252 //list of sections
... ... @@ -261,28 +262,28 @@ namespace stim{
261 262 void set_ansi(bool b)
262 263 {
263 264 ansi = b;
264   - for(unsigned int i=0; i<args.size(); i++)
265   - args[i].set_ansi(ansi);
  265 + for(unsigned int i=0; i<opts.size(); i++)
  266 + opts[i].set_ansi(ansi);
266 267 }
267 268  
268 269 void add(std::string _name, std::string _desc, std::string _default = "", std::string _range = "")
269 270 {
270   - argument arg(_name, _desc, _default, _range);
271   - arg.set_ansi(ansi);
272   - args.push_back(arg);
  271 + cmd_option opt(_name, _desc, _default, _range);
  272 + opt.set_ansi(ansi);
  273 + opts.push_back(opt);
273 274  
274   - col_width = std::max<int>(col_width, arg.col_width());
  275 + col_width = std::max<int>(col_width, opt.col_width());
275 276 }
276 277  
277 278 void section(std::string _name)
278 279 {
279 280 argsection s;
280 281 s.name = _name;
281   - s.index = args.size();
  282 + s.index = opts.size();
282 283 sections.push_back(s);
283 284 }
284 285  
285   - //output the arguments (generally in response to --help)
  286 + //output the options (generally in response to --help)
286 287 std::string str()
287 288 {
288 289 std::stringstream ss;
... ... @@ -292,8 +293,8 @@ namespace stim{
292 293 if(sections.size() > 0)
293 294 si = 0;
294 295  
295   - //for each argument
296   - for(unsigned int a=0; a<args.size(); a++)
  296 + //for each option
  297 + for(unsigned int a=0; a<opts.size(); a++)
297 298 {
298 299 if(si != -1 && a == sections[si].index)
299 300 {
... ... @@ -305,7 +306,7 @@ namespace stim{
305 306 if(si == (int)sections.size()) si = -1;
306 307 }
307 308  
308   - ss<<args[a].toStr(col_width)<<std::endl;
  309 + ss<<opts[a].toStr(col_width)<<std::endl;
309 310 }
310 311  
311 312 return ss.str();
... ... @@ -313,9 +314,9 @@ namespace stim{
313 314  
314 315 int index(std::string _name)
315 316 {
316   - unsigned int i = find(args.begin(), args.end(), _name) - args.begin();
  317 + unsigned int i = find(opts.begin(), opts.end(), _name) - opts.begin();
317 318  
318   - if(i >= args.size())
  319 + if(i >= opts.size())
319 320 return -1;
320 321  
321 322 return (int)i;
... ... @@ -327,52 +328,57 @@ namespace stim{
327 328  
328 329 if(i != -1)
329 330 {
330   - args[i].set(_value);
  331 + opts[i].set(_value);
331 332 //adjust the column width if necessary
332   - col_width = (std::max)(col_width, args[i].col_width());
  333 + col_width = (std::max)(col_width, opts[i].col_width());
333 334 }
334 335 else
335   - std::cout<<"ERROR - Argument not recognized: "<<_name<<std::endl;
  336 + std::cout<<"ERROR - option not recognized: "<<_name<<std::endl;
336 337 }
337 338  
338 339 //parse a parameter string
339 340 void parse(int argc, char* argv[])
340 341 {
341   - //if the number of arguments is 1, we're done
  342 + //if the number of options is 1, we're done
342 343 if(argc <= 1) return;
343 344  
344 345 std::string name;
345 346 std::string params;
346 347  
  348 + bool args_done = false; //create a flag that turns true when the first option is encountered
  349 +
347 350 for(int i=1; i<argc; i++)
348 351 {
349   - //if the argument is a parameter name
  352 + //if the argument is an option
350 353 if(argv[i][0] == '-' && argv[i][1] == '-')
351 354 {
352   - //add any previous arguments
  355 + args_done = true; //arguments for the executable are done, all options now
  356 + //add any previous options
353 357 if(name != "")
354 358 set(name, params);
355   - //set the current argument to this name
  359 + //set the current option to this name
356 360 name = argv[i]+2;
357 361 //clear the parameters list
358 362 params = "";
359 363 }
360   - else
361   - {
  364 + else if(!args_done){
  365 + args.push_back(argv[i]);
  366 + }
  367 + else{ //everything else is an arg for the most recent option
362 368 if(params != "")
363 369 params += " ";
364 370 params += argv[i];
365 371 }
366 372 }
367 373  
368   - //set the last argument
  374 + //set the last option
369 375 set(name, params);
370 376 }
371 377  
372 378 //determine if a parameter has been set (either specified by the user or with a default value)
373 379 bool operator()(std::string _name)
374 380 {
375   - int i = find(args.begin(), args.end(), _name) - args.begin();
  381 + int i = find(opts.begin(), opts.end(), _name) - opts.begin();
376 382  
377 383 if(i < 0)
378 384 {
... ... @@ -380,12 +386,13 @@ namespace stim{
380 386 exit(1);
381 387 }
382 388  
383   - return args[i].is_set();
  389 + return opts[i].is_set();
384 390 }
385 391  
386   - int nargs(std::string _name)
  392 + //number of arguments in a specified option
  393 + unsigned int nargs(std::string _name)
387 394 {
388   - int i = find(args.begin(), args.end(), _name) - args.begin();
  395 + int i = find(opts.begin(), opts.end(), _name) - opts.begin();
389 396  
390 397 if(i < 0)
391 398 {
... ... @@ -393,12 +400,22 @@ namespace stim{
393 400 exit(1);
394 401 }
395 402  
396   - return args[i].nargs();
  403 + return opts[i].nargs();
  404 + }
  405 +
  406 + //number of arguments for the executable
  407 + unsigned int nargs(){
  408 + return args.size();
  409 + }
  410 +
  411 + //return the a'th executable argument
  412 + std::string arg(unsigned int a){
  413 + return args[a];
397 414 }
398 415  
399   - argument operator[](std::string _name)
  416 + cmd_option operator[](std::string _name)
400 417 {
401   - int i = find(args.begin(), args.end(), _name) - args.begin();
  418 + int i = find(opts.begin(), opts.end(), _name) - opts.begin();
402 419  
403 420 if(i < 0)
404 421 {
... ... @@ -406,7 +423,7 @@ namespace stim{
406 423 exit(1);
407 424 }
408 425  
409   - return args[i];
  426 + return opts[i];
410 427 }
411 428  
412 429  
... ...