Commit 81e0d2215b30a17309ce7ba7615f94e2eda5c67e

Authored by David Mayerich
1 parent 7b3948ab

separated executable arguments and options in the arglist class

math/complex.h
1   -/*RTS Complex number class. This class is CUDA compatible,
2   -and can therefore be used in CUDA code and on CUDA devices.
3   -*/
4   -
5   -#ifndef RTS_COMPLEX
6   -#define RTS_COMPLEX
7   -
8   -#include "../cuda/callable.h"
9   -#include <cmath>
10   -#include <string>
11   -#include <sstream>
12   -#include <iostream>
13   -
14   -namespace stim
15   -{
16   -
17   -template <class T>
18   -struct complex
19   -{
20   - T r, i;
21   -
22   - //default constructor
23   - CUDA_CALLABLE complex()
24   - {
25   - r = 0;
26   - i = 0;
27   - }
28   -
29   - //constructor when given real and imaginary values
30   - CUDA_CALLABLE complex(T r, T i = 0)
31   - {
32   - this->r = r;
33   - this->i = i;
34   - }
35   -
36   - //access methods
37   - CUDA_CALLABLE T real()
38   - {
39   - return r;
40   - }
41   -
42   - CUDA_CALLABLE T real(T r_val)
43   - {
44   - r = r_val;
45   - return r_val;
46   - }
47   -
48   - CUDA_CALLABLE T imag()
49   - {
50   - return i;
51   - }
52   - CUDA_CALLABLE T imag(T i_val)
53   - {
54   - i = i_val;
55   - return i_val;
56   - }
57   -
58   -
59   -
60   - //return the current value multiplied by i
61   - CUDA_CALLABLE complex<T> imul()
62   - {
63   - complex<T> result;
64   - result.r = -i;
65   - result.i = r;
66   -
67   - return result;
68   - }
69   -
70   - //returns the complex signum (-1, 0, 1)
71   - CUDA_CALLABLE int sgn(){
72   - if(r > 0) return 1;
73   - else if(r < 0) return -1;
74   - else return (0 < i - i < 0);
75   - }
76   -
77   - //ARITHMETIC OPERATORS--------------------
78   -
79   - //binary + operator (returns the result of adding two complex values)
80   - CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs) const
81   - {
82   - complex<T> result;
83   - result.r = r + rhs.r;
84   - result.i = i + rhs.i;
85   - return result;
86   - }
87   -
88   - CUDA_CALLABLE complex<T> operator+ (const T rhs) const
89   - {
90   - complex<T> result;
91   - result.r = r + rhs;
92   - result.i = i;
93   - return result;
94   - }
95   -
96   - //binary - operator (returns the result of adding two complex values)
97   - CUDA_CALLABLE complex<T> operator- (const complex<T> rhs) const
98   - {
99   - complex<T> result;
100   - result.r = r - rhs.r;
101   - result.i = i - rhs.i;
102   - return result;
103   - }
104   -
105   - //binary - operator (returns the result of adding two complex values)
106   - CUDA_CALLABLE complex<T> operator- (const T rhs)
107   - {
108   - complex<T> result;
109   - result.r = r - rhs;
110   - result.i = i;
111   - return result;
112   - }
113   -
114   - //binary MULTIPLICATION operators (returns the result of multiplying complex values)
115   - CUDA_CALLABLE complex<T> operator* (const complex<T> rhs) const
116   - {
117   - complex<T> result;
118   - result.r = r * rhs.r - i * rhs.i;
119   - result.i = r * rhs.i + i * rhs.r;
120   - return result;
121   - }
122   - CUDA_CALLABLE complex<T> operator* (const T rhs)
123   - {
124   - return complex<T>(r * rhs, i * rhs);
125   - }
126   -
127   - //binary DIVISION operators (returns the result of dividing complex values)
128   - CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs) const
129   - {
130   - complex<T> result;
131   - T denom = rhs.r * rhs.r + rhs.i * rhs.i;
132   - result.r = (r * rhs.r + i * rhs.i) / denom;
133   - result.i = (- r * rhs.i + i * rhs.r) / denom;
134   -
135   - return result;
136   - }
137   - CUDA_CALLABLE complex<T> operator/ (const T rhs)
138   - {
139   - return complex<T>(r / rhs, i / rhs);
140   - }
141   -
142   - //ASSIGNMENT operators-----------------------------------
143   - CUDA_CALLABLE complex<T> & operator=(const complex<T> &rhs)
144   - {
145   - //check for self-assignment
146   - if(this != &rhs)
147   - {
148   - this->r = rhs.r;
149   - this->i = rhs.i;
150   - }
151   - return *this;
152   - }
153   - CUDA_CALLABLE complex<T> & operator=(const T &rhs)
154   - {
155   - this->r = rhs;
156   - this->i = 0;
157   -
158   - return *this;
159   - }
160   -
161   - //arithmetic assignment operators
162   - CUDA_CALLABLE complex<T> operator+=(const complex<T> &rhs)
163   - {
164   - *this = *this + rhs;
165   - return *this;
166   - }
167   - CUDA_CALLABLE complex<T> operator+=(const T &rhs)
168   - {
169   - *this = *this + rhs;
170   - return *this;
171   - }
172   -
173   - CUDA_CALLABLE complex<T> operator-=(const complex<T> &rhs)
174   - {
175   - *this = *this - rhs;
176   - return *this;
177   - }
178   - CUDA_CALLABLE complex<T> operator-=(const T &rhs)
179   - {
180   - *this = *this - rhs;
181   - return *this;
182   - }
183   -
184   - CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs)
185   - {
186   - *this = *this * rhs;
187   - return *this;
188   - }
189   - CUDA_CALLABLE complex<T> operator*=(const T &rhs)
190   - {
191   - *this = *this * rhs;
192   - return *this;
193   - }
194   - //divide and assign
195   - CUDA_CALLABLE complex<T> operator/=(const complex<T> &rhs)
196   - {
197   - *this = *this / rhs;
198   - return *this;
199   - }
200   - CUDA_CALLABLE complex<T> operator/=(const T &rhs)
201   - {
202   - *this = *this / rhs;
203   - return *this;
204   - }
205   -
206   - //absolute value operator (returns the absolute value of the complex number)
207   - CUDA_CALLABLE T abs()
208   - {
209   - return std::sqrt(r * r + i * i);
210   - }
211   -
212   - CUDA_CALLABLE complex<T> log()
213   - {
214   - complex<T> result;
215   - result.r = (T)std::log(std::sqrt(r * r + i * i));
216   - result.i = (T)std::atan2(i, r);
217   -
218   -
219   - return result;
220   - }
221   -
222   - CUDA_CALLABLE complex<T> exp()
223   - {
224   - complex<T> result;
225   -
226   - T e_r = std::exp(r);
227   - result.r = e_r * (T)std::cos(i);
228   - result.i = e_r * (T)std::sin(i);
229   -
230   - return result;
231   - }
232   -
233   - /*CUDA_CALLABLE complex<T> pow(int y)
234   - {
235   -
236   - return pow((double)y);
237   - }*/
238   -
239   - CUDA_CALLABLE complex<T> pow(T y)
240   - {
241   - complex<T> result;
242   -
243   - result = log() * y;
244   -
245   - return result.exp();
246   - }
247   -
248   - CUDA_CALLABLE complex<T> sqrt()
249   - {
250   - complex<T> result;
251   -
252   - //convert to polar coordinates
253   - T a = std::sqrt(r*r + i*i);
254   - T theta = std::atan2(i, r);
255   -
256   - //find the square root
257   - T a_p = std::sqrt(a);
258   - T theta_p = theta/2.0f;
259   -
260   - //convert back to cartesian coordinates
261   - result.r = a_p * std::cos(theta_p);
262   - result.i = a_p * std::sin(theta_p);
263   -
264   - return result;
265   - }
266   -
267   - std::string str()
268   - {
269   - std::stringstream ss;
270   - ss<<"("<<r<<","<<i<<")";
271   -
272   - return ss.str();
273   - }
274   -
275   - //COMPARISON operators
276   - CUDA_CALLABLE bool operator==(complex<T> rhs)
277   - {
278   - if(r == rhs.r && i == rhs.i)
279   - return true;
280   - return false;
281   - }
282   -
283   - CUDA_CALLABLE bool operator==(T rhs)
284   - {
285   - if(r == rhs && i == 0)
286   - return true;
287   - return false;
288   - }
289   -
290   - CUDA_CALLABLE bool operator!=(T rhs)
291   - {
292   - if(r != rhs || i != 0)
293   - return true;
294   - return false;
295   - }
296   -
297   - CUDA_CALLABLE bool operator<(complex<T> rhs){
298   - return abs() < rhs.abs();
299   - }
300   - CUDA_CALLABLE bool operator<=(complex<T> rhs){
301   - return abs() <= rhs.abs();
302   - }
303   - CUDA_CALLABLE bool operator>(complex<T> rhs){
304   - return abs() > rhs.abs();
305   - }
306   - CUDA_CALLABLE bool operator >=(complex<T> rhs){
307   - return abs() >= rhs.abs();
308   - }
309   -
310   - //CASTING operators
311   - template < typename otherT >
312   - operator complex<otherT>()
313   - {
314   - complex<otherT> result((otherT)r, (otherT)i);
315   - return result;
316   - }
317   - template< typename otherT >
318   - complex( const complex<otherT> &rhs)
319   - {
320   - r = (T)rhs.r;
321   - i = (T)rhs.i;
322   - }
323   - template< typename otherT >
324   - complex& operator=(const complex<otherT> &rhs)
325   - {
326   - r = (T)rhs.r;
327   - i = (T)rhs.i;
328   - return *this;
329   - }
330   -
331   -};
332   -
333   -} //end RTS namespace
334   -
335   -//addition
336   -template<typename T>
337   -CUDA_CALLABLE static stim::complex<T> operator+(const double a, const stim::complex<T> b)
338   -{
339   - return stim::complex<T>((T)a + b.r, b.i);
340   -}
341   -
342   -//subtraction with a real value
343   -template<typename T>
344   -CUDA_CALLABLE static stim::complex<T> operator-(const double a, const stim::complex<T> b)
345   -{
346   - return stim::complex<T>((T)a - b.r, -b.i);
347   -}
348   -
349   -//minus sign
350   -template<typename T>
351   -CUDA_CALLABLE static stim::complex<T> operator-(const stim::complex<T> &rhs)
352   -{
353   - return stim::complex<T>(-rhs.r, -rhs.i);
354   -}
355   -
356   -//multiply a T value by a complex value
357   -template<typename T>
358   -CUDA_CALLABLE static stim::complex<T> operator*(const double a, const stim::complex<T> b)
359   -{
360   - return stim::complex<T>((T)a * b.r, (T)a * b.i);
361   -}
362   -
363   -//divide a T value by a complex value
364   -template<typename T>
365   -CUDA_CALLABLE static stim::complex<T> operator/(const double a, const stim::complex<T> b)
366   -{
367   - stim::complex<T> result;
368   -
369   - T denom = b.r * b.r + b.i * b.i;
370   -
371   - result.r = ((T)a * b.r) / denom;
372   - result.i = -((T)a * b.i) / denom;
373   -
374   - return result;
375   -}
376   -
377   -
378   -template<typename T>
379   -CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, T y)
380   -{
381   - return x.pow(y);
382   -}
383   -template<typename T>
384   -CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, int y)
385   -{
386   - return x.pow(y);
387   -}
388   -
389   -//log function
390   -template<typename T>
391   -CUDA_CALLABLE static stim::complex<T> log(stim::complex<T> x)
392   -{
393   - return x.log();
394   -}
395   -
396   -//exp function
397   -template<typename T>
398   -CUDA_CALLABLE static stim::complex<T> exp(stim::complex<T> x)
399   -{
400   - return x.exp();
401   -}
402   -
403   -//sqrt function
404   -template<typename T>
405   -CUDA_CALLABLE static stim::complex<T> sqrt(stim::complex<T> x)
406   -{
407   - return x.sqrt();
408   -}
409   -
410   -
411   -template <typename T>
412   -CUDA_CALLABLE static T abs(stim::complex<T> a)
413   -{
414   - return a.abs();
415   -}
416   -
417   -template <typename T>
418   -CUDA_CALLABLE static T real(stim::complex<T> a)
419   -{
420   - return a.r;
421   -}
422   -
423   -//template <typename T>
424   -CUDA_CALLABLE static float real(float a)
425   -{
426   - return a;
427   -}
428   -
429   -template <typename T>
430   -CUDA_CALLABLE static T imag(stim::complex<T> a)
431   -{
432   - return a.i;
433   -}
434   -
435   -//trigonometric functions
436   -//template<class A>
437   -/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x)
438   -{
439   - stim::complex<float> result;
440   - result.r = sinf(x.r) * coshf(x.i);
441   - result.i = cosf(x.r) * sinhf(x.i);
442   -
443   - return result;
444   -}*/
445   -
446   -template<class A>
447   -CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)
448   -{
449   - stim::complex<A> result;
450   - result.r = (A)std::sin(x.r) * (A)std::cosh(x.i);
451   - result.i = (A)std::cos(x.r) * (A)std::sinh(x.i);
452   -
453   - return result;
454   -}
455   -
456   -//floating point template
457   -//template<class A>
458   -/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x)
459   -{
460   - stim::complex<float> result;
461   - result.r = cosf(x.r) * coshf(x.i);
462   - result.i = -(sinf(x.r) * sinhf(x.i));
463   -
464   - return result;
465   -}*/
466   -
467   -template<class A>
468   -CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x)
469   -{
470   - stim::complex<A> result;
471   - result.r = (A)std::cos(x.r) * (A)std::cosh(x.i);
472   - result.i = -((A)std::sin(x.r) * (A)std::sinh(x.i));
473   -
474   - return result;
475   -}
476   -
477   -
478   -template<class A>
479   -std::ostream& operator<<(std::ostream& os, stim::complex<A> x)
480   -{
481   - os<<x.str();
482   - return os;
483   -}
484   -
485   -template<class A>
486   -std::istream& operator>>(std::istream& is, stim::complex<A>& x)
487   -{
488   - A r, i;
489   - r = i = 0; //initialize the real and imaginary parts to zero
490   - is>>r; //parse
491   - is>>i;
492   -
493   - x.real(r); //assign the parsed values to x
494   - x.imag(i);
495   -
496   - return is; //return the stream
497   -}
498   -
499   -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
500   -//template<class T> using rtsComplex = stim::complex<T>;
501   -//#endif
502   -
503   -
504   -
505   -#endif
  1 +/*RTS Complex number class. This class is CUDA compatible,
  2 +and can therefore be used in CUDA code and on CUDA devices.
  3 +*/
  4 +
  5 +#ifndef RTS_COMPLEX
  6 +#define RTS_COMPLEX
  7 +
  8 +#include "../cuda/callable.h"
  9 +#include <cmath>
  10 +#include <string>
  11 +#include <sstream>
  12 +#include <iostream>
  13 +
  14 +namespace stim
  15 +{
  16 +
  17 +template <class T>
  18 +struct complex
  19 +{
  20 + T r, i;
  21 +
  22 + //default constructor
  23 + CUDA_CALLABLE complex()
  24 + {
  25 + r = 0;
  26 + i = 0;
  27 + }
  28 +
  29 + //constructor when given real and imaginary values
  30 + CUDA_CALLABLE complex(T r, T i = 0)
  31 + {
  32 + this->r = r;
  33 + this->i = i;
  34 + }
  35 +
  36 + //access methods
  37 + CUDA_CALLABLE T real()
  38 + {
  39 + return r;
  40 + }
  41 +
  42 + CUDA_CALLABLE T real(T r_val)
  43 + {
  44 + r = r_val;
  45 + return r_val;
  46 + }
  47 +
  48 + CUDA_CALLABLE T imag()
  49 + {
  50 + return i;
  51 + }
  52 + CUDA_CALLABLE T imag(T i_val)
  53 + {
  54 + i = i_val;
  55 + return i_val;
  56 + }
  57 +
  58 +
  59 +
  60 + //return the current value multiplied by i
  61 + CUDA_CALLABLE complex<T> imul()
  62 + {
  63 + complex<T> result;
  64 + result.r = -i;
  65 + result.i = r;
  66 +
  67 + return result;
  68 + }
  69 +
  70 + //returns the complex signum (-1, 0, 1)
  71 + CUDA_CALLABLE int sgn(){
  72 + if(r > 0) return 1;
  73 + else if(r < 0) return -1;
  74 + else return (0 < i - i < 0);
  75 + }
  76 +
  77 + //ARITHMETIC OPERATORS--------------------
  78 +
  79 + //binary + operator (returns the result of adding two complex values)
  80 + CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs) const
  81 + {
  82 + complex<T> result;
  83 + result.r = r + rhs.r;
  84 + result.i = i + rhs.i;
  85 + return result;
  86 + }
  87 +
  88 + CUDA_CALLABLE complex<T> operator+ (const T rhs) const
  89 + {
  90 + complex<T> result;
  91 + result.r = r + rhs;
  92 + result.i = i;
  93 + return result;
  94 + }
  95 +
  96 + //binary - operator (returns the result of subtracting rhs from this complex value)
  97 + CUDA_CALLABLE complex<T> operator- (const complex<T> rhs) const
  98 + {
  99 + complex<T> result;
  100 + result.r = r - rhs.r; //difference of the real parts
  101 + result.i = i - rhs.i; //difference of the imaginary parts
  102 + return result;
  103 + }
  104 +
  105 + //binary - operator (returns the result of adding two complex values)
  106 + CUDA_CALLABLE complex<T> operator- (const T rhs)
  107 + {
  108 + complex<T> result;
  109 + result.r = r - rhs;
  110 + result.i = i;
  111 + return result;
  112 + }
  113 +
  114 + //binary MULTIPLICATION operators (returns the result of multiplying complex values)
  115 + CUDA_CALLABLE complex<T> operator* (const complex<T> rhs) const
  116 + {
  117 + complex<T> result;
  118 + result.r = r * rhs.r - i * rhs.i;
  119 + result.i = r * rhs.i + i * rhs.r;
  120 + return result;
  121 + }
  122 + CUDA_CALLABLE complex<T> operator* (const T rhs)
  123 + {
  124 + return complex<T>(r * rhs, i * rhs);
  125 + }
  126 +
  127 + //binary DIVISION operators (returns the result of dividing complex values)
  128 + CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs) const
  129 + {
  130 + complex<T> result;
  131 + T denom = rhs.r * rhs.r + rhs.i * rhs.i;
  132 + result.r = (r * rhs.r + i * rhs.i) / denom;
  133 + result.i = (- r * rhs.i + i * rhs.r) / denom;
  134 +
  135 + return result;
  136 + }
  137 + CUDA_CALLABLE complex<T> operator/ (const T rhs)
  138 + {
  139 + return complex<T>(r / rhs, i / rhs);
  140 + }
  141 +
  142 + //ASSIGNMENT operators-----------------------------------
  143 + CUDA_CALLABLE complex<T> & operator=(const complex<T> &rhs)
  144 + {
  145 + //check for self-assignment
  146 + if(this != &rhs)
  147 + {
  148 + this->r = rhs.r;
  149 + this->i = rhs.i;
  150 + }
  151 + return *this;
  152 + }
  153 + CUDA_CALLABLE complex<T> & operator=(const T &rhs)
  154 + {
  155 + this->r = rhs;
  156 + this->i = 0;
  157 +
  158 + return *this;
  159 + }
  160 +
  161 + //arithmetic assignment operators
  162 + CUDA_CALLABLE complex<T> operator+=(const complex<T> &rhs)
  163 + {
  164 + *this = *this + rhs;
  165 + return *this;
  166 + }
  167 + CUDA_CALLABLE complex<T> operator+=(const T &rhs)
  168 + {
  169 + *this = *this + rhs;
  170 + return *this;
  171 + }
  172 +
  173 + CUDA_CALLABLE complex<T> operator-=(const complex<T> &rhs)
  174 + {
  175 + *this = *this - rhs;
  176 + return *this;
  177 + }
  178 + CUDA_CALLABLE complex<T> operator-=(const T &rhs)
  179 + {
  180 + *this = *this - rhs;
  181 + return *this;
  182 + }
  183 +
  184 + CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs)
  185 + {
  186 + *this = *this * rhs;
  187 + return *this;
  188 + }
  189 + CUDA_CALLABLE complex<T> operator*=(const T &rhs)
  190 + {
  191 + *this = *this * rhs;
  192 + return *this;
  193 + }
  194 + //divide and assign
  195 + CUDA_CALLABLE complex<T> operator/=(const complex<T> &rhs)
  196 + {
  197 + *this = *this / rhs;
  198 + return *this;
  199 + }
  200 + CUDA_CALLABLE complex<T> operator/=(const T &rhs)
  201 + {
  202 + *this = *this / rhs;
  203 + return *this;
  204 + }
  205 +
  206 + //absolute value operator (returns the absolute value of the complex number)
  207 + CUDA_CALLABLE T abs()
  208 + {
  209 + return std::sqrt(r * r + i * i);
  210 + }
  211 +
  212 + CUDA_CALLABLE complex<T> log()
  213 + {
  214 + complex<T> result;
  215 + result.r = (T)std::log(std::sqrt(r * r + i * i));
  216 + result.i = (T)std::atan2(i, r);
  217 +
  218 +
  219 + return result;
  220 + }
  221 +
  222 + CUDA_CALLABLE complex<T> exp()
  223 + {
  224 + complex<T> result;
  225 +
  226 + T e_r = std::exp(r);
  227 + result.r = e_r * (T)std::cos(i);
  228 + result.i = e_r * (T)std::sin(i);
  229 +
  230 + return result;
  231 + }
  232 +
  233 + /*CUDA_CALLABLE complex<T> pow(int y)
  234 + {
  235 +
  236 + return pow((double)y);
  237 + }*/
  238 +
  239 + CUDA_CALLABLE complex<T> pow(T y)
  240 + {
  241 + complex<T> result;
  242 +
  243 + result = log() * y;
  244 +
  245 + return result.exp();
  246 + }
  247 +
  248 + CUDA_CALLABLE complex<T> sqrt()
  249 + {
  250 + complex<T> result;
  251 +
  252 + //convert to polar coordinates
  253 + T a = std::sqrt(r*r + i*i);
  254 + T theta = std::atan2(i, r);
  255 +
  256 + //find the square root
  257 + T a_p = std::sqrt(a);
  258 + T theta_p = theta/2.0f;
  259 +
  260 + //convert back to cartesian coordinates
  261 + result.r = a_p * std::cos(theta_p);
  262 + result.i = a_p * std::sin(theta_p);
  263 +
  264 + return result;
  265 + }
  266 +
  267 + std::string str()
  268 + {
  269 + std::stringstream ss;
  270 + ss<<"("<<r<<","<<i<<")";
  271 +
  272 + return ss.str();
  273 + }
  274 +
  275 + //COMPARISON operators
  276 + CUDA_CALLABLE bool operator==(complex<T> rhs)
  277 + {
  278 + if(r == rhs.r && i == rhs.i)
  279 + return true;
  280 + return false;
  281 + }
  282 +
  283 + CUDA_CALLABLE bool operator==(T rhs)
  284 + {
  285 + if(r == rhs && i == 0)
  286 + return true;
  287 + return false;
  288 + }
  289 +
  290 + CUDA_CALLABLE bool operator!=(T rhs)
  291 + {
  292 + if(r != rhs || i != 0)
  293 + return true;
  294 + return false;
  295 + }
  296 +
  297 + CUDA_CALLABLE bool operator<(complex<T> rhs){
  298 + return abs() < rhs.abs();
  299 + }
  300 + CUDA_CALLABLE bool operator<=(complex<T> rhs){
  301 + return abs() <= rhs.abs();
  302 + }
  303 + CUDA_CALLABLE bool operator>(complex<T> rhs){
  304 + return abs() > rhs.abs();
  305 + }
  306 + CUDA_CALLABLE bool operator >=(complex<T> rhs){
  307 + return abs() >= rhs.abs();
  308 + }
  309 +
  310 + //CASTING operators
  311 + template < typename otherT >
  312 + operator complex<otherT>()
  313 + {
  314 + complex<otherT> result((otherT)r, (otherT)i);
  315 + return result;
  316 + }
  317 + template< typename otherT >
  318 + complex( const complex<otherT> &rhs)
  319 + {
  320 + r = (T)rhs.r;
  321 + i = (T)rhs.i;
  322 + }
  323 + template< typename otherT >
  324 + complex& operator=(const complex<otherT> &rhs)
  325 + {
  326 + r = (T)rhs.r;
  327 + i = (T)rhs.i;
  328 + return *this;
  329 + }
  330 +
  331 +};
  332 +
  333 +} //end stim namespace
  334 +
  335 +//addition
  336 +template<typename T>
  337 +CUDA_CALLABLE static stim::complex<T> operator+(const double a, const stim::complex<T> b)
  338 +{
  339 + return stim::complex<T>((T)a + b.r, b.i);
  340 +}
  341 +
  342 +//subtraction with a real value
  343 +template<typename T>
  344 +CUDA_CALLABLE static stim::complex<T> operator-(const double a, const stim::complex<T> b)
  345 +{
  346 + return stim::complex<T>((T)a - b.r, -b.i);
  347 +}
  348 +
  349 +//minus sign
  350 +template<typename T>
  351 +CUDA_CALLABLE static stim::complex<T> operator-(const stim::complex<T> &rhs)
  352 +{
  353 + return stim::complex<T>(-rhs.r, -rhs.i);
  354 +}
  355 +
  356 +//multiply a T value by a complex value
  357 +template<typename T>
  358 +CUDA_CALLABLE static stim::complex<T> operator*(const double a, const stim::complex<T> b)
  359 +{
  360 + return stim::complex<T>((T)a * b.r, (T)a * b.i);
  361 +}
  362 +
  363 +//divide a T value by a complex value
  364 +template<typename T>
  365 +CUDA_CALLABLE static stim::complex<T> operator/(const double a, const stim::complex<T> b)
  366 +{
  367 + stim::complex<T> result;
  368 +
  369 + T denom = b.r * b.r + b.i * b.i;
  370 +
  371 + result.r = ((T)a * b.r) / denom;
  372 + result.i = -((T)a * b.i) / denom;
  373 +
  374 + return result;
  375 +}
  376 +
  377 +
  378 +template<typename T>
  379 +CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, T y)
  380 +{
  381 + return x.pow(y);
  382 +}
  383 +template<typename T>
  384 +CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, int y)
  385 +{
  386 + return x.pow(y);
  387 +}
  388 +
  389 +//log function
  390 +template<typename T>
  391 +CUDA_CALLABLE static stim::complex<T> log(stim::complex<T> x)
  392 +{
  393 + return x.log();
  394 +}
  395 +
  396 +//exp function
  397 +template<typename T>
  398 +CUDA_CALLABLE static stim::complex<T> exp(stim::complex<T> x)
  399 +{
  400 + return x.exp();
  401 +}
  402 +
  403 +//sqrt function
  404 +template<typename T>
  405 +CUDA_CALLABLE static stim::complex<T> sqrt(stim::complex<T> x)
  406 +{
  407 + return x.sqrt();
  408 +}
  409 +
  410 +
  411 +template <typename T>
  412 +CUDA_CALLABLE static T abs(stim::complex<T> a)
  413 +{
  414 + return a.abs();
  415 +}
  416 +
  417 +template <typename T>
  418 +CUDA_CALLABLE static T real(stim::complex<T> a)
  419 +{
  420 + return a.r;
  421 +}
  422 +
  423 +//template <typename T>
  424 +CUDA_CALLABLE static float real(float a)
  425 +{
  426 + return a;
  427 +}
  428 +
  429 +template <typename T>
  430 +CUDA_CALLABLE static T imag(stim::complex<T> a)
  431 +{
  432 + return a.i;
  433 +}
  434 +
  435 +//trigonometric functions
  436 +//template<class A>
  437 +/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x)
  438 +{
  439 + stim::complex<float> result;
  440 + result.r = sinf(x.r) * coshf(x.i);
  441 + result.i = cosf(x.r) * sinhf(x.i);
  442 +
  443 + return result;
  444 +}*/
  445 +
  446 +template<class A>
  447 +CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)
  448 +{
  449 + stim::complex<A> result;
  450 + result.r = (A)std::sin(x.r) * (A)std::cosh(x.i);
  451 + result.i = (A)std::cos(x.r) * (A)std::sinh(x.i);
  452 +
  453 + return result;
  454 +}
  455 +
  456 +//floating point template
  457 +//template<class A>
  458 +/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x)
  459 +{
  460 + stim::complex<float> result;
  461 + result.r = cosf(x.r) * coshf(x.i);
  462 + result.i = -(sinf(x.r) * sinhf(x.i));
  463 +
  464 + return result;
  465 +}*/
  466 +
  467 +template<class A>
  468 +CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x)
  469 +{
  470 + stim::complex<A> result;
  471 + result.r = (A)std::cos(x.r) * (A)std::cosh(x.i);
  472 + result.i = -((A)std::sin(x.r) * (A)std::sinh(x.i));
  473 +
  474 + return result;
  475 +}
  476 +
  477 +
  478 +template<class A>
  479 +std::ostream& operator<<(std::ostream& os, stim::complex<A> x)
  480 +{
  481 + os<<x.str();
  482 + return os;
  483 +}
  484 +
  485 +template<class A>
  486 +std::istream& operator>>(std::istream& is, stim::complex<A>& x)
  487 +{
  488 + A r, i;
  489 + r = i = 0; //initialize the real and imaginary parts to zero
  490 + is>>r; //parse
  491 + is>>i;
  492 +
  493 + x.real(r); //assign the parsed values to x
  494 + x.imag(i);
  495 +
  496 + return is; //return the stream
  497 +}
  498 +
  499 +//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
  500 +//template<class T> using rtsComplex = stim::complex<T>;
  501 +//#endif
  502 +
  503 +
  504 +
  505 +#endif
... ...
math/complexfield.cuh
1   -#ifndef RTS_COMPLEXFIELD_H
2   -#define RTS_COMPLEXFIELD_H
3   -
4   -#include "cublas_v2.h"
5   -#include <cuda_runtime.h>
6   -
7   -#include "../math/field.cuh"
8   -#include "../math/complex.h"
9   -#include "../math/realfield.cuh"
10   -
11   -namespace stim{
12   -
13   -template<typename T>
14   -__global__ void gpu_complexfield_mag(T* dest, complex<T>* source, unsigned int r0, unsigned int r1){
15   -
16   - int iu = blockIdx.x * blockDim.x + threadIdx.x;
17   - int iv = blockIdx.y * blockDim.y + threadIdx.y;
18   -
19   - //make sure that the thread indices are in-bounds
20   - if(iu >= r0 || iv >= r1) return;
21   -
22   - //compute the index into the field
23   - int i = iv*r0 + iu;
24   -
25   - //calculate and store the result
26   - dest[i] = source[i].abs();
27   -}
28   -
29   -/*This class stores functions for saving images of complex fields
30   -*/
31   -template<typename T, unsigned int D = 1>
32   -class complexfield : public field< stim::complex<T>, D >{
33   - using field< stim::complex<T>, D >::R;
34   - using field< stim::complex<T>, D >::X;
35   - using field< stim::complex<T>, D >::shape;
36   - using field< stim::complex<T>, D >::cuda_params;
37   -
38   -
39   -
40   -public:
41   -
42   - //find the maximum value of component n
43   - stim::complex<T> find_max(unsigned int n){
44   - cublasStatus_t stat;
45   - cublasHandle_t handle;
46   -
47   - //create a CUBLAS handle
48   - stat = cublasCreate(&handle);
49   - if(stat != CUBLAS_STATUS_SUCCESS){
50   - std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
51   - exit(1);
52   - }
53   -
54   - int L = R[0] * R[1]; //compute the number of discrete points in a slice
55   - int index; //result of the max operation
56   - stim::complex<T> result;
57   -
58   - if(sizeof(T) == 8)
59   - stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index);
60   - else
61   - stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index);
62   -
63   - index -= 1; //adjust for 1-based indexing
64   -
65   - //if there was a GPU error, terminate
66   - if(stat != CUBLAS_STATUS_SUCCESS){
67   - std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
68   - exit(1);
69   - }
70   -
71   - //retrieve the maximum value for this slice and store it in the maxVal array
72   - std::cout<<X[n]<<std::endl;
73   - HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(stim::complex<T>), cudaMemcpyDeviceToHost));
74   - return result;
75   - }
76   -
77   -public:
78   -
79   - enum attribute {magnitude, real, imaginary};
80   -
81   - //constructor (no parameters)
82   - complexfield() : field<stim::complex<T>, D>(){};
83   -
84   - //constructor (resolution specified)
85   - complexfield(unsigned int r0, unsigned int r1) : field<stim::complex<T>, D>(r0, r1){};
86   -
87   - //assignment from a field of complex values
88   - complexfield & operator=(const field< stim::complex<T>, D > rhs){
89   - field< complex<T>, D >::operator=(rhs);
90   - return *this;
91   - }
92   -
93   - //assignment operator (scalar value)
94   - complexfield & operator= (const complex<T> rhs){
95   -
96   - field< complex<T>, D >::operator=(rhs);
97   - return *this;
98   - }
99   -
100   - //assignment operator (vector value)
101   - complexfield & operator= (const vec< complex<T>, D > rhs){
102   -
103   - field< complex<T>, D >::operator=(rhs);
104   - return *this;
105   - }
106   -
107   - //cropping
108   - complexfield crop(unsigned int width, unsigned int height){
109   -
110   - complexfield<T, D> result;
111   - result = field< complex<T>, D>::crop(width, height);
112   - return result;
113   - }
114   -
115   - void toImage(std::string filename, attribute type = magnitude, unsigned int n=0){
116   -
117   - field<T, 1> rf(R[0], R[1]);
118   -
119   - //get cuda parameters
120   - dim3 blocks, grids;
121   - cuda_params(grids, blocks);
122   -
123   - if(type == magnitude){
124   - gpu_complexfield_mag <<<grids, blocks>>> (rf.ptr(), X[n], R[0], R[1]);
125   - rf.toImage(filename, n, true);
126   - }
127   -
128   - }
129   -
130   -
131   -};
132   -
133   -
134   -} //end namespace rts
135   -
136   -
137   -#endif
  1 +#ifndef RTS_COMPLEXFIELD_H
  2 +#define RTS_COMPLEXFIELD_H
  3 +
  4 +#include "cublas_v2.h"
  5 +#include <cuda_runtime.h>
  6 +
  7 +#include "../math/field.cuh"
  8 +#include "../math/complex.h"
  9 +#include "../math/realfield.cuh"
  10 +
  11 +namespace stim{
  12 +
  13 +template<typename T>
  14 +__global__ void gpu_complexfield_mag(T* dest, complex<T>* source, unsigned int r0, unsigned int r1){
  15 +
  16 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  17 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  18 +
  19 + //make sure that the thread indices are in-bounds
  20 + if(iu >= r0 || iv >= r1) return;
  21 +
  22 + //compute the index into the field
  23 + int i = iv*r0 + iu;
  24 +
  25 + //calculate and store the result
  26 + dest[i] = source[i].abs();
  27 +}
  28 +
/*This class stores functions for saving images of complex fields.
  It derives from field< stim::complex<T>, D > and adds a complex-aware
  find_max() plus an image export of the field magnitude (toImage).
*/
template<typename T, unsigned int D = 1>
class complexfield : public field< stim::complex<T>, D >{
	using field< stim::complex<T>, D >::R;
	using field< stim::complex<T>, D >::X;
	using field< stim::complex<T>, D >::shape;
	using field< stim::complex<T>, D >::cuda_params;

public:

	//find the maximum value of component n
	//	The search is done by cublasIcamax / cublasIzamax; per the cuBLAS documentation
	//	these rank complex elements by |real| + |imag|.
	//	n must be smaller than D (it is used directly as an index into X).
	//	Returns the selected element copied back to the host, or a default-constructed
	//	complex value when the field holds no samples.
	//	Terminates the program (exit(1)) if cuBLAS reports an error.
	stim::complex<T> find_max(unsigned int n){
		cublasStatus_t stat;
		cublasHandle_t handle;

		int L = R[0] * R[1];				//compute the number of discrete points in a slice
		int index;							//result of the max operation
		stim::complex<T> result;

		//an empty slice has nothing to search; without this guard the
		//	1-based index adjustment below would read in front of the slice
		if(L <= 0) return result;

		//create a CUBLAS handle
		stat = cublasCreate(&handle);
		if(stat != CUBLAS_STATUS_SUCCESS){
			std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
			exit(1);
		}

		//select the routine from the scalar size: a 4-byte T is stored as cuComplex,
		//	anything else is treated as cuDoubleComplex (same test as field::find_max)
		if(sizeof(T) == 4)
			stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index);
		else
			stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index);

		//the handle is only needed for the search; release it so repeated calls do not leak
		cublasDestroy(handle);

		//if there was a GPU error, terminate
		if(stat != CUBLAS_STATUS_SUCCESS){
			std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
			exit(1);
		}

		index -= 1;							//adjust for 1-based indexing

		//retrieve the selected value for this slice from the GPU
		HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(stim::complex<T>), cudaMemcpyDeviceToHost));
		return result;
	}

public:

	enum attribute {magnitude, real, imaginary};

	//constructor (no parameters)
	complexfield() : field<stim::complex<T>, D>(){};

	//constructor (resolution specified)
	complexfield(unsigned int r0, unsigned int r1) : field<stim::complex<T>, D>(r0, r1){};

	//assignment from a field of complex values
	//	(taken by const reference so the argument is not deep-copied before the base assignment)
	complexfield & operator=(const field< stim::complex<T>, D > & rhs){
		field< complex<T>, D >::operator=(rhs);
		return *this;
	}

	//assignment operator (scalar value)
	complexfield & operator= (const complex<T> rhs){

		field< complex<T>, D >::operator=(rhs);
		return *this;
	}

	//assignment operator (vector value)
	complexfield & operator= (const vec< complex<T>, D > rhs){

		field< complex<T>, D >::operator=(rhs);
		return *this;
	}

	//cropping: forwards to field::crop and wraps the result in a complexfield
	complexfield crop(unsigned int width, unsigned int height){

		complexfield<T, D> result;
		result = field< complex<T>, D>::crop(width, height);
		return result;
	}

	//save an image of component n to the given file
	//	Only the magnitude attribute is implemented; for any other attribute
	//	this function returns without writing anything.
	//	n must be smaller than D (it is used directly as an index into X).
	void toImage(std::string filename, attribute type = magnitude, unsigned int n=0){

		if(type == magnitude){

			//an empty field cannot be launched over (zero-sized grid), so there is nothing to save
			if(R[0] == 0 || R[1] == 0) return;

			//scalar field that receives the magnitude of component n
			field<T, 1> rf(R[0], R[1]);

			//get cuda parameters
			dim3 blocks, grids;
			cuda_params(grids, blocks);

			gpu_complexfield_mag <<<grids, blocks>>> (rf.ptr(), X[n], R[0], R[1]);
			HANDLE_ERROR(cudaGetLastError());		//catch launch-configuration errors

			//rf has exactly one component, so it is always saved from slot 0
			//	(forwarding n here would index past the end of rf for n > 0)
			rf.toImage(filename, 0, true);
		}

	}


};
  132 +
  133 +
  134 +} //end namespace stim
  135 +
  136 +
  137 +#endif
... ...
math/field.cuh
1   -#ifndef RTS_FIELD_CUH
2   -#define RTS_FIELD_CUH
3   -
4   -#include <vector>
5   -#include <string>
6   -#include <sstream>
7   -
8   -#include "cublas_v2.h"
9   -#include <cuda_runtime.h>
10   -
11   -#include "../math/rect.h"
12   -#include "../cuda/threads.h"
13   -#include "../cuda/error.h"
14   -#include "../cuda/devices.h"
15   -#include "../visualization/colormap.h"
16   -
17   -
18   -namespace stim{
19   -
//multiply R = X * Y (element-wise over an r0 x r1 slice)
//	Expects a 2D launch whose x/y thread coordinates cover r0 x r1 samples;
//	threads that fall outside that range exit without touching memory.
//	R		- output array (written at iv*r0 + iu)
//	X, Y	- input arrays (read at the same index)
//	r0, r1	- number of samples along x (row length) and along y
template<typename T>
__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){

	//thread coordinates are unsigned so they compare cleanly against r0 and r1
	unsigned int iu = blockIdx.x * blockDim.x + threadIdx.x;
	unsigned int iv = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(iu >= r0 || iv >= r1) return;

	//compute the index into the field (in size_t, so iv*r0 cannot wrap at 32 bits)
	size_t i = (size_t)iv * r0 + iu;

	//calculate and store the result
	R[i] = X[i] * Y[i];
}
36   -
//assign a constant value to all points
//	Expects a 2D launch whose x/y thread coordinates cover r0 x r1 samples;
//	threads that fall outside that range exit without touching memory.
//	ptr		- array to fill (written at iv*r0 + iu)
//	val		- value stored at every sample
//	r0, r1	- number of samples along x (row length) and along y
template<typename T>
__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){

	//thread coordinates are unsigned so they compare cleanly against r0 and r1
	unsigned int iu = blockIdx.x * blockDim.x + threadIdx.x;
	unsigned int iv = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(iu >= r0 || iv >= r1) return;

	//compute the index into the field (in size_t, so iv*r0 cannot wrap at 32 bits)
	size_t i = (size_t)iv * r0 + iu;

	//calculate and store the result
	ptr[i] = val;
}
53   -
//crop the field to the new dimensions (width x height)
//	Copies the top-left width x height window of source into dest.
//	Expects a 2D launch whose x/y thread coordinates cover width x height samples;
//	threads that fall outside that range exit without touching memory.
//	dest			- output array with rows of length width
//	source			- input array with rows of length r0
//	r0, r1			- resolution of source (r1 is not used by the kernel)
//	width, height	- resolution of dest; the kernel does not check them against r0, r1
template<typename T>
__global__ void gpu_field_crop(T* dest, T* source,
								unsigned int r0, unsigned int r1,
								unsigned int width, unsigned int height){

	//thread coordinates are unsigned so they compare cleanly against width and height
	unsigned int iu = blockIdx.x * blockDim.x + threadIdx.x;
	unsigned int iv = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(iu >= width || iv >= height) return;

	//compute the indices into both fields (in size_t, so the products cannot wrap at 32 bits)
	size_t is = (size_t)iv * r0 + iu;
	size_t id = (size_t)iv * width + iu;

	//calculate and store the result
	dest[id] = source[is];
}
73   -
//field: a 2D field with D components per sample.
//	Each component is stored as its own R[0] x R[1] array allocated with cudaMalloc;
//	copies are deep (device-to-device), and the destructor frees every component.
//	NOTE(review): the copy constructor and copy assignment do not copy 'shape' - confirm this is intended.
template<typename T, unsigned int D = 1>
class field{

protected:

	T* X[D];						//pointer to the field data
	unsigned int R[2];				//field resolution
	stim::rect<T> shape;			//position and shape of the field slice

	//computes a square thread block (side = sqrt of stim::maxThreadsPerBlock())
	//	and the grid needed to cover R[0] x R[1] samples
	void cuda_params(dim3& grids, dim3& blocks){
		int maxThreads = stim::maxThreadsPerBlock();				//compute the optimal block size
		int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);

		//create one thread for each detector pixel
		blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
		grids = dim3((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
	}

	//find the maximum value of component n
	//	The search is done by cublasIsamax / cublasIdamax, which select the element of
	//	largest absolute value, so the returned value itself may be negative.
	//	n must be smaller than D (it is used directly as an index into X).
	//	Returns T() when the field holds no samples.
	//	Terminates the program (exit(1)) if cuBLAS reports an error.
	T find_max(unsigned int n){
		cublasStatus_t stat;
		cublasHandle_t handle;

		int L = R[0] * R[1];				//compute the number of discrete points in a slice
		int index;							//result of the max operation
		T result = T();

		//an empty slice has nothing to search; without this guard the
		//	1-based index adjustment below would read in front of the slice
		if(L <= 0) return result;

		//create a CUBLAS handle
		stat = cublasCreate(&handle);
		if(stat != CUBLAS_STATUS_SUCCESS){
			std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
			exit(1);
		}

		if(sizeof(T) == 4)
			stat = cublasIsamax(handle, L, (const float*)X[n], 1, &index);
		else
			stat = cublasIdamax(handle, L, (const double*)X[n], 1, &index);

		//the handle is only needed for the search; release it so repeated calls do not leak
		cublasDestroy(handle);

		//if there was a GPU error, terminate
		if(stat != CUBLAS_STATUS_SUCCESS){
			std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
			exit(1);
		}

		index -= 1;							//adjust for 1-based indexing

		//retrieve the selected value for this slice from the GPU
		HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(T), cudaMemcpyDeviceToHost));
		return result;
	}

public:

	//returns a list of D file names given an input string with wild cards
	//	The run of '?' characters in the file name is replaced by the component index,
	//	zero-padded to the width of that run, e.g. "out_???.bmp" -> "out_000.bmp", "out_001.bmp", ...
	//	The extension is everything from the last '.' of the final path element;
	//	a name without an extension is accepted.
	//	When the name holds no '?', the index is inserted (unpadded) in front of the extension.
	//	NOTE(review): std::setfill / std::setw come from <iomanip>, which this header does not include directly.
	std::vector<std::string> process_filename(std::string name){

		std::string prefix = name;			//file name without extension (with wildcard '?' characters)
		std::string ext;					//file extension including the period (ex. .bmp, .png)

		//split at the last '.', but only if it belongs to the file name and not to a directory
		size_t dot = name.find_last_of('.');
		size_t slash = name.find_last_of("/\\");
		if(dot != std::string::npos && (slash == std::string::npos || dot > slash)){
			prefix = name.substr(0, dot);
			ext = name.substr(dot);
		}

		size_t i0 = prefix.find_first_of("?");		//find the positions of the first and last wildcard ('?')
		size_t i1 = prefix.find_last_of("?");

		std::string postfix;				//text between the wildcards and the extension
		unsigned int digits = 0;			//number of wildcards (zero-padding width)
		if(i0 != std::string::npos){
			postfix = prefix.substr(i1+1);
			digits = (unsigned int)(i1 - i0 + 1);
			prefix = prefix.substr(0, i0);
		}

		std::vector<std::string> flist;		//create a vector of file names
		//fill the list
		for(unsigned int d=0; d<D; d++){
			std::stringstream ss;			//assemble the file name
			ss<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext;
			flist.push_back(ss.str());
		}

		return flist;
	}

	//mark every component as unallocated
	void init(){
		for(unsigned int n=0; n<D; n++)
			X[n] = NULL;
	}

	//free every allocated component and mark it as unallocated,
	//	so a later destroy() or clear() cannot touch freed memory
	void destroy(){
		for(unsigned int n=0; n<D; n++){
			if(X[n] != NULL){
				HANDLE_ERROR(cudaFree(X[n]));
				X[n] = NULL;
			}
		}
	}

public:
	//field constructor (empty field, nothing allocated)
	field(){
		R[0] = R[1] = 0;
		init();
	}

	//field constructor: allocates D components of x by y samples and zeroes them
	field(unsigned int x, unsigned int y){
		//set the resolution
		R[0] = x;
		R[1] = y;
		//allocate memory on the GPU
		for(unsigned int n=0; n<D; n++){
			HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(T) * R[0] * R[1] ));
		}
		clear();		//zero the field
	}

	///copy constructor (deep copy of every allocated component)
	field(const field &rhs){
		//first make a shallow copy
		R[0] = rhs.R[0];
		R[1] = rhs.R[1];

		for(unsigned int n=0; n<D; n++){
			//do we have to make a deep copy?
			if(rhs.X[n] == NULL)
				X[n] = NULL;		//no
			else{
				//allocate the necessary memory
				HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1]));

				//copy the slice
				HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice));
			}
		}
	}

	~field(){
		destroy();
	}

	//assignment operator (deep copy)
	field & operator= (const field & rhs){

		//self-assignment must not free the data that is about to be copied
		if(this == &rhs) return *this;

		//de-allocate any existing GPU memory (leaves every X[n] NULL)
		destroy();

		//copy the slice resolution
		R[0] = rhs.R[0];
		R[1] = rhs.R[1];

		for(unsigned int n=0; n<D; n++)
		{
			//an unallocated source component stays unallocated here as well
			if(rhs.X[n] == NULL) continue;

			//allocate the necessary memory
			HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1]));
			//copy the slice
			HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice));
		}
		return *this;
	}

	//assignment of a constant value to all positions and components
	field & operator= (const T rhs){

		//an empty field has nothing to fill (and a zero-sized grid cannot be launched)
		if(R[0] == 0 || R[1] == 0) return *this;

		//create one thread for each detector pixel
		dim3 dimGrid, dimBlock;
		cuda_params(dimGrid, dimBlock);

		//assign the constant value to all positions and dimensions
		for(unsigned int n=0; n<D; n++){
			stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]);
			HANDLE_ERROR(cudaGetLastError());		//catch launch-configuration errors
		}

		return *this;
	}

	//assignment of vector component (component n is filled with rhs.v[n])
	field & operator= (const vec<T, D> rhs){

		//an empty field has nothing to fill (and a zero-sized grid cannot be launched)
		if(R[0] == 0 || R[1] == 0) return *this;

		//create one thread for each detector pixel
		dim3 dimGrid, dimBlock;
		cuda_params(dimGrid, dimBlock);

		//assign the per-component value to all positions
		for(unsigned int n=0; n<D; n++){
			stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]);
			HANDLE_ERROR(cudaGetLastError());		//catch launch-configuration errors
		}

		return *this;

	}

	//multiply two fields (element-wise multiplication)
	//	rhs is read with this field's resolution; the resolutions are not compared here,
	//	so the caller must pass a field of the same size
	field<T, D> operator* (const field & rhs){

		//create a field to store the result
		field<T, D> result(R[0], R[1]);

		//an empty field has nothing to multiply (and a zero-sized grid cannot be launched)
		if(R[0] == 0 || R[1] == 0) return result;

		//create one thread for each detector pixel
		dim3 dimGrid, dimBlock;
		cuda_params(dimGrid, dimBlock);

		for(unsigned int n=0; n<D; n++){
			stim::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]);
			HANDLE_ERROR(cudaGetLastError());		//catch launch-configuration errors
		}

		return result;
	}

	//returns the GPU pointer of component n, or NULL if n is not a valid component
	T* ptr(unsigned int n = 0){
		if(n < D)
			return X[n];
		else return NULL;
	}

	//return the vector component at position (u, v)
	//	(one device-to-host copy per component; u and v are not range-checked)
	vec<T, D> get(unsigned int u, unsigned int v){

		vec<T, D> result;
		for(unsigned int d=0; d<D; d++){
			HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + v*R[0] + u, sizeof(T), cudaMemcpyDeviceToHost));
		}

		return result;
	}

	//set all components of the field to zero
	void clear(){
		for(unsigned int n=0; n<D; n++)
			if(X[n] != NULL)
				HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1]));
	}

	//crop the field: returns a new width x height field holding the top-left window
	//	width and height are not checked against the current resolution,
	//	so the caller must keep them within R[0] x R[1]
	field<T, D> crop(unsigned int width, unsigned int height){

		//create a field to store the result
		field<T, D> result(width, height);

		//an empty window has nothing to copy (and a zero-sized grid cannot be launched)
		if(width == 0 || height == 0) return result;

		int maxThreads = stim::maxThreadsPerBlock();				//compute the optimal block size
		int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);

		//create one thread for each pixel of the cropped field
		dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
		dim3 dimGrid((width + SQRT_BLOCK -1)/SQRT_BLOCK, (height + SQRT_BLOCK - 1)/SQRT_BLOCK);

		for(unsigned int n=0; n<D; n++){
			stim::gpu_field_crop <<<dimGrid, dimBlock>>> (result.X[n], X[n], R[0], R[1], width, height);
			HANDLE_ERROR(cudaGetLastError());		//catch launch-configuration errors
		}

		return result;
	}

	//save an image representing component n
	//	positive = true maps the range [0 max_val], otherwise [-max_val max_val]
	void toImage(std::string filename, unsigned int n = 0,
					bool positive = false, stim::colormapType cmap = stim::cmBrewer){
		T max_val = find_max(n);				//find the value of largest magnitude

		//find_max selects by absolute value, so the element itself can be negative;
		//	use its magnitude so the color range is never inverted
		if(max_val < 0) max_val = -max_val;

		if(positive)	//if the field is positive, use the range [0 max_val]
			stim::gpu2image<T>(X[n], filename, R[0], R[1], 0, max_val, cmap);
		else
			stim::gpu2image<T>(X[n], filename, R[0], R[1], -max_val, max_val, cmap);
	}

};
341   -
342   -} //end namespace stim
343   -#endif
  1 +#ifndef RTS_FIELD_CUH
  2 +#define RTS_FIELD_CUH
  3 +
  4 +#include <vector>
  5 +#include <string>
  6 +#include <sstream>
  7 +
  8 +#include "cublas_v2.h"
  9 +#include <cuda_runtime.h>
  10 +
  11 +#include "../math/rect.h"
  12 +#include "../cuda/threads.h"
  13 +#include "../cuda/error.h"
  14 +#include "../cuda/devices.h"
  15 +#include "../visualization/colormap.h"
  16 +
  17 +
  18 +namespace stim{
  19 +
//multiply R = X * Y (element-wise over an r0 x r1 slice)
//	Expects a 2D launch whose x/y thread coordinates cover r0 x r1 samples;
//	threads that fall outside that range exit without touching memory.
//	R		- output array (written at iv*r0 + iu)
//	X, Y	- input arrays (read at the same index)
//	r0, r1	- number of samples along x (row length) and along y
template<typename T>
__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){

	//thread coordinates are unsigned so they compare cleanly against r0 and r1
	unsigned int iu = blockIdx.x * blockDim.x + threadIdx.x;
	unsigned int iv = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(iu >= r0 || iv >= r1) return;

	//compute the index into the field (in size_t, so iv*r0 cannot wrap at 32 bits)
	size_t i = (size_t)iv * r0 + iu;

	//calculate and store the result
	R[i] = X[i] * Y[i];
}
  36 +
//assign a constant value to all points
//	Expects a 2D launch whose x/y thread coordinates cover r0 x r1 samples;
//	threads that fall outside that range exit without touching memory.
//	ptr		- array to fill (written at iv*r0 + iu)
//	val		- value stored at every sample
//	r0, r1	- number of samples along x (row length) and along y
template<typename T>
__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){

	//thread coordinates are unsigned so they compare cleanly against r0 and r1
	unsigned int iu = blockIdx.x * blockDim.x + threadIdx.x;
	unsigned int iv = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(iu >= r0 || iv >= r1) return;

	//compute the index into the field (in size_t, so iv*r0 cannot wrap at 32 bits)
	size_t i = (size_t)iv * r0 + iu;

	//calculate and store the result
	ptr[i] = val;
}
  53 +
//crop the field to the new dimensions (width x height)
//	Copies the top-left width x height window of source into dest.
//	Expects a 2D launch whose x/y thread coordinates cover width x height samples;
//	threads that fall outside that range exit without touching memory.
//	dest			- output array with rows of length width
//	source			- input array with rows of length r0
//	r0, r1			- resolution of source (r1 is not used by the kernel)
//	width, height	- resolution of dest; the kernel does not check them against r0, r1
template<typename T>
__global__ void gpu_field_crop(T* dest, T* source,
								unsigned int r0, unsigned int r1,
								unsigned int width, unsigned int height){

	//thread coordinates are unsigned so they compare cleanly against width and height
	unsigned int iu = blockIdx.x * blockDim.x + threadIdx.x;
	unsigned int iv = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(iu >= width || iv >= height) return;

	//compute the indices into both fields (in size_t, so the products cannot wrap at 32 bits)
	size_t is = (size_t)iv * r0 + iu;
	size_t id = (size_t)iv * width + iu;

	//calculate and store the result
	dest[id] = source[is];
}
  73 +
  74 +template<typename T, unsigned int D = 1>
  75 +class field{
  76 +
  77 +protected:
  78 +
  79 + T* X[D]; //pointer to the field data
  80 + unsigned int R[2]; //field resolution
  81 + stim::rect<T> shape; //position and shape of the field slice
  82 +
  83 + //calculates the optimal block and grid sizes using information from the GPU
  84 + void cuda_params(dim3& grids, dim3& blocks){
  85 + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size
  86 + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);
  87 +
  88 + //create one thread for each detector pixel
  89 + blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
  90 + grids = dim3((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  91 + }
  92 +
  93 + //find the maximum value of component n
  94 + T find_max(unsigned int n){
  95 + cublasStatus_t stat;
  96 + cublasHandle_t handle;
  97 +
  98 + //create a CUBLAS handle
  99 + stat = cublasCreate(&handle);
  100 + if(stat != CUBLAS_STATUS_SUCCESS){
  101 + std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
  102 + exit(1);
  103 + }
  104 +
  105 + int L = R[0] * R[1]; //compute the number of discrete points in a slice
  106 + int index; //result of the max operation
  107 + T result;
  108 +
  109 + if(sizeof(T) == 4)
  110 + stat = cublasIsamax(handle, L, (const float*)X[n], 1, &index);
  111 + else
  112 + stat = cublasIdamax(handle, L, (const double*)X[n], 1, &index);
  113 +
  114 + index -= 1; //adjust for 1-based indexing
  115 +
  116 + //if there was a GPU error, terminate
  117 + if(stat != CUBLAS_STATUS_SUCCESS){
  118 + std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
  119 + exit(1);
  120 + }
  121 +
  122 + //retrieve the maximum value for this slice and store it in the maxVal array
  123 + HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(T), cudaMemcpyDeviceToHost));
  124 + return result;
  125 + }
  126 +
  127 +public:
  128 +
  129 + //returns a list of file names given an input string with wild cards
  130 + std::vector<std::string> process_filename(std::string name){
  131 + std::stringstream ss(name);
  132 + std::string item;
  133 + std::vector<std::string> elems;
  134 + while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension)
  135 + {
  136 + elems.push_back(item);
  137 + }
  138 +
  139 + std::string prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters)
  140 + std::string ext = elems[1]; //file extension (ex. .bmp, .png)
  141 + ext = std::string(".") + ext; //add a period back into the extension