Commit 81e0d2215b30a17309ce7ba7615f94e2eda5c67e
1 parent
7b3948ab
separated executable arguments and options in the arglist class
Showing
8 changed files
with
2056 additions
and
2039 deletions
Show diff stats
math/complex.h
1 | -/*RTS Complex number class. This class is CUDA compatible, | |
2 | -and can therefore be used in CUDA code and on CUDA devices. | |
3 | -*/ | |
4 | - | |
5 | -#ifndef RTS_COMPLEX | |
6 | -#define RTS_COMPLEX | |
7 | - | |
8 | -#include "../cuda/callable.h" | |
9 | -#include <cmath> | |
10 | -#include <string> | |
11 | -#include <sstream> | |
12 | -#include <iostream> | |
13 | - | |
14 | -namespace stim | |
15 | -{ | |
16 | - | |
17 | -template <class T> | |
18 | -struct complex | |
19 | -{ | |
20 | - T r, i; | |
21 | - | |
22 | - //default constructor | |
23 | - CUDA_CALLABLE complex() | |
24 | - { | |
25 | - r = 0; | |
26 | - i = 0; | |
27 | - } | |
28 | - | |
29 | - //constructor when given real and imaginary values | |
30 | - CUDA_CALLABLE complex(T r, T i = 0) | |
31 | - { | |
32 | - this->r = r; | |
33 | - this->i = i; | |
34 | - } | |
35 | - | |
36 | - //access methods | |
37 | - CUDA_CALLABLE T real() | |
38 | - { | |
39 | - return r; | |
40 | - } | |
41 | - | |
42 | - CUDA_CALLABLE T real(T r_val) | |
43 | - { | |
44 | - r = r_val; | |
45 | - return r_val; | |
46 | - } | |
47 | - | |
48 | - CUDA_CALLABLE T imag() | |
49 | - { | |
50 | - return i; | |
51 | - } | |
52 | - CUDA_CALLABLE T imag(T i_val) | |
53 | - { | |
54 | - i = i_val; | |
55 | - return i_val; | |
56 | - } | |
57 | - | |
58 | - | |
59 | - | |
60 | - //return the current value multiplied by i | |
61 | - CUDA_CALLABLE complex<T> imul() | |
62 | - { | |
63 | - complex<T> result; | |
64 | - result.r = -i; | |
65 | - result.i = r; | |
66 | - | |
67 | - return result; | |
68 | - } | |
69 | - | |
70 | - //returns the complex signum (-1, 0, 1) | |
71 | - CUDA_CALLABLE int sgn(){ | |
72 | - if(r > 0) return 1; | |
73 | - else if(r < 0) return -1; | |
74 | - else return (0 < i - i < 0); | |
75 | - } | |
76 | - | |
77 | - //ARITHMETIC OPERATORS-------------------- | |
78 | - | |
79 | - //binary + operator (returns the result of adding two complex values) | |
80 | - CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs) const | |
81 | - { | |
82 | - complex<T> result; | |
83 | - result.r = r + rhs.r; | |
84 | - result.i = i + rhs.i; | |
85 | - return result; | |
86 | - } | |
87 | - | |
88 | - CUDA_CALLABLE complex<T> operator+ (const T rhs) const | |
89 | - { | |
90 | - complex<T> result; | |
91 | - result.r = r + rhs; | |
92 | - result.i = i; | |
93 | - return result; | |
94 | - } | |
95 | - | |
96 | - //binary - operator (returns the result of subtracting two complex values) | |
97 | - CUDA_CALLABLE complex<T> operator- (const complex<T> rhs) const | |
98 | - { | |
99 | - complex<T> result; | |
100 | - result.r = r - rhs.r; | |
101 | - result.i = i - rhs.i; | |
102 | - return result; | |
103 | - } | |
104 | - | |
105 | - //binary - operator (returns the result of subtracting a real value from a complex value) | |
106 | - CUDA_CALLABLE complex<T> operator- (const T rhs) | |
107 | - { | |
108 | - complex<T> result; | |
109 | - result.r = r - rhs; | |
110 | - result.i = i; | |
111 | - return result; | |
112 | - } | |
113 | - | |
114 | - //binary MULTIPLICATION operators (returns the result of multiplying complex values) | |
115 | - CUDA_CALLABLE complex<T> operator* (const complex<T> rhs) const | |
116 | - { | |
117 | - complex<T> result; | |
118 | - result.r = r * rhs.r - i * rhs.i; | |
119 | - result.i = r * rhs.i + i * rhs.r; | |
120 | - return result; | |
121 | - } | |
122 | - CUDA_CALLABLE complex<T> operator* (const T rhs) | |
123 | - { | |
124 | - return complex<T>(r * rhs, i * rhs); | |
125 | - } | |
126 | - | |
127 | - //binary DIVISION operators (returns the result of dividing complex values) | |
128 | - CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs) const | |
129 | - { | |
130 | - complex<T> result; | |
131 | - T denom = rhs.r * rhs.r + rhs.i * rhs.i; | |
132 | - result.r = (r * rhs.r + i * rhs.i) / denom; | |
133 | - result.i = (- r * rhs.i + i * rhs.r) / denom; | |
134 | - | |
135 | - return result; | |
136 | - } | |
137 | - CUDA_CALLABLE complex<T> operator/ (const T rhs) | |
138 | - { | |
139 | - return complex<T>(r / rhs, i / rhs); | |
140 | - } | |
141 | - | |
142 | - //ASSIGNMENT operators----------------------------------- | |
143 | - CUDA_CALLABLE complex<T> & operator=(const complex<T> &rhs) | |
144 | - { | |
145 | - //check for self-assignment | |
146 | - if(this != &rhs) | |
147 | - { | |
148 | - this->r = rhs.r; | |
149 | - this->i = rhs.i; | |
150 | - } | |
151 | - return *this; | |
152 | - } | |
153 | - CUDA_CALLABLE complex<T> & operator=(const T &rhs) | |
154 | - { | |
155 | - this->r = rhs; | |
156 | - this->i = 0; | |
157 | - | |
158 | - return *this; | |
159 | - } | |
160 | - | |
161 | - //arithmetic assignment operators | |
162 | - CUDA_CALLABLE complex<T> operator+=(const complex<T> &rhs) | |
163 | - { | |
164 | - *this = *this + rhs; | |
165 | - return *this; | |
166 | - } | |
167 | - CUDA_CALLABLE complex<T> operator+=(const T &rhs) | |
168 | - { | |
169 | - *this = *this + rhs; | |
170 | - return *this; | |
171 | - } | |
172 | - | |
173 | - CUDA_CALLABLE complex<T> operator-=(const complex<T> &rhs) | |
174 | - { | |
175 | - *this = *this - rhs; | |
176 | - return *this; | |
177 | - } | |
178 | - CUDA_CALLABLE complex<T> operator-=(const T &rhs) | |
179 | - { | |
180 | - *this = *this - rhs; | |
181 | - return *this; | |
182 | - } | |
183 | - | |
184 | - CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs) | |
185 | - { | |
186 | - *this = *this * rhs; | |
187 | - return *this; | |
188 | - } | |
189 | - CUDA_CALLABLE complex<T> operator*=(const T &rhs) | |
190 | - { | |
191 | - *this = *this * rhs; | |
192 | - return *this; | |
193 | - } | |
194 | - //divide and assign | |
195 | - CUDA_CALLABLE complex<T> operator/=(const complex<T> &rhs) | |
196 | - { | |
197 | - *this = *this / rhs; | |
198 | - return *this; | |
199 | - } | |
200 | - CUDA_CALLABLE complex<T> operator/=(const T &rhs) | |
201 | - { | |
202 | - *this = *this / rhs; | |
203 | - return *this; | |
204 | - } | |
205 | - | |
206 | - //absolute value operator (returns the absolute value of the complex number) | |
207 | - CUDA_CALLABLE T abs() | |
208 | - { | |
209 | - return std::sqrt(r * r + i * i); | |
210 | - } | |
211 | - | |
212 | - CUDA_CALLABLE complex<T> log() | |
213 | - { | |
214 | - complex<T> result; | |
215 | - result.r = (T)std::log(std::sqrt(r * r + i * i)); | |
216 | - result.i = (T)std::atan2(i, r); | |
217 | - | |
218 | - | |
219 | - return result; | |
220 | - } | |
221 | - | |
222 | - CUDA_CALLABLE complex<T> exp() | |
223 | - { | |
224 | - complex<T> result; | |
225 | - | |
226 | - T e_r = std::exp(r); | |
227 | - result.r = e_r * (T)std::cos(i); | |
228 | - result.i = e_r * (T)std::sin(i); | |
229 | - | |
230 | - return result; | |
231 | - } | |
232 | - | |
233 | - /*CUDA_CALLABLE complex<T> pow(int y) | |
234 | - { | |
235 | - | |
236 | - return pow((double)y); | |
237 | - }*/ | |
238 | - | |
239 | - CUDA_CALLABLE complex<T> pow(T y) | |
240 | - { | |
241 | - complex<T> result; | |
242 | - | |
243 | - result = log() * y; | |
244 | - | |
245 | - return result.exp(); | |
246 | - } | |
247 | - | |
248 | - CUDA_CALLABLE complex<T> sqrt() | |
249 | - { | |
250 | - complex<T> result; | |
251 | - | |
252 | - //convert to polar coordinates | |
253 | - T a = std::sqrt(r*r + i*i); | |
254 | - T theta = std::atan2(i, r); | |
255 | - | |
256 | - //find the square root | |
257 | - T a_p = std::sqrt(a); | |
258 | - T theta_p = theta/2.0f; | |
259 | - | |
260 | - //convert back to cartesian coordinates | |
261 | - result.r = a_p * std::cos(theta_p); | |
262 | - result.i = a_p * std::sin(theta_p); | |
263 | - | |
264 | - return result; | |
265 | - } | |
266 | - | |
267 | - std::string str() | |
268 | - { | |
269 | - std::stringstream ss; | |
270 | - ss<<"("<<r<<","<<i<<")"; | |
271 | - | |
272 | - return ss.str(); | |
273 | - } | |
274 | - | |
275 | - //COMPARISON operators | |
276 | - CUDA_CALLABLE bool operator==(complex<T> rhs) | |
277 | - { | |
278 | - if(r == rhs.r && i == rhs.i) | |
279 | - return true; | |
280 | - return false; | |
281 | - } | |
282 | - | |
283 | - CUDA_CALLABLE bool operator==(T rhs) | |
284 | - { | |
285 | - if(r == rhs && i == 0) | |
286 | - return true; | |
287 | - return false; | |
288 | - } | |
289 | - | |
290 | - CUDA_CALLABLE bool operator!=(T rhs) | |
291 | - { | |
292 | - if(r != rhs || i != 0) | |
293 | - return true; | |
294 | - return false; | |
295 | - } | |
296 | - | |
297 | - CUDA_CALLABLE bool operator<(complex<T> rhs){ | |
298 | - return abs() < rhs.abs(); | |
299 | - } | |
300 | - CUDA_CALLABLE bool operator<=(complex<T> rhs){ | |
301 | - return abs() <= rhs.abs(); | |
302 | - } | |
303 | - CUDA_CALLABLE bool operator>(complex<T> rhs){ | |
304 | - return abs() > rhs.abs(); | |
305 | - } | |
306 | - CUDA_CALLABLE bool operator >=(complex<T> rhs){ | |
307 | - return abs() >= rhs.abs(); | |
308 | - } | |
309 | - | |
310 | - //CASTING operators | |
311 | - template < typename otherT > | |
312 | - operator complex<otherT>() | |
313 | - { | |
314 | - complex<otherT> result((otherT)r, (otherT)i); | |
315 | - return result; | |
316 | - } | |
317 | - template< typename otherT > | |
318 | - complex( const complex<otherT> &rhs) | |
319 | - { | |
320 | - r = (T)rhs.r; | |
321 | - i = (T)rhs.i; | |
322 | - } | |
323 | - template< typename otherT > | |
324 | - complex& operator=(const complex<otherT> &rhs) | |
325 | - { | |
326 | - r = (T)rhs.r; | |
327 | - i = (T)rhs.i; | |
328 | - return *this; | |
329 | - } | |
330 | - | |
331 | -}; | |
332 | - | |
333 | -} //end stim namespace | |
334 | - | |
335 | -//addition | |
336 | -template<typename T> | |
337 | -CUDA_CALLABLE static stim::complex<T> operator+(const double a, const stim::complex<T> b) | |
338 | -{ | |
339 | - return stim::complex<T>((T)a + b.r, b.i); | |
340 | -} | |
341 | - | |
342 | -//subtraction with a real value | |
343 | -template<typename T> | |
344 | -CUDA_CALLABLE static stim::complex<T> operator-(const double a, const stim::complex<T> b) | |
345 | -{ | |
346 | - return stim::complex<T>((T)a - b.r, -b.i); | |
347 | -} | |
348 | - | |
349 | -//minus sign | |
350 | -template<typename T> | |
351 | -CUDA_CALLABLE static stim::complex<T> operator-(const stim::complex<T> &rhs) | |
352 | -{ | |
353 | - return stim::complex<T>(-rhs.r, -rhs.i); | |
354 | -} | |
355 | - | |
356 | -//multiply a real (double) value by a complex value | |
357 | -template<typename T> | |
358 | -CUDA_CALLABLE static stim::complex<T> operator*(const double a, const stim::complex<T> b) | |
359 | -{ | |
360 | - return stim::complex<T>((T)a * b.r, (T)a * b.i); | |
361 | -} | |
362 | - | |
363 | -//divide a real (double) value by a complex value | |
364 | -template<typename T> | |
365 | -CUDA_CALLABLE static stim::complex<T> operator/(const double a, const stim::complex<T> b) | |
366 | -{ | |
367 | - stim::complex<T> result; | |
368 | - | |
369 | - T denom = b.r * b.r + b.i * b.i; | |
370 | - | |
371 | - result.r = ((T)a * b.r) / denom; | |
372 | - result.i = -((T)a * b.i) / denom; | |
373 | - | |
374 | - return result; | |
375 | -} | |
376 | - | |
377 | - | |
378 | -template<typename T> | |
379 | -CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, T y) | |
380 | -{ | |
381 | - return x.pow(y); | |
382 | -} | |
383 | -template<typename T> | |
384 | -CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, int y) | |
385 | -{ | |
386 | - return x.pow(y); | |
387 | -} | |
388 | - | |
389 | -//log function | |
390 | -template<typename T> | |
391 | -CUDA_CALLABLE static stim::complex<T> log(stim::complex<T> x) | |
392 | -{ | |
393 | - return x.log(); | |
394 | -} | |
395 | - | |
396 | -//exp function | |
397 | -template<typename T> | |
398 | -CUDA_CALLABLE static stim::complex<T> exp(stim::complex<T> x) | |
399 | -{ | |
400 | - return x.exp(); | |
401 | -} | |
402 | - | |
403 | -//sqrt function | |
404 | -template<typename T> | |
405 | -CUDA_CALLABLE static stim::complex<T> sqrt(stim::complex<T> x) | |
406 | -{ | |
407 | - return x.sqrt(); | |
408 | -} | |
409 | - | |
410 | - | |
411 | -template <typename T> | |
412 | -CUDA_CALLABLE static T abs(stim::complex<T> a) | |
413 | -{ | |
414 | - return a.abs(); | |
415 | -} | |
416 | - | |
417 | -template <typename T> | |
418 | -CUDA_CALLABLE static T real(stim::complex<T> a) | |
419 | -{ | |
420 | - return a.r; | |
421 | -} | |
422 | - | |
423 | -//template <typename T> | |
424 | -CUDA_CALLABLE static float real(float a) | |
425 | -{ | |
426 | - return a; | |
427 | -} | |
428 | - | |
429 | -template <typename T> | |
430 | -CUDA_CALLABLE static T imag(stim::complex<T> a) | |
431 | -{ | |
432 | - return a.i; | |
433 | -} | |
434 | - | |
435 | -//trigonometric functions | |
436 | -//template<class A> | |
437 | -/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x) | |
438 | -{ | |
439 | - stim::complex<float> result; | |
440 | - result.r = sinf(x.r) * coshf(x.i); | |
441 | - result.i = cosf(x.r) * sinhf(x.i); | |
442 | - | |
443 | - return result; | |
444 | -}*/ | |
445 | - | |
446 | -template<class A> | |
447 | -CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x) | |
448 | -{ | |
449 | - stim::complex<A> result; | |
450 | - result.r = (A)std::sin(x.r) * (A)std::cosh(x.i); | |
451 | - result.i = (A)std::cos(x.r) * (A)std::sinh(x.i); | |
452 | - | |
453 | - return result; | |
454 | -} | |
455 | - | |
456 | -//floating point template | |
457 | -//template<class A> | |
458 | -/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x) | |
459 | -{ | |
460 | - stim::complex<float> result; | |
461 | - result.r = cosf(x.r) * coshf(x.i); | |
462 | - result.i = -(sinf(x.r) * sinhf(x.i)); | |
463 | - | |
464 | - return result; | |
465 | -}*/ | |
466 | - | |
467 | -template<class A> | |
468 | -CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x) | |
469 | -{ | |
470 | - stim::complex<A> result; | |
471 | - result.r = (A)std::cos(x.r) * (A)std::cosh(x.i); | |
472 | - result.i = -((A)std::sin(x.r) * (A)std::sinh(x.i)); | |
473 | - | |
474 | - return result; | |
475 | -} | |
476 | - | |
477 | - | |
478 | -template<class A> | |
479 | -std::ostream& operator<<(std::ostream& os, stim::complex<A> x) | |
480 | -{ | |
481 | - os<<x.str(); | |
482 | - return os; | |
483 | -} | |
484 | - | |
485 | -template<class A> | |
486 | -std::istream& operator>>(std::istream& is, stim::complex<A>& x) | |
487 | -{ | |
488 | - A r, i; | |
489 | - r = i = 0; //initialize the real and imaginary parts to zero | |
490 | - is>>r; //parse | |
491 | - is>>i; | |
492 | - | |
493 | - x.real(r); //assign the parsed values to x | |
494 | - x.imag(i); | |
495 | - | |
496 | - return is; //return the stream | |
497 | -} | |
498 | - | |
499 | -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7 | |
500 | -//template<class T> using rtsComplex = stim::complex<T>; | |
501 | -//#endif | |
502 | - | |
503 | - | |
504 | - | |
505 | -#endif | |
1 | +/*RTS Complex number class. This class is CUDA compatible, | |
2 | +and can therefore be used in CUDA code and on CUDA devices. | |
3 | +*/ | |
4 | + | |
5 | +#ifndef RTS_COMPLEX | |
6 | +#define RTS_COMPLEX | |
7 | + | |
8 | +#include "../cuda/callable.h" | |
9 | +#include <cmath> | |
10 | +#include <string> | |
11 | +#include <sstream> | |
12 | +#include <iostream> | |
13 | + | |
14 | +namespace stim | |
15 | +{ | |
16 | + | |
17 | +template <class T> | |
18 | +struct complex | |
19 | +{ | |
20 | + T r, i; | |
21 | + | |
22 | + //default constructor | |
23 | + CUDA_CALLABLE complex() | |
24 | + { | |
25 | + r = 0; | |
26 | + i = 0; | |
27 | + } | |
28 | + | |
29 | + //constructor when given real and imaginary values | |
30 | + CUDA_CALLABLE complex(T r, T i = 0) | |
31 | + { | |
32 | + this->r = r; | |
33 | + this->i = i; | |
34 | + } | |
35 | + | |
36 | + //access methods | |
37 | + CUDA_CALLABLE T real() | |
38 | + { | |
39 | + return r; | |
40 | + } | |
41 | + | |
42 | + CUDA_CALLABLE T real(T r_val) | |
43 | + { | |
44 | + r = r_val; | |
45 | + return r_val; | |
46 | + } | |
47 | + | |
48 | + CUDA_CALLABLE T imag() | |
49 | + { | |
50 | + return i; | |
51 | + } | |
52 | + CUDA_CALLABLE T imag(T i_val) | |
53 | + { | |
54 | + i = i_val; | |
55 | + return i_val; | |
56 | + } | |
57 | + | |
58 | + | |
59 | + | |
60 | + //return the current value multiplied by i | |
61 | + CUDA_CALLABLE complex<T> imul() | |
62 | + { | |
63 | + complex<T> result; | |
64 | + result.r = -i; | |
65 | + result.i = r; | |
66 | + | |
67 | + return result; | |
68 | + } | |
69 | + | |
70 | + //returns the complex signum (-1, 0, 1) | |
71 | + CUDA_CALLABLE int sgn(){ | |
72 | + if(r > 0) return 1; | |
73 | + else if(r < 0) return -1; | |
74 | + else return (0 < i - i < 0); | |
75 | + } | |
76 | + | |
77 | + //ARITHMETIC OPERATORS-------------------- | |
78 | + | |
79 | + //binary + operator (returns the result of adding two complex values) | |
80 | + CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs) const | |
81 | + { | |
82 | + complex<T> result; | |
83 | + result.r = r + rhs.r; | |
84 | + result.i = i + rhs.i; | |
85 | + return result; | |
86 | + } | |
87 | + | |
88 | + CUDA_CALLABLE complex<T> operator+ (const T rhs) const | |
89 | + { | |
90 | + complex<T> result; | |
91 | + result.r = r + rhs; | |
92 | + result.i = i; | |
93 | + return result; | |
94 | + } | |
95 | + | |
96 | + //binary - operator (returns the result of subtracting two complex values) | |
97 | + CUDA_CALLABLE complex<T> operator- (const complex<T> rhs) const | |
98 | + { | |
99 | + complex<T> result; | |
100 | + result.r = r - rhs.r; | |
101 | + result.i = i - rhs.i; | |
102 | + return result; | |
103 | + } | |
104 | + | |
105 | + //binary - operator (returns the result of subtracting a real value from a complex value) | |
106 | + CUDA_CALLABLE complex<T> operator- (const T rhs) | |
107 | + { | |
108 | + complex<T> result; | |
109 | + result.r = r - rhs; | |
110 | + result.i = i; | |
111 | + return result; | |
112 | + } | |
113 | + | |
114 | + //binary MULTIPLICATION operators (returns the result of multiplying complex values) | |
115 | + CUDA_CALLABLE complex<T> operator* (const complex<T> rhs) const | |
116 | + { | |
117 | + complex<T> result; | |
118 | + result.r = r * rhs.r - i * rhs.i; | |
119 | + result.i = r * rhs.i + i * rhs.r; | |
120 | + return result; | |
121 | + } | |
122 | + CUDA_CALLABLE complex<T> operator* (const T rhs) | |
123 | + { | |
124 | + return complex<T>(r * rhs, i * rhs); | |
125 | + } | |
126 | + | |
127 | + //binary DIVISION operators (returns the result of dividing complex values) | |
128 | + CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs) const | |
129 | + { | |
130 | + complex<T> result; | |
131 | + T denom = rhs.r * rhs.r + rhs.i * rhs.i; | |
132 | + result.r = (r * rhs.r + i * rhs.i) / denom; | |
133 | + result.i = (- r * rhs.i + i * rhs.r) / denom; | |
134 | + | |
135 | + return result; | |
136 | + } | |
137 | + CUDA_CALLABLE complex<T> operator/ (const T rhs) | |
138 | + { | |
139 | + return complex<T>(r / rhs, i / rhs); | |
140 | + } | |
141 | + | |
142 | + //ASSIGNMENT operators----------------------------------- | |
143 | + CUDA_CALLABLE complex<T> & operator=(const complex<T> &rhs) | |
144 | + { | |
145 | + //check for self-assignment | |
146 | + if(this != &rhs) | |
147 | + { | |
148 | + this->r = rhs.r; | |
149 | + this->i = rhs.i; | |
150 | + } | |
151 | + return *this; | |
152 | + } | |
153 | + CUDA_CALLABLE complex<T> & operator=(const T &rhs) | |
154 | + { | |
155 | + this->r = rhs; | |
156 | + this->i = 0; | |
157 | + | |
158 | + return *this; | |
159 | + } | |
160 | + | |
161 | + //arithmetic assignment operators | |
162 | + CUDA_CALLABLE complex<T> operator+=(const complex<T> &rhs) | |
163 | + { | |
164 | + *this = *this + rhs; | |
165 | + return *this; | |
166 | + } | |
167 | + CUDA_CALLABLE complex<T> operator+=(const T &rhs) | |
168 | + { | |
169 | + *this = *this + rhs; | |
170 | + return *this; | |
171 | + } | |
172 | + | |
173 | + CUDA_CALLABLE complex<T> operator-=(const complex<T> &rhs) | |
174 | + { | |
175 | + *this = *this - rhs; | |
176 | + return *this; | |
177 | + } | |
178 | + CUDA_CALLABLE complex<T> operator-=(const T &rhs) | |
179 | + { | |
180 | + *this = *this - rhs; | |
181 | + return *this; | |
182 | + } | |
183 | + | |
184 | + CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs) | |
185 | + { | |
186 | + *this = *this * rhs; | |
187 | + return *this; | |
188 | + } | |
189 | + CUDA_CALLABLE complex<T> operator*=(const T &rhs) | |
190 | + { | |
191 | + *this = *this * rhs; | |
192 | + return *this; | |
193 | + } | |
194 | + //divide and assign | |
195 | + CUDA_CALLABLE complex<T> operator/=(const complex<T> &rhs) | |
196 | + { | |
197 | + *this = *this / rhs; | |
198 | + return *this; | |
199 | + } | |
200 | + CUDA_CALLABLE complex<T> operator/=(const T &rhs) | |
201 | + { | |
202 | + *this = *this / rhs; | |
203 | + return *this; | |
204 | + } | |
205 | + | |
206 | + //absolute value operator (returns the absolute value of the complex number) | |
207 | + CUDA_CALLABLE T abs() | |
208 | + { | |
209 | + return std::sqrt(r * r + i * i); | |
210 | + } | |
211 | + | |
212 | + CUDA_CALLABLE complex<T> log() | |
213 | + { | |
214 | + complex<T> result; | |
215 | + result.r = (T)std::log(std::sqrt(r * r + i * i)); | |
216 | + result.i = (T)std::atan2(i, r); | |
217 | + | |
218 | + | |
219 | + return result; | |
220 | + } | |
221 | + | |
222 | + CUDA_CALLABLE complex<T> exp() | |
223 | + { | |
224 | + complex<T> result; | |
225 | + | |
226 | + T e_r = std::exp(r); | |
227 | + result.r = e_r * (T)std::cos(i); | |
228 | + result.i = e_r * (T)std::sin(i); | |
229 | + | |
230 | + return result; | |
231 | + } | |
232 | + | |
233 | + /*CUDA_CALLABLE complex<T> pow(int y) | |
234 | + { | |
235 | + | |
236 | + return pow((double)y); | |
237 | + }*/ | |
238 | + | |
239 | + CUDA_CALLABLE complex<T> pow(T y) | |
240 | + { | |
241 | + complex<T> result; | |
242 | + | |
243 | + result = log() * y; | |
244 | + | |
245 | + return result.exp(); | |
246 | + } | |
247 | + | |
248 | + CUDA_CALLABLE complex<T> sqrt() | |
249 | + { | |
250 | + complex<T> result; | |
251 | + | |
252 | + //convert to polar coordinates | |
253 | + T a = std::sqrt(r*r + i*i); | |
254 | + T theta = std::atan2(i, r); | |
255 | + | |
256 | + //find the square root | |
257 | + T a_p = std::sqrt(a); | |
258 | + T theta_p = theta/2.0f; | |
259 | + | |
260 | + //convert back to cartesian coordinates | |
261 | + result.r = a_p * std::cos(theta_p); | |
262 | + result.i = a_p * std::sin(theta_p); | |
263 | + | |
264 | + return result; | |
265 | + } | |
266 | + | |
267 | + std::string str() | |
268 | + { | |
269 | + std::stringstream ss; | |
270 | + ss<<"("<<r<<","<<i<<")"; | |
271 | + | |
272 | + return ss.str(); | |
273 | + } | |
274 | + | |
275 | + //COMPARISON operators | |
276 | + CUDA_CALLABLE bool operator==(complex<T> rhs) | |
277 | + { | |
278 | + if(r == rhs.r && i == rhs.i) | |
279 | + return true; | |
280 | + return false; | |
281 | + } | |
282 | + | |
283 | + CUDA_CALLABLE bool operator==(T rhs) | |
284 | + { | |
285 | + if(r == rhs && i == 0) | |
286 | + return true; | |
287 | + return false; | |
288 | + } | |
289 | + | |
290 | + CUDA_CALLABLE bool operator!=(T rhs) | |
291 | + { | |
292 | + if(r != rhs || i != 0) | |
293 | + return true; | |
294 | + return false; | |
295 | + } | |
296 | + | |
297 | + CUDA_CALLABLE bool operator<(complex<T> rhs){ | |
298 | + return abs() < rhs.abs(); | |
299 | + } | |
300 | + CUDA_CALLABLE bool operator<=(complex<T> rhs){ | |
301 | + return abs() <= rhs.abs(); | |
302 | + } | |
303 | + CUDA_CALLABLE bool operator>(complex<T> rhs){ | |
304 | + return abs() > rhs.abs(); | |
305 | + } | |
306 | + CUDA_CALLABLE bool operator >=(complex<T> rhs){ | |
307 | + return abs() >= rhs.abs(); | |
308 | + } | |
309 | + | |
310 | + //CASTING operators | |
311 | + template < typename otherT > | |
312 | + operator complex<otherT>() | |
313 | + { | |
314 | + complex<otherT> result((otherT)r, (otherT)i); | |
315 | + return result; | |
316 | + } | |
317 | + template< typename otherT > | |
318 | + complex( const complex<otherT> &rhs) | |
319 | + { | |
320 | + r = (T)rhs.r; | |
321 | + i = (T)rhs.i; | |
322 | + } | |
323 | + template< typename otherT > | |
324 | + complex& operator=(const complex<otherT> &rhs) | |
325 | + { | |
326 | + r = (T)rhs.r; | |
327 | + i = (T)rhs.i; | |
328 | + return *this; | |
329 | + } | |
330 | + | |
331 | +}; | |
332 | + | |
333 | +} //end stim namespace | |
334 | + | |
335 | +//addition | |
336 | +template<typename T> | |
337 | +CUDA_CALLABLE static stim::complex<T> operator+(const double a, const stim::complex<T> b) | |
338 | +{ | |
339 | + return stim::complex<T>((T)a + b.r, b.i); | |
340 | +} | |
341 | + | |
342 | +//subtraction with a real value | |
343 | +template<typename T> | |
344 | +CUDA_CALLABLE static stim::complex<T> operator-(const double a, const stim::complex<T> b) | |
345 | +{ | |
346 | + return stim::complex<T>((T)a - b.r, -b.i); | |
347 | +} | |
348 | + | |
349 | +//minus sign | |
350 | +template<typename T> | |
351 | +CUDA_CALLABLE static stim::complex<T> operator-(const stim::complex<T> &rhs) | |
352 | +{ | |
353 | + return stim::complex<T>(-rhs.r, -rhs.i); | |
354 | +} | |
355 | + | |
356 | +//multiply a real (double) value by a complex value | |
357 | +template<typename T> | |
358 | +CUDA_CALLABLE static stim::complex<T> operator*(const double a, const stim::complex<T> b) | |
359 | +{ | |
360 | + return stim::complex<T>((T)a * b.r, (T)a * b.i); | |
361 | +} | |
362 | + | |
363 | +//divide a real (double) value by a complex value | |
364 | +template<typename T> | |
365 | +CUDA_CALLABLE static stim::complex<T> operator/(const double a, const stim::complex<T> b) | |
366 | +{ | |
367 | + stim::complex<T> result; | |
368 | + | |
369 | + T denom = b.r * b.r + b.i * b.i; | |
370 | + | |
371 | + result.r = ((T)a * b.r) / denom; | |
372 | + result.i = -((T)a * b.i) / denom; | |
373 | + | |
374 | + return result; | |
375 | +} | |
376 | + | |
377 | + | |
378 | +template<typename T> | |
379 | +CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, T y) | |
380 | +{ | |
381 | + return x.pow(y); | |
382 | +} | |
383 | +template<typename T> | |
384 | +CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, int y) | |
385 | +{ | |
386 | + return x.pow(y); | |
387 | +} | |
388 | + | |
389 | +//log function | |
390 | +template<typename T> | |
391 | +CUDA_CALLABLE static stim::complex<T> log(stim::complex<T> x) | |
392 | +{ | |
393 | + return x.log(); | |
394 | +} | |
395 | + | |
396 | +//exp function | |
397 | +template<typename T> | |
398 | +CUDA_CALLABLE static stim::complex<T> exp(stim::complex<T> x) | |
399 | +{ | |
400 | + return x.exp(); | |
401 | +} | |
402 | + | |
403 | +//sqrt function | |
404 | +template<typename T> | |
405 | +CUDA_CALLABLE static stim::complex<T> sqrt(stim::complex<T> x) | |
406 | +{ | |
407 | + return x.sqrt(); | |
408 | +} | |
409 | + | |
410 | + | |
411 | +template <typename T> | |
412 | +CUDA_CALLABLE static T abs(stim::complex<T> a) | |
413 | +{ | |
414 | + return a.abs(); | |
415 | +} | |
416 | + | |
417 | +template <typename T> | |
418 | +CUDA_CALLABLE static T real(stim::complex<T> a) | |
419 | +{ | |
420 | + return a.r; | |
421 | +} | |
422 | + | |
423 | +//template <typename T> | |
424 | +CUDA_CALLABLE static float real(float a) | |
425 | +{ | |
426 | + return a; | |
427 | +} | |
428 | + | |
429 | +template <typename T> | |
430 | +CUDA_CALLABLE static T imag(stim::complex<T> a) | |
431 | +{ | |
432 | + return a.i; | |
433 | +} | |
434 | + | |
435 | +//trigonometric functions | |
436 | +//template<class A> | |
437 | +/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x) | |
438 | +{ | |
439 | + stim::complex<float> result; | |
440 | + result.r = sinf(x.r) * coshf(x.i); | |
441 | + result.i = cosf(x.r) * sinhf(x.i); | |
442 | + | |
443 | + return result; | |
444 | +}*/ | |
445 | + | |
446 | +template<class A> | |
447 | +CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x) | |
448 | +{ | |
449 | + stim::complex<A> result; | |
450 | + result.r = (A)std::sin(x.r) * (A)std::cosh(x.i); | |
451 | + result.i = (A)std::cos(x.r) * (A)std::sinh(x.i); | |
452 | + | |
453 | + return result; | |
454 | +} | |
455 | + | |
456 | +//floating point template | |
457 | +//template<class A> | |
458 | +/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x) | |
459 | +{ | |
460 | + stim::complex<float> result; | |
461 | + result.r = cosf(x.r) * coshf(x.i); | |
462 | + result.i = -(sinf(x.r) * sinhf(x.i)); | |
463 | + | |
464 | + return result; | |
465 | +}*/ | |
466 | + | |
467 | +template<class A> | |
468 | +CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x) | |
469 | +{ | |
470 | + stim::complex<A> result; | |
471 | + result.r = (A)std::cos(x.r) * (A)std::cosh(x.i); | |
472 | + result.i = -((A)std::sin(x.r) * (A)std::sinh(x.i)); | |
473 | + | |
474 | + return result; | |
475 | +} | |
476 | + | |
477 | + | |
478 | +template<class A> | |
479 | +std::ostream& operator<<(std::ostream& os, stim::complex<A> x) | |
480 | +{ | |
481 | + os<<x.str(); | |
482 | + return os; | |
483 | +} | |
484 | + | |
485 | +template<class A> | |
486 | +std::istream& operator>>(std::istream& is, stim::complex<A>& x) | |
487 | +{ | |
488 | + A r, i; | |
489 | + r = i = 0; //initialize the real and imaginary parts to zero | |
490 | + is>>r; //parse | |
491 | + is>>i; | |
492 | + | |
493 | + x.real(r); //assign the parsed values to x | |
494 | + x.imag(i); | |
495 | + | |
496 | + return is; //return the stream | |
497 | +} | |
498 | + | |
499 | +//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7 | |
500 | +//template<class T> using rtsComplex = stim::complex<T>; | |
501 | +//#endif | |
502 | + | |
503 | + | |
504 | + | |
505 | +#endif | ... | ... |
math/complexfield.cuh
1 | -#ifndef RTS_COMPLEXFIELD_H | |
2 | -#define RTS_COMPLEXFIELD_H | |
3 | - | |
4 | -#include "cublas_v2.h" | |
5 | -#include <cuda_runtime.h> | |
6 | - | |
7 | -#include "../math/field.cuh" | |
8 | -#include "../math/complex.h" | |
9 | -#include "../math/realfield.cuh" | |
10 | - | |
11 | -namespace stim{ | |
12 | - | |
13 | -template<typename T> | |
14 | -__global__ void gpu_complexfield_mag(T* dest, complex<T>* source, unsigned int r0, unsigned int r1){ | |
15 | - | |
16 | - int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
17 | - int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
18 | - | |
19 | - //make sure that the thread indices are in-bounds | |
20 | - if(iu >= r0 || iv >= r1) return; | |
21 | - | |
22 | - //compute the index into the field | |
23 | - int i = iv*r0 + iu; | |
24 | - | |
25 | - //calculate and store the result | |
26 | - dest[i] = source[i].abs(); | |
27 | -} | |
28 | - | |
29 | -/*This class stores functions for saving images of complex fields | |
30 | -*/ | |
31 | -template<typename T, unsigned int D = 1> | |
32 | -class complexfield : public field< stim::complex<T>, D >{ | |
33 | - using field< stim::complex<T>, D >::R; | |
34 | - using field< stim::complex<T>, D >::X; | |
35 | - using field< stim::complex<T>, D >::shape; | |
36 | - using field< stim::complex<T>, D >::cuda_params; | |
37 | - | |
38 | - | |
39 | - | |
40 | -public: | |
41 | - | |
42 | - //find the maximum value of component n | |
43 | - stim::complex<T> find_max(unsigned int n){ | |
44 | - cublasStatus_t stat; | |
45 | - cublasHandle_t handle; | |
46 | - | |
47 | - //create a CUBLAS handle | |
48 | - stat = cublasCreate(&handle); | |
49 | - if(stat != CUBLAS_STATUS_SUCCESS){ | |
50 | - std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | |
51 | - exit(1); | |
52 | - } | |
53 | - | |
54 | - int L = R[0] * R[1]; //compute the number of discrete points in a slice | |
55 | - int index; //result of the max operation | |
56 | - stim::complex<T> result; | |
57 | - | |
58 | - if(sizeof(T) == 8) | |
59 | - stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index); | |
60 | - else | |
61 | - stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index); | |
62 | - | |
63 | - index -= 1; //adjust for 1-based indexing | |
64 | - | |
65 | - //if there was a GPU error, terminate | |
66 | - if(stat != CUBLAS_STATUS_SUCCESS){ | |
67 | - std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | |
68 | - exit(1); | |
69 | - } | |
70 | - | |
71 | - //retrieve the maximum value for this slice and store it in the maxVal array | |
72 | - std::cout<<X[n]<<std::endl; | |
73 | - HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(stim::complex<T>), cudaMemcpyDeviceToHost)); | |
74 | - return result; | |
75 | - } | |
76 | - | |
77 | -public: | |
78 | - | |
79 | - enum attribute {magnitude, real, imaginary}; | |
80 | - | |
81 | - //constructor (no parameters) | |
82 | - complexfield() : field<stim::complex<T>, D>(){}; | |
83 | - | |
84 | - //constructor (resolution specified) | |
85 | - complexfield(unsigned int r0, unsigned int r1) : field<stim::complex<T>, D>(r0, r1){}; | |
86 | - | |
87 | - //assignment from a field of complex values | |
88 | - complexfield & operator=(const field< stim::complex<T>, D > rhs){ | |
89 | - field< complex<T>, D >::operator=(rhs); | |
90 | - return *this; | |
91 | - } | |
92 | - | |
93 | - //assignment operator (scalar value) | |
94 | - complexfield & operator= (const complex<T> rhs){ | |
95 | - | |
96 | - field< complex<T>, D >::operator=(rhs); | |
97 | - return *this; | |
98 | - } | |
99 | - | |
100 | - //assignment operator (vector value) | |
101 | - complexfield & operator= (const vec< complex<T>, D > rhs){ | |
102 | - | |
103 | - field< complex<T>, D >::operator=(rhs); | |
104 | - return *this; | |
105 | - } | |
106 | - | |
107 | - //cropping | |
108 | - complexfield crop(unsigned int width, unsigned int height){ | |
109 | - | |
110 | - complexfield<T, D> result; | |
111 | - result = field< complex<T>, D>::crop(width, height); | |
112 | - return result; | |
113 | - } | |
114 | - | |
115 | - void toImage(std::string filename, attribute type = magnitude, unsigned int n=0){ | |
116 | - | |
117 | - field<T, 1> rf(R[0], R[1]); | |
118 | - | |
119 | - //get cuda parameters | |
120 | - dim3 blocks, grids; | |
121 | - cuda_params(grids, blocks); | |
122 | - | |
123 | - if(type == magnitude){ | |
124 | - gpu_complexfield_mag <<<grids, blocks>>> (rf.ptr(), X[n], R[0], R[1]); | |
125 | - rf.toImage(filename, n, true); | |
126 | - } | |
127 | - | |
128 | - } | |
129 | - | |
130 | - | |
131 | -}; | |
132 | - | |
133 | - | |
134 | -} //end namespace rts | |
135 | - | |
136 | - | |
137 | -#endif | |
1 | +#ifndef RTS_COMPLEXFIELD_H | |
2 | +#define RTS_COMPLEXFIELD_H | |
3 | + | |
4 | +#include "cublas_v2.h" | |
5 | +#include <cuda_runtime.h> | |
6 | + | |
7 | +#include "../math/field.cuh" | |
8 | +#include "../math/complex.h" | |
9 | +#include "../math/realfield.cuh" | |
10 | + | |
11 | +namespace stim{ | |
12 | + | |
13 | +template<typename T> | |
14 | +__global__ void gpu_complexfield_mag(T* dest, complex<T>* source, unsigned int r0, unsigned int r1){ | |
15 | + | |
16 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
17 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
18 | + | |
19 | + //make sure that the thread indices are in-bounds | |
20 | + if(iu >= r0 || iv >= r1) return; | |
21 | + | |
22 | + //compute the index into the field | |
23 | + int i = iv*r0 + iu; | |
24 | + | |
25 | + //calculate and store the result | |
26 | + dest[i] = source[i].abs(); | |
27 | +} | |
28 | + | |
29 | +/*This class stores functions for saving images of complex fields | |
30 | +*/ | |
31 | +template<typename T, unsigned int D = 1> | |
32 | +class complexfield : public field< stim::complex<T>, D >{ | |
33 | + using field< stim::complex<T>, D >::R; | |
34 | + using field< stim::complex<T>, D >::X; | |
35 | + using field< stim::complex<T>, D >::shape; | |
36 | + using field< stim::complex<T>, D >::cuda_params; | |
37 | + | |
38 | + | |
39 | + | |
40 | +public: | |
41 | + | |
42 | + //find the maximum value of component n | |
43 | + stim::complex<T> find_max(unsigned int n){ | |
44 | + cublasStatus_t stat; | |
45 | + cublasHandle_t handle; | |
46 | + | |
47 | + //create a CUBLAS handle | |
48 | + stat = cublasCreate(&handle); | |
49 | + if(stat != CUBLAS_STATUS_SUCCESS){ | |
50 | + std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | |
51 | + exit(1); | |
52 | + } | |
53 | + | |
54 | + int L = R[0] * R[1]; //compute the number of discrete points in a slice | |
55 | + int index; //result of the max operation | |
56 | + stim::complex<T> result; | |
57 | + | |
58 | + if(sizeof(T) == 8) | |
59 | + stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index); | |
60 | + else | |
61 | + stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index); | |
62 | + | |
63 | + index -= 1; //adjust for 1-based indexing | |
64 | + | |
65 | + //if there was a GPU error, terminate | |
66 | + if(stat != CUBLAS_STATUS_SUCCESS){ | |
67 | + std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | |
68 | + exit(1); | |
69 | + } | |
70 | + | |
71 | + //retrieve the maximum value for this slice and store it in the maxVal array | |
72 | + std::cout<<X[n]<<std::endl; | |
73 | + HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(stim::complex<T>), cudaMemcpyDeviceToHost)); | |
74 | + return result; | |
75 | + } | |
76 | + | |
77 | +public: | |
78 | + | |
79 | + enum attribute {magnitude, real, imaginary}; | |
80 | + | |
81 | + //constructor (no parameters) | |
82 | + complexfield() : field<stim::complex<T>, D>(){}; | |
83 | + | |
84 | + //constructor (resolution specified) | |
85 | + complexfield(unsigned int r0, unsigned int r1) : field<stim::complex<T>, D>(r0, r1){}; | |
86 | + | |
87 | + //assignment from a field of complex values | |
88 | + complexfield & operator=(const field< stim::complex<T>, D > rhs){ | |
89 | + field< complex<T>, D >::operator=(rhs); | |
90 | + return *this; | |
91 | + } | |
92 | + | |
93 | + //assignment operator (scalar value) | |
94 | + complexfield & operator= (const complex<T> rhs){ | |
95 | + | |
96 | + field< complex<T>, D >::operator=(rhs); | |
97 | + return *this; | |
98 | + } | |
99 | + | |
100 | + //assignment operator (vector value) | |
101 | + complexfield & operator= (const vec< complex<T>, D > rhs){ | |
102 | + | |
103 | + field< complex<T>, D >::operator=(rhs); | |
104 | + return *this; | |
105 | + } | |
106 | + | |
107 | + //cropping | |
108 | + complexfield crop(unsigned int width, unsigned int height){ | |
109 | + | |
110 | + complexfield<T, D> result; | |
111 | + result = field< complex<T>, D>::crop(width, height); | |
112 | + return result; | |
113 | + } | |
114 | + | |
115 | + void toImage(std::string filename, attribute type = magnitude, unsigned int n=0){ | |
116 | + | |
117 | + field<T, 1> rf(R[0], R[1]); | |
118 | + | |
119 | + //get cuda parameters | |
120 | + dim3 blocks, grids; | |
121 | + cuda_params(grids, blocks); | |
122 | + | |
123 | + if(type == magnitude){ | |
124 | + gpu_complexfield_mag <<<grids, blocks>>> (rf.ptr(), X[n], R[0], R[1]); | |
125 | + rf.toImage(filename, n, true); | |
126 | + } | |
127 | + | |
128 | + } | |
129 | + | |
130 | + | |
131 | +}; | |
132 | + | |
133 | + | |
134 | +} //end namespace rts | |
135 | + | |
136 | + | |
137 | +#endif | ... | ... |
math/field.cuh
1 | -#ifndef RTS_FIELD_CUH | |
2 | -#define RTS_FIELD_CUH | |
3 | - | |
4 | -#include <vector> | |
5 | -#include <string> | |
6 | -#include <sstream> | |
7 | - | |
8 | -#include "cublas_v2.h" | |
9 | -#include <cuda_runtime.h> | |
10 | - | |
11 | -#include "../math/rect.h" | |
12 | -#include "../cuda/threads.h" | |
13 | -#include "../cuda/error.h" | |
14 | -#include "../cuda/devices.h" | |
15 | -#include "../visualization/colormap.h" | |
16 | - | |
17 | - | |
18 | -namespace stim{ | |
19 | - | |
20 | -//multiply R = X * Y | |
21 | -template<typename T> | |
22 | -__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){ | |
23 | - | |
24 | - int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
25 | - int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
26 | - | |
27 | - //make sure that the thread indices are in-bounds | |
28 | - if(iu >= r0 || iv >= r1) return; | |
29 | - | |
30 | - //compute the index into the field | |
31 | - int i = iv*r0 + iu; | |
32 | - | |
33 | - //calculate and store the result | |
34 | - R[i] = X[i] * Y[i]; | |
35 | -} | |
36 | - | |
37 | -//assign a constant value to all points | |
38 | -template<typename T> | |
39 | -__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){ | |
40 | - | |
41 | - int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
42 | - int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
43 | - | |
44 | - //make sure that the thread indices are in-bounds | |
45 | - if(iu >= r0 || iv >= r1) return; | |
46 | - | |
47 | - //compute the index into the field | |
48 | - int i = iv*r0 + iu; | |
49 | - | |
50 | - //calculate and store the result | |
51 | - ptr[i] = val; | |
52 | -} | |
53 | - | |
54 | -//crop the field to the new dimensions (width x height) | |
55 | -template<typename T> | |
56 | -__global__ void gpu_field_crop(T* dest, T* source, | |
57 | - unsigned int r0, unsigned int r1, | |
58 | - unsigned int width, unsigned int height){ | |
59 | - | |
60 | - int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
61 | - int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
62 | - | |
63 | - //make sure that the thread indices are in-bounds | |
64 | - if(iu >= width || iv >= height) return; | |
65 | - | |
66 | - //compute the index into the field | |
67 | - int is = iv*r0 + iu; | |
68 | - int id = iv*width + iu; | |
69 | - | |
70 | - //calculate and store the result | |
71 | - dest[id] = source[is]; | |
72 | -} | |
73 | - | |
74 | -template<typename T, unsigned int D = 1> | |
75 | -class field{ | |
76 | - | |
77 | -protected: | |
78 | - | |
79 | - T* X[D]; //pointer to the field data | |
80 | - unsigned int R[2]; //field resolution | |
81 | - stim::rect<T> shape; //position and shape of the field slice | |
82 | - | |
83 | - //calculates the optimal block and grid sizes using information from the GPU | |
84 | - void cuda_params(dim3& grids, dim3& blocks){ | |
85 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
86 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
87 | - | |
88 | - //create one thread for each detector pixel | |
89 | - blocks = dim3(SQRT_BLOCK, SQRT_BLOCK); | |
90 | - grids = dim3((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
91 | - } | |
92 | - | |
93 | - //find the maximum value of component n | |
94 | - T find_max(unsigned int n){ | |
95 | - cublasStatus_t stat; | |
96 | - cublasHandle_t handle; | |
97 | - | |
98 | - //create a CUBLAS handle | |
99 | - stat = cublasCreate(&handle); | |
100 | - if(stat != CUBLAS_STATUS_SUCCESS){ | |
101 | - std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | |
102 | - exit(1); | |
103 | - } | |
104 | - | |
105 | - int L = R[0] * R[1]; //compute the number of discrete points in a slice | |
106 | - int index; //result of the max operation | |
107 | - T result; | |
108 | - | |
109 | - if(sizeof(T) == 4) | |
110 | - stat = cublasIsamax(handle, L, (const float*)X[n], 1, &index); | |
111 | - else | |
112 | - stat = cublasIdamax(handle, L, (const double*)X[n], 1, &index); | |
113 | - | |
114 | - index -= 1; //adjust for 1-based indexing | |
115 | - | |
116 | - //if there was a GPU error, terminate | |
117 | - if(stat != CUBLAS_STATUS_SUCCESS){ | |
118 | - std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | |
119 | - exit(1); | |
120 | - } | |
121 | - | |
122 | - //retrieve the maximum value for this slice and store it in the maxVal array | |
123 | - HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(T), cudaMemcpyDeviceToHost)); | |
124 | - return result; | |
125 | - } | |
126 | - | |
127 | -public: | |
128 | - | |
129 | - //returns a list of file names given an input string with wild cards | |
130 | - std::vector<std::string> process_filename(std::string name){ | |
131 | - std::stringstream ss(name); | |
132 | - std::string item; | |
133 | - std::vector<std::string> elems; | |
134 | - while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension) | |
135 | - { | |
136 | - elems.push_back(item); | |
137 | - } | |
138 | - | |
139 | - std::string prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters) | |
140 | - std::string ext = elems[1]; //file extension (ex. .bmp, .png) | |
141 | - ext = std::string(".") + ext; //add a period back into the extension | |
142 | - | |
143 | - size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'') | |
144 | - size_t i1 = prefix.find_last_of("?"); | |
145 | - | |
146 | - std::string postfix = prefix.substr(i1+1); | |
147 | - prefix = prefix.substr(0, i0); | |
148 | - | |
149 | - unsigned int digits = i1 - i0 + 1; //compute the number of wildcards | |
150 | - | |
151 | - std::vector<std::string> flist; //create a vector of file names | |
152 | - //fill the list | |
153 | - for(unsigned int d=0; d<D; d++){ | |
154 | - std::stringstream ss; //assemble the file name | |
155 | - ss<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext; | |
156 | - flist.push_back(ss.str()); | |
157 | - } | |
158 | - | |
159 | - return flist; | |
160 | - } | |
161 | - | |
162 | - void init(){ | |
163 | - for(unsigned int n=0; n<D; n++) | |
164 | - X[n] = NULL; | |
165 | - } | |
166 | - void destroy(){ | |
167 | - for(unsigned int n=0; n<D; n++) | |
168 | - if(X[n] != NULL) | |
169 | - HANDLE_ERROR(cudaFree(X[n])); | |
170 | - } | |
171 | - | |
172 | -public: | |
173 | - //field constructor | |
174 | - field(){ | |
175 | - R[0] = R[1] = 0; | |
176 | - init(); | |
177 | - } | |
178 | - | |
179 | - field(unsigned int x, unsigned int y){ | |
180 | - //set the resolution | |
181 | - R[0] = x; | |
182 | - R[1] = y; | |
183 | - //allocate memory on the GPU | |
184 | - for(unsigned int n=0; n<D; n++){ | |
185 | - HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(T) * R[0] * R[1] )); | |
186 | - } | |
187 | - clear(); //zero the field | |
188 | - } | |
189 | - | |
190 | - ///copy constructor | |
191 | - field(const field &rhs){ | |
192 | - //first make a shallow copy | |
193 | - R[0] = rhs.R[0]; | |
194 | - R[1] = rhs.R[1]; | |
195 | - | |
196 | - for(unsigned int n=0; n<D; n++){ | |
197 | - //do we have to make a deep copy? | |
198 | - if(rhs.X[n] == NULL) | |
199 | - X[n] = NULL; //no | |
200 | - else{ | |
201 | - //allocate the necessary memory | |
202 | - HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1])); | |
203 | - | |
204 | - //copy the slice | |
205 | - HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
206 | - } | |
207 | - } | |
208 | - } | |
209 | - | |
210 | - ~field(){ | |
211 | - destroy(); | |
212 | - } | |
213 | - | |
214 | - //assignment operator | |
215 | - field & operator= (const field & rhs){ | |
216 | - | |
217 | - //de-allocate any existing GPU memory | |
218 | - destroy(); | |
219 | - | |
220 | - //copy the slice resolution | |
221 | - R[0] = rhs.R[0]; | |
222 | - R[1] = rhs.R[1]; | |
223 | - | |
224 | - for(unsigned int n=0; n<D; n++) | |
225 | - { | |
226 | - //allocate the necessary memory | |
227 | - HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1])); | |
228 | - //copy the slice | |
229 | - HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
230 | - } | |
231 | - return *this; | |
232 | - } | |
233 | - | |
234 | - field & operator= (const T rhs){ | |
235 | - | |
236 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
237 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
238 | - | |
239 | - //create one thread for each detector pixel | |
240 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
241 | - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
242 | - | |
243 | - //assign the constant value to all positions and dimensions | |
244 | - for(int n=0; n<D; n++) | |
245 | - stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]); | |
246 | - | |
247 | - return *this; | |
248 | - } | |
249 | - | |
250 | - //assignment of vector component | |
251 | - field & operator= (const vec<T, D> rhs){ | |
252 | - | |
253 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
254 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
255 | - | |
256 | - //create one thread for each detector pixel | |
257 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
258 | - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
259 | - | |
260 | - //assign the constant value to all positions and dimensions | |
261 | - for(unsigned int n=0; n<D; n++) | |
262 | - stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]); | |
263 | - | |
264 | - return *this; | |
265 | - | |
266 | - } | |
267 | - | |
268 | - //multiply two fields (element-wise multiplication) | |
269 | - field<T, D> operator* (const field & rhs){ | |
270 | - | |
271 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
272 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
273 | - | |
274 | - //create one thread for each detector pixel | |
275 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
276 | - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
277 | - | |
278 | - //create a scalar field to store the result | |
279 | - field<T, D> result(R[0], R[1]); | |
280 | - | |
281 | - for(int n=0; n<D; n++) | |
282 | - stim::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]); | |
283 | - | |
284 | - return result; | |
285 | - } | |
286 | - | |
287 | - T* ptr(unsigned int n = 0){ | |
288 | - if(n < D) | |
289 | - return X[n]; | |
290 | - else return NULL; | |
291 | - } | |
292 | - | |
293 | - //return the vector component at position (u, v) | |
294 | - vec<T, D> get(unsigned int u, unsigned int v){ | |
295 | - | |
296 | - vec<T, D> result; | |
297 | - for(unsigned int d=0; d<D; d++){ | |
298 | - HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + v*R[0] + u, sizeof(T), cudaMemcpyDeviceToHost)); | |
299 | - } | |
300 | - | |
301 | - return result; | |
302 | - } | |
303 | - | |
304 | - //set all components of the field to zero | |
305 | - void clear(){ | |
306 | - for(unsigned int n=0; n<D; n++) | |
307 | - if(X[n] != NULL) | |
308 | - HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1])); | |
309 | - } | |
310 | - | |
311 | - //crop the field | |
312 | - field<T, D> crop(unsigned int width, unsigned int height){ | |
313 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
314 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
315 | - | |
316 | - //create one thread for each detector pixel | |
317 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
318 | - dim3 dimGrid((width + SQRT_BLOCK -1)/SQRT_BLOCK, (height + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
319 | - | |
320 | - //create a scalar field to store the result | |
321 | - field<T, D> result(width, height); | |
322 | - | |
323 | - for(int n=0; n<D; n++) | |
324 | - stim::gpu_field_crop <<<dimGrid, dimBlock>>> (result.X[n], X[n], R[0], R[1], width, height); | |
325 | - | |
326 | - return result; | |
327 | - } | |
328 | - | |
329 | - //save an image representing component n | |
330 | - void toImage(std::string filename, unsigned int n = 0, | |
331 | - bool positive = false, stim::colormapType cmap = stim::cmBrewer){ | |
332 | - T max_val = find_max(n); //find the maximum value | |
333 | - | |
334 | - if(positive) //if the field is positive, use the range [0 max_val] | |
335 | - stim::gpu2image<T>(X[n], filename, R[0], R[1], 0, max_val, cmap); | |
336 | - else | |
337 | - stim::gpu2image<T>(X[n], filename, R[0], R[1], -max_val, max_val, cmap); | |
338 | - } | |
339 | - | |
340 | -}; | |
341 | - | |
342 | -} //end namespace rts | |
343 | -#endif | |
1 | +#ifndef RTS_FIELD_CUH | |
2 | +#define RTS_FIELD_CUH | |
3 | + | |
4 | +#include <vector> | |
5 | +#include <string> | |
6 | +#include <sstream> | |
7 | + | |
8 | +#include "cublas_v2.h" | |
9 | +#include <cuda_runtime.h> | |
10 | + | |
11 | +#include "../math/rect.h" | |
12 | +#include "../cuda/threads.h" | |
13 | +#include "../cuda/error.h" | |
14 | +#include "../cuda/devices.h" | |
15 | +#include "../visualization/colormap.h" | |
16 | + | |
17 | + | |
18 | +namespace stim{ | |
19 | + | |
20 | +//multiply R = X * Y | |
21 | +template<typename T> | |
22 | +__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){ | |
23 | + | |
24 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
25 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
26 | + | |
27 | + //make sure that the thread indices are in-bounds | |
28 | + if(iu >= r0 || iv >= r1) return; | |
29 | + | |
30 | + //compute the index into the field | |
31 | + int i = iv*r0 + iu; | |
32 | + | |
33 | + //calculate and store the result | |
34 | + R[i] = X[i] * Y[i]; | |
35 | +} | |
36 | + | |
37 | +//assign a constant value to all points | |
38 | +template<typename T> | |
39 | +__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){ | |
40 | + | |
41 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
42 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
43 | + | |
44 | + //make sure that the thread indices are in-bounds | |
45 | + if(iu >= r0 || iv >= r1) return; | |
46 | + | |
47 | + //compute the index into the field | |
48 | + int i = iv*r0 + iu; | |
49 | + | |
50 | + //calculate and store the result | |
51 | + ptr[i] = val; | |
52 | +} | |
53 | + | |
54 | +//crop the field to the new dimensions (width x height) | |
55 | +template<typename T> | |
56 | +__global__ void gpu_field_crop(T* dest, T* source, | |
57 | + unsigned int r0, unsigned int r1, | |
58 | + unsigned int width, unsigned int height){ | |
59 | + | |
60 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
61 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
62 | + | |
63 | + //make sure that the thread indices are in-bounds | |
64 | + if(iu >= width || iv >= height) return; | |
65 | + | |
66 | + //compute the index into the field | |
67 | + int is = iv*r0 + iu; | |
68 | + int id = iv*width + iu; | |
69 | + | |
70 | + //calculate and store the result | |
71 | + dest[id] = source[is]; | |
72 | +} | |
73 | + | |
74 | +template<typename T, unsigned int D = 1> | |
75 | +class field{ | |
76 | + | |
77 | +protected: | |
78 | + | |
79 | + T* X[D]; //pointer to the field data | |
80 | + unsigned int R[2]; //field resolution | |
81 | + stim::rect<T> shape; //position and shape of the field slice | |
82 | + | |
83 | + //calculates the optimal block and grid sizes using information from the GPU | |
84 | + void cuda_params(dim3& grids, dim3& blocks){ | |
85 | + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
86 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
87 | + | |
88 | + //create one thread for each detector pixel | |
89 | + blocks = dim3(SQRT_BLOCK, SQRT_BLOCK); | |
90 | + grids = dim3((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
91 | + } | |
92 | + | |
93 | + //find the maximum value of component n | |
94 | + T find_max(unsigned int n){ | |
95 | + cublasStatus_t stat; | |
96 | + cublasHandle_t handle; | |
97 | + | |
98 | + //create a CUBLAS handle | |
99 | + stat = cublasCreate(&handle); | |
100 | + if(stat != CUBLAS_STATUS_SUCCESS){ | |
101 | + std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | |
102 | + exit(1); | |
103 | + } | |
104 | + | |
105 | + int L = R[0] * R[1]; //compute the number of discrete points in a slice | |
106 | + int index; //result of the max operation | |
107 | + T result; | |
108 | + | |
109 | + if(sizeof(T) == 4) | |
110 | + stat = cublasIsamax(handle, L, (const float*)X[n], 1, &index); | |
111 | + else | |
112 | + stat = cublasIdamax(handle, L, (const double*)X[n], 1, &index); | |
113 | + | |
114 | + index -= 1; //adjust for 1-based indexing | |
115 | + | |
116 | + //if there was a GPU error, terminate | |
117 | + if(stat != CUBLAS_STATUS_SUCCESS){ | |
118 | + std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | |
119 | + exit(1); | |
120 | + } | |
121 | + | |
122 | + //retrieve the maximum value for this slice and store it in the maxVal array | |
123 | + HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(T), cudaMemcpyDeviceToHost)); | |
124 | + return result; | |
125 | + } | |
126 | + | |
127 | +public: | |
128 | + | |
129 | + //returns a list of file names given an input string with wild cards | |
130 | + std::vector<std::string> process_filename(std::string name){ | |
131 | + std::stringstream ss(name); | |
132 | + std::string item; | |
133 | + std::vector<std::string> elems; | |
134 | + while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension) | |
135 | + { | |
136 | + elems.push_back(item); | |
137 | + } | |
138 | + | |
139 | + std::string prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters) | |
140 | + std::string ext = elems[1]; //file extension (ex. .bmp, .png) | |
141 | + ext = std::string(".") + ext; //add a period back into the extension | |