Commit 81e0d2215b30a17309ce7ba7615f94e2eda5c67e
1 parent
7b3948ab
separated executable arguments and options in the arglist class
Showing
8 changed files
with
2056 additions
and
2039 deletions
Show diff stats
math/complex.h
1 | -/*RTS Complex number class. This class is CUDA compatible, | ||
2 | -and can therefore be used in CUDA code and on CUDA devices. | ||
3 | -*/ | ||
4 | - | ||
5 | -#ifndef RTS_COMPLEX | ||
6 | -#define RTS_COMPLEX | ||
7 | - | ||
8 | -#include "../cuda/callable.h" | ||
9 | -#include <cmath> | ||
10 | -#include <string> | ||
11 | -#include <sstream> | ||
12 | -#include <iostream> | ||
13 | - | ||
14 | -namespace stim | ||
15 | -{ | ||
16 | - | ||
17 | -template <class T> | ||
18 | -struct complex | ||
19 | -{ | ||
20 | - T r, i; | ||
21 | - | ||
22 | - //default constructor | ||
23 | - CUDA_CALLABLE complex() | ||
24 | - { | ||
25 | - r = 0; | ||
26 | - i = 0; | ||
27 | - } | ||
28 | - | ||
29 | - //constructor when given real and imaginary values | ||
30 | - CUDA_CALLABLE complex(T r, T i = 0) | ||
31 | - { | ||
32 | - this->r = r; | ||
33 | - this->i = i; | ||
34 | - } | ||
35 | - | ||
36 | - //access methods | ||
37 | - CUDA_CALLABLE T real() | ||
38 | - { | ||
39 | - return r; | ||
40 | - } | ||
41 | - | ||
42 | - CUDA_CALLABLE T real(T r_val) | ||
43 | - { | ||
44 | - r = r_val; | ||
45 | - return r_val; | ||
46 | - } | ||
47 | - | ||
48 | - CUDA_CALLABLE T imag() | ||
49 | - { | ||
50 | - return i; | ||
51 | - } | ||
52 | - CUDA_CALLABLE T imag(T i_val) | ||
53 | - { | ||
54 | - i = i_val; | ||
55 | - return i_val; | ||
56 | - } | ||
57 | - | ||
58 | - | ||
59 | - | ||
60 | - //return the current value multiplied by i | ||
61 | - CUDA_CALLABLE complex<T> imul() | ||
62 | - { | ||
63 | - complex<T> result; | ||
64 | - result.r = -i; | ||
65 | - result.i = r; | ||
66 | - | ||
67 | - return result; | ||
68 | - } | ||
69 | - | ||
70 | - //returns the complex signum (-1, 0, 1) | ||
71 | - CUDA_CALLABLE int sgn(){ | ||
72 | - if(r > 0) return 1; | ||
73 | - else if(r < 0) return -1; | ||
74 | - else return (0 < i - i < 0); | ||
75 | - } | ||
76 | - | ||
77 | - //ARITHMETIC OPERATORS-------------------- | ||
78 | - | ||
79 | - //binary + operator (returns the result of adding two complex values) | ||
80 | - CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs) const | ||
81 | - { | ||
82 | - complex<T> result; | ||
83 | - result.r = r + rhs.r; | ||
84 | - result.i = i + rhs.i; | ||
85 | - return result; | ||
86 | - } | ||
87 | - | ||
88 | - CUDA_CALLABLE complex<T> operator+ (const T rhs) const | ||
89 | - { | ||
90 | - complex<T> result; | ||
91 | - result.r = r + rhs; | ||
92 | - result.i = i; | ||
93 | - return result; | ||
94 | - } | ||
95 | - | ||
96 | - //binary - operator (returns the result of adding two complex values) | ||
97 | - CUDA_CALLABLE complex<T> operator- (const complex<T> rhs) const | ||
98 | - { | ||
99 | - complex<T> result; | ||
100 | - result.r = r - rhs.r; | ||
101 | - result.i = i - rhs.i; | ||
102 | - return result; | ||
103 | - } | ||
104 | - | ||
105 | - //binary - operator (returns the result of adding two complex values) | ||
106 | - CUDA_CALLABLE complex<T> operator- (const T rhs) | ||
107 | - { | ||
108 | - complex<T> result; | ||
109 | - result.r = r - rhs; | ||
110 | - result.i = i; | ||
111 | - return result; | ||
112 | - } | ||
113 | - | ||
114 | - //binary MULTIPLICATION operators (returns the result of multiplying complex values) | ||
115 | - CUDA_CALLABLE complex<T> operator* (const complex<T> rhs) const | ||
116 | - { | ||
117 | - complex<T> result; | ||
118 | - result.r = r * rhs.r - i * rhs.i; | ||
119 | - result.i = r * rhs.i + i * rhs.r; | ||
120 | - return result; | ||
121 | - } | ||
122 | - CUDA_CALLABLE complex<T> operator* (const T rhs) | ||
123 | - { | ||
124 | - return complex<T>(r * rhs, i * rhs); | ||
125 | - } | ||
126 | - | ||
127 | - //binary DIVISION operators (returns the result of dividing complex values) | ||
128 | - CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs) const | ||
129 | - { | ||
130 | - complex<T> result; | ||
131 | - T denom = rhs.r * rhs.r + rhs.i * rhs.i; | ||
132 | - result.r = (r * rhs.r + i * rhs.i) / denom; | ||
133 | - result.i = (- r * rhs.i + i * rhs.r) / denom; | ||
134 | - | ||
135 | - return result; | ||
136 | - } | ||
137 | - CUDA_CALLABLE complex<T> operator/ (const T rhs) | ||
138 | - { | ||
139 | - return complex<T>(r / rhs, i / rhs); | ||
140 | - } | ||
141 | - | ||
142 | - //ASSIGNMENT operators----------------------------------- | ||
143 | - CUDA_CALLABLE complex<T> & operator=(const complex<T> &rhs) | ||
144 | - { | ||
145 | - //check for self-assignment | ||
146 | - if(this != &rhs) | ||
147 | - { | ||
148 | - this->r = rhs.r; | ||
149 | - this->i = rhs.i; | ||
150 | - } | ||
151 | - return *this; | ||
152 | - } | ||
153 | - CUDA_CALLABLE complex<T> & operator=(const T &rhs) | ||
154 | - { | ||
155 | - this->r = rhs; | ||
156 | - this->i = 0; | ||
157 | - | ||
158 | - return *this; | ||
159 | - } | ||
160 | - | ||
161 | - //arithmetic assignment operators | ||
162 | - CUDA_CALLABLE complex<T> operator+=(const complex<T> &rhs) | ||
163 | - { | ||
164 | - *this = *this + rhs; | ||
165 | - return *this; | ||
166 | - } | ||
167 | - CUDA_CALLABLE complex<T> operator+=(const T &rhs) | ||
168 | - { | ||
169 | - *this = *this + rhs; | ||
170 | - return *this; | ||
171 | - } | ||
172 | - | ||
173 | - CUDA_CALLABLE complex<T> operator-=(const complex<T> &rhs) | ||
174 | - { | ||
175 | - *this = *this - rhs; | ||
176 | - return *this; | ||
177 | - } | ||
178 | - CUDA_CALLABLE complex<T> operator-=(const T &rhs) | ||
179 | - { | ||
180 | - *this = *this - rhs; | ||
181 | - return *this; | ||
182 | - } | ||
183 | - | ||
184 | - CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs) | ||
185 | - { | ||
186 | - *this = *this * rhs; | ||
187 | - return *this; | ||
188 | - } | ||
189 | - CUDA_CALLABLE complex<T> operator*=(const T &rhs) | ||
190 | - { | ||
191 | - *this = *this * rhs; | ||
192 | - return *this; | ||
193 | - } | ||
194 | - //divide and assign | ||
195 | - CUDA_CALLABLE complex<T> operator/=(const complex<T> &rhs) | ||
196 | - { | ||
197 | - *this = *this / rhs; | ||
198 | - return *this; | ||
199 | - } | ||
200 | - CUDA_CALLABLE complex<T> operator/=(const T &rhs) | ||
201 | - { | ||
202 | - *this = *this / rhs; | ||
203 | - return *this; | ||
204 | - } | ||
205 | - | ||
206 | - //absolute value operator (returns the absolute value of the complex number) | ||
207 | - CUDA_CALLABLE T abs() | ||
208 | - { | ||
209 | - return std::sqrt(r * r + i * i); | ||
210 | - } | ||
211 | - | ||
212 | - CUDA_CALLABLE complex<T> log() | ||
213 | - { | ||
214 | - complex<T> result; | ||
215 | - result.r = (T)std::log(std::sqrt(r * r + i * i)); | ||
216 | - result.i = (T)std::atan2(i, r); | ||
217 | - | ||
218 | - | ||
219 | - return result; | ||
220 | - } | ||
221 | - | ||
222 | - CUDA_CALLABLE complex<T> exp() | ||
223 | - { | ||
224 | - complex<T> result; | ||
225 | - | ||
226 | - T e_r = std::exp(r); | ||
227 | - result.r = e_r * (T)std::cos(i); | ||
228 | - result.i = e_r * (T)std::sin(i); | ||
229 | - | ||
230 | - return result; | ||
231 | - } | ||
232 | - | ||
233 | - /*CUDA_CALLABLE complex<T> pow(int y) | ||
234 | - { | ||
235 | - | ||
236 | - return pow((double)y); | ||
237 | - }*/ | ||
238 | - | ||
239 | - CUDA_CALLABLE complex<T> pow(T y) | ||
240 | - { | ||
241 | - complex<T> result; | ||
242 | - | ||
243 | - result = log() * y; | ||
244 | - | ||
245 | - return result.exp(); | ||
246 | - } | ||
247 | - | ||
248 | - CUDA_CALLABLE complex<T> sqrt() | ||
249 | - { | ||
250 | - complex<T> result; | ||
251 | - | ||
252 | - //convert to polar coordinates | ||
253 | - T a = std::sqrt(r*r + i*i); | ||
254 | - T theta = std::atan2(i, r); | ||
255 | - | ||
256 | - //find the square root | ||
257 | - T a_p = std::sqrt(a); | ||
258 | - T theta_p = theta/2.0f; | ||
259 | - | ||
260 | - //convert back to cartesian coordinates | ||
261 | - result.r = a_p * std::cos(theta_p); | ||
262 | - result.i = a_p * std::sin(theta_p); | ||
263 | - | ||
264 | - return result; | ||
265 | - } | ||
266 | - | ||
267 | - std::string str() | ||
268 | - { | ||
269 | - std::stringstream ss; | ||
270 | - ss<<"("<<r<<","<<i<<")"; | ||
271 | - | ||
272 | - return ss.str(); | ||
273 | - } | ||
274 | - | ||
275 | - //COMPARISON operators | ||
276 | - CUDA_CALLABLE bool operator==(complex<T> rhs) | ||
277 | - { | ||
278 | - if(r == rhs.r && i == rhs.i) | ||
279 | - return true; | ||
280 | - return false; | ||
281 | - } | ||
282 | - | ||
283 | - CUDA_CALLABLE bool operator==(T rhs) | ||
284 | - { | ||
285 | - if(r == rhs && i == 0) | ||
286 | - return true; | ||
287 | - return false; | ||
288 | - } | ||
289 | - | ||
290 | - CUDA_CALLABLE bool operator!=(T rhs) | ||
291 | - { | ||
292 | - if(r != rhs || i != 0) | ||
293 | - return true; | ||
294 | - return false; | ||
295 | - } | ||
296 | - | ||
297 | - CUDA_CALLABLE bool operator<(complex<T> rhs){ | ||
298 | - return abs() < rhs.abs(); | ||
299 | - } | ||
300 | - CUDA_CALLABLE bool operator<=(complex<T> rhs){ | ||
301 | - return abs() <= rhs.abs(); | ||
302 | - } | ||
303 | - CUDA_CALLABLE bool operator>(complex<T> rhs){ | ||
304 | - return abs() > rhs.abs(); | ||
305 | - } | ||
306 | - CUDA_CALLABLE bool operator >=(complex<T> rhs){ | ||
307 | - return abs() >= rhs.abs(); | ||
308 | - } | ||
309 | - | ||
310 | - //CASTING operators | ||
311 | - template < typename otherT > | ||
312 | - operator complex<otherT>() | ||
313 | - { | ||
314 | - complex<otherT> result((otherT)r, (otherT)i); | ||
315 | - return result; | ||
316 | - } | ||
317 | - template< typename otherT > | ||
318 | - complex( const complex<otherT> &rhs) | ||
319 | - { | ||
320 | - r = (T)rhs.r; | ||
321 | - i = (T)rhs.i; | ||
322 | - } | ||
323 | - template< typename otherT > | ||
324 | - complex& operator=(const complex<otherT> &rhs) | ||
325 | - { | ||
326 | - r = (T)rhs.r; | ||
327 | - i = (T)rhs.i; | ||
328 | - return *this; | ||
329 | - } | ||
330 | - | ||
331 | -}; | ||
332 | - | ||
333 | -} //end RTS namespace | ||
334 | - | ||
335 | -//addition | ||
336 | -template<typename T> | ||
337 | -CUDA_CALLABLE static stim::complex<T> operator+(const double a, const stim::complex<T> b) | ||
338 | -{ | ||
339 | - return stim::complex<T>((T)a + b.r, b.i); | ||
340 | -} | ||
341 | - | ||
342 | -//subtraction with a real value | ||
343 | -template<typename T> | ||
344 | -CUDA_CALLABLE static stim::complex<T> operator-(const double a, const stim::complex<T> b) | ||
345 | -{ | ||
346 | - return stim::complex<T>((T)a - b.r, -b.i); | ||
347 | -} | ||
348 | - | ||
349 | -//minus sign | ||
350 | -template<typename T> | ||
351 | -CUDA_CALLABLE static stim::complex<T> operator-(const stim::complex<T> &rhs) | ||
352 | -{ | ||
353 | - return stim::complex<T>(-rhs.r, -rhs.i); | ||
354 | -} | ||
355 | - | ||
356 | -//multiply a T value by a complex value | ||
357 | -template<typename T> | ||
358 | -CUDA_CALLABLE static stim::complex<T> operator*(const double a, const stim::complex<T> b) | ||
359 | -{ | ||
360 | - return stim::complex<T>((T)a * b.r, (T)a * b.i); | ||
361 | -} | ||
362 | - | ||
363 | -//divide a T value by a complex value | ||
364 | -template<typename T> | ||
365 | -CUDA_CALLABLE static stim::complex<T> operator/(const double a, const stim::complex<T> b) | ||
366 | -{ | ||
367 | - stim::complex<T> result; | ||
368 | - | ||
369 | - T denom = b.r * b.r + b.i * b.i; | ||
370 | - | ||
371 | - result.r = ((T)a * b.r) / denom; | ||
372 | - result.i = -((T)a * b.i) / denom; | ||
373 | - | ||
374 | - return result; | ||
375 | -} | ||
376 | - | ||
377 | - | ||
378 | -template<typename T> | ||
379 | -CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, T y) | ||
380 | -{ | ||
381 | - return x.pow(y); | ||
382 | -} | ||
383 | -template<typename T> | ||
384 | -CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, int y) | ||
385 | -{ | ||
386 | - return x.pow(y); | ||
387 | -} | ||
388 | - | ||
389 | -//log function | ||
390 | -template<typename T> | ||
391 | -CUDA_CALLABLE static stim::complex<T> log(stim::complex<T> x) | ||
392 | -{ | ||
393 | - return x.log(); | ||
394 | -} | ||
395 | - | ||
396 | -//exp function | ||
397 | -template<typename T> | ||
398 | -CUDA_CALLABLE static stim::complex<T> exp(stim::complex<T> x) | ||
399 | -{ | ||
400 | - return x.exp(); | ||
401 | -} | ||
402 | - | ||
403 | -//sqrt function | ||
404 | -template<typename T> | ||
405 | -CUDA_CALLABLE static stim::complex<T> sqrt(stim::complex<T> x) | ||
406 | -{ | ||
407 | - return x.sqrt(); | ||
408 | -} | ||
409 | - | ||
410 | - | ||
411 | -template <typename T> | ||
412 | -CUDA_CALLABLE static T abs(stim::complex<T> a) | ||
413 | -{ | ||
414 | - return a.abs(); | ||
415 | -} | ||
416 | - | ||
417 | -template <typename T> | ||
418 | -CUDA_CALLABLE static T real(stim::complex<T> a) | ||
419 | -{ | ||
420 | - return a.r; | ||
421 | -} | ||
422 | - | ||
423 | -//template <typename T> | ||
424 | -CUDA_CALLABLE static float real(float a) | ||
425 | -{ | ||
426 | - return a; | ||
427 | -} | ||
428 | - | ||
429 | -template <typename T> | ||
430 | -CUDA_CALLABLE static T imag(stim::complex<T> a) | ||
431 | -{ | ||
432 | - return a.i; | ||
433 | -} | ||
434 | - | ||
435 | -//trigonometric functions | ||
436 | -//template<class A> | ||
437 | -/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x) | ||
438 | -{ | ||
439 | - stim::complex<float> result; | ||
440 | - result.r = sinf(x.r) * coshf(x.i); | ||
441 | - result.i = cosf(x.r) * sinhf(x.i); | ||
442 | - | ||
443 | - return result; | ||
444 | -}*/ | ||
445 | - | ||
446 | -template<class A> | ||
447 | -CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x) | ||
448 | -{ | ||
449 | - stim::complex<A> result; | ||
450 | - result.r = (A)std::sin(x.r) * (A)std::cosh(x.i); | ||
451 | - result.i = (A)std::cos(x.r) * (A)std::sinh(x.i); | ||
452 | - | ||
453 | - return result; | ||
454 | -} | ||
455 | - | ||
456 | -//floating point template | ||
457 | -//template<class A> | ||
458 | -/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x) | ||
459 | -{ | ||
460 | - stim::complex<float> result; | ||
461 | - result.r = cosf(x.r) * coshf(x.i); | ||
462 | - result.i = -(sinf(x.r) * sinhf(x.i)); | ||
463 | - | ||
464 | - return result; | ||
465 | -}*/ | ||
466 | - | ||
467 | -template<class A> | ||
468 | -CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x) | ||
469 | -{ | ||
470 | - stim::complex<A> result; | ||
471 | - result.r = (A)std::cos(x.r) * (A)std::cosh(x.i); | ||
472 | - result.i = -((A)std::sin(x.r) * (A)std::sinh(x.i)); | ||
473 | - | ||
474 | - return result; | ||
475 | -} | ||
476 | - | ||
477 | - | ||
478 | -template<class A> | ||
479 | -std::ostream& operator<<(std::ostream& os, stim::complex<A> x) | ||
480 | -{ | ||
481 | - os<<x.str(); | ||
482 | - return os; | ||
483 | -} | ||
484 | - | ||
485 | -template<class A> | ||
486 | -std::istream& operator>>(std::istream& is, stim::complex<A>& x) | ||
487 | -{ | ||
488 | - A r, i; | ||
489 | - r = i = 0; //initialize the real and imaginary parts to zero | ||
490 | - is>>r; //parse | ||
491 | - is>>i; | ||
492 | - | ||
493 | - x.real(r); //assign the parsed values to x | ||
494 | - x.imag(i); | ||
495 | - | ||
496 | - return is; //return the stream | ||
497 | -} | ||
498 | - | ||
499 | -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7 | ||
500 | -//template<class T> using rtsComplex = stim::complex<T>; | ||
501 | -//#endif | ||
502 | - | ||
503 | - | ||
504 | - | ||
505 | -#endif | 1 | +/*RTS Complex number class. This class is CUDA compatible, |
2 | +and can therefore be used in CUDA code and on CUDA devices. | ||
3 | +*/ | ||
4 | + | ||
5 | +#ifndef RTS_COMPLEX | ||
6 | +#define RTS_COMPLEX | ||
7 | + | ||
8 | +#include "../cuda/callable.h" | ||
9 | +#include <cmath> | ||
10 | +#include <string> | ||
11 | +#include <sstream> | ||
12 | +#include <iostream> | ||
13 | + | ||
14 | +namespace stim | ||
15 | +{ | ||
16 | + | ||
17 | +template <class T> | ||
18 | +struct complex | ||
19 | +{ | ||
20 | + T r, i; | ||
21 | + | ||
22 | + //default constructor | ||
23 | + CUDA_CALLABLE complex() | ||
24 | + { | ||
25 | + r = 0; | ||
26 | + i = 0; | ||
27 | + } | ||
28 | + | ||
29 | + //constructor when given real and imaginary values | ||
30 | + CUDA_CALLABLE complex(T r, T i = 0) | ||
31 | + { | ||
32 | + this->r = r; | ||
33 | + this->i = i; | ||
34 | + } | ||
35 | + | ||
36 | + //access methods | ||
37 | + CUDA_CALLABLE T real() | ||
38 | + { | ||
39 | + return r; | ||
40 | + } | ||
41 | + | ||
42 | + CUDA_CALLABLE T real(T r_val) | ||
43 | + { | ||
44 | + r = r_val; | ||
45 | + return r_val; | ||
46 | + } | ||
47 | + | ||
48 | + CUDA_CALLABLE T imag() | ||
49 | + { | ||
50 | + return i; | ||
51 | + } | ||
52 | + CUDA_CALLABLE T imag(T i_val) | ||
53 | + { | ||
54 | + i = i_val; | ||
55 | + return i_val; | ||
56 | + } | ||
57 | + | ||
58 | + | ||
59 | + | ||
60 | + //return the current value multiplied by i | ||
61 | + CUDA_CALLABLE complex<T> imul() | ||
62 | + { | ||
63 | + complex<T> result; | ||
64 | + result.r = -i; | ||
65 | + result.i = r; | ||
66 | + | ||
67 | + return result; | ||
68 | + } | ||
69 | + | ||
70 | + //returns the complex signum (-1, 0, 1) | ||
71 | + CUDA_CALLABLE int sgn(){ | ||
72 | + if(r > 0) return 1; | ||
73 | + else if(r < 0) return -1; | ||
74 | + else return (0 < i - i < 0); | ||
75 | + } | ||
76 | + | ||
77 | + //ARITHMETIC OPERATORS-------------------- | ||
78 | + | ||
79 | + //binary + operator (returns the result of adding two complex values) | ||
80 | + CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs) const | ||
81 | + { | ||
82 | + complex<T> result; | ||
83 | + result.r = r + rhs.r; | ||
84 | + result.i = i + rhs.i; | ||
85 | + return result; | ||
86 | + } | ||
87 | + | ||
88 | + CUDA_CALLABLE complex<T> operator+ (const T rhs) const | ||
89 | + { | ||
90 | + complex<T> result; | ||
91 | + result.r = r + rhs; | ||
92 | + result.i = i; | ||
93 | + return result; | ||
94 | + } | ||
95 | + | ||
96 | + //binary - operator (returns the result of adding two complex values) | ||
97 | + CUDA_CALLABLE complex<T> operator- (const complex<T> rhs) const | ||
98 | + { | ||
99 | + complex<T> result; | ||
100 | + result.r = r - rhs.r; | ||
101 | + result.i = i - rhs.i; | ||
102 | + return result; | ||
103 | + } | ||
104 | + | ||
105 | + //binary - operator (returns the result of adding two complex values) | ||
106 | + CUDA_CALLABLE complex<T> operator- (const T rhs) | ||
107 | + { | ||
108 | + complex<T> result; | ||
109 | + result.r = r - rhs; | ||
110 | + result.i = i; | ||
111 | + return result; | ||
112 | + } | ||
113 | + | ||
114 | + //binary MULTIPLICATION operators (returns the result of multiplying complex values) | ||
115 | + CUDA_CALLABLE complex<T> operator* (const complex<T> rhs) const | ||
116 | + { | ||
117 | + complex<T> result; | ||
118 | + result.r = r * rhs.r - i * rhs.i; | ||
119 | + result.i = r * rhs.i + i * rhs.r; | ||
120 | + return result; | ||
121 | + } | ||
122 | + CUDA_CALLABLE complex<T> operator* (const T rhs) | ||
123 | + { | ||
124 | + return complex<T>(r * rhs, i * rhs); | ||
125 | + } | ||
126 | + | ||
127 | + //binary DIVISION operators (returns the result of dividing complex values) | ||
128 | + CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs) const | ||
129 | + { | ||
130 | + complex<T> result; | ||
131 | + T denom = rhs.r * rhs.r + rhs.i * rhs.i; | ||
132 | + result.r = (r * rhs.r + i * rhs.i) / denom; | ||
133 | + result.i = (- r * rhs.i + i * rhs.r) / denom; | ||
134 | + | ||
135 | + return result; | ||
136 | + } | ||
137 | + CUDA_CALLABLE complex<T> operator/ (const T rhs) | ||
138 | + { | ||
139 | + return complex<T>(r / rhs, i / rhs); | ||
140 | + } | ||
141 | + | ||
142 | + //ASSIGNMENT operators----------------------------------- | ||
143 | + CUDA_CALLABLE complex<T> & operator=(const complex<T> &rhs) | ||
144 | + { | ||
145 | + //check for self-assignment | ||
146 | + if(this != &rhs) | ||
147 | + { | ||
148 | + this->r = rhs.r; | ||
149 | + this->i = rhs.i; | ||
150 | + } | ||
151 | + return *this; | ||
152 | + } | ||
153 | + CUDA_CALLABLE complex<T> & operator=(const T &rhs) | ||
154 | + { | ||
155 | + this->r = rhs; | ||
156 | + this->i = 0; | ||
157 | + | ||
158 | + return *this; | ||
159 | + } | ||
160 | + | ||
161 | + //arithmetic assignment operators | ||
162 | + CUDA_CALLABLE complex<T> operator+=(const complex<T> &rhs) | ||
163 | + { | ||
164 | + *this = *this + rhs; | ||
165 | + return *this; | ||
166 | + } | ||
167 | + CUDA_CALLABLE complex<T> operator+=(const T &rhs) | ||
168 | + { | ||
169 | + *this = *this + rhs; | ||
170 | + return *this; | ||
171 | + } | ||
172 | + | ||
173 | + CUDA_CALLABLE complex<T> operator-=(const complex<T> &rhs) | ||
174 | + { | ||
175 | + *this = *this - rhs; | ||
176 | + return *this; | ||
177 | + } | ||
178 | + CUDA_CALLABLE complex<T> operator-=(const T &rhs) | ||
179 | + { | ||
180 | + *this = *this - rhs; | ||
181 | + return *this; | ||
182 | + } | ||
183 | + | ||
184 | + CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs) | ||
185 | + { | ||
186 | + *this = *this * rhs; | ||
187 | + return *this; | ||
188 | + } | ||
189 | + CUDA_CALLABLE complex<T> operator*=(const T &rhs) | ||
190 | + { | ||
191 | + *this = *this * rhs; | ||
192 | + return *this; | ||
193 | + } | ||
194 | + //divide and assign | ||
195 | + CUDA_CALLABLE complex<T> operator/=(const complex<T> &rhs) | ||
196 | + { | ||
197 | + *this = *this / rhs; | ||
198 | + return *this; | ||
199 | + } | ||
200 | + CUDA_CALLABLE complex<T> operator/=(const T &rhs) | ||
201 | + { | ||
202 | + *this = *this / rhs; | ||
203 | + return *this; | ||
204 | + } | ||
205 | + | ||
206 | + //absolute value operator (returns the absolute value of the complex number) | ||
207 | + CUDA_CALLABLE T abs() | ||
208 | + { | ||
209 | + return std::sqrt(r * r + i * i); | ||
210 | + } | ||
211 | + | ||
212 | + CUDA_CALLABLE complex<T> log() | ||
213 | + { | ||
214 | + complex<T> result; | ||
215 | + result.r = (T)std::log(std::sqrt(r * r + i * i)); | ||
216 | + result.i = (T)std::atan2(i, r); | ||
217 | + | ||
218 | + | ||
219 | + return result; | ||
220 | + } | ||
221 | + | ||
222 | + CUDA_CALLABLE complex<T> exp() | ||
223 | + { | ||
224 | + complex<T> result; | ||
225 | + | ||
226 | + T e_r = std::exp(r); | ||
227 | + result.r = e_r * (T)std::cos(i); | ||
228 | + result.i = e_r * (T)std::sin(i); | ||
229 | + | ||
230 | + return result; | ||
231 | + } | ||
232 | + | ||
233 | + /*CUDA_CALLABLE complex<T> pow(int y) | ||
234 | + { | ||
235 | + | ||
236 | + return pow((double)y); | ||
237 | + }*/ | ||
238 | + | ||
239 | + CUDA_CALLABLE complex<T> pow(T y) | ||
240 | + { | ||
241 | + complex<T> result; | ||
242 | + | ||
243 | + result = log() * y; | ||
244 | + | ||
245 | + return result.exp(); | ||
246 | + } | ||
247 | + | ||
248 | + CUDA_CALLABLE complex<T> sqrt() | ||
249 | + { | ||
250 | + complex<T> result; | ||
251 | + | ||
252 | + //convert to polar coordinates | ||
253 | + T a = std::sqrt(r*r + i*i); | ||
254 | + T theta = std::atan2(i, r); | ||
255 | + | ||
256 | + //find the square root | ||
257 | + T a_p = std::sqrt(a); | ||
258 | + T theta_p = theta/2.0f; | ||
259 | + | ||
260 | + //convert back to cartesian coordinates | ||
261 | + result.r = a_p * std::cos(theta_p); | ||
262 | + result.i = a_p * std::sin(theta_p); | ||
263 | + | ||
264 | + return result; | ||
265 | + } | ||
266 | + | ||
267 | + std::string str() | ||
268 | + { | ||
269 | + std::stringstream ss; | ||
270 | + ss<<"("<<r<<","<<i<<")"; | ||
271 | + | ||
272 | + return ss.str(); | ||
273 | + } | ||
274 | + | ||
275 | + //COMPARISON operators | ||
276 | + CUDA_CALLABLE bool operator==(complex<T> rhs) | ||
277 | + { | ||
278 | + if(r == rhs.r && i == rhs.i) | ||
279 | + return true; | ||
280 | + return false; | ||
281 | + } | ||
282 | + | ||
283 | + CUDA_CALLABLE bool operator==(T rhs) | ||
284 | + { | ||
285 | + if(r == rhs && i == 0) | ||
286 | + return true; | ||
287 | + return false; | ||
288 | + } | ||
289 | + | ||
290 | + CUDA_CALLABLE bool operator!=(T rhs) | ||
291 | + { | ||
292 | + if(r != rhs || i != 0) | ||
293 | + return true; | ||
294 | + return false; | ||
295 | + } | ||
296 | + | ||
297 | + CUDA_CALLABLE bool operator<(complex<T> rhs){ | ||
298 | + return abs() < rhs.abs(); | ||
299 | + } | ||
300 | + CUDA_CALLABLE bool operator<=(complex<T> rhs){ | ||
301 | + return abs() <= rhs.abs(); | ||
302 | + } | ||
303 | + CUDA_CALLABLE bool operator>(complex<T> rhs){ | ||
304 | + return abs() > rhs.abs(); | ||
305 | + } | ||
306 | + CUDA_CALLABLE bool operator >=(complex<T> rhs){ | ||
307 | + return abs() >= rhs.abs(); | ||
308 | + } | ||
309 | + | ||
310 | + //CASTING operators | ||
311 | + template < typename otherT > | ||
312 | + operator complex<otherT>() | ||
313 | + { | ||
314 | + complex<otherT> result((otherT)r, (otherT)i); | ||
315 | + return result; | ||
316 | + } | ||
317 | + template< typename otherT > | ||
318 | + complex( const complex<otherT> &rhs) | ||
319 | + { | ||
320 | + r = (T)rhs.r; | ||
321 | + i = (T)rhs.i; | ||
322 | + } | ||
323 | + template< typename otherT > | ||
324 | + complex& operator=(const complex<otherT> &rhs) | ||
325 | + { | ||
326 | + r = (T)rhs.r; | ||
327 | + i = (T)rhs.i; | ||
328 | + return *this; | ||
329 | + } | ||
330 | + | ||
331 | +}; | ||
332 | + | ||
333 | +} //end RTS namespace | ||
334 | + | ||
335 | +//addition | ||
336 | +template<typename T> | ||
337 | +CUDA_CALLABLE static stim::complex<T> operator+(const double a, const stim::complex<T> b) | ||
338 | +{ | ||
339 | + return stim::complex<T>((T)a + b.r, b.i); | ||
340 | +} | ||
341 | + | ||
342 | +//subtraction with a real value | ||
343 | +template<typename T> | ||
344 | +CUDA_CALLABLE static stim::complex<T> operator-(const double a, const stim::complex<T> b) | ||
345 | +{ | ||
346 | + return stim::complex<T>((T)a - b.r, -b.i); | ||
347 | +} | ||
348 | + | ||
349 | +//minus sign | ||
350 | +template<typename T> | ||
351 | +CUDA_CALLABLE static stim::complex<T> operator-(const stim::complex<T> &rhs) | ||
352 | +{ | ||
353 | + return stim::complex<T>(-rhs.r, -rhs.i); | ||
354 | +} | ||
355 | + | ||
356 | +//multiply a T value by a complex value | ||
357 | +template<typename T> | ||
358 | +CUDA_CALLABLE static stim::complex<T> operator*(const double a, const stim::complex<T> b) | ||
359 | +{ | ||
360 | + return stim::complex<T>((T)a * b.r, (T)a * b.i); | ||
361 | +} | ||
362 | + | ||
363 | +//divide a T value by a complex value | ||
364 | +template<typename T> | ||
365 | +CUDA_CALLABLE static stim::complex<T> operator/(const double a, const stim::complex<T> b) | ||
366 | +{ | ||
367 | + stim::complex<T> result; | ||
368 | + | ||
369 | + T denom = b.r * b.r + b.i * b.i; | ||
370 | + | ||
371 | + result.r = ((T)a * b.r) / denom; | ||
372 | + result.i = -((T)a * b.i) / denom; | ||
373 | + | ||
374 | + return result; | ||
375 | +} | ||
376 | + | ||
377 | + | ||
378 | +template<typename T> | ||
379 | +CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, T y) | ||
380 | +{ | ||
381 | + return x.pow(y); | ||
382 | +} | ||
383 | +template<typename T> | ||
384 | +CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, int y) | ||
385 | +{ | ||
386 | + return x.pow(y); | ||
387 | +} | ||
388 | + | ||
389 | +//log function | ||
390 | +template<typename T> | ||
391 | +CUDA_CALLABLE static stim::complex<T> log(stim::complex<T> x) | ||
392 | +{ | ||
393 | + return x.log(); | ||
394 | +} | ||
395 | + | ||
396 | +//exp function | ||
397 | +template<typename T> | ||
398 | +CUDA_CALLABLE static stim::complex<T> exp(stim::complex<T> x) | ||
399 | +{ | ||
400 | + return x.exp(); | ||
401 | +} | ||
402 | + | ||
403 | +//sqrt function | ||
404 | +template<typename T> | ||
405 | +CUDA_CALLABLE static stim::complex<T> sqrt(stim::complex<T> x) | ||
406 | +{ | ||
407 | + return x.sqrt(); | ||
408 | +} | ||
409 | + | ||
410 | + | ||
411 | +template <typename T> | ||
412 | +CUDA_CALLABLE static T abs(stim::complex<T> a) | ||
413 | +{ | ||
414 | + return a.abs(); | ||
415 | +} | ||
416 | + | ||
417 | +template <typename T> | ||
418 | +CUDA_CALLABLE static T real(stim::complex<T> a) | ||
419 | +{ | ||
420 | + return a.r; | ||
421 | +} | ||
422 | + | ||
423 | +//template <typename T> | ||
424 | +CUDA_CALLABLE static float real(float a) | ||
425 | +{ | ||
426 | + return a; | ||
427 | +} | ||
428 | + | ||
429 | +template <typename T> | ||
430 | +CUDA_CALLABLE static T imag(stim::complex<T> a) | ||
431 | +{ | ||
432 | + return a.i; | ||
433 | +} | ||
434 | + | ||
435 | +//trigonometric functions | ||
436 | +//template<class A> | ||
437 | +/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x) | ||
438 | +{ | ||
439 | + stim::complex<float> result; | ||
440 | + result.r = sinf(x.r) * coshf(x.i); | ||
441 | + result.i = cosf(x.r) * sinhf(x.i); | ||
442 | + | ||
443 | + return result; | ||
444 | +}*/ | ||
445 | + | ||
446 | +template<class A> | ||
447 | +CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x) | ||
448 | +{ | ||
449 | + stim::complex<A> result; | ||
450 | + result.r = (A)std::sin(x.r) * (A)std::cosh(x.i); | ||
451 | + result.i = (A)std::cos(x.r) * (A)std::sinh(x.i); | ||
452 | + | ||
453 | + return result; | ||
454 | +} | ||
455 | + | ||
456 | +//floating point template | ||
457 | +//template<class A> | ||
458 | +/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x) | ||
459 | +{ | ||
460 | + stim::complex<float> result; | ||
461 | + result.r = cosf(x.r) * coshf(x.i); | ||
462 | + result.i = -(sinf(x.r) * sinhf(x.i)); | ||
463 | + | ||
464 | + return result; | ||
465 | +}*/ | ||
466 | + | ||
467 | +template<class A> | ||
468 | +CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x) | ||
469 | +{ | ||
470 | + stim::complex<A> result; | ||
471 | + result.r = (A)std::cos(x.r) * (A)std::cosh(x.i); | ||
472 | + result.i = -((A)std::sin(x.r) * (A)std::sinh(x.i)); | ||
473 | + | ||
474 | + return result; | ||
475 | +} | ||
476 | + | ||
477 | + | ||
478 | +template<class A> | ||
479 | +std::ostream& operator<<(std::ostream& os, stim::complex<A> x) | ||
480 | +{ | ||
481 | + os<<x.str(); | ||
482 | + return os; | ||
483 | +} | ||
484 | + | ||
485 | +template<class A> | ||
486 | +std::istream& operator>>(std::istream& is, stim::complex<A>& x) | ||
487 | +{ | ||
488 | + A r, i; | ||
489 | + r = i = 0; //initialize the real and imaginary parts to zero | ||
490 | + is>>r; //parse | ||
491 | + is>>i; | ||
492 | + | ||
493 | + x.real(r); //assign the parsed values to x | ||
494 | + x.imag(i); | ||
495 | + | ||
496 | + return is; //return the stream | ||
497 | +} | ||
498 | + | ||
499 | +//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7 | ||
500 | +//template<class T> using rtsComplex = stim::complex<T>; | ||
501 | +//#endif | ||
502 | + | ||
503 | + | ||
504 | + | ||
505 | +#endif |
math/complexfield.cuh
1 | -#ifndef RTS_COMPLEXFIELD_H | ||
2 | -#define RTS_COMPLEXFIELD_H | ||
3 | - | ||
4 | -#include "cublas_v2.h" | ||
5 | -#include <cuda_runtime.h> | ||
6 | - | ||
7 | -#include "../math/field.cuh" | ||
8 | -#include "../math/complex.h" | ||
9 | -#include "../math/realfield.cuh" | ||
10 | - | ||
11 | -namespace stim{ | ||
12 | - | ||
13 | -template<typename T> | ||
14 | -__global__ void gpu_complexfield_mag(T* dest, complex<T>* source, unsigned int r0, unsigned int r1){ | ||
15 | - | ||
16 | - int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
17 | - int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
18 | - | ||
19 | - //make sure that the thread indices are in-bounds | ||
20 | - if(iu >= r0 || iv >= r1) return; | ||
21 | - | ||
22 | - //compute the index into the field | ||
23 | - int i = iv*r0 + iu; | ||
24 | - | ||
25 | - //calculate and store the result | ||
26 | - dest[i] = source[i].abs(); | ||
27 | -} | ||
28 | - | ||
29 | -/*This class stores functions for saving images of complex fields | ||
30 | -*/ | ||
31 | -template<typename T, unsigned int D = 1> | ||
32 | -class complexfield : public field< stim::complex<T>, D >{ | ||
33 | - using field< stim::complex<T>, D >::R; | ||
34 | - using field< stim::complex<T>, D >::X; | ||
35 | - using field< stim::complex<T>, D >::shape; | ||
36 | - using field< stim::complex<T>, D >::cuda_params; | ||
37 | - | ||
38 | - | ||
39 | - | ||
40 | -public: | ||
41 | - | ||
42 | - //find the maximum value of component n | ||
43 | - stim::complex<T> find_max(unsigned int n){ | ||
44 | - cublasStatus_t stat; | ||
45 | - cublasHandle_t handle; | ||
46 | - | ||
47 | - //create a CUBLAS handle | ||
48 | - stat = cublasCreate(&handle); | ||
49 | - if(stat != CUBLAS_STATUS_SUCCESS){ | ||
50 | - std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | ||
51 | - exit(1); | ||
52 | - } | ||
53 | - | ||
54 | - int L = R[0] * R[1]; //compute the number of discrete points in a slice | ||
55 | - int index; //result of the max operation | ||
56 | - stim::complex<T> result; | ||
57 | - | ||
58 | - if(sizeof(T) == 8) | ||
59 | - stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index); | ||
60 | - else | ||
61 | - stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index); | ||
62 | - | ||
63 | - index -= 1; //adjust for 1-based indexing | ||
64 | - | ||
65 | - //if there was a GPU error, terminate | ||
66 | - if(stat != CUBLAS_STATUS_SUCCESS){ | ||
67 | - std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | ||
68 | - exit(1); | ||
69 | - } | ||
70 | - | ||
71 | - //retrieve the maximum value for this slice and store it in the maxVal array | ||
72 | - std::cout<<X[n]<<std::endl; | ||
73 | - HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(stim::complex<T>), cudaMemcpyDeviceToHost)); | ||
74 | - return result; | ||
75 | - } | ||
76 | - | ||
77 | -public: | ||
78 | - | ||
79 | - enum attribute {magnitude, real, imaginary}; | ||
80 | - | ||
81 | - //constructor (no parameters) | ||
82 | - complexfield() : field<stim::complex<T>, D>(){}; | ||
83 | - | ||
84 | - //constructor (resolution specified) | ||
85 | - complexfield(unsigned int r0, unsigned int r1) : field<stim::complex<T>, D>(r0, r1){}; | ||
86 | - | ||
87 | - //assignment from a field of complex values | ||
88 | - complexfield & operator=(const field< stim::complex<T>, D > rhs){ | ||
89 | - field< complex<T>, D >::operator=(rhs); | ||
90 | - return *this; | ||
91 | - } | ||
92 | - | ||
93 | - //assignment operator (scalar value) | ||
94 | - complexfield & operator= (const complex<T> rhs){ | ||
95 | - | ||
96 | - field< complex<T>, D >::operator=(rhs); | ||
97 | - return *this; | ||
98 | - } | ||
99 | - | ||
100 | - //assignment operator (vector value) | ||
101 | - complexfield & operator= (const vec< complex<T>, D > rhs){ | ||
102 | - | ||
103 | - field< complex<T>, D >::operator=(rhs); | ||
104 | - return *this; | ||
105 | - } | ||
106 | - | ||
107 | - //cropping | ||
108 | - complexfield crop(unsigned int width, unsigned int height){ | ||
109 | - | ||
110 | - complexfield<T, D> result; | ||
111 | - result = field< complex<T>, D>::crop(width, height); | ||
112 | - return result; | ||
113 | - } | ||
114 | - | ||
115 | - void toImage(std::string filename, attribute type = magnitude, unsigned int n=0){ | ||
116 | - | ||
117 | - field<T, 1> rf(R[0], R[1]); | ||
118 | - | ||
119 | - //get cuda parameters | ||
120 | - dim3 blocks, grids; | ||
121 | - cuda_params(grids, blocks); | ||
122 | - | ||
123 | - if(type == magnitude){ | ||
124 | - gpu_complexfield_mag <<<grids, blocks>>> (rf.ptr(), X[n], R[0], R[1]); | ||
125 | - rf.toImage(filename, n, true); | ||
126 | - } | ||
127 | - | ||
128 | - } | ||
129 | - | ||
130 | - | ||
131 | -}; | ||
132 | - | ||
133 | - | ||
134 | -} //end namespace rts | ||
135 | - | ||
136 | - | ||
137 | -#endif | 1 | +#ifndef RTS_COMPLEXFIELD_H |
2 | +#define RTS_COMPLEXFIELD_H | ||
3 | + | ||
4 | +#include "cublas_v2.h" | ||
5 | +#include <cuda_runtime.h> | ||
6 | + | ||
7 | +#include "../math/field.cuh" | ||
8 | +#include "../math/complex.h" | ||
9 | +#include "../math/realfield.cuh" | ||
10 | + | ||
11 | +namespace stim{ | ||
12 | + | ||
13 | +template<typename T> | ||
14 | +__global__ void gpu_complexfield_mag(T* dest, complex<T>* source, unsigned int r0, unsigned int r1){ | ||
15 | + | ||
16 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
17 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
18 | + | ||
19 | + //make sure that the thread indices are in-bounds | ||
20 | + if(iu >= r0 || iv >= r1) return; | ||
21 | + | ||
22 | + //compute the index into the field | ||
23 | + int i = iv*r0 + iu; | ||
24 | + | ||
25 | + //calculate and store the result | ||
26 | + dest[i] = source[i].abs(); | ||
27 | +} | ||
28 | + | ||
29 | +/*This class stores functions for saving images of complex fields | ||
30 | +*/ | ||
31 | +template<typename T, unsigned int D = 1> | ||
32 | +class complexfield : public field< stim::complex<T>, D >{ | ||
33 | + using field< stim::complex<T>, D >::R; | ||
34 | + using field< stim::complex<T>, D >::X; | ||
35 | + using field< stim::complex<T>, D >::shape; | ||
36 | + using field< stim::complex<T>, D >::cuda_params; | ||
37 | + | ||
38 | + | ||
39 | + | ||
40 | +public: | ||
41 | + | ||
42 | + //find the maximum value of component n | ||
43 | + stim::complex<T> find_max(unsigned int n){ | ||
44 | + cublasStatus_t stat; | ||
45 | + cublasHandle_t handle; | ||
46 | + | ||
47 | + //create a CUBLAS handle | ||
48 | + stat = cublasCreate(&handle); | ||
49 | + if(stat != CUBLAS_STATUS_SUCCESS){ | ||
50 | + std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | ||
51 | + exit(1); | ||
52 | + } | ||
53 | + | ||
54 | + int L = R[0] * R[1]; //compute the number of discrete points in a slice | ||
55 | + int index; //result of the max operation | ||
56 | + stim::complex<T> result; | ||
57 | + | ||
58 | + if(sizeof(T) == 8) | ||
59 | + stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index); | ||
60 | + else | ||
61 | + stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index); | ||
62 | + | ||
63 | + index -= 1; //adjust for 1-based indexing | ||
64 | + | ||
65 | + //if there was a GPU error, terminate | ||
66 | + if(stat != CUBLAS_STATUS_SUCCESS){ | ||
67 | + std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | ||
68 | + exit(1); | ||
69 | + } | ||
70 | + | ||
71 | + //retrieve the maximum value for this slice and store it in the maxVal array | ||
72 | + std::cout<<X[n]<<std::endl; | ||
73 | + HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(stim::complex<T>), cudaMemcpyDeviceToHost)); | ||
74 | + return result; | ||
75 | + } | ||
76 | + | ||
77 | +public: | ||
78 | + | ||
79 | + enum attribute {magnitude, real, imaginary}; | ||
80 | + | ||
81 | + //constructor (no parameters) | ||
82 | + complexfield() : field<stim::complex<T>, D>(){}; | ||
83 | + | ||
84 | + //constructor (resolution specified) | ||
85 | + complexfield(unsigned int r0, unsigned int r1) : field<stim::complex<T>, D>(r0, r1){}; | ||
86 | + | ||
87 | + //assignment from a field of complex values | ||
88 | + complexfield & operator=(const field< stim::complex<T>, D > rhs){ | ||
89 | + field< complex<T>, D >::operator=(rhs); | ||
90 | + return *this; | ||
91 | + } | ||
92 | + | ||
93 | + //assignment operator (scalar value) | ||
94 | + complexfield & operator= (const complex<T> rhs){ | ||
95 | + | ||
96 | + field< complex<T>, D >::operator=(rhs); | ||
97 | + return *this; | ||
98 | + } | ||
99 | + | ||
100 | + //assignment operator (vector value) | ||
101 | + complexfield & operator= (const vec< complex<T>, D > rhs){ | ||
102 | + | ||
103 | + field< complex<T>, D >::operator=(rhs); | ||
104 | + return *this; | ||
105 | + } | ||
106 | + | ||
107 | + //cropping | ||
108 | + complexfield crop(unsigned int width, unsigned int height){ | ||
109 | + | ||
110 | + complexfield<T, D> result; | ||
111 | + result = field< complex<T>, D>::crop(width, height); | ||
112 | + return result; | ||
113 | + } | ||
114 | + | ||
115 | + void toImage(std::string filename, attribute type = magnitude, unsigned int n=0){ | ||
116 | + | ||
117 | + field<T, 1> rf(R[0], R[1]); | ||
118 | + | ||
119 | + //get cuda parameters | ||
120 | + dim3 blocks, grids; | ||
121 | + cuda_params(grids, blocks); | ||
122 | + | ||
123 | + if(type == magnitude){ | ||
124 | + gpu_complexfield_mag <<<grids, blocks>>> (rf.ptr(), X[n], R[0], R[1]); | ||
125 | + rf.toImage(filename, n, true); | ||
126 | + } | ||
127 | + | ||
128 | + } | ||
129 | + | ||
130 | + | ||
131 | +}; | ||
132 | + | ||
133 | + | ||
134 | +} //end namespace rts | ||
135 | + | ||
136 | + | ||
137 | +#endif |
math/field.cuh
1 | -#ifndef RTS_FIELD_CUH | ||
2 | -#define RTS_FIELD_CUH | ||
3 | - | ||
4 | -#include <vector> | ||
5 | -#include <string> | ||
6 | -#include <sstream> | ||
7 | - | ||
8 | -#include "cublas_v2.h" | ||
9 | -#include <cuda_runtime.h> | ||
10 | - | ||
11 | -#include "../math/rect.h" | ||
12 | -#include "../cuda/threads.h" | ||
13 | -#include "../cuda/error.h" | ||
14 | -#include "../cuda/devices.h" | ||
15 | -#include "../visualization/colormap.h" | ||
16 | - | ||
17 | - | ||
18 | -namespace stim{ | ||
19 | - | ||
20 | -//multiply R = X * Y | ||
21 | -template<typename T> | ||
22 | -__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){ | ||
23 | - | ||
24 | - int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
25 | - int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
26 | - | ||
27 | - //make sure that the thread indices are in-bounds | ||
28 | - if(iu >= r0 || iv >= r1) return; | ||
29 | - | ||
30 | - //compute the index into the field | ||
31 | - int i = iv*r0 + iu; | ||
32 | - | ||
33 | - //calculate and store the result | ||
34 | - R[i] = X[i] * Y[i]; | ||
35 | -} | ||
36 | - | ||
37 | -//assign a constant value to all points | ||
38 | -template<typename T> | ||
39 | -__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){ | ||
40 | - | ||
41 | - int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
42 | - int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
43 | - | ||
44 | - //make sure that the thread indices are in-bounds | ||
45 | - if(iu >= r0 || iv >= r1) return; | ||
46 | - | ||
47 | - //compute the index into the field | ||
48 | - int i = iv*r0 + iu; | ||
49 | - | ||
50 | - //calculate and store the result | ||
51 | - ptr[i] = val; | ||
52 | -} | ||
53 | - | ||
54 | -//crop the field to the new dimensions (width x height) | ||
55 | -template<typename T> | ||
56 | -__global__ void gpu_field_crop(T* dest, T* source, | ||
57 | - unsigned int r0, unsigned int r1, | ||
58 | - unsigned int width, unsigned int height){ | ||
59 | - | ||
60 | - int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
61 | - int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
62 | - | ||
63 | - //make sure that the thread indices are in-bounds | ||
64 | - if(iu >= width || iv >= height) return; | ||
65 | - | ||
66 | - //compute the index into the field | ||
67 | - int is = iv*r0 + iu; | ||
68 | - int id = iv*width + iu; | ||
69 | - | ||
70 | - //calculate and store the result | ||
71 | - dest[id] = source[is]; | ||
72 | -} | ||
73 | - | ||
74 | -template<typename T, unsigned int D = 1> | ||
75 | -class field{ | ||
76 | - | ||
77 | -protected: | ||
78 | - | ||
79 | - T* X[D]; //pointer to the field data | ||
80 | - unsigned int R[2]; //field resolution | ||
81 | - stim::rect<T> shape; //position and shape of the field slice | ||
82 | - | ||
83 | - //calculates the optimal block and grid sizes using information from the GPU | ||
84 | - void cuda_params(dim3& grids, dim3& blocks){ | ||
85 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | ||
86 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
87 | - | ||
88 | - //create one thread for each detector pixel | ||
89 | - blocks = dim3(SQRT_BLOCK, SQRT_BLOCK); | ||
90 | - grids = dim3((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
91 | - } | ||
92 | - | ||
93 | - //find the maximum value of component n | ||
94 | - T find_max(unsigned int n){ | ||
95 | - cublasStatus_t stat; | ||
96 | - cublasHandle_t handle; | ||
97 | - | ||
98 | - //create a CUBLAS handle | ||
99 | - stat = cublasCreate(&handle); | ||
100 | - if(stat != CUBLAS_STATUS_SUCCESS){ | ||
101 | - std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | ||
102 | - exit(1); | ||
103 | - } | ||
104 | - | ||
105 | - int L = R[0] * R[1]; //compute the number of discrete points in a slice | ||
106 | - int index; //result of the max operation | ||
107 | - T result; | ||
108 | - | ||
109 | - if(sizeof(T) == 4) | ||
110 | - stat = cublasIsamax(handle, L, (const float*)X[n], 1, &index); | ||
111 | - else | ||
112 | - stat = cublasIdamax(handle, L, (const double*)X[n], 1, &index); | ||
113 | - | ||
114 | - index -= 1; //adjust for 1-based indexing | ||
115 | - | ||
116 | - //if there was a GPU error, terminate | ||
117 | - if(stat != CUBLAS_STATUS_SUCCESS){ | ||
118 | - std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | ||
119 | - exit(1); | ||
120 | - } | ||
121 | - | ||
122 | - //retrieve the maximum value for this slice and store it in the maxVal array | ||
123 | - HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(T), cudaMemcpyDeviceToHost)); | ||
124 | - return result; | ||
125 | - } | ||
126 | - | ||
127 | -public: | ||
128 | - | ||
129 | - //returns a list of file names given an input string with wild cards | ||
130 | - std::vector<std::string> process_filename(std::string name){ | ||
131 | - std::stringstream ss(name); | ||
132 | - std::string item; | ||
133 | - std::vector<std::string> elems; | ||
134 | - while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension) | ||
135 | - { | ||
136 | - elems.push_back(item); | ||
137 | - } | ||
138 | - | ||
139 | - std::string prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters) | ||
140 | - std::string ext = elems[1]; //file extension (ex. .bmp, .png) | ||
141 | - ext = std::string(".") + ext; //add a period back into the extension | ||
142 | - | ||
143 | - size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'') | ||
144 | - size_t i1 = prefix.find_last_of("?"); | ||
145 | - | ||
146 | - std::string postfix = prefix.substr(i1+1); | ||
147 | - prefix = prefix.substr(0, i0); | ||
148 | - | ||
149 | - unsigned int digits = i1 - i0 + 1; //compute the number of wildcards | ||
150 | - | ||
151 | - std::vector<std::string> flist; //create a vector of file names | ||
152 | - //fill the list | ||
153 | - for(unsigned int d=0; d<D; d++){ | ||
154 | - std::stringstream ss; //assemble the file name | ||
155 | - ss<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext; | ||
156 | - flist.push_back(ss.str()); | ||
157 | - } | ||
158 | - | ||
159 | - return flist; | ||
160 | - } | ||
161 | - | ||
162 | - void init(){ | ||
163 | - for(unsigned int n=0; n<D; n++) | ||
164 | - X[n] = NULL; | ||
165 | - } | ||
166 | - void destroy(){ | ||
167 | - for(unsigned int n=0; n<D; n++) | ||
168 | - if(X[n] != NULL) | ||
169 | - HANDLE_ERROR(cudaFree(X[n])); | ||
170 | - } | ||
171 | - | ||
172 | -public: | ||
173 | - //field constructor | ||
174 | - field(){ | ||
175 | - R[0] = R[1] = 0; | ||
176 | - init(); | ||
177 | - } | ||
178 | - | ||
179 | - field(unsigned int x, unsigned int y){ | ||
180 | - //set the resolution | ||
181 | - R[0] = x; | ||
182 | - R[1] = y; | ||
183 | - //allocate memory on the GPU | ||
184 | - for(unsigned int n=0; n<D; n++){ | ||
185 | - HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(T) * R[0] * R[1] )); | ||
186 | - } | ||
187 | - clear(); //zero the field | ||
188 | - } | ||
189 | - | ||
190 | - ///copy constructor | ||
191 | - field(const field &rhs){ | ||
192 | - //first make a shallow copy | ||
193 | - R[0] = rhs.R[0]; | ||
194 | - R[1] = rhs.R[1]; | ||
195 | - | ||
196 | - for(unsigned int n=0; n<D; n++){ | ||
197 | - //do we have to make a deep copy? | ||
198 | - if(rhs.X[n] == NULL) | ||
199 | - X[n] = NULL; //no | ||
200 | - else{ | ||
201 | - //allocate the necessary memory | ||
202 | - HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1])); | ||
203 | - | ||
204 | - //copy the slice | ||
205 | - HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | ||
206 | - } | ||
207 | - } | ||
208 | - } | ||
209 | - | ||
210 | - ~field(){ | ||
211 | - destroy(); | ||
212 | - } | ||
213 | - | ||
214 | - //assignment operator | ||
215 | - field & operator= (const field & rhs){ | ||
216 | - | ||
217 | - //de-allocate any existing GPU memory | ||
218 | - destroy(); | ||
219 | - | ||
220 | - //copy the slice resolution | ||
221 | - R[0] = rhs.R[0]; | ||
222 | - R[1] = rhs.R[1]; | ||
223 | - | ||
224 | - for(unsigned int n=0; n<D; n++) | ||
225 | - { | ||
226 | - //allocate the necessary memory | ||
227 | - HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1])); | ||
228 | - //copy the slice | ||
229 | - HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | ||
230 | - } | ||
231 | - return *this; | ||
232 | - } | ||
233 | - | ||
234 | - field & operator= (const T rhs){ | ||
235 | - | ||
236 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | ||
237 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
238 | - | ||
239 | - //create one thread for each detector pixel | ||
240 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
241 | - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
242 | - | ||
243 | - //assign the constant value to all positions and dimensions | ||
244 | - for(int n=0; n<D; n++) | ||
245 | - stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]); | ||
246 | - | ||
247 | - return *this; | ||
248 | - } | ||
249 | - | ||
250 | - //assignment of vector component | ||
251 | - field & operator= (const vec<T, D> rhs){ | ||
252 | - | ||
253 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | ||
254 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
255 | - | ||
256 | - //create one thread for each detector pixel | ||
257 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
258 | - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
259 | - | ||
260 | - //assign the constant value to all positions and dimensions | ||
261 | - for(unsigned int n=0; n<D; n++) | ||
262 | - stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]); | ||
263 | - | ||
264 | - return *this; | ||
265 | - | ||
266 | - } | ||
267 | - | ||
268 | - //multiply two fields (element-wise multiplication) | ||
269 | - field<T, D> operator* (const field & rhs){ | ||
270 | - | ||
271 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | ||
272 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
273 | - | ||
274 | - //create one thread for each detector pixel | ||
275 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
276 | - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
277 | - | ||
278 | - //create a scalar field to store the result | ||
279 | - field<T, D> result(R[0], R[1]); | ||
280 | - | ||
281 | - for(int n=0; n<D; n++) | ||
282 | - stim::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]); | ||
283 | - | ||
284 | - return result; | ||
285 | - } | ||
286 | - | ||
287 | - T* ptr(unsigned int n = 0){ | ||
288 | - if(n < D) | ||
289 | - return X[n]; | ||
290 | - else return NULL; | ||
291 | - } | ||
292 | - | ||
293 | - //return the vector component at position (u, v) | ||
294 | - vec<T, D> get(unsigned int u, unsigned int v){ | ||
295 | - | ||
296 | - vec<T, D> result; | ||
297 | - for(unsigned int d=0; d<D; d++){ | ||
298 | - HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + v*R[0] + u, sizeof(T), cudaMemcpyDeviceToHost)); | ||
299 | - } | ||
300 | - | ||
301 | - return result; | ||
302 | - } | ||
303 | - | ||
304 | - //set all components of the field to zero | ||
305 | - void clear(){ | ||
306 | - for(unsigned int n=0; n<D; n++) | ||
307 | - if(X[n] != NULL) | ||
308 | - HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1])); | ||
309 | - } | ||
310 | - | ||
311 | - //crop the field | ||
312 | - field<T, D> crop(unsigned int width, unsigned int height){ | ||
313 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | ||
314 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
315 | - | ||
316 | - //create one thread for each detector pixel | ||
317 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
318 | - dim3 dimGrid((width + SQRT_BLOCK -1)/SQRT_BLOCK, (height + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
319 | - | ||
320 | - //create a scalar field to store the result | ||
321 | - field<T, D> result(width, height); | ||
322 | - | ||
323 | - for(int n=0; n<D; n++) | ||
324 | - stim::gpu_field_crop <<<dimGrid, dimBlock>>> (result.X[n], X[n], R[0], R[1], width, height); | ||
325 | - | ||
326 | - return result; | ||
327 | - } | ||
328 | - | ||
329 | - //save an image representing component n | ||
330 | - void toImage(std::string filename, unsigned int n = 0, | ||
331 | - bool positive = false, stim::colormapType cmap = stim::cmBrewer){ | ||
332 | - T max_val = find_max(n); //find the maximum value | ||
333 | - | ||
334 | - if(positive) //if the field is positive, use the range [0 max_val] | ||
335 | - stim::gpu2image<T>(X[n], filename, R[0], R[1], 0, max_val, cmap); | ||
336 | - else | ||
337 | - stim::gpu2image<T>(X[n], filename, R[0], R[1], -max_val, max_val, cmap); | ||
338 | - } | ||
339 | - | ||
340 | -}; | ||
341 | - | ||
342 | -} //end namespace rts | ||
343 | -#endif | 1 | +#ifndef RTS_FIELD_CUH |
2 | +#define RTS_FIELD_CUH | ||
3 | + | ||
4 | +#include <vector> | ||
5 | +#include <string> | ||
6 | +#include <sstream> | ||
7 | + | ||
8 | +#include "cublas_v2.h" | ||
9 | +#include <cuda_runtime.h> | ||
10 | + | ||
11 | +#include "../math/rect.h" | ||
12 | +#include "../cuda/threads.h" | ||
13 | +#include "../cuda/error.h" | ||
14 | +#include "../cuda/devices.h" | ||
15 | +#include "../visualization/colormap.h" | ||
16 | + | ||
17 | + | ||
18 | +namespace stim{ | ||
19 | + | ||
20 | +//multiply R = X * Y | ||
21 | +template<typename T> | ||
22 | +__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){ | ||
23 | + | ||
24 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
25 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
26 | + | ||
27 | + //make sure that the thread indices are in-bounds | ||
28 | + if(iu >= r0 || iv >= r1) return; | ||
29 | + | ||
30 | + //compute the index into the field | ||
31 | + int i = iv*r0 + iu; | ||
32 | + | ||
33 | + //calculate and store the result | ||
34 | + R[i] = X[i] * Y[i]; | ||
35 | +} | ||
36 | + | ||
37 | +//assign a constant value to all points | ||
38 | +template<typename T> | ||
39 | +__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){ | ||
40 | + | ||
41 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
42 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
43 | + | ||
44 | + //make sure that the thread indices are in-bounds | ||
45 | + if(iu >= r0 || iv >= r1) return; | ||
46 | + | ||
47 | + //compute the index into the field | ||
48 | + int i = iv*r0 + iu; | ||
49 | + | ||
50 | + //calculate and store the result | ||
51 | + ptr[i] = val; | ||
52 | +} | ||
53 | + | ||
54 | +//crop the field to the new dimensions (width x height) | ||
55 | +template<typename T> | ||
56 | +__global__ void gpu_field_crop(T* dest, T* source, | ||
57 | + unsigned int r0, unsigned int r1, | ||
58 | + unsigned int width, unsigned int height){ | ||
59 | + | ||
60 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
61 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
62 | + | ||
63 | + //make sure that the thread indices are in-bounds | ||
64 | + if(iu >= width || iv >= height) return; | ||
65 | + | ||
66 | + //compute the index into the field | ||
67 | + int is = iv*r0 + iu; | ||
68 | + int id = iv*width + iu; | ||
69 | + | ||
70 | + //calculate and store the result | ||
71 | + dest[id] = source[is]; | ||
72 | +} | ||
73 | + | ||
74 | +template<typename T, unsigned int D = 1> | ||
75 | +class field{ | ||
76 | + | ||
77 | +protected: | ||
78 | + | ||
79 | + T* X[D]; //pointer to the field data | ||
80 | + unsigned int R[2]; //field resolution | ||
81 | + stim::rect<T> shape; //position and shape of the field slice | ||
82 | + | ||
83 | + //calculates the optimal block and grid sizes using information from the GPU | ||
84 | + void cuda_params(dim3& grids, dim3& blocks){ | ||
85 | + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | ||
86 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
87 | + | ||
88 | + //create one thread for each detector pixel | ||
89 | + blocks = dim3(SQRT_BLOCK, SQRT_BLOCK); | ||
90 | + grids = dim3((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
91 | + } | ||
92 | + | ||
93 | + //find the maximum value of component n | ||
94 | + T find_max(unsigned int n){ | ||
95 | + cublasStatus_t stat; | ||
96 | + cublasHandle_t handle; | ||
97 | + | ||
98 | + //create a CUBLAS handle | ||
99 | + stat = cublasCreate(&handle); | ||
100 | + if(stat != CUBLAS_STATUS_SUCCESS){ | ||
101 | + std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | ||
102 | + exit(1); | ||
103 | + } | ||
104 | + | ||
105 | + int L = R[0] * R[1]; //compute the number of discrete points in a slice | ||
106 | + int index; //result of the max operation | ||
107 | + T result; | ||
108 | + | ||
109 | + if(sizeof(T) == 4) | ||
110 | + stat = cublasIsamax(handle, L, (const float*)X[n], 1, &index); | ||
111 | + else | ||
112 | + stat = cublasIdamax(handle, L, (const double*)X[n], 1, &index); | ||
113 | + | ||
114 | + index -= 1; //adjust for 1-based indexing | ||
115 | + | ||
116 | + //if there was a GPU error, terminate | ||
117 | + if(stat != CUBLAS_STATUS_SUCCESS){ | ||
118 | + std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | ||
119 | + exit(1); | ||
120 | + } | ||
121 | + | ||
122 | + //retrieve the maximum value for this slice and store it in the maxVal array | ||
123 | + HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(T), cudaMemcpyDeviceToHost)); | ||
124 | + return result; | ||
125 | + } | ||
126 | + | ||
127 | +public: | ||
128 | + | ||
129 | + //returns a list of file names given an input string with wild cards | ||
130 | + std::vector<std::string> process_filename(std::string name){ | ||
131 | + std::stringstream ss(name); | ||
132 | + std::string item; | ||
133 | + std::vector<std::string> elems; | ||
134 | + while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension) | ||
135 | + { | ||
136 | + elems.push_back(item); | ||
137 | + } | ||
138 | + | ||
139 | + std::string prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters) | ||
140 | + std::string ext = elems[1]; //file extension (ex. .bmp, .png) | ||
141 | + ext = std::string(".") + ext; //add a period back into the extension | ||
142 | + | ||
143 | + size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'') | ||
144 | + size_t i1 = prefix.find_last_of("?"); | ||
145 | + | ||
146 | + std::string postfix = prefix.substr(i1+1); | ||
147 | + prefix = prefix.substr(0, i0); | ||
148 | + | ||
149 | + unsigned int digits = i1 - i0 + 1; //compute the number of wildcards | ||
150 | + | ||
151 | + std::vector<std::string> flist; //create a vector of file names | ||
152 | + //fill the list | ||
153 | + for(unsigned int d=0; d<D; d++){ | ||
154 | + std::stringstream ss; //assemble the file name | ||
155 | + ss<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext; | ||
156 | + flist.push_back(ss.str()); | ||
157 | + } | ||
158 | + | ||
159 | + return flist; | ||
160 | + } | ||
161 | + | ||
162 | + void init(){ | ||
163 | + for(unsigned int n=0; n<D; n++) | ||
164 | + X[n] = NULL; | ||
165 | + } | ||
166 | + void destroy(){ | ||
167 | + for(unsigned int n=0; n<D; n++) | ||
168 | + if(X[n] != NULL) | ||
169 | + HANDLE_ERROR(cudaFree(X[n])); | ||
170 | + } | ||
171 | + | ||
172 | +public: | ||
173 | + //field constructor | ||
174 | + field(){ | ||
175 | + R[0] = R[1] = 0; | ||
176 | + init(); | ||
177 | + } | ||
178 | + | ||
179 | + field(unsigned int x, unsigned int y){ | ||
180 | + //set the resolution | ||
181 | + R[0] = x; | ||
182 | + R[1] = y; | ||
183 | + //allocate memory on the GPU | ||
184 | + for(unsigned int n=0; n<D; n++){ | ||
185 | + HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(T) * R[0] * R[1] )); | ||
186 | + } | ||
187 | + clear(); //zero the field | ||
188 | + } | ||
189 | + | ||
190 | + ///copy constructor | ||
191 | + field(const field &rhs){ | ||
192 | + //first make a shallow copy | ||
193 | + R[0] = rhs.R[0]; | ||
194 | + R[1] = rhs.R[1]; | ||
195 | + | ||
196 | + for(unsigned int n=0; n<D; n++){ | ||
197 | + //do we have to make a deep copy? | ||
198 | + if(rhs.X[n] == NULL) | ||
199 | + X[n] = NULL; //no | ||
200 | + else{ | ||
201 | + //allocate the necessary memory | ||
202 | + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1])); | ||
203 | + | ||
204 | + //copy the slice | ||
205 | + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | ||
206 | + } | ||
207 | + } | ||
208 | + } | ||
209 | + | ||
210 | + ~field(){ | ||
211 | + destroy(); | ||
212 | + } | ||
213 | + | ||
214 | + //assignment operator | ||
215 | + field & operator= (const field & rhs){ | ||
216 | + | ||
217 | + //de-allocate any existing GPU memory | ||
218 | + destroy(); | ||
219 | + | ||
220 | + //copy the slice resolution | ||
221 | + R[0] = rhs.R[0]; | ||
222 | + R[1] = rhs.R[1]; | ||
223 | + | ||
224 | + for(unsigned int n=0; n<D; n++) | ||
225 | + { | ||
226 | + //allocate the necessary memory | ||
227 | + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1])); | ||
228 | + //copy the slice | ||
229 | + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | ||
230 | + } | ||
231 | + return *this; | ||
232 | + } | ||
233 | + | ||
234 | + field & operator= (const T rhs){ | ||
235 | + | ||
236 | + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | ||
237 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
238 | + | ||
239 | + //create one thread for each detector pixel | ||
240 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
241 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
242 | + | ||
243 | + //assign the constant value to all positions and dimensions | ||
244 | + for(int n=0; n<D; n++) | ||
245 | + stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]); | ||
246 | + | ||
247 | + return *this; | ||
248 | + } | ||
249 | + | ||
250 | + //assignment of vector component | ||
251 | + field & operator= (const vec<T, D> rhs){ | ||
252 | + | ||
253 | + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | ||
254 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
255 | + | ||
256 | + //create one thread for each detector pixel | ||
257 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
258 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
259 | + | ||
260 | + //assign the constant value to all positions and dimensions | ||
261 | + for(unsigned int n=0; n<D; n++) | ||
262 | + stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]); | ||
263 | + | ||
264 | + return *this; | ||
265 | + | ||
266 | + } | ||
267 | + | ||
268 | + //multiply two fields (element-wise multiplication) | ||
269 | + field<T, D> operator* (const field & rhs){ | ||
270 | + | ||
271 | + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | ||
272 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
273 | + | ||
274 | + //create one thread for each detector pixel | ||
275 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
276 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
277 | + | ||
278 | + //create a scalar field to store the result | ||
279 | + field<T, D> result(R[0], R[1]); | ||
280 | + | ||
281 | + for(int n=0; n<D; n++) | ||
282 | + stim::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]); | ||
283 | + | ||
284 | + return result; | ||
285 | + } | ||
286 | + | ||
287 | + T* ptr(unsigned int n = 0){ | ||
288 | + if(n < D) | ||
289 | + return X[n]; | ||
290 | + else return NULL; | ||
291 | + } | ||
292 | + | ||
293 | + //return the vector component at position (u, v) | ||
294 | + vec<T, D> get(unsigned int u, unsigned int v){ | ||
295 | + | ||
296 | + vec<T, D> result; | ||
297 | + for(unsigned int d=0; d<D; d++){ | ||
298 | + HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + v*R[0] + u, sizeof(T), cudaMemcpyDeviceToHost)); | ||
299 | + } | ||
300 | + | ||
301 | + return result; | ||
302 | + } | ||
303 | + | ||
304 | + //set all components of the field to zero | ||
305 | + void clear(){ | ||
306 | + for(unsigned int n=0; n<D; n++) | ||
307 | + if(X[n] != NULL) | ||
308 | + HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1])); | ||
309 | + } | ||
310 | + | ||
311 | + //crop the field | ||
312 | + field<T, D> crop(unsigned int width, unsigned int height){ | ||
313 | + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | ||
314 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
315 | + | ||
316 | + //create one thread for each detector pixel | ||
317 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
318 | + dim3 dimGrid((width + SQRT_BLOCK -1)/SQRT_BLOCK, (height + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
319 | + | ||
320 | + //create a scalar field to store the result | ||
321 | + field<T, D> result(width, height); | ||
322 | + | ||
323 | + for(int n=0; n<D; n++) | ||
324 | + stim::gpu_field_crop <<<dimGrid, dimBlock>>> (result.X[n], X[n], R[0], R[1], width, height); | ||
325 | + | ||
326 | + return result; | ||
327 | + } | ||
328 | + | ||
329 | + //save an image representing component n | ||
330 | + void toImage(std::string filename, unsigned int n = 0, | ||
331 | + bool positive = false, stim::colormapType cmap = stim::cmBrewer){ | ||
332 | + T max_val = find_max(n); //find the maximum value | ||
333 | + | ||
334 | + if(positive) //if the field is positive, use the range [0 max_val] | ||
335 | + stim::gpu2image<T>(X[n], filename, R[0], R[1], 0, max_val, cmap); | ||
336 | + else | ||
337 | + stim::gpu2image<T>(X[n], filename, R[0], R[1], -max_val, max_val, cmap); | ||
338 | + } | ||
339 | + | ||
340 | +}; | ||
341 | + | ||
342 | +} //end namespace rts | ||
343 | +#endif |
math/rect.h
1 | -#ifndef RTS_RECT_H | ||
2 | -#define RTS_RECT_H | ||
3 | - | ||
4 | -//enable CUDA_CALLABLE macro | ||
5 | -#include "../cuda/callable.h" | ||
6 | -#include "../math/vector.h" | ||
7 | -#include "../math/triangle.h" | ||
8 | -#include "../math/quaternion.h" | ||
9 | -#include <iostream> | ||
10 | -#include <iomanip> | ||
11 | -#include <algorithm> | ||
12 | - | ||
13 | -namespace stim{ | ||
14 | - | ||
15 | -//template for a rectangle class in ND space | ||
16 | -template <class T, int N = 3> | ||
17 | -struct rect | ||
18 | -{ | ||
19 | - /* | ||
20 | - ^ O | ||
21 | - | | ||
22 | - | | ||
23 | - Y C | ||
24 | - | | ||
25 | - | | ||
26 | - O---------X---------> | ||
27 | - */ | ||
28 | - | ||
29 | -private: | ||
30 | - | ||
31 | - stim::vec<T, N> C; | ||
32 | - stim::vec<T, N> X; | ||
33 | - stim::vec<T, N> Y; | ||
34 | - | ||
35 | - CUDA_CALLABLE void scale(T factor){ | ||
36 | - X *= factor; | ||
37 | - Y *= factor; | ||
38 | - } | ||
39 | - | ||
40 | - CUDA_CALLABLE void normal(vec<T, N> n){ //orient the rectangle along the specified normal | ||
41 | - | ||
42 | - n = n.norm(); //normalize, just in case | ||
43 | - vec<T, N> n_current = X.cross(Y).norm(); //compute the current normal | ||
44 | - quaternion<T> q; //create a quaternion | ||
45 | - q.CreateRotation(n_current, n); //initialize a rotation from n_current to n | ||
46 | - | ||
47 | - //apply the quaternion to the vectors and position | ||
48 | - X = q.toMatrix3() * X; | ||
49 | - Y = q.toMatrix3() * Y; | ||
50 | - } | ||
51 | - | ||
52 | - CUDA_CALLABLE void init(){ | ||
53 | - C = vec<T, N>(0, 0, 0); | ||
54 | - X = vec<T, N>(1, 0, 0); | ||
55 | - Y = vec<T, N>(0, 1, 0); | ||
56 | - } | ||
57 | - | ||
58 | -public: | ||
59 | - | ||
60 | - CUDA_CALLABLE rect(){ | ||
61 | - init(); | ||
62 | - } | ||
63 | - | ||
64 | - CUDA_CALLABLE rect(T size, T z_pos = (T)0){ | ||
65 | - init(); //use the default setup | ||
66 | - scale(size); //scale the rectangle | ||
67 | - C[2] = z_pos; | ||
68 | - } | ||
69 | - | ||
70 | - CUDA_CALLABLE rect(T size, vec<T, N> c, vec<T, N> n = vec<T, N>(0, 0, 1)){ | ||
71 | - init(); //start with the default setting | ||
72 | - C = c; | ||
73 | - scale(size); //scale the rectangle | ||
74 | - normal(n); //orient | ||
75 | - | ||
76 | - } | ||
77 | - | ||
78 | - /*CUDA_CALLABLE rect(vec<T, N> a, vec<T, N> b, vec<T, N> c) | ||
79 | - { | ||
80 | - A = a; | ||
81 | - Y = b - a; | ||
82 | - X = c - a - Y; | ||
83 | - | ||
84 | - }*/ | ||
85 | - | ||
86 | - /******************************************************************* | ||
87 | - Constructor - create a rect from a position, normal, and rotation | ||
88 | - *******************************************************************/ | ||
89 | - /*CUDA_CALLABLE rect(stim::vec<T, N> c, stim::vec<T, N> normal, T width, T height, T theta) | ||
90 | - { | ||
91 | - | ||
92 | - //compute the X direction - start along world-space X | ||
93 | - Y = stim::vec<T, N>(0, 1, 0); | ||
94 | - if(Y == normal) | ||
95 | - Y = stim::vec<T, N>(0, 0, 1); | ||
96 | - | ||
97 | - X = Y.cross(normal).norm(); | ||
98 | - | ||
99 | - std::cout<<X<<std::endl; | ||
100 | - | ||
101 | - //rotate the X axis by theta radians | ||
102 | - stim::quaternion<T> q; | ||
103 | - q.CreateRotation(theta, normal); | ||
104 | - X = q.toMatrix3() * X; | ||
105 | - Y = normal.cross(X); | ||
106 | - | ||
107 | - //normalize everything | ||
108 | - X = X.norm(); | ||
109 | - Y = Y.norm(); | ||
110 | - | ||
111 | - //scale to match the rect width and height | ||
112 | - X = X * width; | ||
113 | - Y = Y * height; | ||
114 | - | ||
115 | - //set the corner of the plane | ||
116 | - A = c - X * 0.5f - Y * 0.5f; | ||
117 | - | ||
118 | - std::cout<<X<<std::endl; | ||
119 | - }*/ | ||
120 | - | ||
121 | - //boolean comparison | ||
122 | - bool operator==(const rect<T, N> & rhs) | ||
123 | - { | ||
124 | - if(C == rhs.C && X == rhs.X && Y == rhs.Y) | ||
125 | - return true; | ||
126 | - else | ||
127 | - return false; | ||
128 | - } | ||
129 | - | ||
130 | - /******************************************* | ||
131 | - Return the normal for the rect | ||
132 | - *******************************************/ | ||
133 | - CUDA_CALLABLE stim::vec<T, N> n() | ||
134 | - { | ||
135 | - return (X.cross(Y)).norm(); | ||
136 | - } | ||
137 | - | ||
138 | - CUDA_CALLABLE stim::vec<T, N> p(T a, T b) | ||
139 | - { | ||
140 | - stim::vec<T, N> result; | ||
141 | - //given the two parameters a, b = [0 1], returns the position in world space | ||
142 | - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | ||
143 | - result = A + X * a + Y * b; | ||
144 | - | ||
145 | - return result; | ||
146 | - } | ||
147 | - | ||
148 | - CUDA_CALLABLE stim::vec<T, N> operator()(T a, T b) | ||
149 | - { | ||
150 | - return p(a, b); | ||
151 | - } | ||
152 | - | ||
153 | - std::string str() | ||
154 | - { | ||
155 | - std::stringstream ss; | ||
156 | - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | ||
157 | - ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl; | ||
158 | - ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl; | ||
159 | - ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X; | ||
160 | - | ||
161 | - return ss.str(); | ||
162 | - | ||
163 | - } | ||
164 | - | ||
165 | - CUDA_CALLABLE rect<T, N> operator*(T rhs) | ||
166 | - { | ||
167 | - //scales the plane by a scalar value | ||
168 | - | ||
169 | - //create the new rectangle | ||
170 | - rect<T, N> result = *this; | ||
171 | - result.scale(rhs); | ||
172 | - | ||
173 | - return result; | ||
174 | - | ||
175 | - } | ||
176 | - | ||
177 | - CUDA_CALLABLE T dist(vec<T, N> p) | ||
178 | - { | ||
179 | - //compute the distance between a point and this rect | ||
180 | - | ||
181 | - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | ||
182 | - | ||
183 | - //first break the rect up into two triangles | ||
184 | - triangle<T, N> T0(A, A+X, A+Y); | ||
185 | - triangle<T, N> T1(A+X+Y, A+X, A+Y); | ||
186 | - | ||
187 | - | ||
188 | - T d0 = T0.dist(p); | ||
189 | - T d1 = T1.dist(p); | ||
190 | - | ||
191 | - if(d0 < d1) | ||
192 | - return d0; | ||
193 | - else | ||
194 | - return d1; | ||
195 | - } | ||
196 | - | ||
197 | - CUDA_CALLABLE T dist_max(vec<T, N> p) | ||
198 | - { | ||
199 | - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | ||
200 | - T da = (A - p).len(); | ||
201 | - T db = (A+X - p).len(); | ||
202 | - T dc = (A+Y - p).len(); | ||
203 | - T dd = (A+X+Y - p).len(); | ||
204 | - | ||
205 | - return std::max( da, std::max(db, std::max(dc, dd) ) ); | ||
206 | - } | ||
207 | -}; | ||
208 | - | ||
209 | -} //end namespace rts | ||
210 | - | ||
211 | -template <typename T, int N> | ||
212 | -std::ostream& operator<<(std::ostream& os, stim::rect<T, N> R) | ||
213 | -{ | ||
214 | - os<<R.str(); | ||
215 | - return os; | ||
216 | -} | ||
217 | - | ||
218 | - | ||
219 | -#endif | 1 | +#ifndef RTS_RECT_H |
2 | +#define RTS_RECT_H | ||
3 | + | ||
4 | +//enable CUDA_CALLABLE macro | ||
5 | +#include "../cuda/callable.h" | ||
6 | +#include "../math/vector.h" | ||
7 | +#include "../math/triangle.h" | ||
8 | +#include "../math/quaternion.h" | ||
9 | +#include <iostream> | ||
10 | +#include <iomanip> | ||
11 | +#include <algorithm> | ||
12 | + | ||
13 | +namespace stim{ | ||
14 | + | ||
15 | +//template for a rectangle class in ND space | ||
16 | +template <class T, int N = 3> | ||
17 | +struct rect | ||
18 | +{ | ||
19 | + /* | ||
20 | + ^ O | ||
21 | + | | ||
22 | + | | ||
23 | + Y C | ||
24 | + | | ||
25 | + | | ||
26 | + O---------X---------> | ||
27 | + */ | ||
28 | + | ||
29 | +private: | ||
30 | + | ||
31 | + stim::vec<T, N> C; | ||
32 | + stim::vec<T, N> X; | ||
33 | + stim::vec<T, N> Y; | ||
34 | + | ||
35 | + CUDA_CALLABLE void scale(T factor){ | ||
36 | + X *= factor; | ||
37 | + Y *= factor; | ||
38 | + } | ||
39 | + | ||
40 | + CUDA_CALLABLE void normal(vec<T, N> n){ //orient the rectangle along the specified normal | ||
41 | + | ||
42 | + n = n.norm(); //normalize, just in case | ||
43 | + vec<T, N> n_current = X.cross(Y).norm(); //compute the current normal | ||
44 | + quaternion<T> q; //create a quaternion | ||
45 | + q.CreateRotation(n_current, n); //initialize a rotation from n_current to n | ||
46 | + | ||
47 | + //apply the quaternion to the vectors and position | ||
48 | + X = q.toMatrix3() * X; | ||
49 | + Y = q.toMatrix3() * Y; | ||
50 | + } | ||
51 | + | ||
52 | + CUDA_CALLABLE void init(){ | ||
53 | + C = vec<T, N>(0, 0, 0); | ||
54 | + X = vec<T, N>(1, 0, 0); | ||
55 | + Y = vec<T, N>(0, 1, 0); | ||
56 | + } | ||
57 | + | ||
58 | +public: | ||
59 | + | ||
60 | + CUDA_CALLABLE rect(){ | ||
61 | + init(); | ||
62 | + } | ||
63 | + | ||
64 | + CUDA_CALLABLE rect(T size, T z_pos = (T)0){ | ||
65 | + init(); //use the default setup | ||
66 | + scale(size); //scale the rectangle | ||
67 | + C[2] = z_pos; | ||
68 | + } | ||
69 | + | ||
70 | + CUDA_CALLABLE rect(T size, vec<T, N> c, vec<T, N> n = vec<T, N>(0, 0, 1)){ | ||
71 | + init(); //start with the default setting | ||
72 | + C = c; | ||
73 | + scale(size); //scale the rectangle | ||
74 | + normal(n); //orient | ||
75 | + | ||
76 | + } | ||
77 | + | ||
78 | + /*CUDA_CALLABLE rect(vec<T, N> a, vec<T, N> b, vec<T, N> c) | ||
79 | + { | ||
80 | + A = a; | ||
81 | + Y = b - a; | ||
82 | + X = c - a - Y; | ||
83 | + | ||
84 | + }*/ | ||
85 | + | ||
86 | + /******************************************************************* | ||
87 | + Constructor - create a rect from a position, normal, and rotation | ||
88 | + *******************************************************************/ | ||
89 | + /*CUDA_CALLABLE rect(stim::vec<T, N> c, stim::vec<T, N> normal, T width, T height, T theta) | ||
90 | + { | ||
91 | + | ||
92 | + //compute the X direction - start along world-space X | ||
93 | + Y = stim::vec<T, N>(0, 1, 0); | ||
94 | + if(Y == normal) | ||
95 | + Y = stim::vec<T, N>(0, 0, 1); | ||
96 | + | ||
97 | + X = Y.cross(normal).norm(); | ||
98 | + | ||
99 | + std::cout<<X<<std::endl; | ||
100 | + | ||
101 | + //rotate the X axis by theta radians | ||
102 | + stim::quaternion<T> q; | ||
103 | + q.CreateRotation(theta, normal); | ||
104 | + X = q.toMatrix3() * X; | ||
105 | + Y = normal.cross(X); | ||
106 | + | ||
107 | + //normalize everything | ||
108 | + X = X.norm(); | ||
109 | + Y = Y.norm(); | ||
110 | + | ||
111 | + //scale to match the rect width and height | ||
112 | + X = X * width; | ||
113 | + Y = Y * height; | ||
114 | + | ||
115 | + //set the corner of the plane | ||
116 | + A = c - X * 0.5f - Y * 0.5f; | ||
117 | + | ||
118 | + std::cout<<X<<std::endl; | ||
119 | + }*/ | ||
120 | + | ||
121 | + //boolean comparison | ||
122 | + bool operator==(const rect<T, N> & rhs) | ||
123 | + { | ||
124 | + if(C == rhs.C && X == rhs.X && Y == rhs.Y) | ||
125 | + return true; | ||
126 | + else | ||
127 | + return false; | ||
128 | + } | ||
129 | + | ||
130 | + /******************************************* | ||
131 | + Return the normal for the rect | ||
132 | + *******************************************/ | ||
133 | + CUDA_CALLABLE stim::vec<T, N> n() | ||
134 | + { | ||
135 | + return (X.cross(Y)).norm(); | ||
136 | + } | ||
137 | + | ||
138 | + CUDA_CALLABLE stim::vec<T, N> p(T a, T b) | ||
139 | + { | ||
140 | + stim::vec<T, N> result; | ||
141 | + //given the two parameters a, b = [0 1], returns the position in world space | ||
142 | + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | ||
143 | + result = A + X * a + Y * b; | ||
144 | + | ||
145 | + return result; | ||
146 | + } | ||
147 | + | ||
148 | + CUDA_CALLABLE stim::vec<T, N> operator()(T a, T b) | ||
149 | + { | ||
150 | + return p(a, b); | ||
151 | + } | ||
152 | + | ||
153 | + std::string str() | ||
154 | + { | ||
155 | + std::stringstream ss; | ||
156 | + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | ||
157 | + ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl; | ||
158 | + ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl; | ||
159 | + ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X; | ||
160 | + | ||
161 | + return ss.str(); | ||
162 | + | ||
163 | + } | ||
164 | + | ||
165 | + CUDA_CALLABLE rect<T, N> operator*(T rhs) | ||
166 | + { | ||
167 | + //scales the plane by a scalar value | ||
168 | + | ||
169 | + //create the new rectangle | ||
170 | + rect<T, N> result = *this; | ||
171 | + result.scale(rhs); | ||
172 | + | ||
173 | + return result; | ||
174 | + | ||
175 | + } | ||
176 | + | ||
177 | + CUDA_CALLABLE T dist(vec<T, N> p) | ||
178 | + { | ||
179 | + //compute the distance between a point and this rect | ||
180 | + | ||
181 | + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | ||
182 | + | ||
183 | + //first break the rect up into two triangles | ||
184 | + triangle<T, N> T0(A, A+X, A+Y); | ||
185 | + triangle<T, N> T1(A+X+Y, A+X, A+Y); | ||
186 | + | ||
187 | + | ||
188 | + T d0 = T0.dist(p); | ||
189 | + T d1 = T1.dist(p); | ||
190 | + | ||
191 | + if(d0 < d1) | ||
192 | + return d0; | ||
193 | + else | ||
194 | + return d1; | ||
195 | + } | ||
196 | + | ||
197 | + CUDA_CALLABLE T dist_max(vec<T, N> p) | ||
198 | + { | ||
199 | + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | ||
200 | + T da = (A - p).len(); | ||
201 | + T db = (A+X - p).len(); | ||
202 | + T dc = (A+Y - p).len(); | ||
203 | + T dd = (A+X+Y - p).len(); | ||
204 | + | ||
205 | + return std::max( da, std::max(db, std::max(dc, dd) ) ); | ||
206 | + } | ||
207 | +}; | ||
208 | + | ||
209 | +} //end namespace rts | ||
210 | + | ||
211 | +template <typename T, int N> | ||
212 | +std::ostream& operator<<(std::ostream& os, stim::rect<T, N> R) | ||
213 | +{ | ||
214 | + os<<R.str(); | ||
215 | + return os; | ||
216 | +} | ||
217 | + | ||
218 | + | ||
219 | +#endif |
math/triangle.h
1 | -#ifndef RTS_TRIANGLE_H | ||
2 | -#define RTS_TRIANGLE_H | ||
3 | - | ||
4 | -//enable CUDA_CALLABLE macro | ||
5 | -#include "../cuda/callable.h" | ||
6 | -#include "../math/vector.h" | ||
7 | -#include <iostream> | ||
8 | - | ||
9 | -namespace stim{ | ||
10 | - | ||
11 | -template <class T, int N=3> | ||
12 | -struct triangle | ||
13 | -{ | ||
14 | - /* | ||
15 | - A------>B | ||
16 | - | / | ||
17 | - | / | ||
18 | - | / | ||
19 | - | / | ||
20 | - | / | ||
21 | - | / | ||
22 | - C | ||
23 | - */ | ||
24 | - private: | ||
25 | - | ||
26 | - vec<T, N> A; | ||
27 | - vec<T, N> B; | ||
28 | - vec<T, N> C; | ||
29 | - | ||
30 | - CUDA_CALLABLE vec<T, N> _p(T s, T t) | ||
31 | - { | ||
32 | - //This function returns the point specified by p = A + s(B-A) + t(C-A) | ||
33 | - vec<T, N> E0 = B-A; | ||
34 | - vec<T, N> E1 = C-A; | ||
35 | - | ||
36 | - return A + s*E0 + t*E1; | ||
37 | - } | ||
38 | - | ||
39 | - | ||
40 | - public: | ||
41 | - | ||
42 | - | ||
43 | - | ||
44 | - CUDA_CALLABLE triangle() | ||
45 | - { | ||
46 | - | ||
47 | - } | ||
48 | - | ||
49 | - CUDA_CALLABLE triangle(vec<T, N> a, vec<T, N> b, vec<T, N> c) | ||
50 | - { | ||
51 | - A = a; | ||
52 | - B = b; | ||
53 | - C = c; | ||
54 | - } | ||
55 | - | ||
56 | - CUDA_CALLABLE stim::vec<T, N> operator()(T s, T t) | ||
57 | - { | ||
58 | - return _p(s, t); | ||
59 | - } | ||
60 | - | ||
61 | - CUDA_CALLABLE vec<T, N> nearest(vec<T, N> p) | ||
62 | - { | ||
63 | - //comptue the distance between a point and this triangle | ||
64 | - // This code is adapted from: http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf | ||
65 | - | ||
66 | - vec<T, N> E0 = B-A; | ||
67 | - vec<T, N> E1 = C-A; | ||
68 | - vec<T, N> D = A - p; | ||
69 | - | ||
70 | - T a = E0.dot(E0); | ||
71 | - T b = E0.dot(E1); | ||
72 | - T c = E1.dot(E1); | ||
73 | - T d = E0.dot(D); | ||
74 | - T e = E1.dot(D); | ||
75 | - //T f = D.dot(D); | ||
76 | - | ||
77 | - T det = a*c - b*b; | ||
78 | - T s = b*e - c*d; | ||
79 | - T t = b*d - a*e; | ||
80 | - | ||
81 | - /*std::cout<<"E0: "<<E0<<std::endl; | ||
82 | - std::cout<<"E1: "<<E1<<std::endl; | ||
83 | - std::cout<<"a: "<<a<<std::endl; | ||
84 | - std::cout<<"b: "<<b<<std::endl; | ||
85 | - std::cout<<"c: "<<c<<std::endl; | ||
86 | - std::cout<<"d: "<<d<<std::endl; | ||
87 | - std::cout<<"e: "<<e<<std::endl; | ||
88 | - std::cout<<"f: "<<f<<std::endl; | ||
89 | - std::cout<<"det: "<<det<<std::endl; | ||
90 | - std::cout<<"s: "<<s<<std::endl; | ||
91 | - std::cout<<"t: "<<t<<std::endl;*/ | ||
92 | - | ||
93 | - | ||
94 | - if( s+t <= det) | ||
95 | - { | ||
96 | - if(s < 0) | ||
97 | - { | ||
98 | - if(t < 0) | ||
99 | - { | ||
100 | - //region 4 | ||
101 | - //std::cout<<"Region 4"<<std::endl; | ||
102 | - s = 0; | ||
103 | - t = 0; | ||
104 | - //done? | ||
105 | - } | ||
106 | - else | ||
107 | - { | ||
108 | - //region 3 | ||
109 | - //std::cout<<"Region 3"<<std::endl; | ||
110 | - s=0; | ||
111 | - t = ( e >= 0 ? 0 : ( -e >= c ? 1 : -e/c ) ); | ||
112 | - //done | ||
113 | - } | ||
114 | - } | ||
115 | - else if(t < 0) | ||
116 | - { | ||
117 | - //region 5 | ||
118 | - //std::cout<<"Region 5"<<std::endl; | ||
119 | - s = ( d >= 0 ? 0 : ( -d >= a ? 1 : -d/a ) ); | ||
120 | - t = 0; | ||
121 | - //done | ||
122 | - } | ||
123 | - else | ||
124 | - { | ||
125 | - //region 0 | ||
126 | - //std::cout<<"Region 0"<<std::endl; | ||
127 | - T invDet = (T)1.0/det; | ||
128 | - s *= invDet; | ||
129 | - t *= invDet; | ||
130 | - //done | ||
131 | - } | ||
132 | - } | ||
133 | - else | ||
134 | - { | ||
135 | - if(s < 0) | ||
136 | - { | ||
137 | - //region 2 | ||
138 | - //std::cout<<"Region 2"<<std::endl; | ||
139 | - s = 0; | ||
140 | - t = 1; | ||
141 | - //done? | ||
142 | - | ||
143 | - } | ||
144 | - else if(t < 0) | ||
145 | - { | ||
146 | - //region 6 | ||
147 | - //std::cout<<"Region 6"<<std::endl; | ||
148 | - s = 1; | ||
149 | - t = 0; | ||
150 | - //done? | ||
151 | - } | ||
152 | - else | ||
153 | - { | ||
154 | - //region 1 | ||
155 | - //std::cout<<"Region 1"<<std::endl; | ||
156 | - T numer = c + e - b - d; | ||
157 | - if( numer <= 0 ) | ||
158 | - s = 0; | ||
159 | - else | ||
160 | - { | ||
161 | - T denom = a - 2 * b + c; | ||
162 | - s = ( numer >= denom ? 1 : numer/denom ); | ||
163 | - } | ||
164 | - t = 1 - s; | ||
165 | - //done | ||
166 | - } | ||
167 | - } | ||
168 | - | ||
169 | - //std::cout<<"s: "<<s<<std::endl; | ||
170 | - //std::cout<<"t: "<<t<<std::endl; | ||
171 | - | ||
172 | - //std::cout<<"p: "<<_p(s, t)<<std::endl; | ||
173 | - | ||
174 | - return _p(s, t); | ||
175 | - | ||
176 | - } | ||
177 | - | ||
178 | - CUDA_CALLABLE T dist(vec<T, N> p) | ||
179 | - { | ||
180 | - vec<T, N> n = nearest(p); | ||
181 | - | ||
182 | - return (p - n).len(); | ||
183 | - } | ||
184 | -}; | ||
185 | - | ||
186 | -} | ||
187 | - | ||
188 | -#endif | 1 | +#ifndef RTS_TRIANGLE_H |
2 | +#define RTS_TRIANGLE_H | ||
3 | + | ||
4 | +//enable CUDA_CALLABLE macro | ||
5 | +#include "../cuda/callable.h" | ||
6 | +#include "../math/vector.h" | ||
7 | +#include <iostream> | ||
8 | + | ||
9 | +namespace stim{ | ||
10 | + | ||
11 | +template <class T, int N=3> | ||
12 | +struct triangle | ||
13 | +{ | ||
14 | + /* | ||
15 | + A------>B | ||
16 | + | / | ||
17 | + | / | ||
18 | + | / | ||
19 | + | / | ||
20 | + | / | ||
21 | + | / | ||
22 | + C | ||
23 | + */ | ||
24 | + private: | ||
25 | + | ||
26 | + vec<T, N> A; | ||
27 | + vec<T, N> B; | ||
28 | + vec<T, N> C; | ||
29 | + | ||
30 | + CUDA_CALLABLE vec<T, N> _p(T s, T t) | ||
31 | + { | ||
32 | + //This function returns the point specified by p = A + s(B-A) + t(C-A) | ||
33 | + vec<T, N> E0 = B-A; | ||
34 | + vec<T, N> E1 = C-A; | ||
35 | + | ||
36 | + return A + s*E0 + t*E1; | ||
37 | + } | ||
38 | + | ||
39 | + | ||
40 | + public: | ||
41 | + | ||
42 | + | ||
43 | + | ||
44 | + CUDA_CALLABLE triangle() | ||
45 | + { | ||
46 | + | ||
47 | + } | ||
48 | + | ||
49 | + CUDA_CALLABLE triangle(vec<T, N> a, vec<T, N> b, vec<T, N> c) | ||
50 | + { | ||
51 | + A = a; | ||
52 | + B = b; | ||
53 | + C = c; | ||
54 | + } | ||
55 | + | ||
56 | + CUDA_CALLABLE stim::vec<T, N> operator()(T s, T t) | ||
57 | + { | ||
58 | + return _p(s, t); | ||
59 | + } | ||
60 | + | ||
61 | + CUDA_CALLABLE vec<T, N> nearest(vec<T, N> p) | ||
62 | + { | ||
63 | + //comptue the distance between a point and this triangle | ||
64 | + // This code is adapted from: http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf | ||
65 | + | ||
66 | + vec<T, N> E0 = B-A; | ||
67 | + vec<T, N> E1 = C-A; | ||
68 | + vec<T, N> D = A - p; | ||
69 | + | ||
70 | + T a = E0.dot(E0); | ||
71 | + T b = E0.dot(E1); | ||
72 | + T c = E1.dot(E1); | ||
73 | + T d = E0.dot(D); | ||
74 | + T e = E1.dot(D); | ||
75 | + //T f = D.dot(D); | ||
76 | + | ||
77 | + T det = a*c - b*b; | ||
78 | + T s = b*e - c*d; | ||
79 | + T t = b*d - a*e; | ||
80 | + | ||
81 | + /*std::cout<<"E0: "<<E0<<std::endl; | ||
82 | + std::cout<<"E1: "<<E1<<std::endl; | ||
83 | + std::cout<<"a: "<<a<<std::endl; | ||
84 | + std::cout<<"b: "<<b<<std::endl; | ||
85 | + std::cout<<"c: "<<c<<std::endl; | ||
86 | + std::cout<<"d: "<<d<<std::endl; | ||
87 | + std::cout<<"e: "<<e<<std::endl; | ||
88 | + std::cout<<"f: "<<f<<std::endl; | ||
89 | + std::cout<<"det: "<<det<<std::endl; | ||
90 | + std::cout<<"s: "<<s<<std::endl; | ||
91 | + std::cout<<"t: "<<t<<std::endl;*/ | ||
92 | + | ||
93 | + | ||
94 | + if( s+t <= det) | ||
95 | + { | ||
96 | + if(s < 0) | ||
97 | + { | ||
98 | + if(t < 0) | ||
99 | + { | ||
100 | + //region 4 | ||
101 | + //std::cout<<"Region 4"<<std::endl; | ||
102 | + s = 0; | ||
103 | + t = 0; | ||
104 | + //done? | ||
105 | + } | ||
106 | + else | ||
107 | + { | ||
108 | + //region 3 | ||
109 | + //std::cout<<"Region 3"<<std::endl; | ||
110 | + s=0; | ||
111 | + t = ( e >= 0 ? 0 : ( -e >= c ? 1 : -e/c ) ); | ||
112 | + //done | ||
113 | + } | ||
114 | + } | ||
115 | + else if(t < 0) | ||
116 | + { | ||
117 | + //region 5 | ||
118 | + //std::cout<<"Region 5"<<std::endl; | ||
119 | + s = ( d >= 0 ? 0 : ( -d >= a ? 1 : -d/a ) ); | ||
120 | + t = 0; | ||
121 | + //done | ||
122 | + } | ||
123 | + else | ||
124 | + { | ||
125 | + //region 0 | ||
126 | + //std::cout<<"Region 0"<<std::endl; | ||
127 | + T invDet = (T)1.0/det; | ||
128 | + s *= invDet; | ||
129 | + t *= invDet; | ||
130 | + //done | ||
131 | + } | ||
132 | + } | ||
133 | + else | ||
134 | + { | ||
135 | + if(s < 0) | ||
136 | + { | ||
137 | + //region 2 | ||
138 | + //std::cout<<"Region 2"<<std::endl; | ||
139 | + s = 0; | ||
140 | + t = 1; | ||
141 | + //done? | ||
142 | + | ||
143 | + } | ||
144 | + else if(t < 0) | ||
145 | + { | ||
146 | + //region 6 | ||
147 | + //std::cout<<"Region 6"<<std::endl; | ||
148 | + s = 1; | ||
149 | + t = 0; | ||
150 | + //done? | ||
151 | + } | ||
152 | + else | ||
153 | + { | ||
154 | + //region 1 | ||
155 | + //std::cout<<"Region 1"<<std::endl; | ||
156 | + T numer = c + e - b - d; | ||
157 | + if( numer <= 0 ) | ||
158 | + s = 0; | ||
159 | + else | ||
160 | + { | ||
161 | + T denom = a - 2 * b + c; | ||
162 | + s = ( numer >= denom ? 1 : numer/denom ); | ||
163 | + } | ||
164 | + t = 1 - s; | ||
165 | + //done | ||
166 | + } | ||
167 | + } | ||
168 | + | ||
169 | + //std::cout<<"s: "<<s<<std::endl; | ||
170 | + //std::cout<<"t: "<<t<<std::endl; | ||
171 | + | ||
172 | + //std::cout<<"p: "<<_p(s, t)<<std::endl; | ||
173 | + | ||
174 | + return _p(s, t); | ||
175 | + | ||
176 | + } | ||
177 | + | ||
178 | + CUDA_CALLABLE T dist(vec<T, N> p) | ||
179 | + { | ||
180 | + vec<T, N> n = nearest(p); | ||
181 | + | ||
182 | + return (p - n).len(); | ||
183 | + } | ||
184 | +}; | ||
185 | + | ||
186 | +} | ||
187 | + | ||
188 | +#endif |
optics/material.h
1 | -#ifndef RTS_MATERIAL_H | ||
2 | -#define RTS_MATERIAL_H | ||
3 | - | ||
4 | -#include <vector> | ||
5 | -#include <ostream> | ||
6 | -#include <iostream> | ||
7 | -#include <fstream> | ||
8 | -#include <complex> | ||
9 | -#include <algorithm> | ||
10 | -#include <sstream> | ||
11 | -#include "../math/complex.h" | ||
12 | -#include "../math/constants.h" | ||
13 | -#include "../math/function.h" | ||
14 | - | ||
15 | -namespace stim{ | ||
16 | - | ||
17 | -//Material class - default representation for the material property is the refractive index (RI) | ||
18 | -template<typename T> | ||
19 | -class material : public function< T, complex<T> >{ | ||
20 | - | ||
21 | -public: | ||
22 | - enum wave_property{microns, inverse_cm}; | ||
23 | - enum material_property{ri, absorbance}; | ||
24 | - | ||
25 | -private: | ||
26 | - | ||
27 | - using function< T, complex<T> >::X; | ||
28 | - using function< T, complex<T> >::Y; | ||
29 | - using function< T, complex<T> >::insert; | ||
30 | - using function< T, complex<T> >::bounding; | ||
31 | - | ||
32 | - std::string name; //name for the material (defaults to file name) | ||
33 | - | ||
34 | - void process_header(std::string str, wave_property& wp, material_property& mp){ | ||
35 | - | ||
36 | - std::stringstream ss(str); //create a stream from the data string | ||
37 | - std::string line; | ||
38 | - std::getline(ss, line); //get the first line as a string | ||
39 | - while(line[0] == '#'){ //continue looping while the line is a comment | ||
40 | - | ||
41 | - std::stringstream lstream(line); //create a stream from the line | ||
42 | - lstream.ignore(); //ignore the first character ('#') | ||
43 | - | ||
44 | - std::string prop; //get the property name | ||
45 | - lstream>>prop; | ||
46 | - | ||
47 | - if(prop == "X"){ | ||
48 | - std::string wp_name; | ||
49 | - lstream>>wp_name; | ||
50 | - if(wp_name == "microns") wp = microns; | ||
51 | - else if(wp_name == "inverse_cm") wp = inverse_cm; | ||
52 | - } | ||
53 | - else if(prop == "Y"){ | ||
54 | - std::string mp_name; | ||
55 | - lstream>>mp_name; | ||
56 | - if(mp_name == "ri") mp = ri; | ||
57 | - else if(mp_name == "absorbance") mp = absorbance; | ||
58 | - } | ||
59 | - | ||
60 | - std::getline(ss, line); //get the next line | ||
61 | - } | ||
62 | - | ||
63 | - function< T, stim::complex<T> >::process_string(str); | ||
64 | - } | ||
65 | - | ||
66 | - void from_inverse_cm(){ | ||
67 | - //convert inverse centimeters to wavelength (in microns) | ||
68 | - for(unsigned int i=0; i<X.size(); i++) | ||
69 | - X[i] = 10000 / X[i]; | ||
70 | - | ||
71 | - //reverse the function array | ||
72 | - std::reverse(X.begin(), X.end()); | ||
73 | - std::reverse(Y.begin(), Y.end()); | ||
74 | - | ||
75 | - } | ||
76 | - | ||
77 | - void init(){ | ||
78 | - bounding[0] = bounding[1] = stim::complex<T>(1, 0); | ||
79 | - } | ||
80 | - | ||
81 | - | ||
82 | -public: | ||
83 | - | ||
84 | - material(std::string filename, wave_property wp, material_property mp){ | ||
85 | - name = filename; | ||
86 | - load(filename, wp, mp); | ||
87 | - } | ||
88 | - | ||
89 | - material(std::string filename){ | ||
90 | - name = filename; | ||
91 | - load(filename); | ||
92 | - } | ||
93 | - | ||
94 | - material(){ | ||
95 | - init(); | ||
96 | - } | ||
97 | - | ||
98 | - complex<T> getN(T lambda){ | ||
99 | - return function< T, complex<T> >::linear(lambda); | ||
100 | - } | ||
101 | - | ||
102 | - void load(std::string filename, wave_property wp, material_property mp){ | ||
103 | - | ||
104 | - //load the file as a function | ||
105 | - function< T, complex<T> >::load(filename); | ||
106 | - } | ||
107 | - | ||
108 | - void load(std::string filename){ | ||
109 | - | ||
110 | - wave_property wp = inverse_cm; | ||
111 | - material_property mp = ri; | ||
112 | - //turn the file into a string | ||
113 | - std::ifstream t(filename.c_str()); //open the file as a stream | ||
114 | - | ||
115 | - if(!t){ | ||
116 | - std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl; | ||
117 | - exit(1); | ||
118 | - } | ||
119 | - std::string str((std::istreambuf_iterator<char>(t)), | ||
120 | - std::istreambuf_iterator<char>()); | ||
121 | - | ||
122 | - //process the header information | ||
123 | - process_header(str, wp, mp); | ||
124 | - | ||
125 | - //convert units | ||
126 | - if(wp == inverse_cm) | ||
127 | - from_inverse_cm(); | ||
128 | - //set the bounding values | ||
129 | - bounding[0] = Y[0]; | ||
130 | - bounding[1] = Y.back(); | ||
131 | - } | ||
132 | - std::string str(){ | ||
133 | - std::stringstream ss; | ||
134 | - ss<<name<<std::endl; | ||
135 | - ss<<function< T, complex<T> >::str(); | ||
136 | - return ss.str(); | ||
137 | - } | ||
138 | - std::string get_name(){ | ||
139 | - return name; | ||
140 | - } | ||
141 | - | ||
142 | - void set_name(std::string str){ | ||
143 | - name = str; | ||
144 | - } | ||
145 | - | ||
146 | -}; | ||
147 | - | ||
148 | -} | ||
149 | - | ||
150 | - | ||
151 | - | ||
152 | - | ||
153 | -#endif | 1 | +#ifndef RTS_MATERIAL_H |
2 | +#define RTS_MATERIAL_H | ||
3 | + | ||
4 | +#include <vector> | ||
5 | +#include <ostream> | ||
6 | +#include <iostream> | ||
7 | +#include <fstream> | ||
8 | +#include <complex> | ||
9 | +#include <algorithm> | ||
10 | +#include <sstream> | ||
11 | +#include "../math/complex.h" | ||
12 | +#include "../math/constants.h" | ||
13 | +#include "../math/function.h" | ||
14 | + | ||
15 | +namespace stim{ | ||
16 | + | ||
17 | +//Material class - default representation for the material property is the refractive index (RI) | ||
18 | +template<typename T> | ||
19 | +class material : public function< T, complex<T> >{ | ||
20 | + | ||
21 | +public: | ||
22 | + enum wave_property{microns, inverse_cm}; | ||
23 | + enum material_property{ri, absorbance}; | ||
24 | + | ||
25 | +private: | ||
26 | + | ||
27 | + using function< T, complex<T> >::X; | ||
28 | + using function< T, complex<T> >::Y; | ||
29 | + using function< T, complex<T> >::insert; | ||
30 | + using function< T, complex<T> >::bounding; | ||
31 | + | ||
32 | + std::string name; //name for the material (defaults to file name) | ||
33 | + | ||
34 | + void process_header(std::string str, wave_property& wp, material_property& mp){ | ||
35 | + | ||
36 | + std::stringstream ss(str); //create a stream from the data string | ||
37 | + std::string line; | ||
38 | + std::getline(ss, line); //get the first line as a string | ||
39 | + while(line[0] == '#'){ //continue looping while the line is a comment | ||
40 | + | ||
41 | + std::stringstream lstream(line); //create a stream from the line | ||
42 | + lstream.ignore(); //ignore the first character ('#') | ||
43 | + | ||
44 | + std::string prop; //get the property name | ||
45 | + lstream>>prop; | ||
46 | + | ||
47 | + if(prop == "X"){ | ||
48 | + std::string wp_name; | ||
49 | + lstream>>wp_name; | ||
50 | + if(wp_name == "microns") wp = microns; | ||
51 | + else if(wp_name == "inverse_cm") wp = inverse_cm; | ||
52 | + } | ||
53 | + else if(prop == "Y"){ | ||
54 | + std::string mp_name; | ||
55 | + lstream>>mp_name; | ||
56 | + if(mp_name == "ri") mp = ri; | ||
57 | + else if(mp_name == "absorbance") mp = absorbance; | ||
58 | + } | ||
59 | + | ||
60 | + std::getline(ss, line); //get the next line | ||
61 | + } | ||
62 | + | ||
63 | + function< T, stim::complex<T> >::process_string(str); | ||
64 | + } | ||
65 | + | ||
66 | + void from_inverse_cm(){ | ||
67 | + //convert inverse centimeters to wavelength (in microns) | ||
68 | + for(unsigned int i=0; i<X.size(); i++) | ||
69 | + X[i] = 10000 / X[i]; | ||
70 | + | ||
71 | + //reverse the function array | ||
72 | + std::reverse(X.begin(), X.end()); | ||
73 | + std::reverse(Y.begin(), Y.end()); | ||
74 | + | ||
75 | + } | ||
76 | + | ||
77 | + void init(){ | ||
78 | + bounding[0] = bounding[1] = stim::complex<T>(1, 0); | ||
79 | + } | ||
80 | + | ||
81 | + | ||
82 | +public: | ||
83 | + | ||
84 | + material(std::string filename, wave_property wp, material_property mp){ | ||
85 | + name = filename; | ||
86 | + load(filename, wp, mp); | ||
87 | + } | ||
88 | + | ||
89 | + material(std::string filename){ | ||
90 | + name = filename; | ||
91 | + load(filename); | ||
92 | + } | ||
93 | + | ||
94 | + material(){ | ||
95 | + init(); | ||
96 | + } | ||
97 | + | ||
98 | + complex<T> getN(T lambda){ | ||
99 | + return function< T, complex<T> >::linear(lambda); | ||
100 | + } | ||
101 | + | ||
102 | + void load(std::string filename, wave_property wp, material_property mp){ | ||
103 | + | ||
104 | + //load the file as a function | ||
105 | + function< T, complex<T> >::load(filename); | ||
106 | + } | ||
107 | + | ||
108 | + void load(std::string filename){ | ||
109 | + | ||
110 | + wave_property wp = inverse_cm; | ||
111 | + material_property mp = ri; | ||
112 | + //turn the file into a string | ||
113 | + std::ifstream t(filename.c_str()); //open the file as a stream | ||
114 | + | ||
115 | + if(!t){ | ||
116 | + std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl; | ||
117 | + exit(1); | ||
118 | + } | ||
119 | + std::string str((std::istreambuf_iterator<char>(t)), | ||
120 | + std::istreambuf_iterator<char>()); | ||
121 | + | ||
122 | + //process the header information | ||
123 | + process_header(str, wp, mp); | ||
124 | + | ||
125 | + //convert units | ||
126 | + if(wp == inverse_cm) | ||
127 | + from_inverse_cm(); | ||
128 | + //set the bounding values | ||
129 | + bounding[0] = Y[0]; | ||
130 | + bounding[1] = Y.back(); | ||
131 | + } | ||
132 | + std::string str(){ | ||
133 | + std::stringstream ss; | ||
134 | + ss<<name<<std::endl; | ||
135 | + ss<<function< T, complex<T> >::str(); | ||
136 | + return ss.str(); | ||
137 | + } | ||
138 | + std::string get_name(){ | ||
139 | + return name; | ||
140 | + } | ||
141 | + | ||
142 | + void set_name(std::string str){ | ||
143 | + name = str; | ||
144 | + } | ||
145 | + | ||
146 | +}; | ||
147 | + | ||
148 | +} | ||
149 | + | ||
150 | + | ||
151 | + | ||
152 | + | ||
153 | +#endif |
optics/mirst-1d.cuh
1 | -#include "../optics/material.h" | ||
2 | -#include "../math/complexfield.cuh" | ||
3 | -#include "../math/constants.h" | ||
4 | -//#include "../envi/bil.h" | ||
5 | - | ||
6 | -#include "cufft.h" | ||
7 | - | ||
8 | -#include <vector> | ||
9 | -#include <sstream> | ||
10 | - | ||
11 | -namespace stim{ | ||
12 | - | ||
13 | -//this function writes a sinc function to "dest" such that an iFFT produces a slab | ||
14 | -template<typename T> | ||
15 | -__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri, | ||
16 | - T* src, T* zf, | ||
17 | - T w, unsigned int zR, unsigned int nuR){ | ||
18 | - //dest = complex field representing the sample | ||
19 | - //ri = refractive indices for each wavelength | ||
20 | - //src = intensity of the light source for each wavelength | ||
21 | - //zf = z position of the slab interface for each wavelength (accounting for optical path length) | ||
22 | - //w = width of the slab (in pixels) | ||
23 | - //zR = number of z-axis samples | ||
24 | - //nuR = number of wavelengths | ||
25 | - | ||
26 | - //get the current coordinate in the plane slice | ||
27 | - int ifz = blockIdx.x * blockDim.x + threadIdx.x; | ||
28 | - int inu = blockIdx.y * blockDim.y + threadIdx.y; | ||
29 | - | ||
30 | - //make sure that the thread indices are in-bounds | ||
31 | - if(inu >= nuR || ifz >= zR) return; | ||
32 | - | ||
33 | - int i = inu * zR + ifz; | ||
34 | - | ||
35 | - T fz; | ||
36 | - if(ifz < zR/2) | ||
37 | - fz = ifz / (T)zR; | ||
38 | - else | ||
39 | - fz = -(zR - ifz) / (T)zR; | ||
40 | - | ||
41 | - //if the slab starts outside of the simulation domain, just return | ||
42 | - if(zf[inu] >= zR) return; | ||
43 | - | ||
44 | - //fill the array along z with a sinc function representing the Fourier transform of the layer | ||
45 | - | ||
46 | - T opl = w * ri[inu].real(); //optical path length | ||
47 | - | ||
48 | - //handle the case where the slab goes outside the simulation domain | ||
49 | - if(zf[inu] + opl >= zR) | ||
50 | - opl = zR - zf[inu]; | ||
51 | - | ||
52 | - if(opl == 0) return; | ||
53 | - | ||
54 | - //T l = w * ri[inu].real(); | ||
55 | - //complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0)); | ||
56 | - complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2)); | ||
57 | - | ||
58 | - complex<T> eta = ri[inu] * ri[inu] - 1; | ||
59 | - | ||
60 | - //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz); | ||
61 | - if(ifz == 0) | ||
62 | - dest[i] += opl * exp(e) * eta * src[inu]; | ||
63 | - else | ||
64 | - dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl); | ||
65 | -} | ||
66 | - | ||
67 | -template<typename T> | ||
68 | -__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){ | ||
69 | - //zf = current z depth (optical path length) in pixels | ||
70 | - //ri = refractive index of the material | ||
71 | - //w = actual width of the layer (in pixels) | ||
72 | - | ||
73 | - | ||
74 | - //compute the index for this thread | ||
75 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | ||
76 | - if(i >= S) return; | ||
77 | - | ||
78 | - if(ri == NULL) | ||
79 | - zf[i] += w; | ||
80 | - else | ||
81 | - zf[i] += ri[i].real() * w; | ||
82 | -} | ||
83 | - | ||
84 | -//apply the 1D MIRST filter to an existing sample (overwriting the sample) | ||
85 | -template<typename T> | ||
86 | -__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda, | ||
87 | - T dFz, | ||
88 | - T inNA, T outNA, | ||
89 | - unsigned int lambdaR, unsigned int zR, | ||
90 | - T sigma = 0){ | ||
91 | - //sampleFFT = the sample in the Fourier domain (will be overwritten) | ||
92 | - //lambda = list of wavelengths | ||
93 | - //dFz = delta along the Fz axis in the frequency domain | ||
94 | - //inNA = NA of the internal obscuration | ||
95 | - //outNA = NA of the objective | ||
96 | - //zR = number of pixels along the Fz axis (same as the z-axis) | ||
97 | - //lambdaR = number of wavelengths | ||
98 | - //sigma = width of the Gaussian source | ||
99 | - int ifz = blockIdx.x * blockDim.x + threadIdx.x; | ||
100 | - int inu = blockIdx.y * blockDim.y + threadIdx.y; | ||
101 | - | ||
102 | - if(inu >= lambdaR || ifz >= zR) return; | ||
103 | - | ||
104 | - //calculate the index into the sample FT | ||
105 | - int i = inu * zR + ifz; | ||
106 | - | ||
107 | - //compute the frequency (and set all negative spatial frequencies to zero) | ||
108 | - T fz; | ||
109 | - if(ifz < zR / 2) | ||
110 | - fz = ifz * dFz; | ||
111 | - //if the spatial frequency is negative, set it to zero and exit | ||
112 | - else{ | ||
113 | - sampleFFT[i] = 0; | ||
114 | - return; | ||
115 | - } | ||
116 | - | ||
117 | - //compute the frequency in inverse microns | ||
118 | - T nu = 1/lambda[inu]; | ||
119 | - | ||
120 | - //determine the radius of the integration circle | ||
121 | - T nu_sq = nu * nu; | ||
122 | - T fz_sq = (fz * fz) / 4; | ||
123 | - | ||
124 | - //cut off frequencies above the diffraction limit | ||
125 | - T r; | ||
126 | - if(fz_sq < nu_sq) | ||
127 | - r = sqrt(nu_sq - fz_sq); | ||
128 | - else | ||
129 | - r = 0; | ||
130 | - | ||
131 | - //account for the optics | ||
132 | - T Q = 0; | ||
133 | - if(r > nu * inNA && r < nu * outNA) | ||
134 | - Q = 1; | ||
135 | - | ||
136 | - //account for the source | ||
137 | - //T sigma = 30.0; | ||
138 | - T s = exp( - (r*r * sigma*sigma) / 2 ); | ||
139 | - //T s=1; | ||
140 | - | ||
141 | - //compute the final filter | ||
142 | - T mirst = 0; | ||
143 | - if(fz != 0) | ||
144 | - mirst = 2 * stimPI * r * s * Q * (1/fz); | ||
145 | - | ||
146 | - sampleFFT[i] *= mirst; | ||
147 | - | ||
148 | -} | ||
149 | - | ||
150 | -/*This object performs a 1-dimensional (layered) MIRST simulation | ||
151 | -*/ | ||
152 | -template<typename T> | ||
153 | -class mirst1d{ | ||
154 | - | ||
155 | -private: | ||
156 | - unsigned int Z; //z-axis resolution | ||
157 | - unsigned int pad; //pixel padding on either side of the sample | ||
158 | - | ||
159 | - std::vector< material<T> > matlist; //list of materials | ||
160 | - std::vector< T > layers; //list of layer thicknesses | ||
161 | - | ||
162 | - std::vector< T > lambdas; //list of wavelengths that are being simulated | ||
163 | - unsigned int S; //number of wavelengths (size of "lambdas") | ||
164 | - | ||
165 | - T NA[2]; //numerical aperature (central obscuration and outer diameter) | ||
166 | - | ||
167 | - function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters) | ||
168 | - | ||
169 | - complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc. | ||
170 | - | ||
171 | - void fft(int direction = CUFFT_FORWARD){ | ||
172 | - | ||
173 | - unsigned padZ = Z + pad; | ||
174 | - | ||
175 | - //create cuFFT handles | ||
176 | - cufftHandle plan; | ||
177 | - cufftResult result; | ||
178 | - | ||
179 | - if(sizeof(T) == 4) | ||
180 | - result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision | ||
181 | - else | ||
182 | - result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision | ||
183 | - | ||
184 | - //check for Plan 1D errors | ||
185 | - if(result != CUFFT_SUCCESS){ | ||
186 | - std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl; | ||
187 | - CufftError(result); | ||
188 | - exit(1); | ||
189 | - } | ||
190 | - | ||
191 | - if(sizeof(T) == 4) | ||
192 | - result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction); | ||
193 | - else | ||
194 | - result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction); | ||
195 | - | ||
196 | - //check for FFT errors | ||
197 | - if(result != CUFFT_SUCCESS){ | ||
198 | - std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl; | ||
199 | - CufftError(result); | ||
200 | - exit(1); | ||
201 | - } | ||
202 | - | ||
203 | - cufftDestroy(plan); | ||
204 | - } | ||
205 | - | ||
206 | - | ||
207 | - //initialize the scratch memory | ||
208 | - void init_scratch(){ | ||
209 | - scratch = complexfield<T, 1>(Z + pad , lambdas.size()); | ||
210 | - scratch = 0; | ||
211 | - } | ||
212 | - | ||
213 | - //get the list of scattering efficiency (eta) values for a specified layer | ||
214 | - std::vector< complex<T> > layer_etas(unsigned int l){ | ||
215 | - | ||
216 | - std::vector< complex<T> > etas; | ||
217 | - | ||
218 | - //fill the list of etas | ||
219 | - for(unsigned int i=0; i<lambdas.size(); i++) | ||
220 | - etas.push_back( matlist[l].eta(lambdas[i]) ); | ||
221 | - return etas; | ||
222 | - } | ||
223 | - | ||
224 | - //calculates the optimal block and grid sizes using information from the GPU | ||
225 | - void cuda_params(dim3& grids, dim3& blocks){ | ||
226 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | ||
227 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
228 | - | ||
229 | - //create one thread for each detector pixel | ||
230 | - blocks = dim3(SQRT_BLOCK, SQRT_BLOCK); | ||
231 | - grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
232 | - } | ||
233 | - | ||
234 | - //add the fourier transform of layer n to the scratch space | ||
235 | - void build_layer_fft(unsigned int n, T* zf){ | ||
236 | - unsigned int paddedZ = Z + pad; | ||
237 | - | ||
238 | - T wpx = layers[n] / dz(); //calculate the width of the layer in pixels | ||
239 | - | ||
240 | - //allocate memory for the refractive index | ||
241 | - complex<T>* gpuRi; | ||
242 | - HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S)); | ||
243 | - | ||
244 | - //allocate memory for the source profile | ||
245 | - T* gpuSrc; | ||
246 | - HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S)); | ||
247 | - | ||
248 | - complex<T> ri; | ||
249 | - T source; | ||
250 | - //store the refractive index and source profile in a CPU array | ||
251 | - for(int inu=0; inu<S; inu++){ | ||
252 | - //save the refractive index to the GPU | ||
253 | - ri = matlist[n].getN(lambdas[inu]); | ||
254 | - HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice )); | ||
255 | - | ||
256 | - //save the source profile to the GPU | ||
257 | - source = source_profile(10000 / lambdas[inu]); | ||
258 | - HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice )); | ||
259 | - | ||
260 | - } | ||
261 | - | ||
262 | - //create one thread for each pixel of the field slice | ||
263 | - dim3 gridDim, blockDim; | ||
264 | - cuda_params(gridDim, blockDim); | ||
265 | - stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S); | ||
266 | - | ||
267 | - int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size | ||
268 | - int linGrid = S / linBlock + 1; | ||
269 | - stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S); | ||
270 | - | ||
271 | - //free memory | ||
272 | - HANDLE_ERROR(cudaFree(gpuRi)); | ||
273 | - HANDLE_ERROR(cudaFree(gpuSrc)); | ||
274 | - } | ||
275 | - | ||
276 | - void build_sample(){ | ||
277 | - init_scratch(); //initialize the GPU scratch space | ||
278 | - //build_layer(1); | ||
279 | - | ||
280 | - T* zf; | ||
281 | - HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S)); | ||
282 | - HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S)); | ||
283 | - | ||
284 | - //render each layer of the sample | ||
285 | - for(unsigned int l=0; l<layers.size(); l++){ | ||
286 | - build_layer_fft(l, zf); | ||
287 | - } | ||
288 | - | ||
289 | - HANDLE_ERROR(cudaFree(zf)); | ||
290 | - } | ||
291 | - | ||
292 | - void apply_filter(){ | ||
293 | - dim3 gridDim, blockDim; | ||
294 | - cuda_params(gridDim, blockDim); | ||
295 | - | ||
296 | - unsigned int Zpad = Z + pad; | ||
297 | - | ||
298 | - T sim_range = dz() * Zpad; | ||
299 | - T dFz = 1 / sim_range; | ||
300 | - | ||
301 | - //copy the array of wavelengths to the GPU | ||
302 | - T* gpuLambdas; | ||
303 | - HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad)); | ||
304 | - HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice)); | ||
305 | - stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas, | ||
306 | - dFz, | ||
307 | - NA[0], NA[1], | ||
308 | - S, Zpad); | ||
309 | - } | ||
310 | - | ||
311 | - //crop the image to the sample thickness - keep in mind that sample thickness != optical path length | ||
312 | - void crop(){ | ||
313 | - | ||
314 | - scratch = scratch.crop(Z, S); | ||
315 | - } | ||
316 | - | ||
317 | - //save the scratch field as a binary file | ||
318 | - void to_binary(std::string filename){ | ||
319 | - | ||
320 | - } | ||
321 | - | ||
322 | - | ||
323 | -public: | ||
324 | - | ||
325 | - //constructor | ||
326 | - mirst1d(unsigned int rZ = 100, | ||
327 | - unsigned int padding = 0){ | ||
328 | - Z = rZ; | ||
329 | - pad = padding; | ||
330 | - NA[0] = 0; | ||
331 | - NA[1] = 0.8; | ||
332 | - S = 0; | ||
333 | - source_profile = 1; | ||
334 | - } | ||
335 | - | ||
336 | - //add a layer, thickness = microns | ||
337 | - void add_layer(material<T> mat, T thickness){ | ||
338 | - matlist.push_back(mat); | ||
339 | - layers.push_back(thickness); | ||
340 | - } | ||
341 | - | ||
342 | - void add_layer(std::string filename, T thickness){ | ||
343 | - add_layer(material<T>(filename), thickness); | ||
344 | - } | ||
345 | - | ||
346 | - //adds a profile spectrum for the light source | ||
347 | - void set_source(std::string filename){ | ||
348 | - source_profile.load(filename); | ||
349 | - } | ||
350 | - | ||
351 | - //adds a block of wavenumbers (cm^-1) to the simulation parameters | ||
352 | - void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){ | ||
353 | - unsigned int nu = start; | ||
354 | - while(nu <= stop){ | ||
355 | - lambdas.push_back((T)10000 / nu); | ||
356 | - nu += step; | ||
357 | - } | ||
358 | - S = lambdas.size(); //increment the number of wavelengths (shorthand for later) | ||
359 | - } | ||
360 | - | ||
361 | - T thickness(){ | ||
362 | - T t = 0; | ||
363 | - for(unsigned int l=0; l<layers.size(); l++) | ||
364 | - t += layers[l]; | ||
365 | - return t; | ||
366 | - } | ||
367 | - | ||
368 | - void padding(unsigned int padding = 0){ | ||
369 | - pad = padding; | ||
370 | - } | ||
371 | - | ||
372 | - T dz(){ | ||
373 | - return thickness() / Z; //calculate the z-axis step size | ||
374 | - } | ||
375 | - | ||
376 | - void na(T in, T out){ | ||
377 | - NA[0] = in; | ||
378 | - NA[1] = out; | ||
379 | - } | ||
380 | - | ||
381 | - void na(T out){ | ||
382 | - na(0, out); | ||
383 | - } | ||
384 | - | ||
385 | - stim::function<T, T> get_source(){ | ||
386 | - return source_profile; | ||
387 | - } | ||
388 | - | ||
389 | - void save_sample(std::string filename){ | ||
390 | - //create a sample and save the magnitude as an image | ||
391 | - build_sample(); | ||
392 | - fft(CUFFT_INVERSE); | ||
393 | - scratch.toImage(filename, stim::complexfield<T, 1>::magnitude); | ||
394 | - } | ||
395 | - | ||
396 | - void save_mirst(std::string filename, bool binary = true){ | ||
397 | - //apply the MIRST filter to a sample and save the image | ||
398 | - | ||
399 | - //build the sample in the Fourier domain | ||
400 | - build_sample(); | ||
401 | - | ||
402 | - //apply the MIRST filter | ||
403 | - apply_filter(); | ||
404 | - | ||
405 | - //apply an inverse FFT to bring the results back into the spatial domain | ||
406 | - fft(CUFFT_INVERSE); | ||
407 | - | ||
408 | - crop(); | ||
409 | - | ||
410 | - //save the image | ||
411 | - if(binary) | ||
412 | - to_binary(filename); | ||
413 | - else | ||
414 | - scratch.toImage(filename, stim::complexfield<T, 1>::magnitude); | ||
415 | - } | ||
416 | - | ||
417 | - | ||
418 | - | ||
419 | - | ||
420 | - std::string str(){ | ||
421 | - | ||
422 | - stringstream ss; | ||
423 | - ss<<"1D MIRST Simulation========================="<<std::endl; | ||
424 | - ss<<"z-axis resolution: "<<Z<<std::endl; | ||
425 | - ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl; | ||
426 | - ss<<"number of wavelengths: "<<lambdas.size()<<std::endl; | ||
427 | - ss<<"padding: "<<pad<<std::endl; | ||
428 | - ss<<"sample thickness: "<<thickness()<<" um"<<std::endl; | ||
429 | - ss<<"dz: "<<dz()<<" um"<<std::endl; | ||
430 | - ss<<std::endl; | ||
431 | - ss<<layers.size()<<" layers-------------"<<std::endl; | ||
432 | - for(unsigned int l=0; l<layers.size(); l++) | ||
433 | - ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl; | ||
434 | - | ||
435 | - ss<<"source profile-----------"<<std::endl; | ||
436 | - ss<<get_source().str()<<std::endl; | ||
437 | - | ||
438 | - return ss.str(); | ||
439 | - | ||
440 | - | ||
441 | - } | ||
442 | - | ||
443 | - | ||
444 | - | ||
445 | -}; | ||
446 | - | ||
447 | -} | 1 | +#include "../optics/material.h" |
2 | +#include "../math/complexfield.cuh" | ||
3 | +#include "../math/constants.h" | ||
4 | +//#include "../envi/bil.h" | ||
5 | + | ||
6 | +#include "cufft.h" | ||
7 | + | ||
8 | +#include <vector> | ||
9 | +#include <sstream> | ||
10 | + | ||
11 | +namespace stim{ | ||
12 | + | ||
13 | +//this function writes a sinc function to "dest" such that an iFFT produces a slab | ||
14 | +template<typename T> | ||
15 | +__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri, | ||
16 | + T* src, T* zf, | ||
17 | + T w, unsigned int zR, unsigned int nuR){ | ||
18 | + //dest = complex field representing the sample | ||
19 | + //ri = refractive indices for each wavelength | ||
20 | + //src = intensity of the light source for each wavelength | ||
21 | + //zf = z position of the slab interface for each wavelength (accounting for optical path length) | ||
22 | + //w = width of the slab (in pixels) | ||
23 | + //zR = number of z-axis samples | ||
24 | + //nuR = number of wavelengths | ||
25 | + | ||
26 | + //get the current coordinate in the plane slice | ||
27 | + int ifz = blockIdx.x * blockDim.x + threadIdx.x; | ||
28 | + int inu = blockIdx.y * blockDim.y + threadIdx.y; | ||
29 | + | ||
30 | + //make sure that the thread indices are in-bounds | ||
31 | + if(inu >= nuR || ifz >= zR) return; | ||
32 | + | ||
33 | + int i = inu * zR + ifz; | ||
34 | + | ||
35 | + T fz; | ||
36 | + if(ifz < zR/2) | ||
37 | + fz = ifz / (T)zR; | ||
38 | + else | ||
39 | + fz = -(zR - ifz) / (T)zR; | ||
40 | + | ||
41 | + //if the slab starts outside of the simulation domain, just return | ||
42 | + if(zf[inu] >= zR) return; | ||
43 | + | ||
44 | + //fill the array along z with a sinc function representing the Fourier transform of the layer | ||
45 | + | ||
46 | + T opl = w * ri[inu].real(); //optical path length | ||
47 | + | ||
48 | + //handle the case where the slab goes outside the simulation domain | ||
49 | + if(zf[inu] + opl >= zR) | ||
50 | + opl = zR - zf[inu]; | ||
51 | + | ||
52 | + if(opl == 0) return; | ||
53 | + | ||
54 | + //T l = w * ri[inu].real(); | ||
55 | + //complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0)); | ||
56 | + complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2)); | ||
57 | + | ||
58 | + complex<T> eta = ri[inu] * ri[inu] - 1; | ||
59 | + | ||
60 | + //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz); | ||
61 | + if(ifz == 0) | ||
62 | + dest[i] += opl * exp(e) * eta * src[inu]; | ||
63 | + else | ||
64 | + dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl); | ||
65 | +} | ||
66 | + | ||
67 | +template<typename T> | ||
68 | +__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){ | ||
69 | + //zf = current z depth (optical path length) in pixels | ||
70 | + //ri = refractive index of the material | ||
71 | + //w = actual width of the layer (in pixels) | ||
72 | + | ||
73 | + | ||
74 | + //compute the index for this thread | ||
75 | + int i = blockIdx.x * blockDim.x + threadIdx.x; | ||
76 | + if(i >= S) return; | ||
77 | + | ||
78 | + if(ri == NULL) | ||
79 | + zf[i] += w; | ||
80 | + else | ||
81 | + zf[i] += ri[i].real() * w; | ||
82 | +} | ||
83 | + | ||
84 | +//apply the 1D MIRST filter to an existing sample (overwriting the sample) | ||
85 | +template<typename T> | ||
86 | +__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda, | ||
87 | + T dFz, | ||
88 | + T inNA, T outNA, | ||
89 | + unsigned int lambdaR, unsigned int zR, | ||
90 | + T sigma = 0){ | ||
91 | + //sampleFFT = the sample in the Fourier domain (will be overwritten) | ||
92 | + //lambda = list of wavelengths | ||
93 | + //dFz = delta along the Fz axis in the frequency domain | ||
94 | + //inNA = NA of the internal obscuration | ||
95 | + //outNA = NA of the objective | ||
96 | + //zR = number of pixels along the Fz axis (same as the z-axis) | ||
97 | + //lambdaR = number of wavelengths | ||
98 | + //sigma = width of the Gaussian source | ||
99 | + int ifz = blockIdx.x * blockDim.x + threadIdx.x; | ||
100 | + int inu = blockIdx.y * blockDim.y + threadIdx.y; | ||
101 | + | ||
102 | + if(inu >= lambdaR || ifz >= zR) return; | ||
103 | + | ||
104 | + //calculate the index into the sample FT | ||
105 | + int i = inu * zR + ifz; | ||
106 | + | ||
107 | + //compute the frequency (and set all negative spatial frequencies to zero) | ||
108 | + T fz; | ||
109 | + if(ifz < zR / 2) | ||
110 | + fz = ifz * dFz; | ||
111 | + //if the spatial frequency is negative, set it to zero and exit | ||
112 | + else{ | ||
113 | + sampleFFT[i] = 0; | ||
114 | + return; | ||
115 | + } | ||
116 | + | ||
117 | + //compute the frequency in inverse microns | ||
118 | + T nu = 1/lambda[inu]; | ||
119 | + | ||
120 | + //determine the radius of the integration circle | ||
121 | + T nu_sq = nu * nu; | ||
122 | + T fz_sq = (fz * fz) / 4; | ||
123 | + | ||
124 | + //cut off frequencies above the diffraction limit | ||
125 | + T r; | ||
126 | + if(fz_sq < nu_sq) | ||
127 | + r = sqrt(nu_sq - fz_sq); | ||
128 | + else | ||
129 | + r = 0; | ||
130 | + | ||
131 | + //account for the optics | ||
132 | + T Q = 0; | ||
133 | + if(r > nu * inNA && r < nu * outNA) | ||
134 | + Q = 1; | ||
135 | + | ||
136 | + //account for the source | ||
137 | + //T sigma = 30.0; | ||
138 | + T s = exp( - (r*r * sigma*sigma) / 2 ); | ||
139 | + //T s=1; | ||
140 | + | ||
141 | + //compute the final filter | ||
142 | + T mirst = 0; | ||
143 | + if(fz != 0) | ||
144 | + mirst = 2 * stimPI * r * s * Q * (1/fz); | ||
145 | + | ||
146 | + sampleFFT[i] *= mirst; | ||
147 | + | ||
148 | +} | ||
149 | + | ||
150 | +/*This object performs a 1-dimensional (layered) MIRST simulation | ||
151 | +*/ | ||
152 | +template<typename T> | ||
153 | +class mirst1d{ | ||
154 | + | ||
155 | +private: | ||
156 | + unsigned int Z; //z-axis resolution | ||
157 | + unsigned int pad; //pixel padding on either side of the sample | ||
158 | + | ||
159 | + std::vector< material<T> > matlist; //list of materials | ||
160 | + std::vector< T > layers; //list of layer thicknesses | ||
161 | + | ||
162 | + std::vector< T > lambdas; //list of wavelengths that are being simulated | ||
163 | + unsigned int S; //number of wavelengths (size of "lambdas") | ||
164 | + | ||
165 | + T NA[2]; //numerical aperature (central obscuration and outer diameter) | ||
166 | + | ||
167 | + function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters) | ||
168 | + | ||
169 | + complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc. | ||
170 | + | ||
171 | + void fft(int direction = CUFFT_FORWARD){ | ||
172 | + | ||
173 | + unsigned padZ = Z + pad; | ||
174 | + | ||
175 | + //create cuFFT handles | ||
176 | + cufftHandle plan; | ||
177 | + cufftResult result; | ||
178 | + | ||
179 | + if(sizeof(T) == 4) | ||
180 | + result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision | ||
181 | + else | ||
182 | + result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision | ||
183 | + | ||
184 | + //check for Plan 1D errors | ||
185 | + if(result != CUFFT_SUCCESS){ | ||
186 | + std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl; | ||
187 | + CufftError(result); | ||
188 | + exit(1); | ||
189 | + } | ||
190 | + | ||
191 | + if(sizeof(T) == 4) | ||
192 | + result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction); | ||
193 | + else | ||
194 | + result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction); | ||
195 | + | ||
196 | + //check for FFT errors | ||
197 | + if(result != CUFFT_SUCCESS){ | ||
198 | + std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl; | ||
199 | + CufftError(result); | ||
200 | + exit(1); | ||
201 | + } | ||
202 | + | ||
203 | + cufftDestroy(plan); | ||
204 | + } | ||
205 | + | ||
206 | + | ||
207 | + //initialize the scratch memory | ||
208 | + void init_scratch(){ | ||
209 | + scratch = complexfield<T, 1>(Z + pad , lambdas.size()); | ||
210 | + scratch = 0; | ||
211 | + } | ||
212 | + | ||
213 | + //get the list of scattering efficiency (eta) values for a specified layer | ||
214 | + std::vector< complex<T> > layer_etas(unsigned int l){ | ||
215 | + | ||
216 | + std::vector< complex<T> > etas; | ||
217 | + | ||
218 | + //fill the list of etas | ||
219 | + for(unsigned int i=0; i<lambdas.size(); i++) | ||
220 | + etas.push_back( matlist[l].eta(lambdas[i]) ); | ||
221 | + return etas; | ||
222 | + } | ||
223 | + | ||
224 | + //calculates the optimal block and grid sizes using information from the GPU | ||
225 | + void cuda_params(dim3& grids, dim3& blocks){ | ||
226 | + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | ||
227 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
228 | + | ||
229 | + //create one thread for each detector pixel | ||
230 | + blocks = dim3(SQRT_BLOCK, SQRT_BLOCK); | ||
231 | + grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
232 | + } | ||
233 | + | ||
234 | + //add the fourier transform of layer n to the scratch space | ||
235 | + void build_layer_fft(unsigned int n, T* zf){ | ||
236 | + unsigned int paddedZ = Z + pad; | ||
237 | + | ||
238 | + T wpx = layers[n] / dz(); //calculate the width of the layer in pixels | ||
239 | + | ||
240 | + //allocate memory for the refractive index | ||
241 | + complex<T>* gpuRi; | ||
242 | + HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S)); | ||
243 | + | ||
244 | + //allocate memory for the source profile | ||
245 | + T* gpuSrc; | ||
246 | + HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S)); | ||
247 | + | ||
248 | + complex<T> ri; | ||
249 | + T source; | ||
250 | + //store the refractive index and source profile in a CPU array | ||
251 | + for(int inu=0; inu<S; inu++){ | ||
252 | + //save the refractive index to the GPU | ||
253 | + ri = matlist[n].getN(lambdas[inu]); | ||
254 | + HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice )); | ||
255 | + | ||
256 | + //save the source profile to the GPU | ||
257 | + source = source_profile(10000 / lambdas[inu]); | ||
258 | + HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice )); | ||
259 | + | ||
260 | + } | ||
261 | + | ||
262 | + //create one thread for each pixel of the field slice | ||
263 | + dim3 gridDim, blockDim; | ||
264 | + cuda_params(gridDim, blockDim); | ||
265 | + stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S); | ||
266 | + | ||
267 | + int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size | ||
268 | + int linGrid = S / linBlock + 1; | ||
269 | + stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S); | ||
270 | + | ||
271 | + //free memory | ||
272 | + HANDLE_ERROR(cudaFree(gpuRi)); | ||
273 | + HANDLE_ERROR(cudaFree(gpuSrc)); | ||
274 | + } | ||
275 | + | ||
276 | + void build_sample(){ | ||
277 | + init_scratch(); //initialize the GPU scratch space | ||
278 | + //build_layer(1); | ||
279 | + | ||
280 | + T* zf; | ||
281 | + HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S)); | ||
282 | + HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S)); | ||
283 | + | ||
284 | + //render each layer of the sample | ||
285 | + for(unsigned int l=0; l<layers.size(); l++){ | ||
286 | + build_layer_fft(l, zf); | ||
287 | + } | ||
288 | + | ||
289 | + HANDLE_ERROR(cudaFree(zf)); | ||
290 | + } | ||
291 | + | ||
292 | + void apply_filter(){ | ||
293 | + dim3 gridDim, blockDim; | ||
294 | + cuda_params(gridDim, blockDim); | ||
295 | + | ||
296 | + unsigned int Zpad = Z + pad; | ||
297 | + | ||
298 | + T sim_range = dz() * Zpad; | ||
299 | + T dFz = 1 / sim_range; | ||
300 | + | ||
301 | + //copy the array of wavelengths to the GPU | ||
302 | + T* gpuLambdas; | ||
303 | + HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad)); | ||
304 | + HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice)); | ||
305 | + stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas, | ||
306 | + dFz, | ||
307 | + NA[0], NA[1], | ||
308 | + S, Zpad); | ||
309 | + } | ||
310 | + | ||
311 | + //crop the image to the sample thickness - keep in mind that sample thickness != optical path length | ||
312 | + void crop(){ | ||
313 | + | ||
314 | + scratch = scratch.crop(Z, S); | ||
315 | + } | ||
316 | + | ||
317 | + //save the scratch field as a binary file | ||
318 | + void to_binary(std::string filename){ | ||
319 | + | ||
320 | + } | ||
321 | + | ||
322 | + | ||
323 | +public: | ||
324 | + | ||
325 | + //constructor | ||
326 | + mirst1d(unsigned int rZ = 100, | ||
327 | + unsigned int padding = 0){ | ||
328 | + Z = rZ; | ||
329 | + pad = padding; | ||
330 | + NA[0] = 0; | ||
331 | + NA[1] = 0.8; | ||
332 | + S = 0; | ||
333 | + source_profile = 1; | ||
334 | + } | ||
335 | + | ||
336 | + //add a layer, thickness = microns | ||
337 | + void add_layer(material<T> mat, T thickness){ | ||
338 | + matlist.push_back(mat); | ||
339 | + layers.push_back(thickness); | ||
340 | + } | ||
341 | + | ||
342 | + void add_layer(std::string filename, T thickness){ | ||
343 | + add_layer(material<T>(filename), thickness); | ||
344 | + } | ||
345 | + | ||
346 | + //adds a profile spectrum for the light source | ||
347 | + void set_source(std::string filename){ | ||
348 | + source_profile.load(filename); | ||
349 | + } | ||
350 | + | ||
351 | + //adds a block of wavenumbers (cm^-1) to the simulation parameters | ||
352 | + void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){ | ||
353 | + unsigned int nu = start; | ||
354 | + while(nu <= stop){ | ||
355 | + lambdas.push_back((T)10000 / nu); | ||
356 | + nu += step; | ||
357 | + } | ||
358 | + S = lambdas.size(); //increment the number of wavelengths (shorthand for later) | ||
359 | + } | ||
360 | + | ||
361 | + T thickness(){ | ||
362 | + T t = 0; | ||
363 | + for(unsigned int l=0; l<layers.size(); l++) | ||
364 | + t += layers[l]; | ||
365 | + return t; | ||
366 | + } | ||
367 | + | ||
368 | + void padding(unsigned int padding = 0){ | ||
369 | + pad = padding; | ||
370 | + } | ||
371 | + | ||
372 | + T dz(){ | ||
373 | + return thickness() / Z; //calculate the z-axis step size | ||
374 | + } | ||
375 | + | ||
376 | + void na(T in, T out){ | ||
377 | + NA[0] = in; | ||
378 | + NA[1] = out; | ||
379 | + } | ||
380 | + | ||
381 | + void na(T out){ | ||
382 | + na(0, out); | ||
383 | + } | ||
384 | + | ||
385 | + stim::function<T, T> get_source(){ | ||
386 | + return source_profile; | ||
387 | + } | ||
388 | + | ||
389 | + void save_sample(std::string filename){ | ||
390 | + //create a sample and save the magnitude as an image | ||
391 | + build_sample(); | ||
392 | + fft(CUFFT_INVERSE); | ||
393 | + scratch.toImage(filename, stim::complexfield<T, 1>::magnitude); | ||
394 | + } | ||
395 | + | ||
396 | + void save_mirst(std::string filename, bool binary = true){ | ||
397 | + //apply the MIRST filter to a sample and save the image | ||
398 | + | ||
399 | + //build the sample in the Fourier domain | ||
400 | + build_sample(); | ||
401 | + | ||
402 | + //apply the MIRST filter | ||
403 | + apply_filter(); | ||
404 | + | ||
405 | + //apply an inverse FFT to bring the results back into the spatial domain | ||
406 | + fft(CUFFT_INVERSE); | ||
407 | + | ||
408 | + crop(); | ||
409 | + | ||
410 | + //save the image | ||
411 | + if(binary) | ||
412 | + to_binary(filename); | ||
413 | + else | ||
414 | + scratch.toImage(filename, stim::complexfield<T, 1>::magnitude); | ||
415 | + } | ||
416 | + | ||
417 | + | ||
418 | + | ||
419 | + | ||
420 | + std::string str(){ | ||
421 | + | ||
422 | + stringstream ss; | ||
423 | + ss<<"1D MIRST Simulation========================="<<std::endl; | ||
424 | + ss<<"z-axis resolution: "<<Z<<std::endl; | ||
425 | + ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl; | ||
426 | + ss<<"number of wavelengths: "<<lambdas.size()<<std::endl; | ||
427 | + ss<<"padding: "<<pad<<std::endl; | ||
428 | + ss<<"sample thickness: "<<thickness()<<" um"<<std::endl; | ||
429 | + ss<<"dz: "<<dz()<<" um"<<std::endl; | ||
430 | + ss<<std::endl; | ||
431 | + ss<<layers.size()<<" layers-------------"<<std::endl; | ||
432 | + for(unsigned int l=0; l<layers.size(); l++) | ||
433 | + ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl; | ||
434 | + | ||
435 | + ss<<"source profile-----------"<<std::endl; | ||
436 | + ss<<get_source().str()<<std::endl; | ||
437 | + | ||
438 | + return ss.str(); | ||
439 | + | ||
440 | + | ||
441 | + } | ||
442 | + | ||
443 | + | ||
444 | + | ||
445 | +}; | ||
446 | + | ||
447 | +} |
ui/arguments.h
@@ -15,7 +15,7 @@ | @@ -15,7 +15,7 @@ | ||
15 | 15 | ||
16 | namespace stim{ | 16 | namespace stim{ |
17 | 17 | ||
18 | - class argument | 18 | + class cmd_option |
19 | { | 19 | { |
20 | private: | 20 | private: |
21 | bool ansi; | 21 | bool ansi; |
@@ -59,8 +59,8 @@ namespace stim{ | @@ -59,8 +59,8 @@ namespace stim{ | ||
59 | 59 | ||
60 | public: | 60 | public: |
61 | void set_ansi(bool b){ ansi = b; } | 61 | void set_ansi(bool b){ ansi = b; } |
62 | - //create an argument with a given name, description, and default value | ||
63 | - argument(std::string _name, std::string _desc, std::string _default = "", std::string _range = "") | 62 | + //create an option with a given name, description, and default value |
63 | + cmd_option(std::string _name, std::string _desc, std::string _default = "", std::string _range = "") | ||
64 | { | 64 | { |
65 | name = _name; | 65 | name = _name; |
66 | parse_desc(_desc); | 66 | parse_desc(_desc); |
@@ -81,12 +81,12 @@ namespace stim{ | @@ -81,12 +81,12 @@ namespace stim{ | ||
81 | return vals.size(); | 81 | return vals.size(); |
82 | } | 82 | } |
83 | 83 | ||
84 | - //return the value of a text argument | 84 | + //return the value of a text option |
85 | std::string as_string(unsigned int n = 0) | 85 | std::string as_string(unsigned int n = 0) |
86 | { | 86 | { |
87 | if(!flag) | 87 | if(!flag) |
88 | { | 88 | { |
89 | - std::cout<<"ERROR - Argument requested without being set: "<<name<<std::endl; | 89 | + std::cout<<"ERROR - Option requested without being set: "<<name<<std::endl; |
90 | exit(1); | 90 | exit(1); |
91 | } | 91 | } |
92 | 92 | ||
@@ -96,12 +96,12 @@ namespace stim{ | @@ -96,12 +96,12 @@ namespace stim{ | ||
96 | else return ""; | 96 | else return ""; |
97 | } | 97 | } |
98 | 98 | ||
99 | - //return the value of a floating point argument | 99 | + //return the value of a floating point option |
100 | float as_float(unsigned int n = 0) | 100 | float as_float(unsigned int n = 0) |
101 | { | 101 | { |
102 | if(!flag) | 102 | if(!flag) |
103 | { | 103 | { |
104 | - std::cout<<"ERROR - Argument requested without being set: "<<name<<std::endl; | 104 | + std::cout<<"ERROR - option requested without being set: "<<name<<std::endl; |
105 | exit(1); | 105 | exit(1); |
106 | } | 106 | } |
107 | 107 | ||
@@ -115,12 +115,12 @@ namespace stim{ | @@ -115,12 +115,12 @@ namespace stim{ | ||
115 | else return 0; | 115 | else return 0; |
116 | } | 116 | } |
117 | 117 | ||
118 | - //return the value of an integer argument | 118 | + //return the value of an integer option |
119 | int as_int(unsigned int n = 0) | 119 | int as_int(unsigned int n = 0) |
120 | { | 120 | { |
121 | if(!flag) | 121 | if(!flag) |
122 | { | 122 | { |
123 | - std::cout<<"ERROR - Argument requested without being set: "<<name<<std::endl; | 123 | + std::cout<<"ERROR - option requested without being set: "<<name<<std::endl; |
124 | exit(1); | 124 | exit(1); |
125 | } | 125 | } |
126 | 126 | ||
@@ -138,7 +138,7 @@ namespace stim{ | @@ -138,7 +138,7 @@ namespace stim{ | ||
138 | int col_width() | 138 | int col_width() |
139 | { | 139 | { |
140 | int n = 3; | 140 | int n = 3; |
141 | - //add the length of the argument name | 141 | + //add the length of the option name |
142 | n += name.size(); | 142 | n += name.size(); |
143 | 143 | ||
144 | //if there are any default parameters | 144 | //if there are any default parameters |
@@ -147,7 +147,7 @@ namespace stim{ | @@ -147,7 +147,7 @@ namespace stim{ | ||
147 | //padding (parenthesis, =, etc.) | 147 | //padding (parenthesis, =, etc.) |
148 | n += 6; | 148 | n += 6; |
149 | 149 | ||
150 | - //for each default argument value | 150 | + //for each default option value |
151 | for(unsigned int v=0; v<vals.size(); v++) | 151 | for(unsigned int v=0; v<vals.size(); v++) |
152 | n += vals[v].size() + 1; | 152 | n += vals[v].size() + 1; |
153 | } | 153 | } |
@@ -209,13 +209,13 @@ namespace stim{ | @@ -209,13 +209,13 @@ namespace stim{ | ||
209 | return ss.str(); | 209 | return ss.str(); |
210 | } | 210 | } |
211 | 211 | ||
212 | - //compare the name of the argument to a string | 212 | + //compare the name of the option to a string |
213 | bool operator==(std::string rhs) | 213 | bool operator==(std::string rhs) |
214 | { | 214 | { |
215 | return (name == rhs); | 215 | return (name == rhs); |
216 | } | 216 | } |
217 | 217 | ||
218 | - //set the argument to a given value | 218 | + //set the option to a given value |
219 | void set(std::string _value) | 219 | void set(std::string _value) |
220 | { | 220 | { |
221 | parse_val(_value); | 221 | parse_val(_value); |
@@ -242,10 +242,11 @@ namespace stim{ | @@ -242,10 +242,11 @@ namespace stim{ | ||
242 | private: | 242 | private: |
243 | bool ansi; | 243 | bool ansi; |
244 | 244 | ||
245 | - //vector of arguments | ||
246 | - std::vector<argument> args; | 245 | + //vector of options |
246 | + std::vector<cmd_option> opts; | ||
247 | + std::vector<std::string> args; | ||
247 | 248 | ||
248 | - //column width of the longest argument | 249 | + //column width of the longest option |
249 | int col_width; | 250 | int col_width; |
250 | 251 | ||
251 | //list of sections | 252 | //list of sections |
@@ -261,28 +262,28 @@ namespace stim{ | @@ -261,28 +262,28 @@ namespace stim{ | ||
261 | void set_ansi(bool b) | 262 | void set_ansi(bool b) |
262 | { | 263 | { |
263 | ansi = b; | 264 | ansi = b; |
264 | - for(unsigned int i=0; i<args.size(); i++) | ||
265 | - args[i].set_ansi(ansi); | 265 | + for(unsigned int i=0; i<opts.size(); i++) |
266 | + opts[i].set_ansi(ansi); | ||
266 | } | 267 | } |
267 | 268 | ||
268 | void add(std::string _name, std::string _desc, std::string _default = "", std::string _range = "") | 269 | void add(std::string _name, std::string _desc, std::string _default = "", std::string _range = "") |
269 | { | 270 | { |
270 | - argument arg(_name, _desc, _default, _range); | ||
271 | - arg.set_ansi(ansi); | ||
272 | - args.push_back(arg); | 271 | + cmd_option opt(_name, _desc, _default, _range); |
272 | + opt.set_ansi(ansi); | ||
273 | + opts.push_back(opt); | ||
273 | 274 | ||
274 | - col_width = std::max<int>(col_width, arg.col_width()); | 275 | + col_width = std::max<int>(col_width, opt.col_width()); |
275 | } | 276 | } |
276 | 277 | ||
277 | void section(std::string _name) | 278 | void section(std::string _name) |
278 | { | 279 | { |
279 | argsection s; | 280 | argsection s; |
280 | s.name = _name; | 281 | s.name = _name; |
281 | - s.index = args.size(); | 282 | + s.index = opts.size(); |
282 | sections.push_back(s); | 283 | sections.push_back(s); |
283 | } | 284 | } |
284 | 285 | ||
285 | - //output the arguments (generally in response to --help) | 286 | + //output the options (generally in response to --help) |
286 | std::string str() | 287 | std::string str() |
287 | { | 288 | { |
288 | std::stringstream ss; | 289 | std::stringstream ss; |
@@ -292,8 +293,8 @@ namespace stim{ | @@ -292,8 +293,8 @@ namespace stim{ | ||
292 | if(sections.size() > 0) | 293 | if(sections.size() > 0) |
293 | si = 0; | 294 | si = 0; |
294 | 295 | ||
295 | - //for each argument | ||
296 | - for(unsigned int a=0; a<args.size(); a++) | 296 | + //for each option |
297 | + for(unsigned int a=0; a<opts.size(); a++) | ||
297 | { | 298 | { |
298 | if(si != -1 && a == sections[si].index) | 299 | if(si != -1 && a == sections[si].index) |
299 | { | 300 | { |
@@ -305,7 +306,7 @@ namespace stim{ | @@ -305,7 +306,7 @@ namespace stim{ | ||
305 | if(si == (int)sections.size()) si = -1; | 306 | if(si == (int)sections.size()) si = -1; |
306 | } | 307 | } |
307 | 308 | ||
308 | - ss<<args[a].toStr(col_width)<<std::endl; | 309 | + ss<<opts[a].toStr(col_width)<<std::endl; |
309 | } | 310 | } |
310 | 311 | ||
311 | return ss.str(); | 312 | return ss.str(); |
@@ -313,9 +314,9 @@ namespace stim{ | @@ -313,9 +314,9 @@ namespace stim{ | ||
313 | 314 | ||
314 | int index(std::string _name) | 315 | int index(std::string _name) |
315 | { | 316 | { |
316 | - unsigned int i = find(args.begin(), args.end(), _name) - args.begin(); | 317 | + unsigned int i = find(opts.begin(), opts.end(), _name) - opts.begin(); |
317 | 318 | ||
318 | - if(i >= args.size()) | 319 | + if(i >= opts.size()) |
319 | return -1; | 320 | return -1; |
320 | 321 | ||
321 | return (int)i; | 322 | return (int)i; |
@@ -327,52 +328,57 @@ namespace stim{ | @@ -327,52 +328,57 @@ namespace stim{ | ||
327 | 328 | ||
328 | if(i != -1) | 329 | if(i != -1) |
329 | { | 330 | { |
330 | - args[i].set(_value); | 331 | + opts[i].set(_value); |
331 | //adjust the column width if necessary | 332 | //adjust the column width if necessary |
332 | - col_width = (std::max)(col_width, args[i].col_width()); | 333 | + col_width = (std::max)(col_width, opts[i].col_width()); |
333 | } | 334 | } |
334 | else | 335 | else |
335 | - std::cout<<"ERROR - Argument not recognized: "<<_name<<std::endl; | 336 | + std::cout<<"ERROR - option not recognized: "<<_name<<std::endl; |
336 | } | 337 | } |
337 | 338 | ||
338 | //parse a parameter string | 339 | //parse a parameter string |
339 | void parse(int argc, char* argv[]) | 340 | void parse(int argc, char* argv[]) |
340 | { | 341 | { |
341 | - //if the number of arguments is 1, we're done | 342 | + //if the number of options is 1, we're done |
342 | if(argc <= 1) return; | 343 | if(argc <= 1) return; |
343 | 344 | ||
344 | std::string name; | 345 | std::string name; |
345 | std::string params; | 346 | std::string params; |
346 | 347 | ||
348 | + bool args_done = false; //create a flag that turns true when the first option is encountered | ||
349 | + | ||
347 | for(int i=1; i<argc; i++) | 350 | for(int i=1; i<argc; i++) |
348 | { | 351 | { |
349 | - //if the argument is a parameter name | 352 | + //if the argument is an option |
350 | if(argv[i][0] == '-' && argv[i][1] == '-') | 353 | if(argv[i][0] == '-' && argv[i][1] == '-') |
351 | { | 354 | { |
352 | - //add any previous arguments | 355 | + args_done = true; //arguments for the executable are done, all options now |
356 | + //add any previous options | ||
353 | if(name != "") | 357 | if(name != "") |
354 | set(name, params); | 358 | set(name, params); |
355 | - //set the current argument to this name | 359 | + //set the current option to this name |
356 | name = argv[i]+2; | 360 | name = argv[i]+2; |
357 | //clear the parameters list | 361 | //clear the parameters list |
358 | params = ""; | 362 | params = ""; |
359 | } | 363 | } |
360 | - else | ||
361 | - { | 364 | + else if(!args_done){ |
365 | + args.push_back(argv[i]); | ||
366 | + } | ||
367 | + else{ //everything else is an arg for the most recent option | ||
362 | if(params != "") | 368 | if(params != "") |
363 | params += " "; | 369 | params += " "; |
364 | params += argv[i]; | 370 | params += argv[i]; |
365 | } | 371 | } |
366 | } | 372 | } |
367 | 373 | ||
368 | - //set the last argument | 374 | + //set the last option |
369 | set(name, params); | 375 | set(name, params); |
370 | } | 376 | } |
371 | 377 | ||
372 | //determine if a parameter has been set (either specified by the user or with a default value) | 378 | //determine if a parameter has been set (either specified by the user or with a default value) |
373 | bool operator()(std::string _name) | 379 | bool operator()(std::string _name) |
374 | { | 380 | { |
375 | - int i = find(args.begin(), args.end(), _name) - args.begin(); | 381 | + int i = find(opts.begin(), opts.end(), _name) - opts.begin(); |
376 | 382 | ||
377 | if(i < 0) | 383 | if(i < 0) |
378 | { | 384 | { |
@@ -380,12 +386,13 @@ namespace stim{ | @@ -380,12 +386,13 @@ namespace stim{ | ||
380 | exit(1); | 386 | exit(1); |
381 | } | 387 | } |
382 | 388 | ||
383 | - return args[i].is_set(); | 389 | + return opts[i].is_set(); |
384 | } | 390 | } |
385 | 391 | ||
386 | - int nargs(std::string _name) | 392 | + //number of arguments in a specified option |
393 | + unsigned int nargs(std::string _name) | ||
387 | { | 394 | { |
388 | - int i = find(args.begin(), args.end(), _name) - args.begin(); | 395 | + int i = find(opts.begin(), opts.end(), _name) - opts.begin(); |
389 | 396 | ||
390 | if(i < 0) | 397 | if(i < 0) |
391 | { | 398 | { |
@@ -393,12 +400,22 @@ namespace stim{ | @@ -393,12 +400,22 @@ namespace stim{ | ||
393 | exit(1); | 400 | exit(1); |
394 | } | 401 | } |
395 | 402 | ||
396 | - return args[i].nargs(); | 403 | + return opts[i].nargs(); |
404 | + } | ||
405 | + | ||
406 | + //number of arguments for the executable | ||
407 | + unsigned int nargs(){ | ||
408 | + return args.size(); | ||
409 | + } | ||
410 | + | ||
411 | + //return the a'th executable argument | ||
412 | + std::string arg(unsigned int a){ | ||
413 | + return args[a]; | ||
397 | } | 414 | } |
398 | 415 | ||
399 | - argument operator[](std::string _name) | 416 | + cmd_option operator[](std::string _name) |
400 | { | 417 | { |
401 | - int i = find(args.begin(), args.end(), _name) - args.begin(); | 418 | + int i = find(opts.begin(), opts.end(), _name) - opts.begin(); |
402 | 419 | ||
403 | if(i < 0) | 420 | if(i < 0) |
404 | { | 421 | { |
@@ -406,7 +423,7 @@ namespace stim{ | @@ -406,7 +423,7 @@ namespace stim{ | ||
406 | exit(1); | 423 | exit(1); |
407 | } | 424 | } |
408 | 425 | ||
409 | - return args[i]; | 426 | + return opts[i]; |
410 | } | 427 | } |
411 | 428 | ||
412 | 429 |