Commit 81e0d2215b30a17309ce7ba7615f94e2eda5c67e
1 parent
7b3948ab
separated executable arguments and options in the arglist class
Showing
8 changed files
with
2056 additions
and
2039 deletions
Show diff stats
math/complex.h
1 | -/*RTS Complex number class. This class is CUDA compatible, | |
2 | -and can therefore be used in CUDA code and on CUDA devices. | |
3 | -*/ | |
4 | - | |
5 | -#ifndef RTS_COMPLEX | |
6 | -#define RTS_COMPLEX | |
7 | - | |
8 | -#include "../cuda/callable.h" | |
9 | -#include <cmath> | |
10 | -#include <string> | |
11 | -#include <sstream> | |
12 | -#include <iostream> | |
13 | - | |
14 | -namespace stim | |
15 | -{ | |
16 | - | |
17 | -template <class T> | |
18 | -struct complex | |
19 | -{ | |
20 | - T r, i; | |
21 | - | |
22 | - //default constructor | |
23 | - CUDA_CALLABLE complex() | |
24 | - { | |
25 | - r = 0; | |
26 | - i = 0; | |
27 | - } | |
28 | - | |
29 | - //constructor when given real and imaginary values | |
30 | - CUDA_CALLABLE complex(T r, T i = 0) | |
31 | - { | |
32 | - this->r = r; | |
33 | - this->i = i; | |
34 | - } | |
35 | - | |
36 | - //access methods | |
37 | - CUDA_CALLABLE T real() | |
38 | - { | |
39 | - return r; | |
40 | - } | |
41 | - | |
42 | - CUDA_CALLABLE T real(T r_val) | |
43 | - { | |
44 | - r = r_val; | |
45 | - return r_val; | |
46 | - } | |
47 | - | |
48 | - CUDA_CALLABLE T imag() | |
49 | - { | |
50 | - return i; | |
51 | - } | |
52 | - CUDA_CALLABLE T imag(T i_val) | |
53 | - { | |
54 | - i = i_val; | |
55 | - return i_val; | |
56 | - } | |
57 | - | |
58 | - | |
59 | - | |
60 | - //return the current value multiplied by i | |
61 | - CUDA_CALLABLE complex<T> imul() | |
62 | - { | |
63 | - complex<T> result; | |
64 | - result.r = -i; | |
65 | - result.i = r; | |
66 | - | |
67 | - return result; | |
68 | - } | |
69 | - | |
70 | - //returns the complex signum (-1, 0, 1) | |
71 | - CUDA_CALLABLE int sgn(){ | |
72 | - if(r > 0) return 1; | |
73 | - else if(r < 0) return -1; | |
74 | - else return (0 < i - i < 0); | |
75 | - } | |
76 | - | |
77 | - //ARITHMETIC OPERATORS-------------------- | |
78 | - | |
79 | - //binary + operator (returns the result of adding two complex values) | |
80 | - CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs) const | |
81 | - { | |
82 | - complex<T> result; | |
83 | - result.r = r + rhs.r; | |
84 | - result.i = i + rhs.i; | |
85 | - return result; | |
86 | - } | |
87 | - | |
88 | - CUDA_CALLABLE complex<T> operator+ (const T rhs) const | |
89 | - { | |
90 | - complex<T> result; | |
91 | - result.r = r + rhs; | |
92 | - result.i = i; | |
93 | - return result; | |
94 | - } | |
95 | - | |
96 | - //binary - operator (returns the result of adding two complex values) | |
97 | - CUDA_CALLABLE complex<T> operator- (const complex<T> rhs) const | |
98 | - { | |
99 | - complex<T> result; | |
100 | - result.r = r - rhs.r; | |
101 | - result.i = i - rhs.i; | |
102 | - return result; | |
103 | - } | |
104 | - | |
105 | - //binary - operator (returns the result of adding two complex values) | |
106 | - CUDA_CALLABLE complex<T> operator- (const T rhs) | |
107 | - { | |
108 | - complex<T> result; | |
109 | - result.r = r - rhs; | |
110 | - result.i = i; | |
111 | - return result; | |
112 | - } | |
113 | - | |
114 | - //binary MULTIPLICATION operators (returns the result of multiplying complex values) | |
115 | - CUDA_CALLABLE complex<T> operator* (const complex<T> rhs) const | |
116 | - { | |
117 | - complex<T> result; | |
118 | - result.r = r * rhs.r - i * rhs.i; | |
119 | - result.i = r * rhs.i + i * rhs.r; | |
120 | - return result; | |
121 | - } | |
122 | - CUDA_CALLABLE complex<T> operator* (const T rhs) | |
123 | - { | |
124 | - return complex<T>(r * rhs, i * rhs); | |
125 | - } | |
126 | - | |
127 | - //binary DIVISION operators (returns the result of dividing complex values) | |
128 | - CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs) const | |
129 | - { | |
130 | - complex<T> result; | |
131 | - T denom = rhs.r * rhs.r + rhs.i * rhs.i; | |
132 | - result.r = (r * rhs.r + i * rhs.i) / denom; | |
133 | - result.i = (- r * rhs.i + i * rhs.r) / denom; | |
134 | - | |
135 | - return result; | |
136 | - } | |
137 | - CUDA_CALLABLE complex<T> operator/ (const T rhs) | |
138 | - { | |
139 | - return complex<T>(r / rhs, i / rhs); | |
140 | - } | |
141 | - | |
142 | - //ASSIGNMENT operators----------------------------------- | |
143 | - CUDA_CALLABLE complex<T> & operator=(const complex<T> &rhs) | |
144 | - { | |
145 | - //check for self-assignment | |
146 | - if(this != &rhs) | |
147 | - { | |
148 | - this->r = rhs.r; | |
149 | - this->i = rhs.i; | |
150 | - } | |
151 | - return *this; | |
152 | - } | |
153 | - CUDA_CALLABLE complex<T> & operator=(const T &rhs) | |
154 | - { | |
155 | - this->r = rhs; | |
156 | - this->i = 0; | |
157 | - | |
158 | - return *this; | |
159 | - } | |
160 | - | |
161 | - //arithmetic assignment operators | |
162 | - CUDA_CALLABLE complex<T> operator+=(const complex<T> &rhs) | |
163 | - { | |
164 | - *this = *this + rhs; | |
165 | - return *this; | |
166 | - } | |
167 | - CUDA_CALLABLE complex<T> operator+=(const T &rhs) | |
168 | - { | |
169 | - *this = *this + rhs; | |
170 | - return *this; | |
171 | - } | |
172 | - | |
173 | - CUDA_CALLABLE complex<T> operator-=(const complex<T> &rhs) | |
174 | - { | |
175 | - *this = *this - rhs; | |
176 | - return *this; | |
177 | - } | |
178 | - CUDA_CALLABLE complex<T> operator-=(const T &rhs) | |
179 | - { | |
180 | - *this = *this - rhs; | |
181 | - return *this; | |
182 | - } | |
183 | - | |
184 | - CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs) | |
185 | - { | |
186 | - *this = *this * rhs; | |
187 | - return *this; | |
188 | - } | |
189 | - CUDA_CALLABLE complex<T> operator*=(const T &rhs) | |
190 | - { | |
191 | - *this = *this * rhs; | |
192 | - return *this; | |
193 | - } | |
194 | - //divide and assign | |
195 | - CUDA_CALLABLE complex<T> operator/=(const complex<T> &rhs) | |
196 | - { | |
197 | - *this = *this / rhs; | |
198 | - return *this; | |
199 | - } | |
200 | - CUDA_CALLABLE complex<T> operator/=(const T &rhs) | |
201 | - { | |
202 | - *this = *this / rhs; | |
203 | - return *this; | |
204 | - } | |
205 | - | |
206 | - //absolute value operator (returns the absolute value of the complex number) | |
207 | - CUDA_CALLABLE T abs() | |
208 | - { | |
209 | - return std::sqrt(r * r + i * i); | |
210 | - } | |
211 | - | |
212 | - CUDA_CALLABLE complex<T> log() | |
213 | - { | |
214 | - complex<T> result; | |
215 | - result.r = (T)std::log(std::sqrt(r * r + i * i)); | |
216 | - result.i = (T)std::atan2(i, r); | |
217 | - | |
218 | - | |
219 | - return result; | |
220 | - } | |
221 | - | |
222 | - CUDA_CALLABLE complex<T> exp() | |
223 | - { | |
224 | - complex<T> result; | |
225 | - | |
226 | - T e_r = std::exp(r); | |
227 | - result.r = e_r * (T)std::cos(i); | |
228 | - result.i = e_r * (T)std::sin(i); | |
229 | - | |
230 | - return result; | |
231 | - } | |
232 | - | |
233 | - /*CUDA_CALLABLE complex<T> pow(int y) | |
234 | - { | |
235 | - | |
236 | - return pow((double)y); | |
237 | - }*/ | |
238 | - | |
239 | - CUDA_CALLABLE complex<T> pow(T y) | |
240 | - { | |
241 | - complex<T> result; | |
242 | - | |
243 | - result = log() * y; | |
244 | - | |
245 | - return result.exp(); | |
246 | - } | |
247 | - | |
248 | - CUDA_CALLABLE complex<T> sqrt() | |
249 | - { | |
250 | - complex<T> result; | |
251 | - | |
252 | - //convert to polar coordinates | |
253 | - T a = std::sqrt(r*r + i*i); | |
254 | - T theta = std::atan2(i, r); | |
255 | - | |
256 | - //find the square root | |
257 | - T a_p = std::sqrt(a); | |
258 | - T theta_p = theta/2.0f; | |
259 | - | |
260 | - //convert back to cartesian coordinates | |
261 | - result.r = a_p * std::cos(theta_p); | |
262 | - result.i = a_p * std::sin(theta_p); | |
263 | - | |
264 | - return result; | |
265 | - } | |
266 | - | |
267 | - std::string str() | |
268 | - { | |
269 | - std::stringstream ss; | |
270 | - ss<<"("<<r<<","<<i<<")"; | |
271 | - | |
272 | - return ss.str(); | |
273 | - } | |
274 | - | |
275 | - //COMPARISON operators | |
276 | - CUDA_CALLABLE bool operator==(complex<T> rhs) | |
277 | - { | |
278 | - if(r == rhs.r && i == rhs.i) | |
279 | - return true; | |
280 | - return false; | |
281 | - } | |
282 | - | |
283 | - CUDA_CALLABLE bool operator==(T rhs) | |
284 | - { | |
285 | - if(r == rhs && i == 0) | |
286 | - return true; | |
287 | - return false; | |
288 | - } | |
289 | - | |
290 | - CUDA_CALLABLE bool operator!=(T rhs) | |
291 | - { | |
292 | - if(r != rhs || i != 0) | |
293 | - return true; | |
294 | - return false; | |
295 | - } | |
296 | - | |
297 | - CUDA_CALLABLE bool operator<(complex<T> rhs){ | |
298 | - return abs() < rhs.abs(); | |
299 | - } | |
300 | - CUDA_CALLABLE bool operator<=(complex<T> rhs){ | |
301 | - return abs() <= rhs.abs(); | |
302 | - } | |
303 | - CUDA_CALLABLE bool operator>(complex<T> rhs){ | |
304 | - return abs() > rhs.abs(); | |
305 | - } | |
306 | - CUDA_CALLABLE bool operator >=(complex<T> rhs){ | |
307 | - return abs() >= rhs.abs(); | |
308 | - } | |
309 | - | |
310 | - //CASTING operators | |
311 | - template < typename otherT > | |
312 | - operator complex<otherT>() | |
313 | - { | |
314 | - complex<otherT> result((otherT)r, (otherT)i); | |
315 | - return result; | |
316 | - } | |
317 | - template< typename otherT > | |
318 | - complex( const complex<otherT> &rhs) | |
319 | - { | |
320 | - r = (T)rhs.r; | |
321 | - i = (T)rhs.i; | |
322 | - } | |
323 | - template< typename otherT > | |
324 | - complex& operator=(const complex<otherT> &rhs) | |
325 | - { | |
326 | - r = (T)rhs.r; | |
327 | - i = (T)rhs.i; | |
328 | - return *this; | |
329 | - } | |
330 | - | |
331 | -}; | |
332 | - | |
333 | -} //end RTS namespace | |
334 | - | |
335 | -//addition | |
336 | -template<typename T> | |
337 | -CUDA_CALLABLE static stim::complex<T> operator+(const double a, const stim::complex<T> b) | |
338 | -{ | |
339 | - return stim::complex<T>((T)a + b.r, b.i); | |
340 | -} | |
341 | - | |
342 | -//subtraction with a real value | |
343 | -template<typename T> | |
344 | -CUDA_CALLABLE static stim::complex<T> operator-(const double a, const stim::complex<T> b) | |
345 | -{ | |
346 | - return stim::complex<T>((T)a - b.r, -b.i); | |
347 | -} | |
348 | - | |
349 | -//minus sign | |
350 | -template<typename T> | |
351 | -CUDA_CALLABLE static stim::complex<T> operator-(const stim::complex<T> &rhs) | |
352 | -{ | |
353 | - return stim::complex<T>(-rhs.r, -rhs.i); | |
354 | -} | |
355 | - | |
356 | -//multiply a T value by a complex value | |
357 | -template<typename T> | |
358 | -CUDA_CALLABLE static stim::complex<T> operator*(const double a, const stim::complex<T> b) | |
359 | -{ | |
360 | - return stim::complex<T>((T)a * b.r, (T)a * b.i); | |
361 | -} | |
362 | - | |
363 | -//divide a T value by a complex value | |
364 | -template<typename T> | |
365 | -CUDA_CALLABLE static stim::complex<T> operator/(const double a, const stim::complex<T> b) | |
366 | -{ | |
367 | - stim::complex<T> result; | |
368 | - | |
369 | - T denom = b.r * b.r + b.i * b.i; | |
370 | - | |
371 | - result.r = ((T)a * b.r) / denom; | |
372 | - result.i = -((T)a * b.i) / denom; | |
373 | - | |
374 | - return result; | |
375 | -} | |
376 | - | |
377 | - | |
378 | -template<typename T> | |
379 | -CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, T y) | |
380 | -{ | |
381 | - return x.pow(y); | |
382 | -} | |
383 | -template<typename T> | |
384 | -CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, int y) | |
385 | -{ | |
386 | - return x.pow(y); | |
387 | -} | |
388 | - | |
389 | -//log function | |
390 | -template<typename T> | |
391 | -CUDA_CALLABLE static stim::complex<T> log(stim::complex<T> x) | |
392 | -{ | |
393 | - return x.log(); | |
394 | -} | |
395 | - | |
396 | -//exp function | |
397 | -template<typename T> | |
398 | -CUDA_CALLABLE static stim::complex<T> exp(stim::complex<T> x) | |
399 | -{ | |
400 | - return x.exp(); | |
401 | -} | |
402 | - | |
403 | -//sqrt function | |
404 | -template<typename T> | |
405 | -CUDA_CALLABLE static stim::complex<T> sqrt(stim::complex<T> x) | |
406 | -{ | |
407 | - return x.sqrt(); | |
408 | -} | |
409 | - | |
410 | - | |
411 | -template <typename T> | |
412 | -CUDA_CALLABLE static T abs(stim::complex<T> a) | |
413 | -{ | |
414 | - return a.abs(); | |
415 | -} | |
416 | - | |
417 | -template <typename T> | |
418 | -CUDA_CALLABLE static T real(stim::complex<T> a) | |
419 | -{ | |
420 | - return a.r; | |
421 | -} | |
422 | - | |
423 | -//template <typename T> | |
424 | -CUDA_CALLABLE static float real(float a) | |
425 | -{ | |
426 | - return a; | |
427 | -} | |
428 | - | |
429 | -template <typename T> | |
430 | -CUDA_CALLABLE static T imag(stim::complex<T> a) | |
431 | -{ | |
432 | - return a.i; | |
433 | -} | |
434 | - | |
435 | -//trigonometric functions | |
436 | -//template<class A> | |
437 | -/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x) | |
438 | -{ | |
439 | - stim::complex<float> result; | |
440 | - result.r = sinf(x.r) * coshf(x.i); | |
441 | - result.i = cosf(x.r) * sinhf(x.i); | |
442 | - | |
443 | - return result; | |
444 | -}*/ | |
445 | - | |
446 | -template<class A> | |
447 | -CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x) | |
448 | -{ | |
449 | - stim::complex<A> result; | |
450 | - result.r = (A)std::sin(x.r) * (A)std::cosh(x.i); | |
451 | - result.i = (A)std::cos(x.r) * (A)std::sinh(x.i); | |
452 | - | |
453 | - return result; | |
454 | -} | |
455 | - | |
456 | -//floating point template | |
457 | -//template<class A> | |
458 | -/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x) | |
459 | -{ | |
460 | - stim::complex<float> result; | |
461 | - result.r = cosf(x.r) * coshf(x.i); | |
462 | - result.i = -(sinf(x.r) * sinhf(x.i)); | |
463 | - | |
464 | - return result; | |
465 | -}*/ | |
466 | - | |
467 | -template<class A> | |
468 | -CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x) | |
469 | -{ | |
470 | - stim::complex<A> result; | |
471 | - result.r = (A)std::cos(x.r) * (A)std::cosh(x.i); | |
472 | - result.i = -((A)std::sin(x.r) * (A)std::sinh(x.i)); | |
473 | - | |
474 | - return result; | |
475 | -} | |
476 | - | |
477 | - | |
478 | -template<class A> | |
479 | -std::ostream& operator<<(std::ostream& os, stim::complex<A> x) | |
480 | -{ | |
481 | - os<<x.str(); | |
482 | - return os; | |
483 | -} | |
484 | - | |
485 | -template<class A> | |
486 | -std::istream& operator>>(std::istream& is, stim::complex<A>& x) | |
487 | -{ | |
488 | - A r, i; | |
489 | - r = i = 0; //initialize the real and imaginary parts to zero | |
490 | - is>>r; //parse | |
491 | - is>>i; | |
492 | - | |
493 | - x.real(r); //assign the parsed values to x | |
494 | - x.imag(i); | |
495 | - | |
496 | - return is; //return the stream | |
497 | -} | |
498 | - | |
499 | -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7 | |
500 | -//template<class T> using rtsComplex = stim::complex<T>; | |
501 | -//#endif | |
502 | - | |
503 | - | |
504 | - | |
505 | -#endif | |
1 | +/*RTS Complex number class. This class is CUDA compatible, | |
2 | +and can therefore be used in CUDA code and on CUDA devices. | |
3 | +*/ | |
4 | + | |
5 | +#ifndef RTS_COMPLEX | |
6 | +#define RTS_COMPLEX | |
7 | + | |
8 | +#include "../cuda/callable.h" | |
9 | +#include <cmath> | |
10 | +#include <string> | |
11 | +#include <sstream> | |
12 | +#include <iostream> | |
13 | + | |
14 | +namespace stim | |
15 | +{ | |
16 | + | |
17 | +template <class T> | |
18 | +struct complex | |
19 | +{ | |
20 | + T r, i; | |
21 | + | |
22 | + //default constructor | |
23 | + CUDA_CALLABLE complex() | |
24 | + { | |
25 | + r = 0; | |
26 | + i = 0; | |
27 | + } | |
28 | + | |
29 | + //constructor when given real and imaginary values | |
30 | + CUDA_CALLABLE complex(T r, T i = 0) | |
31 | + { | |
32 | + this->r = r; | |
33 | + this->i = i; | |
34 | + } | |
35 | + | |
36 | + //access methods | |
37 | + CUDA_CALLABLE T real() | |
38 | + { | |
39 | + return r; | |
40 | + } | |
41 | + | |
42 | + CUDA_CALLABLE T real(T r_val) | |
43 | + { | |
44 | + r = r_val; | |
45 | + return r_val; | |
46 | + } | |
47 | + | |
48 | + CUDA_CALLABLE T imag() | |
49 | + { | |
50 | + return i; | |
51 | + } | |
52 | + CUDA_CALLABLE T imag(T i_val) | |
53 | + { | |
54 | + i = i_val; | |
55 | + return i_val; | |
56 | + } | |
57 | + | |
58 | + | |
59 | + | |
60 | + //return the current value multiplied by i | |
61 | + CUDA_CALLABLE complex<T> imul() | |
62 | + { | |
63 | + complex<T> result; | |
64 | + result.r = -i; | |
65 | + result.i = r; | |
66 | + | |
67 | + return result; | |
68 | + } | |
69 | + | |
70 | + //returns the complex signum (-1, 0, 1) | |
71 | + CUDA_CALLABLE int sgn(){ | |
72 | + if(r > 0) return 1; | |
73 | + else if(r < 0) return -1; | |
74 | + else return (0 < i - i < 0); | |
75 | + } | |
76 | + | |
77 | + //ARITHMETIC OPERATORS-------------------- | |
78 | + | |
79 | + //binary + operator (returns the result of adding two complex values) | |
80 | + CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs) const | |
81 | + { | |
82 | + complex<T> result; | |
83 | + result.r = r + rhs.r; | |
84 | + result.i = i + rhs.i; | |
85 | + return result; | |
86 | + } | |
87 | + | |
88 | + CUDA_CALLABLE complex<T> operator+ (const T rhs) const | |
89 | + { | |
90 | + complex<T> result; | |
91 | + result.r = r + rhs; | |
92 | + result.i = i; | |
93 | + return result; | |
94 | + } | |
95 | + | |
96 | + //binary - operator (returns the result of adding two complex values) | |
97 | + CUDA_CALLABLE complex<T> operator- (const complex<T> rhs) const | |
98 | + { | |
99 | + complex<T> result; | |
100 | + result.r = r - rhs.r; | |
101 | + result.i = i - rhs.i; | |
102 | + return result; | |
103 | + } | |
104 | + | |
105 | + //binary - operator (returns the result of adding two complex values) | |
106 | + CUDA_CALLABLE complex<T> operator- (const T rhs) | |
107 | + { | |
108 | + complex<T> result; | |
109 | + result.r = r - rhs; | |
110 | + result.i = i; | |
111 | + return result; | |
112 | + } | |
113 | + | |
114 | + //binary MULTIPLICATION operators (returns the result of multiplying complex values) | |
115 | + CUDA_CALLABLE complex<T> operator* (const complex<T> rhs) const | |
116 | + { | |
117 | + complex<T> result; | |
118 | + result.r = r * rhs.r - i * rhs.i; | |
119 | + result.i = r * rhs.i + i * rhs.r; | |
120 | + return result; | |
121 | + } | |
122 | + CUDA_CALLABLE complex<T> operator* (const T rhs) | |
123 | + { | |
124 | + return complex<T>(r * rhs, i * rhs); | |
125 | + } | |
126 | + | |
127 | + //binary DIVISION operators (returns the result of dividing complex values) | |
128 | + CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs) const | |
129 | + { | |
130 | + complex<T> result; | |
131 | + T denom = rhs.r * rhs.r + rhs.i * rhs.i; | |
132 | + result.r = (r * rhs.r + i * rhs.i) / denom; | |
133 | + result.i = (- r * rhs.i + i * rhs.r) / denom; | |
134 | + | |
135 | + return result; | |
136 | + } | |
137 | + CUDA_CALLABLE complex<T> operator/ (const T rhs) | |
138 | + { | |
139 | + return complex<T>(r / rhs, i / rhs); | |
140 | + } | |
141 | + | |
142 | + //ASSIGNMENT operators----------------------------------- | |
143 | + CUDA_CALLABLE complex<T> & operator=(const complex<T> &rhs) | |
144 | + { | |
145 | + //check for self-assignment | |
146 | + if(this != &rhs) | |
147 | + { | |
148 | + this->r = rhs.r; | |
149 | + this->i = rhs.i; | |
150 | + } | |
151 | + return *this; | |
152 | + } | |
153 | + CUDA_CALLABLE complex<T> & operator=(const T &rhs) | |
154 | + { | |
155 | + this->r = rhs; | |
156 | + this->i = 0; | |
157 | + | |
158 | + return *this; | |
159 | + } | |
160 | + | |
161 | + //arithmetic assignment operators | |
162 | + CUDA_CALLABLE complex<T> operator+=(const complex<T> &rhs) | |
163 | + { | |
164 | + *this = *this + rhs; | |
165 | + return *this; | |
166 | + } | |
167 | + CUDA_CALLABLE complex<T> operator+=(const T &rhs) | |
168 | + { | |
169 | + *this = *this + rhs; | |
170 | + return *this; | |
171 | + } | |
172 | + | |
173 | + CUDA_CALLABLE complex<T> operator-=(const complex<T> &rhs) | |
174 | + { | |
175 | + *this = *this - rhs; | |
176 | + return *this; | |
177 | + } | |
178 | + CUDA_CALLABLE complex<T> operator-=(const T &rhs) | |
179 | + { | |
180 | + *this = *this - rhs; | |
181 | + return *this; | |
182 | + } | |
183 | + | |
184 | + CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs) | |
185 | + { | |
186 | + *this = *this * rhs; | |
187 | + return *this; | |
188 | + } | |
189 | + CUDA_CALLABLE complex<T> operator*=(const T &rhs) | |
190 | + { | |
191 | + *this = *this * rhs; | |
192 | + return *this; | |
193 | + } | |
194 | + //divide and assign | |
195 | + CUDA_CALLABLE complex<T> operator/=(const complex<T> &rhs) | |
196 | + { | |
197 | + *this = *this / rhs; | |
198 | + return *this; | |
199 | + } | |
200 | + CUDA_CALLABLE complex<T> operator/=(const T &rhs) | |
201 | + { | |
202 | + *this = *this / rhs; | |
203 | + return *this; | |
204 | + } | |
205 | + | |
206 | + //absolute value operator (returns the absolute value of the complex number) | |
207 | + CUDA_CALLABLE T abs() | |
208 | + { | |
209 | + return std::sqrt(r * r + i * i); | |
210 | + } | |
211 | + | |
212 | + CUDA_CALLABLE complex<T> log() | |
213 | + { | |
214 | + complex<T> result; | |
215 | + result.r = (T)std::log(std::sqrt(r * r + i * i)); | |
216 | + result.i = (T)std::atan2(i, r); | |
217 | + | |
218 | + | |
219 | + return result; | |
220 | + } | |
221 | + | |
222 | + CUDA_CALLABLE complex<T> exp() | |
223 | + { | |
224 | + complex<T> result; | |
225 | + | |
226 | + T e_r = std::exp(r); | |
227 | + result.r = e_r * (T)std::cos(i); | |
228 | + result.i = e_r * (T)std::sin(i); | |
229 | + | |
230 | + return result; | |
231 | + } | |
232 | + | |
233 | + /*CUDA_CALLABLE complex<T> pow(int y) | |
234 | + { | |
235 | + | |
236 | + return pow((double)y); | |
237 | + }*/ | |
238 | + | |
239 | + CUDA_CALLABLE complex<T> pow(T y) | |
240 | + { | |
241 | + complex<T> result; | |
242 | + | |
243 | + result = log() * y; | |
244 | + | |
245 | + return result.exp(); | |
246 | + } | |
247 | + | |
248 | + CUDA_CALLABLE complex<T> sqrt() | |
249 | + { | |
250 | + complex<T> result; | |
251 | + | |
252 | + //convert to polar coordinates | |
253 | + T a = std::sqrt(r*r + i*i); | |
254 | + T theta = std::atan2(i, r); | |
255 | + | |
256 | + //find the square root | |
257 | + T a_p = std::sqrt(a); | |
258 | + T theta_p = theta/2.0f; | |
259 | + | |
260 | + //convert back to cartesian coordinates | |
261 | + result.r = a_p * std::cos(theta_p); | |
262 | + result.i = a_p * std::sin(theta_p); | |
263 | + | |
264 | + return result; | |
265 | + } | |
266 | + | |
267 | + std::string str() | |
268 | + { | |
269 | + std::stringstream ss; | |
270 | + ss<<"("<<r<<","<<i<<")"; | |
271 | + | |
272 | + return ss.str(); | |
273 | + } | |
274 | + | |
275 | + //COMPARISON operators | |
276 | + CUDA_CALLABLE bool operator==(complex<T> rhs) | |
277 | + { | |
278 | + if(r == rhs.r && i == rhs.i) | |
279 | + return true; | |
280 | + return false; | |
281 | + } | |
282 | + | |
283 | + CUDA_CALLABLE bool operator==(T rhs) | |
284 | + { | |
285 | + if(r == rhs && i == 0) | |
286 | + return true; | |
287 | + return false; | |
288 | + } | |
289 | + | |
290 | + CUDA_CALLABLE bool operator!=(T rhs) | |
291 | + { | |
292 | + if(r != rhs || i != 0) | |
293 | + return true; | |
294 | + return false; | |
295 | + } | |
296 | + | |
297 | + CUDA_CALLABLE bool operator<(complex<T> rhs){ | |
298 | + return abs() < rhs.abs(); | |
299 | + } | |
300 | + CUDA_CALLABLE bool operator<=(complex<T> rhs){ | |
301 | + return abs() <= rhs.abs(); | |
302 | + } | |
303 | + CUDA_CALLABLE bool operator>(complex<T> rhs){ | |
304 | + return abs() > rhs.abs(); | |
305 | + } | |
306 | + CUDA_CALLABLE bool operator >=(complex<T> rhs){ | |
307 | + return abs() >= rhs.abs(); | |
308 | + } | |
309 | + | |
310 | + //CASTING operators | |
311 | + template < typename otherT > | |
312 | + operator complex<otherT>() | |
313 | + { | |
314 | + complex<otherT> result((otherT)r, (otherT)i); | |
315 | + return result; | |
316 | + } | |
317 | + template< typename otherT > | |
318 | + complex( const complex<otherT> &rhs) | |
319 | + { | |
320 | + r = (T)rhs.r; | |
321 | + i = (T)rhs.i; | |
322 | + } | |
323 | + template< typename otherT > | |
324 | + complex& operator=(const complex<otherT> &rhs) | |
325 | + { | |
326 | + r = (T)rhs.r; | |
327 | + i = (T)rhs.i; | |
328 | + return *this; | |
329 | + } | |
330 | + | |
331 | +}; | |
332 | + | |
333 | +} //end RTS namespace | |
334 | + | |
335 | +//addition | |
336 | +template<typename T> | |
337 | +CUDA_CALLABLE static stim::complex<T> operator+(const double a, const stim::complex<T> b) | |
338 | +{ | |
339 | + return stim::complex<T>((T)a + b.r, b.i); | |
340 | +} | |
341 | + | |
342 | +//subtraction with a real value | |
343 | +template<typename T> | |
344 | +CUDA_CALLABLE static stim::complex<T> operator-(const double a, const stim::complex<T> b) | |
345 | +{ | |
346 | + return stim::complex<T>((T)a - b.r, -b.i); | |
347 | +} | |
348 | + | |
349 | +//minus sign | |
350 | +template<typename T> | |
351 | +CUDA_CALLABLE static stim::complex<T> operator-(const stim::complex<T> &rhs) | |
352 | +{ | |
353 | + return stim::complex<T>(-rhs.r, -rhs.i); | |
354 | +} | |
355 | + | |
356 | +//multiply a T value by a complex value | |
357 | +template<typename T> | |
358 | +CUDA_CALLABLE static stim::complex<T> operator*(const double a, const stim::complex<T> b) | |
359 | +{ | |
360 | + return stim::complex<T>((T)a * b.r, (T)a * b.i); | |
361 | +} | |
362 | + | |
363 | +//divide a T value by a complex value | |
364 | +template<typename T> | |
365 | +CUDA_CALLABLE static stim::complex<T> operator/(const double a, const stim::complex<T> b) | |
366 | +{ | |
367 | + stim::complex<T> result; | |
368 | + | |
369 | + T denom = b.r * b.r + b.i * b.i; | |
370 | + | |
371 | + result.r = ((T)a * b.r) / denom; | |
372 | + result.i = -((T)a * b.i) / denom; | |
373 | + | |
374 | + return result; | |
375 | +} | |
376 | + | |
377 | + | |
378 | +template<typename T> | |
379 | +CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, T y) | |
380 | +{ | |
381 | + return x.pow(y); | |
382 | +} | |
383 | +template<typename T> | |
384 | +CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, int y) | |
385 | +{ | |
386 | + return x.pow(y); | |
387 | +} | |
388 | + | |
389 | +//log function | |
390 | +template<typename T> | |
391 | +CUDA_CALLABLE static stim::complex<T> log(stim::complex<T> x) | |
392 | +{ | |
393 | + return x.log(); | |
394 | +} | |
395 | + | |
396 | +//exp function | |
397 | +template<typename T> | |
398 | +CUDA_CALLABLE static stim::complex<T> exp(stim::complex<T> x) | |
399 | +{ | |
400 | + return x.exp(); | |
401 | +} | |
402 | + | |
403 | +//sqrt function | |
404 | +template<typename T> | |
405 | +CUDA_CALLABLE static stim::complex<T> sqrt(stim::complex<T> x) | |
406 | +{ | |
407 | + return x.sqrt(); | |
408 | +} | |
409 | + | |
410 | + | |
411 | +template <typename T> | |
412 | +CUDA_CALLABLE static T abs(stim::complex<T> a) | |
413 | +{ | |
414 | + return a.abs(); | |
415 | +} | |
416 | + | |
417 | +template <typename T> | |
418 | +CUDA_CALLABLE static T real(stim::complex<T> a) | |
419 | +{ | |
420 | + return a.r; | |
421 | +} | |
422 | + | |
423 | +//template <typename T> | |
424 | +CUDA_CALLABLE static float real(float a) | |
425 | +{ | |
426 | + return a; | |
427 | +} | |
428 | + | |
429 | +template <typename T> | |
430 | +CUDA_CALLABLE static T imag(stim::complex<T> a) | |
431 | +{ | |
432 | + return a.i; | |
433 | +} | |
434 | + | |
435 | +//trigonometric functions | |
436 | +//template<class A> | |
437 | +/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x) | |
438 | +{ | |
439 | + stim::complex<float> result; | |
440 | + result.r = sinf(x.r) * coshf(x.i); | |
441 | + result.i = cosf(x.r) * sinhf(x.i); | |
442 | + | |
443 | + return result; | |
444 | +}*/ | |
445 | + | |
446 | +template<class A> | |
447 | +CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x) | |
448 | +{ | |
449 | + stim::complex<A> result; | |
450 | + result.r = (A)std::sin(x.r) * (A)std::cosh(x.i); | |
451 | + result.i = (A)std::cos(x.r) * (A)std::sinh(x.i); | |
452 | + | |
453 | + return result; | |
454 | +} | |
455 | + | |
456 | +//floating point template | |
457 | +//template<class A> | |
458 | +/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x) | |
459 | +{ | |
460 | + stim::complex<float> result; | |
461 | + result.r = cosf(x.r) * coshf(x.i); | |
462 | + result.i = -(sinf(x.r) * sinhf(x.i)); | |
463 | + | |
464 | + return result; | |
465 | +}*/ | |
466 | + | |
467 | +template<class A> | |
468 | +CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x) | |
469 | +{ | |
470 | + stim::complex<A> result; | |
471 | + result.r = (A)std::cos(x.r) * (A)std::cosh(x.i); | |
472 | + result.i = -((A)std::sin(x.r) * (A)std::sinh(x.i)); | |
473 | + | |
474 | + return result; | |
475 | +} | |
476 | + | |
477 | + | |
478 | +template<class A> | |
479 | +std::ostream& operator<<(std::ostream& os, stim::complex<A> x) | |
480 | +{ | |
481 | + os<<x.str(); | |
482 | + return os; | |
483 | +} | |
484 | + | |
485 | +template<class A> | |
486 | +std::istream& operator>>(std::istream& is, stim::complex<A>& x) | |
487 | +{ | |
488 | + A r, i; | |
489 | + r = i = 0; //initialize the real and imaginary parts to zero | |
490 | + is>>r; //parse | |
491 | + is>>i; | |
492 | + | |
493 | + x.real(r); //assign the parsed values to x | |
494 | + x.imag(i); | |
495 | + | |
496 | + return is; //return the stream | |
497 | +} | |
498 | + | |
499 | +//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7 | |
500 | +//template<class T> using rtsComplex = stim::complex<T>; | |
501 | +//#endif | |
502 | + | |
503 | + | |
504 | + | |
505 | +#endif | ... | ... |
math/complexfield.cuh
1 | -#ifndef RTS_COMPLEXFIELD_H | |
2 | -#define RTS_COMPLEXFIELD_H | |
3 | - | |
4 | -#include "cublas_v2.h" | |
5 | -#include <cuda_runtime.h> | |
6 | - | |
7 | -#include "../math/field.cuh" | |
8 | -#include "../math/complex.h" | |
9 | -#include "../math/realfield.cuh" | |
10 | - | |
11 | -namespace stim{ | |
12 | - | |
13 | -template<typename T> | |
14 | -__global__ void gpu_complexfield_mag(T* dest, complex<T>* source, unsigned int r0, unsigned int r1){ | |
15 | - | |
16 | - int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
17 | - int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
18 | - | |
19 | - //make sure that the thread indices are in-bounds | |
20 | - if(iu >= r0 || iv >= r1) return; | |
21 | - | |
22 | - //compute the index into the field | |
23 | - int i = iv*r0 + iu; | |
24 | - | |
25 | - //calculate and store the result | |
26 | - dest[i] = source[i].abs(); | |
27 | -} | |
28 | - | |
29 | -/*This class stores functions for saving images of complex fields | |
30 | -*/ | |
31 | -template<typename T, unsigned int D = 1> | |
32 | -class complexfield : public field< stim::complex<T>, D >{ | |
33 | - using field< stim::complex<T>, D >::R; | |
34 | - using field< stim::complex<T>, D >::X; | |
35 | - using field< stim::complex<T>, D >::shape; | |
36 | - using field< stim::complex<T>, D >::cuda_params; | |
37 | - | |
38 | - | |
39 | - | |
40 | -public: | |
41 | - | |
42 | - //find the maximum value of component n | |
43 | - stim::complex<T> find_max(unsigned int n){ | |
44 | - cublasStatus_t stat; | |
45 | - cublasHandle_t handle; | |
46 | - | |
47 | - //create a CUBLAS handle | |
48 | - stat = cublasCreate(&handle); | |
49 | - if(stat != CUBLAS_STATUS_SUCCESS){ | |
50 | - std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | |
51 | - exit(1); | |
52 | - } | |
53 | - | |
54 | - int L = R[0] * R[1]; //compute the number of discrete points in a slice | |
55 | - int index; //result of the max operation | |
56 | - stim::complex<T> result; | |
57 | - | |
58 | - if(sizeof(T) == 8) | |
59 | - stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index); | |
60 | - else | |
61 | - stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index); | |
62 | - | |
63 | - index -= 1; //adjust for 1-based indexing | |
64 | - | |
65 | - //if there was a GPU error, terminate | |
66 | - if(stat != CUBLAS_STATUS_SUCCESS){ | |
67 | - std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | |
68 | - exit(1); | |
69 | - } | |
70 | - | |
71 | - //retrieve the maximum value for this slice and store it in the maxVal array | |
72 | - std::cout<<X[n]<<std::endl; | |
73 | - HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(stim::complex<T>), cudaMemcpyDeviceToHost)); | |
74 | - return result; | |
75 | - } | |
76 | - | |
77 | -public: | |
78 | - | |
79 | - enum attribute {magnitude, real, imaginary}; | |
80 | - | |
81 | - //constructor (no parameters) | |
82 | - complexfield() : field<stim::complex<T>, D>(){}; | |
83 | - | |
84 | - //constructor (resolution specified) | |
85 | - complexfield(unsigned int r0, unsigned int r1) : field<stim::complex<T>, D>(r0, r1){}; | |
86 | - | |
87 | - //assignment from a field of complex values | |
88 | - complexfield & operator=(const field< stim::complex<T>, D > rhs){ | |
89 | - field< complex<T>, D >::operator=(rhs); | |
90 | - return *this; | |
91 | - } | |
92 | - | |
93 | - //assignment operator (scalar value) | |
94 | - complexfield & operator= (const complex<T> rhs){ | |
95 | - | |
96 | - field< complex<T>, D >::operator=(rhs); | |
97 | - return *this; | |
98 | - } | |
99 | - | |
100 | - //assignment operator (vector value) | |
101 | - complexfield & operator= (const vec< complex<T>, D > rhs){ | |
102 | - | |
103 | - field< complex<T>, D >::operator=(rhs); | |
104 | - return *this; | |
105 | - } | |
106 | - | |
107 | - //cropping | |
108 | - complexfield crop(unsigned int width, unsigned int height){ | |
109 | - | |
110 | - complexfield<T, D> result; | |
111 | - result = field< complex<T>, D>::crop(width, height); | |
112 | - return result; | |
113 | - } | |
114 | - | |
115 | - void toImage(std::string filename, attribute type = magnitude, unsigned int n=0){ | |
116 | - | |
117 | - field<T, 1> rf(R[0], R[1]); | |
118 | - | |
119 | - //get cuda parameters | |
120 | - dim3 blocks, grids; | |
121 | - cuda_params(grids, blocks); | |
122 | - | |
123 | - if(type == magnitude){ | |
124 | - gpu_complexfield_mag <<<grids, blocks>>> (rf.ptr(), X[n], R[0], R[1]); | |
125 | - rf.toImage(filename, n, true); | |
126 | - } | |
127 | - | |
128 | - } | |
129 | - | |
130 | - | |
131 | -}; | |
132 | - | |
133 | - | |
134 | -} //end namespace rts | |
135 | - | |
136 | - | |
137 | -#endif | |
1 | +#ifndef RTS_COMPLEXFIELD_H | |
2 | +#define RTS_COMPLEXFIELD_H | |
3 | + | |
4 | +#include "cublas_v2.h" | |
5 | +#include <cuda_runtime.h> | |
6 | + | |
7 | +#include "../math/field.cuh" | |
8 | +#include "../math/complex.h" | |
9 | +#include "../math/realfield.cuh" | |
10 | + | |
11 | +namespace stim{ | |
12 | + | |
13 | +template<typename T> | |
14 | +__global__ void gpu_complexfield_mag(T* dest, complex<T>* source, unsigned int r0, unsigned int r1){ | |
15 | + | |
16 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
17 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
18 | + | |
19 | + //make sure that the thread indices are in-bounds | |
20 | + if(iu >= r0 || iv >= r1) return; | |
21 | + | |
22 | + //compute the index into the field | |
23 | + int i = iv*r0 + iu; | |
24 | + | |
25 | + //calculate and store the result | |
26 | + dest[i] = source[i].abs(); | |
27 | +} | |
28 | + | |
29 | +/*This class stores functions for saving images of complex fields | |
30 | +*/ | |
31 | +template<typename T, unsigned int D = 1> | |
32 | +class complexfield : public field< stim::complex<T>, D >{ | |
33 | + using field< stim::complex<T>, D >::R; | |
34 | + using field< stim::complex<T>, D >::X; | |
35 | + using field< stim::complex<T>, D >::shape; | |
36 | + using field< stim::complex<T>, D >::cuda_params; | |
37 | + | |
38 | + | |
39 | + | |
40 | +public: | |
41 | + | |
42 | + //find the maximum value of component n | |
43 | + stim::complex<T> find_max(unsigned int n){ | |
44 | + cublasStatus_t stat; | |
45 | + cublasHandle_t handle; | |
46 | + | |
47 | + //create a CUBLAS handle | |
48 | + stat = cublasCreate(&handle); | |
49 | + if(stat != CUBLAS_STATUS_SUCCESS){ | |
50 | + std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | |
51 | + exit(1); | |
52 | + } | |
53 | + | |
54 | + int L = R[0] * R[1]; //compute the number of discrete points in a slice | |
55 | + int index; //result of the max operation | |
56 | + stim::complex<T> result; | |
57 | + | |
58 | + if(sizeof(T) == 8) | |
59 | + stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index); | |
60 | + else | |
61 | + stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index); | |
62 | + | |
63 | + index -= 1; //adjust for 1-based indexing | |
64 | + | |
65 | + //if there was a GPU error, terminate | |
66 | + if(stat != CUBLAS_STATUS_SUCCESS){ | |
67 | + std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | |
68 | + exit(1); | |
69 | + } | |
70 | + | |
71 | + //retrieve the maximum value for this slice and store it in the maxVal array | |
72 | + std::cout<<X[n]<<std::endl; | |
73 | + HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(stim::complex<T>), cudaMemcpyDeviceToHost)); | |
74 | + return result; | |
75 | + } | |
76 | + | |
77 | +public: | |
78 | + | |
79 | + enum attribute {magnitude, real, imaginary}; | |
80 | + | |
81 | + //constructor (no parameters) | |
82 | + complexfield() : field<stim::complex<T>, D>(){}; | |
83 | + | |
84 | + //constructor (resolution specified) | |
85 | + complexfield(unsigned int r0, unsigned int r1) : field<stim::complex<T>, D>(r0, r1){}; | |
86 | + | |
87 | + //assignment from a field of complex values | |
88 | + complexfield & operator=(const field< stim::complex<T>, D > rhs){ | |
89 | + field< complex<T>, D >::operator=(rhs); | |
90 | + return *this; | |
91 | + } | |
92 | + | |
93 | + //assignment operator (scalar value) | |
94 | + complexfield & operator= (const complex<T> rhs){ | |
95 | + | |
96 | + field< complex<T>, D >::operator=(rhs); | |
97 | + return *this; | |
98 | + } | |
99 | + | |
100 | + //assignment operator (vector value) | |
101 | + complexfield & operator= (const vec< complex<T>, D > rhs){ | |
102 | + | |
103 | + field< complex<T>, D >::operator=(rhs); | |
104 | + return *this; | |
105 | + } | |
106 | + | |
107 | + //cropping | |
108 | + complexfield crop(unsigned int width, unsigned int height){ | |
109 | + | |
110 | + complexfield<T, D> result; | |
111 | + result = field< complex<T>, D>::crop(width, height); | |
112 | + return result; | |
113 | + } | |
114 | + | |
115 | + void toImage(std::string filename, attribute type = magnitude, unsigned int n=0){ | |
116 | + | |
117 | + field<T, 1> rf(R[0], R[1]); | |
118 | + | |
119 | + //get cuda parameters | |
120 | + dim3 blocks, grids; | |
121 | + cuda_params(grids, blocks); | |
122 | + | |
123 | + if(type == magnitude){ | |
124 | + gpu_complexfield_mag <<<grids, blocks>>> (rf.ptr(), X[n], R[0], R[1]); | |
125 | + rf.toImage(filename, n, true); | |
126 | + } | |
127 | + | |
128 | + } | |
129 | + | |
130 | + | |
131 | +}; | |
132 | + | |
133 | + | |
134 | +} //end namespace rts | |
135 | + | |
136 | + | |
137 | +#endif | ... | ... |
math/field.cuh
1 | -#ifndef RTS_FIELD_CUH | |
2 | -#define RTS_FIELD_CUH | |
3 | - | |
4 | -#include <vector> | |
5 | -#include <string> | |
6 | -#include <sstream> | |
7 | - | |
8 | -#include "cublas_v2.h" | |
9 | -#include <cuda_runtime.h> | |
10 | - | |
11 | -#include "../math/rect.h" | |
12 | -#include "../cuda/threads.h" | |
13 | -#include "../cuda/error.h" | |
14 | -#include "../cuda/devices.h" | |
15 | -#include "../visualization/colormap.h" | |
16 | - | |
17 | - | |
18 | -namespace stim{ | |
19 | - | |
20 | -//multiply R = X * Y | |
21 | -template<typename T> | |
22 | -__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){ | |
23 | - | |
24 | - int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
25 | - int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
26 | - | |
27 | - //make sure that the thread indices are in-bounds | |
28 | - if(iu >= r0 || iv >= r1) return; | |
29 | - | |
30 | - //compute the index into the field | |
31 | - int i = iv*r0 + iu; | |
32 | - | |
33 | - //calculate and store the result | |
34 | - R[i] = X[i] * Y[i]; | |
35 | -} | |
36 | - | |
37 | -//assign a constant value to all points | |
38 | -template<typename T> | |
39 | -__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){ | |
40 | - | |
41 | - int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
42 | - int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
43 | - | |
44 | - //make sure that the thread indices are in-bounds | |
45 | - if(iu >= r0 || iv >= r1) return; | |
46 | - | |
47 | - //compute the index into the field | |
48 | - int i = iv*r0 + iu; | |
49 | - | |
50 | - //calculate and store the result | |
51 | - ptr[i] = val; | |
52 | -} | |
53 | - | |
54 | -//crop the field to the new dimensions (width x height) | |
55 | -template<typename T> | |
56 | -__global__ void gpu_field_crop(T* dest, T* source, | |
57 | - unsigned int r0, unsigned int r1, | |
58 | - unsigned int width, unsigned int height){ | |
59 | - | |
60 | - int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
61 | - int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
62 | - | |
63 | - //make sure that the thread indices are in-bounds | |
64 | - if(iu >= width || iv >= height) return; | |
65 | - | |
66 | - //compute the index into the field | |
67 | - int is = iv*r0 + iu; | |
68 | - int id = iv*width + iu; | |
69 | - | |
70 | - //calculate and store the result | |
71 | - dest[id] = source[is]; | |
72 | -} | |
73 | - | |
74 | -template<typename T, unsigned int D = 1> | |
75 | -class field{ | |
76 | - | |
77 | -protected: | |
78 | - | |
79 | - T* X[D]; //pointer to the field data | |
80 | - unsigned int R[2]; //field resolution | |
81 | - stim::rect<T> shape; //position and shape of the field slice | |
82 | - | |
83 | - //calculates the optimal block and grid sizes using information from the GPU | |
84 | - void cuda_params(dim3& grids, dim3& blocks){ | |
85 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
86 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
87 | - | |
88 | - //create one thread for each detector pixel | |
89 | - blocks = dim3(SQRT_BLOCK, SQRT_BLOCK); | |
90 | - grids = dim3((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
91 | - } | |
92 | - | |
93 | - //find the maximum value of component n | |
94 | - T find_max(unsigned int n){ | |
95 | - cublasStatus_t stat; | |
96 | - cublasHandle_t handle; | |
97 | - | |
98 | - //create a CUBLAS handle | |
99 | - stat = cublasCreate(&handle); | |
100 | - if(stat != CUBLAS_STATUS_SUCCESS){ | |
101 | - std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | |
102 | - exit(1); | |
103 | - } | |
104 | - | |
105 | - int L = R[0] * R[1]; //compute the number of discrete points in a slice | |
106 | - int index; //result of the max operation | |
107 | - T result; | |
108 | - | |
109 | - if(sizeof(T) == 4) | |
110 | - stat = cublasIsamax(handle, L, (const float*)X[n], 1, &index); | |
111 | - else | |
112 | - stat = cublasIdamax(handle, L, (const double*)X[n], 1, &index); | |
113 | - | |
114 | - index -= 1; //adjust for 1-based indexing | |
115 | - | |
116 | - //if there was a GPU error, terminate | |
117 | - if(stat != CUBLAS_STATUS_SUCCESS){ | |
118 | - std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | |
119 | - exit(1); | |
120 | - } | |
121 | - | |
122 | - //retrieve the maximum value for this slice and store it in the maxVal array | |
123 | - HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(T), cudaMemcpyDeviceToHost)); | |
124 | - return result; | |
125 | - } | |
126 | - | |
127 | -public: | |
128 | - | |
129 | - //returns a list of file names given an input string with wild cards | |
130 | - std::vector<std::string> process_filename(std::string name){ | |
131 | - std::stringstream ss(name); | |
132 | - std::string item; | |
133 | - std::vector<std::string> elems; | |
134 | - while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension) | |
135 | - { | |
136 | - elems.push_back(item); | |
137 | - } | |
138 | - | |
139 | - std::string prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters) | |
140 | - std::string ext = elems[1]; //file extension (ex. .bmp, .png) | |
141 | - ext = std::string(".") + ext; //add a period back into the extension | |
142 | - | |
143 | - size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'') | |
144 | - size_t i1 = prefix.find_last_of("?"); | |
145 | - | |
146 | - std::string postfix = prefix.substr(i1+1); | |
147 | - prefix = prefix.substr(0, i0); | |
148 | - | |
149 | - unsigned int digits = i1 - i0 + 1; //compute the number of wildcards | |
150 | - | |
151 | - std::vector<std::string> flist; //create a vector of file names | |
152 | - //fill the list | |
153 | - for(unsigned int d=0; d<D; d++){ | |
154 | - std::stringstream ss; //assemble the file name | |
155 | - ss<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext; | |
156 | - flist.push_back(ss.str()); | |
157 | - } | |
158 | - | |
159 | - return flist; | |
160 | - } | |
161 | - | |
162 | - void init(){ | |
163 | - for(unsigned int n=0; n<D; n++) | |
164 | - X[n] = NULL; | |
165 | - } | |
166 | - void destroy(){ | |
167 | - for(unsigned int n=0; n<D; n++) | |
168 | - if(X[n] != NULL) | |
169 | - HANDLE_ERROR(cudaFree(X[n])); | |
170 | - } | |
171 | - | |
172 | -public: | |
173 | - //field constructor | |
174 | - field(){ | |
175 | - R[0] = R[1] = 0; | |
176 | - init(); | |
177 | - } | |
178 | - | |
179 | - field(unsigned int x, unsigned int y){ | |
180 | - //set the resolution | |
181 | - R[0] = x; | |
182 | - R[1] = y; | |
183 | - //allocate memory on the GPU | |
184 | - for(unsigned int n=0; n<D; n++){ | |
185 | - HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(T) * R[0] * R[1] )); | |
186 | - } | |
187 | - clear(); //zero the field | |
188 | - } | |
189 | - | |
190 | - ///copy constructor | |
191 | - field(const field &rhs){ | |
192 | - //first make a shallow copy | |
193 | - R[0] = rhs.R[0]; | |
194 | - R[1] = rhs.R[1]; | |
195 | - | |
196 | - for(unsigned int n=0; n<D; n++){ | |
197 | - //do we have to make a deep copy? | |
198 | - if(rhs.X[n] == NULL) | |
199 | - X[n] = NULL; //no | |
200 | - else{ | |
201 | - //allocate the necessary memory | |
202 | - HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1])); | |
203 | - | |
204 | - //copy the slice | |
205 | - HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
206 | - } | |
207 | - } | |
208 | - } | |
209 | - | |
210 | - ~field(){ | |
211 | - destroy(); | |
212 | - } | |
213 | - | |
214 | - //assignment operator | |
215 | - field & operator= (const field & rhs){ | |
216 | - | |
217 | - //de-allocate any existing GPU memory | |
218 | - destroy(); | |
219 | - | |
220 | - //copy the slice resolution | |
221 | - R[0] = rhs.R[0]; | |
222 | - R[1] = rhs.R[1]; | |
223 | - | |
224 | - for(unsigned int n=0; n<D; n++) | |
225 | - { | |
226 | - //allocate the necessary memory | |
227 | - HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1])); | |
228 | - //copy the slice | |
229 | - HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
230 | - } | |
231 | - return *this; | |
232 | - } | |
233 | - | |
234 | - field & operator= (const T rhs){ | |
235 | - | |
236 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
237 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
238 | - | |
239 | - //create one thread for each detector pixel | |
240 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
241 | - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
242 | - | |
243 | - //assign the constant value to all positions and dimensions | |
244 | - for(int n=0; n<D; n++) | |
245 | - stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]); | |
246 | - | |
247 | - return *this; | |
248 | - } | |
249 | - | |
250 | - //assignment of vector component | |
251 | - field & operator= (const vec<T, D> rhs){ | |
252 | - | |
253 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
254 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
255 | - | |
256 | - //create one thread for each detector pixel | |
257 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
258 | - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
259 | - | |
260 | - //assign the constant value to all positions and dimensions | |
261 | - for(unsigned int n=0; n<D; n++) | |
262 | - stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]); | |
263 | - | |
264 | - return *this; | |
265 | - | |
266 | - } | |
267 | - | |
268 | - //multiply two fields (element-wise multiplication) | |
269 | - field<T, D> operator* (const field & rhs){ | |
270 | - | |
271 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
272 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
273 | - | |
274 | - //create one thread for each detector pixel | |
275 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
276 | - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
277 | - | |
278 | - //create a scalar field to store the result | |
279 | - field<T, D> result(R[0], R[1]); | |
280 | - | |
281 | - for(int n=0; n<D; n++) | |
282 | - stim::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]); | |
283 | - | |
284 | - return result; | |
285 | - } | |
286 | - | |
287 | - T* ptr(unsigned int n = 0){ | |
288 | - if(n < D) | |
289 | - return X[n]; | |
290 | - else return NULL; | |
291 | - } | |
292 | - | |
293 | - //return the vector component at position (u, v) | |
294 | - vec<T, D> get(unsigned int u, unsigned int v){ | |
295 | - | |
296 | - vec<T, D> result; | |
297 | - for(unsigned int d=0; d<D; d++){ | |
298 | - HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + v*R[0] + u, sizeof(T), cudaMemcpyDeviceToHost)); | |
299 | - } | |
300 | - | |
301 | - return result; | |
302 | - } | |
303 | - | |
304 | - //set all components of the field to zero | |
305 | - void clear(){ | |
306 | - for(unsigned int n=0; n<D; n++) | |
307 | - if(X[n] != NULL) | |
308 | - HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1])); | |
309 | - } | |
310 | - | |
311 | - //crop the field | |
312 | - field<T, D> crop(unsigned int width, unsigned int height){ | |
313 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
314 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
315 | - | |
316 | - //create one thread for each detector pixel | |
317 | - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
318 | - dim3 dimGrid((width + SQRT_BLOCK -1)/SQRT_BLOCK, (height + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
319 | - | |
320 | - //create a scalar field to store the result | |
321 | - field<T, D> result(width, height); | |
322 | - | |
323 | - for(int n=0; n<D; n++) | |
324 | - stim::gpu_field_crop <<<dimGrid, dimBlock>>> (result.X[n], X[n], R[0], R[1], width, height); | |
325 | - | |
326 | - return result; | |
327 | - } | |
328 | - | |
329 | - //save an image representing component n | |
330 | - void toImage(std::string filename, unsigned int n = 0, | |
331 | - bool positive = false, stim::colormapType cmap = stim::cmBrewer){ | |
332 | - T max_val = find_max(n); //find the maximum value | |
333 | - | |
334 | - if(positive) //if the field is positive, use the range [0 max_val] | |
335 | - stim::gpu2image<T>(X[n], filename, R[0], R[1], 0, max_val, cmap); | |
336 | - else | |
337 | - stim::gpu2image<T>(X[n], filename, R[0], R[1], -max_val, max_val, cmap); | |
338 | - } | |
339 | - | |
340 | -}; | |
341 | - | |
342 | -} //end namespace rts | |
343 | -#endif | |
1 | +#ifndef RTS_FIELD_CUH | |
2 | +#define RTS_FIELD_CUH | |
3 | + | |
4 | +#include <vector> | |
5 | +#include <string> | |
6 | +#include <sstream> | |
7 | + | |
8 | +#include "cublas_v2.h" | |
9 | +#include <cuda_runtime.h> | |
10 | + | |
11 | +#include "../math/rect.h" | |
12 | +#include "../cuda/threads.h" | |
13 | +#include "../cuda/error.h" | |
14 | +#include "../cuda/devices.h" | |
15 | +#include "../visualization/colormap.h" | |
16 | + | |
17 | + | |
18 | +namespace stim{ | |
19 | + | |
20 | +//multiply R = X * Y | |
21 | +template<typename T> | |
22 | +__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){ | |
23 | + | |
24 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
25 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
26 | + | |
27 | + //make sure that the thread indices are in-bounds | |
28 | + if(iu >= r0 || iv >= r1) return; | |
29 | + | |
30 | + //compute the index into the field | |
31 | + int i = iv*r0 + iu; | |
32 | + | |
33 | + //calculate and store the result | |
34 | + R[i] = X[i] * Y[i]; | |
35 | +} | |
36 | + | |
37 | +//assign a constant value to all points | |
38 | +template<typename T> | |
39 | +__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){ | |
40 | + | |
41 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
42 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
43 | + | |
44 | + //make sure that the thread indices are in-bounds | |
45 | + if(iu >= r0 || iv >= r1) return; | |
46 | + | |
47 | + //compute the index into the field | |
48 | + int i = iv*r0 + iu; | |
49 | + | |
50 | + //calculate and store the result | |
51 | + ptr[i] = val; | |
52 | +} | |
53 | + | |
54 | +//crop the field to the new dimensions (width x height) | |
55 | +template<typename T> | |
56 | +__global__ void gpu_field_crop(T* dest, T* source, | |
57 | + unsigned int r0, unsigned int r1, | |
58 | + unsigned int width, unsigned int height){ | |
59 | + | |
60 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
61 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
62 | + | |
63 | + //make sure that the thread indices are in-bounds | |
64 | + if(iu >= width || iv >= height) return; | |
65 | + | |
66 | + //compute the index into the field | |
67 | + int is = iv*r0 + iu; | |
68 | + int id = iv*width + iu; | |
69 | + | |
70 | + //calculate and store the result | |
71 | + dest[id] = source[is]; | |
72 | +} | |
73 | + | |
74 | +template<typename T, unsigned int D = 1> | |
75 | +class field{ | |
76 | + | |
77 | +protected: | |
78 | + | |
79 | + T* X[D]; //pointer to the field data | |
80 | + unsigned int R[2]; //field resolution | |
81 | + stim::rect<T> shape; //position and shape of the field slice | |
82 | + | |
83 | + //calculates the optimal block and grid sizes using information from the GPU | |
84 | + void cuda_params(dim3& grids, dim3& blocks){ | |
85 | + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
86 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
87 | + | |
88 | + //create one thread for each detector pixel | |
89 | + blocks = dim3(SQRT_BLOCK, SQRT_BLOCK); | |
90 | + grids = dim3((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
91 | + } | |
92 | + | |
93 | + //find the maximum value of component n | |
94 | + T find_max(unsigned int n){ | |
95 | + cublasStatus_t stat; | |
96 | + cublasHandle_t handle; | |
97 | + | |
98 | + //create a CUBLAS handle | |
99 | + stat = cublasCreate(&handle); | |
100 | + if(stat != CUBLAS_STATUS_SUCCESS){ | |
101 | + std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | |
102 | + exit(1); | |
103 | + } | |
104 | + | |
105 | + int L = R[0] * R[1]; //compute the number of discrete points in a slice | |
106 | + int index; //result of the max operation | |
107 | + T result; | |
108 | + | |
109 | + if(sizeof(T) == 4) | |
110 | + stat = cublasIsamax(handle, L, (const float*)X[n], 1, &index); | |
111 | + else | |
112 | + stat = cublasIdamax(handle, L, (const double*)X[n], 1, &index); | |
113 | + | |
114 | + index -= 1; //adjust for 1-based indexing | |
115 | + | |
116 | + //if there was a GPU error, terminate | |
117 | + if(stat != CUBLAS_STATUS_SUCCESS){ | |
118 | + std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | |
119 | + exit(1); | |
120 | + } | |
121 | + | |
122 | + //retrieve the maximum value for this slice and store it in the maxVal array | |
123 | + HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(T), cudaMemcpyDeviceToHost)); | |
124 | + return result; | |
125 | + } | |
126 | + | |
127 | +public: | |
128 | + | |
129 | + //returns a list of file names given an input string with wild cards | |
130 | + std::vector<std::string> process_filename(std::string name){ | |
131 | + std::stringstream ss(name); | |
132 | + std::string item; | |
133 | + std::vector<std::string> elems; | |
134 | + while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension) | |
135 | + { | |
136 | + elems.push_back(item); | |
137 | + } | |
138 | + | |
139 | + std::string prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters) | |
140 | + std::string ext = elems[1]; //file extension (ex. .bmp, .png) | |
141 | + ext = std::string(".") + ext; //add a period back into the extension | |
142 | + | |
143 | + size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'') | |
144 | + size_t i1 = prefix.find_last_of("?"); | |
145 | + | |
146 | + std::string postfix = prefix.substr(i1+1); | |
147 | + prefix = prefix.substr(0, i0); | |
148 | + | |
149 | + unsigned int digits = i1 - i0 + 1; //compute the number of wildcards | |
150 | + | |
151 | + std::vector<std::string> flist; //create a vector of file names | |
152 | + //fill the list | |
153 | + for(unsigned int d=0; d<D; d++){ | |
154 | + std::stringstream ss; //assemble the file name | |
155 | + ss<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext; | |
156 | + flist.push_back(ss.str()); | |
157 | + } | |
158 | + | |
159 | + return flist; | |
160 | + } | |
161 | + | |
162 | + void init(){ | |
163 | + for(unsigned int n=0; n<D; n++) | |
164 | + X[n] = NULL; | |
165 | + } | |
166 | + void destroy(){ | |
167 | + for(unsigned int n=0; n<D; n++) | |
168 | + if(X[n] != NULL) | |
169 | + HANDLE_ERROR(cudaFree(X[n])); | |
170 | + } | |
171 | + | |
172 | +public: | |
173 | + //field constructor | |
174 | + field(){ | |
175 | + R[0] = R[1] = 0; | |
176 | + init(); | |
177 | + } | |
178 | + | |
179 | + field(unsigned int x, unsigned int y){ | |
180 | + //set the resolution | |
181 | + R[0] = x; | |
182 | + R[1] = y; | |
183 | + //allocate memory on the GPU | |
184 | + for(unsigned int n=0; n<D; n++){ | |
185 | + HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(T) * R[0] * R[1] )); | |
186 | + } | |
187 | + clear(); //zero the field | |
188 | + } | |
189 | + | |
190 | + ///copy constructor | |
191 | + field(const field &rhs){ | |
192 | + //first make a shallow copy | |
193 | + R[0] = rhs.R[0]; | |
194 | + R[1] = rhs.R[1]; | |
195 | + | |
196 | + for(unsigned int n=0; n<D; n++){ | |
197 | + //do we have to make a deep copy? | |
198 | + if(rhs.X[n] == NULL) | |
199 | + X[n] = NULL; //no | |
200 | + else{ | |
201 | + //allocate the necessary memory | |
202 | + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1])); | |
203 | + | |
204 | + //copy the slice | |
205 | + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
206 | + } | |
207 | + } | |
208 | + } | |
209 | + | |
210 | + ~field(){ | |
211 | + destroy(); | |
212 | + } | |
213 | + | |
214 | + //assignment operator | |
215 | + field & operator= (const field & rhs){ | |
216 | + | |
217 | + //de-allocate any existing GPU memory | |
218 | + destroy(); | |
219 | + | |
220 | + //copy the slice resolution | |
221 | + R[0] = rhs.R[0]; | |
222 | + R[1] = rhs.R[1]; | |
223 | + | |
224 | + for(unsigned int n=0; n<D; n++) | |
225 | + { | |
226 | + //allocate the necessary memory | |
227 | + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1])); | |
228 | + //copy the slice | |
229 | + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
230 | + } | |
231 | + return *this; | |
232 | + } | |
233 | + | |
234 | + field & operator= (const T rhs){ | |
235 | + | |
236 | + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
237 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
238 | + | |
239 | + //create one thread for each detector pixel | |
240 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
241 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
242 | + | |
243 | + //assign the constant value to all positions and dimensions | |
244 | + for(int n=0; n<D; n++) | |
245 | + stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]); | |
246 | + | |
247 | + return *this; | |
248 | + } | |
249 | + | |
250 | + //assignment of vector component | |
251 | + field & operator= (const vec<T, D> rhs){ | |
252 | + | |
253 | + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
254 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
255 | + | |
256 | + //create one thread for each detector pixel | |
257 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
258 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
259 | + | |
260 | + //assign the constant value to all positions and dimensions | |
261 | + for(unsigned int n=0; n<D; n++) | |
262 | + stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]); | |
263 | + | |
264 | + return *this; | |
265 | + | |
266 | + } | |
267 | + | |
268 | + //multiply two fields (element-wise multiplication) | |
269 | + field<T, D> operator* (const field & rhs){ | |
270 | + | |
271 | + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
272 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
273 | + | |
274 | + //create one thread for each detector pixel | |
275 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
276 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
277 | + | |
278 | + //create a scalar field to store the result | |
279 | + field<T, D> result(R[0], R[1]); | |
280 | + | |
281 | + for(int n=0; n<D; n++) | |
282 | + stim::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]); | |
283 | + | |
284 | + return result; | |
285 | + } | |
286 | + | |
287 | + T* ptr(unsigned int n = 0){ | |
288 | + if(n < D) | |
289 | + return X[n]; | |
290 | + else return NULL; | |
291 | + } | |
292 | + | |
293 | + //return the vector component at position (u, v) | |
294 | + vec<T, D> get(unsigned int u, unsigned int v){ | |
295 | + | |
296 | + vec<T, D> result; | |
297 | + for(unsigned int d=0; d<D; d++){ | |
298 | + HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + v*R[0] + u, sizeof(T), cudaMemcpyDeviceToHost)); | |
299 | + } | |
300 | + | |
301 | + return result; | |
302 | + } | |
303 | + | |
304 | + //set all components of the field to zero | |
305 | + void clear(){ | |
306 | + for(unsigned int n=0; n<D; n++) | |
307 | + if(X[n] != NULL) | |
308 | + HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1])); | |
309 | + } | |
310 | + | |
311 | + //crop the field | |
312 | + field<T, D> crop(unsigned int width, unsigned int height){ | |
313 | + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
314 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
315 | + | |
316 | + //create one thread for each detector pixel | |
317 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
318 | + dim3 dimGrid((width + SQRT_BLOCK -1)/SQRT_BLOCK, (height + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
319 | + | |
320 | + //create a scalar field to store the result | |
321 | + field<T, D> result(width, height); | |
322 | + | |
323 | + for(int n=0; n<D; n++) | |
324 | + stim::gpu_field_crop <<<dimGrid, dimBlock>>> (result.X[n], X[n], R[0], R[1], width, height); | |
325 | + | |
326 | + return result; | |
327 | + } | |
328 | + | |
329 | + //save an image representing component n | |
330 | + void toImage(std::string filename, unsigned int n = 0, | |
331 | + bool positive = false, stim::colormapType cmap = stim::cmBrewer){ | |
332 | + T max_val = find_max(n); //find the maximum value | |
333 | + | |
334 | + if(positive) //if the field is positive, use the range [0 max_val] | |
335 | + stim::gpu2image<T>(X[n], filename, R[0], R[1], 0, max_val, cmap); | |
336 | + else | |
337 | + stim::gpu2image<T>(X[n], filename, R[0], R[1], -max_val, max_val, cmap); | |
338 | + } | |
339 | + | |
340 | +}; | |
341 | + | |
342 | +} //end namespace rts | |
343 | +#endif | ... | ... |
math/rect.h
1 | -#ifndef RTS_RECT_H | |
2 | -#define RTS_RECT_H | |
3 | - | |
4 | -//enable CUDA_CALLABLE macro | |
5 | -#include "../cuda/callable.h" | |
6 | -#include "../math/vector.h" | |
7 | -#include "../math/triangle.h" | |
8 | -#include "../math/quaternion.h" | |
9 | -#include <iostream> | |
10 | -#include <iomanip> | |
11 | -#include <algorithm> | |
12 | - | |
13 | -namespace stim{ | |
14 | - | |
15 | -//template for a rectangle class in ND space | |
16 | -template <class T, int N = 3> | |
17 | -struct rect | |
18 | -{ | |
19 | - /* | |
20 | - ^ O | |
21 | - | | |
22 | - | | |
23 | - Y C | |
24 | - | | |
25 | - | | |
26 | - O---------X---------> | |
27 | - */ | |
28 | - | |
29 | -private: | |
30 | - | |
31 | - stim::vec<T, N> C; | |
32 | - stim::vec<T, N> X; | |
33 | - stim::vec<T, N> Y; | |
34 | - | |
35 | - CUDA_CALLABLE void scale(T factor){ | |
36 | - X *= factor; | |
37 | - Y *= factor; | |
38 | - } | |
39 | - | |
40 | - CUDA_CALLABLE void normal(vec<T, N> n){ //orient the rectangle along the specified normal | |
41 | - | |
42 | - n = n.norm(); //normalize, just in case | |
43 | - vec<T, N> n_current = X.cross(Y).norm(); //compute the current normal | |
44 | - quaternion<T> q; //create a quaternion | |
45 | - q.CreateRotation(n_current, n); //initialize a rotation from n_current to n | |
46 | - | |
47 | - //apply the quaternion to the vectors and position | |
48 | - X = q.toMatrix3() * X; | |
49 | - Y = q.toMatrix3() * Y; | |
50 | - } | |
51 | - | |
52 | - CUDA_CALLABLE void init(){ | |
53 | - C = vec<T, N>(0, 0, 0); | |
54 | - X = vec<T, N>(1, 0, 0); | |
55 | - Y = vec<T, N>(0, 1, 0); | |
56 | - } | |
57 | - | |
58 | -public: | |
59 | - | |
60 | - CUDA_CALLABLE rect(){ | |
61 | - init(); | |
62 | - } | |
63 | - | |
64 | - CUDA_CALLABLE rect(T size, T z_pos = (T)0){ | |
65 | - init(); //use the default setup | |
66 | - scale(size); //scale the rectangle | |
67 | - C[2] = z_pos; | |
68 | - } | |
69 | - | |
70 | - CUDA_CALLABLE rect(T size, vec<T, N> c, vec<T, N> n = vec<T, N>(0, 0, 1)){ | |
71 | - init(); //start with the default setting | |
72 | - C = c; | |
73 | - scale(size); //scale the rectangle | |
74 | - normal(n); //orient | |
75 | - | |
76 | - } | |
77 | - | |
78 | - /*CUDA_CALLABLE rect(vec<T, N> a, vec<T, N> b, vec<T, N> c) | |
79 | - { | |
80 | - A = a; | |
81 | - Y = b - a; | |
82 | - X = c - a - Y; | |
83 | - | |
84 | - }*/ | |
85 | - | |
86 | - /******************************************************************* | |
87 | - Constructor - create a rect from a position, normal, and rotation | |
88 | - *******************************************************************/ | |
89 | - /*CUDA_CALLABLE rect(stim::vec<T, N> c, stim::vec<T, N> normal, T width, T height, T theta) | |
90 | - { | |
91 | - | |
92 | - //compute the X direction - start along world-space X | |
93 | - Y = stim::vec<T, N>(0, 1, 0); | |
94 | - if(Y == normal) | |
95 | - Y = stim::vec<T, N>(0, 0, 1); | |
96 | - | |
97 | - X = Y.cross(normal).norm(); | |
98 | - | |
99 | - std::cout<<X<<std::endl; | |
100 | - | |
101 | - //rotate the X axis by theta radians | |
102 | - stim::quaternion<T> q; | |
103 | - q.CreateRotation(theta, normal); | |
104 | - X = q.toMatrix3() * X; | |
105 | - Y = normal.cross(X); | |
106 | - | |
107 | - //normalize everything | |
108 | - X = X.norm(); | |
109 | - Y = Y.norm(); | |
110 | - | |
111 | - //scale to match the rect width and height | |
112 | - X = X * width; | |
113 | - Y = Y * height; | |
114 | - | |
115 | - //set the corner of the plane | |
116 | - A = c - X * 0.5f - Y * 0.5f; | |
117 | - | |
118 | - std::cout<<X<<std::endl; | |
119 | - }*/ | |
120 | - | |
121 | - //boolean comparison | |
122 | - bool operator==(const rect<T, N> & rhs) | |
123 | - { | |
124 | - if(C == rhs.C && X == rhs.X && Y == rhs.Y) | |
125 | - return true; | |
126 | - else | |
127 | - return false; | |
128 | - } | |
129 | - | |
130 | - /******************************************* | |
131 | - Return the normal for the rect | |
132 | - *******************************************/ | |
133 | - CUDA_CALLABLE stim::vec<T, N> n() | |
134 | - { | |
135 | - return (X.cross(Y)).norm(); | |
136 | - } | |
137 | - | |
138 | - CUDA_CALLABLE stim::vec<T, N> p(T a, T b) | |
139 | - { | |
140 | - stim::vec<T, N> result; | |
141 | - //given the two parameters a, b = [0 1], returns the position in world space | |
142 | - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | |
143 | - result = A + X * a + Y * b; | |
144 | - | |
145 | - return result; | |
146 | - } | |
147 | - | |
148 | - CUDA_CALLABLE stim::vec<T, N> operator()(T a, T b) | |
149 | - { | |
150 | - return p(a, b); | |
151 | - } | |
152 | - | |
153 | - std::string str() | |
154 | - { | |
155 | - std::stringstream ss; | |
156 | - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | |
157 | - ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl; | |
158 | - ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl; | |
159 | - ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X; | |
160 | - | |
161 | - return ss.str(); | |
162 | - | |
163 | - } | |
164 | - | |
165 | - CUDA_CALLABLE rect<T, N> operator*(T rhs) | |
166 | - { | |
167 | - //scales the plane by a scalar value | |
168 | - | |
169 | - //create the new rectangle | |
170 | - rect<T, N> result = *this; | |
171 | - result.scale(rhs); | |
172 | - | |
173 | - return result; | |
174 | - | |
175 | - } | |
176 | - | |
177 | - CUDA_CALLABLE T dist(vec<T, N> p) | |
178 | - { | |
179 | - //compute the distance between a point and this rect | |
180 | - | |
181 | - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | |
182 | - | |
183 | - //first break the rect up into two triangles | |
184 | - triangle<T, N> T0(A, A+X, A+Y); | |
185 | - triangle<T, N> T1(A+X+Y, A+X, A+Y); | |
186 | - | |
187 | - | |
188 | - T d0 = T0.dist(p); | |
189 | - T d1 = T1.dist(p); | |
190 | - | |
191 | - if(d0 < d1) | |
192 | - return d0; | |
193 | - else | |
194 | - return d1; | |
195 | - } | |
196 | - | |
197 | - CUDA_CALLABLE T dist_max(vec<T, N> p) | |
198 | - { | |
199 | - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | |
200 | - T da = (A - p).len(); | |
201 | - T db = (A+X - p).len(); | |
202 | - T dc = (A+Y - p).len(); | |
203 | - T dd = (A+X+Y - p).len(); | |
204 | - | |
205 | - return std::max( da, std::max(db, std::max(dc, dd) ) ); | |
206 | - } | |
207 | -}; | |
208 | - | |
209 | -} //end namespace rts | |
210 | - | |
211 | -template <typename T, int N> | |
212 | -std::ostream& operator<<(std::ostream& os, stim::rect<T, N> R) | |
213 | -{ | |
214 | - os<<R.str(); | |
215 | - return os; | |
216 | -} | |
217 | - | |
218 | - | |
219 | -#endif | |
1 | +#ifndef RTS_RECT_H | |
2 | +#define RTS_RECT_H | |
3 | + | |
4 | +//enable CUDA_CALLABLE macro | |
5 | +#include "../cuda/callable.h" | |
6 | +#include "../math/vector.h" | |
7 | +#include "../math/triangle.h" | |
8 | +#include "../math/quaternion.h" | |
9 | +#include <iostream> | |
10 | +#include <iomanip> | |
11 | +#include <algorithm> | |
12 | + | |
13 | +namespace stim{ | |
14 | + | |
15 | +//template for a rectangle class in ND space | |
16 | +template <class T, int N = 3> | |
17 | +struct rect | |
18 | +{ | |
19 | + /* | |
20 | + ^ O | |
21 | + | | |
22 | + | | |
23 | + Y C | |
24 | + | | |
25 | + | | |
26 | + O---------X---------> | |
27 | + */ | |
28 | + | |
29 | +private: | |
30 | + | |
31 | + stim::vec<T, N> C; | |
32 | + stim::vec<T, N> X; | |
33 | + stim::vec<T, N> Y; | |
34 | + | |
35 | + CUDA_CALLABLE void scale(T factor){ | |
36 | + X *= factor; | |
37 | + Y *= factor; | |
38 | + } | |
39 | + | |
40 | + CUDA_CALLABLE void normal(vec<T, N> n){ //orient the rectangle along the specified normal | |
41 | + | |
42 | + n = n.norm(); //normalize, just in case | |
43 | + vec<T, N> n_current = X.cross(Y).norm(); //compute the current normal | |
44 | + quaternion<T> q; //create a quaternion | |
45 | + q.CreateRotation(n_current, n); //initialize a rotation from n_current to n | |
46 | + | |
47 | + //apply the quaternion to the vectors and position | |
48 | + X = q.toMatrix3() * X; | |
49 | + Y = q.toMatrix3() * Y; | |
50 | + } | |
51 | + | |
52 | + CUDA_CALLABLE void init(){ | |
53 | + C = vec<T, N>(0, 0, 0); | |
54 | + X = vec<T, N>(1, 0, 0); | |
55 | + Y = vec<T, N>(0, 1, 0); | |
56 | + } | |
57 | + | |
58 | +public: | |
59 | + | |
60 | + CUDA_CALLABLE rect(){ | |
61 | + init(); | |
62 | + } | |
63 | + | |
64 | + CUDA_CALLABLE rect(T size, T z_pos = (T)0){ | |
65 | + init(); //use the default setup | |
66 | + scale(size); //scale the rectangle | |
67 | + C[2] = z_pos; | |
68 | + } | |
69 | + | |
70 | + CUDA_CALLABLE rect(T size, vec<T, N> c, vec<T, N> n = vec<T, N>(0, 0, 1)){ | |
71 | + init(); //start with the default setting | |
72 | + C = c; | |
73 | + scale(size); //scale the rectangle | |
74 | + normal(n); //orient | |
75 | + | |
76 | + } | |
77 | + | |
78 | + /*CUDA_CALLABLE rect(vec<T, N> a, vec<T, N> b, vec<T, N> c) | |
79 | + { | |
80 | + A = a; | |
81 | + Y = b - a; | |
82 | + X = c - a - Y; | |
83 | + | |
84 | + }*/ | |
85 | + | |
86 | + /******************************************************************* | |
87 | + Constructor - create a rect from a position, normal, and rotation | |
88 | + *******************************************************************/ | |
89 | + /*CUDA_CALLABLE rect(stim::vec<T, N> c, stim::vec<T, N> normal, T width, T height, T theta) | |
90 | + { | |
91 | + | |
92 | + //compute the X direction - start along world-space X | |
93 | + Y = stim::vec<T, N>(0, 1, 0); | |
94 | + if(Y == normal) | |
95 | + Y = stim::vec<T, N>(0, 0, 1); | |
96 | + | |
97 | + X = Y.cross(normal).norm(); | |
98 | + | |
99 | + std::cout<<X<<std::endl; | |
100 | + | |
101 | + //rotate the X axis by theta radians | |
102 | + stim::quaternion<T> q; | |
103 | + q.CreateRotation(theta, normal); | |
104 | + X = q.toMatrix3() * X; | |
105 | + Y = normal.cross(X); | |
106 | + | |
107 | + //normalize everything | |
108 | + X = X.norm(); | |
109 | + Y = Y.norm(); | |
110 | + | |
111 | + //scale to match the rect width and height | |
112 | + X = X * width; | |
113 | + Y = Y * height; | |
114 | + | |
115 | + //set the corner of the plane | |
116 | + A = c - X * 0.5f - Y * 0.5f; | |
117 | + | |
118 | + std::cout<<X<<std::endl; | |
119 | + }*/ | |
120 | + | |
121 | + //boolean comparison | |
122 | + bool operator==(const rect<T, N> & rhs) | |
123 | + { | |
124 | + if(C == rhs.C && X == rhs.X && Y == rhs.Y) | |
125 | + return true; | |
126 | + else | |
127 | + return false; | |
128 | + } | |
129 | + | |
130 | + /******************************************* | |
131 | + Return the normal for the rect | |
132 | + *******************************************/ | |
133 | + CUDA_CALLABLE stim::vec<T, N> n() | |
134 | + { | |
135 | + return (X.cross(Y)).norm(); | |
136 | + } | |
137 | + | |
138 | + CUDA_CALLABLE stim::vec<T, N> p(T a, T b) | |
139 | + { | |
140 | + stim::vec<T, N> result; | |
141 | + //given the two parameters a, b = [0 1], returns the position in world space | |
142 | + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | |
143 | + result = A + X * a + Y * b; | |
144 | + | |
145 | + return result; | |
146 | + } | |
147 | + | |
148 | + CUDA_CALLABLE stim::vec<T, N> operator()(T a, T b) | |
149 | + { | |
150 | + return p(a, b); | |
151 | + } | |
152 | + | |
153 | + std::string str() | |
154 | + { | |
155 | + std::stringstream ss; | |
156 | + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | |
157 | + ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl; | |
158 | + ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl; | |
159 | + ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X; | |
160 | + | |
161 | + return ss.str(); | |
162 | + | |
163 | + } | |
164 | + | |
165 | + CUDA_CALLABLE rect<T, N> operator*(T rhs) | |
166 | + { | |
167 | + //scales the plane by a scalar value | |
168 | + | |
169 | + //create the new rectangle | |
170 | + rect<T, N> result = *this; | |
171 | + result.scale(rhs); | |
172 | + | |
173 | + return result; | |
174 | + | |
175 | + } | |
176 | + | |
177 | + CUDA_CALLABLE T dist(vec<T, N> p) | |
178 | + { | |
179 | + //compute the distance between a point and this rect | |
180 | + | |
181 | + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | |
182 | + | |
183 | + //first break the rect up into two triangles | |
184 | + triangle<T, N> T0(A, A+X, A+Y); | |
185 | + triangle<T, N> T1(A+X+Y, A+X, A+Y); | |
186 | + | |
187 | + | |
188 | + T d0 = T0.dist(p); | |
189 | + T d1 = T1.dist(p); | |
190 | + | |
191 | + if(d0 < d1) | |
192 | + return d0; | |
193 | + else | |
194 | + return d1; | |
195 | + } | |
196 | + | |
197 | + CUDA_CALLABLE T dist_max(vec<T, N> p) | |
198 | + { | |
199 | + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | |
200 | + T da = (A - p).len(); | |
201 | + T db = (A+X - p).len(); | |
202 | + T dc = (A+Y - p).len(); | |
203 | + T dd = (A+X+Y - p).len(); | |
204 | + | |
205 | + return std::max( da, std::max(db, std::max(dc, dd) ) ); | |
206 | + } | |
207 | +}; | |
208 | + | |
209 | +} //end namespace rts | |
210 | + | |
211 | +template <typename T, int N> | |
212 | +std::ostream& operator<<(std::ostream& os, stim::rect<T, N> R) | |
213 | +{ | |
214 | + os<<R.str(); | |
215 | + return os; | |
216 | +} | |
217 | + | |
218 | + | |
219 | +#endif | ... | ... |
math/triangle.h
1 | -#ifndef RTS_TRIANGLE_H | |
2 | -#define RTS_TRIANGLE_H | |
3 | - | |
4 | -//enable CUDA_CALLABLE macro | |
5 | -#include "../cuda/callable.h" | |
6 | -#include "../math/vector.h" | |
7 | -#include <iostream> | |
8 | - | |
9 | -namespace stim{ | |
10 | - | |
11 | -template <class T, int N=3> | |
12 | -struct triangle | |
13 | -{ | |
14 | - /* | |
15 | - A------>B | |
16 | - | / | |
17 | - | / | |
18 | - | / | |
19 | - | / | |
20 | - | / | |
21 | - | / | |
22 | - C | |
23 | - */ | |
24 | - private: | |
25 | - | |
26 | - vec<T, N> A; | |
27 | - vec<T, N> B; | |
28 | - vec<T, N> C; | |
29 | - | |
30 | - CUDA_CALLABLE vec<T, N> _p(T s, T t) | |
31 | - { | |
32 | - //This function returns the point specified by p = A + s(B-A) + t(C-A) | |
33 | - vec<T, N> E0 = B-A; | |
34 | - vec<T, N> E1 = C-A; | |
35 | - | |
36 | - return A + s*E0 + t*E1; | |
37 | - } | |
38 | - | |
39 | - | |
40 | - public: | |
41 | - | |
42 | - | |
43 | - | |
44 | - CUDA_CALLABLE triangle() | |
45 | - { | |
46 | - | |
47 | - } | |
48 | - | |
49 | - CUDA_CALLABLE triangle(vec<T, N> a, vec<T, N> b, vec<T, N> c) | |
50 | - { | |
51 | - A = a; | |
52 | - B = b; | |
53 | - C = c; | |
54 | - } | |
55 | - | |
56 | - CUDA_CALLABLE stim::vec<T, N> operator()(T s, T t) | |
57 | - { | |
58 | - return _p(s, t); | |
59 | - } | |
60 | - | |
61 | - CUDA_CALLABLE vec<T, N> nearest(vec<T, N> p) | |
62 | - { | |
63 | - //comptue the distance between a point and this triangle | |
64 | - // This code is adapted from: http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf | |
65 | - | |
66 | - vec<T, N> E0 = B-A; | |
67 | - vec<T, N> E1 = C-A; | |
68 | - vec<T, N> D = A - p; | |
69 | - | |
70 | - T a = E0.dot(E0); | |
71 | - T b = E0.dot(E1); | |
72 | - T c = E1.dot(E1); | |
73 | - T d = E0.dot(D); | |
74 | - T e = E1.dot(D); | |
75 | - //T f = D.dot(D); | |
76 | - | |
77 | - T det = a*c - b*b; | |
78 | - T s = b*e - c*d; | |
79 | - T t = b*d - a*e; | |
80 | - | |
81 | - /*std::cout<<"E0: "<<E0<<std::endl; | |
82 | - std::cout<<"E1: "<<E1<<std::endl; | |
83 | - std::cout<<"a: "<<a<<std::endl; | |
84 | - std::cout<<"b: "<<b<<std::endl; | |
85 | - std::cout<<"c: "<<c<<std::endl; | |
86 | - std::cout<<"d: "<<d<<std::endl; | |
87 | - std::cout<<"e: "<<e<<std::endl; | |
88 | - std::cout<<"f: "<<f<<std::endl; | |
89 | - std::cout<<"det: "<<det<<std::endl; | |
90 | - std::cout<<"s: "<<s<<std::endl; | |
91 | - std::cout<<"t: "<<t<<std::endl;*/ | |
92 | - | |
93 | - | |
94 | - if( s+t <= det) | |
95 | - { | |
96 | - if(s < 0) | |
97 | - { | |
98 | - if(t < 0) | |
99 | - { | |
100 | - //region 4 | |
101 | - //std::cout<<"Region 4"<<std::endl; | |
102 | - s = 0; | |
103 | - t = 0; | |
104 | - //done? | |
105 | - } | |
106 | - else | |
107 | - { | |
108 | - //region 3 | |
109 | - //std::cout<<"Region 3"<<std::endl; | |
110 | - s=0; | |
111 | - t = ( e >= 0 ? 0 : ( -e >= c ? 1 : -e/c ) ); | |
112 | - //done | |
113 | - } | |
114 | - } | |
115 | - else if(t < 0) | |
116 | - { | |
117 | - //region 5 | |
118 | - //std::cout<<"Region 5"<<std::endl; | |
119 | - s = ( d >= 0 ? 0 : ( -d >= a ? 1 : -d/a ) ); | |
120 | - t = 0; | |
121 | - //done | |
122 | - } | |
123 | - else | |
124 | - { | |
125 | - //region 0 | |
126 | - //std::cout<<"Region 0"<<std::endl; | |
127 | - T invDet = (T)1.0/det; | |
128 | - s *= invDet; | |
129 | - t *= invDet; | |
130 | - //done | |
131 | - } | |
132 | - } | |
133 | - else | |
134 | - { | |
135 | - if(s < 0) | |
136 | - { | |
137 | - //region 2 | |
138 | - //std::cout<<"Region 2"<<std::endl; | |
139 | - s = 0; | |
140 | - t = 1; | |
141 | - //done? | |
142 | - | |
143 | - } | |
144 | - else if(t < 0) | |
145 | - { | |
146 | - //region 6 | |
147 | - //std::cout<<"Region 6"<<std::endl; | |
148 | - s = 1; | |
149 | - t = 0; | |
150 | - //done? | |
151 | - } | |
152 | - else | |
153 | - { | |
154 | - //region 1 | |
155 | - //std::cout<<"Region 1"<<std::endl; | |
156 | - T numer = c + e - b - d; | |
157 | - if( numer <= 0 ) | |
158 | - s = 0; | |
159 | - else | |
160 | - { | |
161 | - T denom = a - 2 * b + c; | |
162 | - s = ( numer >= denom ? 1 : numer/denom ); | |
163 | - } | |
164 | - t = 1 - s; | |
165 | - //done | |
166 | - } | |
167 | - } | |
168 | - | |
169 | - //std::cout<<"s: "<<s<<std::endl; | |
170 | - //std::cout<<"t: "<<t<<std::endl; | |
171 | - | |
172 | - //std::cout<<"p: "<<_p(s, t)<<std::endl; | |
173 | - | |
174 | - return _p(s, t); | |
175 | - | |
176 | - } | |
177 | - | |
178 | - CUDA_CALLABLE T dist(vec<T, N> p) | |
179 | - { | |
180 | - vec<T, N> n = nearest(p); | |
181 | - | |
182 | - return (p - n).len(); | |
183 | - } | |
184 | -}; | |
185 | - | |
186 | -} | |
187 | - | |
188 | -#endif | |
1 | +#ifndef RTS_TRIANGLE_H | |
2 | +#define RTS_TRIANGLE_H | |
3 | + | |
4 | +//enable CUDA_CALLABLE macro | |
5 | +#include "../cuda/callable.h" | |
6 | +#include "../math/vector.h" | |
7 | +#include <iostream> | |
8 | + | |
9 | +namespace stim{ | |
10 | + | |
11 | +template <class T, int N=3> | |
12 | +struct triangle | |
13 | +{ | |
14 | + /* | |
15 | + A------>B | |
16 | + | / | |
17 | + | / | |
18 | + | / | |
19 | + | / | |
20 | + | / | |
21 | + | / | |
22 | + C | |
23 | + */ | |
24 | + private: | |
25 | + | |
26 | + vec<T, N> A; | |
27 | + vec<T, N> B; | |
28 | + vec<T, N> C; | |
29 | + | |
30 | + CUDA_CALLABLE vec<T, N> _p(T s, T t) | |
31 | + { | |
32 | + //This function returns the point specified by p = A + s(B-A) + t(C-A) | |
33 | + vec<T, N> E0 = B-A; | |
34 | + vec<T, N> E1 = C-A; | |
35 | + | |
36 | + return A + s*E0 + t*E1; | |
37 | + } | |
38 | + | |
39 | + | |
40 | + public: | |
41 | + | |
42 | + | |
43 | + | |
44 | + CUDA_CALLABLE triangle() | |
45 | + { | |
46 | + | |
47 | + } | |
48 | + | |
49 | + CUDA_CALLABLE triangle(vec<T, N> a, vec<T, N> b, vec<T, N> c) | |
50 | + { | |
51 | + A = a; | |
52 | + B = b; | |
53 | + C = c; | |
54 | + } | |
55 | + | |
56 | + CUDA_CALLABLE stim::vec<T, N> operator()(T s, T t) | |
57 | + { | |
58 | + return _p(s, t); | |
59 | + } | |
60 | + | |
61 | + CUDA_CALLABLE vec<T, N> nearest(vec<T, N> p) | |
62 | + { | |
63 | + //comptue the distance between a point and this triangle | |
64 | + // This code is adapted from: http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf | |
65 | + | |
66 | + vec<T, N> E0 = B-A; | |
67 | + vec<T, N> E1 = C-A; | |
68 | + vec<T, N> D = A - p; | |
69 | + | |
70 | + T a = E0.dot(E0); | |
71 | + T b = E0.dot(E1); | |
72 | + T c = E1.dot(E1); | |
73 | + T d = E0.dot(D); | |
74 | + T e = E1.dot(D); | |
75 | + //T f = D.dot(D); | |
76 | + | |
77 | + T det = a*c - b*b; | |
78 | + T s = b*e - c*d; | |
79 | + T t = b*d - a*e; | |
80 | + | |
81 | + /*std::cout<<"E0: "<<E0<<std::endl; | |
82 | + std::cout<<"E1: "<<E1<<std::endl; | |
83 | + std::cout<<"a: "<<a<<std::endl; | |
84 | + std::cout<<"b: "<<b<<std::endl; | |
85 | + std::cout<<"c: "<<c<<std::endl; | |
86 | + std::cout<<"d: "<<d<<std::endl; | |
87 | + std::cout<<"e: "<<e<<std::endl; | |
88 | + std::cout<<"f: "<<f<<std::endl; | |
89 | + std::cout<<"det: "<<det<<std::endl; | |
90 | + std::cout<<"s: "<<s<<std::endl; | |
91 | + std::cout<<"t: "<<t<<std::endl;*/ | |
92 | + | |
93 | + | |
94 | + if( s+t <= det) | |
95 | + { | |
96 | + if(s < 0) | |
97 | + { | |
98 | + if(t < 0) | |
99 | + { | |
100 | + //region 4 | |
101 | + //std::cout<<"Region 4"<<std::endl; | |
102 | + s = 0; | |
103 | + t = 0; | |
104 | + //done? | |
105 | + } | |
106 | + else | |
107 | + { | |
108 | + //region 3 | |
109 | + //std::cout<<"Region 3"<<std::endl; | |
110 | + s=0; | |
111 | + t = ( e >= 0 ? 0 : ( -e >= c ? 1 : -e/c ) ); | |
112 | + //done | |
113 | + } | |
114 | + } | |
115 | + else if(t < 0) | |
116 | + { | |
117 | + //region 5 | |
118 | + //std::cout<<"Region 5"<<std::endl; | |
119 | + s = ( d >= 0 ? 0 : ( -d >= a ? 1 : -d/a ) ); | |
120 | + t = 0; | |
121 | + //done | |
122 | + } | |
123 | + else | |
124 | + { | |
125 | + //region 0 | |
126 | + //std::cout<<"Region 0"<<std::endl; | |
127 | + T invDet = (T)1.0/det; | |
128 | + s *= invDet; | |
129 | + t *= invDet; | |
130 | + //done | |
131 | + } | |
132 | + } | |
133 | + else | |
134 | + { | |
135 | + if(s < 0) | |
136 | + { | |
137 | + //region 2 | |
138 | + //std::cout<<"Region 2"<<std::endl; | |
139 | + s = 0; | |
140 | + t = 1; | |
141 | + //done? | |
142 | + | |
143 | + } | |
144 | + else if(t < 0) | |
145 | + { | |
146 | + //region 6 | |
147 | + //std::cout<<"Region 6"<<std::endl; | |
148 | + s = 1; | |
149 | + t = 0; | |
150 | + //done? | |
151 | + } | |
152 | + else | |
153 | + { | |
154 | + //region 1 | |
155 | + //std::cout<<"Region 1"<<std::endl; | |
156 | + T numer = c + e - b - d; | |
157 | + if( numer <= 0 ) | |
158 | + s = 0; | |
159 | + else | |
160 | + { | |
161 | + T denom = a - 2 * b + c; | |
162 | + s = ( numer >= denom ? 1 : numer/denom ); | |
163 | + } | |
164 | + t = 1 - s; | |
165 | + //done | |
166 | + } | |
167 | + } | |
168 | + | |
169 | + //std::cout<<"s: "<<s<<std::endl; | |
170 | + //std::cout<<"t: "<<t<<std::endl; | |
171 | + | |
172 | + //std::cout<<"p: "<<_p(s, t)<<std::endl; | |
173 | + | |
174 | + return _p(s, t); | |
175 | + | |
176 | + } | |
177 | + | |
178 | + CUDA_CALLABLE T dist(vec<T, N> p) | |
179 | + { | |
180 | + vec<T, N> n = nearest(p); | |
181 | + | |
182 | + return (p - n).len(); | |
183 | + } | |
184 | +}; | |
185 | + | |
186 | +} | |
187 | + | |
188 | +#endif | ... | ... |
optics/material.h
1 | -#ifndef RTS_MATERIAL_H | |
2 | -#define RTS_MATERIAL_H | |
3 | - | |
4 | -#include <vector> | |
5 | -#include <ostream> | |
6 | -#include <iostream> | |
7 | -#include <fstream> | |
8 | -#include <complex> | |
9 | -#include <algorithm> | |
10 | -#include <sstream> | |
11 | -#include "../math/complex.h" | |
12 | -#include "../math/constants.h" | |
13 | -#include "../math/function.h" | |
14 | - | |
15 | -namespace stim{ | |
16 | - | |
17 | -//Material class - default representation for the material property is the refractive index (RI) | |
18 | -template<typename T> | |
19 | -class material : public function< T, complex<T> >{ | |
20 | - | |
21 | -public: | |
22 | - enum wave_property{microns, inverse_cm}; | |
23 | - enum material_property{ri, absorbance}; | |
24 | - | |
25 | -private: | |
26 | - | |
27 | - using function< T, complex<T> >::X; | |
28 | - using function< T, complex<T> >::Y; | |
29 | - using function< T, complex<T> >::insert; | |
30 | - using function< T, complex<T> >::bounding; | |
31 | - | |
32 | - std::string name; //name for the material (defaults to file name) | |
33 | - | |
34 | - void process_header(std::string str, wave_property& wp, material_property& mp){ | |
35 | - | |
36 | - std::stringstream ss(str); //create a stream from the data string | |
37 | - std::string line; | |
38 | - std::getline(ss, line); //get the first line as a string | |
39 | - while(line[0] == '#'){ //continue looping while the line is a comment | |
40 | - | |
41 | - std::stringstream lstream(line); //create a stream from the line | |
42 | - lstream.ignore(); //ignore the first character ('#') | |
43 | - | |
44 | - std::string prop; //get the property name | |
45 | - lstream>>prop; | |
46 | - | |
47 | - if(prop == "X"){ | |
48 | - std::string wp_name; | |
49 | - lstream>>wp_name; | |
50 | - if(wp_name == "microns") wp = microns; | |
51 | - else if(wp_name == "inverse_cm") wp = inverse_cm; | |
52 | - } | |
53 | - else if(prop == "Y"){ | |
54 | - std::string mp_name; | |
55 | - lstream>>mp_name; | |
56 | - if(mp_name == "ri") mp = ri; | |
57 | - else if(mp_name == "absorbance") mp = absorbance; | |
58 | - } | |
59 | - | |
60 | - std::getline(ss, line); //get the next line | |
61 | - } | |
62 | - | |
63 | - function< T, stim::complex<T> >::process_string(str); | |
64 | - } | |
65 | - | |
66 | - void from_inverse_cm(){ | |
67 | - //convert inverse centimeters to wavelength (in microns) | |
68 | - for(unsigned int i=0; i<X.size(); i++) | |
69 | - X[i] = 10000 / X[i]; | |
70 | - | |
71 | - //reverse the function array | |
72 | - std::reverse(X.begin(), X.end()); | |
73 | - std::reverse(Y.begin(), Y.end()); | |
74 | - | |
75 | - } | |
76 | - | |
77 | - void init(){ | |
78 | - bounding[0] = bounding[1] = stim::complex<T>(1, 0); | |
79 | - } | |
80 | - | |
81 | - | |
82 | -public: | |
83 | - | |
84 | - material(std::string filename, wave_property wp, material_property mp){ | |
85 | - name = filename; | |
86 | - load(filename, wp, mp); | |
87 | - } | |
88 | - | |
89 | - material(std::string filename){ | |
90 | - name = filename; | |
91 | - load(filename); | |
92 | - } | |
93 | - | |
94 | - material(){ | |
95 | - init(); | |
96 | - } | |
97 | - | |
98 | - complex<T> getN(T lambda){ | |
99 | - return function< T, complex<T> >::linear(lambda); | |
100 | - } | |
101 | - | |
102 | - void load(std::string filename, wave_property wp, material_property mp){ | |
103 | - | |
104 | - //load the file as a function | |
105 | - function< T, complex<T> >::load(filename); | |
106 | - } | |
107 | - | |
108 | - void load(std::string filename){ | |
109 | - | |
110 | - wave_property wp = inverse_cm; | |
111 | - material_property mp = ri; | |
112 | - //turn the file into a string | |
113 | - std::ifstream t(filename.c_str()); //open the file as a stream | |
114 | - | |
115 | - if(!t){ | |
116 | - std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl; | |
117 | - exit(1); | |
118 | - } | |
119 | - std::string str((std::istreambuf_iterator<char>(t)), | |
120 | - std::istreambuf_iterator<char>()); | |
121 | - | |
122 | - //process the header information | |
123 | - process_header(str, wp, mp); | |
124 | - | |
125 | - //convert units | |
126 | - if(wp == inverse_cm) | |
127 | - from_inverse_cm(); | |
128 | - //set the bounding values | |
129 | - bounding[0] = Y[0]; | |
130 | - bounding[1] = Y.back(); | |
131 | - } | |
132 | - std::string str(){ | |
133 | - std::stringstream ss; | |
134 | - ss<<name<<std::endl; | |
135 | - ss<<function< T, complex<T> >::str(); | |
136 | - return ss.str(); | |
137 | - } | |
138 | - std::string get_name(){ | |
139 | - return name; | |
140 | - } | |
141 | - | |
142 | - void set_name(std::string str){ | |
143 | - name = str; | |
144 | - } | |
145 | - | |
146 | -}; | |
147 | - | |
148 | -} | |
149 | - | |
150 | - | |
151 | - | |
152 | - | |
153 | -#endif | |
1 | +#ifndef RTS_MATERIAL_H | |
2 | +#define RTS_MATERIAL_H | |
3 | + | |
4 | +#include <vector> | |
5 | +#include <ostream> | |
6 | +#include <iostream> | |
7 | +#include <fstream> | |
8 | +#include <complex> | |
9 | +#include <algorithm> | |
10 | +#include <sstream> | |
11 | +#include "../math/complex.h" | |
12 | +#include "../math/constants.h" | |
13 | +#include "../math/function.h" | |
14 | + | |
15 | +namespace stim{ | |
16 | + | |
17 | +//Material class - default representation for the material property is the refractive index (RI) | |
18 | +template<typename T> | |
19 | +class material : public function< T, complex<T> >{ | |
20 | + | |
21 | +public: | |
22 | + enum wave_property{microns, inverse_cm}; | |
23 | + enum material_property{ri, absorbance}; | |
24 | + | |
25 | +private: | |
26 | + | |
27 | + using function< T, complex<T> >::X; | |
28 | + using function< T, complex<T> >::Y; | |
29 | + using function< T, complex<T> >::insert; | |
30 | + using function< T, complex<T> >::bounding; | |
31 | + | |
32 | + std::string name; //name for the material (defaults to file name) | |
33 | + | |
34 | + void process_header(std::string str, wave_property& wp, material_property& mp){ | |
35 | + | |
36 | + std::stringstream ss(str); //create a stream from the data string | |
37 | + std::string line; | |
38 | + std::getline(ss, line); //get the first line as a string | |
39 | + while(line[0] == '#'){ //continue looping while the line is a comment | |
40 | + | |
41 | + std::stringstream lstream(line); //create a stream from the line | |
42 | + lstream.ignore(); //ignore the first character ('#') | |
43 | + | |
44 | + std::string prop; //get the property name | |
45 | + lstream>>prop; | |
46 | + | |
47 | + if(prop == "X"){ | |
48 | + std::string wp_name; | |
49 | + lstream>>wp_name; | |
50 | + if(wp_name == "microns") wp = microns; | |
51 | + else if(wp_name == "inverse_cm") wp = inverse_cm; | |
52 | + } | |
53 | + else if(prop == "Y"){ | |
54 | + std::string mp_name; | |
55 | + lstream>>mp_name; | |
56 | + if(mp_name == "ri") mp = ri; | |
57 | + else if(mp_name == "absorbance") mp = absorbance; | |
58 | + } | |
59 | + | |
60 | + std::getline(ss, line); //get the next line | |
61 | + } | |
62 | + | |
63 | + function< T, stim::complex<T> >::process_string(str); | |
64 | + } | |
65 | + | |
66 | + void from_inverse_cm(){ | |
67 | + //convert inverse centimeters to wavelength (in microns) | |
68 | + for(unsigned int i=0; i<X.size(); i++) | |
69 | + X[i] = 10000 / X[i]; | |
70 | + | |
71 | + //reverse the function array | |
72 | + std::reverse(X.begin(), X.end()); | |
73 | + std::reverse(Y.begin(), Y.end()); | |
74 | + | |
75 | + } | |
76 | + | |
77 | + void init(){ | |
78 | + bounding[0] = bounding[1] = stim::complex<T>(1, 0); | |
79 | + } | |
80 | + | |
81 | + | |
82 | +public: | |
83 | + | |
84 | + material(std::string filename, wave_property wp, material_property mp){ | |
85 | + name = filename; | |
86 | + load(filename, wp, mp); | |
87 | + } | |
88 | + | |
89 | + material(std::string filename){ | |
90 | + name = filename; | |
91 | + load(filename); | |
92 | + } | |
93 | + | |
94 | + material(){ | |
95 | + init(); | |
96 | + } | |
97 | + | |
98 | + complex<T> getN(T lambda){ | |
99 | + return function< T, complex<T> >::linear(lambda); | |
100 | + } | |
101 | + | |
102 | + void load(std::string filename, wave_property wp, material_property mp){ | |
103 | + | |
104 | + //load the file as a function | |
105 | + function< T, complex<T> >::load(filename); | |
106 | + } | |
107 | + | |
108 | + void load(std::string filename){ | |
109 | + | |
110 | + wave_property wp = inverse_cm; | |
111 | + material_property mp = ri; | |
112 | + //turn the file into a string | |
113 | + std::ifstream t(filename.c_str()); //open the file as a stream | |
114 | + | |
115 | + if(!t){ | |
116 | + std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl; | |
117 | + exit(1); | |
118 | + } | |
119 | + std::string str((std::istreambuf_iterator<char>(t)), | |
120 | + std::istreambuf_iterator<char>()); | |
121 | + | |
122 | + //process the header information | |
123 | + process_header(str, wp, mp); | |
124 | + | |
125 | + //convert units | |
126 | + if(wp == inverse_cm) | |
127 | + from_inverse_cm(); | |
128 | + //set the bounding values | |
129 | + bounding[0] = Y[0]; | |
130 | + bounding[1] = Y.back(); | |
131 | + } | |
132 | + std::string str(){ | |
133 | + std::stringstream ss; | |
134 | + ss<<name<<std::endl; | |
135 | + ss<<function< T, complex<T> >::str(); | |
136 | + return ss.str(); | |
137 | + } | |
138 | + std::string get_name(){ | |
139 | + return name; | |
140 | + } | |
141 | + | |
142 | + void set_name(std::string str){ | |
143 | + name = str; | |
144 | + } | |
145 | + | |
146 | +}; | |
147 | + | |
148 | +} | |
149 | + | |
150 | + | |
151 | + | |
152 | + | |
153 | +#endif | ... | ... |
optics/mirst-1d.cuh
1 | -#include "../optics/material.h" | |
2 | -#include "../math/complexfield.cuh" | |
3 | -#include "../math/constants.h" | |
4 | -//#include "../envi/bil.h" | |
5 | - | |
6 | -#include "cufft.h" | |
7 | - | |
8 | -#include <vector> | |
9 | -#include <sstream> | |
10 | - | |
11 | -namespace stim{ | |
12 | - | |
13 | -//this function writes a sinc function to "dest" such that an iFFT produces a slab | |
14 | -template<typename T> | |
15 | -__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri, | |
16 | - T* src, T* zf, | |
17 | - T w, unsigned int zR, unsigned int nuR){ | |
18 | - //dest = complex field representing the sample | |
19 | - //ri = refractive indices for each wavelength | |
20 | - //src = intensity of the light source for each wavelength | |
21 | - //zf = z position of the slab interface for each wavelength (accounting for optical path length) | |
22 | - //w = width of the slab (in pixels) | |
23 | - //zR = number of z-axis samples | |
24 | - //nuR = number of wavelengths | |
25 | - | |
26 | - //get the current coordinate in the plane slice | |
27 | - int ifz = blockIdx.x * blockDim.x + threadIdx.x; | |
28 | - int inu = blockIdx.y * blockDim.y + threadIdx.y; | |
29 | - | |
30 | - //make sure that the thread indices are in-bounds | |
31 | - if(inu >= nuR || ifz >= zR) return; | |
32 | - | |
33 | - int i = inu * zR + ifz; | |
34 | - | |
35 | - T fz; | |
36 | - if(ifz < zR/2) | |
37 | - fz = ifz / (T)zR; | |
38 | - else | |
39 | - fz = -(zR - ifz) / (T)zR; | |
40 | - | |
41 | - //if the slab starts outside of the simulation domain, just return | |
42 | - if(zf[inu] >= zR) return; | |
43 | - | |
44 | - //fill the array along z with a sinc function representing the Fourier transform of the layer | |
45 | - | |
46 | - T opl = w * ri[inu].real(); //optical path length | |
47 | - | |
48 | - //handle the case where the slab goes outside the simulation domain | |
49 | - if(zf[inu] + opl >= zR) | |
50 | - opl = zR - zf[inu]; | |
51 | - | |
52 | - if(opl == 0) return; | |
53 | - | |
54 | - //T l = w * ri[inu].real(); | |
55 | - //complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0)); | |
56 | - complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2)); | |
57 | - | |
58 | - complex<T> eta = ri[inu] * ri[inu] - 1; | |
59 | - | |
60 | - //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz); | |
61 | - if(ifz == 0) | |
62 | - dest[i] += opl * exp(e) * eta * src[inu]; | |
63 | - else | |
64 | - dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl); | |
65 | -} | |
66 | - | |
67 | -template<typename T> | |
68 | -__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){ | |
69 | - //zf = current z depth (optical path length) in pixels | |
70 | - //ri = refractive index of the material | |
71 | - //w = actual width of the layer (in pixels) | |
72 | - | |
73 | - | |
74 | - //compute the index for this thread | |
75 | - int i = blockIdx.x * blockDim.x + threadIdx.x; | |
76 | - if(i >= S) return; | |
77 | - | |
78 | - if(ri == NULL) | |
79 | - zf[i] += w; | |
80 | - else | |
81 | - zf[i] += ri[i].real() * w; | |
82 | -} | |
83 | - | |
84 | -//apply the 1D MIRST filter to an existing sample (overwriting the sample) | |
85 | -template<typename T> | |
86 | -__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda, | |
87 | - T dFz, | |
88 | - T inNA, T outNA, | |
89 | - unsigned int lambdaR, unsigned int zR, | |
90 | - T sigma = 0){ | |
91 | - //sampleFFT = the sample in the Fourier domain (will be overwritten) | |
92 | - //lambda = list of wavelengths | |
93 | - //dFz = delta along the Fz axis in the frequency domain | |
94 | - //inNA = NA of the internal obscuration | |
95 | - //outNA = NA of the objective | |
96 | - //zR = number of pixels along the Fz axis (same as the z-axis) | |
97 | - //lambdaR = number of wavelengths | |
98 | - //sigma = width of the Gaussian source | |
99 | - int ifz = blockIdx.x * blockDim.x + threadIdx.x; | |
100 | - int inu = blockIdx.y * blockDim.y + threadIdx.y; | |
101 | - | |
102 | - if(inu >= lambdaR || ifz >= zR) return; | |
103 | - | |
104 | - //calculate the index into the sample FT | |
105 | - int i = inu * zR + ifz; | |
106 | - | |
107 | - //compute the frequency (and set all negative spatial frequencies to zero) | |
108 | - T fz; | |
109 | - if(ifz < zR / 2) | |
110 | - fz = ifz * dFz; | |
111 | - //if the spatial frequency is negative, set it to zero and exit | |
112 | - else{ | |
113 | - sampleFFT[i] = 0; | |
114 | - return; | |
115 | - } | |
116 | - | |
117 | - //compute the frequency in inverse microns | |
118 | - T nu = 1/lambda[inu]; | |
119 | - | |
120 | - //determine the radius of the integration circle | |
121 | - T nu_sq = nu * nu; | |
122 | - T fz_sq = (fz * fz) / 4; | |
123 | - | |
124 | - //cut off frequencies above the diffraction limit | |
125 | - T r; | |
126 | - if(fz_sq < nu_sq) | |
127 | - r = sqrt(nu_sq - fz_sq); | |
128 | - else | |
129 | - r = 0; | |
130 | - | |
131 | - //account for the optics | |
132 | - T Q = 0; | |
133 | - if(r > nu * inNA && r < nu * outNA) | |
134 | - Q = 1; | |
135 | - | |
136 | - //account for the source | |
137 | - //T sigma = 30.0; | |
138 | - T s = exp( - (r*r * sigma*sigma) / 2 ); | |
139 | - //T s=1; | |
140 | - | |
141 | - //compute the final filter | |
142 | - T mirst = 0; | |
143 | - if(fz != 0) | |
144 | - mirst = 2 * stimPI * r * s * Q * (1/fz); | |
145 | - | |
146 | - sampleFFT[i] *= mirst; | |
147 | - | |
148 | -} | |
149 | - | |
150 | -/*This object performs a 1-dimensional (layered) MIRST simulation | |
151 | -*/ | |
152 | -template<typename T> | |
153 | -class mirst1d{ | |
154 | - | |
155 | -private: | |
156 | - unsigned int Z; //z-axis resolution | |
157 | - unsigned int pad; //pixel padding on either side of the sample | |
158 | - | |
159 | - std::vector< material<T> > matlist; //list of materials | |
160 | - std::vector< T > layers; //list of layer thicknesses | |
161 | - | |
162 | - std::vector< T > lambdas; //list of wavelengths that are being simulated | |
163 | - unsigned int S; //number of wavelengths (size of "lambdas") | |
164 | - | |
165 | - T NA[2]; //numerical aperature (central obscuration and outer diameter) | |
166 | - | |
167 | - function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters) | |
168 | - | |
169 | - complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc. | |
170 | - | |
171 | - void fft(int direction = CUFFT_FORWARD){ | |
172 | - | |
173 | - unsigned padZ = Z + pad; | |
174 | - | |
175 | - //create cuFFT handles | |
176 | - cufftHandle plan; | |
177 | - cufftResult result; | |
178 | - | |
179 | - if(sizeof(T) == 4) | |
180 | - result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision | |
181 | - else | |
182 | - result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision | |
183 | - | |
184 | - //check for Plan 1D errors | |
185 | - if(result != CUFFT_SUCCESS){ | |
186 | - std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl; | |
187 | - CufftError(result); | |
188 | - exit(1); | |
189 | - } | |
190 | - | |
191 | - if(sizeof(T) == 4) | |
192 | - result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction); | |
193 | - else | |
194 | - result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction); | |
195 | - | |
196 | - //check for FFT errors | |
197 | - if(result != CUFFT_SUCCESS){ | |
198 | - std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl; | |
199 | - CufftError(result); | |
200 | - exit(1); | |
201 | - } | |
202 | - | |
203 | - cufftDestroy(plan); | |
204 | - } | |
205 | - | |
206 | - | |
207 | - //initialize the scratch memory | |
208 | - void init_scratch(){ | |
209 | - scratch = complexfield<T, 1>(Z + pad , lambdas.size()); | |
210 | - scratch = 0; | |
211 | - } | |
212 | - | |
213 | - //get the list of scattering efficiency (eta) values for a specified layer | |
214 | - std::vector< complex<T> > layer_etas(unsigned int l){ | |
215 | - | |
216 | - std::vector< complex<T> > etas; | |
217 | - | |
218 | - //fill the list of etas | |
219 | - for(unsigned int i=0; i<lambdas.size(); i++) | |
220 | - etas.push_back( matlist[l].eta(lambdas[i]) ); | |
221 | - return etas; | |
222 | - } | |
223 | - | |
224 | - //calculates the optimal block and grid sizes using information from the GPU | |
225 | - void cuda_params(dim3& grids, dim3& blocks){ | |
226 | - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
227 | - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
228 | - | |
229 | - //create one thread for each detector pixel | |
230 | - blocks = dim3(SQRT_BLOCK, SQRT_BLOCK); | |
231 | - grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
232 | - } | |
233 | - | |
234 | - //add the fourier transform of layer n to the scratch space | |
235 | - void build_layer_fft(unsigned int n, T* zf){ | |
236 | - unsigned int paddedZ = Z + pad; | |
237 | - | |
238 | - T wpx = layers[n] / dz(); //calculate the width of the layer in pixels | |
239 | - | |
240 | - //allocate memory for the refractive index | |
241 | - complex<T>* gpuRi; | |
242 | - HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S)); | |
243 | - | |
244 | - //allocate memory for the source profile | |
245 | - T* gpuSrc; | |
246 | - HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S)); | |
247 | - | |
248 | - complex<T> ri; | |
249 | - T source; | |
250 | - //store the refractive index and source profile in a CPU array | |
251 | - for(int inu=0; inu<S; inu++){ | |
252 | - //save the refractive index to the GPU | |
253 | - ri = matlist[n].getN(lambdas[inu]); | |
254 | - HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice )); | |
255 | - | |
256 | - //save the source profile to the GPU | |
257 | - source = source_profile(10000 / lambdas[inu]); | |
258 | - HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice )); | |
259 | - | |
260 | - } | |
261 | - | |
262 | - //create one thread for each pixel of the field slice | |
263 | - dim3 gridDim, blockDim; | |
264 | - cuda_params(gridDim, blockDim); | |
265 | - stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S); | |
266 | - | |
267 | - int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size | |
268 | - int linGrid = S / linBlock + 1; | |
269 | - stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S); | |
270 | - | |
271 | - //free memory | |
272 | - HANDLE_ERROR(cudaFree(gpuRi)); | |
273 | - HANDLE_ERROR(cudaFree(gpuSrc)); | |
274 | - } | |
275 | - | |
276 | - void build_sample(){ | |
277 | - init_scratch(); //initialize the GPU scratch space | |
278 | - //build_layer(1); | |
279 | - | |
280 | - T* zf; | |
281 | - HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S)); | |
282 | - HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S)); | |
283 | - | |
284 | - //render each layer of the sample | |
285 | - for(unsigned int l=0; l<layers.size(); l++){ | |
286 | - build_layer_fft(l, zf); | |
287 | - } | |
288 | - | |
289 | - HANDLE_ERROR(cudaFree(zf)); | |
290 | - } | |
291 | - | |
292 | - void apply_filter(){ | |
293 | - dim3 gridDim, blockDim; | |
294 | - cuda_params(gridDim, blockDim); | |
295 | - | |
296 | - unsigned int Zpad = Z + pad; | |
297 | - | |
298 | - T sim_range = dz() * Zpad; | |
299 | - T dFz = 1 / sim_range; | |
300 | - | |
301 | - //copy the array of wavelengths to the GPU | |
302 | - T* gpuLambdas; | |
303 | - HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad)); | |
304 | - HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice)); | |
305 | - stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas, | |
306 | - dFz, | |
307 | - NA[0], NA[1], | |
308 | - S, Zpad); | |
309 | - } | |
310 | - | |
311 | - //crop the image to the sample thickness - keep in mind that sample thickness != optical path length | |
312 | - void crop(){ | |
313 | - | |
314 | - scratch = scratch.crop(Z, S); | |
315 | - } | |
316 | - | |
317 | - //save the scratch field as a binary file | |
318 | - void to_binary(std::string filename){ | |
319 | - | |
320 | - } | |
321 | - | |
322 | - | |
323 | -public: | |
324 | - | |
325 | - //constructor | |
326 | - mirst1d(unsigned int rZ = 100, | |
327 | - unsigned int padding = 0){ | |
328 | - Z = rZ; | |
329 | - pad = padding; | |
330 | - NA[0] = 0; | |
331 | - NA[1] = 0.8; | |
332 | - S = 0; | |
333 | - source_profile = 1; | |
334 | - } | |
335 | - | |
336 | - //add a layer, thickness = microns | |
337 | - void add_layer(material<T> mat, T thickness){ | |
338 | - matlist.push_back(mat); | |
339 | - layers.push_back(thickness); | |
340 | - } | |
341 | - | |
342 | - void add_layer(std::string filename, T thickness){ | |
343 | - add_layer(material<T>(filename), thickness); | |
344 | - } | |
345 | - | |
346 | - //adds a profile spectrum for the light source | |
347 | - void set_source(std::string filename){ | |
348 | - source_profile.load(filename); | |
349 | - } | |
350 | - | |
351 | - //adds a block of wavenumbers (cm^-1) to the simulation parameters | |
352 | - void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){ | |
353 | - unsigned int nu = start; | |
354 | - while(nu <= stop){ | |
355 | - lambdas.push_back((T)10000 / nu); | |
356 | - nu += step; | |
357 | - } | |
358 | - S = lambdas.size(); //increment the number of wavelengths (shorthand for later) | |
359 | - } | |
360 | - | |
361 | - T thickness(){ | |
362 | - T t = 0; | |
363 | - for(unsigned int l=0; l<layers.size(); l++) | |
364 | - t += layers[l]; | |
365 | - return t; | |
366 | - } | |
367 | - | |
368 | - void padding(unsigned int padding = 0){ | |
369 | - pad = padding; | |
370 | - } | |
371 | - | |
372 | - T dz(){ | |
373 | - return thickness() / Z; //calculate the z-axis step size | |
374 | - } | |
375 | - | |
376 | - void na(T in, T out){ | |
377 | - NA[0] = in; | |
378 | - NA[1] = out; | |
379 | - } | |
380 | - | |
381 | - void na(T out){ | |
382 | - na(0, out); | |
383 | - } | |
384 | - | |
385 | - stim::function<T, T> get_source(){ | |
386 | - return source_profile; | |
387 | - } | |
388 | - | |
389 | - void save_sample(std::string filename){ | |
390 | - //create a sample and save the magnitude as an image | |
391 | - build_sample(); | |
392 | - fft(CUFFT_INVERSE); | |
393 | - scratch.toImage(filename, stim::complexfield<T, 1>::magnitude); | |
394 | - } | |
395 | - | |
396 | - void save_mirst(std::string filename, bool binary = true){ | |
397 | - //apply the MIRST filter to a sample and save the image | |
398 | - | |
399 | - //build the sample in the Fourier domain | |
400 | - build_sample(); | |
401 | - | |
402 | - //apply the MIRST filter | |
403 | - apply_filter(); | |
404 | - | |
405 | - //apply an inverse FFT to bring the results back into the spatial domain | |
406 | - fft(CUFFT_INVERSE); | |
407 | - | |
408 | - crop(); | |
409 | - | |
410 | - //save the image | |
411 | - if(binary) | |
412 | - to_binary(filename); | |
413 | - else | |
414 | - scratch.toImage(filename, stim::complexfield<T, 1>::magnitude); | |
415 | - } | |
416 | - | |
417 | - | |
418 | - | |
419 | - | |
420 | - std::string str(){ | |
421 | - | |
422 | - stringstream ss; | |
423 | - ss<<"1D MIRST Simulation========================="<<std::endl; | |
424 | - ss<<"z-axis resolution: "<<Z<<std::endl; | |
425 | - ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl; | |
426 | - ss<<"number of wavelengths: "<<lambdas.size()<<std::endl; | |
427 | - ss<<"padding: "<<pad<<std::endl; | |
428 | - ss<<"sample thickness: "<<thickness()<<" um"<<std::endl; | |
429 | - ss<<"dz: "<<dz()<<" um"<<std::endl; | |
430 | - ss<<std::endl; | |
431 | - ss<<layers.size()<<" layers-------------"<<std::endl; | |
432 | - for(unsigned int l=0; l<layers.size(); l++) | |
433 | - ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl; | |
434 | - | |
435 | - ss<<"source profile-----------"<<std::endl; | |
436 | - ss<<get_source().str()<<std::endl; | |
437 | - | |
438 | - return ss.str(); | |
439 | - | |
440 | - | |
441 | - } | |
442 | - | |
443 | - | |
444 | - | |
445 | -}; | |
446 | - | |
447 | -} | |
1 | +#include "../optics/material.h" | |
2 | +#include "../math/complexfield.cuh" | |
3 | +#include "../math/constants.h" | |
4 | +//#include "../envi/bil.h" | |
5 | + | |
6 | +#include "cufft.h" | |
7 | + | |
8 | +#include <vector> | |
9 | +#include <sstream> | |
10 | + | |
11 | +namespace stim{ | |
12 | + | |
13 | +//this function writes a sinc function to "dest" such that an iFFT produces a slab | |
14 | +template<typename T> | |
15 | +__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri, | |
16 | + T* src, T* zf, | |
17 | + T w, unsigned int zR, unsigned int nuR){ | |
18 | + //dest = complex field representing the sample | |
19 | + //ri = refractive indices for each wavelength | |
20 | + //src = intensity of the light source for each wavelength | |
21 | + //zf = z position of the slab interface for each wavelength (accounting for optical path length) | |
22 | + //w = width of the slab (in pixels) | |
23 | + //zR = number of z-axis samples | |
24 | + //nuR = number of wavelengths | |
25 | + | |
26 | + //get the current coordinate in the plane slice | |
27 | + int ifz = blockIdx.x * blockDim.x + threadIdx.x; | |
28 | + int inu = blockIdx.y * blockDim.y + threadIdx.y; | |
29 | + | |
30 | + //make sure that the thread indices are in-bounds | |
31 | + if(inu >= nuR || ifz >= zR) return; | |
32 | + | |
33 | + int i = inu * zR + ifz; | |
34 | + | |
35 | + T fz; | |
36 | + if(ifz < zR/2) | |
37 | + fz = ifz / (T)zR; | |
38 | + else | |
39 | + fz = -(zR - ifz) / (T)zR; | |
40 | + | |
41 | + //if the slab starts outside of the simulation domain, just return | |
42 | + if(zf[inu] >= zR) return; | |
43 | + | |
44 | + //fill the array along z with a sinc function representing the Fourier transform of the layer | |
45 | + | |
46 | + T opl = w * ri[inu].real(); //optical path length | |
47 | + | |
48 | + //handle the case where the slab goes outside the simulation domain | |
49 | + if(zf[inu] + opl >= zR) | |
50 | + opl = zR - zf[inu]; | |
51 | + | |
52 | + if(opl == 0) return; | |
53 | + | |
54 | + //T l = w * ri[inu].real(); | |
55 | + //complex<T> e(0.0, -2 * PI * fz * (zf[inu] + zR/2 - l/2.0)); | |
56 | + complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2)); | |
57 | + | |
58 | + complex<T> eta = ri[inu] * ri[inu] - 1; | |
59 | + | |
60 | + //dest[i] = fz;//exp(e) * m[inu] * src[inu] * sin(PI * fz * l) / (PI * fz); | |
61 | + if(ifz == 0) | |
62 | + dest[i] += opl * exp(e) * eta * src[inu]; | |
63 | + else | |
64 | + dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl); | |
65 | +} | |
66 | + | |
67 | +template<typename T> | |
68 | +__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){ | |
69 | + //zf = current z depth (optical path length) in pixels | |
70 | + //ri = refractive index of the material | |
71 | + //w = actual width of the layer (in pixels) | |
72 | + | |
73 | + | |
74 | + //compute the index for this thread | |
75 | + int i = blockIdx.x * blockDim.x + threadIdx.x; | |
76 | + if(i >= S) return; | |
77 | + | |
78 | + if(ri == NULL) | |
79 | + zf[i] += w; | |
80 | + else | |
81 | + zf[i] += ri[i].real() * w; | |
82 | +} | |
83 | + | |
84 | +//apply the 1D MIRST filter to an existing sample (overwriting the sample) | |
85 | +template<typename T> | |
86 | +__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda, | |
87 | + T dFz, | |
88 | + T inNA, T outNA, | |
89 | + unsigned int lambdaR, unsigned int zR, | |
90 | + T sigma = 0){ | |
91 | + //sampleFFT = the sample in the Fourier domain (will be overwritten) | |
92 | + //lambda = list of wavelengths | |
93 | + //dFz = delta along the Fz axis in the frequency domain | |
94 | + //inNA = NA of the internal obscuration | |
95 | + //outNA = NA of the objective | |
96 | + //zR = number of pixels along the Fz axis (same as the z-axis) | |
97 | + //lambdaR = number of wavelengths | |
98 | + //sigma = width of the Gaussian source | |
99 | + int ifz = blockIdx.x * blockDim.x + threadIdx.x; | |
100 | + int inu = blockIdx.y * blockDim.y + threadIdx.y; | |
101 | + | |
102 | + if(inu >= lambdaR || ifz >= zR) return; | |
103 | + | |
104 | + //calculate the index into the sample FT | |
105 | + int i = inu * zR + ifz; | |
106 | + | |
107 | + //compute the frequency (and set all negative spatial frequencies to zero) | |
108 | + T fz; | |
109 | + if(ifz < zR / 2) | |
110 | + fz = ifz * dFz; | |
111 | + //if the spatial frequency is negative, set it to zero and exit | |
112 | + else{ | |
113 | + sampleFFT[i] = 0; | |
114 | + return; | |
115 | + } | |
116 | + | |
117 | + //compute the frequency in inverse microns | |
118 | + T nu = 1/lambda[inu]; | |
119 | + | |
120 | + //determine the radius of the integration circle | |
121 | + T nu_sq = nu * nu; | |
122 | + T fz_sq = (fz * fz) / 4; | |
123 | + | |
124 | + //cut off frequencies above the diffraction limit | |
125 | + T r; | |
126 | + if(fz_sq < nu_sq) | |
127 | + r = sqrt(nu_sq - fz_sq); | |
128 | + else | |
129 | + r = 0; | |
130 | + | |
131 | + //account for the optics | |
132 | + T Q = 0; | |
133 | + if(r > nu * inNA && r < nu * outNA) | |
134 | + Q = 1; | |
135 | + | |
136 | + //account for the source | |
137 | + //T sigma = 30.0; | |
138 | + T s = exp( - (r*r * sigma*sigma) / 2 ); | |
139 | + //T s=1; | |
140 | + | |
141 | + //compute the final filter | |
142 | + T mirst = 0; | |
143 | + if(fz != 0) | |
144 | + mirst = 2 * stimPI * r * s * Q * (1/fz); | |
145 | + | |
146 | + sampleFFT[i] *= mirst; | |
147 | + | |
148 | +} | |
149 | + | |
150 | +/*This object performs a 1-dimensional (layered) MIRST simulation | |
151 | +*/ | |
152 | +template<typename T> | |
153 | +class mirst1d{ | |
154 | + | |
155 | +private: | |
156 | + unsigned int Z; //z-axis resolution | |
157 | + unsigned int pad; //pixel padding on either side of the sample | |
158 | + | |
159 | + std::vector< material<T> > matlist; //list of materials | |
160 | + std::vector< T > layers; //list of layer thicknesses | |
161 | + | |
162 | + std::vector< T > lambdas; //list of wavelengths that are being simulated | |
163 | + unsigned int S; //number of wavelengths (size of "lambdas") | |
164 | + | |
165 | + T NA[2]; //numerical aperature (central obscuration and outer diameter) | |
166 | + | |
167 | + function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters) | |
168 | + | |
169 | + complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc. | |
170 | + | |
171 | + void fft(int direction = CUFFT_FORWARD){ | |
172 | + | |
173 | + unsigned padZ = Z + pad; | |
174 | + | |
175 | + //create cuFFT handles | |
176 | + cufftHandle plan; | |
177 | + cufftResult result; | |
178 | + | |
179 | + if(sizeof(T) == 4) | |
180 | + result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision | |
181 | + else | |
182 | + result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision | |
183 | + | |
184 | + //check for Plan 1D errors | |
185 | + if(result != CUFFT_SUCCESS){ | |
186 | + std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl; | |
187 | + CufftError(result); | |
188 | + exit(1); | |
189 | + } | |
190 | + | |
191 | + if(sizeof(T) == 4) | |
192 | + result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction); | |
193 | + else | |
194 | + result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction); | |
195 | + | |
196 | + //check for FFT errors | |
197 | + if(result != CUFFT_SUCCESS){ | |
198 | + std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl; | |
199 | + CufftError(result); | |
200 | + exit(1); | |
201 | + } | |
202 | + | |
203 | + cufftDestroy(plan); | |
204 | + } | |
205 | + | |
206 | + | |
207 | + //initialize the scratch memory | |
208 | + void init_scratch(){ | |
209 | + scratch = complexfield<T, 1>(Z + pad , lambdas.size()); | |
210 | + scratch = 0; | |
211 | + } | |
212 | + | |
213 | + //get the list of scattering efficiency (eta) values for a specified layer | |
214 | + std::vector< complex<T> > layer_etas(unsigned int l){ | |
215 | + | |
216 | + std::vector< complex<T> > etas; | |
217 | + | |
218 | + //fill the list of etas | |
219 | + for(unsigned int i=0; i<lambdas.size(); i++) | |
220 | + etas.push_back( matlist[l].eta(lambdas[i]) ); | |
221 | + return etas; | |
222 | + } | |
223 | + | |
224 | + //calculates the optimal block and grid sizes using information from the GPU | |
225 | + void cuda_params(dim3& grids, dim3& blocks){ | |
226 | + int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size | |
227 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
228 | + | |
229 | + //create one thread for each detector pixel | |
230 | + blocks = dim3(SQRT_BLOCK, SQRT_BLOCK); | |
231 | + grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
232 | + } | |
233 | + | |
234 | + //add the fourier transform of layer n to the scratch space | |
235 | + void build_layer_fft(unsigned int n, T* zf){ | |
236 | + unsigned int paddedZ = Z + pad; | |
237 | + | |
238 | + T wpx = layers[n] / dz(); //calculate the width of the layer in pixels | |
239 | + | |
240 | + //allocate memory for the refractive index | |
241 | + complex<T>* gpuRi; | |
242 | + HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S)); | |
243 | + | |
244 | + //allocate memory for the source profile | |
245 | + T* gpuSrc; | |
246 | + HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S)); | |
247 | + | |
248 | + complex<T> ri; | |
249 | + T source; | |
250 | + //store the refractive index and source profile in a CPU array | |
251 | + for(int inu=0; inu<S; inu++){ | |
252 | + //save the refractive index to the GPU | |
253 | + ri = matlist[n].getN(lambdas[inu]); | |
254 | + HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice )); | |
255 | + | |
256 | + //save the source profile to the GPU | |
257 | + source = source_profile(10000 / lambdas[inu]); | |
258 | + HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice )); | |
259 | + | |
260 | + } | |
261 | + | |
262 | + //create one thread for each pixel of the field slice | |
263 | + dim3 gridDim, blockDim; | |
264 | + cuda_params(gridDim, blockDim); | |
265 | + stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S); | |
266 | + | |
267 | + int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size | |
268 | + int linGrid = S / linBlock + 1; | |
269 | + stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S); | |
270 | + | |
271 | + //free memory | |
272 | + HANDLE_ERROR(cudaFree(gpuRi)); | |
273 | + HANDLE_ERROR(cudaFree(gpuSrc)); | |
274 | + } | |
275 | + | |
276 | + void build_sample(){ | |
277 | + init_scratch(); //initialize the GPU scratch space | |
278 | + //build_layer(1); | |
279 | + | |
280 | + T* zf; | |
281 | + HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S)); | |
282 | + HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S)); | |
283 | + | |
284 | + //render each layer of the sample | |
285 | + for(unsigned int l=0; l<layers.size(); l++){ | |
286 | + build_layer_fft(l, zf); | |
287 | + } | |
288 | + | |
289 | + HANDLE_ERROR(cudaFree(zf)); | |
290 | + } | |
291 | + | |
292 | + void apply_filter(){ | |
293 | + dim3 gridDim, blockDim; | |
294 | + cuda_params(gridDim, blockDim); | |
295 | + | |
296 | + unsigned int Zpad = Z + pad; | |
297 | + | |
298 | + T sim_range = dz() * Zpad; | |
299 | + T dFz = 1 / sim_range; | |
300 | + | |
301 | + //copy the array of wavelengths to the GPU | |
302 | + T* gpuLambdas; | |
303 | + HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad)); | |
304 | + HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice)); | |
305 | + stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas, | |
306 | + dFz, | |
307 | + NA[0], NA[1], | |
308 | + S, Zpad); | |
309 | + } | |
310 | + | |
311 | + //crop the image to the sample thickness - keep in mind that sample thickness != optical path length | |
312 | + void crop(){ | |
313 | + | |
314 | + scratch = scratch.crop(Z, S); | |
315 | + } | |
316 | + | |
317 | + //save the scratch field as a binary file | |
318 | + void to_binary(std::string filename){ | |
319 | + | |
320 | + } | |
321 | + | |
322 | + | |
323 | +public: | |
324 | + | |
325 | + //constructor | |
326 | + mirst1d(unsigned int rZ = 100, | |
327 | + unsigned int padding = 0){ | |
328 | + Z = rZ; | |
329 | + pad = padding; | |
330 | + NA[0] = 0; | |
331 | + NA[1] = 0.8; | |
332 | + S = 0; | |
333 | + source_profile = 1; | |
334 | + } | |
335 | + | |
336 | + //add a layer, thickness = microns | |
337 | + void add_layer(material<T> mat, T thickness){ | |
338 | + matlist.push_back(mat); | |
339 | + layers.push_back(thickness); | |
340 | + } | |
341 | + | |
342 | + void add_layer(std::string filename, T thickness){ | |
343 | + add_layer(material<T>(filename), thickness); | |
344 | + } | |
345 | + | |
346 | + //adds a profile spectrum for the light source | |
347 | + void set_source(std::string filename){ | |
348 | + source_profile.load(filename); | |
349 | + } | |
350 | + | |
351 | + //adds a block of wavenumbers (cm^-1) to the simulation parameters | |
352 | + void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){ | |
353 | + unsigned int nu = start; | |
354 | + while(nu <= stop){ | |
355 | + lambdas.push_back((T)10000 / nu); | |
356 | + nu += step; | |
357 | + } | |
358 | + S = lambdas.size(); //increment the number of wavelengths (shorthand for later) | |
359 | + } | |
360 | + | |
361 | + T thickness(){ | |
362 | + T t = 0; | |
363 | + for(unsigned int l=0; l<layers.size(); l++) | |
364 | + t += layers[l]; | |
365 | + return t; | |
366 | + } | |
367 | + | |
368 | + void padding(unsigned int padding = 0){ | |
369 | + pad = padding; | |
370 | + } | |
371 | + | |
372 | + T dz(){ | |
373 | + return thickness() / Z; //calculate the z-axis step size | |
374 | + } | |
375 | + | |
376 | + void na(T in, T out){ | |
377 | + NA[0] = in; | |
378 | + NA[1] = out; | |
379 | + } | |
380 | + | |
381 | + void na(T out){ | |
382 | + na(0, out); | |
383 | + } | |
384 | + | |
385 | + stim::function<T, T> get_source(){ | |
386 | + return source_profile; | |
387 | + } | |
388 | + | |
389 | + void save_sample(std::string filename){ | |
390 | + //create a sample and save the magnitude as an image | |
391 | + build_sample(); | |
392 | + fft(CUFFT_INVERSE); | |
393 | + scratch.toImage(filename, stim::complexfield<T, 1>::magnitude); | |
394 | + } | |
395 | + | |
396 | + void save_mirst(std::string filename, bool binary = true){ | |
397 | + //apply the MIRST filter to a sample and save the image | |
398 | + | |
399 | + //build the sample in the Fourier domain | |
400 | + build_sample(); | |
401 | + | |
402 | + //apply the MIRST filter | |
403 | + apply_filter(); | |
404 | + | |
405 | + //apply an inverse FFT to bring the results back into the spatial domain | |
406 | + fft(CUFFT_INVERSE); | |
407 | + | |
408 | + crop(); | |
409 | + | |
410 | + //save the image | |
411 | + if(binary) | |
412 | + to_binary(filename); | |
413 | + else | |
414 | + scratch.toImage(filename, stim::complexfield<T, 1>::magnitude); | |
415 | + } | |
416 | + | |
417 | + | |
418 | + | |
419 | + | |
420 | + std::string str(){ | |
421 | + | |
422 | + stringstream ss; | |
423 | + ss<<"1D MIRST Simulation========================="<<std::endl; | |
424 | + ss<<"z-axis resolution: "<<Z<<std::endl; | |
425 | + ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl; | |
426 | + ss<<"number of wavelengths: "<<lambdas.size()<<std::endl; | |
427 | + ss<<"padding: "<<pad<<std::endl; | |
428 | + ss<<"sample thickness: "<<thickness()<<" um"<<std::endl; | |
429 | + ss<<"dz: "<<dz()<<" um"<<std::endl; | |
430 | + ss<<std::endl; | |
431 | + ss<<layers.size()<<" layers-------------"<<std::endl; | |
432 | + for(unsigned int l=0; l<layers.size(); l++) | |
433 | + ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl; | |
434 | + | |
435 | + ss<<"source profile-----------"<<std::endl; | |
436 | + ss<<get_source().str()<<std::endl; | |
437 | + | |
438 | + return ss.str(); | |
439 | + | |
440 | + | |
441 | + } | |
442 | + | |
443 | + | |
444 | + | |
445 | +}; | |
446 | + | |
447 | +} | ... | ... |
ui/arguments.h
... | ... | @@ -15,7 +15,7 @@ |
15 | 15 | |
16 | 16 | namespace stim{ |
17 | 17 | |
18 | - class argument | |
18 | + class cmd_option | |
19 | 19 | { |
20 | 20 | private: |
21 | 21 | bool ansi; |
... | ... | @@ -59,8 +59,8 @@ namespace stim{ |
59 | 59 | |
60 | 60 | public: |
61 | 61 | void set_ansi(bool b){ ansi = b; } |
62 | - //create an argument with a given name, description, and default value | |
63 | - argument(std::string _name, std::string _desc, std::string _default = "", std::string _range = "") | |
62 | + //create an option with a given name, description, and default value | |
63 | + cmd_option(std::string _name, std::string _desc, std::string _default = "", std::string _range = "") | |
64 | 64 | { |
65 | 65 | name = _name; |
66 | 66 | parse_desc(_desc); |
... | ... | @@ -81,12 +81,12 @@ namespace stim{ |
81 | 81 | return vals.size(); |
82 | 82 | } |
83 | 83 | |
84 | - //return the value of a text argument | |
84 | + //return the value of a text option | |
85 | 85 | std::string as_string(unsigned int n = 0) |
86 | 86 | { |
87 | 87 | if(!flag) |
88 | 88 | { |
89 | - std::cout<<"ERROR - Argument requested without being set: "<<name<<std::endl; | |
89 | + std::cout<<"ERROR - Option requested without being set: "<<name<<std::endl; | |
90 | 90 | exit(1); |
91 | 91 | } |
92 | 92 | |
... | ... | @@ -96,12 +96,12 @@ namespace stim{ |
96 | 96 | else return ""; |
97 | 97 | } |
98 | 98 | |
99 | - //return the value of a floating point argument | |
99 | + //return the value of a floating point option | |
100 | 100 | float as_float(unsigned int n = 0) |
101 | 101 | { |
102 | 102 | if(!flag) |
103 | 103 | { |
104 | - std::cout<<"ERROR - Argument requested without being set: "<<name<<std::endl; | |
104 | + std::cout<<"ERROR - option requested without being set: "<<name<<std::endl; | |
105 | 105 | exit(1); |
106 | 106 | } |
107 | 107 | |
... | ... | @@ -115,12 +115,12 @@ namespace stim{ |
115 | 115 | else return 0; |
116 | 116 | } |
117 | 117 | |
118 | - //return the value of an integer argument | |
118 | + //return the value of an integer option | |
119 | 119 | int as_int(unsigned int n = 0) |
120 | 120 | { |
121 | 121 | if(!flag) |
122 | 122 | { |
123 | - std::cout<<"ERROR - Argument requested without being set: "<<name<<std::endl; | |
123 | + std::cout<<"ERROR - option requested without being set: "<<name<<std::endl; | |
124 | 124 | exit(1); |
125 | 125 | } |
126 | 126 | |
... | ... | @@ -138,7 +138,7 @@ namespace stim{ |
138 | 138 | int col_width() |
139 | 139 | { |
140 | 140 | int n = 3; |
141 | - //add the length of the argument name | |
141 | + //add the length of the option name | |
142 | 142 | n += name.size(); |
143 | 143 | |
144 | 144 | //if there are any default parameters |
... | ... | @@ -147,7 +147,7 @@ namespace stim{ |
147 | 147 | //padding (parenthesis, =, etc.) |
148 | 148 | n += 6; |
149 | 149 | |
150 | - //for each default argument value | |
150 | + //for each default option value | |
151 | 151 | for(unsigned int v=0; v<vals.size(); v++) |
152 | 152 | n += vals[v].size() + 1; |
153 | 153 | } |
... | ... | @@ -209,13 +209,13 @@ namespace stim{ |
209 | 209 | return ss.str(); |
210 | 210 | } |
211 | 211 | |
212 | - //compare the name of the argument to a string | |
212 | + //compare the name of the option to a string | |
213 | 213 | bool operator==(std::string rhs) |
214 | 214 | { |
215 | 215 | return (name == rhs); |
216 | 216 | } |
217 | 217 | |
218 | - //set the argument to a given value | |
218 | + //set the option to a given value | |
219 | 219 | void set(std::string _value) |
220 | 220 | { |
221 | 221 | parse_val(_value); |
... | ... | @@ -242,10 +242,11 @@ namespace stim{ |
242 | 242 | private: |
243 | 243 | bool ansi; |
244 | 244 | |
245 | - //vector of arguments | |
246 | - std::vector<argument> args; | |
245 | + //vector of options | |
246 | + std::vector<cmd_option> opts; | |
247 | + std::vector<std::string> args; | |
247 | 248 | |
248 | - //column width of the longest argument | |
249 | + //column width of the longest option | |
249 | 250 | int col_width; |
250 | 251 | |
251 | 252 | //list of sections |
... | ... | @@ -261,28 +262,28 @@ namespace stim{ |
261 | 262 | void set_ansi(bool b) |
262 | 263 | { |
263 | 264 | ansi = b; |
264 | - for(unsigned int i=0; i<args.size(); i++) | |
265 | - args[i].set_ansi(ansi); | |
265 | + for(unsigned int i=0; i<opts.size(); i++) | |
266 | + opts[i].set_ansi(ansi); | |
266 | 267 | } |
267 | 268 | |
268 | 269 | void add(std::string _name, std::string _desc, std::string _default = "", std::string _range = "") |
269 | 270 | { |
270 | - argument arg(_name, _desc, _default, _range); | |
271 | - arg.set_ansi(ansi); | |
272 | - args.push_back(arg); | |
271 | + cmd_option opt(_name, _desc, _default, _range); | |
272 | + opt.set_ansi(ansi); | |
273 | + opts.push_back(opt); | |
273 | 274 | |
274 | - col_width = std::max<int>(col_width, arg.col_width()); | |
275 | + col_width = std::max<int>(col_width, opt.col_width()); | |
275 | 276 | } |
276 | 277 | |
277 | 278 | void section(std::string _name) |
278 | 279 | { |
279 | 280 | argsection s; |
280 | 281 | s.name = _name; |
281 | - s.index = args.size(); | |
282 | + s.index = opts.size(); | |
282 | 283 | sections.push_back(s); |
283 | 284 | } |
284 | 285 | |
285 | - //output the arguments (generally in response to --help) | |
286 | + //output the options (generally in response to --help) | |
286 | 287 | std::string str() |
287 | 288 | { |
288 | 289 | std::stringstream ss; |
... | ... | @@ -292,8 +293,8 @@ namespace stim{ |
292 | 293 | if(sections.size() > 0) |
293 | 294 | si = 0; |
294 | 295 | |
295 | - //for each argument | |
296 | - for(unsigned int a=0; a<args.size(); a++) | |
296 | + //for each option | |
297 | + for(unsigned int a=0; a<opts.size(); a++) | |
297 | 298 | { |
298 | 299 | if(si != -1 && a == sections[si].index) |
299 | 300 | { |
... | ... | @@ -305,7 +306,7 @@ namespace stim{ |
305 | 306 | if(si == (int)sections.size()) si = -1; |
306 | 307 | } |
307 | 308 | |
308 | - ss<<args[a].toStr(col_width)<<std::endl; | |
309 | + ss<<opts[a].toStr(col_width)<<std::endl; | |
309 | 310 | } |
310 | 311 | |
311 | 312 | return ss.str(); |
... | ... | @@ -313,9 +314,9 @@ namespace stim{ |
313 | 314 | |
314 | 315 | int index(std::string _name) |
315 | 316 | { |
316 | - unsigned int i = find(args.begin(), args.end(), _name) - args.begin(); | |
317 | + unsigned int i = find(opts.begin(), opts.end(), _name) - opts.begin(); | |
317 | 318 | |
318 | - if(i >= args.size()) | |
319 | + if(i >= opts.size()) | |
319 | 320 | return -1; |
320 | 321 | |
321 | 322 | return (int)i; |
... | ... | @@ -327,52 +328,57 @@ namespace stim{ |
327 | 328 | |
328 | 329 | if(i != -1) |
329 | 330 | { |
330 | - args[i].set(_value); | |
331 | + opts[i].set(_value); | |
331 | 332 | //adjust the column width if necessary |
332 | - col_width = (std::max)(col_width, args[i].col_width()); | |
333 | + col_width = (std::max)(col_width, opts[i].col_width()); | |
333 | 334 | } |
334 | 335 | else |
335 | - std::cout<<"ERROR - Argument not recognized: "<<_name<<std::endl; | |
336 | + std::cout<<"ERROR - option not recognized: "<<_name<<std::endl; | |
336 | 337 | } |
337 | 338 | |
338 | 339 | //parse a parameter string |
339 | 340 | void parse(int argc, char* argv[]) |
340 | 341 | { |
341 | - //if the number of arguments is 1, we're done | |
342 | + //if the number of options is 1, we're done | |
342 | 343 | if(argc <= 1) return; |
343 | 344 | |
344 | 345 | std::string name; |
345 | 346 | std::string params; |
346 | 347 | |
348 | + bool args_done = false; //create a flag that turns true when the first option is encountered | |
349 | + | |
347 | 350 | for(int i=1; i<argc; i++) |
348 | 351 | { |
349 | - //if the argument is a parameter name | |
352 | + //if the argument is an option | |
350 | 353 | if(argv[i][0] == '-' && argv[i][1] == '-') |
351 | 354 | { |
352 | - //add any previous arguments | |
355 | + args_done = true; //arguments for the executable are done, all options now | |
356 | + //add any previous options | |
353 | 357 | if(name != "") |
354 | 358 | set(name, params); |
355 | - //set the current argument to this name | |
359 | + //set the current option to this name | |
356 | 360 | name = argv[i]+2; |
357 | 361 | //clear the parameters list |
358 | 362 | params = ""; |
359 | 363 | } |
360 | - else | |
361 | - { | |
364 | + else if(!args_done){ | |
365 | + args.push_back(argv[i]); | |
366 | + } | |
367 | + else{ //everything else is an arg for the most recent option | |
362 | 368 | if(params != "") |
363 | 369 | params += " "; |
364 | 370 | params += argv[i]; |
365 | 371 | } |
366 | 372 | } |
367 | 373 | |
368 | - //set the last argument | |
374 | + //set the last option | |
369 | 375 | set(name, params); |
370 | 376 | } |
371 | 377 | |
372 | 378 | //determine if a parameter has been set (either specified by the user or with a default value) |
373 | 379 | bool operator()(std::string _name) |
374 | 380 | { |
375 | - int i = find(args.begin(), args.end(), _name) - args.begin(); | |
381 | + int i = find(opts.begin(), opts.end(), _name) - opts.begin(); | |
376 | 382 | |
377 | 383 | if(i < 0) |
378 | 384 | { |
... | ... | @@ -380,12 +386,13 @@ namespace stim{ |
380 | 386 | exit(1); |
381 | 387 | } |
382 | 388 | |
383 | - return args[i].is_set(); | |
389 | + return opts[i].is_set(); | |
384 | 390 | } |
385 | 391 | |
386 | - int nargs(std::string _name) | |
392 | + //number of arguments in a specified option | |
393 | + unsigned int nargs(std::string _name) | |
387 | 394 | { |
388 | - int i = find(args.begin(), args.end(), _name) - args.begin(); | |
395 | + int i = find(opts.begin(), opts.end(), _name) - opts.begin(); | |
389 | 396 | |
390 | 397 | if(i < 0) |
391 | 398 | { |
... | ... | @@ -393,12 +400,22 @@ namespace stim{ |
393 | 400 | exit(1); |
394 | 401 | } |
395 | 402 | |
396 | - return args[i].nargs(); | |
403 | + return opts[i].nargs(); | |
404 | + } | |
405 | + | |
406 | + //number of arguments for the executable | |
407 | + unsigned int nargs(){ | |
408 | + return args.size(); | |
409 | + } | |
410 | + | |
411 | + //return the a'th executable argument | |
412 | + std::string arg(unsigned int a){ | |
413 | + return args[a]; | |
397 | 414 | } |
398 | 415 | |
399 | - argument operator[](std::string _name) | |
416 | + cmd_option operator[](std::string _name) | |
400 | 417 | { |
401 | - int i = find(args.begin(), args.end(), _name) - args.begin(); | |
418 | + int i = find(opts.begin(), opts.end(), _name) - opts.begin(); | |
402 | 419 | |
403 | 420 | if(i < 0) |
404 | 421 | { |
... | ... | @@ -406,7 +423,7 @@ namespace stim{ |
406 | 423 | exit(1); |
407 | 424 | } |
408 | 425 | |
409 | - return args[i]; | |
426 | + return opts[i]; | |
410 | 427 | } |
411 | 428 | |
412 | 429 | ... | ... |