Commit 81e0d2215b30a17309ce7ba7615f94e2eda5c67e

Authored by David Mayerich
1 parent 7b3948ab

separated executable arguments and options in the arglist class

1 -/*RTS Complex number class. This class is CUDA compatible,  
2 -and can therefore be used in CUDA code and on CUDA devices.  
3 -*/  
4 -  
5 -#ifndef RTS_COMPLEX  
6 -#define RTS_COMPLEX  
7 -  
8 -#include "../cuda/callable.h"  
9 -#include <cmath>  
10 -#include <string>  
11 -#include <sstream>  
12 -#include <iostream>  
13 -  
14 -namespace stim  
15 -{  
16 -  
17 -template <class T>  
18 -struct complex  
19 -{  
20 - T r, i;  
21 -  
22 - //default constructor  
23 - CUDA_CALLABLE complex()  
24 - {  
25 - r = 0;  
26 - i = 0;  
27 - }  
28 -  
29 - //constructor when given real and imaginary values  
30 - CUDA_CALLABLE complex(T r, T i = 0)  
31 - {  
32 - this->r = r;  
33 - this->i = i;  
34 - }  
35 -  
36 - //access methods  
37 - CUDA_CALLABLE T real()  
38 - {  
39 - return r;  
40 - }  
41 -  
42 - CUDA_CALLABLE T real(T r_val)  
43 - {  
44 - r = r_val;  
45 - return r_val;  
46 - }  
47 -  
48 - CUDA_CALLABLE T imag()  
49 - {  
50 - return i;  
51 - }  
52 - CUDA_CALLABLE T imag(T i_val)  
53 - {  
54 - i = i_val;  
55 - return i_val;  
56 - }  
57 -  
58 -  
59 -  
60 - //return the current value multiplied by i  
61 - CUDA_CALLABLE complex<T> imul()  
62 - {  
63 - complex<T> result;  
64 - result.r = -i;  
65 - result.i = r;  
66 -  
67 - return result;  
68 - }  
69 -  
70 - //returns the complex signum (-1, 0, 1)  
71 - CUDA_CALLABLE int sgn(){  
72 - if(r > 0) return 1;  
73 - else if(r < 0) return -1;  
74 - else return (0 < i - i < 0);  
75 - }  
76 -  
77 - //ARITHMETIC OPERATORS--------------------  
78 -  
79 - //binary + operator (returns the result of adding two complex values)  
80 - CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs) const  
81 - {  
82 - complex<T> result;  
83 - result.r = r + rhs.r;  
84 - result.i = i + rhs.i;  
85 - return result;  
86 - }  
87 -  
88 - CUDA_CALLABLE complex<T> operator+ (const T rhs) const  
89 - {  
90 - complex<T> result;  
91 - result.r = r + rhs;  
92 - result.i = i;  
93 - return result;  
94 - }  
95 -  
96 - //binary - operator (returns the result of adding two complex values)  
97 - CUDA_CALLABLE complex<T> operator- (const complex<T> rhs) const  
98 - {  
99 - complex<T> result;  
100 - result.r = r - rhs.r;  
101 - result.i = i - rhs.i;  
102 - return result;  
103 - }  
104 -  
105 - //binary - operator (returns the result of adding two complex values)  
106 - CUDA_CALLABLE complex<T> operator- (const T rhs)  
107 - {  
108 - complex<T> result;  
109 - result.r = r - rhs;  
110 - result.i = i;  
111 - return result;  
112 - }  
113 -  
114 - //binary MULTIPLICATION operators (returns the result of multiplying complex values)  
115 - CUDA_CALLABLE complex<T> operator* (const complex<T> rhs) const  
116 - {  
117 - complex<T> result;  
118 - result.r = r * rhs.r - i * rhs.i;  
119 - result.i = r * rhs.i + i * rhs.r;  
120 - return result;  
121 - }  
122 - CUDA_CALLABLE complex<T> operator* (const T rhs)  
123 - {  
124 - return complex<T>(r * rhs, i * rhs);  
125 - }  
126 -  
127 - //binary DIVISION operators (returns the result of dividing complex values)  
128 - CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs) const  
129 - {  
130 - complex<T> result;  
131 - T denom = rhs.r * rhs.r + rhs.i * rhs.i;  
132 - result.r = (r * rhs.r + i * rhs.i) / denom;  
133 - result.i = (- r * rhs.i + i * rhs.r) / denom;  
134 -  
135 - return result;  
136 - }  
137 - CUDA_CALLABLE complex<T> operator/ (const T rhs)  
138 - {  
139 - return complex<T>(r / rhs, i / rhs);  
140 - }  
141 -  
142 - //ASSIGNMENT operators-----------------------------------  
143 - CUDA_CALLABLE complex<T> & operator=(const complex<T> &rhs)  
144 - {  
145 - //check for self-assignment  
146 - if(this != &rhs)  
147 - {  
148 - this->r = rhs.r;  
149 - this->i = rhs.i;  
150 - }  
151 - return *this;  
152 - }  
153 - CUDA_CALLABLE complex<T> & operator=(const T &rhs)  
154 - {  
155 - this->r = rhs;  
156 - this->i = 0;  
157 -  
158 - return *this;  
159 - }  
160 -  
161 - //arithmetic assignment operators  
162 - CUDA_CALLABLE complex<T> operator+=(const complex<T> &rhs)  
163 - {  
164 - *this = *this + rhs;  
165 - return *this;  
166 - }  
167 - CUDA_CALLABLE complex<T> operator+=(const T &rhs)  
168 - {  
169 - *this = *this + rhs;  
170 - return *this;  
171 - }  
172 -  
173 - CUDA_CALLABLE complex<T> operator-=(const complex<T> &rhs)  
174 - {  
175 - *this = *this - rhs;  
176 - return *this;  
177 - }  
178 - CUDA_CALLABLE complex<T> operator-=(const T &rhs)  
179 - {  
180 - *this = *this - rhs;  
181 - return *this;  
182 - }  
183 -  
184 - CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs)  
185 - {  
186 - *this = *this * rhs;  
187 - return *this;  
188 - }  
189 - CUDA_CALLABLE complex<T> operator*=(const T &rhs)  
190 - {  
191 - *this = *this * rhs;  
192 - return *this;  
193 - }  
194 - //divide and assign  
195 - CUDA_CALLABLE complex<T> operator/=(const complex<T> &rhs)  
196 - {  
197 - *this = *this / rhs;  
198 - return *this;  
199 - }  
200 - CUDA_CALLABLE complex<T> operator/=(const T &rhs)  
201 - {  
202 - *this = *this / rhs;  
203 - return *this;  
204 - }  
205 -  
206 - //absolute value operator (returns the absolute value of the complex number)  
207 - CUDA_CALLABLE T abs()  
208 - {  
209 - return std::sqrt(r * r + i * i);  
210 - }  
211 -  
212 - CUDA_CALLABLE complex<T> log()  
213 - {  
214 - complex<T> result;  
215 - result.r = (T)std::log(std::sqrt(r * r + i * i));  
216 - result.i = (T)std::atan2(i, r);  
217 -  
218 -  
219 - return result;  
220 - }  
221 -  
222 - CUDA_CALLABLE complex<T> exp()  
223 - {  
224 - complex<T> result;  
225 -  
226 - T e_r = std::exp(r);  
227 - result.r = e_r * (T)std::cos(i);  
228 - result.i = e_r * (T)std::sin(i);  
229 -  
230 - return result;  
231 - }  
232 -  
233 - /*CUDA_CALLABLE complex<T> pow(int y)  
234 - {  
235 -  
236 - return pow((double)y);  
237 - }*/  
238 -  
239 - CUDA_CALLABLE complex<T> pow(T y)  
240 - {  
241 - complex<T> result;  
242 -  
243 - result = log() * y;  
244 -  
245 - return result.exp();  
246 - }  
247 -  
248 - CUDA_CALLABLE complex<T> sqrt()  
249 - {  
250 - complex<T> result;  
251 -  
252 - //convert to polar coordinates  
253 - T a = std::sqrt(r*r + i*i);  
254 - T theta = std::atan2(i, r);  
255 -  
256 - //find the square root  
257 - T a_p = std::sqrt(a);  
258 - T theta_p = theta/2.0f;  
259 -  
260 - //convert back to cartesian coordinates  
261 - result.r = a_p * std::cos(theta_p);  
262 - result.i = a_p * std::sin(theta_p);  
263 -  
264 - return result;  
265 - }  
266 -  
267 - std::string str()  
268 - {  
269 - std::stringstream ss;  
270 - ss<<"("<<r<<","<<i<<")";  
271 -  
272 - return ss.str();  
273 - }  
274 -  
275 - //COMPARISON operators  
276 - CUDA_CALLABLE bool operator==(complex<T> rhs)  
277 - {  
278 - if(r == rhs.r && i == rhs.i)  
279 - return true;  
280 - return false;  
281 - }  
282 -  
283 - CUDA_CALLABLE bool operator==(T rhs)  
284 - {  
285 - if(r == rhs && i == 0)  
286 - return true;  
287 - return false;  
288 - }  
289 -  
290 - CUDA_CALLABLE bool operator!=(T rhs)  
291 - {  
292 - if(r != rhs || i != 0)  
293 - return true;  
294 - return false;  
295 - }  
296 -  
297 - CUDA_CALLABLE bool operator<(complex<T> rhs){  
298 - return abs() < rhs.abs();  
299 - }  
300 - CUDA_CALLABLE bool operator<=(complex<T> rhs){  
301 - return abs() <= rhs.abs();  
302 - }  
303 - CUDA_CALLABLE bool operator>(complex<T> rhs){  
304 - return abs() > rhs.abs();  
305 - }  
306 - CUDA_CALLABLE bool operator >=(complex<T> rhs){  
307 - return abs() >= rhs.abs();  
308 - }  
309 -  
310 - //CASTING operators  
311 - template < typename otherT >  
312 - operator complex<otherT>()  
313 - {  
314 - complex<otherT> result((otherT)r, (otherT)i);  
315 - return result;  
316 - }  
317 - template< typename otherT >  
318 - complex( const complex<otherT> &rhs)  
319 - {  
320 - r = (T)rhs.r;  
321 - i = (T)rhs.i;  
322 - }  
323 - template< typename otherT >  
324 - complex& operator=(const complex<otherT> &rhs)  
325 - {  
326 - r = (T)rhs.r;  
327 - i = (T)rhs.i;  
328 - return *this;  
329 - }  
330 -  
331 -};  
332 -  
333 -} //end RTS namespace  
334 -  
335 -//addition  
336 -template<typename T>  
337 -CUDA_CALLABLE static stim::complex<T> operator+(const double a, const stim::complex<T> b)  
338 -{  
339 - return stim::complex<T>((T)a + b.r, b.i);  
340 -}  
341 -  
342 -//subtraction with a real value  
343 -template<typename T>  
344 -CUDA_CALLABLE static stim::complex<T> operator-(const double a, const stim::complex<T> b)  
345 -{  
346 - return stim::complex<T>((T)a - b.r, -b.i);  
347 -}  
348 -  
349 -//minus sign  
350 -template<typename T>  
351 -CUDA_CALLABLE static stim::complex<T> operator-(const stim::complex<T> &rhs)  
352 -{  
353 - return stim::complex<T>(-rhs.r, -rhs.i);  
354 -}  
355 -  
356 -//multiply a T value by a complex value  
357 -template<typename T>  
358 -CUDA_CALLABLE static stim::complex<T> operator*(const double a, const stim::complex<T> b)  
359 -{  
360 - return stim::complex<T>((T)a * b.r, (T)a * b.i);  
361 -}  
362 -  
363 -//divide a T value by a complex value  
364 -template<typename T>  
365 -CUDA_CALLABLE static stim::complex<T> operator/(const double a, const stim::complex<T> b)  
366 -{  
367 - stim::complex<T> result;  
368 -  
369 - T denom = b.r * b.r + b.i * b.i;  
370 -  
371 - result.r = ((T)a * b.r) / denom;  
372 - result.i = -((T)a * b.i) / denom;  
373 -  
374 - return result;  
375 -}  
376 -  
377 -  
378 -template<typename T>  
379 -CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, T y)  
380 -{  
381 - return x.pow(y);  
382 -}  
383 -template<typename T>  
384 -CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, int y)  
385 -{  
386 - return x.pow(y);  
387 -}  
388 -  
389 -//log function  
390 -template<typename T>  
391 -CUDA_CALLABLE static stim::complex<T> log(stim::complex<T> x)  
392 -{  
393 - return x.log();  
394 -}  
395 -  
396 -//exp function  
397 -template<typename T>  
398 -CUDA_CALLABLE static stim::complex<T> exp(stim::complex<T> x)  
399 -{  
400 - return x.exp();  
401 -}  
402 -  
403 -//sqrt function  
404 -template<typename T>  
405 -CUDA_CALLABLE static stim::complex<T> sqrt(stim::complex<T> x)  
406 -{  
407 - return x.sqrt();  
408 -}  
409 -  
410 -  
411 -template <typename T>  
412 -CUDA_CALLABLE static T abs(stim::complex<T> a)  
413 -{  
414 - return a.abs();  
415 -}  
416 -  
417 -template <typename T>  
418 -CUDA_CALLABLE static T real(stim::complex<T> a)  
419 -{  
420 - return a.r;  
421 -}  
422 -  
423 -//template <typename T>  
424 -CUDA_CALLABLE static float real(float a)  
425 -{  
426 - return a;  
427 -}  
428 -  
429 -template <typename T>  
430 -CUDA_CALLABLE static T imag(stim::complex<T> a)  
431 -{  
432 - return a.i;  
433 -}  
434 -  
435 -//trigonometric functions  
436 -//template<class A>  
437 -/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x)  
438 -{  
439 - stim::complex<float> result;  
440 - result.r = sinf(x.r) * coshf(x.i);  
441 - result.i = cosf(x.r) * sinhf(x.i);  
442 -  
443 - return result;  
444 -}*/  
445 -  
446 -template<class A>  
447 -CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)  
448 -{  
449 - stim::complex<A> result;  
450 - result.r = (A)std::sin(x.r) * (A)std::cosh(x.i);  
451 - result.i = (A)std::cos(x.r) * (A)std::sinh(x.i);  
452 -  
453 - return result;  
454 -}  
455 -  
456 -//floating point template  
457 -//template<class A>  
458 -/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x)  
459 -{  
460 - stim::complex<float> result;  
461 - result.r = cosf(x.r) * coshf(x.i);  
462 - result.i = -(sinf(x.r) * sinhf(x.i));  
463 -  
464 - return result;  
465 -}*/  
466 -  
467 -template<class A>  
468 -CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x)  
469 -{  
470 - stim::complex<A> result;  
471 - result.r = (A)std::cos(x.r) * (A)std::cosh(x.i);  
472 - result.i = -((A)std::sin(x.r) * (A)std::sinh(x.i));  
473 -  
474 - return result;  
475 -}  
476 -  
477 -  
478 -template<class A>  
479 -std::ostream& operator<<(std::ostream& os, stim::complex<A> x)  
480 -{  
481 - os<<x.str();  
482 - return os;  
483 -}  
484 -  
485 -template<class A>  
486 -std::istream& operator>>(std::istream& is, stim::complex<A>& x)  
487 -{  
488 - A r, i;  
489 - r = i = 0; //initialize the real and imaginary parts to zero  
490 - is>>r; //parse  
491 - is>>i;  
492 -  
493 - x.real(r); //assign the parsed values to x  
494 - x.imag(i);  
495 -  
496 - return is; //return the stream  
497 -}  
498 -  
499 -//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7  
500 -//template<class T> using rtsComplex = stim::complex<T>;  
501 -//#endif  
502 -  
503 -  
504 -  
505 -#endif 1 +/*RTS Complex number class. This class is CUDA compatible,
  2 +and can therefore be used in CUDA code and on CUDA devices.
  3 +*/
  4 +
  5 +#ifndef RTS_COMPLEX
  6 +#define RTS_COMPLEX
  7 +
  8 +#include "../cuda/callable.h"
  9 +#include <cmath>
  10 +#include <string>
  11 +#include <sstream>
  12 +#include <iostream>
  13 +
  14 +namespace stim
  15 +{
  16 +
  17 +template <class T>
  18 +struct complex
  19 +{
  20 + T r, i;
  21 +
  22 + //default constructor
  23 + CUDA_CALLABLE complex()
  24 + {
  25 + r = 0;
  26 + i = 0;
  27 + }
  28 +
  29 + //constructor when given real and imaginary values
  30 + CUDA_CALLABLE complex(T r, T i = 0)
  31 + {
  32 + this->r = r;
  33 + this->i = i;
  34 + }
  35 +
  36 + //access methods
  37 + CUDA_CALLABLE T real()
  38 + {
  39 + return r;
  40 + }
  41 +
  42 + CUDA_CALLABLE T real(T r_val)
  43 + {
  44 + r = r_val;
  45 + return r_val;
  46 + }
  47 +
  48 + CUDA_CALLABLE T imag()
  49 + {
  50 + return i;
  51 + }
  52 + CUDA_CALLABLE T imag(T i_val)
  53 + {
  54 + i = i_val;
  55 + return i_val;
  56 + }
  57 +
  58 +
  59 +
  60 + //return the current value multiplied by i
  61 + CUDA_CALLABLE complex<T> imul()
  62 + {
  63 + complex<T> result;
  64 + result.r = -i;
  65 + result.i = r;
  66 +
  67 + return result;
  68 + }
  69 +
  70 + //returns the complex signum (-1, 0, 1)
  71 + CUDA_CALLABLE int sgn(){
  72 + if(r > 0) return 1;
  73 + else if(r < 0) return -1;
  74 + else return (0 < i - i < 0);
  75 + }
  76 +
  77 + //ARITHMETIC OPERATORS--------------------
  78 +
  79 + //binary + operator (returns the result of adding two complex values)
  80 + CUDA_CALLABLE complex<T> operator+ (const complex<T> rhs) const
  81 + {
  82 + complex<T> result;
  83 + result.r = r + rhs.r;
  84 + result.i = i + rhs.i;
  85 + return result;
  86 + }
  87 +
  88 + CUDA_CALLABLE complex<T> operator+ (const T rhs) const
  89 + {
  90 + complex<T> result;
  91 + result.r = r + rhs;
  92 + result.i = i;
  93 + return result;
  94 + }
  95 +
  96 + //binary - operator (returns this value minus rhs, computed component-wise)
  97 + CUDA_CALLABLE complex<T> operator- (const complex<T> rhs) const
  98 + {
  99 + complex<T> result;
  100 + result.r = r - rhs.r;
  101 + result.i = i - rhs.i;
  102 + return result;
  103 + }
  104 +
  105 + //binary - operator (returns the result of adding two complex values)
  106 + CUDA_CALLABLE complex<T> operator- (const T rhs)
  107 + {
  108 + complex<T> result;
  109 + result.r = r - rhs;
  110 + result.i = i;
  111 + return result;
  112 + }
  113 +
  114 + //binary MULTIPLICATION operators (returns the result of multiplying complex values)
  115 + CUDA_CALLABLE complex<T> operator* (const complex<T> rhs) const
  116 + {
  117 + complex<T> result;
  118 + result.r = r * rhs.r - i * rhs.i;
  119 + result.i = r * rhs.i + i * rhs.r;
  120 + return result;
  121 + }
  122 + CUDA_CALLABLE complex<T> operator* (const T rhs)
  123 + {
  124 + return complex<T>(r * rhs, i * rhs);
  125 + }
  126 +
  127 + //binary DIVISION operators (returns the result of dividing complex values)
  128 + CUDA_CALLABLE complex<T> operator/ (const complex<T> rhs) const
  129 + {
  130 + complex<T> result;
  131 + T denom = rhs.r * rhs.r + rhs.i * rhs.i;
  132 + result.r = (r * rhs.r + i * rhs.i) / denom;
  133 + result.i = (- r * rhs.i + i * rhs.r) / denom;
  134 +
  135 + return result;
  136 + }
  137 + CUDA_CALLABLE complex<T> operator/ (const T rhs)
  138 + {
  139 + return complex<T>(r / rhs, i / rhs);
  140 + }
  141 +
  142 + //ASSIGNMENT operators-----------------------------------
  143 + CUDA_CALLABLE complex<T> & operator=(const complex<T> &rhs)
  144 + {
  145 + //check for self-assignment
  146 + if(this != &rhs)
  147 + {
  148 + this->r = rhs.r;
  149 + this->i = rhs.i;
  150 + }
  151 + return *this;
  152 + }
  153 + CUDA_CALLABLE complex<T> & operator=(const T &rhs)
  154 + {
  155 + this->r = rhs;
  156 + this->i = 0;
  157 +
  158 + return *this;
  159 + }
  160 +
  161 + //arithmetic assignment operators
  162 + CUDA_CALLABLE complex<T> operator+=(const complex<T> &rhs)
  163 + {
  164 + *this = *this + rhs;
  165 + return *this;
  166 + }
  167 + CUDA_CALLABLE complex<T> operator+=(const T &rhs)
  168 + {
  169 + *this = *this + rhs;
  170 + return *this;
  171 + }
  172 +
  173 + CUDA_CALLABLE complex<T> operator-=(const complex<T> &rhs)
  174 + {
  175 + *this = *this - rhs;
  176 + return *this;
  177 + }
  178 + CUDA_CALLABLE complex<T> operator-=(const T &rhs)
  179 + {
  180 + *this = *this - rhs;
  181 + return *this;
  182 + }
  183 +
  184 + CUDA_CALLABLE complex<T> operator*=(const complex<T> &rhs)
  185 + {
  186 + *this = *this * rhs;
  187 + return *this;
  188 + }
  189 + CUDA_CALLABLE complex<T> operator*=(const T &rhs)
  190 + {
  191 + *this = *this * rhs;
  192 + return *this;
  193 + }
  194 + //divide and assign
  195 + CUDA_CALLABLE complex<T> operator/=(const complex<T> &rhs)
  196 + {
  197 + *this = *this / rhs;
  198 + return *this;
  199 + }
  200 + CUDA_CALLABLE complex<T> operator/=(const T &rhs)
  201 + {
  202 + *this = *this / rhs;
  203 + return *this;
  204 + }
  205 +
  206 + //absolute value operator (returns the absolute value of the complex number)
  207 + CUDA_CALLABLE T abs()
  208 + {
  209 + return std::sqrt(r * r + i * i);
  210 + }
  211 +
  212 + CUDA_CALLABLE complex<T> log()
  213 + {
  214 + complex<T> result;
  215 + result.r = (T)std::log(std::sqrt(r * r + i * i));
  216 + result.i = (T)std::atan2(i, r);
  217 +
  218 +
  219 + return result;
  220 + }
  221 +
  222 + CUDA_CALLABLE complex<T> exp()
  223 + {
  224 + complex<T> result;
  225 +
  226 + T e_r = std::exp(r);
  227 + result.r = e_r * (T)std::cos(i);
  228 + result.i = e_r * (T)std::sin(i);
  229 +
  230 + return result;
  231 + }
  232 +
  233 + /*CUDA_CALLABLE complex<T> pow(int y)
  234 + {
  235 +
  236 + return pow((double)y);
  237 + }*/
  238 +
  239 + CUDA_CALLABLE complex<T> pow(T y)
  240 + {
  241 + complex<T> result;
  242 +
  243 + result = log() * y;
  244 +
  245 + return result.exp();
  246 + }
  247 +
  248 + CUDA_CALLABLE complex<T> sqrt()
  249 + {
  250 + complex<T> result;
  251 +
  252 + //convert to polar coordinates
  253 + T a = std::sqrt(r*r + i*i);
  254 + T theta = std::atan2(i, r);
  255 +
  256 + //find the square root
  257 + T a_p = std::sqrt(a);
  258 + T theta_p = theta/2.0f;
  259 +
  260 + //convert back to cartesian coordinates
  261 + result.r = a_p * std::cos(theta_p);
  262 + result.i = a_p * std::sin(theta_p);
  263 +
  264 + return result;
  265 + }
  266 +
  267 + std::string str()
  268 + {
  269 + std::stringstream ss;
  270 + ss<<"("<<r<<","<<i<<")";
  271 +
  272 + return ss.str();
  273 + }
  274 +
  275 + //COMPARISON operators
  276 + CUDA_CALLABLE bool operator==(complex<T> rhs)
  277 + {
  278 + if(r == rhs.r && i == rhs.i)
  279 + return true;
  280 + return false;
  281 + }
  282 +
  283 + CUDA_CALLABLE bool operator==(T rhs)
  284 + {
  285 + if(r == rhs && i == 0)
  286 + return true;
  287 + return false;
  288 + }
  289 +
  290 + CUDA_CALLABLE bool operator!=(T rhs)
  291 + {
  292 + if(r != rhs || i != 0)
  293 + return true;
  294 + return false;
  295 + }
  296 +
  297 + CUDA_CALLABLE bool operator<(complex<T> rhs){
  298 + return abs() < rhs.abs();
  299 + }
  300 + CUDA_CALLABLE bool operator<=(complex<T> rhs){
  301 + return abs() <= rhs.abs();
  302 + }
  303 + CUDA_CALLABLE bool operator>(complex<T> rhs){
  304 + return abs() > rhs.abs();
  305 + }
  306 + CUDA_CALLABLE bool operator >=(complex<T> rhs){
  307 + return abs() >= rhs.abs();
  308 + }
  309 +
  310 + //CASTING operators
  311 + template < typename otherT >
  312 + operator complex<otherT>()
  313 + {
  314 + complex<otherT> result((otherT)r, (otherT)i);
  315 + return result;
  316 + }
  317 + template< typename otherT >
  318 + complex( const complex<otherT> &rhs)
  319 + {
  320 + r = (T)rhs.r;
  321 + i = (T)rhs.i;
  322 + }
  323 + template< typename otherT >
  324 + complex& operator=(const complex<otherT> &rhs)
  325 + {
  326 + r = (T)rhs.r;
  327 + i = (T)rhs.i;
  328 + return *this;
  329 + }
  330 +
  331 +};
  332 +
  333 +} //end RTS namespace
  334 +
  335 +//addition
  336 +template<typename T>
  337 +CUDA_CALLABLE static stim::complex<T> operator+(const double a, const stim::complex<T> b)
  338 +{
  339 + return stim::complex<T>((T)a + b.r, b.i);
  340 +}
  341 +
  342 +//subtraction with a real value
  343 +template<typename T>
  344 +CUDA_CALLABLE static stim::complex<T> operator-(const double a, const stim::complex<T> b)
  345 +{
  346 + return stim::complex<T>((T)a - b.r, -b.i);
  347 +}
  348 +
  349 +//minus sign
  350 +template<typename T>
  351 +CUDA_CALLABLE static stim::complex<T> operator-(const stim::complex<T> &rhs)
  352 +{
  353 + return stim::complex<T>(-rhs.r, -rhs.i);
  354 +}
  355 +
  356 +//multiply a T value by a complex value
  357 +template<typename T>
  358 +CUDA_CALLABLE static stim::complex<T> operator*(const double a, const stim::complex<T> b)
  359 +{
  360 + return stim::complex<T>((T)a * b.r, (T)a * b.i);
  361 +}
  362 +
  363 +//divide a T value by a complex value
  364 +template<typename T>
  365 +CUDA_CALLABLE static stim::complex<T> operator/(const double a, const stim::complex<T> b)
  366 +{
  367 + stim::complex<T> result;
  368 +
  369 + T denom = b.r * b.r + b.i * b.i;
  370 +
  371 + result.r = ((T)a * b.r) / denom;
  372 + result.i = -((T)a * b.i) / denom;
  373 +
  374 + return result;
  375 +}
  376 +
  377 +
  378 +template<typename T>
  379 +CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, T y)
  380 +{
  381 + return x.pow(y);
  382 +}
  383 +template<typename T>
  384 +CUDA_CALLABLE static stim::complex<T> pow(stim::complex<T> x, int y)
  385 +{
  386 + return x.pow(y);
  387 +}
  388 +
  389 +//log function
  390 +template<typename T>
  391 +CUDA_CALLABLE static stim::complex<T> log(stim::complex<T> x)
  392 +{
  393 + return x.log();
  394 +}
  395 +
  396 +//exp function
  397 +template<typename T>
  398 +CUDA_CALLABLE static stim::complex<T> exp(stim::complex<T> x)
  399 +{
  400 + return x.exp();
  401 +}
  402 +
  403 +//sqrt function
  404 +template<typename T>
  405 +CUDA_CALLABLE static stim::complex<T> sqrt(stim::complex<T> x)
  406 +{
  407 + return x.sqrt();
  408 +}
  409 +
  410 +
  411 +template <typename T>
  412 +CUDA_CALLABLE static T abs(stim::complex<T> a)
  413 +{
  414 + return a.abs();
  415 +}
  416 +
  417 +template <typename T>
  418 +CUDA_CALLABLE static T real(stim::complex<T> a)
  419 +{
  420 + return a.r;
  421 +}
  422 +
  423 +//template <typename T>
  424 +CUDA_CALLABLE static float real(float a)
  425 +{
  426 + return a;
  427 +}
  428 +
  429 +template <typename T>
  430 +CUDA_CALLABLE static T imag(stim::complex<T> a)
  431 +{
  432 + return a.i;
  433 +}
  434 +
  435 +//trigonometric functions
  436 +//template<class A>
  437 +/*CUDA_CALLABLE static stim::complex<float> sinf(const stim::complex<float> x)
  438 +{
  439 + stim::complex<float> result;
  440 + result.r = sinf(x.r) * coshf(x.i);
  441 + result.i = cosf(x.r) * sinhf(x.i);
  442 +
  443 + return result;
  444 +}*/
  445 +
  446 +template<class A>
  447 +CUDA_CALLABLE stim::complex<A> sin(const stim::complex<A> x)
  448 +{
  449 + stim::complex<A> result;
  450 + result.r = (A)std::sin(x.r) * (A)std::cosh(x.i);
  451 + result.i = (A)std::cos(x.r) * (A)std::sinh(x.i);
  452 +
  453 + return result;
  454 +}
  455 +
  456 +//floating point template
  457 +//template<class A>
  458 +/*CUDA_CALLABLE static stim::complex<float> cosf(const stim::complex<float> x)
  459 +{
  460 + stim::complex<float> result;
  461 + result.r = cosf(x.r) * coshf(x.i);
  462 + result.i = -(sinf(x.r) * sinhf(x.i));
  463 +
  464 + return result;
  465 +}*/
  466 +
  467 +template<class A>
  468 +CUDA_CALLABLE stim::complex<A> cos(const stim::complex<A> x)
  469 +{
  470 + stim::complex<A> result;
  471 + result.r = (A)std::cos(x.r) * (A)std::cosh(x.i);
  472 + result.i = -((A)std::sin(x.r) * (A)std::sinh(x.i));
  473 +
  474 + return result;
  475 +}
  476 +
  477 +
  478 +template<class A>
  479 +std::ostream& operator<<(std::ostream& os, stim::complex<A> x)
  480 +{
  481 + os<<x.str();
  482 + return os;
  483 +}
  484 +
  485 +template<class A>
  486 +std::istream& operator>>(std::istream& is, stim::complex<A>& x)
  487 +{
  488 + A r, i;
  489 + r = i = 0; //initialize the real and imaginary parts to zero
  490 + is>>r; //parse
  491 + is>>i;
  492 +
  493 + x.real(r); //assign the parsed values to x
  494 + x.imag(i);
  495 +
  496 + return is; //return the stream
  497 +}
  498 +
  499 +//#if __GNUC__ > 3 && __GNUC_MINOR__ > 7
  500 +//template<class T> using rtsComplex = stim::complex<T>;
  501 +//#endif
  502 +
  503 +
  504 +
  505 +#endif
math/complexfield.cuh
1 -#ifndef RTS_COMPLEXFIELD_H  
2 -#define RTS_COMPLEXFIELD_H  
3 -  
4 -#include "cublas_v2.h"  
5 -#include <cuda_runtime.h>  
6 -  
7 -#include "../math/field.cuh"  
8 -#include "../math/complex.h"  
9 -#include "../math/realfield.cuh"  
10 -  
11 -namespace stim{  
12 -  
13 -template<typename T>  
14 -__global__ void gpu_complexfield_mag(T* dest, complex<T>* source, unsigned int r0, unsigned int r1){  
15 -  
16 - int iu = blockIdx.x * blockDim.x + threadIdx.x;  
17 - int iv = blockIdx.y * blockDim.y + threadIdx.y;  
18 -  
19 - //make sure that the thread indices are in-bounds  
20 - if(iu >= r0 || iv >= r1) return;  
21 -  
22 - //compute the index into the field  
23 - int i = iv*r0 + iu;  
24 -  
25 - //calculate and store the result  
26 - dest[i] = source[i].abs();  
27 -}  
28 -  
29 -/*This class stores functions for saving images of complex fields  
30 -*/  
31 -template<typename T, unsigned int D = 1>  
32 -class complexfield : public field< stim::complex<T>, D >{  
33 - using field< stim::complex<T>, D >::R;  
34 - using field< stim::complex<T>, D >::X;  
35 - using field< stim::complex<T>, D >::shape;  
36 - using field< stim::complex<T>, D >::cuda_params;  
37 -  
38 -  
39 -  
40 -public:  
41 -  
42 - //find the maximum value of component n  
43 - stim::complex<T> find_max(unsigned int n){  
44 - cublasStatus_t stat;  
45 - cublasHandle_t handle;  
46 -  
47 - //create a CUBLAS handle  
48 - stat = cublasCreate(&handle);  
49 - if(stat != CUBLAS_STATUS_SUCCESS){  
50 - std::cout<<"CUBLAS Error: initialization failed"<<std::endl;  
51 - exit(1);  
52 - }  
53 -  
54 - int L = R[0] * R[1]; //compute the number of discrete points in a slice  
55 - int index; //result of the max operation  
56 - stim::complex<T> result;  
57 -  
58 - if(sizeof(T) == 8)  
59 - stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index);  
60 - else  
61 - stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index);  
62 -  
63 - index -= 1; //adjust for 1-based indexing  
64 -  
65 - //if there was a GPU error, terminate  
66 - if(stat != CUBLAS_STATUS_SUCCESS){  
67 - std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;  
68 - exit(1);  
69 - }  
70 -  
71 - //retrieve the maximum value for this slice and store it in the maxVal array  
72 - std::cout<<X[n]<<std::endl;  
73 - HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(stim::complex<T>), cudaMemcpyDeviceToHost));  
74 - return result;  
75 - }  
76 -  
77 -public:  
78 -  
79 - enum attribute {magnitude, real, imaginary};  
80 -  
81 - //constructor (no parameters)  
82 - complexfield() : field<stim::complex<T>, D>(){};  
83 -  
84 - //constructor (resolution specified)  
85 - complexfield(unsigned int r0, unsigned int r1) : field<stim::complex<T>, D>(r0, r1){};  
86 -  
87 - //assignment from a field of complex values  
88 - complexfield & operator=(const field< stim::complex<T>, D > rhs){  
89 - field< complex<T>, D >::operator=(rhs);  
90 - return *this;  
91 - }  
92 -  
93 - //assignment operator (scalar value)  
94 - complexfield & operator= (const complex<T> rhs){  
95 -  
96 - field< complex<T>, D >::operator=(rhs);  
97 - return *this;  
98 - }  
99 -  
100 - //assignment operator (vector value)  
101 - complexfield & operator= (const vec< complex<T>, D > rhs){  
102 -  
103 - field< complex<T>, D >::operator=(rhs);  
104 - return *this;  
105 - }  
106 -  
107 - //cropping  
108 - complexfield crop(unsigned int width, unsigned int height){  
109 -  
110 - complexfield<T, D> result;  
111 - result = field< complex<T>, D>::crop(width, height);  
112 - return result;  
113 - }  
114 -  
115 - void toImage(std::string filename, attribute type = magnitude, unsigned int n=0){  
116 -  
117 - field<T, 1> rf(R[0], R[1]);  
118 -  
119 - //get cuda parameters  
120 - dim3 blocks, grids;  
121 - cuda_params(grids, blocks);  
122 -  
123 - if(type == magnitude){  
124 - gpu_complexfield_mag <<<grids, blocks>>> (rf.ptr(), X[n], R[0], R[1]);  
125 - rf.toImage(filename, n, true);  
126 - }  
127 -  
128 - }  
129 -  
130 -  
131 -};  
132 -  
133 -  
134 -} //end namespace rts  
135 -  
136 -  
137 -#endif 1 +#ifndef RTS_COMPLEXFIELD_H
  2 +#define RTS_COMPLEXFIELD_H
  3 +
  4 +#include "cublas_v2.h"
  5 +#include <cuda_runtime.h>
  6 +
  7 +#include "../math/field.cuh"
  8 +#include "../math/complex.h"
  9 +#include "../math/realfield.cuh"
  10 +
  11 +namespace stim{
  12 +
  13 +template<typename T>
  14 +__global__ void gpu_complexfield_mag(T* dest, complex<T>* source, unsigned int r0, unsigned int r1){
  15 +
  16 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  17 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  18 +
  19 + //make sure that the thread indices are in-bounds
  20 + if(iu >= r0 || iv >= r1) return;
  21 +
  22 + //compute the index into the field
  23 + int i = iv*r0 + iu;
  24 +
  25 + //calculate and store the result
  26 + dest[i] = source[i].abs();
  27 +}
  28 +
/*Field of complex values. Extends field< stim::complex<T>, D > with a maximum
  search that works on complex data and with image export of the field.

  T is the precision of the complex components; the cuBLAS dispatch below
  assumes it is either float or double.
*/
template<typename T, unsigned int D = 1>
class complexfield : public field< stim::complex<T>, D >{
	using field< stim::complex<T>, D >::R;
	using field< stim::complex<T>, D >::X;
	using field< stim::complex<T>, D >::shape;
	using field< stim::complex<T>, D >::cuda_params;

public:

	//quantity that toImage() renders
	enum attribute {magnitude, real, imaginary};

	//constructor (no parameters)
	complexfield() : field<stim::complex<T>, D>(){}

	//constructor (resolution specified)
	complexfield(unsigned int r0, unsigned int r1) : field<stim::complex<T>, D>(r0, r1){}

	//find the maximum value of component n
	//  Returns the element selected by cublasIcamax / cublasIzamax. Per the cuBLAS
	//  documentation these pick the element with the largest |real| + |imaginary|,
	//  which is not necessarily the element with the largest modulus.
	//  An empty field returns a default-constructed (zero) value.
	stim::complex<T> find_max(unsigned int n){

		stim::complex<T> result;

		//X only has D slices
		if(n >= D){
			std::cout<<"complexfield Error: component index is out of range."<<std::endl;
			exit(1);
		}

		int L = R[0] * R[1];					//compute the number of discrete points in a slice
		if(L <= 0) return result;				//nothing to search (cuBLAS would report index 0)

		cublasStatus_t stat;
		cublasHandle_t handle;

		//create a CUBLAS handle
		stat = cublasCreate(&handle);
		if(stat != CUBLAS_STATUS_SUCCESS){
			std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
			exit(1);
		}

		int index = 0;							//result of the max operation (1-based)

		//cuComplex holds two floats and cuDoubleComplex two doubles, so
		//  single precision has to go to the "c" routine and double to the "z" routine
		if(sizeof(T) == sizeof(float))
			stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index);
		else
			stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index);

		//the handle is only needed for the search; release it before any exit path
		cublasDestroy(handle);

		//if there was a GPU error, terminate
		if(stat != CUBLAS_STATUS_SUCCESS){
			std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
			exit(1);
		}

		index -= 1;								//adjust for 1-based indexing

		//retrieve the maximum value for this slice
		HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(stim::complex<T>), cudaMemcpyDeviceToHost));
		return result;
	}

	//assignment from a field of complex values
	//  (taken by reference so that the GPU data is only copied once, by the base class)
	complexfield & operator=(const field< stim::complex<T>, D > & rhs){
		field< complex<T>, D >::operator=(rhs);
		return *this;
	}

	//assignment operator (scalar value)
	complexfield & operator= (const complex<T> rhs){

		field< complex<T>, D >::operator=(rhs);
		return *this;
	}

	//assignment operator (vector value)
	complexfield & operator= (const vec< complex<T>, D > rhs){

		field< complex<T>, D >::operator=(rhs);
		return *this;
	}

	//cropping
	//  Returns the base-class crop of this field as a complexfield.
	complexfield crop(unsigned int width, unsigned int height){

		complexfield<T, D> result;
		result = field< complex<T>, D>::crop(width, height);
		return result;
	}

	//save an image of component n
	//  filename - name of the image file to write
	//  type     - quantity to render; only magnitude is implemented, the other
	//             attributes currently produce no output
	//  n        - component to render
	void toImage(std::string filename, attribute type = magnitude, unsigned int n=0){

		//X only has D slices
		if(n >= D){
			std::cout<<"complexfield Error: component index is out of range."<<std::endl;
			exit(1);
		}

		//an empty field has nothing to render (and cannot be launched on)
		if(R[0] == 0 || R[1] == 0) return;

		if(type == magnitude){

			//single-component real field that receives the magnitudes
			field<T, 1> rf(R[0], R[1]);

			//get cuda parameters
			dim3 blocks, grids;
			cuda_params(grids, blocks);

			gpu_complexfield_mag <<<grids, blocks>>> (rf.ptr(), X[n], R[0], R[1]);
			HANDLE_ERROR(cudaGetLastError());	//report launch failures

			//rf has exactly one component, so its only valid index is 0 (not n)
			rf.toImage(filename, 0, true);
		}

	}


};
  132 +
  133 +
  134 +} //end namespace stim
  135 +
  136 +
  137 +#endif
1 -#ifndef RTS_FIELD_CUH  
2 -#define RTS_FIELD_CUH  
3 -  
4 -#include <vector>  
5 -#include <string>  
6 -#include <sstream>  
7 -  
8 -#include "cublas_v2.h"  
9 -#include <cuda_runtime.h>  
10 -  
11 -#include "../math/rect.h"  
12 -#include "../cuda/threads.h"  
13 -#include "../cuda/error.h"  
14 -#include "../cuda/devices.h"  
15 -#include "../visualization/colormap.h"  
16 -  
17 -  
18 -namespace stim{  
19 -  
20 -//multiply R = X * Y  
21 -template<typename T>  
22 -__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){  
23 -  
24 - int iu = blockIdx.x * blockDim.x + threadIdx.x;  
25 - int iv = blockIdx.y * blockDim.y + threadIdx.y;  
26 -  
27 - //make sure that the thread indices are in-bounds  
28 - if(iu >= r0 || iv >= r1) return;  
29 -  
30 - //compute the index into the field  
31 - int i = iv*r0 + iu;  
32 -  
33 - //calculate and store the result  
34 - R[i] = X[i] * Y[i];  
35 -}  
36 -  
37 -//assign a constant value to all points  
38 -template<typename T>  
39 -__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){  
40 -  
41 - int iu = blockIdx.x * blockDim.x + threadIdx.x;  
42 - int iv = blockIdx.y * blockDim.y + threadIdx.y;  
43 -  
44 - //make sure that the thread indices are in-bounds  
45 - if(iu >= r0 || iv >= r1) return;  
46 -  
47 - //compute the index into the field  
48 - int i = iv*r0 + iu;  
49 -  
50 - //calculate and store the result  
51 - ptr[i] = val;  
52 -}  
53 -  
54 -//crop the field to the new dimensions (width x height)  
55 -template<typename T>  
56 -__global__ void gpu_field_crop(T* dest, T* source,  
57 - unsigned int r0, unsigned int r1,  
58 - unsigned int width, unsigned int height){  
59 -  
60 - int iu = blockIdx.x * blockDim.x + threadIdx.x;  
61 - int iv = blockIdx.y * blockDim.y + threadIdx.y;  
62 -  
63 - //make sure that the thread indices are in-bounds  
64 - if(iu >= width || iv >= height) return;  
65 -  
66 - //compute the index into the field  
67 - int is = iv*r0 + iu;  
68 - int id = iv*width + iu;  
69 -  
70 - //calculate and store the result  
71 - dest[id] = source[is];  
72 -}  
73 -  
74 -template<typename T, unsigned int D = 1>  
75 -class field{  
76 -  
77 -protected:  
78 -  
79 - T* X[D]; //pointer to the field data  
80 - unsigned int R[2]; //field resolution  
81 - stim::rect<T> shape; //position and shape of the field slice  
82 -  
83 - //calculates the optimal block and grid sizes using information from the GPU  
84 - void cuda_params(dim3& grids, dim3& blocks){  
85 - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size  
86 - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);  
87 -  
88 - //create one thread for each detector pixel  
89 - blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);  
90 - grids = dim3((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);  
91 - }  
92 -  
93 - //find the maximum value of component n  
94 - T find_max(unsigned int n){  
95 - cublasStatus_t stat;  
96 - cublasHandle_t handle;  
97 -  
98 - //create a CUBLAS handle  
99 - stat = cublasCreate(&handle);  
100 - if(stat != CUBLAS_STATUS_SUCCESS){  
101 - std::cout<<"CUBLAS Error: initialization failed"<<std::endl;  
102 - exit(1);  
103 - }  
104 -  
105 - int L = R[0] * R[1]; //compute the number of discrete points in a slice  
106 - int index; //result of the max operation  
107 - T result;  
108 -  
109 - if(sizeof(T) == 4)  
110 - stat = cublasIsamax(handle, L, (const float*)X[n], 1, &index);  
111 - else  
112 - stat = cublasIdamax(handle, L, (const double*)X[n], 1, &index);  
113 -  
114 - index -= 1; //adjust for 1-based indexing  
115 -  
116 - //if there was a GPU error, terminate  
117 - if(stat != CUBLAS_STATUS_SUCCESS){  
118 - std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;  
119 - exit(1);  
120 - }  
121 -  
122 - //retrieve the maximum value for this slice and store it in the maxVal array  
123 - HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(T), cudaMemcpyDeviceToHost));  
124 - return result;  
125 - }  
126 -  
127 -public:  
128 -  
129 - //returns a list of file names given an input string with wild cards  
130 - std::vector<std::string> process_filename(std::string name){  
131 - std::stringstream ss(name);  
132 - std::string item;  
133 - std::vector<std::string> elems;  
134 - while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension)  
135 - {  
136 - elems.push_back(item);  
137 - }  
138 -  
139 - std::string prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters)  
140 - std::string ext = elems[1]; //file extension (ex. .bmp, .png)  
141 - ext = std::string(".") + ext; //add a period back into the extension  
142 -  
143 - size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'')  
144 - size_t i1 = prefix.find_last_of("?");  
145 -  
146 - std::string postfix = prefix.substr(i1+1);  
147 - prefix = prefix.substr(0, i0);  
148 -  
149 - unsigned int digits = i1 - i0 + 1; //compute the number of wildcards  
150 -  
151 - std::vector<std::string> flist; //create a vector of file names  
152 - //fill the list  
153 - for(unsigned int d=0; d<D; d++){  
154 - std::stringstream ss; //assemble the file name  
155 - ss<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext;  
156 - flist.push_back(ss.str());  
157 - }  
158 -  
159 - return flist;  
160 - }  
161 -  
162 - void init(){  
163 - for(unsigned int n=0; n<D; n++)  
164 - X[n] = NULL;  
165 - }  
166 - void destroy(){  
167 - for(unsigned int n=0; n<D; n++)  
168 - if(X[n] != NULL)  
169 - HANDLE_ERROR(cudaFree(X[n]));  
170 - }  
171 -  
172 -public:  
173 - //field constructor  
174 - field(){  
175 - R[0] = R[1] = 0;  
176 - init();  
177 - }  
178 -  
179 - field(unsigned int x, unsigned int y){  
180 - //set the resolution  
181 - R[0] = x;  
182 - R[1] = y;  
183 - //allocate memory on the GPU  
184 - for(unsigned int n=0; n<D; n++){  
185 - HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(T) * R[0] * R[1] ));  
186 - }  
187 - clear(); //zero the field  
188 - }  
189 -  
190 - ///copy constructor  
191 - field(const field &rhs){  
192 - //first make a shallow copy  
193 - R[0] = rhs.R[0];  
194 - R[1] = rhs.R[1];  
195 -  
196 - for(unsigned int n=0; n<D; n++){  
197 - //do we have to make a deep copy?  
198 - if(rhs.X[n] == NULL)  
199 - X[n] = NULL; //no  
200 - else{  
201 - //allocate the necessary memory  
202 - HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1]));  
203 -  
204 - //copy the slice  
205 - HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice));  
206 - }  
207 - }  
208 - }  
209 -  
210 - ~field(){  
211 - destroy();  
212 - }  
213 -  
214 - //assignment operator  
215 - field & operator= (const field & rhs){  
216 -  
217 - //de-allocate any existing GPU memory  
218 - destroy();  
219 -  
220 - //copy the slice resolution  
221 - R[0] = rhs.R[0];  
222 - R[1] = rhs.R[1];  
223 -  
224 - for(unsigned int n=0; n<D; n++)  
225 - {  
226 - //allocate the necessary memory  
227 - HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1]));  
228 - //copy the slice  
229 - HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice));  
230 - }  
231 - return *this;  
232 - }  
233 -  
234 - field & operator= (const T rhs){  
235 -  
236 - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size  
237 - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);  
238 -  
239 - //create one thread for each detector pixel  
240 - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);  
241 - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);  
242 -  
243 - //assign the constant value to all positions and dimensions  
244 - for(int n=0; n<D; n++)  
245 - stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]);  
246 -  
247 - return *this;  
248 - }  
249 -  
250 - //assignment of vector component  
251 - field & operator= (const vec<T, D> rhs){  
252 -  
253 - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size  
254 - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);  
255 -  
256 - //create one thread for each detector pixel  
257 - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);  
258 - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);  
259 -  
260 - //assign the constant value to all positions and dimensions  
261 - for(unsigned int n=0; n<D; n++)  
262 - stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]);  
263 -  
264 - return *this;  
265 -  
266 - }  
267 -  
268 - //multiply two fields (element-wise multiplication)  
269 - field<T, D> operator* (const field & rhs){  
270 -  
271 - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size  
272 - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);  
273 -  
274 - //create one thread for each detector pixel  
275 - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);  
276 - dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);  
277 -  
278 - //create a scalar field to store the result  
279 - field<T, D> result(R[0], R[1]);  
280 -  
281 - for(int n=0; n<D; n++)  
282 - stim::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]);  
283 -  
284 - return result;  
285 - }  
286 -  
287 - T* ptr(unsigned int n = 0){  
288 - if(n < D)  
289 - return X[n];  
290 - else return NULL;  
291 - }  
292 -  
293 - //return the vector component at position (u, v)  
294 - vec<T, D> get(unsigned int u, unsigned int v){  
295 -  
296 - vec<T, D> result;  
297 - for(unsigned int d=0; d<D; d++){  
298 - HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + v*R[0] + u, sizeof(T), cudaMemcpyDeviceToHost));  
299 - }  
300 -  
301 - return result;  
302 - }  
303 -  
304 - //set all components of the field to zero  
305 - void clear(){  
306 - for(unsigned int n=0; n<D; n++)  
307 - if(X[n] != NULL)  
308 - HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1]));  
309 - }  
310 -  
311 - //crop the field  
312 - field<T, D> crop(unsigned int width, unsigned int height){  
313 - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size  
314 - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);  
315 -  
316 - //create one thread for each detector pixel  
317 - dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);  
318 - dim3 dimGrid((width + SQRT_BLOCK -1)/SQRT_BLOCK, (height + SQRT_BLOCK - 1)/SQRT_BLOCK);  
319 -  
320 - //create a scalar field to store the result  
321 - field<T, D> result(width, height);  
322 -  
323 - for(int n=0; n<D; n++)  
324 - stim::gpu_field_crop <<<dimGrid, dimBlock>>> (result.X[n], X[n], R[0], R[1], width, height);  
325 -  
326 - return result;  
327 - }  
328 -  
329 - //save an image representing component n  
330 - void toImage(std::string filename, unsigned int n = 0,  
331 - bool positive = false, stim::colormapType cmap = stim::cmBrewer){  
332 - T max_val = find_max(n); //find the maximum value  
333 -  
334 - if(positive) //if the field is positive, use the range [0 max_val]  
335 - stim::gpu2image<T>(X[n], filename, R[0], R[1], 0, max_val, cmap);  
336 - else  
337 - stim::gpu2image<T>(X[n], filename, R[0], R[1], -max_val, max_val, cmap);  
338 - }  
339 -  
340 -};  
341 -  
342 -} //end namespace rts  
343 -#endif 1 +#ifndef RTS_FIELD_CUH
  2 +#define RTS_FIELD_CUH
  3 +
  4 +#include <vector>
  5 +#include <string>
  6 +#include <sstream>
  7 +
  8 +#include "cublas_v2.h"
  9 +#include <cuda_runtime.h>
  10 +
  11 +#include "../math/rect.h"
  12 +#include "../cuda/threads.h"
  13 +#include "../cuda/error.h"
  14 +#include "../cuda/devices.h"
  15 +#include "../visualization/colormap.h"
  16 +
  17 +
  18 +namespace stim{
  19 +
//multiply R = X * Y (element-wise)
//
//  R      - output slice, r0 * r1 elements
//  X, Y   - input slices, r0 * r1 elements each
//  r0, r1 - slice resolution along x and y
//
//Expects a 2D launch: the thread's x/y coordinates select the sample (iu, iv),
//  and samples are addressed as iv * r0 + iu. Threads outside the slice do nothing.
template<typename T>
__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){

	//the built-in index variables are unsigned, so keep the coordinates unsigned
	//  to avoid a signed/unsigned comparison against the resolution
	unsigned int iu = blockIdx.x * blockDim.x + threadIdx.x;
	unsigned int iv = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(iu >= r0 || iv >= r1) return;

	//compute the index into the field (widened first so that large slices cannot wrap)
	size_t i = (size_t)iv * r0 + iu;

	//calculate and store the result
	R[i] = X[i] * Y[i];
}
  36 +
//assign a constant value to all points
//
//  ptr    - slice to fill, r0 * r1 elements
//  val    - value written to every sample
//  r0, r1 - slice resolution along x and y
//
//Expects a 2D launch: the thread's x/y coordinates select the sample (iu, iv),
//  and samples are addressed as iv * r0 + iu. Threads outside the slice do nothing.
template<typename T>
__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){

	//the built-in index variables are unsigned, so keep the coordinates unsigned
	//  to avoid a signed/unsigned comparison against the resolution
	unsigned int iu = blockIdx.x * blockDim.x + threadIdx.x;
	unsigned int iv = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(iu >= r0 || iv >= r1) return;

	//compute the index into the field (widened first so that large slices cannot wrap)
	size_t i = (size_t)iv * r0 + iu;

	//store the value
	ptr[i] = val;
}
  53 +
//crop the field to the new dimensions (width x height)
//
//  dest          - output slice, width * height elements
//  source        - input slice, r0 * r1 elements
//  r0, r1        - resolution of the source slice along x and y
//  width, height - resolution of the destination slice along x and y
//
//Sample (iu, iv) of the destination receives sample (iu, iv) of the source.
//  Expects a 2D launch covering width x height threads; threads outside the
//  destination do nothing. Destination samples that have no counterpart in the
//  source (crop window larger than the source) are left unmodified.
template<typename T>
__global__ void gpu_field_crop(T* dest, T* source,
								unsigned int r0, unsigned int r1,
								unsigned int width, unsigned int height){

	//the built-in index variables are unsigned, so keep the coordinates unsigned
	//  to avoid a signed/unsigned comparison against the resolution
	unsigned int iu = blockIdx.x * blockDim.x + threadIdx.x;
	unsigned int iv = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are inside the destination
	if(iu >= width || iv >= height) return;

	//never read past the end of the source slice
	if(iu >= r0 || iv >= r1) return;

	//compute the indices into both fields (widened first so that large slices cannot wrap)
	size_t is = (size_t)iv * r0 + iu;
	size_t id = (size_t)iv * width + iu;

	//copy the sample
	dest[id] = source[is];
}
  73 +
/*Field with D components sampled on an R[0] x R[1] grid. Every component is
  stored as a separate slice allocated with cudaMalloc, so all element access
  goes through kernels or cudaMemcpy.

  T is the element type and D the number of components (slices).
*/
template<typename T, unsigned int D = 1>
class field{

protected:

	T* X[D];					//pointer to the field data (one slice per component, NULL when unallocated)
	unsigned int R[2];			//field resolution
	stim::rect<T> shape;		//position and shape of the field slice

	//calculates block and grid sizes that cover a w x h slice with one thread per sample
	//  (square blocks whose side is derived from stim::maxThreadsPerBlock())
	void launch_params(unsigned int w, unsigned int h, dim3& grids, dim3& blocks){
		int maxThreads = stim::maxThreadsPerBlock();
		int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);

		blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
		grids = dim3((w + SQRT_BLOCK - 1)/SQRT_BLOCK, (h + SQRT_BLOCK - 1)/SQRT_BLOCK);
	}

	//calculates the block and grid sizes for the resolution of this field
	void cuda_params(dim3& grids, dim3& blocks){
		launch_params(R[0], R[1], grids, blocks);
	}

	//find the maximum value of component n
	//  Returns the element selected by cublasIsamax / cublasIdamax, i.e. the
	//  element with the largest absolute value (the value itself keeps its sign).
	//  The routine is chosen from sizeof(T), so T is assumed to be float or double.
	//  An empty field returns T().
	T find_max(unsigned int n){

		T result = T();

		//X only has D slices
		if(n >= D){
			std::cout<<"field Error: component index is out of range."<<std::endl;
			exit(1);
		}

		int L = R[0] * R[1];					//compute the number of discrete points in a slice
		if(L <= 0) return result;				//nothing to search (cuBLAS would report index 0)

		cublasStatus_t stat;
		cublasHandle_t handle;

		//create a CUBLAS handle
		stat = cublasCreate(&handle);
		if(stat != CUBLAS_STATUS_SUCCESS){
			std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
			exit(1);
		}

		int index = 0;							//result of the max operation (1-based)

		if(sizeof(T) == 4)
			stat = cublasIsamax(handle, L, (const float*)X[n], 1, &index);
		else
			stat = cublasIdamax(handle, L, (const double*)X[n], 1, &index);

		//the handle is only needed for the search; release it before any exit path
		cublasDestroy(handle);

		//if there was a GPU error, terminate
		if(stat != CUBLAS_STATUS_SUCCESS){
			std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
			exit(1);
		}

		index -= 1;								//adjust for 1-based indexing

		//retrieve the maximum value for this slice
		HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(T), cudaMemcpyDeviceToHost));
		return result;
	}

public:

	//returns a list of D file names given an input string with wild cards
	//  A run of '?' characters in the name is replaced by the zero-padded
	//  component index, e.g. "img??.bmp" gives "img00.bmp", "img01.bmp", ...
	//  If the name has no '?', the index is inserted in front of the extension.
	std::vector<std::string> process_filename(std::string name){

		//split off the extension at the last '.' of the final path component,
		//  so dots in directories or earlier in the name are left alone
		std::string stem = name;
		std::string ext;
		size_t sep = name.find_last_of("/\\");
		size_t dot = name.find_last_of('.');
		if(dot != std::string::npos && (sep == std::string::npos || dot > sep)){
			stem = name.substr(0, dot);
			ext = name.substr(dot);				//keeps the period (ex. .bmp, .png)
		}

		//find the positions of the first and last wildcard ('?')
		size_t i0 = stem.find_first_of('?');
		size_t i1 = stem.find_last_of('?');

		std::string prefix = stem;				//text in front of the index
		std::string postfix;					//text between the index and the extension
		unsigned int digits = 1;				//minimum number of digits for the index
		if(i0 != std::string::npos){
			prefix = stem.substr(0, i0);
			postfix = stem.substr(i1 + 1);
			digits = (unsigned int)(i1 - i0 + 1);	//number of wildcards
		}

		//assemble one file name per component
		std::vector<std::string> flist;
		for(unsigned int d=0; d<D; d++){
			std::stringstream ss;
			ss<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext;
			flist.push_back(ss.str());
		}

		return flist;
	}

	//mark every slice as unallocated (does not free anything)
	void init(){
		for(unsigned int n=0; n<D; n++)
			X[n] = NULL;
	}

	//free every allocated slice and mark it as unallocated
	void destroy(){
		for(unsigned int n=0; n<D; n++)
			if(X[n] != NULL){
				HANDLE_ERROR(cudaFree(X[n]));
				X[n] = NULL;					//no dangling pointer, so destroy() is safe to repeat
			}
	}

public:
	//field constructor (empty field, nothing allocated)
	field(){
		R[0] = R[1] = 0;
		init();
	}

	//constructor (resolution specified): allocates and zeroes every slice
	field(unsigned int x, unsigned int y){
		//set the resolution
		R[0] = x;
		R[1] = y;
		init();
		//allocate memory on the GPU
		for(unsigned int n=0; n<D; n++){
			HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(T) * R[0] * R[1] ));
		}
		clear();		//zero the field
	}

	///copy constructor (deep copy of every allocated slice)
	field(const field &rhs){
		R[0] = rhs.R[0];
		R[1] = rhs.R[1];

		for(unsigned int n=0; n<D; n++){
			//do we have to make a deep copy?
			if(rhs.X[n] == NULL)
				X[n] = NULL;		//no
			else{
				//allocate the necessary memory
				HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1]));

				//copy the slice
				HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice));
			}
		}
	}

	~field(){
		destroy();
	}

	//assignment operator (deep copy of every allocated slice)
	field & operator= (const field & rhs){

		//assigning a field to itself must not free the data that is about to be copied
		if(this == &rhs) return *this;

		//de-allocate any existing GPU memory
		destroy();

		//copy the slice resolution
		R[0] = rhs.R[0];
		R[1] = rhs.R[1];

		for(unsigned int n=0; n<D; n++)
		{
			//unallocated slices stay unallocated (destroy() already reset X[n])
			if(rhs.X[n] == NULL) continue;

			//allocate the necessary memory
			HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1]));
			//copy the slice
			HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice));
		}
		return *this;
	}

	//assign a constant value to all positions and components
	field & operator= (const T rhs){

		//an empty field has nothing to fill (and cannot be launched on)
		if(R[0] == 0 || R[1] == 0) return *this;

		dim3 dimBlock, dimGrid;
		cuda_params(dimGrid, dimBlock);

		for(unsigned int n=0; n<D; n++)
			stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]);
		HANDLE_ERROR(cudaGetLastError());		//report launch failures

		return *this;
	}

	//assignment of vector component: component n of every sample is set to rhs.v[n]
	field & operator= (const vec<T, D> rhs){

		//an empty field has nothing to fill (and cannot be launched on)
		if(R[0] == 0 || R[1] == 0) return *this;

		dim3 dimBlock, dimGrid;
		cuda_params(dimGrid, dimBlock);

		for(unsigned int n=0; n<D; n++)
			stim::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]);
		HANDLE_ERROR(cudaGetLastError());		//report launch failures

		return *this;

	}

	//multiply two fields (element-wise multiplication)
	//  Both fields must have the same resolution.
	field<T, D> operator* (const field & rhs){

		//the kernel indexes both operands with this field's resolution
		if(rhs.R[0] != R[0] || rhs.R[1] != R[1]){
			std::cout<<"field Error: cannot multiply fields with different resolutions."<<std::endl;
			exit(1);
		}

		//create a field to store the result
		field<T, D> result(R[0], R[1]);

		//an empty field has nothing to multiply (and cannot be launched on)
		if(R[0] == 0 || R[1] == 0) return result;

		dim3 dimBlock, dimGrid;
		cuda_params(dimGrid, dimBlock);

		for(unsigned int n=0; n<D; n++)
			stim::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]);
		HANDLE_ERROR(cudaGetLastError());		//report launch failures

		return result;
	}

	//returns the pointer to slice n, or NULL if n is not a valid component
	T* ptr(unsigned int n = 0){
		if(n < D)
			return X[n];
		else return NULL;
	}

	//return the vector component at position (u, v)
	//  (one device-to-host copy per component)
	vec<T, D> get(unsigned int u, unsigned int v){

		vec<T, D> result;
		for(unsigned int d=0; d<D; d++){
			HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + v*R[0] + u, sizeof(T), cudaMemcpyDeviceToHost));
		}

		return result;
	}

	//set all components of the field to zero
	void clear(){
		for(unsigned int n=0; n<D; n++)
			if(X[n] != NULL)
				HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1]));
	}

	//crop the field: returns a width x height field holding the samples
	//  (u, v) with u < width and v < height of every component
	field<T, D> crop(unsigned int width, unsigned int height){

		//create a field to store the result (zeroed by the constructor)
		field<T, D> result(width, height);

		//an empty crop window has nothing to copy (and cannot be launched on)
		if(width == 0 || height == 0) return result;

		//one thread for each sample of the result
		dim3 dimBlock, dimGrid;
		launch_params(width, height, dimGrid, dimBlock);

		for(unsigned int n=0; n<D; n++)
			stim::gpu_field_crop <<<dimGrid, dimBlock>>> (result.X[n], X[n], R[0], R[1], width, height);
		HANDLE_ERROR(cudaGetLastError());		//report launch failures

		return result;
	}

	//save an image representing component n
	//  positive - if true the color range is [0 max_val], otherwise [-max_val max_val],
	//             where max_val is the value returned by find_max(n)
	//  cmap     - color map passed on to stim::gpu2image
	void toImage(std::string filename, unsigned int n = 0,
					bool positive = false, stim::colormapType cmap = stim::cmBrewer){

		//X only has D slices
		if(n >= D){
			std::cout<<"field Error: component index is out of range."<<std::endl;
			exit(1);
		}

		T max_val = find_max(n);				//find the maximum value

		if(positive)							//if the field is positive, use the range [0 max_val]
			stim::gpu2image<T>(X[n], filename, R[0], R[1], 0, max_val, cmap);
		else
			stim::gpu2image<T>(X[n], filename, R[0], R[1], -max_val, max_val, cmap);
	}

};
  341 +
  342 +} //end namespace stim
  343 +#endif
1 -#ifndef RTS_RECT_H  
2 -#define RTS_RECT_H  
3 -  
4 -//enable CUDA_CALLABLE macro  
5 -#include "../cuda/callable.h"  
6 -#include "../math/vector.h"  
7 -#include "../math/triangle.h"  
8 -#include "../math/quaternion.h"  
9 -#include <iostream>  
10 -#include <iomanip>  
11 -#include <algorithm>  
12 -  
13 -namespace stim{  
14 -  
15 -//template for a rectangle class in ND space  
16 -template <class T, int N = 3>  
17 -struct rect  
18 -{  
19 - /*  
20 - ^ O  
21 - |  
22 - |  
23 - Y C  
24 - |  
25 - |  
26 - O---------X--------->  
27 - */  
28 -  
29 -private:  
30 -  
31 - stim::vec<T, N> C;  
32 - stim::vec<T, N> X;  
33 - stim::vec<T, N> Y;  
34 -  
35 - CUDA_CALLABLE void scale(T factor){  
36 - X *= factor;  
37 - Y *= factor;  
38 - }  
39 -  
40 - CUDA_CALLABLE void normal(vec<T, N> n){ //orient the rectangle along the specified normal  
41 -  
42 - n = n.norm(); //normalize, just in case  
43 - vec<T, N> n_current = X.cross(Y).norm(); //compute the current normal  
44 - quaternion<T> q; //create a quaternion  
45 - q.CreateRotation(n_current, n); //initialize a rotation from n_current to n  
46 -  
47 - //apply the quaternion to the vectors and position  
48 - X = q.toMatrix3() * X;  
49 - Y = q.toMatrix3() * Y;  
50 - }  
51 -  
52 - CUDA_CALLABLE void init(){  
53 - C = vec<T, N>(0, 0, 0);  
54 - X = vec<T, N>(1, 0, 0);  
55 - Y = vec<T, N>(0, 1, 0);  
56 - }  
57 -  
58 -public:  
59 -  
60 - CUDA_CALLABLE rect(){  
61 - init();  
62 - }  
63 -  
64 - CUDA_CALLABLE rect(T size, T z_pos = (T)0){  
65 - init(); //use the default setup  
66 - scale(size); //scale the rectangle  
67 - C[2] = z_pos;  
68 - }  
69 -  
70 - CUDA_CALLABLE rect(T size, vec<T, N> c, vec<T, N> n = vec<T, N>(0, 0, 1)){  
71 - init(); //start with the default setting  
72 - C = c;  
73 - scale(size); //scale the rectangle  
74 - normal(n); //orient  
75 -  
76 - }  
77 -  
78 - /*CUDA_CALLABLE rect(vec<T, N> a, vec<T, N> b, vec<T, N> c)  
79 - {  
80 - A = a;  
81 - Y = b - a;  
82 - X = c - a - Y;  
83 -  
84 - }*/  
85 -  
86 - /*******************************************************************  
87 - Constructor - create a rect from a position, normal, and rotation  
88 - *******************************************************************/  
89 - /*CUDA_CALLABLE rect(stim::vec<T, N> c, stim::vec<T, N> normal, T width, T height, T theta)  
90 - {  
91 -  
92 - //compute the X direction - start along world-space X  
93 - Y = stim::vec<T, N>(0, 1, 0);  
94 - if(Y == normal)  
95 - Y = stim::vec<T, N>(0, 0, 1);  
96 -  
97 - X = Y.cross(normal).norm();  
98 -  
99 - std::cout<<X<<std::endl;  
100 -  
101 - //rotate the X axis by theta radians  
102 - stim::quaternion<T> q;  
103 - q.CreateRotation(theta, normal);  
104 - X = q.toMatrix3() * X;  
105 - Y = normal.cross(X);  
106 -  
107 - //normalize everything  
108 - X = X.norm();  
109 - Y = Y.norm();  
110 -  
111 - //scale to match the rect width and height  
112 - X = X * width;  
113 - Y = Y * height;  
114 -  
115 - //set the corner of the plane  
116 - A = c - X * 0.5f - Y * 0.5f;  
117 -  
118 - std::cout<<X<<std::endl;  
119 - }*/  
120 -  
121 - //boolean comparison  
122 - bool operator==(const rect<T, N> & rhs)  
123 - {  
124 - if(C == rhs.C && X == rhs.X && Y == rhs.Y)  
125 - return true;  
126 - else  
127 - return false;  
128 - }  
129 -  
130 - /*******************************************  
131 - Return the normal for the rect  
132 - *******************************************/  
133 - CUDA_CALLABLE stim::vec<T, N> n()  
134 - {  
135 - return (X.cross(Y)).norm();  
136 - }  
137 -  
138 - CUDA_CALLABLE stim::vec<T, N> p(T a, T b)  
139 - {  
140 - stim::vec<T, N> result;  
141 - //given the two parameters a, b = [0 1], returns the position in world space  
142 - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;  
143 - result = A + X * a + Y * b;  
144 -  
145 - return result;  
146 - }  
147 -  
148 - CUDA_CALLABLE stim::vec<T, N> operator()(T a, T b)  
149 - {  
150 - return p(a, b);  
151 - }  
152 -  
153 - std::string str()  
154 - {  
155 - std::stringstream ss;  
156 - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;  
157 - ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;  
158 - ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl;  
159 - ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X;  
160 -  
161 - return ss.str();  
162 -  
163 - }  
164 -  
165 - CUDA_CALLABLE rect<T, N> operator*(T rhs)  
166 - {  
167 - //scales the plane by a scalar value  
168 -  
169 - //create the new rectangle  
170 - rect<T, N> result = *this;  
171 - result.scale(rhs);  
172 -  
173 - return result;  
174 -  
175 - }  
176 -  
177 - CUDA_CALLABLE T dist(vec<T, N> p)  
178 - {  
179 - //compute the distance between a point and this rect  
180 -  
181 - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;  
182 -  
183 - //first break the rect up into two triangles  
184 - triangle<T, N> T0(A, A+X, A+Y);  
185 - triangle<T, N> T1(A+X+Y, A+X, A+Y);  
186 -  
187 -  
188 - T d0 = T0.dist(p);  
189 - T d1 = T1.dist(p);  
190 -  
191 - if(d0 < d1)  
192 - return d0;  
193 - else  
194 - return d1;  
195 - }  
196 -  
197 - CUDA_CALLABLE T dist_max(vec<T, N> p)  
198 - {  
199 - vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;  
200 - T da = (A - p).len();  
201 - T db = (A+X - p).len();  
202 - T dc = (A+Y - p).len();  
203 - T dd = (A+X+Y - p).len();  
204 -  
205 - return std::max( da, std::max(db, std::max(dc, dd) ) );  
206 - }  
207 -};  
208 -  
209 -} //end namespace rts  
210 -  
211 -template <typename T, int N>  
212 -std::ostream& operator<<(std::ostream& os, stim::rect<T, N> R)  
213 -{  
214 - os<<R.str();  
215 - return os;  
216 -}  
217 -  
218 -  
219 -#endif 1 +#ifndef RTS_RECT_H
  2 +#define RTS_RECT_H
  3 +
  4 +//enable CUDA_CALLABLE macro
  5 +#include "../cuda/callable.h"
  6 +#include "../math/vector.h"
  7 +#include "../math/triangle.h"
  8 +#include "../math/quaternion.h"
  9 +#include <iostream>
  10 +#include <iomanip>
  11 +#include <algorithm>
  12 +
  13 +namespace stim{
  14 +
  15 +//template for a rectangle class in ND space
  16 +template <class T, int N = 3>
  17 +struct rect
  18 +{
  19 + /*
  20 + ^ O
  21 + |
  22 + |
  23 + Y C
  24 + |
  25 + |
  26 + O---------X--------->
  27 + */
  28 +
  29 +private:
  30 +
  31 + stim::vec<T, N> C;
  32 + stim::vec<T, N> X;
  33 + stim::vec<T, N> Y;
  34 +
  35 + CUDA_CALLABLE void scale(T factor){
  36 + X *= factor;
  37 + Y *= factor;
  38 + }
  39 +
  40 + CUDA_CALLABLE void normal(vec<T, N> n){ //orient the rectangle along the specified normal
  41 +
  42 + n = n.norm(); //normalize, just in case
  43 + vec<T, N> n_current = X.cross(Y).norm(); //compute the current normal
  44 + quaternion<T> q; //create a quaternion
  45 + q.CreateRotation(n_current, n); //initialize a rotation from n_current to n
  46 +
  47 + //apply the quaternion to the vectors and position
  48 + X = q.toMatrix3() * X;
  49 + Y = q.toMatrix3() * Y;
  50 + }
  51 +
  52 + CUDA_CALLABLE void init(){
  53 + C = vec<T, N>(0, 0, 0);
  54 + X = vec<T, N>(1, 0, 0);
  55 + Y = vec<T, N>(0, 1, 0);
  56 + }
  57 +
  58 +public:
  59 +
  60 + CUDA_CALLABLE rect(){
  61 + init();
  62 + }
  63 +
  64 + CUDA_CALLABLE rect(T size, T z_pos = (T)0){
  65 + init(); //use the default setup
  66 + scale(size); //scale the rectangle
  67 + C[2] = z_pos;
  68 + }
  69 +
  70 + CUDA_CALLABLE rect(T size, vec<T, N> c, vec<T, N> n = vec<T, N>(0, 0, 1)){
  71 + init(); //start with the default setting
  72 + C = c;
  73 + scale(size); //scale the rectangle
  74 + normal(n); //orient
  75 +
  76 + }
  77 +
  78 + /*CUDA_CALLABLE rect(vec<T, N> a, vec<T, N> b, vec<T, N> c)
  79 + {
  80 + A = a;
  81 + Y = b - a;
  82 + X = c - a - Y;
  83 +
  84 + }*/
  85 +
  86 + /*******************************************************************
  87 + Constructor - create a rect from a position, normal, and rotation
  88 + *******************************************************************/
  89 + /*CUDA_CALLABLE rect(stim::vec<T, N> c, stim::vec<T, N> normal, T width, T height, T theta)
  90 + {
  91 +
  92 + //compute the X direction - start along world-space X
  93 + Y = stim::vec<T, N>(0, 1, 0);
  94 + if(Y == normal)
  95 + Y = stim::vec<T, N>(0, 0, 1);
  96 +
  97 + X = Y.cross(normal).norm();
  98 +
  99 + std::cout<<X<<std::endl;
  100 +
  101 + //rotate the X axis by theta radians
  102 + stim::quaternion<T> q;
  103 + q.CreateRotation(theta, normal);
  104 + X = q.toMatrix3() * X;
  105 + Y = normal.cross(X);
  106 +
  107 + //normalize everything
  108 + X = X.norm();
  109 + Y = Y.norm();
  110 +
  111 + //scale to match the rect width and height
  112 + X = X * width;
  113 + Y = Y * height;
  114 +
  115 + //set the corner of the plane
  116 + A = c - X * 0.5f - Y * 0.5f;
  117 +
  118 + std::cout<<X<<std::endl;
  119 + }*/
  120 +
  121 + //boolean comparison
  122 + bool operator==(const rect<T, N> & rhs)
  123 + {
  124 + if(C == rhs.C && X == rhs.X && Y == rhs.Y)
  125 + return true;
  126 + else
  127 + return false;
  128 + }
  129 +
  130 + /*******************************************
  131 + Return the normal for the rect
  132 + *******************************************/
  133 + CUDA_CALLABLE stim::vec<T, N> n()
  134 + {
  135 + return (X.cross(Y)).norm();
  136 + }
  137 +
  138 + CUDA_CALLABLE stim::vec<T, N> p(T a, T b)
  139 + {
  140 + stim::vec<T, N> result;
  141 + //given the two parameters a, b = [0 1], returns the position in world space
  142 + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
  143 + result = A + X * a + Y * b;
  144 +
  145 + return result;
  146 + }
  147 +
  148 + CUDA_CALLABLE stim::vec<T, N> operator()(T a, T b)
  149 + {
  150 + return p(a, b);
  151 + }
  152 +
  153 + std::string str()
  154 + {
  155 + std::stringstream ss;
  156 + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
  157 + ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl;
  158 + ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl;
  159 + ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X;
  160 +
  161 + return ss.str();
  162 +
  163 + }
  164 +
  165 + CUDA_CALLABLE rect<T, N> operator*(T rhs)
  166 + {
  167 + //scales the plane by a scalar value
  168 +
  169 + //create the new rectangle
  170 + rect<T, N> result = *this;
  171 + result.scale(rhs);
  172 +
  173 + return result;
  174 +
  175 + }
  176 +
  177 + CUDA_CALLABLE T dist(vec<T, N> p)
  178 + {
  179 + //compute the distance between a point and this rect
  180 +
  181 + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
  182 +
  183 + //first break the rect up into two triangles
  184 + triangle<T, N> T0(A, A+X, A+Y);
  185 + triangle<T, N> T1(A+X+Y, A+X, A+Y);
  186 +
  187 +
  188 + T d0 = T0.dist(p);
  189 + T d1 = T1.dist(p);
  190 +
  191 + if(d0 < d1)
  192 + return d0;
  193 + else
  194 + return d1;
  195 + }
  196 +
  197 + CUDA_CALLABLE T dist_max(vec<T, N> p)
  198 + {
  199 + vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5;
  200 + T da = (A - p).len();
  201 + T db = (A+X - p).len();
  202 + T dc = (A+Y - p).len();
  203 + T dd = (A+X+Y - p).len();
  204 +
  205 + return std::max( da, std::max(db, std::max(dc, dd) ) );
  206 + }
  207 +};
  208 +
  209 +} //end namespace rts
  210 +
  211 +template <typename T, int N>
  212 +std::ostream& operator<<(std::ostream& os, stim::rect<T, N> R)
  213 +{
  214 + os<<R.str();
  215 + return os;
  216 +}
  217 +
  218 +
  219 +#endif
1 -#ifndef RTS_TRIANGLE_H  
2 -#define RTS_TRIANGLE_H  
3 -  
4 -//enable CUDA_CALLABLE macro  
5 -#include "../cuda/callable.h"  
6 -#include "../math/vector.h"  
7 -#include <iostream>  
8 -  
9 -namespace stim{  
10 -  
11 -template <class T, int N=3>  
12 -struct triangle  
13 -{  
14 - /*  
15 - A------>B  
16 - | /  
17 - | /  
18 - | /  
19 - | /  
20 - | /  
21 - | /  
22 - C  
23 - */  
24 - private:  
25 -  
26 - vec<T, N> A;  
27 - vec<T, N> B;  
28 - vec<T, N> C;  
29 -  
30 - CUDA_CALLABLE vec<T, N> _p(T s, T t)  
31 - {  
32 - //This function returns the point specified by p = A + s(B-A) + t(C-A)  
33 - vec<T, N> E0 = B-A;  
34 - vec<T, N> E1 = C-A;  
35 -  
36 - return A + s*E0 + t*E1;  
37 - }  
38 -  
39 -  
40 - public:  
41 -  
42 -  
43 -  
44 - CUDA_CALLABLE triangle()  
45 - {  
46 -  
47 - }  
48 -  
49 - CUDA_CALLABLE triangle(vec<T, N> a, vec<T, N> b, vec<T, N> c)  
50 - {  
51 - A = a;  
52 - B = b;  
53 - C = c;  
54 - }  
55 -  
56 - CUDA_CALLABLE stim::vec<T, N> operator()(T s, T t)  
57 - {  
58 - return _p(s, t);  
59 - }  
60 -  
61 - CUDA_CALLABLE vec<T, N> nearest(vec<T, N> p)  
62 - {  
63 - //comptue the distance between a point and this triangle  
64 - // This code is adapted from: http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf  
65 -  
66 - vec<T, N> E0 = B-A;  
67 - vec<T, N> E1 = C-A;  
68 - vec<T, N> D = A - p;  
69 -  
70 - T a = E0.dot(E0);  
71 - T b = E0.dot(E1);  
72 - T c = E1.dot(E1);  
73 - T d = E0.dot(D);  
74 - T e = E1.dot(D);  
75 - //T f = D.dot(D);  
76 -  
77 - T det = a*c - b*b;  
78 - T s = b*e - c*d;  
79 - T t = b*d - a*e;  
80 -  
81 - /*std::cout<<"E0: "<<E0<<std::endl;  
82 - std::cout<<"E1: "<<E1<<std::endl;  
83 - std::cout<<"a: "<<a<<std::endl;  
84 - std::cout<<"b: "<<b<<std::endl;  
85 - std::cout<<"c: "<<c<<std::endl;  
86 - std::cout<<"d: "<<d<<std::endl;  
87 - std::cout<<"e: "<<e<<std::endl;  
88 - std::cout<<"f: "<<f<<std::endl;  
89 - std::cout<<"det: "<<det<<std::endl;  
90 - std::cout<<"s: "<<s<<std::endl;  
91 - std::cout<<"t: "<<t<<std::endl;*/  
92 -  
93 -  
94 - if( s+t <= det)  
95 - {  
96 - if(s < 0)  
97 - {  
98 - if(t < 0)  
99 - {  
100 - //region 4  
101 - //std::cout<<"Region 4"<<std::endl;  
102 - s = 0;  
103 - t = 0;  
104 - //done?  
105 - }  
106 - else  
107 - {  
108 - //region 3  
109 - //std::cout<<"Region 3"<<std::endl;  
110 - s=0;  
111 - t = ( e >= 0 ? 0 : ( -e >= c ? 1 : -e/c ) );  
112 - //done  
113 - }  
114 - }  
115 - else if(t < 0)  
116 - {  
117 - //region 5  
118 - //std::cout<<"Region 5"<<std::endl;  
119 - s = ( d >= 0 ? 0 : ( -d >= a ? 1 : -d/a ) );  
120 - t = 0;  
121 - //done  
122 - }  
123 - else  
124 - {  
125 - //region 0  
126 - //std::cout<<"Region 0"<<std::endl;  
127 - T invDet = (T)1.0/det;  
128 - s *= invDet;  
129 - t *= invDet;  
130 - //done  
131 - }  
132 - }  
133 - else  
134 - {  
135 - if(s < 0)  
136 - {  
137 - //region 2  
138 - //std::cout<<"Region 2"<<std::endl;  
139 - s = 0;  
140 - t = 1;  
141 - //done?  
142 -  
143 - }  
144 - else if(t < 0)  
145 - {  
146 - //region 6  
147 - //std::cout<<"Region 6"<<std::endl;  
148 - s = 1;  
149 - t = 0;  
150 - //done?  
151 - }  
152 - else  
153 - {  
154 - //region 1  
155 - //std::cout<<"Region 1"<<std::endl;  
156 - T numer = c + e - b - d;  
157 - if( numer <= 0 )  
158 - s = 0;  
159 - else  
160 - {  
161 - T denom = a - 2 * b + c;  
162 - s = ( numer >= denom ? 1 : numer/denom );  
163 - }  
164 - t = 1 - s;  
165 - //done  
166 - }  
167 - }  
168 -  
169 - //std::cout<<"s: "<<s<<std::endl;  
170 - //std::cout<<"t: "<<t<<std::endl;  
171 -  
172 - //std::cout<<"p: "<<_p(s, t)<<std::endl;  
173 -  
174 - return _p(s, t);  
175 -  
176 - }  
177 -  
178 - CUDA_CALLABLE T dist(vec<T, N> p)  
179 - {  
180 - vec<T, N> n = nearest(p);  
181 -  
182 - return (p - n).len();  
183 - }  
184 -};  
185 -  
186 -}  
187 -  
188 -#endif 1 +#ifndef RTS_TRIANGLE_H
  2 +#define RTS_TRIANGLE_H
  3 +
  4 +//enable CUDA_CALLABLE macro
  5 +#include "../cuda/callable.h"
  6 +#include "../math/vector.h"
  7 +#include <iostream>
  8 +
  9 +namespace stim{
  10 +
  11 +template <class T, int N=3>
  12 +struct triangle
  13 +{
  14 + /*
  15 + A------>B
  16 + | /
  17 + | /
  18 + | /
  19 + | /
  20 + | /
  21 + | /
  22 + C
  23 + */
  24 + private:
  25 +
  26 + vec<T, N> A;
  27 + vec<T, N> B;
  28 + vec<T, N> C;
  29 +
  30 + CUDA_CALLABLE vec<T, N> _p(T s, T t)
  31 + {
  32 + //This function returns the point specified by p = A + s(B-A) + t(C-A)
  33 + vec<T, N> E0 = B-A;
  34 + vec<T, N> E1 = C-A;
  35 +
  36 + return A + s*E0 + t*E1;
  37 + }
  38 +
  39 +
  40 + public:
  41 +
  42 +
  43 +
  44 + CUDA_CALLABLE triangle()
  45 + {
  46 +
  47 + }
  48 +
  49 + CUDA_CALLABLE triangle(vec<T, N> a, vec<T, N> b, vec<T, N> c)
  50 + {
  51 + A = a;
  52 + B = b;
  53 + C = c;
  54 + }
  55 +
  56 + CUDA_CALLABLE stim::vec<T, N> operator()(T s, T t)
  57 + {
  58 + return _p(s, t);
  59 + }
  60 +
  61 + CUDA_CALLABLE vec<T, N> nearest(vec<T, N> p)
  62 + {
  63 + //comptue the distance between a point and this triangle
  64 + // This code is adapted from: http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf
  65 +
  66 + vec<T, N> E0 = B-A;
  67 + vec<T, N> E1 = C-A;
  68 + vec<T, N> D = A - p;
  69 +
  70 + T a = E0.dot(E0);
  71 + T b = E0.dot(E1);
  72 + T c = E1.dot(E1);
  73 + T d = E0.dot(D);
  74 + T e = E1.dot(D);
  75 + //T f = D.dot(D);
  76 +
  77 + T det = a*c - b*b;
  78 + T s = b*e - c*d;
  79 + T t = b*d - a*e;
  80 +
  81 + /*std::cout<<"E0: "<<E0<<std::endl;
  82 + std::cout<<"E1: "<<E1<<std::endl;
  83 + std::cout<<"a: "<<a<<std::endl;
  84 + std::cout<<"b: "<<b<<std::endl;
  85 + std::cout<<"c: "<<c<<std::endl;
  86 + std::cout<<"d: "<<d<<std::endl;
  87 + std::cout<<"e: "<<e<<std::endl;
  88 + std::cout<<"f: "<<f<<std::endl;
  89 + std::cout<<"det: "<<det<<std::endl;
  90 + std::cout<<"s: "<<s<<std::endl;
  91 + std::cout<<"t: "<<t<<std::endl;*/
  92 +
  93 +
  94 + if( s+t <= det)
  95 + {
  96 + if(s < 0)
  97 + {
  98 + if(t < 0)
  99 + {
  100 + //region 4
  101 + //std::cout<<"Region 4"<<std::endl;
  102 + s = 0;
  103 + t = 0;
  104 + //done?
  105 + }
  106 + else
  107 + {
  108 + //region 3
  109 + //std::cout<<"Region 3"<<std::endl;
  110 + s=0;
  111 + t = ( e >= 0 ? 0 : ( -e >= c ? 1 : -e/c ) );
  112 + //done
  113 + }
  114 + }
  115 + else if(t < 0)
  116 + {
  117 + //region 5
  118 + //std::cout<<"Region 5"<<std::endl;
  119 + s = ( d >= 0 ? 0 : ( -d >= a ? 1 : -d/a ) );
  120 + t = 0;
  121 + //done
  122 + }
  123 + else
  124 + {
  125 + //region 0
  126 + //std::cout<<"Region 0"<<std::endl;
  127 + T invDet = (T)1.0/det;
  128 + s *= invDet;
  129 + t *= invDet;
  130 + //done
  131 + }
  132 + }
  133 + else
  134 + {
  135 + if(s < 0)
  136 + {
  137 + //region 2
  138 + //std::cout<<"Region 2"<<std::endl;
  139 + s = 0;
  140 + t = 1;
  141 + //done?
  142 +
  143 + }
  144 + else if(t < 0)
  145 + {
  146 + //region 6
  147 + //std::cout<<"Region 6"<<std::endl;
  148 + s = 1;
  149 + t = 0;
  150 + //done?
  151 + }
  152 + else
  153 + {
  154 + //region 1
  155 + //std::cout<<"Region 1"<<std::endl;
  156 + T numer = c + e - b - d;
  157 + if( numer <= 0 )
  158 + s = 0;
  159 + else
  160 + {
  161 + T denom = a - 2 * b + c;
  162 + s = ( numer >= denom ? 1 : numer/denom );
  163 + }
  164 + t = 1 - s;
  165 + //done
  166 + }
  167 + }
  168 +
  169 + //std::cout<<"s: "<<s<<std::endl;
  170 + //std::cout<<"t: "<<t<<std::endl;
  171 +
  172 + //std::cout<<"p: "<<_p(s, t)<<std::endl;
  173 +
  174 + return _p(s, t);
  175 +
  176 + }
  177 +
  178 + CUDA_CALLABLE T dist(vec<T, N> p)
  179 + {
  180 + vec<T, N> n = nearest(p);
  181 +
  182 + return (p - n).len();
  183 + }
  184 +};
  185 +
  186 +}
  187 +
  188 +#endif
1 -#ifndef RTS_MATERIAL_H  
2 -#define RTS_MATERIAL_H  
3 -  
4 -#include <vector>  
5 -#include <ostream>  
6 -#include <iostream>  
7 -#include <fstream>  
8 -#include <complex>  
9 -#include <algorithm>  
10 -#include <sstream>  
11 -#include "../math/complex.h"  
12 -#include "../math/constants.h"  
13 -#include "../math/function.h"  
14 -  
15 -namespace stim{  
16 -  
17 -//Material class - default representation for the material property is the refractive index (RI)  
18 -template<typename T>  
19 -class material : public function< T, complex<T> >{  
20 -  
21 -public:  
22 - enum wave_property{microns, inverse_cm};  
23 - enum material_property{ri, absorbance};  
24 -  
25 -private:  
26 -  
27 - using function< T, complex<T> >::X;  
28 - using function< T, complex<T> >::Y;  
29 - using function< T, complex<T> >::insert;  
30 - using function< T, complex<T> >::bounding;  
31 -  
32 - std::string name; //name for the material (defaults to file name)  
33 -  
34 - void process_header(std::string str, wave_property& wp, material_property& mp){  
35 -  
36 - std::stringstream ss(str); //create a stream from the data string  
37 - std::string line;  
38 - std::getline(ss, line); //get the first line as a string  
39 - while(line[0] == '#'){ //continue looping while the line is a comment  
40 -  
41 - std::stringstream lstream(line); //create a stream from the line  
42 - lstream.ignore(); //ignore the first character ('#')  
43 -  
44 - std::string prop; //get the property name  
45 - lstream>>prop;  
46 -  
47 - if(prop == "X"){  
48 - std::string wp_name;  
49 - lstream>>wp_name;  
50 - if(wp_name == "microns") wp = microns;  
51 - else if(wp_name == "inverse_cm") wp = inverse_cm;  
52 - }  
53 - else if(prop == "Y"){  
54 - std::string mp_name;  
55 - lstream>>mp_name;  
56 - if(mp_name == "ri") mp = ri;  
57 - else if(mp_name == "absorbance") mp = absorbance;  
58 - }  
59 -  
60 - std::getline(ss, line); //get the next line  
61 - }  
62 -  
63 - function< T, stim::complex<T> >::process_string(str);  
64 - }  
65 -  
66 - void from_inverse_cm(){  
67 - //convert inverse centimeters to wavelength (in microns)  
68 - for(unsigned int i=0; i<X.size(); i++)  
69 - X[i] = 10000 / X[i];  
70 -  
71 - //reverse the function array  
72 - std::reverse(X.begin(), X.end());  
73 - std::reverse(Y.begin(), Y.end());  
74 -  
75 - }  
76 -  
77 - void init(){  
78 - bounding[0] = bounding[1] = stim::complex<T>(1, 0);  
79 - }  
80 -  
81 -  
82 -public:  
83 -  
84 - material(std::string filename, wave_property wp, material_property mp){  
85 - name = filename;  
86 - load(filename, wp, mp);  
87 - }  
88 -  
89 - material(std::string filename){  
90 - name = filename;  
91 - load(filename);  
92 - }  
93 -  
94 - material(){  
95 - init();  
96 - }  
97 -  
98 - complex<T> getN(T lambda){  
99 - return function< T, complex<T> >::linear(lambda);  
100 - }  
101 -  
102 - void load(std::string filename, wave_property wp, material_property mp){  
103 -  
104 - //load the file as a function  
105 - function< T, complex<T> >::load(filename);  
106 - }  
107 -  
108 - void load(std::string filename){  
109 -  
110 - wave_property wp = inverse_cm;  
111 - material_property mp = ri;  
112 - //turn the file into a string  
113 - std::ifstream t(filename.c_str()); //open the file as a stream  
114 -  
115 - if(!t){  
116 - std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl;  
117 - exit(1);  
118 - }  
119 - std::string str((std::istreambuf_iterator<char>(t)),  
120 - std::istreambuf_iterator<char>());  
121 -  
122 - //process the header information  
123 - process_header(str, wp, mp);  
124 -  
125 - //convert units  
126 - if(wp == inverse_cm)  
127 - from_inverse_cm();  
128 - //set the bounding values  
129 - bounding[0] = Y[0];  
130 - bounding[1] = Y.back();  
131 - }  
132 - std::string str(){  
133 - std::stringstream ss;  
134 - ss<<name<<std::endl;  
135 - ss<<function< T, complex<T> >::str();  
136 - return ss.str();  
137 - }  
138 - std::string get_name(){  
139 - return name;  
140 - }  
141 -  
142 - void set_name(std::string str){  
143 - name = str;  
144 - }  
145 -  
146 -};  
147 -  
148 -}  
149 -  
150 -  
151 -  
152 -  
153 -#endif 1 +#ifndef RTS_MATERIAL_H
  2 +#define RTS_MATERIAL_H
  3 +
  4 +#include <vector>
  5 +#include <ostream>
  6 +#include <iostream>
  7 +#include <fstream>
  8 +#include <complex>
  9 +#include <algorithm>
  10 +#include <sstream>
  11 +#include "../math/complex.h"
  12 +#include "../math/constants.h"
  13 +#include "../math/function.h"
  14 +
  15 +namespace stim{
  16 +
  17 +//Material class - default representation for the material property is the refractive index (RI)
  18 +template<typename T>
  19 +class material : public function< T, complex<T> >{
  20 +
  21 +public:
  22 + enum wave_property{microns, inverse_cm};
  23 + enum material_property{ri, absorbance};
  24 +
  25 +private:
  26 +
  27 + using function< T, complex<T> >::X;
  28 + using function< T, complex<T> >::Y;
  29 + using function< T, complex<T> >::insert;
  30 + using function< T, complex<T> >::bounding;
  31 +
  32 + std::string name; //name for the material (defaults to file name)
  33 +
  34 + void process_header(std::string str, wave_property& wp, material_property& mp){
  35 +
  36 + std::stringstream ss(str); //create a stream from the data string
  37 + std::string line;
  38 + std::getline(ss, line); //get the first line as a string
  39 + while(line[0] == '#'){ //continue looping while the line is a comment
  40 +
  41 + std::stringstream lstream(line); //create a stream from the line
  42 + lstream.ignore(); //ignore the first character ('#')
  43 +
  44 + std::string prop; //get the property name
  45 + lstream>>prop;
  46 +
  47 + if(prop == "X"){
  48 + std::string wp_name;
  49 + lstream>>wp_name;
  50 + if(wp_name == "microns") wp = microns;
  51 + else if(wp_name == "inverse_cm") wp = inverse_cm;
  52 + }
  53 + else if(prop == "Y"){
  54 + std::string mp_name;
  55 + lstream>>mp_name;
  56 + if(mp_name == "ri") mp = ri;
  57 + else if(mp_name == "absorbance") mp = absorbance;
  58 + }
  59 +
  60 + std::getline(ss, line); //get the next line
  61 + }
  62 +
  63 + function< T, stim::complex<T> >::process_string(str);
  64 + }
  65 +
  66 + void from_inverse_cm(){
  67 + //convert inverse centimeters to wavelength (in microns)
  68 + for(unsigned int i=0; i<X.size(); i++)
  69 + X[i] = 10000 / X[i];
  70 +
  71 + //reverse the function array
  72 + std::reverse(X.begin(), X.end());
  73 + std::reverse(Y.begin(), Y.end());
  74 +
  75 + }
  76 +
  77 + void init(){
  78 + bounding[0] = bounding[1] = stim::complex<T>(1, 0);
  79 + }
  80 +
  81 +
  82 +public:
  83 +
  84 + material(std::string filename, wave_property wp, material_property mp){
  85 + name = filename;
  86 + load(filename, wp, mp);
  87 + }
  88 +
  89 + material(std::string filename){
  90 + name = filename;
  91 + load(filename);
  92 + }
  93 +
  94 + material(){
  95 + init();
  96 + }
  97 +
  98 + complex<T> getN(T lambda){
  99 + return function< T, complex<T> >::linear(lambda);
  100 + }
  101 +
  102 + void load(std::string filename, wave_property wp, material_property mp){
  103 +
  104 + //load the file as a function
  105 + function< T, complex<T> >::load(filename);
  106 + }
  107 +
  108 + void load(std::string filename){
  109 +
  110 + wave_property wp = inverse_cm;
  111 + material_property mp = ri;
  112 + //turn the file into a string
  113 + std::ifstream t(filename.c_str()); //open the file as a stream
  114 +
  115 + if(!t){
  116 + std::cout<<"ERROR: Couldn't open the material file '"<<filename<<"'"<<std::endl;
  117 + exit(1);
  118 + }
  119 + std::string str((std::istreambuf_iterator<char>(t)),
  120 + std::istreambuf_iterator<char>());
  121 +
  122 + //process the header information
  123 + process_header(str, wp, mp);
  124 +
  125 + //convert units
  126 + if(wp == inverse_cm)
  127 + from_inverse_cm();
  128 + //set the bounding values
  129 + bounding[0] = Y[0];
  130 + bounding[1] = Y.back();
  131 + }
  132 + std::string str(){
  133 + std::stringstream ss;
  134 + ss<<name<<std::endl;
  135 + ss<<function< T, complex<T> >::str();
  136 + return ss.str();
  137 + }
  138 + std::string get_name(){
  139 + return name;
  140 + }
  141 +
  142 + void set_name(std::string str){
  143 + name = str;
  144 + }
  145 +
  146 +};
  147 +
  148 +}
  149 +
  150 +
  151 +
  152 +
  153 +#endif
optics/mirst-1d.cuh
1 -#include "../optics/material.h"  
2 -#include "../math/complexfield.cuh"  
3 -#include "../math/constants.h"  
4 -//#include "../envi/bil.h"  
5 -  
6 -#include "cufft.h"  
7 -  
8 -#include <vector>  
9 -#include <sstream>  
10 -  
11 -namespace stim{  
12 -  
//this function writes a sinc function to "dest" such that an iFFT produces a slab
//	Launch layout (from the index math below): a 2D launch where x covers the
//	zR frequency samples and y covers the nuR wavelengths. Threads outside
//	that range return immediately, and each remaining thread accumulates into
//	exactly one element dest[inu * zR + ifz].
template<typename T>
__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri,
										T* src, T* zf,
										T w, unsigned int zR, unsigned int nuR){
	//dest = complex field representing the sample
	//ri = refractive indices for each wavelength
	//src = intensity of the light source for each wavelength
	//zf = z position of the slab interface for each wavelength (accounting for optical path length)
	//w = width of the slab (in pixels)
	//zR = number of z-axis samples
	//nuR = number of wavelengths

	//get the current coordinate in the plane slice
	int ifz = blockIdx.x * blockDim.x + threadIdx.x;
	int inu = blockIdx.y * blockDim.y + threadIdx.y;

	//make sure that the thread indices are in-bounds
	if(inu >= nuR || ifz >= zR) return;

	int i = inu * zR + ifz;

	//normalized frequency: the upper half of the samples holds the negative frequencies
	//	the subtraction is done in T: zR is unsigned, so negating (zR - ifz)
	//	as an integer would wrap around to a huge positive value
	T fz;
	if(ifz < zR/2)
		fz = ifz / (T)zR;
	else
		fz = ((T)ifz - (T)zR) / (T)zR;

	//if the slab starts outside of the simulation domain, just return
	if(zf[inu] >= zR) return;

	//fill the array along z with a sinc function representing the Fourier transform of the layer

	T opl = w * ri[inu].real();		//optical path length

	//handle the case where the slab goes outside the simulation domain
	if(zf[inu] + opl >= zR)
		opl = zR - zf[inu];

	if(opl == 0) return;

	//phase term that shifts the slab so that it is centered at zf + opl/2
	complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));

	complex<T> eta = ri[inu] * ri[inu] - 1;

	//the sinc factor is 1 at fz = 0, so that sample skips the 0/0 division
	if(ifz == 0)
		dest[i] += opl * exp(e) * eta * src[inu];
	else
		dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
}
66 -  
//advances the z depth stored for each wavelength by one layer
//	zf = current z depth (optical path length) in pixels, one entry per wavelength
//	ri = refractive index of the material (NULL advances by the plain width w)
//	w = actual width of the layer (in pixels)
//	S = number of entries in zf; threads with an index >= S do nothing
template<typename T>
__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){

	//one thread per entry of zf
	int idx = blockIdx.x * blockDim.x + threadIdx.x;

	if(idx < S){
		if(ri != NULL)
			zf[idx] += ri[idx].real() * w;		//scale the width by the real part of the index
		else
			zf[idx] += w;
	}
}
83 -  
//apply the 1D MIRST filter to an existing sample (overwriting the sample)
//	sampleFFT	= the sample in the Fourier domain (will be overwritten)
//	lambda		= list of wavelengths
//	dFz			= delta along the Fz axis in the frequency domain
//	inNA		= NA of the internal obscuration
//	outNA		= NA of the objective
//	lambdaR		= number of wavelengths
//	zR			= number of pixels along the Fz axis (same as the z-axis)
//	sigma		= width of the Gaussian source
//	The x thread index selects the Fz sample and the y thread index selects
//	the wavelength; threads outside (zR, lambdaR) return without writing.
template<typename T>
__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda,
										 T dFz,
										 T inNA, T outNA,
										 unsigned int lambdaR, unsigned int zR,
										 T sigma = 0){

	int col = blockIdx.x * blockDim.x + threadIdx.x;		//index along Fz
	int row = blockIdx.y * blockDim.y + threadIdx.y;		//index along the wavelengths

	if(row >= lambdaR || col >= zR) return;

	//calculate the index into the sample FT
	int idx = row * zR + col;

	//the upper half of the axis holds the negative spatial frequencies:
	//	those samples are set to zero and the thread is finished
	if(!(col < zR / 2)){
		sampleFFT[idx] = 0;
		return;
	}

	//spatial frequency for this sample
	T fz = col * dFz;

	//compute the frequency in inverse microns
	T nu = 1/lambda[row];

	//determine the radius of the integration circle
	T nu_sq = nu * nu;
	T fz_sq = (fz * fz) / 4;

	//cut off frequencies above the diffraction limit
	T r = 0;
	if(fz_sq < nu_sq)
		r = sqrt(nu_sq - fz_sq);

	//account for the optics: pass only radii between the two apertures
	T Q = 0;
	if(r > nu * inNA && r < nu * outNA)
		Q = 1;

	//account for the source (Gaussian weighting; sigma = 0 gives a weight of 1)
	T s = exp( - (r*r * sigma*sigma) / 2 );

	//compute the final filter (left at zero for the fz = 0 sample)
	T mirst = 0;
	if(fz != 0)
		mirst = 2 * stimPI * r * s * Q * (1/fz);

	sampleFFT[idx] *= mirst;

}
149 -  
150 -/*This object performs a 1-dimensional (layered) MIRST simulation  
151 -*/  
152 -template<typename T>  
153 -class mirst1d{  
154 -  
155 -private:  
156 - unsigned int Z; //z-axis resolution  
157 - unsigned int pad; //pixel padding on either side of the sample  
158 -  
159 - std::vector< material<T> > matlist; //list of materials  
160 - std::vector< T > layers; //list of layer thicknesses  
161 -  
162 - std::vector< T > lambdas; //list of wavelengths that are being simulated  
163 - unsigned int S; //number of wavelengths (size of "lambdas")  
164 -  
165 - T NA[2]; //numerical aperature (central obscuration and outer diameter)  
166 -  
167 - function<T, T> source_profile; //profile (spectrum) of the source (expressed in inverse centimeters)  
168 -  
169 - complexfield<T, 1> scratch; //scratch GPU memory used to build samples, transforms, etc.  
170 -  
171 - void fft(int direction = CUFFT_FORWARD){  
172 -  
173 - unsigned padZ = Z + pad;  
174 -  
175 - //create cuFFT handles  
176 - cufftHandle plan;  
177 - cufftResult result;  
178 -  
179 - if(sizeof(T) == 4)  
180 - result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size()); //single precision  
181 - else  
182 - result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size()); //double precision  
183 -  
184 - //check for Plan 1D errors  
185 - if(result != CUFFT_SUCCESS){  
186 - std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;  
187 - CufftError(result);  
188 - exit(1);  
189 - }  
190 -  
191 - if(sizeof(T) == 4)  
192 - result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);  
193 - else  
194 - result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);  
195 -  
196 - //check for FFT errors  
197 - if(result != CUFFT_SUCCESS){  
198 - std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;  
199 - CufftError(result);  
200 - exit(1);  
201 - }  
202 -  
203 - cufftDestroy(plan);  
204 - }  
205 -  
206 -  
207 - //initialize the scratch memory  
208 - void init_scratch(){  
209 - scratch = complexfield<T, 1>(Z + pad , lambdas.size());  
210 - scratch = 0;  
211 - }  
212 -  
213 - //get the list of scattering efficiency (eta) values for a specified layer  
214 - std::vector< complex<T> > layer_etas(unsigned int l){  
215 -  
216 - std::vector< complex<T> > etas;  
217 -  
218 - //fill the list of etas  
219 - for(unsigned int i=0; i<lambdas.size(); i++)  
220 - etas.push_back( matlist[l].eta(lambdas[i]) );  
221 - return etas;  
222 - }  
223 -  
224 - //calculates the optimal block and grid sizes using information from the GPU  
225 - void cuda_params(dim3& grids, dim3& blocks){  
226 - int maxThreads = stim::maxThreadsPerBlock(); //compute the optimal block size  
227 - int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);  
228 -  
229 - //create one thread for each detector pixel  
230 - blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);  
231 - grids = dim3(((Z + 2 * pad) + SQRT_BLOCK -1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);  
232 - }  
233 -  
234 - //add the fourier transform of layer n to the scratch space  
235 - void build_layer_fft(unsigned int n, T* zf){  
236 - unsigned int paddedZ = Z + pad;  
237 -  
238 - T wpx = layers[n] / dz(); //calculate the width of the layer in pixels  
239 -  
240 - //allocate memory for the refractive index  
241 - complex<T>* gpuRi;  
242 - HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));  
243 -  
244 - //allocate memory for the source profile  
245 - T* gpuSrc;  
246 - HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));  
247 -  
248 - complex<T> ri;  
249 - T source;  
250 - //store the refractive index and source profile in a CPU array  
251 - for(int inu=0; inu<S; inu++){  
252 - //save the refractive index to the GPU  
253 - ri = matlist[n].getN(lambdas[inu]);  
254 - HANDLE_ERROR(cudaMemcpy( gpuRi + inu, &ri, sizeof(complex<T>), cudaMemcpyHostToDevice ));  
255 -  
256 - //save the source profile to the GPU  
257 - source = source_profile(10000 / lambdas[inu]);  
258 - HANDLE_ERROR(cudaMemcpy( gpuSrc + inu, &source, sizeof(T), cudaMemcpyHostToDevice ));  
259 -  
260 - }  
261 -  
262 - //create one thread for each pixel of the field slice  
263 - dim3 gridDim, blockDim;  
264 - cuda_params(gridDim, blockDim);  
265 - stim::gpu_mirst1d_layer_fft<<<gridDim, blockDim>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);  
266 -  
267 - int linBlock = stim::maxThreadsPerBlock(); //compute the optimal block size  
268 - int linGrid = S / linBlock + 1;  
269 - stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);  
270 -  
271 - //free memory  
272 - HANDLE_ERROR(cudaFree(gpuRi));  
273 - HANDLE_ERROR(cudaFree(gpuSrc));  
274 - }  
275 -  
276 - void build_sample(){  
277 - init_scratch(); //initialize the GPU scratch space  
278 - //build_layer(1);  
279 -  
280 - T* zf;  
281 - HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));  
282 - HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));  
283 -  
284 - //render each layer of the sample  
285 - for(unsigned int l=0; l<layers.size(); l++){  
286 - build_layer_fft(l, zf);  
287 - }  
288 -  
289 - HANDLE_ERROR(cudaFree(zf));  
290 - }  
291 -  
292 - void apply_filter(){  
293 - dim3 gridDim, blockDim;  
294 - cuda_params(gridDim, blockDim);  
295 -  
296 - unsigned int Zpad = Z + pad;  
297 -  
298 - T sim_range = dz() * Zpad;  
299 - T dFz = 1 / sim_range;  
300 -  
301 - //copy the array of wavelengths to the GPU  
302 - T* gpuLambdas;  
303 - HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * Zpad));  
304 - HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * Zpad, cudaMemcpyHostToDevice));  
305 - stim::gpu_mirst1d_apply_filter <<<gridDim, blockDim>>>(scratch.ptr(), gpuLambdas,  
306 - dFz,  
307 - NA[0], NA[1],  
308 - S, Zpad);  
309 - }  
310 -  
311 - //crop the image to the sample thickness - keep in mind that sample thickness != optical path length  
312 - void crop(){  
313 -  
314 - scratch = scratch.crop(Z, S);  
315 - }  
316 -  
317 - //save the scratch field as a binary file  
318 - void to_binary(std::string filename){  
319 -  
320 - }  
321 -  
322 -  
323 -public:  
324 -  
325 - //constructor  
326 - mirst1d(unsigned int rZ = 100,  
327 - unsigned int padding = 0){  
328 - Z = rZ;  
329 - pad = padding;  
330 - NA[0] = 0;  
331 - NA[1] = 0.8;  
332 - S = 0;  
333 - source_profile = 1;  
334 - }  
335 -  
336 - //add a layer, thickness = microns  
337 - void add_layer(material<T> mat, T thickness){  
338 - matlist.push_back(mat);  
339 - layers.push_back(thickness);  
340 - }  
341 -  
342 - void add_layer(std::string filename, T thickness){  
343 - add_layer(material<T>(filename), thickness);  
344 - }  
345 -  
346 - //adds a profile spectrum for the light source  
347 - void set_source(std::string filename){  
348 - source_profile.load(filename);  
349 - }  
350 -  
351 - //adds a block of wavenumbers (cm^-1) to the simulation parameters  
352 - void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){  
353 - unsigned int nu = start;  
354 - while(nu <= stop){  
355 - lambdas.push_back((T)10000 / nu);  
356 - nu += step;  
357 - }  
358 - S = lambdas.size(); //increment the number of wavelengths (shorthand for later)  
359 - }  
360 -  
361 - T thickness(){  
362 - T t = 0;  
363 - for(unsigned int l=0; l<layers.size(); l++)  
364 - t += layers[l];  
365 - return t;  
366 - }  
367 -  
368 - void padding(unsigned int padding = 0){  
369 - pad = padding;  
370 - }  
371 -  
372 - T dz(){  
373 - return thickness() / Z; //calculate the z-axis step size  
374 - }  
375 -  
376 - void na(T in, T out){  
377 - NA[0] = in;  
378 - NA[1] = out;  
379 - }  
380 -  
381 - void na(T out){  
382 - na(0, out);  
383 - }  
384 -  
385 - stim::function<T, T> get_source(){  
386 - return source_profile;  
387 - }  
388 -  
389 - void save_sample(std::string filename){  
390 - //create a sample and save the magnitude as an image  
391 - build_sample();  
392 - fft(CUFFT_INVERSE);  
393 - scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);  
394 - }  
395 -  
396 - void save_mirst(std::string filename, bool binary = true){  
397 - //apply the MIRST filter to a sample and save the image  
398 -  
399 - //build the sample in the Fourier domain  
400 - build_sample();  
401 -  
402 - //apply the MIRST filter  
403 - apply_filter();  
404 -  
405 - //apply an inverse FFT to bring the results back into the spatial domain  
406 - fft(CUFFT_INVERSE);  
407 -  
408 - crop();  
409 -  
410 - //save the image  
411 - if(binary)  
412 - to_binary(filename);  
413 - else  
414 - scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);  
415 - }  
416 -  
417 -  
418 -  
419 -  
420 - std::string str(){  
421 -  
422 - stringstream ss;  
423 - ss<<"1D MIRST Simulation========================="<<std::endl;  
424 - ss<<"z-axis resolution: "<<Z<<std::endl;  
425 - ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;  
426 - ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;  
427 - ss<<"padding: "<<pad<<std::endl;  
428 - ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;  
429 - ss<<"dz: "<<dz()<<" um"<<std::endl;  
430 - ss<<std::endl;  
431 - ss<<layers.size()<<" layers-------------"<<std::endl;  
432 - for(unsigned int l=0; l<layers.size(); l++)  
433 - ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;  
434 -  
435 - ss<<"source profile-----------"<<std::endl;  
436 - ss<<get_source().str()<<std::endl;  
437 -  
438 - return ss.str();  
439 -  
440 -  
441 - }  
442 -  
443 -  
444 -  
445 -};  
446 -  
447 -} 1 +#include "../optics/material.h"
  2 +#include "../math/complexfield.cuh"
  3 +#include "../math/constants.h"
  4 +//#include "../envi/bil.h"
  5 +
  6 +#include "cufft.h"
  7 +
  8 +#include <vector>
  9 +#include <sstream>
  10 +
  11 +namespace stim{
  12 +
//Accumulates the Fourier-domain representation of one slab (a phase-shifted sinc) into "dest",
//  such that an iFFT along z produces the slab.
//Launch with a 2D grid: x indexes the zR samples along Fz, y indexes the nuR wavelengths.
//  Threads that fall outside of zR x nuR return immediately, so the grid may over-cover the field.
template<typename T>
__global__ void gpu_mirst1d_layer_fft(complex<T>* dest, complex<T>* ri,
                                      T* src, T* zf,
                                      T w, unsigned int zR, unsigned int nuR){
    //dest = complex field representing the sample (accumulated into, not overwritten)
    //ri   = refractive indices for each wavelength
    //src  = intensity of the light source for each wavelength
    //zf   = z position of the slab interface for each wavelength (accounting for optical path length)
    //w    = width of the slab (in pixels)
    //zR   = number of z-axis samples
    //nuR  = number of wavelengths

    //get the current coordinate in the plane slice
    unsigned int ifz = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int inu = blockIdx.y * blockDim.y + threadIdx.y;

    //make sure that the thread indices are in-bounds
    if(inu >= nuR || ifz >= zR) return;

    //linear index into the field (computed in size_t so that large fields cannot overflow)
    size_t i = (size_t)inu * zR + ifz;

    //normalized spatial frequency: the upper half of the array stores the negative frequencies
    //  the subtraction is performed in T - negating the unsigned difference (zR - ifz) would
    //  wrap around and produce a huge positive frequency instead of a negative one
    T fz;
    if(ifz < zR/2)
        fz = ifz / (T)zR;
    else
        fz = ((T)ifz - (T)zR) / (T)zR;

    //if the slab starts outside of the simulation domain, just return
    if(zf[inu] >= zR) return;

    //fill the array along z with a sinc function representing the Fourier transform of the layer

    T opl = w * ri[inu].real();         //optical path length

    //handle the case where the slab goes outside the simulation domain
    if(zf[inu] + opl >= zR)
        opl = zR - zf[inu];

    //an empty slab contributes nothing (and would divide by zero in the sinc below)
    if(opl == 0) return;

    //phase ramp that shifts the slab so that it is centered at zf + opl/2
    complex<T> e(0, -2 * stimPI * fz * (zf[inu] + opl/2));

    complex<T> eta = ri[inu] * ri[inu] - 1;

    //the sinc evaluates to 1 at fz = 0, which is handled separately to avoid 0/0
    if(ifz == 0)
        dest[i] += opl * exp(e) * eta * src[inu];
    else
        dest[i] += opl * exp(e) * eta * src[inu] * sin(stimPI * fz * opl) / (stimPI * fz * opl);
}
  66 +
//Advances the per-wavelength slab position "zf" past a layer of width "w".
//Launch with a 1D grid that covers at least S threads; extra threads return immediately.
template<typename T>
__global__ void gpu_mirst1d_increment_z(T* zf, complex<T>* ri, T w, unsigned int S){
    //zf = current z depth (optical path length) in pixels, one entry per wavelength
    //ri = refractive index of the material (NULL to advance by the physical width only)
    //w  = actual width of the layer (in pixels)
    //S  = number of entries in zf (and in ri, when given)

    //one thread per entry
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if(idx >= S) return;

    //without a refractive index the step is the physical width,
    //  otherwise the width is scaled by the real part of the index
    T step = w;
    if(ri != NULL)
        step = ri[idx].real() * w;

    zf[idx] += step;
}
  83 +
//Multiplies an existing sample (in the Fourier domain) by the 1D MIRST filter, in place.
//Launch with a 2D grid: x indexes the zR samples along Fz, y indexes the lambdaR wavelengths.
template<typename T>
__global__ void gpu_mirst1d_apply_filter(complex<T>* sampleFFT, T* lambda,
                                         T dFz,
                                         T inNA, T outNA,
                                         unsigned int lambdaR, unsigned int zR,
                                         T sigma = 0){
    //sampleFFT = the sample in the Fourier domain (will be overwritten)
    //lambda    = list of wavelengths
    //dFz       = delta along the Fz axis in the frequency domain
    //inNA      = NA of the internal obscuration
    //outNA     = NA of the objective
    //lambdaR   = number of wavelengths
    //zR        = number of pixels along the Fz axis (same as the z-axis)
    //sigma     = width of the Gaussian source

    int col = blockIdx.x * blockDim.x + threadIdx.x;    //position along Fz
    int row = blockIdx.y * blockDim.y + threadIdx.y;    //wavelength index

    //discard threads that fall outside of the field
    if(row >= lambdaR || col >= zR) return;

    //index into the sample FT
    int idx = row * zR + col;

    //the upper half of the array is zeroed rather than filtered (negative spatial frequencies)
    if(col >= zR / 2){
        sampleFFT[idx] = 0;
        return;
    }

    //spatial frequency of this sample
    T fz = col * dFz;

    //frequency in inverse microns
    T nu = 1/lambda[row];

    //radius of the integration circle, cut off above the diffraction limit
    T nu_sq = nu * nu;
    T fz_sq = (fz * fz) / 4;
    T r = 0;
    if(fz_sq < nu_sq)
        r = sqrt(nu_sq - fz_sq);

    //account for the optics: pass only radii between the obscuration and the objective
    T Q = 0;
    if(r > nu * inNA && r < nu * outNA)
        Q = 1;

    //account for the source (sigma = 0 gives s = 1)
    T s = exp( - (r*r * sigma*sigma) / 2 );

    //assemble the filter; the fz = 0 term is zeroed to avoid dividing by zero
    T mirst = 0;
    if(fz != 0)
        mirst = 2 * stimPI * r * s * Q * (1/fz);

    sampleFFT[idx] *= mirst;
}
  149 +
/*This object performs a 1-dimensional (layered) MIRST simulation.
  A sample is described as a stack of layers (add_layer) evaluated at a list of
  wavelengths (add_wavenumbers). The sample is built in the Fourier domain on the GPU
  and written out with save_sample() or save_mirst().
*/
template<typename T>
class mirst1d{

private:
    unsigned int Z;                         //z-axis resolution
    unsigned int pad;                       //pixel padding (the padded field is Z + pad samples wide)

    std::vector< material<T> > matlist;     //list of materials
    std::vector< T > layers;                //list of layer thicknesses

    std::vector< T > lambdas;               //list of wavelengths that are being simulated
    unsigned int S;                         //number of wavelengths (size of "lambdas")

    T NA[2];                                //numerical aperture (central obscuration and outer diameter)

    function<T, T> source_profile;          //profile (spectrum) of the source (expressed in inverse centimeters)

    complexfield<T, 1> scratch;             //scratch GPU memory used to build samples, transforms, etc.

    //run an in-place, batched 1D FFT on the scratch field:
    //  one transform of length (Z + pad) for each wavelength
    void fft(int direction = CUFFT_FORWARD){

        unsigned padZ = Z + pad;

        //create cuFFT handles
        cufftHandle plan;
        cufftResult result;

        //the transform type is selected from the size of T
        if(sizeof(T) == 4)
            result = cufftPlan1d(&plan, padZ, CUFFT_C2C, lambdas.size());   //single precision
        else
            result = cufftPlan1d(&plan, padZ, CUFFT_Z2Z, lambdas.size());   //double precision

        //check for Plan 1D errors
        if(result != CUFFT_SUCCESS){
            std::cout<<"Error creating CUFFT plan for computing the FFT:"<<std::endl;
            CufftError(result);
            exit(1);
        }

        if(sizeof(T) == 4)
            result = cufftExecC2C(plan, (cufftComplex*)scratch.ptr(), (cufftComplex*)scratch.ptr(), direction);
        else
            result = cufftExecZ2Z(plan, (cufftDoubleComplex*)scratch.ptr(), (cufftDoubleComplex*)scratch.ptr(), direction);

        //check for FFT errors
        if(result != CUFFT_SUCCESS){
            std::cout<<"Error executing CUFFT to compute the FFT."<<std::endl;
            CufftError(result);
            exit(1);
        }

        cufftDestroy(plan);
    }


    //initialize the scratch memory: (Z + pad) samples for each wavelength, set to zero
    void init_scratch(){
        scratch = complexfield<T, 1>(Z + pad , lambdas.size());
        scratch = 0;
    }

    //get the list of scattering efficiency (eta) values for a specified layer
    std::vector< complex<T> > layer_etas(unsigned int l){

        std::vector< complex<T> > etas;
        etas.reserve(lambdas.size());

        //fill the list of etas (one per wavelength)
        for(unsigned int i=0; i<lambdas.size(); i++)
            etas.push_back( matlist[l].eta(lambdas[i]) );
        return etas;
    }

    //calculates the block and grid sizes using information from the GPU
    //  the grid covers the padded field: (Z + pad) samples along x and S wavelengths along y
    void cuda_params(dim3& grids, dim3& blocks){
        int maxThreads = stim::maxThreadsPerBlock();            //largest block supported by the device
        int SQRT_BLOCK = (int)std::sqrt((float)maxThreads);     //use a square block

        //create one thread for each pixel of the padded field (rounding the grid up)
        //  every buffer in this class is (Z + pad) wide, so the grid uses the same width
        blocks = dim3(SQRT_BLOCK, SQRT_BLOCK);
        grids = dim3(((Z + pad) + SQRT_BLOCK - 1)/SQRT_BLOCK, (S + SQRT_BLOCK - 1)/SQRT_BLOCK);
    }

    //add the fourier transform of layer n to the scratch space
    //  zf = device array (S entries) holding the current slab position for each wavelength;
    //       it is advanced past this layer before the function returns
    void build_layer_fft(unsigned int n, T* zf){
        //nothing to render without wavelengths (and an empty grid is an invalid launch)
        if(S == 0) return;

        unsigned int paddedZ = Z + pad;

        T wpx = layers[n] / dz();               //calculate the width of the layer in pixels

        //evaluate the refractive index and source profile for every wavelength on the host
        std::vector< complex<T> > hostRi(S);
        std::vector< T > hostSrc(S);
        for(unsigned int inu=0; inu<S; inu++){
            hostRi[inu] = matlist[n].getN(lambdas[inu]);
            hostSrc[inu] = source_profile(10000 / lambdas[inu]);    //the profile is indexed by wavenumber
        }

        //copy the refractive indices to the GPU (one transfer instead of one per wavelength)
        complex<T>* gpuRi;
        HANDLE_ERROR(cudaMalloc( (void**)&gpuRi, sizeof(complex<T>) * S));
        HANDLE_ERROR(cudaMemcpy( gpuRi, &hostRi[0], sizeof(complex<T>) * S, cudaMemcpyHostToDevice ));

        //copy the source profile to the GPU
        T* gpuSrc;
        HANDLE_ERROR(cudaMalloc( (void**)&gpuSrc, sizeof(T) * S));
        HANDLE_ERROR(cudaMemcpy( gpuSrc, &hostSrc[0], sizeof(T) * S, cudaMemcpyHostToDevice ));

        //create one thread for each pixel of the field slice
        dim3 grid, block;
        cuda_params(grid, block);
        stim::gpu_mirst1d_layer_fft<<<grid, block>>>(scratch.ptr(), gpuRi, gpuSrc, zf, wpx, paddedZ, S);
        HANDLE_ERROR(cudaGetLastError());       //catch launch configuration errors

        //advance the slab position for each wavelength (one thread per wavelength)
        int linBlock = stim::maxThreadsPerBlock();
        int linGrid = S / linBlock + 1;
        stim::gpu_mirst1d_increment_z <<<linGrid, linBlock>>>(zf, gpuRi, wpx, S);
        HANDLE_ERROR(cudaGetLastError());

        //free memory
        HANDLE_ERROR(cudaFree(gpuRi));
        HANDLE_ERROR(cudaFree(gpuSrc));
    }

    //build the complete sample (all layers) in the scratch space
    void build_sample(){
        init_scratch();                         //initialize the GPU scratch space

        //per-wavelength slab position, starting at zero
        T* zf;
        HANDLE_ERROR(cudaMalloc(&zf, sizeof(T) * S));
        HANDLE_ERROR(cudaMemset(zf, 0, sizeof(T) * S));

        //render each layer of the sample
        for(unsigned int l=0; l<layers.size(); l++){
            build_layer_fft(l, zf);
        }

        HANDLE_ERROR(cudaFree(zf));
    }

    //apply the MIRST filter to the sample stored in the scratch space (Fourier domain)
    void apply_filter(){
        //nothing to filter without wavelengths
        if(S == 0) return;

        dim3 grid, block;
        cuda_params(grid, block);

        unsigned int Zpad = Z + pad;

        T sim_range = dz() * Zpad;
        T dFz = 1 / sim_range;

        //copy the array of wavelengths to the GPU
        //  the list holds S wavelengths - sizing the copy by the z resolution would read
        //  past the end of "lambdas" (or leave the device array short)
        T* gpuLambdas;
        HANDLE_ERROR(cudaMalloc(&gpuLambdas, sizeof(T) * S));
        HANDLE_ERROR(cudaMemcpy(gpuLambdas, &lambdas[0], sizeof(T) * S, cudaMemcpyHostToDevice));
        stim::gpu_mirst1d_apply_filter <<<grid, block>>>(scratch.ptr(), gpuLambdas,
                                                         dFz,
                                                         NA[0], NA[1],
                                                         S, Zpad);
        HANDLE_ERROR(cudaGetLastError());       //catch launch configuration errors

        //release the wavelength list
        HANDLE_ERROR(cudaFree(gpuLambdas));
    }

    //crop the image to the sample thickness - keep in mind that sample thickness != optical path length
    void crop(){

        scratch = scratch.crop(Z, S);
    }

    //save the scratch field as a binary file
    //  NOTE: not implemented - this is currently a no-op, so nothing is written
    void to_binary(std::string filename){

    }


public:

    //constructor
    //  rZ      = z-axis resolution
    //  padding = pixel padding added to the z-axis
    mirst1d(unsigned int rZ = 100,
            unsigned int padding = 0){
        Z = rZ;
        pad = padding;
        NA[0] = 0;
        NA[1] = 0.8;
        S = 0;
        source_profile = 1;
    }

    //add a layer, thickness = microns
    void add_layer(material<T> mat, T thickness){
        matlist.push_back(mat);
        layers.push_back(thickness);
    }

    //add a layer using a material that is constructed from a file name
    void add_layer(std::string filename, T thickness){
        add_layer(material<T>(filename), thickness);
    }

    //adds a profile spectrum for the light source
    void set_source(std::string filename){
        source_profile.load(filename);
    }

    //adds a block of wavenumbers (cm^-1) to the simulation parameters
    //  every wavenumber in [start, stop] is added, in increments of "step"
    void add_wavenumbers(unsigned int start, unsigned int stop, unsigned int step){
        //a step of zero would never reach "stop"
        if(step == 0){
            std::cout<<"ERROR - the wavenumber step must be greater than zero"<<std::endl;
            return;
        }

        for(unsigned int nu = start; nu <= stop; nu += step){
            lambdas.push_back((T)10000 / nu);       //convert the wavenumber to a wavelength
            //stop before the increment passes "stop" (this also prevents unsigned wrap-around)
            if(stop - nu < step) break;
        }
        S = lambdas.size();     //update the number of wavelengths (shorthand for later)
    }

    //total thickness of the sample (sum of all layer thicknesses)
    T thickness(){
        T t = 0;
        for(unsigned int l=0; l<layers.size(); l++)
            t += layers[l];
        return t;
    }

    //set the pixel padding
    void padding(unsigned int padding = 0){
        pad = padding;
    }

    T dz(){
        return thickness() / Z;     //calculate the z-axis step size
    }

    //set the numerical aperture: in = central obscuration, out = outer diameter
    void na(T in, T out){
        NA[0] = in;
        NA[1] = out;
    }

    //set the outer numerical aperture (no central obscuration)
    void na(T out){
        na(0, out);
    }

    //return a copy of the source profile
    stim::function<T, T> get_source(){
        return source_profile;
    }

    void save_sample(std::string filename){
        //create a sample and save the magnitude as an image
        build_sample();
        fft(CUFFT_INVERSE);
        scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
    }

    void save_mirst(std::string filename, bool binary = true){
        //apply the MIRST filter to a sample and save the image

        //build the sample in the Fourier domain
        build_sample();

        //apply the MIRST filter
        apply_filter();

        //apply an inverse FFT to bring the results back into the spatial domain
        fft(CUFFT_INVERSE);

        crop();

        //save the image (to_binary is currently a stub, so binary output writes nothing)
        if(binary)
            to_binary(filename);
        else
            scratch.toImage(filename, stim::complexfield<T, 1>::magnitude);
    }

    //return a human-readable summary of the simulation parameters
    std::string str(){

        std::stringstream ss;
        ss<<"1D MIRST Simulation========================="<<std::endl;
        ss<<"z-axis resolution: "<<Z<<std::endl;
        //the wavelength list may still be empty, in which case there is no domain to report
        if(lambdas.empty())
            ss<<"simulation domain: []"<<std::endl;
        else
            ss<<"simulation domain: ["<<lambdas[0]<<", "<<lambdas.back()<<"]"<<std::endl;
        ss<<"number of wavelengths: "<<lambdas.size()<<std::endl;
        ss<<"padding: "<<pad<<std::endl;
        ss<<"sample thickness: "<<thickness()<<" um"<<std::endl;
        ss<<"dz: "<<dz()<<" um"<<std::endl;
        ss<<std::endl;
        ss<<layers.size()<<" layers-------------"<<std::endl;
        for(unsigned int l=0; l<layers.size(); l++)
            ss<<"layer "<<l<<": "<<layers[l]<<" um"<<"---------"<<std::endl<<matlist[l].str()<<std::endl;

        ss<<"source profile-----------"<<std::endl;
        ss<<get_source().str()<<std::endl;

        return ss.str();
    }

};
  446 +
  447 +}
@@ -15,7 +15,7 @@ @@ -15,7 +15,7 @@
15 15
16 namespace stim{ 16 namespace stim{
17 17
18 - class argument 18 + class cmd_option
19 { 19 {
20 private: 20 private:
21 bool ansi; 21 bool ansi;
@@ -59,8 +59,8 @@ namespace stim{ @@ -59,8 +59,8 @@ namespace stim{
59 59
60 public: 60 public:
61 void set_ansi(bool b){ ansi = b; } 61 void set_ansi(bool b){ ansi = b; }
62 - //create an argument with a given name, description, and default value  
63 - argument(std::string _name, std::string _desc, std::string _default = "", std::string _range = "") 62 + //create an option with a given name, description, and default value
  63 + cmd_option(std::string _name, std::string _desc, std::string _default = "", std::string _range = "")
64 { 64 {
65 name = _name; 65 name = _name;
66 parse_desc(_desc); 66 parse_desc(_desc);
@@ -81,12 +81,12 @@ namespace stim{ @@ -81,12 +81,12 @@ namespace stim{
81 return vals.size(); 81 return vals.size();
82 } 82 }
83 83
84 - //return the value of a text argument 84 + //return the value of a text option
85 std::string as_string(unsigned int n = 0) 85 std::string as_string(unsigned int n = 0)
86 { 86 {
87 if(!flag) 87 if(!flag)
88 { 88 {
89 - std::cout<<"ERROR - Argument requested without being set: "<<name<<std::endl; 89 + std::cout<<"ERROR - Option requested without being set: "<<name<<std::endl;
90 exit(1); 90 exit(1);
91 } 91 }
92 92
@@ -96,12 +96,12 @@ namespace stim{ @@ -96,12 +96,12 @@ namespace stim{
96 else return ""; 96 else return "";
97 } 97 }
98 98
99 - //return the value of a floating point argument 99 + //return the value of a floating point option
100 float as_float(unsigned int n = 0) 100 float as_float(unsigned int n = 0)
101 { 101 {
102 if(!flag) 102 if(!flag)
103 { 103 {
104 - std::cout<<"ERROR - Argument requested without being set: "<<name<<std::endl; 104 + std::cout<<"ERROR - option requested without being set: "<<name<<std::endl;
105 exit(1); 105 exit(1);
106 } 106 }
107 107
@@ -115,12 +115,12 @@ namespace stim{ @@ -115,12 +115,12 @@ namespace stim{
115 else return 0; 115 else return 0;
116 } 116 }
117 117
118 - //return the value of an integer argument 118 + //return the value of an integer option
119 int as_int(unsigned int n = 0) 119 int as_int(unsigned int n = 0)
120 { 120 {
121 if(!flag) 121 if(!flag)
122 { 122 {
123 - std::cout<<"ERROR - Argument requested without being set: "<<name<<std::endl; 123 + std::cout<<"ERROR - option requested without being set: "<<name<<std::endl;
124 exit(1); 124 exit(1);
125 } 125 }
126 126
@@ -138,7 +138,7 @@ namespace stim{ @@ -138,7 +138,7 @@ namespace stim{
138 int col_width() 138 int col_width()
139 { 139 {
140 int n = 3; 140 int n = 3;
141 - //add the length of the argument name 141 + //add the length of the option name
142 n += name.size(); 142 n += name.size();
143 143
144 //if there are any default parameters 144 //if there are any default parameters
@@ -147,7 +147,7 @@ namespace stim{ @@ -147,7 +147,7 @@ namespace stim{
147 //padding (parenthesis, =, etc.) 147 //padding (parenthesis, =, etc.)
148 n += 6; 148 n += 6;
149 149
150 - //for each default argument value 150 + //for each default option value
151 for(unsigned int v=0; v<vals.size(); v++) 151 for(unsigned int v=0; v<vals.size(); v++)
152 n += vals[v].size() + 1; 152 n += vals[v].size() + 1;
153 } 153 }
@@ -209,13 +209,13 @@ namespace stim{ @@ -209,13 +209,13 @@ namespace stim{
209 return ss.str(); 209 return ss.str();
210 } 210 }
211 211
212 - //compare the name of the argument to a string 212 + //compare the name of the option to a string
213 bool operator==(std::string rhs) 213 bool operator==(std::string rhs)
214 { 214 {
215 return (name == rhs); 215 return (name == rhs);
216 } 216 }
217 217
218 - //set the argument to a given value 218 + //set the option to a given value
219 void set(std::string _value) 219 void set(std::string _value)
220 { 220 {
221 parse_val(_value); 221 parse_val(_value);
@@ -242,10 +242,11 @@ namespace stim{ @@ -242,10 +242,11 @@ namespace stim{
242 private: 242 private:
243 bool ansi; 243 bool ansi;
244 244
245 - //vector of arguments  
246 - std::vector<argument> args; 245 + //vector of options
  246 + std::vector<cmd_option> opts;
  247 + std::vector<std::string> args;
247 248
248 - //column width of the longest argument 249 + //column width of the longest option
249 int col_width; 250 int col_width;
250 251
251 //list of sections 252 //list of sections
@@ -261,28 +262,28 @@ namespace stim{ @@ -261,28 +262,28 @@ namespace stim{
261 void set_ansi(bool b) 262 void set_ansi(bool b)
262 { 263 {
263 ansi = b; 264 ansi = b;
264 - for(unsigned int i=0; i<args.size(); i++)  
265 - args[i].set_ansi(ansi); 265 + for(unsigned int i=0; i<opts.size(); i++)
  266 + opts[i].set_ansi(ansi);
266 } 267 }
267 268
268 void add(std::string _name, std::string _desc, std::string _default = "", std::string _range = "") 269 void add(std::string _name, std::string _desc, std::string _default = "", std::string _range = "")
269 { 270 {
270 - argument arg(_name, _desc, _default, _range);  
271 - arg.set_ansi(ansi);  
272 - args.push_back(arg); 271 + cmd_option opt(_name, _desc, _default, _range);
  272 + opt.set_ansi(ansi);
  273 + opts.push_back(opt);
273 274
274 - col_width = std::max<int>(col_width, arg.col_width()); 275 + col_width = std::max<int>(col_width, opt.col_width());
275 } 276 }
276 277
277 void section(std::string _name) 278 void section(std::string _name)
278 { 279 {
279 argsection s; 280 argsection s;
280 s.name = _name; 281 s.name = _name;
281 - s.index = args.size(); 282 + s.index = opts.size();
282 sections.push_back(s); 283 sections.push_back(s);
283 } 284 }
284 285
285 - //output the arguments (generally in response to --help) 286 + //output the options (generally in response to --help)
286 std::string str() 287 std::string str()
287 { 288 {
288 std::stringstream ss; 289 std::stringstream ss;
@@ -292,8 +293,8 @@ namespace stim{ @@ -292,8 +293,8 @@ namespace stim{
292 if(sections.size() > 0) 293 if(sections.size() > 0)
293 si = 0; 294 si = 0;
294 295
295 - //for each argument  
296 - for(unsigned int a=0; a<args.size(); a++) 296 + //for each option
  297 + for(unsigned int a=0; a<opts.size(); a++)
297 { 298 {
298 if(si != -1 && a == sections[si].index) 299 if(si != -1 && a == sections[si].index)
299 { 300 {
@@ -305,7 +306,7 @@ namespace stim{ @@ -305,7 +306,7 @@ namespace stim{
305 if(si == (int)sections.size()) si = -1; 306 if(si == (int)sections.size()) si = -1;
306 } 307 }
307 308
308 - ss<<args[a].toStr(col_width)<<std::endl; 309 + ss<<opts[a].toStr(col_width)<<std::endl;
309 } 310 }
310 311
311 return ss.str(); 312 return ss.str();
@@ -313,9 +314,9 @@ namespace stim{ @@ -313,9 +314,9 @@ namespace stim{
313 314
314 int index(std::string _name) 315 int index(std::string _name)
315 { 316 {
316 - unsigned int i = find(args.begin(), args.end(), _name) - args.begin(); 317 + unsigned int i = find(opts.begin(), opts.end(), _name) - opts.begin();
317 318
318 - if(i >= args.size()) 319 + if(i >= opts.size())
319 return -1; 320 return -1;
320 321
321 return (int)i; 322 return (int)i;
@@ -327,52 +328,57 @@ namespace stim{ @@ -327,52 +328,57 @@ namespace stim{
327 328
328 if(i != -1) 329 if(i != -1)
329 { 330 {
330 - args[i].set(_value); 331 + opts[i].set(_value);
331 //adjust the column width if necessary 332 //adjust the column width if necessary
332 - col_width = (std::max)(col_width, args[i].col_width()); 333 + col_width = (std::max)(col_width, opts[i].col_width());
333 } 334 }
334 else 335 else
335 - std::cout<<"ERROR - Argument not recognized: "<<_name<<std::endl; 336 + std::cout<<"ERROR - option not recognized: "<<_name<<std::endl;
336 } 337 }
337 338
338 //parse a parameter string 339 //parse a parameter string
339 void parse(int argc, char* argv[]) 340 void parse(int argc, char* argv[])
340 { 341 {
341 - //if the number of arguments is 1, we're done 342 + //if the number of options is 1, we're done
342 if(argc <= 1) return; 343 if(argc <= 1) return;
343 344
344 std::string name; 345 std::string name;
345 std::string params; 346 std::string params;
346 347
  348 + bool args_done = false; //create a flag that turns true when the first option is encountered
  349 +
347 for(int i=1; i<argc; i++) 350 for(int i=1; i<argc; i++)
348 { 351 {
349 - //if the argument is a parameter name 352 + //if the argument is an option
350 if(argv[i][0] == '-' && argv[i][1] == '-') 353 if(argv[i][0] == '-' && argv[i][1] == '-')
351 { 354 {
352 - //add any previous arguments 355 + args_done = true; //arguments for the executable are done, all options now
  356 + //add any previous options
353 if(name != "") 357 if(name != "")
354 set(name, params); 358 set(name, params);
355 - //set the current argument to this name 359 + //set the current option to this name
356 name = argv[i]+2; 360 name = argv[i]+2;
357 //clear the parameters list 361 //clear the parameters list
358 params = ""; 362 params = "";
359 } 363 }
360 - else  
361 - { 364 + else if(!args_done){
  365 + args.push_back(argv[i]);
  366 + }
  367 + else{ //everything else is an arg for the most recent option
362 if(params != "") 368 if(params != "")
363 params += " "; 369 params += " ";
364 params += argv[i]; 370 params += argv[i];
365 } 371 }
366 } 372 }
367 373
368 - //set the last argument 374 + //set the last option
369 set(name, params); 375 set(name, params);
370 } 376 }
371 377
372 //determine if a parameter has been set (either specified by the user or with a default value) 378 //determine if a parameter has been set (either specified by the user or with a default value)
373 bool operator()(std::string _name) 379 bool operator()(std::string _name)
374 { 380 {
375 - int i = find(args.begin(), args.end(), _name) - args.begin(); 381 + int i = find(opts.begin(), opts.end(), _name) - opts.begin();
376 382
377 if(i < 0) 383 if(i < 0)
378 { 384 {
@@ -380,12 +386,13 @@ namespace stim{ @@ -380,12 +386,13 @@ namespace stim{
380 exit(1); 386 exit(1);
381 } 387 }
382 388
383 - return args[i].is_set(); 389 + return opts[i].is_set();
384 } 390 }
385 391
386 - int nargs(std::string _name) 392 + //number of arguments in a specified option
  393 + unsigned int nargs(std::string _name)
387 { 394 {
388 - int i = find(args.begin(), args.end(), _name) - args.begin(); 395 + int i = find(opts.begin(), opts.end(), _name) - opts.begin();
389 396
390 if(i < 0) 397 if(i < 0)
391 { 398 {
@@ -393,12 +400,22 @@ namespace stim{ @@ -393,12 +400,22 @@ namespace stim{
393 exit(1); 400 exit(1);
394 } 401 }
395 402
396 - return args[i].nargs(); 403 + return opts[i].nargs();
  404 + }
  405 +
  406 + //number of arguments for the executable
  407 + unsigned int nargs(){
  408 + return args.size();
  409 + }
  410 +
  411 + //return the a'th executable argument
  412 + std::string arg(unsigned int a){
  413 + return args[a];
397 } 414 }
398 415
399 - argument operator[](std::string _name) 416 + cmd_option operator[](std::string _name)
400 { 417 {
401 - int i = find(args.begin(), args.end(), _name) - args.begin(); 418 + int i = find(opts.begin(), opts.end(), _name) - opts.begin();
402 419
403 if(i < 0) 420 if(i < 0)
404 { 421 {
@@ -406,7 +423,7 @@ namespace stim{ @@ -406,7 +423,7 @@ namespace stim{
406 exit(1); 423 exit(1);
407 } 424 }
408 425
409 - return args[i]; 426 + return opts[i];
410 } 427 }
411 428
412 429