Commit 9b5637091b06c86a533de160293520afa1d1fd5f
1 parent
feb0c9dd
generalized aabb classes
Showing
15 changed files
with
2963 additions
and
45 deletions
Show diff stats
1 | +#!/usr/bin/python3 | |
2 | + | |
3 | +#import system processes | |
4 | +import subprocess, sys | |
5 | + | |
6 | +if len(sys.argv) > 1: | |
7 | + infile = int(sys.argv[1]) | |
8 | + | |
9 | +basefile = infile + "-base" | |
10 | +normfile = infile + "-norm" | |
11 | + | |
12 | +runcommand = "hsiproc " + infile + basefile + " --baseline baseline.txt" | |
13 | +subprocess.call(runcommand, shell=True) | |
0 | 14 | \ No newline at end of file | ... | ... |
stim/biomodels/network.h
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
1 | +#ifndef RTS_BESSEL_H | |
2 | +#define RTS_BESSEL_H | |
3 | + | |
4 | +#define _USE_MATH_DEFINES | |
5 | +#include <math.h> | |
6 | +#include "../math/complex.h" | |
7 | +#define eps 1e-15 | |
8 | +#define el 0.5772156649015329 | |
9 | + | |
10 | + | |
11 | +namespace stim{ | |
12 | + | |
13 | +static complex<double> cii(0.0,1.0); | |
14 | +static complex<double> cone(1.0,0.0); | |
15 | +static complex<double> czero(0.0,0.0); | |
16 | + | |
17 | +template< typename P > | |
18 | +P gamma(P x) | |
19 | +{ | |
20 | + int i,k,m; | |
21 | + P ga,gr,r,z; | |
22 | + | |
23 | + static P g[] = { | |
24 | + 1.0, | |
25 | + 0.5772156649015329, | |
26 | + -0.6558780715202538, | |
27 | + -0.420026350340952e-1, | |
28 | + 0.1665386113822915, | |
29 | + -0.421977345555443e-1, | |
30 | + -0.9621971527877e-2, | |
31 | + 0.7218943246663e-2, | |
32 | + -0.11651675918591e-2, | |
33 | + -0.2152416741149e-3, | |
34 | + 0.1280502823882e-3, | |
35 | + -0.201348547807e-4, | |
36 | + -0.12504934821e-5, | |
37 | + 0.1133027232e-5, | |
38 | + -0.2056338417e-6, | |
39 | + 0.6116095e-8, | |
40 | + 0.50020075e-8, | |
41 | + -0.11812746e-8, | |
42 | + 0.1043427e-9, | |
43 | + 0.77823e-11, | |
44 | + -0.36968e-11, | |
45 | + 0.51e-12, | |
46 | + -0.206e-13, | |
47 | + -0.54e-14, | |
48 | + 0.14e-14}; | |
49 | + | |
50 | + if (x > 171.0) return 1e308; // This value is an overflow flag. | |
51 | + if (x == (int)x) { | |
52 | + if (x > 0.0) { | |
53 | + ga = 1.0; // use factorial | |
54 | + for (i=2;i<x;i++) { | |
55 | + ga *= i; | |
56 | + } | |
57 | + } | |
58 | + else | |
59 | + ga = 1e308; | |
60 | + } | |
61 | + else { | |
62 | + if (fabs(x) > 1.0) { | |
63 | + z = fabs(x); | |
64 | + m = (int)z; | |
65 | + r = 1.0; | |
66 | + for (k=1;k<=m;k++) { | |
67 | + r *= (z-k); | |
68 | + } | |
69 | + z -= m; | |
70 | + } | |
71 | + else | |
72 | + z = x; | |
73 | + gr = g[24]; | |
74 | + for (k=23;k>=0;k--) { | |
75 | + gr = gr*z+g[k]; | |
76 | + } | |
77 | + ga = 1.0/(gr*z); | |
78 | + if (fabs(x) > 1.0) { | |
79 | + ga *= r; | |
80 | + if (x < 0.0) { | |
81 | + ga = -M_PI/(x*ga*sin(M_PI*x)); | |
82 | + } | |
83 | + } | |
84 | + } | |
85 | + return ga; | |
86 | +} | |
87 | + | |
88 | +template<typename P> | |
89 | +int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1, | |
90 | + P &j0p,P &j1p,P &y0p,P &y1p) | |
91 | +{ | |
92 | + P x2,r,ec,w0,w1,r0,r1,cs0,cs1; | |
93 | + P cu,p0,q0,p1,q1,t1,t2; | |
94 | + int k,kz; | |
95 | + static P a[] = { | |
96 | + -7.03125e-2, | |
97 | + 0.112152099609375, | |
98 | + -0.5725014209747314, | |
99 | + 6.074042001273483, | |
100 | + -1.100171402692467e2, | |
101 | + 3.038090510922384e3, | |
102 | + -1.188384262567832e5, | |
103 | + 6.252951493434797e6, | |
104 | + -4.259392165047669e8, | |
105 | + 3.646840080706556e10, | |
106 | + -3.833534661393944e12, | |
107 | + 4.854014686852901e14, | |
108 | + -7.286857349377656e16, | |
109 | + 1.279721941975975e19}; | |
110 | + static P b[] = { | |
111 | + 7.32421875e-2, | |
112 | + -0.2271080017089844, | |
113 | + 1.727727502584457, | |
114 | + -2.438052969955606e1, | |
115 | + 5.513358961220206e2, | |
116 | + -1.825775547429318e4, | |
117 | + 8.328593040162893e5, | |
118 | + -5.006958953198893e7, | |
119 | + 3.836255180230433e9, | |
120 | + -3.649010818849833e11, | |
121 | + 4.218971570284096e13, | |
122 | + -5.827244631566907e15, | |
123 | + 9.476288099260110e17, | |
124 | + -1.792162323051699e20}; | |
125 | + static P a1[] = { | |
126 | + 0.1171875, | |
127 | + -0.1441955566406250, | |
128 | + 0.6765925884246826, | |
129 | + -6.883914268109947, | |
130 | + 1.215978918765359e2, | |
131 | + -3.302272294480852e3, | |
132 | + 1.276412726461746e5, | |
133 | + -6.656367718817688e6, | |
134 | + 4.502786003050393e8, | |
135 | + -3.833857520742790e10, | |
136 | + 4.011838599133198e12, | |
137 | + -5.060568503314727e14, | |
138 | + 7.572616461117958e16, | |
139 | + -1.326257285320556e19}; | |
140 | + static P b1[] = { | |
141 | + -0.1025390625, | |
142 | + 0.2775764465332031, | |
143 | + -1.993531733751297, | |
144 | + 2.724882731126854e1, | |
145 | + -6.038440767050702e2, | |
146 | + 1.971837591223663e4, | |
147 | + -8.902978767070678e5, | |
148 | + 5.310411010968522e7, | |
149 | + -4.043620325107754e9, | |
150 | + 3.827011346598605e11, | |
151 | + -4.406481417852278e13, | |
152 | + 6.065091351222699e15, | |
153 | + -9.833883876590679e17, | |
154 | + 1.855045211579828e20}; | |
155 | + | |
156 | + if (x < 0.0) return 1; | |
157 | + if (x == 0.0) { | |
158 | + j0 = 1.0; | |
159 | + j1 = 0.0; | |
160 | + y0 = -1e308; | |
161 | + y1 = -1e308; | |
162 | + j0p = 0.0; | |
163 | + j1p = 0.5; | |
164 | + y0p = 1e308; | |
165 | + y1p = 1e308; | |
166 | + return 0; | |
167 | + } | |
168 | + x2 = x*x; | |
169 | + if (x <= 12.0) { | |
170 | + j0 = 1.0; | |
171 | + r = 1.0; | |
172 | + for (k=1;k<=30;k++) { | |
173 | + r *= -0.25*x2/(k*k); | |
174 | + j0 += r; | |
175 | + if (fabs(r) < fabs(j0)*1e-15) break; | |
176 | + } | |
177 | + j1 = 1.0; | |
178 | + r = 1.0; | |
179 | + for (k=1;k<=30;k++) { | |
180 | + r *= -0.25*x2/(k*(k+1)); | |
181 | + j1 += r; | |
182 | + if (fabs(r) < fabs(j1)*1e-15) break; | |
183 | + } | |
184 | + j1 *= 0.5*x; | |
185 | + ec = log(0.5*x)+el; | |
186 | + cs0 = 0.0; | |
187 | + w0 = 0.0; | |
188 | + r0 = 1.0; | |
189 | + for (k=1;k<=30;k++) { | |
190 | + w0 += 1.0/k; | |
191 | + r0 *= -0.25*x2/(k*k); | |
192 | + r = r0 * w0; | |
193 | + cs0 += r; | |
194 | + if (fabs(r) < fabs(cs0)*1e-15) break; | |
195 | + } | |
196 | + y0 = M_2_PI*(ec*j0-cs0); | |
197 | + cs1 = 1.0; | |
198 | + w1 = 0.0; | |
199 | + r1 = 1.0; | |
200 | + for (k=1;k<=30;k++) { | |
201 | + w1 += 1.0/k; | |
202 | + r1 *= -0.25*x2/(k*(k+1)); | |
203 | + r = r1*(2.0*w1+1.0/(k+1)); | |
204 | + cs1 += r; | |
205 | + if (fabs(r) < fabs(cs1)*1e-15) break; | |
206 | + } | |
207 | + y1 = M_2_PI * (ec*j1-1.0/x-0.25*x*cs1); | |
208 | + } | |
209 | + else { | |
210 | + if (x >= 50.0) kz = 8; // Can be changed to 10 | |
211 | + else if (x >= 35.0) kz = 10; // " " 12 | |
212 | + else kz = 12; // " " 14 | |
213 | + t1 = x-M_PI_4; | |
214 | + p0 = 1.0; | |
215 | + q0 = -0.125/x; | |
216 | + for (k=0;k<kz;k++) { | |
217 | + p0 += a[k]*pow(x,-2*k-2); | |
218 | + q0 += b[k]*pow(x,-2*k-3); | |
219 | + } | |
220 | + cu = sqrt(M_2_PI/x); | |
221 | + j0 = cu*(p0*cos(t1)-q0*sin(t1)); | |
222 | + y0 = cu*(p0*sin(t1)+q0*cos(t1)); | |
223 | + t2 = x-0.75*M_PI; | |
224 | + p1 = 1.0; | |
225 | + q1 = 0.375/x; | |
226 | + for (k=0;k<kz;k++) { | |
227 | + p1 += a1[k]*pow(x,-2*k-2); | |
228 | + q1 += b1[k]*pow(x,-2*k-3); | |
229 | + } | |
230 | + j1 = cu*(p1*cos(t2)-q1*sin(t2)); | |
231 | + y1 = cu*(p1*sin(t2)+q1*cos(t2)); | |
232 | + } | |
233 | + j0p = -j1; | |
234 | + j1p = j0-j1/x; | |
235 | + y0p = -y1; | |
236 | + y1p = y0-y1/x; | |
237 | + return 0; | |
238 | +} | |
239 | +// | |
240 | +// INPUT: | |
241 | +// double x -- argument of Bessel function | |
242 | +// | |
243 | +// OUTPUT: | |
244 | +// double j0 -- Bessel function of 1st kind, 0th order | |
245 | +// double j1 -- Bessel function of 1st kind, 1st order | |
246 | +// double y0 -- Bessel function of 2nd kind, 0th order | |
247 | +// double y1 -- Bessel function of 2nd kind, 1st order | |
248 | +// double j0p -- derivative of Bessel function of 1st kind, 0th order | |
249 | +// double j1p -- derivative of Bessel function of 1st kind, 1st order | |
250 | +// double y0p -- derivative of Bessel function of 2nd kind, 0th order | |
251 | +// double y1p -- derivative of Bessel function of 2nd kind, 1st order | |
252 | +// | |
253 | +// RETURN: | |
254 | +// int error code: 0 = OK, 1 = error | |
255 | +// | |
256 | +// This algorithm computes the functions using polynomial approximations. | |
257 | +// | |
258 | +template<typename P> | |
259 | +int bessjy01b(P x,P &j0,P &j1,P &y0,P &y1, | |
260 | + P &j0p,P &j1p,P &y0p,P &y1p) | |
261 | +{ | |
262 | + P t,t2,dtmp,a0,p0,q0,p1,q1,ta0,ta1; | |
263 | + if (x < 0.0) return 1; | |
264 | + if (x == 0.0) { | |
265 | + j0 = 1.0; | |
266 | + j1 = 0.0; | |
267 | + y0 = -1e308; | |
268 | + y1 = -1e308; | |
269 | + j0p = 0.0; | |
270 | + j1p = 0.5; | |
271 | + y0p = 1e308; | |
272 | + y1p = 1e308; | |
273 | + return 0; | |
274 | + } | |
275 | + if(x <= 4.0) { | |
276 | + t = x/4.0; | |
277 | + t2 = t*t; | |
278 | + j0 = ((((((-0.5014415e-3*t2+0.76771853e-2)*t2-0.0709253492)*t2+ | |
279 | + 0.4443584263)*t2-1.7777560599)*t2+3.9999973021)*t2 | |
280 | + -3.9999998721)*t2+1.0; | |
281 | + j1 = t*(((((((-0.1289769e-3*t2+0.22069155e-2)*t2-0.0236616773)*t2+ | |
282 | + 0.1777582922)*t2-0.8888839649)*t2+2.6666660544)*t2- | |
283 | + 3.999999971)*t2+1.9999999998); | |
284 | + dtmp = (((((((-0.567433e-4*t2+0.859977e-3)*t2-0.94855882e-2)*t2+ | |
285 | + 0.0772975809)*t2-0.4261737419)*t2+1.4216421221)*t2- | |
286 | + 2.3498519931)*t2+1.0766115157)*t2+0.3674669052; | |
287 | + y0 = M_2_PI*log(0.5*x)*j0+dtmp; | |
288 | + dtmp = (((((((0.6535773e-3*t2-0.0108175626)*t2+0.107657607)*t2- | |
289 | + 0.7268945577)*t2+3.1261399273)*t2-7.3980241381)*t2+ | |
290 | + 6.8529236342)*t2+0.3932562018)*t2-0.6366197726; | |
291 | + y1 = M_2_PI*log(0.5*x)*j1+dtmp/x; | |
292 | + } | |
293 | + else { | |
294 | + t = 4.0/x; | |
295 | + t2 = t*t; | |
296 | + a0 = sqrt(M_2_PI/x); | |
297 | + p0 = ((((-0.9285e-5*t2+0.43506e-4)*t2-0.122226e-3)*t2+ | |
298 | + 0.434725e-3)*t2-0.4394275e-2)*t2+0.999999997; | |
299 | + q0 = t*(((((0.8099e-5*t2-0.35614e-4)*t2+0.85844e-4)*t2- | |
300 | + 0.218024e-3)*t2+0.1144106e-2)*t2-0.031249995); | |
301 | + ta0 = x-M_PI_4; | |
302 | + j0 = a0*(p0*cos(ta0)-q0*sin(ta0)); | |
303 | + y0 = a0*(p0*sin(ta0)+q0*cos(ta0)); | |
304 | + p1 = ((((0.10632e-4*t2-0.50363e-4)*t2+0.145575e-3)*t2 | |
305 | + -0.559487e-3)*t2+0.7323931e-2)*t2+1.000000004; | |
306 | + q1 = t*(((((-0.9173e-5*t2+0.40658e-4)*t2-0.99941e-4)*t2 | |
307 | + +0.266891e-3)*t2-0.1601836e-2)*t2+0.093749994); | |
308 | + ta1 = x-0.75*M_PI; | |
309 | + j1 = a0*(p1*cos(ta1)-q1*sin(ta1)); | |
310 | + y1 = a0*(p1*sin(ta1)+q1*cos(ta1)); | |
311 | + } | |
312 | + j0p = -j1; | |
313 | + j1p = j0-j1/x; | |
314 | + y0p = -y1; | |
315 | + y1p = y0-y1/x; | |
316 | + return 0; | |
317 | +} | |
318 | +template<typename P> | |
319 | +int msta1(P x,int mp) | |
320 | +{ | |
321 | + P a0,f0,f1,f; | |
322 | + int i,n0,n1,nn; | |
323 | + | |
324 | + a0 = fabs(x); | |
325 | + n0 = (int)(1.1*a0)+1; | |
326 | + f0 = 0.5*log10(6.28*n0)-n0*log10(1.36*a0/n0)-mp; | |
327 | + n1 = n0+5; | |
328 | + f1 = 0.5*log10(6.28*n1)-n1*log10(1.36*a0/n1)-mp; | |
329 | + for (i=0;i<20;i++) { | |
330 | + nn = (int)(n1-(n1-n0)/(1.0-f0/f1)); | |
331 | + f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-mp; | |
332 | + if (std::abs(nn-n1) < 1) break; | |
333 | + n0 = n1; | |
334 | + f0 = f1; | |
335 | + n1 = nn; | |
336 | + f1 = f; | |
337 | + } | |
338 | + return nn; | |
339 | +} | |
340 | +template<typename P> | |
341 | +int msta2(P x,int n,int mp) | |
342 | +{ | |
343 | + P a0,ejn,hmp,f0,f1,f,obj; | |
344 | + int i,n0,n1,nn; | |
345 | + | |
346 | + a0 = fabs(x); | |
347 | + hmp = 0.5*mp; | |
348 | + ejn = 0.5*log10(6.28*n)-n*log10(1.36*a0/n); | |
349 | + if (ejn <= hmp) { | |
350 | + obj = mp; | |
351 | + n0 = (int)(1.1*a0); | |
352 | + if (n0 < 1) n0 = 1; | |
353 | + } | |
354 | + else { | |
355 | + obj = hmp+ejn; | |
356 | + n0 = n; | |
357 | + } | |
358 | + f0 = 0.5*log10(6.28*n0)-n0*log10(1.36*a0/n0)-obj; | |
359 | + n1 = n0+5; | |
360 | + f1 = 0.5*log10(6.28*n1)-n1*log10(1.36*a0/n1)-obj; | |
361 | + for (i=0;i<20;i++) { | |
362 | + nn = (int)(n1-(n1-n0)/(1.0-f0/f1)); | |
363 | + f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-obj; | |
364 | + if (std::abs(nn-n1) < 1) break; | |
365 | + n0 = n1; | |
366 | + f0 = f1; | |
367 | + n1 = nn; | |
368 | + f1 = f; | |
369 | + } | |
370 | + return nn+10; | |
371 | +} | |
372 | +// | |
373 | +// INPUT: | |
374 | +// double x -- argument of Bessel function of 1st and 2nd kind. | |
375 | +// int n -- order | |
376 | +// | |
377 | +// OUPUT: | |
378 | +// | |
379 | +// int nm -- highest order actually computed (nm <= n) | |
380 | +// double jn[] -- Bessel function of 1st kind, orders from 0 to nm | |
381 | +// double yn[] -- Bessel function of 2nd kind, orders from 0 to nm | |
382 | +// double j'n[]-- derivative of Bessel function of 1st kind, | |
383 | +// orders from 0 to nm | |
384 | +// double y'n[]-- derivative of Bessel function of 2nd kind, | |
385 | +// orders from 0 to nm | |
386 | +// | |
387 | +// Computes Bessel functions of all order up to 'n' using recurrence | |
388 | +// relations. If 'nm' < 'n' only 'nm' orders are returned. | |
389 | +// | |
390 | +template<typename P> | |
391 | +int bessjyna(int n,P x,int &nm,P *jn,P *yn, | |
392 | + P *jnp,P *ynp) | |
393 | +{ | |
394 | + P bj0,bj1,f,f0,f1,f2,cs; | |
395 | + int i,k,m,ecode; | |
396 | + | |
397 | + nm = n; | |
398 | + if ((x < 0.0) || (n < 0)) return 1; | |
399 | + if (x < 1e-15) { | |
400 | + for (i=0;i<=n;i++) { | |
401 | + jn[i] = 0.0; | |
402 | + yn[i] = -1e308; | |
403 | + jnp[i] = 0.0; | |
404 | + ynp[i] = 1e308; | |
405 | + } | |
406 | + jn[0] = 1.0; | |
407 | + jnp[1] = 0.5; | |
408 | + return 0; | |
409 | + } | |
410 | + ecode = bessjy01a(x,jn[0],jn[1],yn[0],yn[1],jnp[0],jnp[1],ynp[0],ynp[1]); | |
411 | + if (n < 2) return 0; | |
412 | + bj0 = jn[0]; | |
413 | + bj1 = jn[1]; | |
414 | + if (n < (int)0.9*x) { | |
415 | + for (k=2;k<=n;k++) { | |
416 | + jn[k] = 2.0*(k-1.0)*bj1/x-bj0; | |
417 | + bj0 = bj1; | |
418 | + bj1 = jn[k]; | |
419 | + } | |
420 | + } | |
421 | + else { | |
422 | + m = msta1(x,200); | |
423 | + if (m < n) nm = m; | |
424 | + else m = msta2(x,n,15); | |
425 | + f2 = 0.0; | |
426 | + f1 = 1.0e-100; | |
427 | + for (k=m;k>=0;k--) { | |
428 | + f = 2.0*(k+1.0)/x*f1-f2; | |
429 | + if (k <= nm) jn[k] = f; | |
430 | + f2 = f1; | |
431 | + f1 = f; | |
432 | + } | |
433 | + if (fabs(bj0) > fabs(bj1)) cs = bj0/f; | |
434 | + else cs = bj1/f2; | |
435 | + for (k=0;k<=nm;k++) { | |
436 | + jn[k] *= cs; | |
437 | + } | |
438 | + } | |
439 | + for (k=2;k<=nm;k++) { | |
440 | + jnp[k] = jn[k-1]-k*jn[k]/x; | |
441 | + } | |
442 | + f0 = yn[0]; | |
443 | + f1 = yn[1]; | |
444 | + for (k=2;k<=nm;k++) { | |
445 | + f = 2.0*(k-1.0)*f1/x-f0; | |
446 | + yn[k] = f; | |
447 | + f0 = f1; | |
448 | + f1 = f; | |
449 | + } | |
450 | + for (k=2;k<=nm;k++) { | |
451 | + ynp[k] = yn[k-1]-k*yn[k]/x; | |
452 | + } | |
453 | + return 0; | |
454 | +} | |
455 | +// | |
456 | +// Same input and output conventions as above. Different recurrence | |
457 | +// relations used for 'x' < 300. | |
458 | +// | |
459 | +template<typename P> | |
460 | +int bessjynb(int n,P x,int &nm,P *jn,P *yn, | |
461 | + P *jnp,P *ynp) | |
462 | +{ | |
463 | + P t1,t2,f,f1,f2,bj0,bj1,bjk,by0,by1,cu,s0,su,sv; | |
464 | + P ec,bs,byk,p0,p1,q0,q1; | |
465 | + static P a[] = { | |
466 | + -0.7031250000000000e-1, | |
467 | + 0.1121520996093750, | |
468 | + -0.5725014209747314, | |
469 | + 6.074042001273483}; | |
470 | + static P b[] = { | |
471 | + 0.7324218750000000e-1, | |
472 | + -0.2271080017089844, | |
473 | + 1.727727502584457, | |
474 | + -2.438052969955606e1}; | |
475 | + static P a1[] = { | |
476 | + 0.1171875, | |
477 | + -0.1441955566406250, | |
478 | + 0.6765925884246826, | |
479 | + -6.883914268109947}; | |
480 | + static P b1[] = { | |
481 | + -0.1025390625, | |
482 | + 0.2775764465332031, | |
483 | + -1.993531733751297, | |
484 | + 2.724882731126854e1}; | |
485 | + | |
486 | + int i,k,m; | |
487 | + nm = n; | |
488 | + if ((x < 0.0) || (n < 0)) return 1; | |
489 | + if (x < 1e-15) { | |
490 | + for (i=0;i<=n;i++) { | |
491 | + jn[i] = 0.0; | |
492 | + yn[i] = -1e308; | |
493 | + jnp[i] = 0.0; | |
494 | + ynp[i] = 1e308; | |
495 | + } | |
496 | + jn[0] = 1.0; | |
497 | + jnp[1] = 0.5; | |
498 | + return 0; | |
499 | + } | |
500 | + if (x <= 300.0 || n > (int)(0.9*x)) { | |
501 | + if (n == 0) nm = 1; | |
502 | + m = msta1(x,200); | |
503 | + if (m < nm) nm = m; | |
504 | + else m = msta2(x,nm,15); | |
505 | + bs = 0.0; | |
506 | + su = 0.0; | |
507 | + sv = 0.0; | |
508 | + f2 = 0.0; | |
509 | + f1 = 1.0e-100; | |
510 | + for (k = m;k>=0;k--) { | |
511 | + f = 2.0*(k+1.0)/x*f1 - f2; | |
512 | + if (k <= nm) jn[k] = f; | |
513 | + if ((k == 2*(int)(k/2)) && (k != 0)) { | |
514 | + bs += 2.0*f; | |
515 | +// su += pow(-1,k>>1)*f/(double)k; | |
516 | + su += (-1)*((k & 2)-1)*f/(P)k; | |
517 | + } | |
518 | + else if (k > 1) { | |
519 | +// sv += pow(-1,k>>1)*k*f/(k*k-1.0); | |
520 | + sv += (-1)*((k & 2)-1)*(P)k*f/(k*k-1.0); | |
521 | + } | |
522 | + f2 = f1; | |
523 | + f1 = f; | |
524 | + } | |
525 | + s0 = bs+f; | |
526 | + for (k=0;k<=nm;k++) { | |
527 | + jn[k] /= s0; | |
528 | + } | |
529 | + ec = log(0.5*x) +0.5772156649015329; | |
530 | + by0 = M_2_PI*(ec*jn[0]-4.0*su/s0); | |
531 | + yn[0] = by0; | |
532 | + by1 = M_2_PI*((ec-1.0)*jn[1]-jn[0]/x-4.0*sv/s0); | |
533 | + yn[1] = by1; | |
534 | + } | |
535 | + else { | |
536 | + t1 = x-M_PI_4; | |
537 | + p0 = 1.0; | |
538 | + q0 = -0.125/x; | |
539 | + for (k=0;k<4;k++) { | |
540 | + p0 += a[k]*pow(x,-2*k-2); | |
541 | + q0 += b[k]*pow(x,-2*k-3); | |
542 | + } | |
543 | + cu = sqrt(M_2_PI/x); | |
544 | + bj0 = cu*(p0*cos(t1)-q0*sin(t1)); | |
545 | + by0 = cu*(p0*sin(t1)+q0*cos(t1)); | |
546 | + jn[0] = bj0; | |
547 | + yn[0] = by0; | |
548 | + t2 = x-0.75*M_PI; | |
549 | + p1 = 1.0; | |
550 | + q1 = 0.375/x; | |
551 | + for (k=0;k<4;k++) { | |
552 | + p1 += a1[k]*pow(x,-2*k-2); | |
553 | + q1 += b1[k]*pow(x,-2*k-3); | |
554 | + } | |
555 | + bj1 = cu*(p1*cos(t2)-q1*sin(t2)); | |
556 | + by1 = cu*(p1*sin(t2)+q1*cos(t2)); | |
557 | + jn[1] = bj1; | |
558 | + yn[1] = by1; | |
559 | + for (k=2;k<=nm;k++) { | |
560 | + bjk = 2.0*(k-1.0)*bj1/x-bj0; | |
561 | + jn[k] = bjk; | |
562 | + bj0 = bj1; | |
563 | + bj1 = bjk; | |
564 | + } | |
565 | + } | |
566 | + jnp[0] = -jn[1]; | |
567 | + for (k=1;k<=nm;k++) { | |
568 | + jnp[k] = jn[k-1]-k*jn[k]/x; | |
569 | + } | |
570 | + for (k=2;k<=nm;k++) { | |
571 | + byk = 2.0*(k-1.0)*by1/x-by0; | |
572 | + yn[k] = byk; | |
573 | + by0 = by1; | |
574 | + by1 = byk; | |
575 | + } | |
576 | + ynp[0] = -yn[1]; | |
577 | + for (k=1;k<=nm;k++) { | |
578 | + ynp[k] = yn[k-1]-k*yn[k]/x; | |
579 | + } | |
580 | + return 0; | |
581 | + | |
582 | +} | |
583 | + | |
584 | +// The following routine computes Bessel Jv(x) and Yv(x) for | |
585 | +// arbitrary positive order (v). For negative order, use: | |
586 | +// | |
587 | +// J-v(x) = Jv(x)cos(v pi) - Yv(x)sin(v pi) | |
588 | +// Y-v(x) = Jv(x)sin(v pi) + Yv(x)cos(v pi) | |
589 | +// | |
590 | +template<typename P> | |
591 | +int bessjyv(P v,P x,P &vm,P *jv,P *yv, | |
592 | + P *djv,P *dyv) | |
593 | +{ | |
594 | + P v0,vl,vg,vv,a,a0,r,x2,bjv0,bjv1,bjvl,f,f0,f1,f2; | |
595 | + P r0,r1,ck,cs,cs0,cs1,sk,qx,px,byv0,byv1,rp,xk,rq; | |
596 | + P b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk; | |
597 | + int j,k,l,m,n,kz; | |
598 | + | |
599 | + x2 = x*x; | |
600 | + n = (int)v; | |
601 | + v0 = v-n; | |
602 | + if ((x < 0.0) || (v < 0.0)) return 1; | |
603 | + if (x < 1e-15) { | |
604 | + for (k=0;k<=n;k++) { | |
605 | + jv[k] = 0.0; | |
606 | + yv[k] = -1e308; | |
607 | + djv[k] = 0.0; | |
608 | + dyv[k] = 1e308; | |
609 | + if (v0 == 0.0) { | |
610 | + jv[0] = 1.0; | |
611 | + djv[1] = 0.5; | |
612 | + } | |
613 | + else djv[0] = 1e308; | |
614 | + } | |
615 | + vm = v; | |
616 | + return 0; | |
617 | + } | |
618 | + if (x <= 12.0) { | |
619 | + for (l=0;l<2;l++) { | |
620 | + vl = v0 + l; | |
621 | + bjvl = 1.0; | |
622 | + r = 1.0; | |
623 | + for (k=1;k<=40;k++) { | |
624 | + r *= -0.25*x2/(k*(k+vl)); | |
625 | + bjvl += r; | |
626 | + if (fabs(r) < fabs(bjvl)*1e-15) break; | |
627 | + } | |
628 | + vg = 1.0 + vl; | |
629 | + a = pow(0.5*x,vl)/gamma(vg); | |
630 | + if (l == 0) bjv0 = bjvl*a; | |
631 | + else bjv1 = bjvl*a; | |
632 | + } | |
633 | + } | |
634 | + else { | |
635 | + if (x >= 50.0) kz = 8; | |
636 | + else if (x >= 35.0) kz = 10; | |
637 | + else kz = 11; | |
638 | + for (j=0;j<2;j++) { | |
639 | + vv = 4.0*(j+v0)*(j+v0); | |
640 | + px = 1.0; | |
641 | + rp = 1.0; | |
642 | + for (k=1;k<=kz;k++) { | |
643 | + rp *= (-0.78125e-2)*(vv-pow(4.0*k-3.0,2.0))* | |
644 | + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*x2); | |
645 | + px += rp; | |
646 | + } | |
647 | + qx = 1.0; | |
648 | + rq = 1.0; | |
649 | + for (k=1;k<=kz;k++) { | |
650 | + rq *= (-0.78125e-2)*(vv-pow(4.0*k-1.0,2.0))* | |
651 | + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*x2); | |
652 | + qx += rq; | |
653 | + } | |
654 | + qx *= 0.125*(vv-1.0)/x; | |
655 | + xk = x-(0.5*(j+v0)+0.25)*M_PI; | |
656 | + a0 = sqrt(M_2_PI/x); | |
657 | + ck = cos(xk); | |
658 | + sk = sin(xk); | |
659 | + | |
660 | + if (j == 0) { | |
661 | + bjv0 = a0*(px*ck-qx*sk); | |
662 | + byv0 = a0*(px*sk+qx*ck); | |
663 | + } | |
664 | + else if (j == 1) { | |
665 | + bjv1 = a0*(px*ck-qx*sk); | |
666 | + byv1 = a0*(px*sk+qx*ck); | |
667 | + } | |
668 | + } | |
669 | + } | |
670 | + jv[0] = bjv0; | |
671 | + jv[1] = bjv1; | |
672 | + djv[0] = v0*jv[0]/x-jv[1]; | |
673 | + djv[1] = -(1.0+v0)*jv[1]/x+jv[0]; | |
674 | + if ((n >= 2) && (n <= (int)(0.9*x))) { | |
675 | + f0 = bjv0; | |
676 | + f1 = bjv1; | |
677 | + for (k=2;k<=n;k++) { | |
678 | + f = 2.0*(k+v0-1.0)*f1/x-f0; | |
679 | + jv[k] = f; | |
680 | + f0 = f1; | |
681 | + f1 = f; | |
682 | + } | |
683 | + } | |
684 | + else if (n >= 2) { | |
685 | + m = msta1(x,200); | |
686 | + if (m < n) n = m; | |
687 | + else m = msta2(x,n,15); | |
688 | + f2 = 0.0; | |
689 | + f1 = 1.0e-100; | |
690 | + for (k=m;k>=0;k--) { | |
691 | + f = 2.0*(v0+k+1.0)*f1/x-f2; | |
692 | + if (k <= n) jv[k] = f; | |
693 | + f2 = f1; | |
694 | + f1 = f; | |
695 | + } | |
696 | + if (fabs(bjv0) > fabs(bjv1)) cs = bjv0/f; | |
697 | + else cs = bjv1/f2; | |
698 | + for (k=0;k<=n;k++) { | |
699 | + jv[k] *= cs; | |
700 | + } | |
701 | + } | |
702 | + for (k=2;k<=n;k++) { | |
703 | + djv[k] = -(k+v0)*jv[k]/x+jv[k-1]; | |
704 | + } | |
705 | + if (x <= 12.0) { | |
706 | + if (v0 != 0.0) { | |
707 | + for (l=0;l<2;l++) { | |
708 | + vl = v0 +l; | |
709 | + bjvl = 1.0; | |
710 | + r = 1.0; | |
711 | + for (k=1;k<=40;k++) { | |
712 | + r *= -0.25*x2/(k*(k-vl)); | |
713 | + bjvl += r; | |
714 | + if (fabs(r) < fabs(bjvl)*1e-15) break; | |
715 | + } | |
716 | + vg = 1.0-vl; | |
717 | + b = pow(2.0/x,vl)/gamma(vg); | |
718 | + if (l == 0) bju0 = bjvl*b; | |
719 | + else bju1 = bjvl*b; | |
720 | + } | |
721 | + pv0 = M_PI*v0; | |
722 | + pv1 = M_PI*(1.0+v0); | |
723 | + byv0 = (bjv0*cos(pv0)-bju0)/sin(pv0); | |
724 | + byv1 = (bjv1*cos(pv1)-bju1)/sin(pv1); | |
725 | + } | |
726 | + else { | |
727 | + ec = log(0.5*x)+el; | |
728 | + cs0 = 0.0; | |
729 | + w0 = 0.0; | |
730 | + r0 = 1.0; | |
731 | + for (k=1;k<=30;k++) { | |
732 | + w0 += 1.0/k; | |
733 | + r0 *= -0.25*x2/(k*k); | |
734 | + cs0 += r0*w0; | |
735 | + } | |
736 | + byv0 = M_2_PI*(ec*bjv0-cs0); | |
737 | + cs1 = 1.0; | |
738 | + w1 = 0.0; | |
739 | + r1 = 1.0; | |
740 | + for (k=1;k<=30;k++) { | |
741 | + w1 += 1.0/k; | |
742 | + r1 *= -0.25*x2/(k*(k+1)); | |
743 | + cs1 += r1*(2.0*w1+1.0/(k+1.0)); | |
744 | + } | |
745 | + byv1 = M_2_PI*(ec*bjv1-1.0/x-0.25*x*cs1); | |
746 | + } | |
747 | + } | |
748 | + yv[0] = byv0; | |
749 | + yv[1] = byv1; | |
750 | + for (k=2;k<=n;k++) { | |
751 | + byvk = 2.0*(v0+k-1.0)*byv1/x-byv0; | |
752 | + yv[k] = byvk; | |
753 | + byv0 = byv1; | |
754 | + byv1 = byvk; | |
755 | + } | |
756 | + dyv[0] = v0*yv[0]/x-yv[1]; | |
757 | + for (k=1;k<=n;k++) { | |
758 | + dyv[k] = -(k+v0)*yv[k]/x+yv[k-1]; | |
759 | + } | |
760 | + vm = n + v0; | |
761 | + return 0; | |
762 | +} | |
763 | + | |
764 | +template<typename P> | |
765 | +int bessjyv_sph(int v, P z, P &vm, P* cjv, | |
766 | + P* cyv, P* cjvp, P* cyvp) | |
767 | +{ | |
768 | + //first, compute the bessel functions of fractional order | |
769 | + bessjyv<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); | |
770 | + | |
771 | + //iterate through each and scale | |
772 | + for(int n = 0; n<=v; n++) | |
773 | + { | |
774 | + | |
775 | + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); | |
776 | + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); | |
777 | + | |
778 | + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); | |
779 | + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); | |
780 | + } | |
781 | + | |
782 | + return 0; | |
783 | + | |
784 | +} | |
785 | + | |
786 | +template<typename P> | |
787 | +int cbessjy01(complex<P> z,complex<P> &cj0,complex<P> &cj1, | |
788 | + complex<P> &cy0,complex<P> &cy1,complex<P> &cj0p, | |
789 | + complex<P> &cj1p,complex<P> &cy0p,complex<P> &cy1p) | |
790 | +{ | |
791 | + complex<P> z1,z2,cr,cp,cs,cp0,cq0,cp1,cq1,ct1,ct2,cu; | |
792 | + P a0,w0,w1; | |
793 | + int k,kz; | |
794 | + | |
795 | + static P a[] = { | |
796 | + -7.03125e-2, | |
797 | + 0.112152099609375, | |
798 | + -0.5725014209747314, | |
799 | + 6.074042001273483, | |
800 | + -1.100171402692467e2, | |
801 | + 3.038090510922384e3, | |
802 | + -1.188384262567832e5, | |
803 | + 6.252951493434797e6, | |
804 | + -4.259392165047669e8, | |
805 | + 3.646840080706556e10, | |
806 | + -3.833534661393944e12, | |
807 | + 4.854014686852901e14, | |
808 | + -7.286857349377656e16, | |
809 | + 1.279721941975975e19}; | |
810 | + static P b[] = { | |
811 | + 7.32421875e-2, | |
812 | + -0.2271080017089844, | |
813 | + 1.727727502584457, | |
814 | + -2.438052969955606e1, | |
815 | + 5.513358961220206e2, | |
816 | + -1.825775547429318e4, | |
817 | + 8.328593040162893e5, | |
818 | + -5.006958953198893e7, | |
819 | + 3.836255180230433e9, | |
820 | + -3.649010818849833e11, | |
821 | + 4.218971570284096e13, | |
822 | + -5.827244631566907e15, | |
823 | + 9.476288099260110e17, | |
824 | + -1.792162323051699e20}; | |
825 | + static P a1[] = { | |
826 | + 0.1171875, | |
827 | + -0.1441955566406250, | |
828 | + 0.6765925884246826, | |
829 | + -6.883914268109947, | |
830 | + 1.215978918765359e2, | |
831 | + -3.302272294480852e3, | |
832 | + 1.276412726461746e5, | |
833 | + -6.656367718817688e6, | |
834 | + 4.502786003050393e8, | |
835 | + -3.833857520742790e10, | |
836 | + 4.011838599133198e12, | |
837 | + -5.060568503314727e14, | |
838 | + 7.572616461117958e16, | |
839 | + -1.326257285320556e19}; | |
840 | + static P b1[] = { | |
841 | + -0.1025390625, | |
842 | + 0.2775764465332031, | |
843 | + -1.993531733751297, | |
844 | + 2.724882731126854e1, | |
845 | + -6.038440767050702e2, | |
846 | + 1.971837591223663e4, | |
847 | + -8.902978767070678e5, | |
848 | + 5.310411010968522e7, | |
849 | + -4.043620325107754e9, | |
850 | + 3.827011346598605e11, | |
851 | + -4.406481417852278e13, | |
852 | + 6.065091351222699e15, | |
853 | + -9.833883876590679e17, | |
854 | + 1.855045211579828e20}; | |
855 | + | |
856 | + a0 = abs(z); | |
857 | + z2 = z*z; | |
858 | + z1 = z; | |
859 | + if (a0 == 0.0) { | |
860 | + cj0 = cone; | |
861 | + cj1 = czero; | |
862 | + cy0 = complex<P>(-1e308,0); | |
863 | + cy1 = complex<P>(-1e308,0); | |
864 | + cj0p = czero; | |
865 | + cj1p = complex<P>(0.5,0.0); | |
866 | + cy0p = complex<P>(1e308,0); | |
867 | + cy1p = complex<P>(1e308,0); | |
868 | + return 0; | |
869 | + } | |
870 | + if (real(z) < 0.0) z1 = -z; | |
871 | + if (a0 <= 12.0) { | |
872 | + cj0 = cone; | |
873 | + cr = cone; | |
874 | + for (k=1;k<=40;k++) { | |
875 | + cr *= -0.25*z2/(P)(k*k); | |
876 | + cj0 += cr; | |
877 | + if (abs(cr) < abs(cj0)*eps) break; | |
878 | + } | |
879 | + cj1 = cone; | |
880 | + cr = cone; | |
881 | + for (k=1;k<=40;k++) { | |
882 | + cr *= -0.25*z2/(k*(k+1.0)); | |
883 | + cj1 += cr; | |
884 | + if (abs(cr) < abs(cj1)*eps) break; | |
885 | + } | |
886 | + cj1 *= 0.5*z1; | |
887 | + w0 = 0.0; | |
888 | + cr = cone; | |
889 | + cs = czero; | |
890 | + for (k=1;k<=40;k++) { | |
891 | + w0 += 1.0/k; | |
892 | + cr *= -0.25*z2/(P)(k*k); | |
893 | + cp = cr*w0; | |
894 | + cs += cp; | |
895 | + if (abs(cp) < abs(cs)*eps) break; | |
896 | + } | |
897 | + cy0 = M_2_PI*((log(0.5*z1)+el)*cj0-cs); | |
898 | + w1 = 0.0; | |
899 | + cr = cone; | |
900 | + cs = cone; | |
901 | + for (k=1;k<=40;k++) { | |
902 | + w1 += 1.0/k; | |
903 | + cr *= -0.25*z2/(k*(k+1.0)); | |
904 | + cp = cr*(2.0*w1+1.0/(k+1.0)); | |
905 | + cs += cp; | |
906 | + if (abs(cp) < abs(cs)*eps) break; | |
907 | + } | |
908 | + cy1 = M_2_PI*((log(0.5*z1)+el)*cj1-1.0/z1-0.25*z1*cs); | |
909 | + } | |
910 | + else { | |
911 | + if (a0 >= 50.0) kz = 8; // can be changed to 10 | |
912 | + else if (a0 >= 35.0) kz = 10; // " " " 12 | |
913 | + else kz = 12; // " " " 14 | |
914 | + ct1 = z1 - M_PI_4; | |
915 | + cp0 = cone; | |
916 | + for (k=0;k<kz;k++) { | |
917 | + cp0 += a[k]*pow(z1,-2.0*k-2.0); | |
918 | + } | |
919 | + cq0 = -0.125/z1; | |
920 | + for (k=0;k<kz;k++) { | |
921 | + cq0 += b[k]*pow(z1,-2.0*k-3.0); | |
922 | + } | |
923 | + cu = sqrt(M_2_PI/z1); | |
924 | + cj0 = cu*(cp0*cos(ct1)-cq0*sin(ct1)); | |
925 | + cy0 = cu*(cp0*sin(ct1)+cq0*cos(ct1)); | |
926 | + ct2 = z1 - 0.75*M_PI; | |
927 | + cp1 = cone; | |
928 | + for (k=0;k<kz;k++) { | |
929 | + cp1 += a1[k]*pow(z1,-2.0*k-2.0); | |
930 | + } | |
931 | + cq1 = 0.375/z1; | |
932 | + for (k=0;k<kz;k++) { | |
933 | + cq1 += b1[k]*pow(z1,-2.0*k-3.0); | |
934 | + } | |
935 | + cj1 = cu*(cp1*cos(ct2)-cq1*sin(ct2)); | |
936 | + cy1 = cu*(cp1*sin(ct2)+cq1*cos(ct2)); | |
937 | + } | |
938 | + if (real(z) < 0.0) { | |
939 | + if (imag(z) < 0.0) { | |
940 | + cy0 -= 2.0*cii*cj0; | |
941 | + cy1 = -(cy1-2.0*cii*cj1); | |
942 | + } | |
943 | + else if (imag(z) > 0.0) { | |
944 | + cy0 += 2.0*cii*cj0; | |
945 | + cy1 = -(cy1+2.0*cii*cj1); | |
946 | + } | |
947 | + cj1 = -cj1; | |
948 | + } | |
949 | + cj0p = -cj1; | |
950 | + cj1p = cj0-cj1/z; | |
951 | + cy0p = -cy1; | |
952 | + cy1p = cy0-cy1/z; | |
953 | + return 0; | |
954 | +} | |
955 | + | |
956 | +template<typename P> | |
957 | +int cbessjyna(int n,complex<P> z,int &nm,complex<P> *cj, | |
958 | + complex<P> *cy,complex<P> *cjp,complex<P> *cyp) | |
959 | +{ | |
960 | + complex<P> cbj0,cbj1,cby0,cby1,cj0,cjk,cj1,cf,cf1,cf2; | |
961 | + complex<P> cs,cg0,cg1,cyk,cyl1,cyl2,cylk,cp11,cp12,cp21,cp22; | |
962 | + complex<P> ch0,ch1,ch2; | |
963 | + P a0,yak,ya1,ya0,wa; | |
964 | + int m,k,lb,lb0; | |
965 | + | |
966 | + if (n < 0) return 1; | |
967 | + a0 = abs(z); | |
968 | + nm = n; | |
969 | + if (a0 < 1.0e-100) { | |
970 | + for (k=0;k<=n;k++) { | |
971 | + cj[k] = czero; | |
972 | + cy[k] = complex<P> (-1e308,0); | |
973 | + cjp[k] = czero; | |
974 | + cyp[k] = complex<P>(1e308,0); | |
975 | + } | |
976 | + cj[0] = cone; | |
977 | + cjp[1] = complex<P>(0.5,0.0); | |
978 | + return 0; | |
979 | + } | |
980 | + cbessjy01(z,cj[0],cj[1],cy[0],cy[1],cjp[0],cjp[1],cyp[0],cyp[1]); | |
981 | + cbj0 = cj[0]; | |
982 | + cbj1 = cj[1]; | |
983 | + cby0 = cy[0]; | |
984 | + cby1 = cy[1]; | |
985 | + if (n <= 1) return 0; | |
986 | + if (n < (int)0.25*a0) { | |
987 | + cj0 = cbj0; | |
988 | + cj1 = cbj1; | |
989 | + for (k=2;k<=n;k++) { | |
990 | + cjk = 2.0*(k-1.0)*cj1/z-cj0; | |
991 | + cj[k] = cjk; | |
992 | + cj0 = cj1; | |
993 | + cj1 = cjk; | |
994 | + } | |
995 | + } | |
996 | + else { | |
997 | + m = msta1(a0,200); | |
998 | + if (m < n) nm = m; | |
999 | + else m = msta2(a0,n,15); | |
1000 | + cf2 = czero; | |
1001 | + cf1 = complex<P> (1.0e-100,0.0); | |
1002 | + for (k=m;k>=0;k--) { | |
1003 | + cf = 2.0*(k+1.0)*cf1/z-cf2; | |
1004 | + if (k <=nm) cj[k] = cf; | |
1005 | + cf2 = cf1; | |
1006 | + cf1 = cf; | |
1007 | + } | |
1008 | + if (abs(cbj0) > abs(cbj1)) cs = cbj0/cf; | |
1009 | + else cs = cbj1/cf2; | |
1010 | + for (k=0;k<=nm;k++) { | |
1011 | + cj[k] *= cs; | |
1012 | + } | |
1013 | + } | |
1014 | + for (k=2;k<=nm;k++) { | |
1015 | + cjp[k] = cj[k-1]-(P)k*cj[k]/z; | |
1016 | + } | |
1017 | + ya0 = abs(cby0); | |
1018 | + lb = 0; | |
1019 | + cg0 = cby0; | |
1020 | + cg1 = cby1; | |
1021 | + for (k=2;k<=nm;k++) { | |
1022 | + cyk = 2.0*(k-1.0)*cg1/z-cg0; | |
1023 | + yak = abs(cyk); | |
1024 | + ya1 = abs(cg0); | |
1025 | + if ((yak < ya0) && (yak < ya1)) lb = k; | |
1026 | + cy[k] = cyk; | |
1027 | + cg0 = cg1; | |
1028 | + cg1 = cyk; | |
1029 | + } | |
1030 | + lb0 = 0; | |
1031 | + if ((lb > 4) && (imag(z) != 0.0)) { | |
1032 | + while (lb != lb0) { | |
1033 | + ch2 = cone; | |
1034 | + ch1 = czero; | |
1035 | + lb0 = lb; | |
1036 | + for (k=lb;k>=1;k--) { | |
1037 | + ch0 = 2.0*k*ch1/z-ch2; | |
1038 | + ch2 = ch1; | |
1039 | + ch1 = ch0; | |
1040 | + } | |
1041 | + cp12 = ch0; | |
1042 | + cp22 = ch2; | |
1043 | + ch2 = czero; | |
1044 | + ch1 = cone; | |
1045 | + for (k=lb;k>=1;k--) { | |
1046 | + ch0 = 2.0*k*ch1/z-ch2; | |
1047 | + ch2 = ch1; | |
1048 | + ch1 = ch0; | |
1049 | + } | |
1050 | + cp11 = ch0; | |
1051 | + cp21 = ch2; | |
1052 | + if (lb == nm) | |
1053 | + cj[lb+1] = 2.0*lb*cj[lb]/z-cj[lb-1]; | |
1054 | + if (abs(cj[0]) > abs(cj[1])) { | |
1055 | + cy[lb+1] = (cj[lb+1]*cby0-2.0*cp11/(M_PI*z))/cj[0]; | |
1056 | + cy[lb] = (cj[lb]*cby0+2.0*cp12/(M_PI*z))/cj[0]; | |
1057 | + } | |
1058 | + else { | |
1059 | + cy[lb+1] = (cj[lb+1]*cby1-2.0*cp21/(M_PI*z))/cj[1]; | |
1060 | + cy[lb] = (cj[lb]*cby1+2.0*cp22/(M_PI*z))/cj[1]; | |
1061 | + } | |
1062 | + cyl2 = cy[lb+1]; | |
1063 | + cyl1 = cy[lb]; | |
1064 | + for (k=lb-1;k>=0;k--) { | |
1065 | + cylk = 2.0*(k+1.0)*cyl1/z-cyl2; | |
1066 | + cy[k] = cylk; | |
1067 | + cyl2 = cyl1; | |
1068 | + cyl1 = cylk; | |
1069 | + } | |
1070 | + cyl1 = cy[lb]; | |
1071 | + cyl2 = cy[lb+1]; | |
1072 | + for (k=lb+1;k<n;k++) { | |
1073 | + cylk = 2.0*k*cyl2/z-cyl1; | |
1074 | + cy[k+1] = cylk; | |
1075 | + cyl1 = cyl2; | |
1076 | + cyl2 = cylk; | |
1077 | + } | |
1078 | + for (k=2;k<=nm;k++) { | |
1079 | + wa = abs(cy[k]); | |
1080 | + if (wa < abs(cy[k-1])) lb = k; | |
1081 | + } | |
1082 | + } | |
1083 | + } | |
1084 | + for (k=2;k<=nm;k++) { | |
1085 | + cyp[k] = cy[k-1]-(P)k*cy[k]/z; | |
1086 | + } | |
1087 | + return 0; | |
1088 | +} | |
1089 | + | |
1090 | +template<typename P> | |
1091 | +int cbessjynb(int n,complex<P> z,int &nm,complex<P> *cj, | |
1092 | + complex<P> *cy,complex<P> *cjp,complex<P> *cyp) | |
1093 | +{ | |
1094 | + complex<P> cf,cf0,cf1,cf2,cbs,csu,csv,cs0,ce; | |
1095 | + complex<P> ct1,cp0,cq0,cp1,cq1,cu,cbj0,cby0,cbj1,cby1; | |
1096 | + complex<P> cyy,cbjk,ct2; | |
1097 | + P a0,y0; | |
1098 | + int k,m; | |
1099 | + static P a[] = { | |
1100 | + -0.7031250000000000e-1, | |
1101 | + 0.1121520996093750, | |
1102 | + -0.5725014209747314, | |
1103 | + 6.074042001273483}; | |
1104 | + static P b[] = { | |
1105 | + 0.7324218750000000e-1, | |
1106 | + -0.2271080017089844, | |
1107 | + 1.727727502584457, | |
1108 | + -2.438052969955606e1}; | |
1109 | + static P a1[] = { | |
1110 | + 0.1171875, | |
1111 | + -0.1441955566406250, | |
1112 | + 0.6765925884246826, | |
1113 | + -6.883914268109947}; | |
1114 | + static P b1[] = { | |
1115 | + -0.1025390625, | |
1116 | + 0.2775764465332031, | |
1117 | + -1.993531733751297, | |
1118 | + 2.724882731126854e1}; | |
1119 | + | |
1120 | + y0 = abs(imag(z)); | |
1121 | + a0 = abs(z); | |
1122 | + nm = n; | |
1123 | + if (a0 < 1.0e-100) { | |
1124 | + for (k=0;k<=n;k++) { | |
1125 | + cj[k] = czero; | |
1126 | + cy[k] = complex<P> (-1e308,0); | |
1127 | + cjp[k] = czero; | |
1128 | + cyp[k] = complex<P>(1e308,0); | |
1129 | + } | |
1130 | + cj[0] = cone; | |
1131 | + cjp[1] = complex<P>(0.5,0.0); | |
1132 | + return 0; | |
1133 | + } | |
1134 | + if ((a0 <= 300.0) || (n > (int)(0.25*a0))) { | |
1135 | + if (n == 0) nm = 1; | |
1136 | + m = msta1(a0,200); | |
1137 | + if (m < nm) nm = m; | |
1138 | + else m = msta2(a0,nm,15); | |
1139 | + cbs = czero; | |
1140 | + csu = czero; | |
1141 | + csv = czero; | |
1142 | + cf2 = czero; | |
1143 | + cf1 = complex<P> (1.0e-100,0.0); | |
1144 | + for (k=m;k>=0;k--) { | |
1145 | + cf = 2.0*(k+1.0)*cf1/z-cf2; | |
1146 | + if (k <= nm) cj[k] = cf; | |
1147 | + if (((k & 1) == 0) && (k != 0)) { | |
1148 | + if (y0 <= 1.0) { | |
1149 | + cbs += 2.0*cf; | |
1150 | + } | |
1151 | + else { | |
1152 | + cbs += (-1)*((k & 2)-1)*2.0*cf; | |
1153 | + } | |
1154 | + csu += (P)((-1)*((k & 2)-1))*cf/(P)k; | |
1155 | + } | |
1156 | + else if (k > 1) { | |
1157 | + csv += (P)((-1)*((k & 2)-1)*k)*cf/(P)(k*k-1.0); | |
1158 | + } | |
1159 | + cf2 = cf1; | |
1160 | + cf1 = cf; | |
1161 | + } | |
1162 | + if (y0 <= 1.0) cs0 = cbs+cf; | |
1163 | + else cs0 = (cbs+cf)/cos(z); | |
1164 | + for (k=0;k<=nm;k++) { | |
1165 | + cj[k] /= cs0; | |
1166 | + } | |
1167 | + ce = log(0.5*z)+el; | |
1168 | + cy[0] = M_2_PI*(ce*cj[0]-4.0*csu/cs0); | |
1169 | + cy[1] = M_2_PI*(-cj[0]/z+(ce-1.0)*cj[1]-4.0*csv/cs0); | |
1170 | + } | |
1171 | + else { | |
1172 | + ct1 = z-M_PI_4; | |
1173 | + cp0 = cone; | |
1174 | + for (k=0;k<4;k++) { | |
1175 | + cp0 += a[k]*pow(z,-2.0*k-2.0); | |
1176 | + } | |
1177 | + cq0 = -0.125/z; | |
1178 | + for (k=0;k<4;k++) { | |
1179 | + cq0 += b[k] *pow(z,-2.0*k-3.0); | |
1180 | + } | |
1181 | + cu = sqrt(M_2_PI/z); | |
1182 | + cbj0 = cu*(cp0*cos(ct1)-cq0*sin(ct1)); | |
1183 | + cby0 = cu*(cp0*sin(ct1)+cq0*cos(ct1)); | |
1184 | + cj[0] = cbj0; | |
1185 | + cy[0] = cby0; | |
1186 | + ct2 = z-0.75*M_PI; | |
1187 | + cp1 = cone; | |
1188 | + for (k=0;k<4;k++) { | |
1189 | + cp1 += a1[k]*pow(z,-2.0*k-2.0); | |
1190 | + } | |
1191 | + cq1 = 0.375/z; | |
1192 | + for (k=0;k<4;k++) { | |
1193 | + cq1 += b1[k]*pow(z,-2.0*k-3.0); | |
1194 | + } | |
1195 | + cbj1 = cu*(cp1*cos(ct2)-cq1*sin(ct2)); | |
1196 | + cby1 = cu*(cp1*sin(ct2)+cq1*cos(ct2)); | |
1197 | + cj[1] = cbj1; | |
1198 | + cy[1] = cby1; | |
1199 | + for (k=2;k<=n;k++) { | |
1200 | + cbjk = 2.0*(k-1.0)*cbj1/z-cbj0; | |
1201 | + cj[k] = cbjk; | |
1202 | + cbj0 = cbj1; | |
1203 | + cbj1 = cbjk; | |
1204 | + } | |
1205 | + } | |
1206 | + cjp[0] = -cj[1]; | |
1207 | + for (k=1;k<=nm;k++) { | |
1208 | + cjp[k] = cj[k-1]-(P)k*cj[k]/z; | |
1209 | + } | |
1210 | + if (abs(cj[0]) > 1.0) | |
1211 | + cy[1] = (cj[1]*cy[0]-2.0/(M_PI*z))/cj[0]; | |
1212 | + for (k=2;k<=nm;k++) { | |
1213 | + if (abs(cj[k-1]) >= abs(cj[k-2])) | |
1214 | + cyy = (cj[k]*cy[k-1]-2.0/(M_PI*z))/cj[k-1]; | |
1215 | + else | |
1216 | + cyy = (cj[k]*cy[k-2]-4.0*(k-1.0)/(M_PI*z*z))/cj[k-2]; | |
1217 | + cy[k] = cyy; | |
1218 | + } | |
1219 | + cyp[0] = -cy[1]; | |
1220 | + for (k=1;k<=nm;k++) { | |
1221 | + cyp[k] = cy[k-1]-(P)k*cy[k]/z; | |
1222 | + } | |
1223 | + | |
1224 | + return 0; | |
1225 | +} | |
1226 | + | |
1227 | +template<typename P> | |
1228 | +int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv, | |
1229 | + complex<P>*cyv,complex<P>*cjvp,complex<P>*cyvp) | |
1230 | +{ | |
1231 | + complex<P> z1,z2,zk,cjvl,cr,ca,cjv0,cjv1,cpz,crp; | |
1232 | + complex<P> cqz,crq,ca0,cck,csk,cyv0,cyv1,cju0,cju1,cb; | |
1233 | + complex<P> cs,cs0,cr0,cs1,cr1,cec,cf,cf0,cf1,cf2; | |
1234 | + complex<P> cfac0,cfac1,cg0,cg1,cyk,cp11,cp12,cp21,cp22; | |
1235 | + complex<P> ch0,ch1,ch2,cyl1,cyl2,cylk; | |
1236 | + | |
1237 | + P a0,v0,pv0,pv1,vl,ga,gb,vg,vv,w0,w1,ya0,yak,ya1,wa; | |
1238 | + int j,n,k,kz,l,lb,lb0,m; | |
1239 | + | |
1240 | + a0 = abs(z); | |
1241 | + z1 = z; | |
1242 | + z2 = z*z; | |
1243 | + n = (int)v; | |
1244 | + | |
1245 | + | |
1246 | + v0 = v-n; | |
1247 | + | |
1248 | + pv0 = M_PI*v0; | |
1249 | + pv1 = M_PI*(1.0+v0); | |
1250 | + if (a0 < 1.0e-100) { | |
1251 | + for (k=0;k<=n;k++) { | |
1252 | + cjv[k] = czero; | |
1253 | + cyv[k] = complex<P> (-1e308,0); | |
1254 | + cjvp[k] = czero; | |
1255 | + cyvp[k] = complex<P> (1e308,0); | |
1256 | + | |
1257 | + } | |
1258 | + if (v0 == 0.0) { | |
1259 | + cjv[0] = cone; | |
1260 | + cjvp[1] = complex<P> (0.5,0.0); | |
1261 | + } | |
1262 | + else { | |
1263 | + cjvp[0] = complex<P> (1e308,0); | |
1264 | + } | |
1265 | + vm = v; | |
1266 | + return 0; | |
1267 | + } | |
1268 | + if (real(z1) < 0.0) z1 = -z; | |
1269 | + if (a0 <= 12.0) { | |
1270 | + for (l=0;l<2;l++) { | |
1271 | + vl = v0+l; | |
1272 | + cjvl = cone; | |
1273 | + cr = cone; | |
1274 | + for (k=1;k<=40;k++) { | |
1275 | + cr *= -0.25*z2/(k*(k+vl)); | |
1276 | + cjvl += cr; | |
1277 | + if (abs(cr) < abs(cjvl)*eps) break; | |
1278 | + } | |
1279 | + vg = 1.0 + vl; | |
1280 | + ga = gamma(vg); | |
1281 | + ca = pow(0.5*z1,vl)/ga; | |
1282 | + if (l == 0) cjv0 = cjvl*ca; | |
1283 | + else cjv1 = cjvl*ca; | |
1284 | + } | |
1285 | + } | |
1286 | + else { | |
1287 | + if (a0 >= 50.0) kz = 8; | |
1288 | + else if (a0 >= 35.0) kz = 10; | |
1289 | + else kz = 11; | |
1290 | + for (j=0;j<2;j++) { | |
1291 | + vv = 4.0*(j+v0)*(j+v0); | |
1292 | + cpz = cone; | |
1293 | + crp = cone; | |
1294 | + for (k=1;k<=kz;k++) { | |
1295 | + crp = -0.78125e-2*crp*(vv-pow(4.0*k-3.0,2.0))* | |
1296 | + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*z2); | |
1297 | + cpz += crp; | |
1298 | + } | |
1299 | + cqz = cone; | |
1300 | + crq = cone; | |
1301 | + for (k=1;k<=kz;k++) { | |
1302 | + crq = -0.78125e-2*crq*(vv-pow(4.0*k-1.0,2.0))* | |
1303 | + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*z2); | |
1304 | + cqz += crq; | |
1305 | + } | |
1306 | + cqz *= 0.125*(vv-1.0)/z1; | |
1307 | + zk = z1-(0.5*(j+v0)+0.25)*M_PI; | |
1308 | + ca0 = sqrt(M_2_PI/z1); | |
1309 | + cck = cos(zk); | |
1310 | + csk = sin(zk); | |
1311 | + if (j == 0) { | |
1312 | + cjv0 = ca0*(cpz*cck-cqz*csk); | |
1313 | + cyv0 = ca0*(cpz*csk+cqz+cck); | |
1314 | + } | |
1315 | + else { | |
1316 | + cjv1 = ca0*(cpz*cck-cqz*csk); | |
1317 | + cyv1 = ca0*(cpz*csk+cqz*cck); | |
1318 | + } | |
1319 | + } | |
1320 | + } | |
1321 | + if (a0 <= 12.0) { | |
1322 | + if (v0 != 0.0) { | |
1323 | + for (l=0;l<2;l++) { | |
1324 | + vl = v0+l; | |
1325 | + cjvl = cone; | |
1326 | + cr = cone; | |
1327 | + for (k=1;k<=40;k++) { | |
1328 | + cr *= -0.25*z2/(k*(k-vl)); | |
1329 | + cjvl += cr; | |
1330 | + if (abs(cr) < abs(cjvl)*eps) break; | |
1331 | + } | |
1332 | + vg = 1.0-vl; | |
1333 | + gb = gamma(vg); | |
1334 | + cb = pow(2.0/z1,vl)/gb; | |
1335 | + if (l == 0) cju0 = cjvl*cb; | |
1336 | + else cju1 = cjvl*cb; | |
1337 | + } | |
1338 | + cyv0 = (cjv0*cos(pv0)-cju0)/sin(pv0); | |
1339 | + cyv1 = (cjv1*cos(pv1)-cju1)/sin(pv1); | |
1340 | + } | |
1341 | + else { | |
1342 | + cec = log(0.5*z1)+el; | |
1343 | + cs0 = czero; | |
1344 | + w0 = 0.0; | |
1345 | + cr0 = cone; | |
1346 | + for (k=1;k<=30;k++) { | |
1347 | + w0 += 1.0/k; | |
1348 | + cr0 *= -0.25*z2/(P)(k*k); | |
1349 | + cs0 += cr0*w0; | |
1350 | + } | |
1351 | + cyv0 = M_2_PI*(cec*cjv0-cs0); | |
1352 | + cs1 = cone; | |
1353 | + w1 = 0.0; | |
1354 | + cr1 = cone; | |
1355 | + for (k=1;k<=30;k++) { | |
1356 | + w1 += 1.0/k; | |
1357 | + cr1 *= -0.25*z2/(k*(k+1.0)); | |
1358 | + cs1 += cr1*(2.0*w1+1.0/(k+1.0)); | |
1359 | + } | |
1360 | + cyv1 = M_2_PI*(cec*cjv1-1.0/z1-0.25*z1*cs1); | |
1361 | + } | |
1362 | + } | |
1363 | + if (real(z) < 0.0) { | |
1364 | + cfac0 = exp(pv0*cii); | |
1365 | + cfac1 = exp(pv1*cii); | |
1366 | + if (imag(z) < 0.0) { | |
1367 | + cyv0 = cfac0*cyv0-(P)2.0*(complex<P>)cii*cos(pv0)*cjv0; | |
1368 | + cyv1 = cfac1*cyv1-(P)2.0*(complex<P>)cii*cos(pv1)*cjv1; | |
1369 | + cjv0 /= cfac0; | |
1370 | + cjv1 /= cfac1; | |
1371 | + } | |
1372 | + else if (imag(z) > 0.0) { | |
1373 | + cyv0 = cyv0/cfac0+(P)2.0*(complex<P>)cii*cos(pv0)*cjv0; | |
1374 | + cyv1 = cyv1/cfac1+(P)2.0*(complex<P>)cii*cos(pv1)*cjv1; | |
1375 | + cjv0 *= cfac0; | |
1376 | + cjv1 *= cfac1; | |
1377 | + } | |
1378 | + } | |
1379 | + cjv[0] = cjv0; | |
1380 | + cjv[1] = cjv1; | |
1381 | + if ((n >= 2) && (n <= (int)(0.25*a0))) { | |
1382 | + cf0 = cjv0; | |
1383 | + cf1 = cjv1; | |
1384 | + for (k=2;k<= n;k++) { | |
1385 | + cf = 2.0*(k+v0-1.0)*cf1/z-cf0; | |
1386 | + cjv[k] = cf; | |
1387 | + cf0 = cf1; | |
1388 | + cf1 = cf; | |
1389 | + } | |
1390 | + } | |
1391 | + else if (n >= 2) { | |
1392 | + m = msta1(a0,200); | |
1393 | + if (m < n) n = m; | |
1394 | + else m = msta2(a0,n,15); | |
1395 | + cf2 = czero; | |
1396 | + cf1 = complex<P>(1.0e-100,0.0); | |
1397 | + for (k=m;k>=0;k--) { | |
1398 | + cf = 2.0*(v0+k+1.0)*cf1/z-cf2; | |
1399 | + if (k <= n) cjv[k] = cf; | |
1400 | + cf2 = cf1; | |
1401 | + cf1 = cf; | |
1402 | + } | |
1403 | + if (abs(cjv0) > abs(cjv1)) cs = cjv0/cf; | |
1404 | + else cs = cjv1/cf2; | |
1405 | + for (k=0;k<=n;k++) { | |
1406 | + cjv[k] *= cs; | |
1407 | + } | |
1408 | + } | |
1409 | + cjvp[0] = v0*cjv[0]/z-cjv[1]; | |
1410 | + for (k=1;k<=n;k++) { | |
1411 | + cjvp[k] = -(k+v0)*cjv[k]/z+cjv[k-1]; | |
1412 | + } | |
1413 | + cyv[0] = cyv0; | |
1414 | + cyv[1] = cyv1; | |
1415 | + ya0 = abs(cyv0); | |
1416 | + lb = 0; | |
1417 | + cg0 = cyv0; | |
1418 | + cg1 = cyv1; | |
1419 | + for (k=2;k<=n;k++) { | |
1420 | + cyk = 2.0*(v0+k-1.0)*cg1/z-cg0; | |
1421 | + yak = abs(cyk); | |
1422 | + ya1 = abs(cg0); | |
1423 | + if ((yak < ya0) && (yak< ya1)) lb = k; | |
1424 | + cyv[k] = cyk; | |
1425 | + cg0 = cg1; | |
1426 | + cg1 = cyk; | |
1427 | + } | |
1428 | + lb0 = 0; | |
1429 | + if ((lb > 4) && (imag(z) != 0.0)) { | |
1430 | + while(lb != lb0) { | |
1431 | + ch2 = cone; | |
1432 | + ch1 = czero; | |
1433 | + lb0 = lb; | |
1434 | + for (k=lb;k>=1;k--) { | |
1435 | + ch0 = 2.0*(k+v0)*ch1/z-ch2; | |
1436 | + ch2 = ch1; | |
1437 | + ch1 = ch0; | |
1438 | + } | |
1439 | + cp12 = ch0; | |
1440 | + cp22 = ch2; | |
1441 | + ch2 = czero; | |
1442 | + ch1 = cone; | |
1443 | + for (k=lb;k>=1;k--) { | |
1444 | + ch0 = 2.0*(k+v0)*ch1/z-ch2; | |
1445 | + ch2 = ch1; | |
1446 | + ch1 = ch0; | |
1447 | + } | |
1448 | + cp11 = ch0; | |
1449 | + cp21 = ch2; | |
1450 | + if (lb == n) | |
1451 | + cjv[lb+1] = 2.0*(lb+v0)*cjv[lb]/z-cjv[lb-1]; | |
1452 | + if (abs(cjv[0]) > abs(cjv[1])) { | |
1453 | + cyv[lb+1] = (cjv[lb+1]*cyv0-2.0*cp11/(M_PI*z))/cjv[0]; | |
1454 | + cyv[lb] = (cjv[lb]*cyv0+2.0*cp12/(M_PI*z))/cjv[0]; | |
1455 | + } | |
1456 | + else { | |
1457 | + cyv[lb+1] = (cjv[lb+1]*cyv1-2.0*cp21/(M_PI*z))/cjv[1]; | |
1458 | + cyv[lb] = (cjv[lb]*cyv1+2.0*cp22/(M_PI*z))/cjv[1]; | |
1459 | + } | |
1460 | + cyl2 = cyv[lb+1]; | |
1461 | + cyl1 = cyv[lb]; | |
1462 | + for (k=lb-1;k>=0;k--) { | |
1463 | + cylk = 2.0*(k+v0+1.0)*cyl1/z-cyl2; | |
1464 | + cyv[k] = cylk; | |
1465 | + cyl2 = cyl1; | |
1466 | + cyl1 = cylk; | |
1467 | + } | |
1468 | + cyl1 = cyv[lb]; | |
1469 | + cyl2 = cyv[lb+1]; | |
1470 | + for (k=lb+1;k<n;k++) { | |
1471 | + cylk = 2.0*(k+v0)*cyl2/z-cyl1; | |
1472 | + cyv[k+1] = cylk; | |
1473 | + cyl1 = cyl2; | |
1474 | + cyl2 = cylk; | |
1475 | + } | |
1476 | + for (k=2;k<=n;k++) { | |
1477 | + wa = abs(cyv[k]); | |
1478 | + if (wa < abs(cyv[k-1])) lb = k; | |
1479 | + } | |
1480 | + } | |
1481 | + } | |
1482 | + cyvp[0] = v0*cyv[0]/z-cyv[1]; | |
1483 | + for (k=1;k<=n;k++) { | |
1484 | + cyvp[k] = cyv[k-1]-(k+v0)*cyv[k]/z; | |
1485 | + } | |
1486 | + vm = n+v0; | |
1487 | + return 0; | |
1488 | +} | |
1489 | + | |
1490 | +template<typename P> | |
1491 | +int cbessjyva_sph(int v,complex<P> z,P &vm,complex<P>*cjv, | |
1492 | + complex<P>*cyv,complex<P>*cjvp,complex<P>*cyvp) | |
1493 | +{ | |
1494 | + //first, compute the bessel functions of fractional order | |
1495 | + cbessjyva<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); | |
1496 | + | |
1497 | + //iterate through each and scale | |
1498 | + for(int n = 0; n<=v; n++) | |
1499 | + { | |
1500 | + | |
1501 | + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); | |
1502 | + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); | |
1503 | + | |
1504 | + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); | |
1505 | + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); | |
1506 | + } | |
1507 | + | |
1508 | + return 0; | |
1509 | + | |
1510 | +} | |
1511 | + | |
1512 | +} //end namespace rts | |
1513 | + | |
1514 | + | |
1515 | +#endif | ... | ... |
1 | +#ifndef STIM_VEC3_H | |
2 | +#define STIM_VEC3_H | |
3 | + | |
4 | + | |
5 | +#include <stim/cuda/cudatools/callable.h> | |
6 | +#include <cmath> | |
7 | + | |
8 | + | |
9 | +namespace stim{ | |
10 | + | |
11 | + | |
12 | +/// A class designed to act as a 3D vector with CUDA compatibility | |
13 | +template<typename T> | |
14 | +class vec3{ | |
15 | + | |
16 | +protected: | |
17 | + T ptr[3]; | |
18 | + | |
19 | +public: | |
20 | + | |
21 | + CUDA_CALLABLE vec3(){} | |
22 | + | |
23 | + CUDA_CALLABLE vec3(T v){ | |
24 | + ptr[0] = ptr[1] = ptr[2] = v; | |
25 | + } | |
26 | + | |
27 | + CUDA_CALLABLE vec3(T x, T y, T z){ | |
28 | + ptr[0] = x; | |
29 | + ptr[1] = y; | |
30 | + ptr[2] = z; | |
31 | + } | |
32 | + | |
33 | + //copy constructor | |
34 | + CUDA_CALLABLE vec3( const vec3<T>& other){ | |
35 | + ptr[0] = other.ptr[0]; | |
36 | + ptr[1] = other.ptr[1]; | |
37 | + ptr[2] = other.ptr[2]; | |
38 | + } | |
39 | + | |
40 | + //access an element using an index | |
41 | + CUDA_CALLABLE T& operator[](size_t idx){ | |
42 | + return ptr[idx]; | |
43 | + } | |
44 | + | |
45 | + CUDA_CALLABLE T* data(){ | |
46 | + return ptr; | |
47 | + } | |
48 | + | |
49 | +/// Casting operator. Creates a new vector with a new type U. | |
50 | + template< typename U > | |
51 | + CUDA_CALLABLE operator vec3<U>(){ | |
52 | + vec3<U> result; | |
53 | + result.ptr[0] = (U)ptr[0]; | |
54 | + result.ptr[1] = (U)ptr[1]; | |
55 | + result.ptr[2] = (U)ptr[2]; | |
56 | + | |
57 | + return result; | |
58 | + } | |
59 | + | |
60 | + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter) | |
61 | + CUDA_CALLABLE T len_sq() const{ | |
62 | + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2]; | |
63 | + } | |
64 | + | |
65 | + /// computes the Euclidean length of the vector | |
66 | + CUDA_CALLABLE T len() const{ | |
67 | + return sqrt(len_sq()); | |
68 | + } | |
69 | + | |
70 | + | |
71 | + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi]) | |
72 | + CUDA_CALLABLE vec3<T> cart2sph() const{ | |
73 | + vec3<T> sph; | |
74 | + sph.ptr[0] = len(); | |
75 | + sph.ptr[1] = std::atan2(ptr[1], ptr[0]); | |
76 | + if(sph.ptr[0] == 0) | |
77 | + sph.ptr[2] = 0; | |
78 | + else | |
79 | + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]); | |
80 | + return sph; | |
81 | + } | |
82 | + | |
83 | + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi]) | |
84 | + CUDA_CALLABLE vec3<T> sph2cart() const{ | |
85 | + vec3<T> cart; | |
86 | + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]); | |
87 | + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]); | |
88 | + cart.ptr[2] = ptr[0] * std::cos(ptr[2]); | |
89 | + | |
90 | + return cart; | |
91 | + } | |
92 | + | |
93 | + /// Computes the normalized vector (where each coordinate is divided by the L2 norm) | |
94 | + CUDA_CALLABLE vec3<T> norm() const{ | |
95 | + vec3<T> result; | |
96 | + T l = len(); //compute the vector length | |
97 | + return (*this) / l; | |
98 | + } | |
99 | + | |
100 | + /// Computes the cross product of a 3-dimensional vector | |
101 | + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{ | |
102 | + | |
103 | + vec3<T> result; | |
104 | + | |
105 | + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]); | |
106 | + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]); | |
107 | + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]); | |
108 | + | |
109 | + return result; | |
110 | + } | |
111 | + | |
112 | + /// Compute the Euclidean inner (dot) product | |
113 | + CUDA_CALLABLE T dot(vec3<T> rhs) const{ | |
114 | + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2]; | |
115 | + } | |
116 | + | |
117 | + /// Arithmetic addition operator | |
118 | + | |
119 | + /// @param rhs is the right-hand-side operator for the addition | |
120 | + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{ | |
121 | + vec3<T> result; | |
122 | + result.ptr[0] = ptr[0] + rhs[0]; | |
123 | + result.ptr[1] = ptr[1] + rhs[1]; | |
124 | + result.ptr[2] = ptr[2] + rhs[2]; | |
125 | + return result; | |
126 | + } | |
127 | + | |
128 | + /// Arithmetic addition to a scalar | |
129 | + | |
130 | + /// @param rhs is the right-hand-side operator for the addition | |
131 | + CUDA_CALLABLE vec3<T> operator+(T rhs) const{ | |
132 | + vec3<T> result; | |
133 | + result.ptr[0] = ptr[0] + rhs; | |
134 | + result.ptr[1] = ptr[1] + rhs; | |
135 | + result.ptr[2] = ptr[2] + rhs; | |
136 | + return result; | |
137 | + } | |
138 | + | |
139 | + /// Arithmetic subtraction operator | |
140 | + | |
141 | + /// @param rhs is the right-hand-side operator for the subtraction | |
142 | + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{ | |
143 | + vec3<T> result; | |
144 | + result.ptr[0] = ptr[0] - rhs[0]; | |
145 | + result.ptr[1] = ptr[1] - rhs[1]; | |
146 | + result.ptr[2] = ptr[2] - rhs[2]; | |
147 | + return result; | |
148 | + } | |
149 | + /// Arithmetic subtraction to a scalar | |
150 | + | |
151 | + /// @param rhs is the right-hand-side operator for the addition | |
152 | + CUDA_CALLABLE vec3<T> operator-(T rhs) const{ | |
153 | + vec3<T> result; | |
154 | + result.ptr[0] = ptr[0] - rhs; | |
155 | + result.ptr[1] = ptr[1] - rhs; | |
156 | + result.ptr[2] = ptr[2] - rhs; | |
157 | + return result; | |
158 | + } | |
159 | + | |
160 | + /// Arithmetic scalar multiplication operator | |
161 | + | |
162 | + /// @param rhs is the right-hand-side operator for the subtraction | |
163 | + CUDA_CALLABLE vec3<T> operator*(T rhs) const{ | |
164 | + vec3<T> result; | |
165 | + result.ptr[0] = ptr[0] * rhs; | |
166 | + result.ptr[1] = ptr[1] * rhs; | |
167 | + result.ptr[2] = ptr[2] * rhs; | |
168 | + return result; | |
169 | + } | |
170 | + | |
171 | + /// Arithmetic scalar division operator | |
172 | + | |
173 | + /// @param rhs is the right-hand-side operator for the subtraction | |
174 | + CUDA_CALLABLE vec3<T> operator/(T rhs) const{ | |
175 | + return (*this) * ((T)1.0/rhs); | |
176 | + } | |
177 | + | |
178 | + /// Multiplication by a scalar, followed by assignment | |
179 | + CUDA_CALLABLE vec3<T> operator*=(T rhs){ | |
180 | + ptr[0] = ptr[0] * rhs; | |
181 | + ptr[1] = ptr[1] * rhs; | |
182 | + ptr[2] = ptr[2] * rhs; | |
183 | + return *this; | |
184 | + } | |
185 | + | |
186 | + /// Addition and assignment | |
187 | + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){ | |
188 | + ptr[0] = ptr[0] + rhs; | |
189 | + ptr[1] = ptr[1] + rhs; | |
190 | + ptr[2] = ptr[2] + rhs; | |
191 | + return *this; | |
192 | + } | |
193 | + | |
194 | + /// Assign a scalar to all values | |
195 | + CUDA_CALLABLE vec3<T> & operator=(T rhs){ | |
196 | + ptr[0] = ptr[0] = rhs; | |
197 | + ptr[1] = ptr[1] = rhs; | |
198 | + ptr[2] = ptr[2] = rhs; | |
199 | + return *this; | |
200 | + } | |
201 | + | |
202 | + /// Casting and assignment | |
203 | + template<typename Y> | |
204 | + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){ | |
205 | + ptr[0] = (T)rhs.ptr[0]; | |
206 | + ptr[1] = (T)rhs.ptr[1]; | |
207 | + ptr[2] = (T)rhs.ptr[2]; | |
208 | + return *this; | |
209 | + } | |
210 | + | |
211 | + /// Unary minus (returns the negative of the vector) | |
212 | + CUDA_CALLABLE vec3<T> operator-() const{ | |
213 | + vec3<T> result; | |
214 | + result.ptr[0] = -ptr[0]; | |
215 | + result.ptr[1] = -ptr[1]; | |
216 | + result.ptr[2] = -ptr[2]; | |
217 | + return result; | |
218 | + } | |
219 | + | |
220 | +<<<<<<< HEAD | |
221 | +//#ifndef __NVCC__ | |
222 | +======= | |
223 | +>>>>>>> 9f5c0d4a055a2a19e69a97db1441aa617f96180c | |
224 | + /// Outputs the vector as a string | |
225 | + std::string str() const{ | |
226 | + std::stringstream ss; | |
227 | + | |
228 | + const size_t N = 3; | |
229 | + | |
230 | + ss<<"["; | |
231 | + for(size_t i=0; i<N; i++) | |
232 | + { | |
233 | + ss<<ptr[i]; | |
234 | + if(i != N-1) | |
235 | + ss<<", "; | |
236 | + } | |
237 | + ss<<"]"; | |
238 | + | |
239 | + return ss.str(); | |
240 | + } | |
241 | +<<<<<<< HEAD | |
242 | +//#endif | |
243 | +======= | |
244 | +>>>>>>> 9f5c0d4a055a2a19e69a97db1441aa617f96180c | |
245 | + | |
246 | + size_t size(){ return 3; } | |
247 | + | |
248 | + }; //end class vec3 | |
249 | +} //end namespace stim | |
250 | + | |
251 | +/// Multiply a vector by a constant when the vector is on the right hand side | |
252 | +template <typename T> | |
253 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | |
254 | + return rhs * lhs; | |
255 | +} | |
256 | + | |
257 | +//stream operator | |
258 | +template<typename T> | |
259 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | |
260 | + os<<rhs.str(); | |
261 | + return os; | |
262 | +} | |
263 | + | |
264 | +#endif | ... | ... |
1 | +#ifndef STIM_VEC3_H | |
2 | +#define STIM_VEC3_H | |
3 | + | |
4 | + | |
5 | +#include <stim/cuda/cudatools/callable.h> | |
6 | +#include <cmath> | |
7 | + | |
8 | + | |
9 | +namespace stim{ | |
10 | + | |
11 | + | |
12 | +/// A class designed to act as a 3D vector with CUDA compatibility | |
13 | +template<typename T> | |
14 | +class vec3{ | |
15 | + | |
16 | +protected: | |
17 | + T ptr[3]; | |
18 | + | |
19 | +public: | |
20 | + | |
21 | + CUDA_CALLABLE vec3(){} | |
22 | + | |
23 | + CUDA_CALLABLE vec3(T v){ | |
24 | + ptr[0] = ptr[1] = ptr[2] = v; | |
25 | + } | |
26 | + | |
27 | + CUDA_CALLABLE vec3(T x, T y, T z){ | |
28 | + ptr[0] = x; | |
29 | + ptr[1] = y; | |
30 | + ptr[2] = z; | |
31 | + } | |
32 | + | |
33 | + //copy constructor | |
34 | + CUDA_CALLABLE vec3( const vec3<T>& other){ | |
35 | + ptr[0] = other.ptr[0]; | |
36 | + ptr[1] = other.ptr[1]; | |
37 | + ptr[2] = other.ptr[2]; | |
38 | + } | |
39 | + | |
40 | + //access an element using an index | |
41 | + CUDA_CALLABLE T& operator[](size_t idx){ | |
42 | + return ptr[idx]; | |
43 | + } | |
44 | + | |
45 | + CUDA_CALLABLE T* data(){ | |
46 | + return ptr; | |
47 | + } | |
48 | + | |
49 | +/// Casting operator. Creates a new vector with a new type U. | |
50 | + template< typename U > | |
51 | + CUDA_CALLABLE operator vec3<U>(){ | |
52 | + vec3<U> result; | |
53 | + result.ptr[0] = (U)ptr[0]; | |
54 | + result.ptr[1] = (U)ptr[1]; | |
55 | + result.ptr[2] = (U)ptr[2]; | |
56 | + | |
57 | + return result; | |
58 | + } | |
59 | + | |
60 | + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter) | |
61 | + CUDA_CALLABLE T len_sq() const{ | |
62 | + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2]; | |
63 | + } | |
64 | + | |
65 | + /// computes the Euclidean length of the vector | |
66 | + CUDA_CALLABLE T len() const{ | |
67 | + return sqrt(len_sq()); | |
68 | + } | |
69 | + | |
70 | + | |
71 | + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi]) | |
72 | + CUDA_CALLABLE vec3<T> cart2sph() const{ | |
73 | + vec3<T> sph; | |
74 | + sph.ptr[0] = len(); | |
75 | + sph.ptr[1] = std::atan2(ptr[1], ptr[0]); | |
76 | + if(sph.ptr[0] == 0) | |
77 | + sph.ptr[2] = 0; | |
78 | + else | |
79 | + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]); | |
80 | + return sph; | |
81 | + } | |
82 | + | |
83 | + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi]) | |
84 | + CUDA_CALLABLE vec3<T> sph2cart() const{ | |
85 | + vec3<T> cart; | |
86 | + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]); | |
87 | + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]); | |
88 | + cart.ptr[2] = ptr[0] * std::cos(ptr[2]); | |
89 | + | |
90 | + return cart; | |
91 | + } | |
92 | + | |
93 | + /// Computes the normalized vector (where each coordinate is divided by the L2 norm) | |
94 | + CUDA_CALLABLE vec3<T> norm() const{ | |
95 | + vec3<T> result; | |
96 | + T l = len(); //compute the vector length | |
97 | + return (*this) / l; | |
98 | + } | |
99 | + | |
100 | + /// Computes the cross product of a 3-dimensional vector | |
101 | + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{ | |
102 | + | |
103 | + vec3<T> result; | |
104 | + | |
105 | + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]); | |
106 | + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]); | |
107 | + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]); | |
108 | + | |
109 | + return result; | |
110 | + } | |
111 | + | |
112 | + /// Compute the Euclidean inner (dot) product | |
113 | + CUDA_CALLABLE T dot(vec3<T> rhs) const{ | |
114 | + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2]; | |
115 | + } | |
116 | + | |
117 | + /// Arithmetic addition operator | |
118 | + | |
119 | + /// @param rhs is the right-hand-side operator for the addition | |
120 | + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{ | |
121 | + vec3<T> result; | |
122 | + result.ptr[0] = ptr[0] + rhs[0]; | |
123 | + result.ptr[1] = ptr[1] + rhs[1]; | |
124 | + result.ptr[2] = ptr[2] + rhs[2]; | |
125 | + return result; | |
126 | + } | |
127 | + | |
128 | + /// Arithmetic addition to a scalar | |
129 | + | |
130 | + /// @param rhs is the right-hand-side operator for the addition | |
131 | + CUDA_CALLABLE vec3<T> operator+(T rhs) const{ | |
132 | + vec3<T> result; | |
133 | + result.ptr[0] = ptr[0] + rhs; | |
134 | + result.ptr[1] = ptr[1] + rhs; | |
135 | + result.ptr[2] = ptr[2] + rhs; | |
136 | + return result; | |
137 | + } | |
138 | + | |
139 | + /// Arithmetic subtraction operator | |
140 | + | |
141 | + /// @param rhs is the right-hand-side operator for the subtraction | |
142 | + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{ | |
143 | + vec3<T> result; | |
144 | + result.ptr[0] = ptr[0] - rhs[0]; | |
145 | + result.ptr[1] = ptr[1] - rhs[1]; | |
146 | + result.ptr[2] = ptr[2] - rhs[2]; | |
147 | + return result; | |
148 | + } | |
149 | + /// Arithmetic subtraction to a scalar | |
150 | + | |
151 | + /// @param rhs is the right-hand-side operator for the addition | |
152 | + CUDA_CALLABLE vec3<T> operator-(T rhs) const{ | |
153 | + vec3<T> result; | |
154 | + result.ptr[0] = ptr[0] - rhs; | |
155 | + result.ptr[1] = ptr[1] - rhs; | |
156 | + result.ptr[2] = ptr[2] - rhs; | |
157 | + return result; | |
158 | + } | |
159 | + | |
160 | + /// Arithmetic scalar multiplication operator | |
161 | + | |
162 | + /// @param rhs is the right-hand-side operator for the subtraction | |
163 | + CUDA_CALLABLE vec3<T> operator*(T rhs) const{ | |
164 | + vec3<T> result; | |
165 | + result.ptr[0] = ptr[0] * rhs; | |
166 | + result.ptr[1] = ptr[1] * rhs; | |
167 | + result.ptr[2] = ptr[2] * rhs; | |
168 | + return result; | |
169 | + } | |
170 | + | |
171 | + /// Arithmetic scalar division operator | |
172 | + | |
173 | + /// @param rhs is the right-hand-side operator for the subtraction | |
174 | + CUDA_CALLABLE vec3<T> operator/(T rhs) const{ | |
175 | + return (*this) * ((T)1.0/rhs); | |
176 | + } | |
177 | + | |
178 | + /// Multiplication by a scalar, followed by assignment | |
179 | + CUDA_CALLABLE vec3<T> operator*=(T rhs){ | |
180 | + ptr[0] = ptr[0] * rhs; | |
181 | + ptr[1] = ptr[1] * rhs; | |
182 | + ptr[2] = ptr[2] * rhs; | |
183 | + return *this; | |
184 | + } | |
185 | + | |
186 | + /// Addition and assignment | |
187 | + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){ | |
188 | + ptr[0] = ptr[0] + rhs; | |
189 | + ptr[1] = ptr[1] + rhs; | |
190 | + ptr[2] = ptr[2] + rhs; | |
191 | + return *this; | |
192 | + } | |
193 | + | |
194 | + /// Assign a scalar to all values | |
195 | + CUDA_CALLABLE vec3<T> & operator=(T rhs){ | |
196 | + ptr[0] = ptr[0] = rhs; | |
197 | + ptr[1] = ptr[1] = rhs; | |
198 | + ptr[2] = ptr[2] = rhs; | |
199 | + return *this; | |
200 | + } | |
201 | + | |
202 | + /// Casting and assignment | |
203 | + template<typename Y> | |
204 | + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){ | |
205 | + ptr[0] = (T)rhs.ptr[0]; | |
206 | + ptr[1] = (T)rhs.ptr[1]; | |
207 | + ptr[2] = (T)rhs.ptr[2]; | |
208 | + return *this; | |
209 | + } | |
210 | + | |
211 | + /// Unary minus (returns the negative of the vector) | |
212 | + CUDA_CALLABLE vec3<T> operator-() const{ | |
213 | + vec3<T> result; | |
214 | + result.ptr[0] = -ptr[0]; | |
215 | + result.ptr[1] = -ptr[1]; | |
216 | + result.ptr[2] = -ptr[2]; | |
217 | + return result; | |
218 | + } | |
219 | + | |
220 | +<<<<<<< HEAD | |
221 | +//#ifndef __NVCC__ | |
222 | +======= | |
223 | +>>>>>>> 9f5c0d4a055a2a19e69a97db1441aa617f96180c | |
224 | + /// Outputs the vector as a string | |
225 | + std::string str() const{ | |
226 | + std::stringstream ss; | |
227 | + | |
228 | + const size_t N = 3; | |
229 | + | |
230 | + ss<<"["; | |
231 | + for(size_t i=0; i<N; i++) | |
232 | + { | |
233 | + ss<<ptr[i]; | |
234 | + if(i != N-1) | |
235 | + ss<<", "; | |
236 | + } | |
237 | + ss<<"]"; | |
238 | + | |
239 | + return ss.str(); | |
240 | + } | |
241 | +<<<<<<< HEAD | |
242 | +//#endif | |
243 | +======= | |
244 | +>>>>>>> 9f5c0d4a055a2a19e69a97db1441aa617f96180c | |
245 | + | |
246 | + size_t size(){ return 3; } | |
247 | + | |
248 | + }; //end class vec3 | |
249 | +} //end namespace stim | |
250 | + | |
251 | +/// Multiply a vector by a constant when the vector is on the right hand side | |
252 | +template <typename T> | |
253 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | |
254 | + return rhs * lhs; | |
255 | +} | |
256 | + | |
257 | +//stream operator | |
258 | +template<typename T> | |
259 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | |
260 | + os<<rhs.str(); | |
261 | + return os; | |
262 | +} | |
263 | + | |
264 | +#endif | ... | ... |
1 | +#ifndef STIM_VEC3_H | |
2 | +#define STIM_VEC3_H | |
3 | + | |
4 | + | |
5 | +#include <stim/cuda/cudatools/callable.h> | |
6 | + | |
7 | + | |
8 | +namespace stim{ | |
9 | + | |
10 | + | |
11 | +/// A class designed to act as a 3D vector with CUDA compatibility | |
12 | +template<typename T> | |
13 | +class vec3{ | |
14 | + | |
15 | +protected: | |
16 | + T ptr[3]; | |
17 | + | |
18 | +public: | |
19 | + | |
20 | + CUDA_CALLABLE vec3(){} | |
21 | + | |
22 | + CUDA_CALLABLE vec3(T v){ | |
23 | + ptr[0] = ptr[1] = ptr[2] = v; | |
24 | + } | |
25 | + | |
26 | + CUDA_CALLABLE vec3(T x, T y, T z){ | |
27 | + ptr[0] = x; | |
28 | + ptr[1] = y; | |
29 | + ptr[2] = z; | |
30 | + } | |
31 | + | |
32 | + //copy constructor | |
33 | + CUDA_CALLABLE vec3( const vec3<T>& other){ | |
34 | + ptr[0] = other.ptr[0]; | |
35 | + ptr[1] = other.ptr[1]; | |
36 | + ptr[2] = other.ptr[2]; | |
37 | + } | |
38 | + | |
39 | + //access an element using an index | |
40 | + CUDA_CALLABLE T& operator[](size_t idx){ | |
41 | + return ptr[idx]; | |
42 | + } | |
43 | + | |
44 | + CUDA_CALLABLE T* data(){ | |
45 | + return ptr; | |
46 | + } | |
47 | + | |
48 | +/// Casting operator. Creates a new vector with a new type U. | |
49 | + template< typename U > | |
50 | + CUDA_CALLABLE operator vec3<U>(){ | |
51 | + vec3<U> result; | |
52 | + result.ptr[0] = (U)ptr[0]; | |
53 | + result.ptr[1] = (U)ptr[1]; | |
54 | + result.ptr[2] = (U)ptr[2]; | |
55 | + | |
56 | + return result; | |
57 | + } | |
58 | + | |
59 | + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter) | |
60 | + CUDA_CALLABLE T len_sq() const{ | |
61 | + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2]; | |
62 | + } | |
63 | + | |
64 | + /// computes the Euclidean length of the vector | |
65 | + CUDA_CALLABLE T len() const{ | |
66 | + return sqrt(len_sq()); | |
67 | + } | |
68 | + | |
69 | + | |
70 | + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi]) | |
71 | + CUDA_CALLABLE vec3<T> cart2sph() const{ | |
72 | + vec3<T> sph; | |
73 | + sph.ptr[0] = len(); | |
74 | + sph.ptr[1] = std::atan2(ptr[1], ptr[0]); | |
75 | + if(sph.ptr[0] == 0) | |
76 | + sph.ptr[2] = 0; | |
77 | + else | |
78 | + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]); | |
79 | + return sph; | |
80 | + } | |
81 | + | |
82 | + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi]) | |
83 | + CUDA_CALLABLE vec3<T> sph2cart() const{ | |
84 | + vec3<T> cart; | |
85 | + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]); | |
86 | + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]); | |
87 | + cart.ptr[2] = ptr[0] * std::cos(ptr[2]); | |
88 | + | |
89 | + return cart; | |
90 | + } | |
91 | + | |
92 | + /// Computes the normalized vector (where each coordinate is divided by the L2 norm) | |
93 | + CUDA_CALLABLE vec3<T> norm() const{ | |
94 | + vec3<T> result; | |
95 | + T l = len(); //compute the vector length | |
96 | + return (*this) / l; | |
97 | + } | |
98 | + | |
99 | + /// Computes the cross product of a 3-dimensional vector | |
100 | + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{ | |
101 | + | |
102 | + vec3<T> result; | |
103 | + | |
104 | + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]); | |
105 | + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]); | |
106 | + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]); | |
107 | + | |
108 | + return result; | |
109 | + } | |
110 | + | |
111 | + /// Compute the Euclidean inner (dot) product | |
112 | + CUDA_CALLABLE T dot(vec3<T> rhs) const{ | |
113 | + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2]; | |
114 | + } | |
115 | + | |
116 | + /// Arithmetic addition operator | |
117 | + | |
118 | + /// @param rhs is the right-hand-side operator for the addition | |
119 | + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{ | |
120 | + vec3<T> result; | |
121 | + result.ptr[0] = ptr[0] + rhs[0]; | |
122 | + result.ptr[1] = ptr[1] + rhs[1]; | |
123 | + result.ptr[2] = ptr[2] + rhs[2]; | |
124 | + return result; | |
125 | + } | |
126 | + | |
127 | + /// Arithmetic addition to a scalar | |
128 | + | |
129 | + /// @param rhs is the right-hand-side operator for the addition | |
130 | + CUDA_CALLABLE vec3<T> operator+(T rhs) const{ | |
131 | + vec3<T> result; | |
132 | + result.ptr[0] = ptr[0] + rhs; | |
133 | + result.ptr[1] = ptr[1] + rhs; | |
134 | + result.ptr[2] = ptr[2] + rhs; | |
135 | + return result; | |
136 | + } | |
137 | + | |
138 | + /// Arithmetic subtraction operator | |
139 | + | |
140 | + /// @param rhs is the right-hand-side operator for the subtraction | |
141 | + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{ | |
142 | + vec3<T> result; | |
143 | + result.ptr[0] = ptr[0] - rhs[0]; | |
144 | + result.ptr[1] = ptr[1] - rhs[1]; | |
145 | + result.ptr[2] = ptr[2] - rhs[2]; | |
146 | + return result; | |
147 | + } | |
148 | + /// Arithmetic subtraction to a scalar | |
149 | + | |
150 | + /// @param rhs is the right-hand-side operator for the addition | |
151 | + CUDA_CALLABLE vec3<T> operator-(T rhs) const{ | |
152 | + vec3<T> result; | |
153 | + result.ptr[0] = ptr[0] - rhs; | |
154 | + result.ptr[1] = ptr[1] - rhs; | |
155 | + result.ptr[2] = ptr[2] - rhs; | |
156 | + return result; | |
157 | + } | |
158 | + | |
159 | + /// Arithmetic scalar multiplication operator | |
160 | + | |
161 | + /// @param rhs is the right-hand-side operator for the subtraction | |
162 | + CUDA_CALLABLE vec3<T> operator*(T rhs) const{ | |
163 | + vec3<T> result; | |
164 | + result.ptr[0] = ptr[0] * rhs; | |
165 | + result.ptr[1] = ptr[1] * rhs; | |
166 | + result.ptr[2] = ptr[2] * rhs; | |
167 | + return result; | |
168 | + } | |
169 | + | |
170 | + /// Arithmetic scalar division operator | |
171 | + | |
172 | + /// @param rhs is the right-hand-side operator for the subtraction | |
173 | + CUDA_CALLABLE vec3<T> operator/(T rhs) const{ | |
174 | + return (*this) * ((T)1.0/rhs); | |
175 | + } | |
176 | + | |
177 | + /// Multiplication by a scalar, followed by assignment | |
178 | + CUDA_CALLABLE vec3<T> operator*=(T rhs){ | |
179 | + ptr[0] = ptr[0] * rhs; | |
180 | + ptr[1] = ptr[1] * rhs; | |
181 | + ptr[2] = ptr[2] * rhs; | |
182 | + return *this; | |
183 | + } | |
184 | + | |
185 | + /// Addition and assignment | |
186 | + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){ | |
187 | + ptr[0] = ptr[0] + rhs; | |
188 | + ptr[1] = ptr[1] + rhs; | |
189 | + ptr[2] = ptr[2] + rhs; | |
190 | + return *this; | |
191 | + } | |
192 | + | |
193 | + /// Assign a scalar to all values | |
194 | + CUDA_CALLABLE vec3<T> & operator=(T rhs){ | |
195 | + ptr[0] = ptr[0] = rhs; | |
196 | + ptr[1] = ptr[1] = rhs; | |
197 | + ptr[2] = ptr[2] = rhs; | |
198 | + return *this; | |
199 | + } | |
200 | + | |
201 | + /// Casting and assignment | |
202 | + template<typename Y> | |
203 | + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){ | |
204 | + ptr[0] = (T)rhs.ptr[0]; | |
205 | + ptr[1] = (T)rhs.ptr[1]; | |
206 | + ptr[2] = (T)rhs.ptr[2]; | |
207 | + return *this; | |
208 | + } | |
209 | + | |
210 | + /// Unary minus (returns the negative of the vector) | |
211 | + CUDA_CALLABLE vec3<T> operator-() const{ | |
212 | + vec3<T> result; | |
213 | + result.ptr[0] = -ptr[0]; | |
214 | + result.ptr[1] = -ptr[1]; | |
215 | + result.ptr[2] = -ptr[2]; | |
216 | + return result; | |
217 | + } | |
218 | + | |
219 | +#ifndef __NVCC__ | |
220 | + /// Outputs the vector as a string | |
221 | + std::string str() const{ | |
222 | + std::stringstream ss; | |
223 | + | |
224 | + const size_t N = 3; | |
225 | + | |
226 | + ss<<"["; | |
227 | + for(size_t i=0; i<N; i++) | |
228 | + { | |
229 | + ss<<ptr[i]; | |
230 | + if(i != N-1) | |
231 | + ss<<", "; | |
232 | + } | |
233 | + ss<<"]"; | |
234 | + | |
235 | + return ss.str(); | |
236 | + } | |
237 | +#endif | |
238 | + | |
239 | + size_t size(){ return 3; } | |
240 | + | |
241 | + }; //end class vec3 | |
242 | +} //end namespace stim | |
243 | + | |
244 | +/// Multiply a vector by a constant when the vector is on the right hand side | |
245 | +template <typename T> | |
246 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | |
247 | + return rhs * lhs; | |
248 | +} | |
249 | + | |
250 | +//stream operator | |
251 | +template<typename T> | |
252 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | |
253 | + os<<rhs.str(); | |
254 | + return os; | |
255 | +} | |
256 | + | |
257 | +#endif | ... | ... |
1 | +#ifndef STIM_VEC3_H | |
2 | +#define STIM_VEC3_H | |
3 | + | |
4 | + | |
5 | +#include <stim/cuda/cudatools/callable.h> | |
6 | + | |
7 | + | |
8 | +namespace stim{ | |
9 | + | |
10 | + | |
11 | +/// A class designed to act as a 3D vector with CUDA compatibility | |
12 | +template<typename T> | |
13 | +class vec3{ | |
14 | + | |
15 | +protected: | |
16 | + T ptr[3]; | |
17 | + | |
18 | +public: | |
19 | + | |
20 | + CUDA_CALLABLE vec3(){} | |
21 | + | |
22 | + CUDA_CALLABLE vec3(T v){ | |
23 | + ptr[0] = ptr[1] = ptr[2] = v; | |
24 | + } | |
25 | + | |
26 | + CUDA_CALLABLE vec3(T x, T y, T z){ | |
27 | + ptr[0] = x; | |
28 | + ptr[1] = y; | |
29 | + ptr[2] = z; | |
30 | + } | |
31 | + | |
32 | + //copy constructor | |
33 | + CUDA_CALLABLE vec3( const vec3<T>& other){ | |
34 | + ptr[0] = other.ptr[0]; | |
35 | + ptr[1] = other.ptr[1]; | |
36 | + ptr[2] = other.ptr[2]; | |
37 | + } | |
38 | + | |
39 | + //access an element using an index | |
40 | + CUDA_CALLABLE T& operator[](size_t idx){ | |
41 | + return ptr[idx]; | |
42 | + } | |
43 | + | |
44 | + CUDA_CALLABLE T* data(){ | |
45 | + return ptr; | |
46 | + } | |
47 | + | |
48 | +/// Casting operator. Creates a new vector with a new type U. | |
49 | + template< typename U > | |
50 | + CUDA_CALLABLE operator vec3<U>(){ | |
51 | + vec3<U> result; | |
52 | + result.ptr[0] = (U)ptr[0]; | |
53 | + result.ptr[1] = (U)ptr[1]; | |
54 | + result.ptr[2] = (U)ptr[2]; | |
55 | + | |
56 | + return result; | |
57 | + } | |
58 | + | |
59 | + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter) | |
60 | + CUDA_CALLABLE T len_sq() const{ | |
61 | + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2]; | |
62 | + } | |
63 | + | |
64 | + /// computes the Euclidean length of the vector | |
65 | + CUDA_CALLABLE T len() const{ | |
66 | + return sqrt(len_sq()); | |
67 | + } | |
68 | + | |
69 | + | |
70 | + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi]) | |
71 | + CUDA_CALLABLE vec3<T> cart2sph() const{ | |
72 | + vec3<T> sph; | |
73 | + sph.ptr[0] = len(); | |
74 | + sph.ptr[1] = std::atan2(ptr[1], ptr[0]); | |
75 | + if(sph.ptr[0] == 0) | |
76 | + sph.ptr[2] = 0; | |
77 | + else | |
78 | + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]); | |
79 | + return sph; | |
80 | + } | |
81 | + | |
82 | + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi]) | |
83 | + CUDA_CALLABLE vec3<T> sph2cart() const{ | |
84 | + vec3<T> cart; | |
85 | + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]); | |
86 | + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]); | |
87 | + cart.ptr[2] = ptr[0] * std::cos(ptr[2]); | |
88 | + | |
89 | + return cart; | |
90 | + } | |
91 | + | |
92 | + /// Computes the normalized vector (where each coordinate is divided by the L2 norm) | |
93 | + CUDA_CALLABLE vec3<T> norm() const{ | |
94 | + vec3<T> result; | |
95 | + T l = len(); //compute the vector length | |
96 | + return (*this) / l; | |
97 | + } | |
98 | + | |
99 | + /// Computes the cross product of a 3-dimensional vector | |
100 | + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{ | |
101 | + | |
102 | + vec3<T> result; | |
103 | + | |
104 | + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]); | |
105 | + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]); | |
106 | + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]); | |
107 | + | |
108 | + return result; | |
109 | + } | |
110 | + | |
111 | + /// Compute the Euclidean inner (dot) product | |
112 | + CUDA_CALLABLE T dot(vec3<T> rhs) const{ | |
113 | + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2]; | |
114 | + } | |
115 | + | |
116 | + /// Arithmetic addition operator | |
117 | + | |
118 | + /// @param rhs is the right-hand-side operator for the addition | |
119 | + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{ | |
120 | + vec3<T> result; | |
121 | + result.ptr[0] = ptr[0] + rhs[0]; | |
122 | + result.ptr[1] = ptr[1] + rhs[1]; | |
123 | + result.ptr[2] = ptr[2] + rhs[2]; | |
124 | + return result; | |
125 | + } | |
126 | + | |
127 | + /// Arithmetic addition to a scalar | |
128 | + | |
129 | + /// @param rhs is the right-hand-side operator for the addition | |
130 | + CUDA_CALLABLE vec3<T> operator+(T rhs) const{ | |
131 | + vec3<T> result; | |
132 | + result.ptr[0] = ptr[0] + rhs; | |
133 | + result.ptr[1] = ptr[1] + rhs; | |
134 | + result.ptr[2] = ptr[2] + rhs; | |
135 | + return result; | |
136 | + } | |
137 | + | |
138 | + /// Arithmetic subtraction operator | |
139 | + | |
140 | + /// @param rhs is the right-hand-side operator for the subtraction | |
141 | + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{ | |
142 | + vec3<T> result; | |
143 | + result.ptr[0] = ptr[0] - rhs[0]; | |
144 | + result.ptr[1] = ptr[1] - rhs[1]; | |
145 | + result.ptr[2] = ptr[2] - rhs[2]; | |
146 | + return result; | |
147 | + } | |
148 | + /// Arithmetic subtraction to a scalar | |
149 | + | |
150 | + /// @param rhs is the right-hand-side operator for the addition | |
151 | + CUDA_CALLABLE vec3<T> operator-(T rhs) const{ | |
152 | + vec3<T> result; | |
153 | + result.ptr[0] = ptr[0] - rhs; | |
154 | + result.ptr[1] = ptr[1] - rhs; | |
155 | + result.ptr[2] = ptr[2] - rhs; | |
156 | + return result; | |
157 | + } | |
158 | + | |
159 | + /// Arithmetic scalar multiplication operator | |
160 | + | |
161 | + /// @param rhs is the right-hand-side operator for the subtraction | |
162 | + CUDA_CALLABLE vec3<T> operator*(T rhs) const{ | |
163 | + vec3<T> result; | |
164 | + result.ptr[0] = ptr[0] * rhs; | |
165 | + result.ptr[1] = ptr[1] * rhs; | |
166 | + result.ptr[2] = ptr[2] * rhs; | |
167 | + return result; | |
168 | + } | |
169 | + | |
170 | + /// Arithmetic scalar division operator | |
171 | + | |
172 | + /// @param rhs is the right-hand-side operator for the subtraction | |
173 | + CUDA_CALLABLE vec3<T> operator/(T rhs) const{ | |
174 | + return (*this) * ((T)1.0/rhs); | |
175 | + } | |
176 | + | |
177 | + /// Multiplication by a scalar, followed by assignment | |
178 | + CUDA_CALLABLE vec3<T> operator*=(T rhs){ | |
179 | + ptr[0] = ptr[0] * rhs; | |
180 | + ptr[1] = ptr[1] * rhs; | |
181 | + ptr[2] = ptr[2] * rhs; | |
182 | + return *this; | |
183 | + } | |
184 | + | |
185 | + /// Addition and assignment | |
186 | + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){ | |
187 | + ptr[0] = ptr[0] + rhs; | |
188 | + ptr[1] = ptr[1] + rhs; | |
189 | + ptr[2] = ptr[2] + rhs; | |
190 | + return *this; | |
191 | + } | |
192 | + | |
193 | + /// Assign a scalar to all values | |
194 | + CUDA_CALLABLE vec3<T> & operator=(T rhs){ | |
195 | + ptr[0] = ptr[0] = rhs; | |
196 | + ptr[1] = ptr[1] = rhs; | |
197 | + ptr[2] = ptr[2] = rhs; | |
198 | + return *this; | |
199 | + } | |
200 | + | |
201 | + /// Casting and assignment | |
202 | + template<typename Y> | |
203 | + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){ | |
204 | + ptr[0] = (T)rhs.ptr[0]; | |
205 | + ptr[1] = (T)rhs.ptr[1]; | |
206 | + ptr[2] = (T)rhs.ptr[2]; | |
207 | + return *this; | |
208 | + } | |
209 | + | |
210 | + /// Unary minus (returns the negative of the vector) | |
211 | + CUDA_CALLABLE vec3<T> operator-() const{ | |
212 | + vec3<T> result; | |
213 | + result.ptr[0] = -ptr[0]; | |
214 | + result.ptr[1] = -ptr[1]; | |
215 | + result.ptr[2] = -ptr[2]; | |
216 | + return result; | |
217 | + } | |
218 | + | |
219 | +//#ifndef __NVCC__ | |
220 | + /// Outputs the vector as a string | |
221 | + std::string str() const{ | |
222 | + std::stringstream ss; | |
223 | + | |
224 | + const size_t N = 3; | |
225 | + | |
226 | + ss<<"["; | |
227 | + for(size_t i=0; i<N; i++) | |
228 | + { | |
229 | + ss<<ptr[i]; | |
230 | + if(i != N-1) | |
231 | + ss<<", "; | |
232 | + } | |
233 | + ss<<"]"; | |
234 | + | |
235 | + return ss.str(); | |
236 | + } | |
237 | +//#endif | |
238 | + | |
239 | + size_t size(){ return 3; } | |
240 | + | |
241 | + }; //end class vec3 | |
242 | +} //end namespace stim | |
243 | + | |
244 | +/// Multiply a vector by a constant when the vector is on the right hand side | |
245 | +template <typename T> | |
246 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | |
247 | + return rhs * lhs; | |
248 | +} | |
249 | + | |
250 | +//stream operator | |
251 | +template<typename T> | |
252 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | |
253 | + os<<rhs.str(); | |
254 | + return os; | |
255 | +} | |
256 | + | |
257 | +#endif | ... | ... |
1 | +#ifndef STIM_VEC3_H | |
2 | +#define STIM_VEC3_H | |
3 | + | |
4 | + | |
5 | +#include <stim/cuda/cudatools/callable.h> | |
6 | +#include <cmath> | |
7 | + | |
8 | + | |
9 | +namespace stim{ | |
10 | + | |
11 | + | |
12 | +/// A class designed to act as a 3D vector with CUDA compatibility | |
13 | +template<typename T> | |
14 | +class vec3{ | |
15 | + | |
16 | +protected: | |
17 | + T ptr[3]; | |
18 | + | |
19 | +public: | |
20 | + | |
21 | + CUDA_CALLABLE vec3(){} | |
22 | + | |
23 | + CUDA_CALLABLE vec3(T v){ | |
24 | + ptr[0] = ptr[1] = ptr[2] = v; | |
25 | + } | |
26 | + | |
27 | + CUDA_CALLABLE vec3(T x, T y, T z){ | |
28 | + ptr[0] = x; | |
29 | + ptr[1] = y; | |
30 | + ptr[2] = z; | |
31 | + } | |
32 | + | |
33 | + //copy constructor | |
34 | + CUDA_CALLABLE vec3( const vec3<T>& other){ | |
35 | + ptr[0] = other.ptr[0]; | |
36 | + ptr[1] = other.ptr[1]; | |
37 | + ptr[2] = other.ptr[2]; | |
38 | + } | |
39 | + | |
40 | + //access an element using an index | |
41 | + CUDA_CALLABLE T& operator[](size_t idx){ | |
42 | + return ptr[idx]; | |
43 | + } | |
44 | + | |
45 | + CUDA_CALLABLE T* data(){ | |
46 | + return ptr; | |
47 | + } | |
48 | + | |
49 | +/// Casting operator. Creates a new vector with a new type U. | |
50 | + template< typename U > | |
51 | + CUDA_CALLABLE operator vec3<U>(){ | |
52 | + vec3<U> result; | |
53 | + result.ptr[0] = (U)ptr[0]; | |
54 | + result.ptr[1] = (U)ptr[1]; | |
55 | + result.ptr[2] = (U)ptr[2]; | |
56 | + | |
57 | + return result; | |
58 | + } | |
59 | + | |
60 | + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter) | |
61 | + CUDA_CALLABLE T len_sq() const{ | |
62 | + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2]; | |
63 | + } | |
64 | + | |
65 | + /// computes the Euclidean length of the vector | |
66 | + CUDA_CALLABLE T len() const{ | |
67 | + return sqrt(len_sq()); | |
68 | + } | |
69 | + | |
70 | + | |
71 | + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi]) | |
72 | + CUDA_CALLABLE vec3<T> cart2sph() const{ | |
73 | + vec3<T> sph; | |
74 | + sph.ptr[0] = len(); | |
75 | + sph.ptr[1] = std::atan2(ptr[1], ptr[0]); | |
76 | + if(sph.ptr[0] == 0) | |
77 | + sph.ptr[2] = 0; | |
78 | + else | |
79 | + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]); | |
80 | + return sph; | |
81 | + } | |
82 | + | |
83 | + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi]) | |
84 | + CUDA_CALLABLE vec3<T> sph2cart() const{ | |
85 | + vec3<T> cart; | |
86 | + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]); | |
87 | + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]); | |
88 | + cart.ptr[2] = ptr[0] * std::cos(ptr[2]); | |
89 | + | |
90 | + return cart; | |
91 | + } | |
92 | + | |
93 | + /// Computes the normalized vector (where each coordinate is divided by the L2 norm) | |
94 | + CUDA_CALLABLE vec3<T> norm() const{ | |
95 | + vec3<T> result; | |
96 | + T l = len(); //compute the vector length | |
97 | + return (*this) / l; | |
98 | + } | |
99 | + | |
100 | + /// Computes the cross product of a 3-dimensional vector | |
101 | + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{ | |
102 | + | |
103 | + vec3<T> result; | |
104 | + | |
105 | + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]); | |
106 | + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]); | |
107 | + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]); | |
108 | + | |
109 | + return result; | |
110 | + } | |
111 | + | |
112 | + /// Compute the Euclidean inner (dot) product | |
113 | + CUDA_CALLABLE T dot(vec3<T> rhs) const{ | |
114 | + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2]; | |
115 | + } | |
116 | + | |
117 | + /// Arithmetic addition operator | |
118 | + | |
119 | + /// @param rhs is the right-hand-side operator for the addition | |
120 | + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{ | |
121 | + vec3<T> result; | |
122 | + result.ptr[0] = ptr[0] + rhs[0]; | |
123 | + result.ptr[1] = ptr[1] + rhs[1]; | |
124 | + result.ptr[2] = ptr[2] + rhs[2]; | |
125 | + return result; | |
126 | + } | |
127 | + | |
128 | + /// Arithmetic addition to a scalar | |
129 | + | |
130 | + /// @param rhs is the right-hand-side operator for the addition | |
131 | + CUDA_CALLABLE vec3<T> operator+(T rhs) const{ | |
132 | + vec3<T> result; | |
133 | + result.ptr[0] = ptr[0] + rhs; | |
134 | + result.ptr[1] = ptr[1] + rhs; | |
135 | + result.ptr[2] = ptr[2] + rhs; | |
136 | + return result; | |
137 | + } | |
138 | + | |
139 | + /// Arithmetic subtraction operator | |
140 | + | |
141 | + /// @param rhs is the right-hand-side operator for the subtraction | |
142 | + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{ | |
143 | + vec3<T> result; | |
144 | + result.ptr[0] = ptr[0] - rhs[0]; | |
145 | + result.ptr[1] = ptr[1] - rhs[1]; | |
146 | + result.ptr[2] = ptr[2] - rhs[2]; | |
147 | + return result; | |
148 | + } | |
149 | + /// Arithmetic subtraction to a scalar | |
150 | + | |
151 | + /// @param rhs is the right-hand-side operator for the addition | |
152 | + CUDA_CALLABLE vec3<T> operator-(T rhs) const{ | |
153 | + vec3<T> result; | |
154 | + result.ptr[0] = ptr[0] - rhs; | |
155 | + result.ptr[1] = ptr[1] - rhs; | |
156 | + result.ptr[2] = ptr[2] - rhs; | |
157 | + return result; | |
158 | + } | |
159 | + | |
160 | + /// Arithmetic scalar multiplication operator | |
161 | + | |
162 | + /// @param rhs is the right-hand-side operator for the subtraction | |
163 | + CUDA_CALLABLE vec3<T> operator*(T rhs) const{ | |
164 | + vec3<T> result; | |
165 | + result.ptr[0] = ptr[0] * rhs; | |
166 | + result.ptr[1] = ptr[1] * rhs; | |
167 | + result.ptr[2] = ptr[2] * rhs; | |
168 | + return result; | |
169 | + } | |
170 | + | |
171 | + /// Arithmetic scalar division operator | |
172 | + | |
173 | + /// @param rhs is the right-hand-side operator for the subtraction | |
174 | + CUDA_CALLABLE vec3<T> operator/(T rhs) const{ | |
175 | + return (*this) * ((T)1.0/rhs); | |
176 | + } | |
177 | + | |
178 | + /// Multiplication by a scalar, followed by assignment | |
179 | + CUDA_CALLABLE vec3<T> operator*=(T rhs){ | |
180 | + ptr[0] = ptr[0] * rhs; | |
181 | + ptr[1] = ptr[1] * rhs; | |
182 | + ptr[2] = ptr[2] * rhs; | |
183 | + return *this; | |
184 | + } | |
185 | + | |
186 | + /// Addition and assignment | |
187 | + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){ | |
188 | + ptr[0] = ptr[0] + rhs; | |
189 | + ptr[1] = ptr[1] + rhs; | |
190 | + ptr[2] = ptr[2] + rhs; | |
191 | + return *this; | |
192 | + } | |
193 | + | |
194 | + /// Assign a scalar to all values | |
195 | + CUDA_CALLABLE vec3<T> & operator=(T rhs){ | |
196 | + ptr[0] = ptr[0] = rhs; | |
197 | + ptr[1] = ptr[1] = rhs; | |
198 | + ptr[2] = ptr[2] = rhs; | |
199 | + return *this; | |
200 | + } | |
201 | + | |
202 | + /// Casting and assignment | |
203 | + template<typename Y> | |
204 | + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){ | |
205 | + ptr[0] = (T)rhs.ptr[0]; | |
206 | + ptr[1] = (T)rhs.ptr[1]; | |
207 | + ptr[2] = (T)rhs.ptr[2]; | |
208 | + return *this; | |
209 | + } | |
210 | + | |
211 | + /// Unary minus (returns the negative of the vector) | |
212 | + CUDA_CALLABLE vec3<T> operator-() const{ | |
213 | + vec3<T> result; | |
214 | + result.ptr[0] = -ptr[0]; | |
215 | + result.ptr[1] = -ptr[1]; | |
216 | + result.ptr[2] = -ptr[2]; | |
217 | + return result; | |
218 | + } | |
219 | + | |
220 | + /// Outputs the vector as a string | |
221 | + std::string str() const{ | |
222 | + std::stringstream ss; | |
223 | + | |
224 | + const size_t N = 3; | |
225 | + | |
226 | + ss<<"["; | |
227 | + for(size_t i=0; i<N; i++) | |
228 | + { | |
229 | + ss<<ptr[i]; | |
230 | + if(i != N-1) | |
231 | + ss<<", "; | |
232 | + } | |
233 | + ss<<"]"; | |
234 | + | |
235 | + return ss.str(); | |
236 | + } | |
237 | + | |
238 | + size_t size(){ return 3; } | |
239 | + | |
240 | + }; //end class vec3 | |
241 | +} //end namespace stim | |
242 | + | |
243 | +/// Multiply a vector by a constant when the vector is on the right hand side | |
244 | +template <typename T> | |
245 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | |
246 | + return rhs * lhs; | |
247 | +} | |
248 | + | |
249 | +//stream operator | |
250 | +template<typename T> | |
251 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | |
252 | + os<<rhs.str(); | |
253 | + return os; | |
254 | +} | |
255 | + | |
256 | +#endif | ... | ... |
1 | +/// Reconstruct a 1D function from a 2D symmetric function. This function takes a 2D image f(x,y) as input and | |
2 | +/// builds a 1D function f(r) where r = sqrt(x^2 + y^2) to approximate this 2D function. | |
3 | +/// This is useful for several applications, such as: | |
4 | +/// 1) Calculating a 1D function from a noisy 2D image, when you know the 2D image is supposed to be symmetric | |
5 | +/// 2) Calculating the average value for every r = sqrt(x^2 + y^2) | |
6 | + | |
7 | +/// Given a set of function samples equally spaced by dx, calculate the two samples closest to x and the proximity ratio alpha. | |
8 | +/// This can be used to linearly interpolate between an array of equally spaced values. Given the query value x, the | |
9 | +/// interpolated value can be calculated as r = values[sample] * alpha + values[sample + 1] * (1 - alpha) | |
10 | +/// @param sample is the lowest bin closest to the query point x | |
11 | +/// @param alpha is the ratio of x between [sample, sample + 1] | |
12 | +/// @param dx is the spacing between values | |
13 | +/// @param x is the query point | |
14 | +template<typename T> | |
15 | +void lerp_alpha(T& sample, T& alpha, T dx, T x){ | |
16 | + sample = std::floor(x/dx); | |
17 | + alpha = 1 - (x - (b * dx)) / dx; | |
18 | +} | |
19 | + | |
20 | +/// This function assumes that the input image is square, that the # of samples are odd, and that r=0 is at the center | |
21 | +/// @param fr is an array of X elements that will store the reconstructed function | |
22 | +/// @param dr is the spacing (in pixels) between samples in fr | |
23 | +template<typename T> | |
24 | +void cpu_func1_from_symmetric2(T* fr, T& dr, T* fxy, size_t X){ | |
25 | + | |
26 | + if(X%2 == 0){ //the 2D function must be odd (a sample must be available for r=0) | |
27 | + std::err<<"Error, X = "<<X<<" must be odd."<<std::endl; | |
28 | + exit(1); | |
29 | + } | |
30 | + size_t C = X/2+1; //calculate the center pixel coordinate | |
31 | + size_t N = C * C; //number of values in the folded function | |
32 | + | |
33 | + // The first step is to fold the function 8 times to take advantage of symmetry in the grid | |
34 | + T* folded = (T*) malloc(sizeof(T) * N ); //allocate space for the folded function | |
35 | + memset(folded, 0, sizeof(T) * N); | |
36 | + char* count = (char*) malloc( N ); //allocate space for a counter for the folded function | |
37 | + memset(count, 0, sizeof(T) * N); | |
38 | + size_t xi, yi; //indices into the image f(xi, yi) | |
39 | + size_t xii, yii; //indices into the folded image | |
40 | + T v; //register to store the value at point (xi, yi) | |
41 | + for(xi = 0; xi < X; xi++){ | |
42 | + for(yi = 0; yi < X; yi++){ | |
43 | + v = fxy[yi * X + xi]; //retrieve f(x, y) | |
44 | + | |
45 | + xii = xi; | |
46 | + yii = yi; //initialize the indices into the folded image | |
47 | + | |
48 | + //fold the function along the x and y axes | |
49 | + if(xi > C) xii = 2 * C - xi - 1; //calculate the folded index of x | |
50 | + if(yi > C) yii = 2 * C - yi - 1; //calculate the folded index of y | |
51 | + | |
52 | + if(xii < yii) std::swap<T>(xii, yii); //fold the function again along the 45-degree line | |
53 | + | |
54 | + folded[yii * C + xii] += v; //add the value to the folded function | |
55 | + count[yii * C + xii] += 1; //add a counter to the counter table | |
56 | + } | |
57 | + } | |
58 | + | |
59 | + //divide out the counter to correct the folded function | |
60 | + for(size_t i = 0; i < N){ | |
61 | + folded[i] /= (T)count[i]; //divide out the counter | |
62 | + } | |
63 | + | |
64 | + T max_r = sqrt(X * X + Y * Y); //calculate the maximum r value, which will be along the image diagonal | |
65 | + T dr = max_r / (X - 1); //spacing between samples in the output function f(r) | |
66 | + | |
67 | + T* fA = (T*) malloc( sizeof(T) * X); //allocate space for a counter function storing alpha weights | |
68 | + memset(fA, 0, sizeof(T) * X); //zero out the alpha array | |
69 | + memset(fr, 0, sizeof(T) * X); //zero out the output function | |
70 | + | |
71 | + T r; //register to store the value of r at each point | |
72 | + size_t sample; | |
73 | + T alpha; | |
74 | + for(xi = 0; xi < C; xi++){ | |
75 | + for(yi = 0; yi < xi; yi++){ | |
76 | + r = sqrt(xi*xi + yi*yi); //calculate the value of r for the current (x, y) | |
77 | + lerp_alpha(sample, alpha, dr, r); //calculate the lowest nearby sample index and the associated alpha weight | |
78 | + fr[sample] += folded[yi * C + xi] * alpha; //sum the weighted value from the folded function | |
79 | + fA[sample] += alpha; //sum the weight | |
80 | + | |
81 | + if(sample < X - 1){ //if we aren't dealing with the last bin | |
82 | + fr[sample + 1] += folded[yi * C + xi] * (1.0 - alpha); //calculate the weighted value for the second point | |
83 | + fA[sample + 1] += 1 - alpha; //add the second alpha value | |
84 | + } | |
85 | + } | |
86 | + } | |
87 | + | |
88 | + //divide out the alpha values | |
89 | + for(size_t i = 0; i < X; i++) | |
90 | + fr[i] /= fA[i]; | |
91 | + | |
92 | + //free allocated memory | |
93 | + free(folded); | |
94 | + free(count); | |
95 | + free(fA); | |
96 | +} | |
0 | 97 | \ No newline at end of file | ... | ... |
stim/visualization/aabb3.h
... | ... | @@ -2,51 +2,31 @@ |
2 | 2 | #define STIM_AABB3_H |
3 | 3 | |
4 | 4 | #include <stim/cuda/cudatools/callable.h> |
5 | +#include <stim/visualization/aabbn.h> | |
5 | 6 | |
6 | 7 | namespace stim{ |
7 | 8 | |
8 | -/// Structure for a 3D axis aligned bounding box | |
9 | + template<typename T> | |
10 | + using aabb3 = aabbn<T, 3>; | |
11 | +/*/// Structure for a 3D axis aligned bounding box | |
9 | 12 | template<typename T> |
10 | -struct aabb3{ | |
11 | - | |
12 | -//protected: | |
13 | - | |
14 | - T low[3]; //top left corner position | |
15 | - T high[3]; //dimensions along x and y and z | |
16 | - | |
17 | -//public: | |
18 | - | |
19 | - CUDA_CALLABLE aabb3(T x, T y, T z){ //initialize an axis aligned bounding box of size 0 at the given position | |
20 | - low[0] = high[0] = x; //set the position to the user specified coordinates | |
21 | - low[1] = high[1] = y; | |
22 | - low[2] = high[2] = z; | |
13 | +struct aabb3 : public aabbn<T, 3>{ | |
14 | + | |
15 | + aabb3() : aabbn() {} | |
16 | + aabb3(T x0, T y0, T z0, T x1, T y1, T z1){ | |
17 | + low[0] = x0; | |
18 | + low[1] = y0; | |
19 | + low[2] = z0; | |
20 | + high[0] = x0; | |
21 | + high[1] = x1; | |
22 | + high[2] = x2; | |
23 | 23 | } |
24 | 24 | |
25 | - //insert a point into the bounding box, growing the box appropriately | |
26 | - CUDA_CALLABLE void insert(T x, T y, T z){ | |
27 | - if(x < low[0]) low[0] = x; | |
28 | - if(y < low[1]) low[1] = y; | |
29 | - if(z < low[2]) low[2] = z; | |
30 | - | |
31 | - if(x > high[0]) high[0] = x; | |
32 | - if(y > high[1]) high[1] = y; | |
33 | - if(z > high[2]) high[2] = z; | |
34 | - } | |
35 | - | |
36 | - //trim the bounding box so that the lower bounds are (x, y, z) | |
37 | - CUDA_CALLABLE void trim_low(T x, T y, T z){ | |
38 | - if(low[0] < x) low[0] = x; | |
39 | - if(low[1] < y) low[1] = y; | |
40 | - if(low[2] < z) low[2] = z; | |
41 | - } | |
25 | + aabb3 aabbn<T, 3>() { | |
42 | 26 | |
43 | - CUDA_CALLABLE void trim_high(T x, T y, T z){ | |
44 | - if(high[0] > x) high[0] = x; | |
45 | - if(high[1] > y) high[1] = y; | |
46 | - if(high[2] > z) high[2] = z; | |
47 | 27 | } |
48 | 28 | |
49 | -}; | |
29 | +};*/ | |
50 | 30 | |
51 | 31 | } |
52 | 32 | ... | ... |
stim/visualization/aabbn.h
1 | -#ifndef STIM_AABB3_H | |
2 | -#define STIM_AABB3_H | |
1 | +#ifndef STIM_AABBN_H | |
2 | +#define STIM_AABBN_H | |
3 | 3 | |
4 | 4 | #include <vector> |
5 | 5 | #include <stim/cuda/cudatools/callable.h> |
... | ... | @@ -20,16 +20,25 @@ struct aabbn{ |
20 | 20 | low[d] = high[d] = i[d]; |
21 | 21 | } |
22 | 22 | |
23 | -//public: | |
24 | - | |
25 | - //CUDA_CALLABLE aabbn(){ //initialize an axis aligned bounding box of size 0 at the given position | |
26 | - // for(size_t d = 0; d < D; d++) | |
27 | - // low[d] = high[d] = 0; | |
28 | - //} | |
29 | 23 | CUDA_CALLABLE aabbn() {} |
30 | 24 | CUDA_CALLABLE aabbn(T* i) { |
31 | 25 | init(i); |
32 | 26 | } |
27 | + | |
28 | + CUDA_CALLABLE aabbn(T x0, T x1) { | |
29 | + low[0] = x0; | |
30 | + high[0] = x1; | |
31 | + } | |
32 | + | |
33 | + CUDA_CALLABLE aabbn(T x0, T y0, T x1, T y1) : aabbn(x0, x1) { | |
34 | + low[1] = y0; | |
35 | + high[1] = y1; | |
36 | + } | |
37 | + | |
38 | + CUDA_CALLABLE aabbn(T x0, T y0, T z0, T x1, T y1, T z1) : aabbn(x0, y0, x1, y1) { | |
39 | + low[2] = z0; | |
40 | + high[2] = z1; | |
41 | + } | |
33 | 42 | |
34 | 43 | |
35 | 44 | //insert a point into the bounding box, growing the box appropriately |
... | ... | @@ -66,6 +75,14 @@ struct aabbn{ |
66 | 75 | return newbox; |
67 | 76 | } |
68 | 77 | |
78 | + //translate the box along dimension d a distance of v | |
79 | + CUDA_CALLABLE void translate(size_t d, T v) { | |
80 | + for (size_t d = 0; d < D; d++) { | |
81 | + low[d] += v; | |
82 | + high[d] += v; | |
83 | + } | |
84 | + } | |
85 | + | |
69 | 86 | }; |
70 | 87 | |
71 | 88 | } | ... | ... |