Commit 821513c85d6105d8e678cf4867a43d3530c651db

Authored by David Mayerich
1 parent 8d4f0940

ERROR plane wave refraction still doesn't work.

math/bessel.h 0 → 100644
  1 +#ifndef RTS_BESSEL_H
  2 +#define RTS_BESSEL_H
  3 +
  4 +#define _USE_MATH_DEFINES
  5 +#include <math.h>
  6 +#include "../math/complex.h"
  7 +#define eps 1e-15
  8 +#define el 0.5772156649015329
  9 +
  10 +
  11 +namespace rts{
  12 +
  13 +static complex<double> cii(0.0,1.0);
  14 +static complex<double> cone(1.0,0.0);
  15 +static complex<double> czero(0.0,0.0);
  16 +
  17 +template< typename P >
  18 +P gamma(P x)
  19 +{
  20 + int i,k,m;
  21 + P ga,gr,r,z;
  22 +
  23 + static P g[] = {
  24 + 1.0,
  25 + 0.5772156649015329,
  26 + -0.6558780715202538,
  27 + -0.420026350340952e-1,
  28 + 0.1665386113822915,
  29 + -0.421977345555443e-1,
  30 + -0.9621971527877e-2,
  31 + 0.7218943246663e-2,
  32 + -0.11651675918591e-2,
  33 + -0.2152416741149e-3,
  34 + 0.1280502823882e-3,
  35 + -0.201348547807e-4,
  36 + -0.12504934821e-5,
  37 + 0.1133027232e-5,
  38 + -0.2056338417e-6,
  39 + 0.6116095e-8,
  40 + 0.50020075e-8,
  41 + -0.11812746e-8,
  42 + 0.1043427e-9,
  43 + 0.77823e-11,
  44 + -0.36968e-11,
  45 + 0.51e-12,
  46 + -0.206e-13,
  47 + -0.54e-14,
  48 + 0.14e-14};
  49 +
  50 + if (x > 171.0) return 1e308; // This value is an overflow flag.
  51 + if (x == (int)x) {
  52 + if (x > 0.0) {
  53 + ga = 1.0; // use factorial
  54 + for (i=2;i<x;i++) {
  55 + ga *= i;
  56 + }
  57 + }
  58 + else
  59 + ga = 1e308;
  60 + }
  61 + else {
  62 + if (fabs(x) > 1.0) {
  63 + z = fabs(x);
  64 + m = (int)z;
  65 + r = 1.0;
  66 + for (k=1;k<=m;k++) {
  67 + r *= (z-k);
  68 + }
  69 + z -= m;
  70 + }
  71 + else
  72 + z = x;
  73 + gr = g[24];
  74 + for (k=23;k>=0;k--) {
  75 + gr = gr*z+g[k];
  76 + }
  77 + ga = 1.0/(gr*z);
  78 + if (fabs(x) > 1.0) {
  79 + ga *= r;
  80 + if (x < 0.0) {
  81 + ga = -M_PI/(x*ga*sin(M_PI*x));
  82 + }
  83 + }
  84 + }
  85 + return ga;
  86 +}
  87 +
  88 +template<typename P>
  89 +int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1,
  90 + P &j0p,P &j1p,P &y0p,P &y1p)
  91 +{
  92 + P x2,r,ec,w0,w1,r0,r1,cs0,cs1;
  93 + P cu,p0,q0,p1,q1,t1,t2;
  94 + int k,kz;
  95 + static P a[] = {
  96 + -7.03125e-2,
  97 + 0.112152099609375,
  98 + -0.5725014209747314,
  99 + 6.074042001273483,
  100 + -1.100171402692467e2,
  101 + 3.038090510922384e3,
  102 + -1.188384262567832e5,
  103 + 6.252951493434797e6,
  104 + -4.259392165047669e8,
  105 + 3.646840080706556e10,
  106 + -3.833534661393944e12,
  107 + 4.854014686852901e14,
  108 + -7.286857349377656e16,
  109 + 1.279721941975975e19};
  110 + static P b[] = {
  111 + 7.32421875e-2,
  112 + -0.2271080017089844,
  113 + 1.727727502584457,
  114 + -2.438052969955606e1,
  115 + 5.513358961220206e2,
  116 + -1.825775547429318e4,
  117 + 8.328593040162893e5,
  118 + -5.006958953198893e7,
  119 + 3.836255180230433e9,
  120 + -3.649010818849833e11,
  121 + 4.218971570284096e13,
  122 + -5.827244631566907e15,
  123 + 9.476288099260110e17,
  124 + -1.792162323051699e20};
  125 + static P a1[] = {
  126 + 0.1171875,
  127 + -0.1441955566406250,
  128 + 0.6765925884246826,
  129 + -6.883914268109947,
  130 + 1.215978918765359e2,
  131 + -3.302272294480852e3,
  132 + 1.276412726461746e5,
  133 + -6.656367718817688e6,
  134 + 4.502786003050393e8,
  135 + -3.833857520742790e10,
  136 + 4.011838599133198e12,
  137 + -5.060568503314727e14,
  138 + 7.572616461117958e16,
  139 + -1.326257285320556e19};
  140 + static P b1[] = {
  141 + -0.1025390625,
  142 + 0.2775764465332031,
  143 + -1.993531733751297,
  144 + 2.724882731126854e1,
  145 + -6.038440767050702e2,
  146 + 1.971837591223663e4,
  147 + -8.902978767070678e5,
  148 + 5.310411010968522e7,
  149 + -4.043620325107754e9,
  150 + 3.827011346598605e11,
  151 + -4.406481417852278e13,
  152 + 6.065091351222699e15,
  153 + -9.833883876590679e17,
  154 + 1.855045211579828e20};
  155 +
  156 + if (x < 0.0) return 1;
  157 + if (x == 0.0) {
  158 + j0 = 1.0;
  159 + j1 = 0.0;
  160 + y0 = -1e308;
  161 + y1 = -1e308;
  162 + j0p = 0.0;
  163 + j1p = 0.5;
  164 + y0p = 1e308;
  165 + y1p = 1e308;
  166 + return 0;
  167 + }
  168 + x2 = x*x;
  169 + if (x <= 12.0) {
  170 + j0 = 1.0;
  171 + r = 1.0;
  172 + for (k=1;k<=30;k++) {
  173 + r *= -0.25*x2/(k*k);
  174 + j0 += r;
  175 + if (fabs(r) < fabs(j0)*1e-15) break;
  176 + }
  177 + j1 = 1.0;
  178 + r = 1.0;
  179 + for (k=1;k<=30;k++) {
  180 + r *= -0.25*x2/(k*(k+1));
  181 + j1 += r;
  182 + if (fabs(r) < fabs(j1)*1e-15) break;
  183 + }
  184 + j1 *= 0.5*x;
  185 + ec = log(0.5*x)+el;
  186 + cs0 = 0.0;
  187 + w0 = 0.0;
  188 + r0 = 1.0;
  189 + for (k=1;k<=30;k++) {
  190 + w0 += 1.0/k;
  191 + r0 *= -0.25*x2/(k*k);
  192 + r = r0 * w0;
  193 + cs0 += r;
  194 + if (fabs(r) < fabs(cs0)*1e-15) break;
  195 + }
  196 + y0 = M_2_PI*(ec*j0-cs0);
  197 + cs1 = 1.0;
  198 + w1 = 0.0;
  199 + r1 = 1.0;
  200 + for (k=1;k<=30;k++) {
  201 + w1 += 1.0/k;
  202 + r1 *= -0.25*x2/(k*(k+1));
  203 + r = r1*(2.0*w1+1.0/(k+1));
  204 + cs1 += r;
  205 + if (fabs(r) < fabs(cs1)*1e-15) break;
  206 + }
  207 + y1 = M_2_PI * (ec*j1-1.0/x-0.25*x*cs1);
  208 + }
  209 + else {
  210 + if (x >= 50.0) kz = 8; // Can be changed to 10
  211 + else if (x >= 35.0) kz = 10; // " " 12
  212 + else kz = 12; // " " 14
  213 + t1 = x-M_PI_4;
  214 + p0 = 1.0;
  215 + q0 = -0.125/x;
  216 + for (k=0;k<kz;k++) {
  217 + p0 += a[k]*pow(x,-2*k-2);
  218 + q0 += b[k]*pow(x,-2*k-3);
  219 + }
  220 + cu = sqrt(M_2_PI/x);
  221 + j0 = cu*(p0*cos(t1)-q0*sin(t1));
  222 + y0 = cu*(p0*sin(t1)+q0*cos(t1));
  223 + t2 = x-0.75*M_PI;
  224 + p1 = 1.0;
  225 + q1 = 0.375/x;
  226 + for (k=0;k<kz;k++) {
  227 + p1 += a1[k]*pow(x,-2*k-2);
  228 + q1 += b1[k]*pow(x,-2*k-3);
  229 + }
  230 + j1 = cu*(p1*cos(t2)-q1*sin(t2));
  231 + y1 = cu*(p1*sin(t2)+q1*cos(t2));
  232 + }
  233 + j0p = -j1;
  234 + j1p = j0-j1/x;
  235 + y0p = -y1;
  236 + y1p = y0-y1/x;
  237 + return 0;
  238 +}
  239 +//
  240 +// INPUT:
  241 +// double x -- argument of Bessel function
  242 +//
  243 +// OUTPUT:
  244 +// double j0 -- Bessel function of 1st kind, 0th order
  245 +// double j1 -- Bessel function of 1st kind, 1st order
  246 +// double y0 -- Bessel function of 2nd kind, 0th order
  247 +// double y1 -- Bessel function of 2nd kind, 1st order
  248 +// double j0p -- derivative of Bessel function of 1st kind, 0th order
  249 +// double j1p -- derivative of Bessel function of 1st kind, 1st order
  250 +// double y0p -- derivative of Bessel function of 2nd kind, 0th order
  251 +// double y1p -- derivative of Bessel function of 2nd kind, 1st order
  252 +//
  253 +// RETURN:
  254 +// int error code: 0 = OK, 1 = error
  255 +//
  256 +// This algorithm computes the functions using polynomial approximations.
  257 +//
  258 +template<typename P>
  259 +int bessjy01b(P x,P &j0,P &j1,P &y0,P &y1,
  260 + P &j0p,P &j1p,P &y0p,P &y1p)
  261 +{
  262 + P t,t2,dtmp,a0,p0,q0,p1,q1,ta0,ta1;
  263 + if (x < 0.0) return 1;
  264 + if (x == 0.0) {
  265 + j0 = 1.0;
  266 + j1 = 0.0;
  267 + y0 = -1e308;
  268 + y1 = -1e308;
  269 + j0p = 0.0;
  270 + j1p = 0.5;
  271 + y0p = 1e308;
  272 + y1p = 1e308;
  273 + return 0;
  274 + }
  275 + if(x <= 4.0) {
  276 + t = x/4.0;
  277 + t2 = t*t;
  278 + j0 = ((((((-0.5014415e-3*t2+0.76771853e-2)*t2-0.0709253492)*t2+
  279 + 0.4443584263)*t2-1.7777560599)*t2+3.9999973021)*t2
  280 + -3.9999998721)*t2+1.0;
  281 + j1 = t*(((((((-0.1289769e-3*t2+0.22069155e-2)*t2-0.0236616773)*t2+
  282 + 0.1777582922)*t2-0.8888839649)*t2+2.6666660544)*t2-
  283 + 3.999999971)*t2+1.9999999998);
  284 + dtmp = (((((((-0.567433e-4*t2+0.859977e-3)*t2-0.94855882e-2)*t2+
  285 + 0.0772975809)*t2-0.4261737419)*t2+1.4216421221)*t2-
  286 + 2.3498519931)*t2+1.0766115157)*t2+0.3674669052;
  287 + y0 = M_2_PI*log(0.5*x)*j0+dtmp;
  288 + dtmp = (((((((0.6535773e-3*t2-0.0108175626)*t2+0.107657607)*t2-
  289 + 0.7268945577)*t2+3.1261399273)*t2-7.3980241381)*t2+
  290 + 6.8529236342)*t2+0.3932562018)*t2-0.6366197726;
  291 + y1 = M_2_PI*log(0.5*x)*j1+dtmp/x;
  292 + }
  293 + else {
  294 + t = 4.0/x;
  295 + t2 = t*t;
  296 + a0 = sqrt(M_2_PI/x);
  297 + p0 = ((((-0.9285e-5*t2+0.43506e-4)*t2-0.122226e-3)*t2+
  298 + 0.434725e-3)*t2-0.4394275e-2)*t2+0.999999997;
  299 + q0 = t*(((((0.8099e-5*t2-0.35614e-4)*t2+0.85844e-4)*t2-
  300 + 0.218024e-3)*t2+0.1144106e-2)*t2-0.031249995);
  301 + ta0 = x-M_PI_4;
  302 + j0 = a0*(p0*cos(ta0)-q0*sin(ta0));
  303 + y0 = a0*(p0*sin(ta0)+q0*cos(ta0));
  304 + p1 = ((((0.10632e-4*t2-0.50363e-4)*t2+0.145575e-3)*t2
  305 + -0.559487e-3)*t2+0.7323931e-2)*t2+1.000000004;
  306 + q1 = t*(((((-0.9173e-5*t2+0.40658e-4)*t2-0.99941e-4)*t2
  307 + +0.266891e-3)*t2-0.1601836e-2)*t2+0.093749994);
  308 + ta1 = x-0.75*M_PI;
  309 + j1 = a0*(p1*cos(ta1)-q1*sin(ta1));
  310 + y1 = a0*(p1*sin(ta1)+q1*cos(ta1));
  311 + }
  312 + j0p = -j1;
  313 + j1p = j0-j1/x;
  314 + y0p = -y1;
  315 + y1p = y0-y1/x;
  316 + return 0;
  317 +}
  318 +template<typename P>
  319 +int msta1(P x,int mp)
  320 +{
  321 + P a0,f0,f1,f;
  322 + int i,n0,n1,nn;
  323 +
  324 + a0 = fabs(x);
  325 + n0 = (int)(1.1*a0)+1;
  326 + f0 = 0.5*log10(6.28*n0)-n0*log10(1.36*a0/n0)-mp;
  327 + n1 = n0+5;
  328 + f1 = 0.5*log10(6.28*n1)-n1*log10(1.36*a0/n1)-mp;
  329 + for (i=0;i<20;i++) {
  330 + nn = (int)(n1-(n1-n0)/(1.0-f0/f1));
  331 + f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-mp;
  332 + if (abs(nn-n1) < 1) break;
  333 + n0 = n1;
  334 + f0 = f1;
  335 + n1 = nn;
  336 + f1 = f;
  337 + }
  338 + return nn;
  339 +}
  340 +template<typename P>
  341 +int msta2(P x,int n,int mp)
  342 +{
  343 + P a0,ejn,hmp,f0,f1,f,obj;
  344 + int i,n0,n1,nn;
  345 +
  346 + a0 = fabs(x);
  347 + hmp = 0.5*mp;
  348 + ejn = 0.5*log10(6.28*n)-n*log10(1.36*a0/n);
  349 + if (ejn <= hmp) {
  350 + obj = mp;
  351 + n0 = (int)(1.1*a0);
  352 + if (n0 < 1) n0 = 1;
  353 + }
  354 + else {
  355 + obj = hmp+ejn;
  356 + n0 = n;
  357 + }
  358 + f0 = 0.5*log10(6.28*n0)-n0*log10(1.36*a0/n0)-obj;
  359 + n1 = n0+5;
  360 + f1 = 0.5*log10(6.28*n1)-n1*log10(1.36*a0/n1)-obj;
  361 + for (i=0;i<20;i++) {
  362 + nn = (int)(n1-(n1-n0)/(1.0-f0/f1));
  363 + f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-obj;
  364 + if (abs(nn-n1) < 1) break;
  365 + n0 = n1;
  366 + f0 = f1;
  367 + n1 = nn;
  368 + f1 = f;
  369 + }
  370 + return nn+10;
  371 +}
  372 +//
  373 +// INPUT:
  374 +// double x -- argument of Bessel function of 1st and 2nd kind.
  375 +// int n -- order
  376 +//
  377 +// OUPUT:
  378 +//
  379 +// int nm -- highest order actually computed (nm <= n)
  380 +// double jn[] -- Bessel function of 1st kind, orders from 0 to nm
  381 +// double yn[] -- Bessel function of 2nd kind, orders from 0 to nm
  382 +// double j'n[]-- derivative of Bessel function of 1st kind,
  383 +// orders from 0 to nm
  384 +// double y'n[]-- derivative of Bessel function of 2nd kind,
  385 +// orders from 0 to nm
  386 +//
  387 +// Computes Bessel functions of all order up to 'n' using recurrence
  388 +// relations. If 'nm' < 'n' only 'nm' orders are returned.
  389 +//
  390 +template<typename P>
  391 +int bessjyna(int n,P x,int &nm,P *jn,P *yn,
  392 + P *jnp,P *ynp)
  393 +{
  394 + P bj0,bj1,f,f0,f1,f2,cs;
  395 + int i,k,m,ecode;
  396 +
  397 + nm = n;
  398 + if ((x < 0.0) || (n < 0)) return 1;
  399 + if (x < 1e-15) {
  400 + for (i=0;i<=n;i++) {
  401 + jn[i] = 0.0;
  402 + yn[i] = -1e308;
  403 + jnp[i] = 0.0;
  404 + ynp[i] = 1e308;
  405 + }
  406 + jn[0] = 1.0;
  407 + jnp[1] = 0.5;
  408 + return 0;
  409 + }
  410 + ecode = bessjy01a(x,jn[0],jn[1],yn[0],yn[1],jnp[0],jnp[1],ynp[0],ynp[1]);
  411 + if (n < 2) return 0;
  412 + bj0 = jn[0];
  413 + bj1 = jn[1];
  414 + if (n < (int)0.9*x) {
  415 + for (k=2;k<=n;k++) {
  416 + jn[k] = 2.0*(k-1.0)*bj1/x-bj0;
  417 + bj0 = bj1;
  418 + bj1 = jn[k];
  419 + }
  420 + }
  421 + else {
  422 + m = msta1(x,200);
  423 + if (m < n) nm = m;
  424 + else m = msta2(x,n,15);
  425 + f2 = 0.0;
  426 + f1 = 1.0e-100;
  427 + for (k=m;k>=0;k--) {
  428 + f = 2.0*(k+1.0)/x*f1-f2;
  429 + if (k <= nm) jn[k] = f;
  430 + f2 = f1;
  431 + f1 = f;
  432 + }
  433 + if (fabs(bj0) > fabs(bj1)) cs = bj0/f;
  434 + else cs = bj1/f2;
  435 + for (k=0;k<=nm;k++) {
  436 + jn[k] *= cs;
  437 + }
  438 + }
  439 + for (k=2;k<=nm;k++) {
  440 + jnp[k] = jn[k-1]-k*jn[k]/x;
  441 + }
  442 + f0 = yn[0];
  443 + f1 = yn[1];
  444 + for (k=2;k<=nm;k++) {
  445 + f = 2.0*(k-1.0)*f1/x-f0;
  446 + yn[k] = f;
  447 + f0 = f1;
  448 + f1 = f;
  449 + }
  450 + for (k=2;k<=nm;k++) {
  451 + ynp[k] = yn[k-1]-k*yn[k]/x;
  452 + }
  453 + return 0;
  454 +}
  455 +//
  456 +// Same input and output conventions as above. Different recurrence
  457 +// relations used for 'x' < 300.
  458 +//
  459 +template<typename P>
  460 +int bessjynb(int n,P x,int &nm,P *jn,P *yn,
  461 + P *jnp,P *ynp)
  462 +{
  463 + P t1,t2,f,f1,f2,bj0,bj1,bjk,by0,by1,cu,s0,su,sv;
  464 + P ec,bs,byk,p0,p1,q0,q1;
  465 + static P a[] = {
  466 + -0.7031250000000000e-1,
  467 + 0.1121520996093750,
  468 + -0.5725014209747314,
  469 + 6.074042001273483};
  470 + static P b[] = {
  471 + 0.7324218750000000e-1,
  472 + -0.2271080017089844,
  473 + 1.727727502584457,
  474 + -2.438052969955606e1};
  475 + static P a1[] = {
  476 + 0.1171875,
  477 + -0.1441955566406250,
  478 + 0.6765925884246826,
  479 + -6.883914268109947};
  480 + static P b1[] = {
  481 + -0.1025390625,
  482 + 0.2775764465332031,
  483 + -1.993531733751297,
  484 + 2.724882731126854e1};
  485 +
  486 + int i,k,m;
  487 + nm = n;
  488 + if ((x < 0.0) || (n < 0)) return 1;
  489 + if (x < 1e-15) {
  490 + for (i=0;i<=n;i++) {
  491 + jn[i] = 0.0;
  492 + yn[i] = -1e308;
  493 + jnp[i] = 0.0;
  494 + ynp[i] = 1e308;
  495 + }
  496 + jn[0] = 1.0;
  497 + jnp[1] = 0.5;
  498 + return 0;
  499 + }
  500 + if (x <= 300.0 || n > (int)(0.9*x)) {
  501 + if (n == 0) nm = 1;
  502 + m = msta1(x,200);
  503 + if (m < nm) nm = m;
  504 + else m = msta2(x,nm,15);
  505 + bs = 0.0;
  506 + su = 0.0;
  507 + sv = 0.0;
  508 + f2 = 0.0;
  509 + f1 = 1.0e-100;
  510 + for (k = m;k>=0;k--) {
  511 + f = 2.0*(k+1.0)/x*f1 - f2;
  512 + if (k <= nm) jn[k] = f;
  513 + if ((k == 2*(int)(k/2)) && (k != 0)) {
  514 + bs += 2.0*f;
  515 +// su += pow(-1,k>>1)*f/(double)k;
  516 + su += (-1)*((k & 2)-1)*f/(P)k;
  517 + }
  518 + else if (k > 1) {
  519 +// sv += pow(-1,k>>1)*k*f/(k*k-1.0);
  520 + sv += (-1)*((k & 2)-1)*(P)k*f/(k*k-1.0);
  521 + }
  522 + f2 = f1;
  523 + f1 = f;
  524 + }
  525 + s0 = bs+f;
  526 + for (k=0;k<=nm;k++) {
  527 + jn[k] /= s0;
  528 + }
  529 + ec = log(0.5*x) +0.5772156649015329;
  530 + by0 = M_2_PI*(ec*jn[0]-4.0*su/s0);
  531 + yn[0] = by0;
  532 + by1 = M_2_PI*((ec-1.0)*jn[1]-jn[0]/x-4.0*sv/s0);
  533 + yn[1] = by1;
  534 + }
  535 + else {
  536 + t1 = x-M_PI_4;
  537 + p0 = 1.0;
  538 + q0 = -0.125/x;
  539 + for (k=0;k<4;k++) {
  540 + p0 += a[k]*pow(x,-2*k-2);
  541 + q0 += b[k]*pow(x,-2*k-3);
  542 + }
  543 + cu = sqrt(M_2_PI/x);
  544 + bj0 = cu*(p0*cos(t1)-q0*sin(t1));
  545 + by0 = cu*(p0*sin(t1)+q0*cos(t1));
  546 + jn[0] = bj0;
  547 + yn[0] = by0;
  548 + t2 = x-0.75*M_PI;
  549 + p1 = 1.0;
  550 + q1 = 0.375/x;
  551 + for (k=0;k<4;k++) {
  552 + p1 += a1[k]*pow(x,-2*k-2);
  553 + q1 += b1[k]*pow(x,-2*k-3);
  554 + }
  555 + bj1 = cu*(p1*cos(t2)-q1*sin(t2));
  556 + by1 = cu*(p1*sin(t2)+q1*cos(t2));
  557 + jn[1] = bj1;
  558 + yn[1] = by1;
  559 + for (k=2;k<=nm;k++) {
  560 + bjk = 2.0*(k-1.0)*bj1/x-bj0;
  561 + jn[k] = bjk;
  562 + bj0 = bj1;
  563 + bj1 = bjk;
  564 + }
  565 + }
  566 + jnp[0] = -jn[1];
  567 + for (k=1;k<=nm;k++) {
  568 + jnp[k] = jn[k-1]-k*jn[k]/x;
  569 + }
  570 + for (k=2;k<=nm;k++) {
  571 + byk = 2.0*(k-1.0)*by1/x-by0;
  572 + yn[k] = byk;
  573 + by0 = by1;
  574 + by1 = byk;
  575 + }
  576 + ynp[0] = -yn[1];
  577 + for (k=1;k<=nm;k++) {
  578 + ynp[k] = yn[k-1]-k*yn[k]/x;
  579 + }
  580 + return 0;
  581 +
  582 +}
  583 +
  584 +// The following routine computes Bessel Jv(x) and Yv(x) for
  585 +// arbitrary positive order (v). For negative order, use:
  586 +//
  587 +// J-v(x) = Jv(x)cos(v pi) - Yv(x)sin(v pi)
  588 +// Y-v(x) = Jv(x)sin(v pi) + Yv(x)cos(v pi)
  589 +//
  590 +template<typename P>
  591 +int bessjyv(P v,P x,P &vm,P *jv,P *yv,
  592 + P *djv,P *dyv)
  593 +{
  594 + P v0,vl,vg,vv,a,a0,r,x2,bjv0,bjv1,bjvl,f,f0,f1,f2;
  595 + P r0,r1,ck,cs,cs0,cs1,sk,qx,px,byv0,byv1,rp,xk,rq;
  596 + P b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk;
  597 + int j,k,l,m,n,kz;
  598 +
  599 + x2 = x*x;
  600 + n = (int)v;
  601 + v0 = v-n;
  602 + if ((x < 0.0) || (v < 0.0)) return 1;
  603 + if (x < 1e-15) {
  604 + for (k=0;k<=n;k++) {
  605 + jv[k] = 0.0;
  606 + yv[k] = -1e308;
  607 + djv[k] = 0.0;
  608 + dyv[k] = 1e308;
  609 + if (v0 == 0.0) {
  610 + jv[0] = 1.0;
  611 + djv[1] = 0.5;
  612 + }
  613 + else djv[0] = 1e308;
  614 + }
  615 + vm = v;
  616 + return 0;
  617 + }
  618 + if (x <= 12.0) {
  619 + for (l=0;l<2;l++) {
  620 + vl = v0 + l;
  621 + bjvl = 1.0;
  622 + r = 1.0;
  623 + for (k=1;k<=40;k++) {
  624 + r *= -0.25*x2/(k*(k+vl));
  625 + bjvl += r;
  626 + if (fabs(r) < fabs(bjvl)*1e-15) break;
  627 + }
  628 + vg = 1.0 + vl;
  629 + a = pow(0.5*x,vl)/gamma(vg);
  630 + if (l == 0) bjv0 = bjvl*a;
  631 + else bjv1 = bjvl*a;
  632 + }
  633 + }
  634 + else {
  635 + if (x >= 50.0) kz = 8;
  636 + else if (x >= 35.0) kz = 10;
  637 + else kz = 11;
  638 + for (j=0;j<2;j++) {
  639 + vv = 4.0*(j+v0)*(j+v0);
  640 + px = 1.0;
  641 + rp = 1.0;
  642 + for (k=1;k<=kz;k++) {
  643 + rp *= (-0.78125e-2)*(vv-pow(4.0*k-3.0,2.0))*
  644 + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*x2);
  645 + px += rp;
  646 + }
  647 + qx = 1.0;
  648 + rq = 1.0;
  649 + for (k=1;k<=kz;k++) {
  650 + rq *= (-0.78125e-2)*(vv-pow(4.0*k-1.0,2.0))*
  651 + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*x2);
  652 + qx += rq;
  653 + }
  654 + qx *= 0.125*(vv-1.0)/x;
  655 + xk = x-(0.5*(j+v0)+0.25)*M_PI;
  656 + a0 = sqrt(M_2_PI/x);
  657 + ck = cos(xk);
  658 + sk = sin(xk);
  659 +
  660 + if (j == 0) {
  661 + bjv0 = a0*(px*ck-qx*sk);
  662 + byv0 = a0*(px*sk+qx*ck);
  663 + }
  664 + else if (j == 1) {
  665 + bjv1 = a0*(px*ck-qx*sk);
  666 + byv1 = a0*(px*sk+qx*ck);
  667 + }
  668 + }
  669 + }
  670 + jv[0] = bjv0;
  671 + jv[1] = bjv1;
  672 + djv[0] = v0*jv[0]/x-jv[1];
  673 + djv[1] = -(1.0+v0)*jv[1]/x+jv[0];
  674 + if ((n >= 2) && (n <= (int)(0.9*x))) {
  675 + f0 = bjv0;
  676 + f1 = bjv1;
  677 + for (k=2;k<=n;k++) {
  678 + f = 2.0*(k+v0-1.0)*f1/x-f0;
  679 + jv[k] = f;
  680 + f0 = f1;
  681 + f1 = f;
  682 + }
  683 + }
  684 + else if (n >= 2) {
  685 + m = msta1(x,200);
  686 + if (m < n) n = m;
  687 + else m = msta2(x,n,15);
  688 + f2 = 0.0;
  689 + f1 = 1.0e-100;
  690 + for (k=m;k>=0;k--) {
  691 + f = 2.0*(v0+k+1.0)*f1/x-f2;
  692 + if (k <= n) jv[k] = f;
  693 + f2 = f1;
  694 + f1 = f;
  695 + }
  696 + if (fabs(bjv0) > fabs(bjv1)) cs = bjv0/f;
  697 + else cs = bjv1/f2;
  698 + for (k=0;k<=n;k++) {
  699 + jv[k] *= cs;
  700 + }
  701 + }
  702 + for (k=2;k<=n;k++) {
  703 + djv[k] = -(k+v0)*jv[k]/x+jv[k-1];
  704 + }
  705 + if (x <= 12.0) {
  706 + if (v0 != 0.0) {
  707 + for (l=0;l<2;l++) {
  708 + vl = v0 +l;
  709 + bjvl = 1.0;
  710 + r = 1.0;
  711 + for (k=1;k<=40;k++) {
  712 + r *= -0.25*x2/(k*(k-vl));
  713 + bjvl += r;
  714 + if (fabs(r) < fabs(bjvl)*1e-15) break;
  715 + }
  716 + vg = 1.0-vl;
  717 + b = pow(2.0/x,vl)/gamma(vg);
  718 + if (l == 0) bju0 = bjvl*b;
  719 + else bju1 = bjvl*b;
  720 + }
  721 + pv0 = M_PI*v0;
  722 + pv1 = M_PI*(1.0+v0);
  723 + byv0 = (bjv0*cos(pv0)-bju0)/sin(pv0);
  724 + byv1 = (bjv1*cos(pv1)-bju1)/sin(pv1);
  725 + }
  726 + else {
  727 + ec = log(0.5*x)+el;
  728 + cs0 = 0.0;
  729 + w0 = 0.0;
  730 + r0 = 1.0;
  731 + for (k=1;k<=30;k++) {
  732 + w0 += 1.0/k;
  733 + r0 *= -0.25*x2/(k*k);
  734 + cs0 += r0*w0;
  735 + }
  736 + byv0 = M_2_PI*(ec*bjv0-cs0);
  737 + cs1 = 1.0;
  738 + w1 = 0.0;
  739 + r1 = 1.0;
  740 + for (k=1;k<=30;k++) {
  741 + w1 += 1.0/k;
  742 + r1 *= -0.25*x2/(k*(k+1));
  743 + cs1 += r1*(2.0*w1+1.0/(k+1.0));
  744 + }
  745 + byv1 = M_2_PI*(ec*bjv1-1.0/x-0.25*x*cs1);
  746 + }
  747 + }
  748 + yv[0] = byv0;
  749 + yv[1] = byv1;
  750 + for (k=2;k<=n;k++) {
  751 + byvk = 2.0*(v0+k-1.0)*byv1/x-byv0;
  752 + yv[k] = byvk;
  753 + byv0 = byv1;
  754 + byv1 = byvk;
  755 + }
  756 + dyv[0] = v0*yv[0]/x-yv[1];
  757 + for (k=1;k<=n;k++) {
  758 + dyv[k] = -(k+v0)*yv[k]/x+yv[k-1];
  759 + }
  760 + vm = n + v0;
  761 + return 0;
  762 +}
  763 +
  764 +template<typename P>
  765 +int bessjyv_sph(int v, P z, P &vm, P* cjv,
  766 + P* cyv, P* cjvp, P* cyvp)
  767 +{
  768 + //first, compute the bessel functions of fractional order
  769 + bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
  770 +
  771 + //iterate through each and scale
  772 + for(int n = 0; n<=v; n++)
  773 + {
  774 +
  775 + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0));
  776 + cyv[n] = cyv[n] * sqrt(PI/(z * 2.0));
  777 +
  778 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(PI / (z * 2.0));
  779 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(PI / (z * 2.0));
  780 + }
  781 +
  782 + return 0;
  783 +
  784 +}
  785 +
  786 +template<typename P>
  787 +int cbessjy01(complex<P> z,complex<P> &cj0,complex<P> &cj1,
  788 + complex<P> &cy0,complex<P> &cy1,complex<P> &cj0p,
  789 + complex<P> &cj1p,complex<P> &cy0p,complex<P> &cy1p)
  790 +{
  791 + complex<P> z1,z2,cr,cp,cs,cp0,cq0,cp1,cq1,ct1,ct2,cu;
  792 + P a0,w0,w1;
  793 + int k,kz;
  794 +
  795 + static P a[] = {
  796 + -7.03125e-2,
  797 + 0.112152099609375,
  798 + -0.5725014209747314,
  799 + 6.074042001273483,
  800 + -1.100171402692467e2,
  801 + 3.038090510922384e3,
  802 + -1.188384262567832e5,
  803 + 6.252951493434797e6,
  804 + -4.259392165047669e8,
  805 + 3.646840080706556e10,
  806 + -3.833534661393944e12,
  807 + 4.854014686852901e14,
  808 + -7.286857349377656e16,
  809 + 1.279721941975975e19};
  810 + static P b[] = {
  811 + 7.32421875e-2,
  812 + -0.2271080017089844,
  813 + 1.727727502584457,
  814 + -2.438052969955606e1,
  815 + 5.513358961220206e2,
  816 + -1.825775547429318e4,
  817 + 8.328593040162893e5,
  818 + -5.006958953198893e7,
  819 + 3.836255180230433e9,
  820 + -3.649010818849833e11,
  821 + 4.218971570284096e13,
  822 + -5.827244631566907e15,
  823 + 9.476288099260110e17,
  824 + -1.792162323051699e20};
  825 + static P a1[] = {
  826 + 0.1171875,
  827 + -0.1441955566406250,
  828 + 0.6765925884246826,
  829 + -6.883914268109947,
  830 + 1.215978918765359e2,
  831 + -3.302272294480852e3,
  832 + 1.276412726461746e5,
  833 + -6.656367718817688e6,
  834 + 4.502786003050393e8,
  835 + -3.833857520742790e10,
  836 + 4.011838599133198e12,
  837 + -5.060568503314727e14,
  838 + 7.572616461117958e16,
  839 + -1.326257285320556e19};
  840 + static P b1[] = {
  841 + -0.1025390625,
  842 + 0.2775764465332031,
  843 + -1.993531733751297,
  844 + 2.724882731126854e1,
  845 + -6.038440767050702e2,
  846 + 1.971837591223663e4,
  847 + -8.902978767070678e5,
  848 + 5.310411010968522e7,
  849 + -4.043620325107754e9,
  850 + 3.827011346598605e11,
  851 + -4.406481417852278e13,
  852 + 6.065091351222699e15,
  853 + -9.833883876590679e17,
  854 + 1.855045211579828e20};
  855 +
  856 + a0 = abs(z);
  857 + z2 = z*z;
  858 + z1 = z;
  859 + if (a0 == 0.0) {
  860 + cj0 = cone;
  861 + cj1 = czero;
  862 + cy0 = complex<P>(-1e308,0);
  863 + cy1 = complex<P>(-1e308,0);
  864 + cj0p = czero;
  865 + cj1p = complex<P>(0.5,0.0);
  866 + cy0p = complex<P>(1e308,0);
  867 + cy1p = complex<P>(1e308,0);
  868 + return 0;
  869 + }
  870 + if (real(z) < 0.0) z1 = -z;
  871 + if (a0 <= 12.0) {
  872 + cj0 = cone;
  873 + cr = cone;
  874 + for (k=1;k<=40;k++) {
  875 + cr *= -0.25*z2/(P)(k*k);
  876 + cj0 += cr;
  877 + if (abs(cr) < abs(cj0)*eps) break;
  878 + }
  879 + cj1 = cone;
  880 + cr = cone;
  881 + for (k=1;k<=40;k++) {
  882 + cr *= -0.25*z2/(k*(k+1.0));
  883 + cj1 += cr;
  884 + if (abs(cr) < abs(cj1)*eps) break;
  885 + }
  886 + cj1 *= 0.5*z1;
  887 + w0 = 0.0;
  888 + cr = cone;
  889 + cs = czero;
  890 + for (k=1;k<=40;k++) {
  891 + w0 += 1.0/k;
  892 + cr *= -0.25*z2/(P)(k*k);
  893 + cp = cr*w0;
  894 + cs += cp;
  895 + if (abs(cp) < abs(cs)*eps) break;
  896 + }
  897 + cy0 = M_2_PI*((log(0.5*z1)+el)*cj0-cs);
  898 + w1 = 0.0;
  899 + cr = cone;
  900 + cs = cone;
  901 + for (k=1;k<=40;k++) {
  902 + w1 += 1.0/k;
  903 + cr *= -0.25*z2/(k*(k+1.0));
  904 + cp = cr*(2.0*w1+1.0/(k+1.0));
  905 + cs += cp;
  906 + if (abs(cp) < abs(cs)*eps) break;
  907 + }
  908 + cy1 = M_2_PI*((log(0.5*z1)+el)*cj1-1.0/z1-0.25*z1*cs);
  909 + }
  910 + else {
  911 + if (a0 >= 50.0) kz = 8; // can be changed to 10
  912 + else if (a0 >= 35.0) kz = 10; // " " " 12
  913 + else kz = 12; // " " " 14
  914 + ct1 = z1 - M_PI_4;
  915 + cp0 = cone;
  916 + for (k=0;k<kz;k++) {
  917 + cp0 += a[k]*pow(z1,-2.0*k-2.0);
  918 + }
  919 + cq0 = -0.125/z1;
  920 + for (k=0;k<kz;k++) {
  921 + cq0 += b[k]*pow(z1,-2.0*k-3.0);
  922 + }
  923 + cu = sqrt(M_2_PI/z1);
  924 + cj0 = cu*(cp0*cos(ct1)-cq0*sin(ct1));
  925 + cy0 = cu*(cp0*sin(ct1)+cq0*cos(ct1));
  926 + ct2 = z1 - 0.75*M_PI;
  927 + cp1 = cone;
  928 + for (k=0;k<kz;k++) {
  929 + cp1 += a1[k]*pow(z1,-2.0*k-2.0);
  930 + }
  931 + cq1 = 0.375/z1;
  932 + for (k=0;k<kz;k++) {
  933 + cq1 += b1[k]*pow(z1,-2.0*k-3.0);
  934 + }
  935 + cj1 = cu*(cp1*cos(ct2)-cq1*sin(ct2));
  936 + cy1 = cu*(cp1*sin(ct2)+cq1*cos(ct2));
  937 + }
  938 + if (real(z) < 0.0) {
  939 + if (imag(z) < 0.0) {
  940 + cy0 -= 2.0*cii*cj0;
  941 + cy1 = -(cy1-2.0*cii*cj1);
  942 + }
  943 + else if (imag(z) > 0.0) {
  944 + cy0 += 2.0*cii*cj0;
  945 + cy1 = -(cy1+2.0*cii*cj1);
  946 + }
  947 + cj1 = -cj1;
  948 + }
  949 + cj0p = -cj1;
  950 + cj1p = cj0-cj1/z;
  951 + cy0p = -cy1;
  952 + cy1p = cy0-cy1/z;
  953 + return 0;
  954 +}
  955 +
  956 +template<typename P>
  957 +int cbessjyna(int n,complex<P> z,int &nm,complex<P> *cj,
  958 + complex<P> *cy,complex<P> *cjp,complex<P> *cyp)
  959 +{
  960 + complex<P> cbj0,cbj1,cby0,cby1,cj0,cjk,cj1,cf,cf1,cf2;
  961 + complex<P> cs,cg0,cg1,cyk,cyl1,cyl2,cylk,cp11,cp12,cp21,cp22;
  962 + complex<P> ch0,ch1,ch2;
  963 + P a0,yak,ya1,ya0,wa;
  964 + int m,k,lb,lb0;
  965 +
  966 + if (n < 0) return 1;
  967 + a0 = abs(z);
  968 + nm = n;
  969 + if (a0 < 1.0e-100) {
  970 + for (k=0;k<=n;k++) {
  971 + cj[k] = czero;
  972 + cy[k] = complex<P> (-1e308,0);
  973 + cjp[k] = czero;
  974 + cyp[k] = complex<P>(1e308,0);
  975 + }
  976 + cj[0] = cone;
  977 + cjp[1] = complex<P>(0.5,0.0);
  978 + return 0;
  979 + }
  980 + cbessjy01(z,cj[0],cj[1],cy[0],cy[1],cjp[0],cjp[1],cyp[0],cyp[1]);
  981 + cbj0 = cj[0];
  982 + cbj1 = cj[1];
  983 + cby0 = cy[0];
  984 + cby1 = cy[1];
  985 + if (n <= 1) return 0;
  986 + if (n < (int)0.25*a0) {
  987 + cj0 = cbj0;
  988 + cj1 = cbj1;
  989 + for (k=2;k<=n;k++) {
  990 + cjk = 2.0*(k-1.0)*cj1/z-cj0;
  991 + cj[k] = cjk;
  992 + cj0 = cj1;
  993 + cj1 = cjk;
  994 + }
  995 + }
  996 + else {
  997 + m = msta1(a0,200);
  998 + if (m < n) nm = m;
  999 + else m = msta2(a0,n,15);
  1000 + cf2 = czero;
  1001 + cf1 = complex<P> (1.0e-100,0.0);
  1002 + for (k=m;k>=0;k--) {
  1003 + cf = 2.0*(k+1.0)*cf1/z-cf2;
  1004 + if (k <=nm) cj[k] = cf;
  1005 + cf2 = cf1;
  1006 + cf1 = cf;
  1007 + }
  1008 + if (abs(cbj0) > abs(cbj1)) cs = cbj0/cf;
  1009 + else cs = cbj1/cf2;
  1010 + for (k=0;k<=nm;k++) {
  1011 + cj[k] *= cs;
  1012 + }
  1013 + }
  1014 + for (k=2;k<=nm;k++) {
  1015 + cjp[k] = cj[k-1]-(P)k*cj[k]/z;
  1016 + }
  1017 + ya0 = abs(cby0);
  1018 + lb = 0;
  1019 + cg0 = cby0;
  1020 + cg1 = cby1;
  1021 + for (k=2;k<=nm;k++) {
  1022 + cyk = 2.0*(k-1.0)*cg1/z-cg0;
  1023 + yak = abs(cyk);
  1024 + ya1 = abs(cg0);
  1025 + if ((yak < ya0) && (yak < ya1)) lb = k;
  1026 + cy[k] = cyk;
  1027 + cg0 = cg1;
  1028 + cg1 = cyk;
  1029 + }
  1030 + lb0 = 0;
  1031 + if ((lb > 4) && (imag(z) != 0.0)) {
  1032 + while (lb != lb0) {
  1033 + ch2 = cone;
  1034 + ch1 = czero;
  1035 + lb0 = lb;
  1036 + for (k=lb;k>=1;k--) {
  1037 + ch0 = 2.0*k*ch1/z-ch2;
  1038 + ch2 = ch1;
  1039 + ch1 = ch0;
  1040 + }
  1041 + cp12 = ch0;
  1042 + cp22 = ch2;
  1043 + ch2 = czero;
  1044 + ch1 = cone;
  1045 + for (k=lb;k>=1;k--) {
  1046 + ch0 = 2.0*k*ch1/z-ch2;
  1047 + ch2 = ch1;
  1048 + ch1 = ch0;
  1049 + }
  1050 + cp11 = ch0;
  1051 + cp21 = ch2;
  1052 + if (lb == nm)
  1053 + cj[lb+1] = 2.0*lb*cj[lb]/z-cj[lb-1];
  1054 + if (abs(cj[0]) > abs(cj[1])) {
  1055 + cy[lb+1] = (cj[lb+1]*cby0-2.0*cp11/(M_PI*z))/cj[0];
  1056 + cy[lb] = (cj[lb]*cby0+2.0*cp12/(M_PI*z))/cj[0];
  1057 + }
  1058 + else {
  1059 + cy[lb+1] = (cj[lb+1]*cby1-2.0*cp21/(M_PI*z))/cj[1];
  1060 + cy[lb] = (cj[lb]*cby1+2.0*cp22/(M_PI*z))/cj[1];
  1061 + }
  1062 + cyl2 = cy[lb+1];
  1063 + cyl1 = cy[lb];
  1064 + for (k=lb-1;k>=0;k--) {
  1065 + cylk = 2.0*(k+1.0)*cyl1/z-cyl2;
  1066 + cy[k] = cylk;
  1067 + cyl2 = cyl1;
  1068 + cyl1 = cylk;
  1069 + }
  1070 + cyl1 = cy[lb];
  1071 + cyl2 = cy[lb+1];
  1072 + for (k=lb+1;k<n;k++) {
  1073 + cylk = 2.0*k*cyl2/z-cyl1;
  1074 + cy[k+1] = cylk;
  1075 + cyl1 = cyl2;
  1076 + cyl2 = cylk;
  1077 + }
  1078 + for (k=2;k<=nm;k++) {
  1079 + wa = abs(cy[k]);
  1080 + if (wa < abs(cy[k-1])) lb = k;
  1081 + }
  1082 + }
  1083 + }
  1084 + for (k=2;k<=nm;k++) {
  1085 + cyp[k] = cy[k-1]-(P)k*cy[k]/z;
  1086 + }
  1087 + return 0;
  1088 +}
  1089 +
  1090 +template<typename P>
  1091 +int cbessjynb(int n,complex<P> z,int &nm,complex<P> *cj,
  1092 + complex<P> *cy,complex<P> *cjp,complex<P> *cyp)
  1093 +{
  1094 + complex<P> cf,cf0,cf1,cf2,cbs,csu,csv,cs0,ce;
  1095 + complex<P> ct1,cp0,cq0,cp1,cq1,cu,cbj0,cby0,cbj1,cby1;
  1096 + complex<P> cyy,cbjk,ct2;
  1097 + P a0,y0;
  1098 + int k,m;
  1099 + static P a[] = {
  1100 + -0.7031250000000000e-1,
  1101 + 0.1121520996093750,
  1102 + -0.5725014209747314,
  1103 + 6.074042001273483};
  1104 + static P b[] = {
  1105 + 0.7324218750000000e-1,
  1106 + -0.2271080017089844,
  1107 + 1.727727502584457,
  1108 + -2.438052969955606e1};
  1109 + static P a1[] = {
  1110 + 0.1171875,
  1111 + -0.1441955566406250,
  1112 + 0.6765925884246826,
  1113 + -6.883914268109947};
  1114 + static P b1[] = {
  1115 + -0.1025390625,
  1116 + 0.2775764465332031,
  1117 + -1.993531733751297,
  1118 + 2.724882731126854e1};
  1119 +
  1120 + y0 = abs(imag(z));
  1121 + a0 = abs(z);
  1122 + nm = n;
  1123 + if (a0 < 1.0e-100) {
  1124 + for (k=0;k<=n;k++) {
  1125 + cj[k] = czero;
  1126 + cy[k] = complex<P> (-1e308,0);
  1127 + cjp[k] = czero;
  1128 + cyp[k] = complex<P>(1e308,0);
  1129 + }
  1130 + cj[0] = cone;
  1131 + cjp[1] = complex<P>(0.5,0.0);
  1132 + return 0;
  1133 + }
  1134 + if ((a0 <= 300.0) || (n > (int)(0.25*a0))) {
  1135 + if (n == 0) nm = 1;
  1136 + m = msta1(a0,200);
  1137 + if (m < nm) nm = m;
  1138 + else m = msta2(a0,nm,15);
  1139 + cbs = czero;
  1140 + csu = czero;
  1141 + csv = czero;
  1142 + cf2 = czero;
  1143 + cf1 = complex<P> (1.0e-100,0.0);
  1144 + for (k=m;k>=0;k--) {
  1145 + cf = 2.0*(k+1.0)*cf1/z-cf2;
  1146 + if (k <= nm) cj[k] = cf;
  1147 + if (((k & 1) == 0) && (k != 0)) {
  1148 + if (y0 <= 1.0) {
  1149 + cbs += 2.0*cf;
  1150 + }
  1151 + else {
  1152 + cbs += (-1)*((k & 2)-1)*2.0*cf;
  1153 + }
  1154 + csu += (P)((-1)*((k & 2)-1))*cf/(P)k;
  1155 + }
  1156 + else if (k > 1) {
  1157 + csv += (P)((-1)*((k & 2)-1)*k)*cf/(P)(k*k-1.0);
  1158 + }
  1159 + cf2 = cf1;
  1160 + cf1 = cf;
  1161 + }
  1162 + if (y0 <= 1.0) cs0 = cbs+cf;
  1163 + else cs0 = (cbs+cf)/cos(z);
  1164 + for (k=0;k<=nm;k++) {
  1165 + cj[k] /= cs0;
  1166 + }
  1167 + ce = log(0.5*z)+el;
  1168 + cy[0] = M_2_PI*(ce*cj[0]-4.0*csu/cs0);
  1169 + cy[1] = M_2_PI*(-cj[0]/z+(ce-1.0)*cj[1]-4.0*csv/cs0);
  1170 + }
  1171 + else {
  1172 + ct1 = z-M_PI_4;
  1173 + cp0 = cone;
  1174 + for (k=0;k<4;k++) {
  1175 + cp0 += a[k]*pow(z,-2.0*k-2.0);
  1176 + }
  1177 + cq0 = -0.125/z;
  1178 + for (k=0;k<4;k++) {
  1179 + cq0 += b[k] *pow(z,-2.0*k-3.0);
  1180 + }
  1181 + cu = sqrt(M_2_PI/z);
  1182 + cbj0 = cu*(cp0*cos(ct1)-cq0*sin(ct1));
  1183 + cby0 = cu*(cp0*sin(ct1)+cq0*cos(ct1));
  1184 + cj[0] = cbj0;
  1185 + cy[0] = cby0;
  1186 + ct2 = z-0.75*M_PI;
  1187 + cp1 = cone;
  1188 + for (k=0;k<4;k++) {
  1189 + cp1 += a1[k]*pow(z,-2.0*k-2.0);
  1190 + }
  1191 + cq1 = 0.375/z;
  1192 + for (k=0;k<4;k++) {
  1193 + cq1 += b1[k]*pow(z,-2.0*k-3.0);
  1194 + }
  1195 + cbj1 = cu*(cp1*cos(ct2)-cq1*sin(ct2));
  1196 + cby1 = cu*(cp1*sin(ct2)+cq1*cos(ct2));
  1197 + cj[1] = cbj1;
  1198 + cy[1] = cby1;
  1199 + for (k=2;k<=n;k++) {
  1200 + cbjk = 2.0*(k-1.0)*cbj1/z-cbj0;
  1201 + cj[k] = cbjk;
  1202 + cbj0 = cbj1;
  1203 + cbj1 = cbjk;
  1204 + }
  1205 + }
  1206 + cjp[0] = -cj[1];
  1207 + for (k=1;k<=nm;k++) {
  1208 + cjp[k] = cj[k-1]-(P)k*cj[k]/z;
  1209 + }
  1210 + if (abs(cj[0]) > 1.0)
  1211 + cy[1] = (cj[1]*cy[0]-2.0/(M_PI*z))/cj[0];
  1212 + for (k=2;k<=nm;k++) {
  1213 + if (abs(cj[k-1]) >= abs(cj[k-2]))
  1214 + cyy = (cj[k]*cy[k-1]-2.0/(M_PI*z))/cj[k-1];
  1215 + else
  1216 + cyy = (cj[k]*cy[k-2]-4.0*(k-1.0)/(M_PI*z*z))/cj[k-2];
  1217 + cy[k] = cyy;
  1218 + }
  1219 + cyp[0] = -cy[1];
  1220 + for (k=1;k<=nm;k++) {
  1221 + cyp[k] = cy[k-1]-(P)k*cy[k]/z;
  1222 + }
  1223 +
  1224 + return 0;
  1225 +}
  1226 +
  1227 +template<typename P>
  1228 +int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
  1229 + complex<P>*cyv,complex<P>*cjvp,complex<P>*cyvp)
  1230 +{
  1231 + complex<P> z1,z2,zk,cjvl,cr,ca,cjv0,cjv1,cpz,crp;
  1232 + complex<P> cqz,crq,ca0,cck,csk,cyv0,cyv1,cju0,cju1,cb;
  1233 + complex<P> cs,cs0,cr0,cs1,cr1,cec,cf,cf0,cf1,cf2;
  1234 + complex<P> cfac0,cfac1,cg0,cg1,cyk,cp11,cp12,cp21,cp22;
  1235 + complex<P> ch0,ch1,ch2,cyl1,cyl2,cylk;
  1236 +
  1237 + P a0,v0,pv0,pv1,vl,ga,gb,vg,vv,w0,w1,ya0,yak,ya1,wa;
  1238 + int j,n,k,kz,l,lb,lb0,m;
  1239 +
  1240 + a0 = abs(z);
  1241 + z1 = z;
  1242 + z2 = z*z;
  1243 + n = (int)v;
  1244 +
  1245 +
  1246 + v0 = v-n;
  1247 +
  1248 + pv0 = M_PI*v0;
  1249 + pv1 = M_PI*(1.0+v0);
  1250 + if (a0 < 1.0e-100) {
  1251 + for (k=0;k<=n;k++) {
  1252 + cjv[k] = czero;
  1253 + cyv[k] = complex<P> (-1e308,0);
  1254 + cjvp[k] = czero;
  1255 + cyvp[k] = complex<P> (1e308,0);
  1256 +
  1257 + }
  1258 + if (v0 == 0.0) {
  1259 + cjv[0] = cone;
  1260 + cjvp[1] = complex<P> (0.5,0.0);
  1261 + }
  1262 + else {
  1263 + cjvp[0] = complex<P> (1e308,0);
  1264 + }
  1265 + vm = v;
  1266 + return 0;
  1267 + }
  1268 + if (real(z1) < 0.0) z1 = -z;
  1269 + if (a0 <= 12.0) {
  1270 + for (l=0;l<2;l++) {
  1271 + vl = v0+l;
  1272 + cjvl = cone;
  1273 + cr = cone;
  1274 + for (k=1;k<=40;k++) {
  1275 + cr *= -0.25*z2/(k*(k+vl));
  1276 + cjvl += cr;
  1277 + if (abs(cr) < abs(cjvl)*eps) break;
  1278 + }
  1279 + vg = 1.0 + vl;
  1280 + ga = gamma(vg);
  1281 + ca = pow(0.5*z1,vl)/ga;
  1282 + if (l == 0) cjv0 = cjvl*ca;
  1283 + else cjv1 = cjvl*ca;
  1284 + }
  1285 + }
  1286 + else {
  1287 + if (a0 >= 50.0) kz = 8;
  1288 + else if (a0 >= 35.0) kz = 10;
  1289 + else kz = 11;
  1290 + for (j=0;j<2;j++) {
  1291 + vv = 4.0*(j+v0)*(j+v0);
  1292 + cpz = cone;
  1293 + crp = cone;
  1294 + for (k=1;k<=kz;k++) {
  1295 + crp = -0.78125e-2*crp*(vv-pow(4.0*k-3.0,2.0))*
  1296 + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*z2);
  1297 + cpz += crp;
  1298 + }
  1299 + cqz = cone;
  1300 + crq = cone;
  1301 + for (k=1;k<=kz;k++) {
  1302 + crq = -0.78125e-2*crq*(vv-pow(4.0*k-1.0,2.0))*
  1303 + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*z2);
  1304 + cqz += crq;
  1305 + }
  1306 + cqz *= 0.125*(vv-1.0)/z1;
  1307 + zk = z1-(0.5*(j+v0)+0.25)*M_PI;
  1308 + ca0 = sqrt(M_2_PI/z1);
  1309 + cck = cos(zk);
  1310 + csk = sin(zk);
  1311 + if (j == 0) {
  1312 + cjv0 = ca0*(cpz*cck-cqz*csk);
  1313 + cyv0 = ca0*(cpz*csk+cqz+cck);
  1314 + }
  1315 + else {
  1316 + cjv1 = ca0*(cpz*cck-cqz*csk);
  1317 + cyv1 = ca0*(cpz*csk+cqz*cck);
  1318 + }
  1319 + }
  1320 + }
  1321 + if (a0 <= 12.0) {
  1322 + if (v0 != 0.0) {
  1323 + for (l=0;l<2;l++) {
  1324 + vl = v0+l;
  1325 + cjvl = cone;
  1326 + cr = cone;
  1327 + for (k=1;k<=40;k++) {
  1328 + cr *= -0.25*z2/(k*(k-vl));
  1329 + cjvl += cr;
  1330 + if (abs(cr) < abs(cjvl)*eps) break;
  1331 + }
  1332 + vg = 1.0-vl;
  1333 + gb = gamma(vg);
  1334 + cb = pow(2.0/z1,vl)/gb;
  1335 + if (l == 0) cju0 = cjvl*cb;
  1336 + else cju1 = cjvl*cb;
  1337 + }
  1338 + cyv0 = (cjv0*cos(pv0)-cju0)/sin(pv0);
  1339 + cyv1 = (cjv1*cos(pv1)-cju1)/sin(pv1);
  1340 + }
  1341 + else {
  1342 + cec = log(0.5*z1)+el;
  1343 + cs0 = czero;
  1344 + w0 = 0.0;
  1345 + cr0 = cone;
  1346 + for (k=1;k<=30;k++) {
  1347 + w0 += 1.0/k;
  1348 + cr0 *= -0.25*z2/(P)(k*k);
  1349 + cs0 += cr0*w0;
  1350 + }
  1351 + cyv0 = M_2_PI*(cec*cjv0-cs0);
  1352 + cs1 = cone;
  1353 + w1 = 0.0;
  1354 + cr1 = cone;
  1355 + for (k=1;k<=30;k++) {
  1356 + w1 += 1.0/k;
  1357 + cr1 *= -0.25*z2/(k*(k+1.0));
  1358 + cs1 += cr1*(2.0*w1+1.0/(k+1.0));
  1359 + }
  1360 + cyv1 = M_2_PI*(cec*cjv1-1.0/z1-0.25*z1*cs1);
  1361 + }
  1362 + }
  1363 + if (real(z) < 0.0) {
  1364 + cfac0 = exp(pv0*cii);
  1365 + cfac1 = exp(pv1*cii);
  1366 + if (imag(z) < 0.0) {
  1367 + cyv0 = cfac0*cyv0-(P)2.0*(complex<P>)cii*cos(pv0)*cjv0;
  1368 + cyv1 = cfac1*cyv1-(P)2.0*(complex<P>)cii*cos(pv1)*cjv1;
  1369 + cjv0 /= cfac0;
  1370 + cjv1 /= cfac1;
  1371 + }
  1372 + else if (imag(z) > 0.0) {
  1373 + cyv0 = cyv0/cfac0+(P)2.0*(complex<P>)cii*cos(pv0)*cjv0;
  1374 + cyv1 = cyv1/cfac1+(P)2.0*(complex<P>)cii*cos(pv1)*cjv1;
  1375 + cjv0 *= cfac0;
  1376 + cjv1 *= cfac1;
  1377 + }
  1378 + }
  1379 + cjv[0] = cjv0;
  1380 + cjv[1] = cjv1;
  1381 + if ((n >= 2) && (n <= (int)(0.25*a0))) {
  1382 + cf0 = cjv0;
  1383 + cf1 = cjv1;
  1384 + for (k=2;k<= n;k++) {
  1385 + cf = 2.0*(k+v0-1.0)*cf1/z-cf0;
  1386 + cjv[k] = cf;
  1387 + cf0 = cf1;
  1388 + cf1 = cf;
  1389 + }
  1390 + }
  1391 + else if (n >= 2) {
  1392 + m = msta1(a0,200);
  1393 + if (m < n) n = m;
  1394 + else m = msta2(a0,n,15);
  1395 + cf2 = czero;
  1396 + cf1 = complex<P>(1.0e-100,0.0);
  1397 + for (k=m;k>=0;k--) {
  1398 + cf = 2.0*(v0+k+1.0)*cf1/z-cf2;
  1399 + if (k <= n) cjv[k] = cf;
  1400 + cf2 = cf1;
  1401 + cf1 = cf;
  1402 + }
  1403 + if (abs(cjv0) > abs(cjv1)) cs = cjv0/cf;
  1404 + else cs = cjv1/cf2;
  1405 + for (k=0;k<=n;k++) {
  1406 + cjv[k] *= cs;
  1407 + }
  1408 + }
  1409 + cjvp[0] = v0*cjv[0]/z-cjv[1];
  1410 + for (k=1;k<=n;k++) {
  1411 + cjvp[k] = -(k+v0)*cjv[k]/z+cjv[k-1];
  1412 + }
  1413 + cyv[0] = cyv0;
  1414 + cyv[1] = cyv1;
  1415 + ya0 = abs(cyv0);
  1416 + lb = 0;
  1417 + cg0 = cyv0;
  1418 + cg1 = cyv1;
  1419 + for (k=2;k<=n;k++) {
  1420 + cyk = 2.0*(v0+k-1.0)*cg1/z-cg0;
  1421 + yak = abs(cyk);
  1422 + ya1 = abs(cg0);
  1423 + if ((yak < ya0) && (yak< ya1)) lb = k;
  1424 + cyv[k] = cyk;
  1425 + cg0 = cg1;
  1426 + cg1 = cyk;
  1427 + }
  1428 + lb0 = 0;
  1429 + if ((lb > 4) && (imag(z) != 0.0)) {
  1430 + while(lb != lb0) {
  1431 + ch2 = cone;
  1432 + ch1 = czero;
  1433 + lb0 = lb;
  1434 + for (k=lb;k>=1;k--) {
  1435 + ch0 = 2.0*(k+v0)*ch1/z-ch2;
  1436 + ch2 = ch1;
  1437 + ch1 = ch0;
  1438 + }
  1439 + cp12 = ch0;
  1440 + cp22 = ch2;
  1441 + ch2 = czero;
  1442 + ch1 = cone;
  1443 + for (k=lb;k>=1;k--) {
  1444 + ch0 = 2.0*(k+v0)*ch1/z-ch2;
  1445 + ch2 = ch1;
  1446 + ch1 = ch0;
  1447 + }
  1448 + cp11 = ch0;
  1449 + cp21 = ch2;
  1450 + if (lb == n)
  1451 + cjv[lb+1] = 2.0*(lb+v0)*cjv[lb]/z-cjv[lb-1];
  1452 + if (abs(cjv[0]) > abs(cjv[1])) {
  1453 + cyv[lb+1] = (cjv[lb+1]*cyv0-2.0*cp11/(M_PI*z))/cjv[0];
  1454 + cyv[lb] = (cjv[lb]*cyv0+2.0*cp12/(M_PI*z))/cjv[0];
  1455 + }
  1456 + else {
  1457 + cyv[lb+1] = (cjv[lb+1]*cyv1-2.0*cp21/(M_PI*z))/cjv[1];
  1458 + cyv[lb] = (cjv[lb]*cyv1+2.0*cp22/(M_PI*z))/cjv[1];
  1459 + }
  1460 + cyl2 = cyv[lb+1];
  1461 + cyl1 = cyv[lb];
  1462 + for (k=lb-1;k>=0;k--) {
  1463 + cylk = 2.0*(k+v0+1.0)*cyl1/z-cyl2;
  1464 + cyv[k] = cylk;
  1465 + cyl2 = cyl1;
  1466 + cyl1 = cylk;
  1467 + }
  1468 + cyl1 = cyv[lb];
  1469 + cyl2 = cyv[lb+1];
  1470 + for (k=lb+1;k<n;k++) {
  1471 + cylk = 2.0*(k+v0)*cyl2/z-cyl1;
  1472 + cyv[k+1] = cylk;
  1473 + cyl1 = cyl2;
  1474 + cyl2 = cylk;
  1475 + }
  1476 + for (k=2;k<=n;k++) {
  1477 + wa = abs(cyv[k]);
  1478 + if (wa < abs(cyv[k-1])) lb = k;
  1479 + }
  1480 + }
  1481 + }
  1482 + cyvp[0] = v0*cyv[0]/z-cyv[1];
  1483 + for (k=1;k<=n;k++) {
  1484 + cyvp[k] = cyv[k-1]-(k+v0)*cyv[k]/z;
  1485 + }
  1486 + vm = n+v0;
  1487 + return 0;
  1488 +}
  1489 +
  1490 +template<typename P>
  1491 +int cbessjyva_sph(int v,complex<P> z,P &vm,complex<P>*cjv,
  1492 + complex<P>*cyv,complex<P>*cjvp,complex<P>*cyvp)
  1493 +{
  1494 + //first, compute the bessel functions of fractional order
  1495 + cbessjyva<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
  1496 +
  1497 + //iterate through each and scale
  1498 + for(int n = 0; n<=v; n++)
  1499 + {
  1500 +
  1501 + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0));
  1502 + cyv[n] = cyv[n] * sqrt(PI/(z * 2.0));
  1503 +
  1504 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(PI / (z * 2.0));
  1505 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(PI / (z * 2.0));
  1506 + }
  1507 +
  1508 + return 0;
  1509 +
  1510 +}
  1511 +
  1512 +} //end namespace rts
  1513 +
  1514 +
  1515 +#endif
0 1516 \ No newline at end of file
... ...
math/constants.h 0 → 100644
  1 +#ifndef RTS_CONSTANTS_H
  2 +#define RTS_CONSTANTS_H
  3 +
  4 +namespace rts{
  5 +
  6 + double PI = 3.14159;
  7 + double TAU = 2 * PI;
  8 +}
  9 +
  10 +#endif
0 11 \ No newline at end of file
... ...
math/realfield.cuh 0 → 100644
  1 +#ifndef RTS_REALFIELD_H
  2 +#define RTS_REALFIELD_H
  3 +
  4 +#include "../visualization/colormap.h"
  5 +#include "../envi/envi.h"
  6 +#include "../math/quad.h"
  7 +#include "../cuda/devices.h"
  8 +#include "cublas_v2.h"
  9 +#include <cuda_runtime.h>
  10 +
  11 +///Compute a Gaussian function in 3D (mostly for testing)
  12 +/*template<typename T>
  13 +__global__ void gpu_gaussian(T* dest, unsigned int r0, unsigned int r1, T mean, T std, rts::quad<T> shape)
  14 +{
  15 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  16 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  17 +
  18 + //make sure that the thread indices are in-bounds
  19 + if(iu >= r0 || iv >= r1) return;
  20 +
  21 + //compute the index into the field
  22 + int i = iv*r0 + iu;
  23 +
  24 + T u = (T)iu / (T)r0;
  25 + T v = (T)iv / (T)r1;
  26 +
  27 + rts::vec<T> p = shape(u, v);
  28 +
  29 + T fx = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[0] - mean, 2) / (2 * std*std) );
  30 + T fy = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[1] - mean, 2) / (2 * std*std) );
  31 + T fz = (T)1.0 / (std * (T)sqrt(2 * 3.14159f) ) * exp( - pow(p[2] - mean, 2) / (2 * std*std) );
  32 +
  33 + dest[i] = fx * fy * fz;
  34 +}*/
  35 +
  36 +namespace rts{
  37 +
  38 +template<typename P, unsigned int N = 1, bool positive = false>
  39 +class realfield{
  40 +
  41 + P* X[N]; //an array of N gpu pointers for each field component
  42 + int R[2]; //resolution of the slice
  43 + quad<P> shape;
  44 +
  45 + void process_filename(std::string name, std::string &prefix, std::string &postfix,
  46 + std::string &ext, unsigned int &digits)
  47 + {
  48 + std::stringstream ss(name);
  49 + std::string item;
  50 + std::vector<std::string> elems;
  51 + while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension)
  52 + {
  53 + elems.push_back(item);
  54 + }
  55 +
  56 + prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters)
  57 + ext = elems[1]; //file extension (ex. .bmp, .png)
  58 + ext = std::string(".") + ext; //add a period back into the extension
  59 +
  60 + size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'')
  61 + size_t i1 = prefix.find_last_of("?");
  62 +
  63 + postfix = prefix.substr(i1+1);
  64 + prefix = prefix.substr(0, i0);
  65 +
  66 + digits = i1 - i0 + 1; //compute the number of wildcards
  67 + }
  68 +
  69 + void init()
  70 + {
  71 + for(unsigned int n=0; n<N; n++)
  72 + X[n] = NULL;
  73 + }
  74 + void destroy()
  75 + {
  76 + for(unsigned int n=0; n<N; n++)
  77 + if(X[n] != NULL)
  78 + HANDLE_ERROR(cudaFree(X[n]));
  79 + }
  80 +
  81 +public:
  82 + //field constructor
  83 + realfield()
  84 + {
  85 + R[0] = R[1] = 0;
  86 + init();
  87 + std::cout<<"realfield CONSTRUCTOR"<<std::endl;
  88 + }
  89 + realfield(unsigned int x, unsigned int y)
  90 + {
  91 + //set the resolution
  92 + R[0] = x;
  93 + R[1] = y;
  94 + //allocate memory on the GPU
  95 + for(unsigned int n=0; n<N; n++)
  96 + {
  97 + HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(P) * R[0] * R[1] ));
  98 + }
  99 + shape = quad<P>(vec<P>(-1, -1, 0), vec<P>(-1, 1, 0), vec<P>(1, 1, 0)); //default geometry
  100 + clear(); //zero the field
  101 + std::cout<<"realfield CONSTRUCTOR"<<std::endl;
  102 + }
  103 +
  104 + ~realfield()
  105 + {
  106 + destroy();
  107 + std::cout<<"realfield DESTRUCTOR"<<std::endl;
  108 + }
  109 +
  110 + P* ptr(unsigned int n)
  111 + {
  112 + if(n < N)
  113 + return X[n];
  114 + else return NULL;
  115 + }
  116 +
  117 + //set all components of the field to zero
  118 + void clear()
  119 + {
  120 + for(unsigned int n=0; n<N; n++)
  121 + if(X[n] != NULL)
  122 + HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(P) * R[0] * R[1]));
  123 + }
  124 +
  125 + void toImage(std::string filename, unsigned int n, P vmin, P vmax, rts::colormapType cmap = rts::cmBrewer)
  126 + {
  127 + rts::gpu2image<P>(X[n], filename, R[0], R[1], vmin, vmax, cmap);
  128 + }
  129 +
  130 + void toImages(std::string filename, rts::colormapType cmap = rts::cmBrewer)
  131 + {
  132 + std::string prefix, postfix, extension;
  133 + unsigned int digits;
  134 + process_filename(filename, prefix, postfix, extension, digits); //process the filename for wild cards
  135 +
  136 + cublasStatus_t stat;
  137 + cublasHandle_t handle;
  138 +
  139 + //create a CUBLAS handle
  140 + stat = cublasCreate(&handle);
  141 + if(stat != CUBLAS_STATUS_SUCCESS)
  142 + {
  143 + std::cout<<"CUBLAS Error: initialization failed"<<std::endl;
  144 + exit(1);
  145 + }
  146 +
  147 + int L = R[0] * R[1]; //compute the number of discrete points in a slice
  148 + int result; //result of the max operation
  149 +
  150 + P maxVal[N]; //array stores minimum and maximum values
  151 + P maxAll = 0; //largest value in the data set
  152 +
  153 + //compute the maximum value for each vector component
  154 + for(int n=0; n<N; n++)
  155 + {
  156 + if(sizeof(P) == 4)
  157 + stat = cublasIsamax(handle, L, (const float*)X[n], 1, &result);
  158 + else
  159 + stat = cublasIdamax(handle, L, (const double*)X[n], 1, &result);
  160 +
  161 + result -= 1; //adjust for 1-based indexing
  162 +
  163 + if(stat != CUBLAS_STATUS_SUCCESS) //if there was a GPU error, terminate
  164 + {
  165 + std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl;
  166 + exit(1);
  167 + }
  168 +
  169 + //retrieve the maximum value for this slice and store it in the maxVal array
  170 + HANDLE_ERROR(cudaMemcpy(&maxVal[n], X[n] + result, sizeof(P), cudaMemcpyDeviceToHost));
  171 + if(abs(maxVal[n]) > maxAll) //if maxVal is larger, update the maxAll variable
  172 + maxAll = maxVal[n];
  173 +
  174 + }
  175 +
  176 + cublasDestroy(handle); //destroy the CUBLAS handle
  177 +
  178 + for(int n=0; n<N; n++) //for each image
  179 + {
  180 + stringstream ss; //assemble the file name
  181 + ss<<prefix<<std::setfill('0')<<std::setw(digits)<<n<<postfix<<extension;
  182 + std::cout<<ss.str()<<std::endl;
  183 + if(positive) //if the image is positive
  184 + toImage(ss.str(), n, 0, maxAll, cmap); //save the image using the global maximum
  185 + else
  186 + toImage(ss.str(), n, -abs(maxVal[n]), abs(maxVal[n]), cmap); //save the image using the global maximum
  187 + }
  188 + }
  189 +
  190 + //assignment operator
  191 + realfield & operator= (const realfield & rhs)
  192 + {
  193 + //de-allocate any existing GPU memory
  194 + destroy();
  195 +
  196 + //copy the slice resolution
  197 + R[0] = rhs.R[0];
  198 + R[1] = rhs.R[1];
  199 +
  200 + for(unsigned int n=0; n<N; n++)
  201 + {
  202 + //allocate the necessary memory
  203 + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(P) * R[0] * R[1]));
  204 + //copy the slice
  205 + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
  206 + }
  207 + std::cout<<"Assignment operator."<<std::endl;
  208 +
  209 + return *this;
  210 + }
  211 +
  212 + ///copy constructor
  213 + realfield(const realfield &rhs)
  214 + {
  215 + //first make a shallow copy
  216 + R[0] = rhs.R[0];
  217 + R[1] = rhs.R[1];
  218 +
  219 + for(unsigned int n=0; n<N; n++)
  220 + {
  221 + //do we have to make a deep copy?
  222 + if(rhs.X[n] == NULL)
  223 + X[n] = NULL; //no
  224 + else
  225 + {
  226 + //allocate the necessary memory
  227 + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(P) * R[0] * R[1]));
  228 +
  229 + //copy the slice
  230 + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(P) * R[0] * R[1], cudaMemcpyDeviceToDevice));
  231 + }
  232 + }
  233 +
  234 + std::cout<<"realfield COPY CONSTRUCTOR"<<std::endl;
  235 + }
  236 +
  237 + /*void gaussian(P mean, P std, unsigned int n=0) //creates a 3D gaussian using component n
  238 + {
  239 + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size
  240 + int SQRT_BLOCK = (int)sqrt((float)maxThreads);
  241 + //create one thread for each detector pixel
  242 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  243 + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  244 +
  245 + gpu_gaussian<float> <<<dimGrid, dimBlock>>> (X[n], R[0], R[1], mean, std, shape);
  246 + }*/
  247 +
  248 +
  249 +
  250 +};
  251 +
  252 +
  253 +} //end namespace rts
  254 +
  255 +
  256 +#endif
0 257 \ No newline at end of file
... ...
optics/esphere.cuh 0 → 100644
  1 +#ifndef RTS_ESPHERE
  2 +#define RTS_ESPHERE
  3 +
  4 +#include "../math/complex.h"
  5 +#include "../math/bessel.h"
  6 +#include "../visualization/colormap.h"
  7 +#include "../optics/planewave.h"
  8 +#include "../cuda/devices.h"
  9 +#include "../optics/efield.cuh"
  10 +
  11 +namespace rts{
  12 +
  13 +/* This class implements a discrete representation of an electromagnetic field
  14 + in 2D scattered by a sphere. This class implements Mie scattering.
  15 +*/
  16 +template<typename P>
  17 +class esphere : public efield<P>
  18 +{
  19 +private:
  20 + rts::complex<P> n; //sphere refractive index
  21 + P a; //sphere radius
  22 +
  23 + //parameters dependent on wavelength
  24 + unsigned int Nl; //number of orders for the calculation
  25 + rts::complex<P>* A; //internal scattering coefficients
  26 + rts::complex<P>* B; //external scattering coefficients
  27 +
  28 + void calcNl(P kmag)
  29 + {
  30 + //return ceil( ((P)6.282 * a) / lambda + 4 * pow( ((P)6.282 * a) / lambda, ((P)1/(P)3)) + 2);
  31 + Nl = ceil( kmag*a + 4 * pow(kmag * a, (P)1/(P)3) + 2);
  32 + }
  33 +
  34 + void calcAB(P k, unsigned int Nl, rts::complex<P>* A, rts::complex<P>* B)
  35 + {
  36 + /* These calculations require double precision, so they are computed
  37 + using doubles and converted to P at the end.
  38 +
  39 + Input:
  40 +
  41 + k = magnitude of the k vector (tau/lambda)
  42 + Nl = highest order coefficient ([0 Nl] are computed)
  43 + */
  44 +
  45 + //clear the previous coefficients
  46 + rts::complex<P>* cpuA = (rts::complex<P>*)malloc(sizeof(rts::complex<P>) * (Nl+1));
  47 + rts::complex<P>* cpuB = (rts::complex<P>*)malloc(sizeof(rts::complex<P>) * (Nl+1));
  48 +
  49 + //convert to an std complex value
  50 + complex<double> nc = (rts::complex<double>)n;
  51 +
  52 + //compute the magnitude of the k vector
  53 + complex<double> kna = nc * k * (double)a;
  54 +
  55 + //compute the arguments k*a and k*n*a
  56 + complex<double> ka(k * a, 0.0);
  57 +
  58 + //allocate space for the Bessel functions of the first and second kind (and derivatives)
  59 + unsigned int bytes = sizeof(complex<double>) * (Nl + 1);
  60 + complex<double>* cjv_ka = (complex<double>*)malloc(bytes);
  61 + complex<double>* cyv_ka = (complex<double>*)malloc(bytes);
  62 + complex<double>* cjvp_ka = (complex<double>*)malloc(bytes);
  63 + complex<double>* cyvp_ka = (complex<double>*)malloc(bytes);
  64 + complex<double>* cjv_kna = (complex<double>*)malloc(bytes);
  65 + complex<double>* cyv_kna = (complex<double>*)malloc(bytes);
  66 + complex<double>* cjvp_kna = (complex<double>*)malloc(bytes);
  67 + complex<double>* cyvp_kna = (complex<double>*)malloc(bytes);
  68 +
  69 + //allocate space for the spherical Hankel functions and derivative
  70 + complex<double>* chv_ka = (complex<double>*)malloc(bytes);
  71 + complex<double>* chvp_ka = (complex<double>*)malloc(bytes);
  72 +
  73 + //compute the bessel functions using the CPU-based algorithm
  74 + double vm;
  75 + cbessjyva_sph(Nl, ka, vm, cjv_ka, cyv_ka, cjvp_ka, cyvp_ka);
  76 + cbessjyva_sph(Nl, kna, vm, cjv_kna, cyv_kna, cjvp_kna, cyvp_kna);
  77 +
  78 + //compute A for each order
  79 + complex<double> i(0, 1);
  80 + complex<double> a, b, c, d;
  81 + complex<double> An, Bn;
  82 + for(int l=0; l<=Nl; l++)
  83 + {
  84 + //compute the Hankel functions from j and y
  85 + chv_ka[l] = cjv_ka[l] + i * cyv_ka[l];
  86 + chvp_ka[l] = cjvp_ka[l] + i * cyvp_ka[l];
  87 +
  88 + //Compute A (internal scattering coefficient)
  89 + //compute the numerator and denominator for A
  90 + a = cjv_ka[l] * chvp_ka[l] - cjvp_ka[l] * chv_ka[l];
  91 + b = cjv_kna[l] * chvp_ka[l] - chv_ka[l] * cjvp_kna[l] * nc;
  92 +
  93 + //calculate A and add it to the list
  94 + rts::complex<double> An = (2.0 * l + 1.0) * pow(i, l) * (a / b);
  95 + cpuA[l] = (rts::complex<P>)An;
  96 +
  97 + //Compute B (external scattering coefficient)
  98 + c = cjv_ka[l] * cjvp_kna[l] * nc - cjv_kna[l] * cjvp_ka[l];
  99 + d = cjv_kna[l] * chvp_ka[l] - chv_ka[l] * cjvp_kna[l] * nc;
  100 +
  101 + //calculate B and add it to the list
  102 + rts::complex<double> Bn = (2.0 * l + 1.0) * pow(i, l) * (c / d);
  103 + cpuB[l] = (rts::complex<P>)Bn;
  104 +
  105 + std::cout<<"A: "<<cpuA[l]<<" B: "<<cpuB[l]<<std::endl;
  106 + }
  107 +
  108 +
  109 + if(A != NULL) cudaFree(A); //free any previous coefficients
  110 + if(B != NULL) cudaFree(B);
  111 + cudaMalloc(&A, sizeof(rts::complex<P>) * (Nl+1)); //allocate memory for new coefficients
  112 + cudaMalloc(&B, sizeof(rts::complex<P>) * (Nl+1));
  113 +
  114 + //copy the calculations from the CPU to the GPU
  115 + cudaMemcpy(A, cpuA, sizeof(rts::complex<P>) * (Nl+1), cudaMemcpyDeviceToHost);
  116 + cudaMemcpy(B, cpuB, sizeof(rts::complex<P>) * (Nl+1), cudaMemcpyDeviceToHost);
  117 + }
  118 +
  119 +public:
  120 +
  121 + esphere(unsigned int res0, unsigned int res1, P _a, rts::complex<P> _n, bool _scalar = false) :
  122 + efield<P>(res0, res1, _scalar)
  123 + {
  124 + std::cout<<"Sphere scattered field created."<<std::endl;
  125 + n = _n; //save the refractive index
  126 + a = _a; //save the radius
  127 + }
  128 +
  129 + //assignment operator: build an electric field from a plane wave
  130 + efield<P> & operator= (const planewave<P> & rhs)
  131 + {
  132 + calcNl(rhs.kmag()); //compute the number of scattering coefficients
  133 + std::cout<<"Nl: "<<Nl<<std::endl;
  134 + calcAB(rhs.kmag(), Nl, A, B); //compute the scattering coefficients
  135 +
  136 + //determine important parameters for the scattering domain
  137 + unsigned int sR = ceil(sqrt( (P)(pow(esphere::R[0],2) + pow(esphere::R[1],2))) );
  138 + unsigned int thetaR = 256;
  139 +
  140 + /////////////////////continue scattering code here/////////////////////////
  141 +
  142 + esphere::clear(); //clear any previous field data
  143 + from_planewave(rhs); //create a field from the planewave
  144 + return *this; //return the current object
  145 + }
  146 +
  147 + string str()
  148 + {
  149 + stringstream ss;
  150 + ss<<"Mie Scattered Field"<<std::endl;
  151 + ss<<(*this).efield<P>::str()<<std::endl;
  152 + ss<<"a = "<<a<<std::endl;
  153 + ss<<"n = "<<n;
  154 +
  155 + return ss.str();
  156 + }
  157 +
  158 +};
  159 +
  160 +} //end namespace rts
  161 +
  162 +#endif
0 163 \ No newline at end of file
... ...