Commit 9b5637091b06c86a533de160293520afa1d1fd5f

Authored by David Mayerich
1 parent feb0c9dd

generalized aabb classes

python/enviProcess.py 0 → 100644
  1 +#!/usr/bin/python3
  2 +
  3 +#import system processes
  4 +import subprocess, sys
  5 +
  6 +if len(sys.argv) > 1:
  7 + infile = int(sys.argv[1])
  8 +
  9 +basefile = infile + "-base"
  10 +normfile = infile + "-norm"
  11 +
  12 +runcommand = "hsiproc " + infile + basefile + " --baseline baseline.txt"
  13 +subprocess.call(runcommand, shell=True)
0 14 \ No newline at end of file
... ...
stim/biomodels/network.h
... ... @@ -12,7 +12,6 @@
12 12 #include <stim/visualization/obj.h>
13 13 #include <stim/visualization/cylinder.h>
14 14 #include <stim/structures/kdtree.cuh>
15   -#include <boost/tuple/tuple.hpp>
16 15 #include <stim/cuda/cudatools/timer.h>
17 16  
18 17  
... ...
stim/math/.vec3.h.swp 0 → 100644
No preview for this file type
stim/math/.vec3_BASE_62876.h.swp 0 → 100644
No preview for this file type
stim/math/.vec3_LOCAL_62876.h.swp 0 → 100644
No preview for this file type
stim/math/.vec3_REMOTE_62876.h.swp 0 → 100644
No preview for this file type
stim/math/bessel - Copy.h 0 → 100644
  1 +#ifndef RTS_BESSEL_H
  2 +#define RTS_BESSEL_H
  3 +
  4 +#define _USE_MATH_DEFINES
  5 +#include <math.h>
  6 +#include "../math/complex.h"
  7 +#define eps 1e-15
  8 +#define el 0.5772156649015329
  9 +
  10 +
  11 +namespace stim{
  12 +
  13 +static complex<double> cii(0.0,1.0);
  14 +static complex<double> cone(1.0,0.0);
  15 +static complex<double> czero(0.0,0.0);
  16 +
  17 +template< typename P >
  18 +P gamma(P x)
  19 +{
  20 + int i,k,m;
  21 + P ga,gr,r,z;
  22 +
  23 + static P g[] = {
  24 + 1.0,
  25 + 0.5772156649015329,
  26 + -0.6558780715202538,
  27 + -0.420026350340952e-1,
  28 + 0.1665386113822915,
  29 + -0.421977345555443e-1,
  30 + -0.9621971527877e-2,
  31 + 0.7218943246663e-2,
  32 + -0.11651675918591e-2,
  33 + -0.2152416741149e-3,
  34 + 0.1280502823882e-3,
  35 + -0.201348547807e-4,
  36 + -0.12504934821e-5,
  37 + 0.1133027232e-5,
  38 + -0.2056338417e-6,
  39 + 0.6116095e-8,
  40 + 0.50020075e-8,
  41 + -0.11812746e-8,
  42 + 0.1043427e-9,
  43 + 0.77823e-11,
  44 + -0.36968e-11,
  45 + 0.51e-12,
  46 + -0.206e-13,
  47 + -0.54e-14,
  48 + 0.14e-14};
  49 +
  50 + if (x > 171.0) return 1e308; // This value is an overflow flag.
  51 + if (x == (int)x) {
  52 + if (x > 0.0) {
  53 + ga = 1.0; // use factorial
  54 + for (i=2;i<x;i++) {
  55 + ga *= i;
  56 + }
  57 + }
  58 + else
  59 + ga = 1e308;
  60 + }
  61 + else {
  62 + if (fabs(x) > 1.0) {
  63 + z = fabs(x);
  64 + m = (int)z;
  65 + r = 1.0;
  66 + for (k=1;k<=m;k++) {
  67 + r *= (z-k);
  68 + }
  69 + z -= m;
  70 + }
  71 + else
  72 + z = x;
  73 + gr = g[24];
  74 + for (k=23;k>=0;k--) {
  75 + gr = gr*z+g[k];
  76 + }
  77 + ga = 1.0/(gr*z);
  78 + if (fabs(x) > 1.0) {
  79 + ga *= r;
  80 + if (x < 0.0) {
  81 + ga = -M_PI/(x*ga*sin(M_PI*x));
  82 + }
  83 + }
  84 + }
  85 + return ga;
  86 +}
  87 +
  88 +template<typename P>
  89 +int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1,
  90 + P &j0p,P &j1p,P &y0p,P &y1p)
  91 +{
  92 + P x2,r,ec,w0,w1,r0,r1,cs0,cs1;
  93 + P cu,p0,q0,p1,q1,t1,t2;
  94 + int k,kz;
  95 + static P a[] = {
  96 + -7.03125e-2,
  97 + 0.112152099609375,
  98 + -0.5725014209747314,
  99 + 6.074042001273483,
  100 + -1.100171402692467e2,
  101 + 3.038090510922384e3,
  102 + -1.188384262567832e5,
  103 + 6.252951493434797e6,
  104 + -4.259392165047669e8,
  105 + 3.646840080706556e10,
  106 + -3.833534661393944e12,
  107 + 4.854014686852901e14,
  108 + -7.286857349377656e16,
  109 + 1.279721941975975e19};
  110 + static P b[] = {
  111 + 7.32421875e-2,
  112 + -0.2271080017089844,
  113 + 1.727727502584457,
  114 + -2.438052969955606e1,
  115 + 5.513358961220206e2,
  116 + -1.825775547429318e4,
  117 + 8.328593040162893e5,
  118 + -5.006958953198893e7,
  119 + 3.836255180230433e9,
  120 + -3.649010818849833e11,
  121 + 4.218971570284096e13,
  122 + -5.827244631566907e15,
  123 + 9.476288099260110e17,
  124 + -1.792162323051699e20};
  125 + static P a1[] = {
  126 + 0.1171875,
  127 + -0.1441955566406250,
  128 + 0.6765925884246826,
  129 + -6.883914268109947,
  130 + 1.215978918765359e2,
  131 + -3.302272294480852e3,
  132 + 1.276412726461746e5,
  133 + -6.656367718817688e6,
  134 + 4.502786003050393e8,
  135 + -3.833857520742790e10,
  136 + 4.011838599133198e12,
  137 + -5.060568503314727e14,
  138 + 7.572616461117958e16,
  139 + -1.326257285320556e19};
  140 + static P b1[] = {
  141 + -0.1025390625,
  142 + 0.2775764465332031,
  143 + -1.993531733751297,
  144 + 2.724882731126854e1,
  145 + -6.038440767050702e2,
  146 + 1.971837591223663e4,
  147 + -8.902978767070678e5,
  148 + 5.310411010968522e7,
  149 + -4.043620325107754e9,
  150 + 3.827011346598605e11,
  151 + -4.406481417852278e13,
  152 + 6.065091351222699e15,
  153 + -9.833883876590679e17,
  154 + 1.855045211579828e20};
  155 +
  156 + if (x < 0.0) return 1;
  157 + if (x == 0.0) {
  158 + j0 = 1.0;
  159 + j1 = 0.0;
  160 + y0 = -1e308;
  161 + y1 = -1e308;
  162 + j0p = 0.0;
  163 + j1p = 0.5;
  164 + y0p = 1e308;
  165 + y1p = 1e308;
  166 + return 0;
  167 + }
  168 + x2 = x*x;
  169 + if (x <= 12.0) {
  170 + j0 = 1.0;
  171 + r = 1.0;
  172 + for (k=1;k<=30;k++) {
  173 + r *= -0.25*x2/(k*k);
  174 + j0 += r;
  175 + if (fabs(r) < fabs(j0)*1e-15) break;
  176 + }
  177 + j1 = 1.0;
  178 + r = 1.0;
  179 + for (k=1;k<=30;k++) {
  180 + r *= -0.25*x2/(k*(k+1));
  181 + j1 += r;
  182 + if (fabs(r) < fabs(j1)*1e-15) break;
  183 + }
  184 + j1 *= 0.5*x;
  185 + ec = log(0.5*x)+el;
  186 + cs0 = 0.0;
  187 + w0 = 0.0;
  188 + r0 = 1.0;
  189 + for (k=1;k<=30;k++) {
  190 + w0 += 1.0/k;
  191 + r0 *= -0.25*x2/(k*k);
  192 + r = r0 * w0;
  193 + cs0 += r;
  194 + if (fabs(r) < fabs(cs0)*1e-15) break;
  195 + }
  196 + y0 = M_2_PI*(ec*j0-cs0);
  197 + cs1 = 1.0;
  198 + w1 = 0.0;
  199 + r1 = 1.0;
  200 + for (k=1;k<=30;k++) {
  201 + w1 += 1.0/k;
  202 + r1 *= -0.25*x2/(k*(k+1));
  203 + r = r1*(2.0*w1+1.0/(k+1));
  204 + cs1 += r;
  205 + if (fabs(r) < fabs(cs1)*1e-15) break;
  206 + }
  207 + y1 = M_2_PI * (ec*j1-1.0/x-0.25*x*cs1);
  208 + }
  209 + else {
  210 + if (x >= 50.0) kz = 8; // Can be changed to 10
  211 + else if (x >= 35.0) kz = 10; // " " 12
  212 + else kz = 12; // " " 14
  213 + t1 = x-M_PI_4;
  214 + p0 = 1.0;
  215 + q0 = -0.125/x;
  216 + for (k=0;k<kz;k++) {
  217 + p0 += a[k]*pow(x,-2*k-2);
  218 + q0 += b[k]*pow(x,-2*k-3);
  219 + }
  220 + cu = sqrt(M_2_PI/x);
  221 + j0 = cu*(p0*cos(t1)-q0*sin(t1));
  222 + y0 = cu*(p0*sin(t1)+q0*cos(t1));
  223 + t2 = x-0.75*M_PI;
  224 + p1 = 1.0;
  225 + q1 = 0.375/x;
  226 + for (k=0;k<kz;k++) {
  227 + p1 += a1[k]*pow(x,-2*k-2);
  228 + q1 += b1[k]*pow(x,-2*k-3);
  229 + }
  230 + j1 = cu*(p1*cos(t2)-q1*sin(t2));
  231 + y1 = cu*(p1*sin(t2)+q1*cos(t2));
  232 + }
  233 + j0p = -j1;
  234 + j1p = j0-j1/x;
  235 + y0p = -y1;
  236 + y1p = y0-y1/x;
  237 + return 0;
  238 +}
  239 +//
  240 +// INPUT:
  241 +// double x -- argument of Bessel function
  242 +//
  243 +// OUTPUT:
  244 +// double j0 -- Bessel function of 1st kind, 0th order
  245 +// double j1 -- Bessel function of 1st kind, 1st order
  246 +// double y0 -- Bessel function of 2nd kind, 0th order
  247 +// double y1 -- Bessel function of 2nd kind, 1st order
  248 +// double j0p -- derivative of Bessel function of 1st kind, 0th order
  249 +// double j1p -- derivative of Bessel function of 1st kind, 1st order
  250 +// double y0p -- derivative of Bessel function of 2nd kind, 0th order
  251 +// double y1p -- derivative of Bessel function of 2nd kind, 1st order
  252 +//
  253 +// RETURN:
  254 +// int error code: 0 = OK, 1 = error
  255 +//
  256 +// This algorithm computes the functions using polynomial approximations.
  257 +//
  258 +template<typename P>
  259 +int bessjy01b(P x,P &j0,P &j1,P &y0,P &y1,
  260 + P &j0p,P &j1p,P &y0p,P &y1p)
  261 +{
  262 + P t,t2,dtmp,a0,p0,q0,p1,q1,ta0,ta1;
  263 + if (x < 0.0) return 1;
  264 + if (x == 0.0) {
  265 + j0 = 1.0;
  266 + j1 = 0.0;
  267 + y0 = -1e308;
  268 + y1 = -1e308;
  269 + j0p = 0.0;
  270 + j1p = 0.5;
  271 + y0p = 1e308;
  272 + y1p = 1e308;
  273 + return 0;
  274 + }
  275 + if(x <= 4.0) {
  276 + t = x/4.0;
  277 + t2 = t*t;
  278 + j0 = ((((((-0.5014415e-3*t2+0.76771853e-2)*t2-0.0709253492)*t2+
  279 + 0.4443584263)*t2-1.7777560599)*t2+3.9999973021)*t2
  280 + -3.9999998721)*t2+1.0;
  281 + j1 = t*(((((((-0.1289769e-3*t2+0.22069155e-2)*t2-0.0236616773)*t2+
  282 + 0.1777582922)*t2-0.8888839649)*t2+2.6666660544)*t2-
  283 + 3.999999971)*t2+1.9999999998);
  284 + dtmp = (((((((-0.567433e-4*t2+0.859977e-3)*t2-0.94855882e-2)*t2+
  285 + 0.0772975809)*t2-0.4261737419)*t2+1.4216421221)*t2-
  286 + 2.3498519931)*t2+1.0766115157)*t2+0.3674669052;
  287 + y0 = M_2_PI*log(0.5*x)*j0+dtmp;
  288 + dtmp = (((((((0.6535773e-3*t2-0.0108175626)*t2+0.107657607)*t2-
  289 + 0.7268945577)*t2+3.1261399273)*t2-7.3980241381)*t2+
  290 + 6.8529236342)*t2+0.3932562018)*t2-0.6366197726;
  291 + y1 = M_2_PI*log(0.5*x)*j1+dtmp/x;
  292 + }
  293 + else {
  294 + t = 4.0/x;
  295 + t2 = t*t;
  296 + a0 = sqrt(M_2_PI/x);
  297 + p0 = ((((-0.9285e-5*t2+0.43506e-4)*t2-0.122226e-3)*t2+
  298 + 0.434725e-3)*t2-0.4394275e-2)*t2+0.999999997;
  299 + q0 = t*(((((0.8099e-5*t2-0.35614e-4)*t2+0.85844e-4)*t2-
  300 + 0.218024e-3)*t2+0.1144106e-2)*t2-0.031249995);
  301 + ta0 = x-M_PI_4;
  302 + j0 = a0*(p0*cos(ta0)-q0*sin(ta0));
  303 + y0 = a0*(p0*sin(ta0)+q0*cos(ta0));
  304 + p1 = ((((0.10632e-4*t2-0.50363e-4)*t2+0.145575e-3)*t2
  305 + -0.559487e-3)*t2+0.7323931e-2)*t2+1.000000004;
  306 + q1 = t*(((((-0.9173e-5*t2+0.40658e-4)*t2-0.99941e-4)*t2
  307 + +0.266891e-3)*t2-0.1601836e-2)*t2+0.093749994);
  308 + ta1 = x-0.75*M_PI;
  309 + j1 = a0*(p1*cos(ta1)-q1*sin(ta1));
  310 + y1 = a0*(p1*sin(ta1)+q1*cos(ta1));
  311 + }
  312 + j0p = -j1;
  313 + j1p = j0-j1/x;
  314 + y0p = -y1;
  315 + y1p = y0-y1/x;
  316 + return 0;
  317 +}
  318 +template<typename P>
  319 +int msta1(P x,int mp)
  320 +{
  321 + P a0,f0,f1,f;
  322 + int i,n0,n1,nn;
  323 +
  324 + a0 = fabs(x);
  325 + n0 = (int)(1.1*a0)+1;
  326 + f0 = 0.5*log10(6.28*n0)-n0*log10(1.36*a0/n0)-mp;
  327 + n1 = n0+5;
  328 + f1 = 0.5*log10(6.28*n1)-n1*log10(1.36*a0/n1)-mp;
  329 + for (i=0;i<20;i++) {
  330 + nn = (int)(n1-(n1-n0)/(1.0-f0/f1));
  331 + f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-mp;
  332 + if (std::abs(nn-n1) < 1) break;
  333 + n0 = n1;
  334 + f0 = f1;
  335 + n1 = nn;
  336 + f1 = f;
  337 + }
  338 + return nn;
  339 +}
  340 +template<typename P>
  341 +int msta2(P x,int n,int mp)
  342 +{
  343 + P a0,ejn,hmp,f0,f1,f,obj;
  344 + int i,n0,n1,nn;
  345 +
  346 + a0 = fabs(x);
  347 + hmp = 0.5*mp;
  348 + ejn = 0.5*log10(6.28*n)-n*log10(1.36*a0/n);
  349 + if (ejn <= hmp) {
  350 + obj = mp;
  351 + n0 = (int)(1.1*a0);
  352 + if (n0 < 1) n0 = 1;
  353 + }
  354 + else {
  355 + obj = hmp+ejn;
  356 + n0 = n;
  357 + }
  358 + f0 = 0.5*log10(6.28*n0)-n0*log10(1.36*a0/n0)-obj;
  359 + n1 = n0+5;
  360 + f1 = 0.5*log10(6.28*n1)-n1*log10(1.36*a0/n1)-obj;
  361 + for (i=0;i<20;i++) {
  362 + nn = (int)(n1-(n1-n0)/(1.0-f0/f1));
  363 + f = 0.5*log10(6.28*nn)-nn*log10(1.36*a0/nn)-obj;
  364 + if (std::abs(nn-n1) < 1) break;
  365 + n0 = n1;
  366 + f0 = f1;
  367 + n1 = nn;
  368 + f1 = f;
  369 + }
  370 + return nn+10;
  371 +}
  372 +//
  373 +// INPUT:
  374 +// double x -- argument of Bessel function of 1st and 2nd kind.
  375 +// int n -- order
  376 +//
  377 +// OUPUT:
  378 +//
  379 +// int nm -- highest order actually computed (nm <= n)
  380 +// double jn[] -- Bessel function of 1st kind, orders from 0 to nm
  381 +// double yn[] -- Bessel function of 2nd kind, orders from 0 to nm
  382 +// double j'n[]-- derivative of Bessel function of 1st kind,
  383 +// orders from 0 to nm
  384 +// double y'n[]-- derivative of Bessel function of 2nd kind,
  385 +// orders from 0 to nm
  386 +//
  387 +// Computes Bessel functions of all order up to 'n' using recurrence
  388 +// relations. If 'nm' < 'n' only 'nm' orders are returned.
  389 +//
  390 +template<typename P>
  391 +int bessjyna(int n,P x,int &nm,P *jn,P *yn,
  392 + P *jnp,P *ynp)
  393 +{
  394 + P bj0,bj1,f,f0,f1,f2,cs;
  395 + int i,k,m,ecode;
  396 +
  397 + nm = n;
  398 + if ((x < 0.0) || (n < 0)) return 1;
  399 + if (x < 1e-15) {
  400 + for (i=0;i<=n;i++) {
  401 + jn[i] = 0.0;
  402 + yn[i] = -1e308;
  403 + jnp[i] = 0.0;
  404 + ynp[i] = 1e308;
  405 + }
  406 + jn[0] = 1.0;
  407 + jnp[1] = 0.5;
  408 + return 0;
  409 + }
  410 + ecode = bessjy01a(x,jn[0],jn[1],yn[0],yn[1],jnp[0],jnp[1],ynp[0],ynp[1]);
  411 + if (n < 2) return 0;
  412 + bj0 = jn[0];
  413 + bj1 = jn[1];
  414 + if (n < (int)0.9*x) {
  415 + for (k=2;k<=n;k++) {
  416 + jn[k] = 2.0*(k-1.0)*bj1/x-bj0;
  417 + bj0 = bj1;
  418 + bj1 = jn[k];
  419 + }
  420 + }
  421 + else {
  422 + m = msta1(x,200);
  423 + if (m < n) nm = m;
  424 + else m = msta2(x,n,15);
  425 + f2 = 0.0;
  426 + f1 = 1.0e-100;
  427 + for (k=m;k>=0;k--) {
  428 + f = 2.0*(k+1.0)/x*f1-f2;
  429 + if (k <= nm) jn[k] = f;
  430 + f2 = f1;
  431 + f1 = f;
  432 + }
  433 + if (fabs(bj0) > fabs(bj1)) cs = bj0/f;
  434 + else cs = bj1/f2;
  435 + for (k=0;k<=nm;k++) {
  436 + jn[k] *= cs;
  437 + }
  438 + }
  439 + for (k=2;k<=nm;k++) {
  440 + jnp[k] = jn[k-1]-k*jn[k]/x;
  441 + }
  442 + f0 = yn[0];
  443 + f1 = yn[1];
  444 + for (k=2;k<=nm;k++) {
  445 + f = 2.0*(k-1.0)*f1/x-f0;
  446 + yn[k] = f;
  447 + f0 = f1;
  448 + f1 = f;
  449 + }
  450 + for (k=2;k<=nm;k++) {
  451 + ynp[k] = yn[k-1]-k*yn[k]/x;
  452 + }
  453 + return 0;
  454 +}
  455 +//
  456 +// Same input and output conventions as above. Different recurrence
  457 +// relations used for 'x' < 300.
  458 +//
  459 +template<typename P>
  460 +int bessjynb(int n,P x,int &nm,P *jn,P *yn,
  461 + P *jnp,P *ynp)
  462 +{
  463 + P t1,t2,f,f1,f2,bj0,bj1,bjk,by0,by1,cu,s0,su,sv;
  464 + P ec,bs,byk,p0,p1,q0,q1;
  465 + static P a[] = {
  466 + -0.7031250000000000e-1,
  467 + 0.1121520996093750,
  468 + -0.5725014209747314,
  469 + 6.074042001273483};
  470 + static P b[] = {
  471 + 0.7324218750000000e-1,
  472 + -0.2271080017089844,
  473 + 1.727727502584457,
  474 + -2.438052969955606e1};
  475 + static P a1[] = {
  476 + 0.1171875,
  477 + -0.1441955566406250,
  478 + 0.6765925884246826,
  479 + -6.883914268109947};
  480 + static P b1[] = {
  481 + -0.1025390625,
  482 + 0.2775764465332031,
  483 + -1.993531733751297,
  484 + 2.724882731126854e1};
  485 +
  486 + int i,k,m;
  487 + nm = n;
  488 + if ((x < 0.0) || (n < 0)) return 1;
  489 + if (x < 1e-15) {
  490 + for (i=0;i<=n;i++) {
  491 + jn[i] = 0.0;
  492 + yn[i] = -1e308;
  493 + jnp[i] = 0.0;
  494 + ynp[i] = 1e308;
  495 + }
  496 + jn[0] = 1.0;
  497 + jnp[1] = 0.5;
  498 + return 0;
  499 + }
  500 + if (x <= 300.0 || n > (int)(0.9*x)) {
  501 + if (n == 0) nm = 1;
  502 + m = msta1(x,200);
  503 + if (m < nm) nm = m;
  504 + else m = msta2(x,nm,15);
  505 + bs = 0.0;
  506 + su = 0.0;
  507 + sv = 0.0;
  508 + f2 = 0.0;
  509 + f1 = 1.0e-100;
  510 + for (k = m;k>=0;k--) {
  511 + f = 2.0*(k+1.0)/x*f1 - f2;
  512 + if (k <= nm) jn[k] = f;
  513 + if ((k == 2*(int)(k/2)) && (k != 0)) {
  514 + bs += 2.0*f;
  515 +// su += pow(-1,k>>1)*f/(double)k;
  516 + su += (-1)*((k & 2)-1)*f/(P)k;
  517 + }
  518 + else if (k > 1) {
  519 +// sv += pow(-1,k>>1)*k*f/(k*k-1.0);
  520 + sv += (-1)*((k & 2)-1)*(P)k*f/(k*k-1.0);
  521 + }
  522 + f2 = f1;
  523 + f1 = f;
  524 + }
  525 + s0 = bs+f;
  526 + for (k=0;k<=nm;k++) {
  527 + jn[k] /= s0;
  528 + }
  529 + ec = log(0.5*x) +0.5772156649015329;
  530 + by0 = M_2_PI*(ec*jn[0]-4.0*su/s0);
  531 + yn[0] = by0;
  532 + by1 = M_2_PI*((ec-1.0)*jn[1]-jn[0]/x-4.0*sv/s0);
  533 + yn[1] = by1;
  534 + }
  535 + else {
  536 + t1 = x-M_PI_4;
  537 + p0 = 1.0;
  538 + q0 = -0.125/x;
  539 + for (k=0;k<4;k++) {
  540 + p0 += a[k]*pow(x,-2*k-2);
  541 + q0 += b[k]*pow(x,-2*k-3);
  542 + }
  543 + cu = sqrt(M_2_PI/x);
  544 + bj0 = cu*(p0*cos(t1)-q0*sin(t1));
  545 + by0 = cu*(p0*sin(t1)+q0*cos(t1));
  546 + jn[0] = bj0;
  547 + yn[0] = by0;
  548 + t2 = x-0.75*M_PI;
  549 + p1 = 1.0;
  550 + q1 = 0.375/x;
  551 + for (k=0;k<4;k++) {
  552 + p1 += a1[k]*pow(x,-2*k-2);
  553 + q1 += b1[k]*pow(x,-2*k-3);
  554 + }
  555 + bj1 = cu*(p1*cos(t2)-q1*sin(t2));
  556 + by1 = cu*(p1*sin(t2)+q1*cos(t2));
  557 + jn[1] = bj1;
  558 + yn[1] = by1;
  559 + for (k=2;k<=nm;k++) {
  560 + bjk = 2.0*(k-1.0)*bj1/x-bj0;
  561 + jn[k] = bjk;
  562 + bj0 = bj1;
  563 + bj1 = bjk;
  564 + }
  565 + }
  566 + jnp[0] = -jn[1];
  567 + for (k=1;k<=nm;k++) {
  568 + jnp[k] = jn[k-1]-k*jn[k]/x;
  569 + }
  570 + for (k=2;k<=nm;k++) {
  571 + byk = 2.0*(k-1.0)*by1/x-by0;
  572 + yn[k] = byk;
  573 + by0 = by1;
  574 + by1 = byk;
  575 + }
  576 + ynp[0] = -yn[1];
  577 + for (k=1;k<=nm;k++) {
  578 + ynp[k] = yn[k-1]-k*yn[k]/x;
  579 + }
  580 + return 0;
  581 +
  582 +}
  583 +
  584 +// The following routine computes Bessel Jv(x) and Yv(x) for
  585 +// arbitrary positive order (v). For negative order, use:
  586 +//
  587 +// J-v(x) = Jv(x)cos(v pi) - Yv(x)sin(v pi)
  588 +// Y-v(x) = Jv(x)sin(v pi) + Yv(x)cos(v pi)
  589 +//
  590 +template<typename P>
  591 +int bessjyv(P v,P x,P &vm,P *jv,P *yv,
  592 + P *djv,P *dyv)
  593 +{
  594 + P v0,vl,vg,vv,a,a0,r,x2,bjv0,bjv1,bjvl,f,f0,f1,f2;
  595 + P r0,r1,ck,cs,cs0,cs1,sk,qx,px,byv0,byv1,rp,xk,rq;
  596 + P b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk;
  597 + int j,k,l,m,n,kz;
  598 +
  599 + x2 = x*x;
  600 + n = (int)v;
  601 + v0 = v-n;
  602 + if ((x < 0.0) || (v < 0.0)) return 1;
  603 + if (x < 1e-15) {
  604 + for (k=0;k<=n;k++) {
  605 + jv[k] = 0.0;
  606 + yv[k] = -1e308;
  607 + djv[k] = 0.0;
  608 + dyv[k] = 1e308;
  609 + if (v0 == 0.0) {
  610 + jv[0] = 1.0;
  611 + djv[1] = 0.5;
  612 + }
  613 + else djv[0] = 1e308;
  614 + }
  615 + vm = v;
  616 + return 0;
  617 + }
  618 + if (x <= 12.0) {
  619 + for (l=0;l<2;l++) {
  620 + vl = v0 + l;
  621 + bjvl = 1.0;
  622 + r = 1.0;
  623 + for (k=1;k<=40;k++) {
  624 + r *= -0.25*x2/(k*(k+vl));
  625 + bjvl += r;
  626 + if (fabs(r) < fabs(bjvl)*1e-15) break;
  627 + }
  628 + vg = 1.0 + vl;
  629 + a = pow(0.5*x,vl)/gamma(vg);
  630 + if (l == 0) bjv0 = bjvl*a;
  631 + else bjv1 = bjvl*a;
  632 + }
  633 + }
  634 + else {
  635 + if (x >= 50.0) kz = 8;
  636 + else if (x >= 35.0) kz = 10;
  637 + else kz = 11;
  638 + for (j=0;j<2;j++) {
  639 + vv = 4.0*(j+v0)*(j+v0);
  640 + px = 1.0;
  641 + rp = 1.0;
  642 + for (k=1;k<=kz;k++) {
  643 + rp *= (-0.78125e-2)*(vv-pow(4.0*k-3.0,2.0))*
  644 + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*x2);
  645 + px += rp;
  646 + }
  647 + qx = 1.0;
  648 + rq = 1.0;
  649 + for (k=1;k<=kz;k++) {
  650 + rq *= (-0.78125e-2)*(vv-pow(4.0*k-1.0,2.0))*
  651 + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*x2);
  652 + qx += rq;
  653 + }
  654 + qx *= 0.125*(vv-1.0)/x;
  655 + xk = x-(0.5*(j+v0)+0.25)*M_PI;
  656 + a0 = sqrt(M_2_PI/x);
  657 + ck = cos(xk);
  658 + sk = sin(xk);
  659 +
  660 + if (j == 0) {
  661 + bjv0 = a0*(px*ck-qx*sk);
  662 + byv0 = a0*(px*sk+qx*ck);
  663 + }
  664 + else if (j == 1) {
  665 + bjv1 = a0*(px*ck-qx*sk);
  666 + byv1 = a0*(px*sk+qx*ck);
  667 + }
  668 + }
  669 + }
  670 + jv[0] = bjv0;
  671 + jv[1] = bjv1;
  672 + djv[0] = v0*jv[0]/x-jv[1];
  673 + djv[1] = -(1.0+v0)*jv[1]/x+jv[0];
  674 + if ((n >= 2) && (n <= (int)(0.9*x))) {
  675 + f0 = bjv0;
  676 + f1 = bjv1;
  677 + for (k=2;k<=n;k++) {
  678 + f = 2.0*(k+v0-1.0)*f1/x-f0;
  679 + jv[k] = f;
  680 + f0 = f1;
  681 + f1 = f;
  682 + }
  683 + }
  684 + else if (n >= 2) {
  685 + m = msta1(x,200);
  686 + if (m < n) n = m;
  687 + else m = msta2(x,n,15);
  688 + f2 = 0.0;
  689 + f1 = 1.0e-100;
  690 + for (k=m;k>=0;k--) {
  691 + f = 2.0*(v0+k+1.0)*f1/x-f2;
  692 + if (k <= n) jv[k] = f;
  693 + f2 = f1;
  694 + f1 = f;
  695 + }
  696 + if (fabs(bjv0) > fabs(bjv1)) cs = bjv0/f;
  697 + else cs = bjv1/f2;
  698 + for (k=0;k<=n;k++) {
  699 + jv[k] *= cs;
  700 + }
  701 + }
  702 + for (k=2;k<=n;k++) {
  703 + djv[k] = -(k+v0)*jv[k]/x+jv[k-1];
  704 + }
  705 + if (x <= 12.0) {
  706 + if (v0 != 0.0) {
  707 + for (l=0;l<2;l++) {
  708 + vl = v0 +l;
  709 + bjvl = 1.0;
  710 + r = 1.0;
  711 + for (k=1;k<=40;k++) {
  712 + r *= -0.25*x2/(k*(k-vl));
  713 + bjvl += r;
  714 + if (fabs(r) < fabs(bjvl)*1e-15) break;
  715 + }
  716 + vg = 1.0-vl;
  717 + b = pow(2.0/x,vl)/gamma(vg);
  718 + if (l == 0) bju0 = bjvl*b;
  719 + else bju1 = bjvl*b;
  720 + }
  721 + pv0 = M_PI*v0;
  722 + pv1 = M_PI*(1.0+v0);
  723 + byv0 = (bjv0*cos(pv0)-bju0)/sin(pv0);
  724 + byv1 = (bjv1*cos(pv1)-bju1)/sin(pv1);
  725 + }
  726 + else {
  727 + ec = log(0.5*x)+el;
  728 + cs0 = 0.0;
  729 + w0 = 0.0;
  730 + r0 = 1.0;
  731 + for (k=1;k<=30;k++) {
  732 + w0 += 1.0/k;
  733 + r0 *= -0.25*x2/(k*k);
  734 + cs0 += r0*w0;
  735 + }
  736 + byv0 = M_2_PI*(ec*bjv0-cs0);
  737 + cs1 = 1.0;
  738 + w1 = 0.0;
  739 + r1 = 1.0;
  740 + for (k=1;k<=30;k++) {
  741 + w1 += 1.0/k;
  742 + r1 *= -0.25*x2/(k*(k+1));
  743 + cs1 += r1*(2.0*w1+1.0/(k+1.0));
  744 + }
  745 + byv1 = M_2_PI*(ec*bjv1-1.0/x-0.25*x*cs1);
  746 + }
  747 + }
  748 + yv[0] = byv0;
  749 + yv[1] = byv1;
  750 + for (k=2;k<=n;k++) {
  751 + byvk = 2.0*(v0+k-1.0)*byv1/x-byv0;
  752 + yv[k] = byvk;
  753 + byv0 = byv1;
  754 + byv1 = byvk;
  755 + }
  756 + dyv[0] = v0*yv[0]/x-yv[1];
  757 + for (k=1;k<=n;k++) {
  758 + dyv[k] = -(k+v0)*yv[k]/x+yv[k-1];
  759 + }
  760 + vm = n + v0;
  761 + return 0;
  762 +}
  763 +
  764 +template<typename P>
  765 +int bessjyv_sph(int v, P z, P &vm, P* cjv,
  766 + P* cyv, P* cjvp, P* cyvp)
  767 +{
  768 + //first, compute the bessel functions of fractional order
  769 + bessjyv<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
  770 +
  771 + //iterate through each and scale
  772 + for(int n = 0; n<=v; n++)
  773 + {
  774 +
  775 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  776 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
  777 +
  778 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
  779 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
  780 + }
  781 +
  782 + return 0;
  783 +
  784 +}
  785 +
  786 +template<typename P>
  787 +int cbessjy01(complex<P> z,complex<P> &cj0,complex<P> &cj1,
  788 + complex<P> &cy0,complex<P> &cy1,complex<P> &cj0p,
  789 + complex<P> &cj1p,complex<P> &cy0p,complex<P> &cy1p)
  790 +{
  791 + complex<P> z1,z2,cr,cp,cs,cp0,cq0,cp1,cq1,ct1,ct2,cu;
  792 + P a0,w0,w1;
  793 + int k,kz;
  794 +
  795 + static P a[] = {
  796 + -7.03125e-2,
  797 + 0.112152099609375,
  798 + -0.5725014209747314,
  799 + 6.074042001273483,
  800 + -1.100171402692467e2,
  801 + 3.038090510922384e3,
  802 + -1.188384262567832e5,
  803 + 6.252951493434797e6,
  804 + -4.259392165047669e8,
  805 + 3.646840080706556e10,
  806 + -3.833534661393944e12,
  807 + 4.854014686852901e14,
  808 + -7.286857349377656e16,
  809 + 1.279721941975975e19};
  810 + static P b[] = {
  811 + 7.32421875e-2,
  812 + -0.2271080017089844,
  813 + 1.727727502584457,
  814 + -2.438052969955606e1,
  815 + 5.513358961220206e2,
  816 + -1.825775547429318e4,
  817 + 8.328593040162893e5,
  818 + -5.006958953198893e7,
  819 + 3.836255180230433e9,
  820 + -3.649010818849833e11,
  821 + 4.218971570284096e13,
  822 + -5.827244631566907e15,
  823 + 9.476288099260110e17,
  824 + -1.792162323051699e20};
  825 + static P a1[] = {
  826 + 0.1171875,
  827 + -0.1441955566406250,
  828 + 0.6765925884246826,
  829 + -6.883914268109947,
  830 + 1.215978918765359e2,
  831 + -3.302272294480852e3,
  832 + 1.276412726461746e5,
  833 + -6.656367718817688e6,
  834 + 4.502786003050393e8,
  835 + -3.833857520742790e10,
  836 + 4.011838599133198e12,
  837 + -5.060568503314727e14,
  838 + 7.572616461117958e16,
  839 + -1.326257285320556e19};
  840 + static P b1[] = {
  841 + -0.1025390625,
  842 + 0.2775764465332031,
  843 + -1.993531733751297,
  844 + 2.724882731126854e1,
  845 + -6.038440767050702e2,
  846 + 1.971837591223663e4,
  847 + -8.902978767070678e5,
  848 + 5.310411010968522e7,
  849 + -4.043620325107754e9,
  850 + 3.827011346598605e11,
  851 + -4.406481417852278e13,
  852 + 6.065091351222699e15,
  853 + -9.833883876590679e17,
  854 + 1.855045211579828e20};
  855 +
  856 + a0 = abs(z);
  857 + z2 = z*z;
  858 + z1 = z;
  859 + if (a0 == 0.0) {
  860 + cj0 = cone;
  861 + cj1 = czero;
  862 + cy0 = complex<P>(-1e308,0);
  863 + cy1 = complex<P>(-1e308,0);
  864 + cj0p = czero;
  865 + cj1p = complex<P>(0.5,0.0);
  866 + cy0p = complex<P>(1e308,0);
  867 + cy1p = complex<P>(1e308,0);
  868 + return 0;
  869 + }
  870 + if (real(z) < 0.0) z1 = -z;
  871 + if (a0 <= 12.0) {
  872 + cj0 = cone;
  873 + cr = cone;
  874 + for (k=1;k<=40;k++) {
  875 + cr *= -0.25*z2/(P)(k*k);
  876 + cj0 += cr;
  877 + if (abs(cr) < abs(cj0)*eps) break;
  878 + }
  879 + cj1 = cone;
  880 + cr = cone;
  881 + for (k=1;k<=40;k++) {
  882 + cr *= -0.25*z2/(k*(k+1.0));
  883 + cj1 += cr;
  884 + if (abs(cr) < abs(cj1)*eps) break;
  885 + }
  886 + cj1 *= 0.5*z1;
  887 + w0 = 0.0;
  888 + cr = cone;
  889 + cs = czero;
  890 + for (k=1;k<=40;k++) {
  891 + w0 += 1.0/k;
  892 + cr *= -0.25*z2/(P)(k*k);
  893 + cp = cr*w0;
  894 + cs += cp;
  895 + if (abs(cp) < abs(cs)*eps) break;
  896 + }
  897 + cy0 = M_2_PI*((log(0.5*z1)+el)*cj0-cs);
  898 + w1 = 0.0;
  899 + cr = cone;
  900 + cs = cone;
  901 + for (k=1;k<=40;k++) {
  902 + w1 += 1.0/k;
  903 + cr *= -0.25*z2/(k*(k+1.0));
  904 + cp = cr*(2.0*w1+1.0/(k+1.0));
  905 + cs += cp;
  906 + if (abs(cp) < abs(cs)*eps) break;
  907 + }
  908 + cy1 = M_2_PI*((log(0.5*z1)+el)*cj1-1.0/z1-0.25*z1*cs);
  909 + }
  910 + else {
  911 + if (a0 >= 50.0) kz = 8; // can be changed to 10
  912 + else if (a0 >= 35.0) kz = 10; // " " " 12
  913 + else kz = 12; // " " " 14
  914 + ct1 = z1 - M_PI_4;
  915 + cp0 = cone;
  916 + for (k=0;k<kz;k++) {
  917 + cp0 += a[k]*pow(z1,-2.0*k-2.0);
  918 + }
  919 + cq0 = -0.125/z1;
  920 + for (k=0;k<kz;k++) {
  921 + cq0 += b[k]*pow(z1,-2.0*k-3.0);
  922 + }
  923 + cu = sqrt(M_2_PI/z1);
  924 + cj0 = cu*(cp0*cos(ct1)-cq0*sin(ct1));
  925 + cy0 = cu*(cp0*sin(ct1)+cq0*cos(ct1));
  926 + ct2 = z1 - 0.75*M_PI;
  927 + cp1 = cone;
  928 + for (k=0;k<kz;k++) {
  929 + cp1 += a1[k]*pow(z1,-2.0*k-2.0);
  930 + }
  931 + cq1 = 0.375/z1;
  932 + for (k=0;k<kz;k++) {
  933 + cq1 += b1[k]*pow(z1,-2.0*k-3.0);
  934 + }
  935 + cj1 = cu*(cp1*cos(ct2)-cq1*sin(ct2));
  936 + cy1 = cu*(cp1*sin(ct2)+cq1*cos(ct2));
  937 + }
  938 + if (real(z) < 0.0) {
  939 + if (imag(z) < 0.0) {
  940 + cy0 -= 2.0*cii*cj0;
  941 + cy1 = -(cy1-2.0*cii*cj1);
  942 + }
  943 + else if (imag(z) > 0.0) {
  944 + cy0 += 2.0*cii*cj0;
  945 + cy1 = -(cy1+2.0*cii*cj1);
  946 + }
  947 + cj1 = -cj1;
  948 + }
  949 + cj0p = -cj1;
  950 + cj1p = cj0-cj1/z;
  951 + cy0p = -cy1;
  952 + cy1p = cy0-cy1/z;
  953 + return 0;
  954 +}
  955 +
  956 +template<typename P>
  957 +int cbessjyna(int n,complex<P> z,int &nm,complex<P> *cj,
  958 + complex<P> *cy,complex<P> *cjp,complex<P> *cyp)
  959 +{
  960 + complex<P> cbj0,cbj1,cby0,cby1,cj0,cjk,cj1,cf,cf1,cf2;
  961 + complex<P> cs,cg0,cg1,cyk,cyl1,cyl2,cylk,cp11,cp12,cp21,cp22;
  962 + complex<P> ch0,ch1,ch2;
  963 + P a0,yak,ya1,ya0,wa;
  964 + int m,k,lb,lb0;
  965 +
  966 + if (n < 0) return 1;
  967 + a0 = abs(z);
  968 + nm = n;
  969 + if (a0 < 1.0e-100) {
  970 + for (k=0;k<=n;k++) {
  971 + cj[k] = czero;
  972 + cy[k] = complex<P> (-1e308,0);
  973 + cjp[k] = czero;
  974 + cyp[k] = complex<P>(1e308,0);
  975 + }
  976 + cj[0] = cone;
  977 + cjp[1] = complex<P>(0.5,0.0);
  978 + return 0;
  979 + }
  980 + cbessjy01(z,cj[0],cj[1],cy[0],cy[1],cjp[0],cjp[1],cyp[0],cyp[1]);
  981 + cbj0 = cj[0];
  982 + cbj1 = cj[1];
  983 + cby0 = cy[0];
  984 + cby1 = cy[1];
  985 + if (n <= 1) return 0;
  986 + if (n < (int)0.25*a0) {
  987 + cj0 = cbj0;
  988 + cj1 = cbj1;
  989 + for (k=2;k<=n;k++) {
  990 + cjk = 2.0*(k-1.0)*cj1/z-cj0;
  991 + cj[k] = cjk;
  992 + cj0 = cj1;
  993 + cj1 = cjk;
  994 + }
  995 + }
  996 + else {
  997 + m = msta1(a0,200);
  998 + if (m < n) nm = m;
  999 + else m = msta2(a0,n,15);
  1000 + cf2 = czero;
  1001 + cf1 = complex<P> (1.0e-100,0.0);
  1002 + for (k=m;k>=0;k--) {
  1003 + cf = 2.0*(k+1.0)*cf1/z-cf2;
  1004 + if (k <=nm) cj[k] = cf;
  1005 + cf2 = cf1;
  1006 + cf1 = cf;
  1007 + }
  1008 + if (abs(cbj0) > abs(cbj1)) cs = cbj0/cf;
  1009 + else cs = cbj1/cf2;
  1010 + for (k=0;k<=nm;k++) {
  1011 + cj[k] *= cs;
  1012 + }
  1013 + }
  1014 + for (k=2;k<=nm;k++) {
  1015 + cjp[k] = cj[k-1]-(P)k*cj[k]/z;
  1016 + }
  1017 + ya0 = abs(cby0);
  1018 + lb = 0;
  1019 + cg0 = cby0;
  1020 + cg1 = cby1;
  1021 + for (k=2;k<=nm;k++) {
  1022 + cyk = 2.0*(k-1.0)*cg1/z-cg0;
  1023 + yak = abs(cyk);
  1024 + ya1 = abs(cg0);
  1025 + if ((yak < ya0) && (yak < ya1)) lb = k;
  1026 + cy[k] = cyk;
  1027 + cg0 = cg1;
  1028 + cg1 = cyk;
  1029 + }
  1030 + lb0 = 0;
  1031 + if ((lb > 4) && (imag(z) != 0.0)) {
  1032 + while (lb != lb0) {
  1033 + ch2 = cone;
  1034 + ch1 = czero;
  1035 + lb0 = lb;
  1036 + for (k=lb;k>=1;k--) {
  1037 + ch0 = 2.0*k*ch1/z-ch2;
  1038 + ch2 = ch1;
  1039 + ch1 = ch0;
  1040 + }
  1041 + cp12 = ch0;
  1042 + cp22 = ch2;
  1043 + ch2 = czero;
  1044 + ch1 = cone;
  1045 + for (k=lb;k>=1;k--) {
  1046 + ch0 = 2.0*k*ch1/z-ch2;
  1047 + ch2 = ch1;
  1048 + ch1 = ch0;
  1049 + }
  1050 + cp11 = ch0;
  1051 + cp21 = ch2;
  1052 + if (lb == nm)
  1053 + cj[lb+1] = 2.0*lb*cj[lb]/z-cj[lb-1];
  1054 + if (abs(cj[0]) > abs(cj[1])) {
  1055 + cy[lb+1] = (cj[lb+1]*cby0-2.0*cp11/(M_PI*z))/cj[0];
  1056 + cy[lb] = (cj[lb]*cby0+2.0*cp12/(M_PI*z))/cj[0];
  1057 + }
  1058 + else {
  1059 + cy[lb+1] = (cj[lb+1]*cby1-2.0*cp21/(M_PI*z))/cj[1];
  1060 + cy[lb] = (cj[lb]*cby1+2.0*cp22/(M_PI*z))/cj[1];
  1061 + }
  1062 + cyl2 = cy[lb+1];
  1063 + cyl1 = cy[lb];
  1064 + for (k=lb-1;k>=0;k--) {
  1065 + cylk = 2.0*(k+1.0)*cyl1/z-cyl2;
  1066 + cy[k] = cylk;
  1067 + cyl2 = cyl1;
  1068 + cyl1 = cylk;
  1069 + }
  1070 + cyl1 = cy[lb];
  1071 + cyl2 = cy[lb+1];
  1072 + for (k=lb+1;k<n;k++) {
  1073 + cylk = 2.0*k*cyl2/z-cyl1;
  1074 + cy[k+1] = cylk;
  1075 + cyl1 = cyl2;
  1076 + cyl2 = cylk;
  1077 + }
  1078 + for (k=2;k<=nm;k++) {
  1079 + wa = abs(cy[k]);
  1080 + if (wa < abs(cy[k-1])) lb = k;
  1081 + }
  1082 + }
  1083 + }
  1084 + for (k=2;k<=nm;k++) {
  1085 + cyp[k] = cy[k-1]-(P)k*cy[k]/z;
  1086 + }
  1087 + return 0;
  1088 +}
  1089 +
  1090 +template<typename P>
  1091 +int cbessjynb(int n,complex<P> z,int &nm,complex<P> *cj,
  1092 + complex<P> *cy,complex<P> *cjp,complex<P> *cyp)
  1093 +{
  1094 + complex<P> cf,cf0,cf1,cf2,cbs,csu,csv,cs0,ce;
  1095 + complex<P> ct1,cp0,cq0,cp1,cq1,cu,cbj0,cby0,cbj1,cby1;
  1096 + complex<P> cyy,cbjk,ct2;
  1097 + P a0,y0;
  1098 + int k,m;
  1099 + static P a[] = {
  1100 + -0.7031250000000000e-1,
  1101 + 0.1121520996093750,
  1102 + -0.5725014209747314,
  1103 + 6.074042001273483};
  1104 + static P b[] = {
  1105 + 0.7324218750000000e-1,
  1106 + -0.2271080017089844,
  1107 + 1.727727502584457,
  1108 + -2.438052969955606e1};
  1109 + static P a1[] = {
  1110 + 0.1171875,
  1111 + -0.1441955566406250,
  1112 + 0.6765925884246826,
  1113 + -6.883914268109947};
  1114 + static P b1[] = {
  1115 + -0.1025390625,
  1116 + 0.2775764465332031,
  1117 + -1.993531733751297,
  1118 + 2.724882731126854e1};
  1119 +
  1120 + y0 = abs(imag(z));
  1121 + a0 = abs(z);
  1122 + nm = n;
  1123 + if (a0 < 1.0e-100) {
  1124 + for (k=0;k<=n;k++) {
  1125 + cj[k] = czero;
  1126 + cy[k] = complex<P> (-1e308,0);
  1127 + cjp[k] = czero;
  1128 + cyp[k] = complex<P>(1e308,0);
  1129 + }
  1130 + cj[0] = cone;
  1131 + cjp[1] = complex<P>(0.5,0.0);
  1132 + return 0;
  1133 + }
  1134 + if ((a0 <= 300.0) || (n > (int)(0.25*a0))) {
  1135 + if (n == 0) nm = 1;
  1136 + m = msta1(a0,200);
  1137 + if (m < nm) nm = m;
  1138 + else m = msta2(a0,nm,15);
  1139 + cbs = czero;
  1140 + csu = czero;
  1141 + csv = czero;
  1142 + cf2 = czero;
  1143 + cf1 = complex<P> (1.0e-100,0.0);
  1144 + for (k=m;k>=0;k--) {
  1145 + cf = 2.0*(k+1.0)*cf1/z-cf2;
  1146 + if (k <= nm) cj[k] = cf;
  1147 + if (((k & 1) == 0) && (k != 0)) {
  1148 + if (y0 <= 1.0) {
  1149 + cbs += 2.0*cf;
  1150 + }
  1151 + else {
  1152 + cbs += (-1)*((k & 2)-1)*2.0*cf;
  1153 + }
  1154 + csu += (P)((-1)*((k & 2)-1))*cf/(P)k;
  1155 + }
  1156 + else if (k > 1) {
  1157 + csv += (P)((-1)*((k & 2)-1)*k)*cf/(P)(k*k-1.0);
  1158 + }
  1159 + cf2 = cf1;
  1160 + cf1 = cf;
  1161 + }
  1162 + if (y0 <= 1.0) cs0 = cbs+cf;
  1163 + else cs0 = (cbs+cf)/cos(z);
  1164 + for (k=0;k<=nm;k++) {
  1165 + cj[k] /= cs0;
  1166 + }
  1167 + ce = log(0.5*z)+el;
  1168 + cy[0] = M_2_PI*(ce*cj[0]-4.0*csu/cs0);
  1169 + cy[1] = M_2_PI*(-cj[0]/z+(ce-1.0)*cj[1]-4.0*csv/cs0);
  1170 + }
  1171 + else {
  1172 + ct1 = z-M_PI_4;
  1173 + cp0 = cone;
  1174 + for (k=0;k<4;k++) {
  1175 + cp0 += a[k]*pow(z,-2.0*k-2.0);
  1176 + }
  1177 + cq0 = -0.125/z;
  1178 + for (k=0;k<4;k++) {
  1179 + cq0 += b[k] *pow(z,-2.0*k-3.0);
  1180 + }
  1181 + cu = sqrt(M_2_PI/z);
  1182 + cbj0 = cu*(cp0*cos(ct1)-cq0*sin(ct1));
  1183 + cby0 = cu*(cp0*sin(ct1)+cq0*cos(ct1));
  1184 + cj[0] = cbj0;
  1185 + cy[0] = cby0;
  1186 + ct2 = z-0.75*M_PI;
  1187 + cp1 = cone;
  1188 + for (k=0;k<4;k++) {
  1189 + cp1 += a1[k]*pow(z,-2.0*k-2.0);
  1190 + }
  1191 + cq1 = 0.375/z;
  1192 + for (k=0;k<4;k++) {
  1193 + cq1 += b1[k]*pow(z,-2.0*k-3.0);
  1194 + }
  1195 + cbj1 = cu*(cp1*cos(ct2)-cq1*sin(ct2));
  1196 + cby1 = cu*(cp1*sin(ct2)+cq1*cos(ct2));
  1197 + cj[1] = cbj1;
  1198 + cy[1] = cby1;
  1199 + for (k=2;k<=n;k++) {
  1200 + cbjk = 2.0*(k-1.0)*cbj1/z-cbj0;
  1201 + cj[k] = cbjk;
  1202 + cbj0 = cbj1;
  1203 + cbj1 = cbjk;
  1204 + }
  1205 + }
  1206 + cjp[0] = -cj[1];
  1207 + for (k=1;k<=nm;k++) {
  1208 + cjp[k] = cj[k-1]-(P)k*cj[k]/z;
  1209 + }
  1210 + if (abs(cj[0]) > 1.0)
  1211 + cy[1] = (cj[1]*cy[0]-2.0/(M_PI*z))/cj[0];
  1212 + for (k=2;k<=nm;k++) {
  1213 + if (abs(cj[k-1]) >= abs(cj[k-2]))
  1214 + cyy = (cj[k]*cy[k-1]-2.0/(M_PI*z))/cj[k-1];
  1215 + else
  1216 + cyy = (cj[k]*cy[k-2]-4.0*(k-1.0)/(M_PI*z*z))/cj[k-2];
  1217 + cy[k] = cyy;
  1218 + }
  1219 + cyp[0] = -cy[1];
  1220 + for (k=1;k<=nm;k++) {
  1221 + cyp[k] = cy[k-1]-(P)k*cy[k]/z;
  1222 + }
  1223 +
  1224 + return 0;
  1225 +}
  1226 +
  1227 +template<typename P>
  1228 +int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
  1229 + complex<P>*cyv,complex<P>*cjvp,complex<P>*cyvp)
  1230 +{
  1231 + complex<P> z1,z2,zk,cjvl,cr,ca,cjv0,cjv1,cpz,crp;
  1232 + complex<P> cqz,crq,ca0,cck,csk,cyv0,cyv1,cju0,cju1,cb;
  1233 + complex<P> cs,cs0,cr0,cs1,cr1,cec,cf,cf0,cf1,cf2;
  1234 + complex<P> cfac0,cfac1,cg0,cg1,cyk,cp11,cp12,cp21,cp22;
  1235 + complex<P> ch0,ch1,ch2,cyl1,cyl2,cylk;
  1236 +
  1237 + P a0,v0,pv0,pv1,vl,ga,gb,vg,vv,w0,w1,ya0,yak,ya1,wa;
  1238 + int j,n,k,kz,l,lb,lb0,m;
  1239 +
  1240 + a0 = abs(z);
  1241 + z1 = z;
  1242 + z2 = z*z;
  1243 + n = (int)v;
  1244 +
  1245 +
  1246 + v0 = v-n;
  1247 +
  1248 + pv0 = M_PI*v0;
  1249 + pv1 = M_PI*(1.0+v0);
  1250 + if (a0 < 1.0e-100) {
  1251 + for (k=0;k<=n;k++) {
  1252 + cjv[k] = czero;
  1253 + cyv[k] = complex<P> (-1e308,0);
  1254 + cjvp[k] = czero;
  1255 + cyvp[k] = complex<P> (1e308,0);
  1256 +
  1257 + }
  1258 + if (v0 == 0.0) {
  1259 + cjv[0] = cone;
  1260 + cjvp[1] = complex<P> (0.5,0.0);
  1261 + }
  1262 + else {
  1263 + cjvp[0] = complex<P> (1e308,0);
  1264 + }
  1265 + vm = v;
  1266 + return 0;
  1267 + }
  1268 + if (real(z1) < 0.0) z1 = -z;
  1269 + if (a0 <= 12.0) {
  1270 + for (l=0;l<2;l++) {
  1271 + vl = v0+l;
  1272 + cjvl = cone;
  1273 + cr = cone;
  1274 + for (k=1;k<=40;k++) {
  1275 + cr *= -0.25*z2/(k*(k+vl));
  1276 + cjvl += cr;
  1277 + if (abs(cr) < abs(cjvl)*eps) break;
  1278 + }
  1279 + vg = 1.0 + vl;
  1280 + ga = gamma(vg);
  1281 + ca = pow(0.5*z1,vl)/ga;
  1282 + if (l == 0) cjv0 = cjvl*ca;
  1283 + else cjv1 = cjvl*ca;
  1284 + }
  1285 + }
  1286 + else {
  1287 + if (a0 >= 50.0) kz = 8;
  1288 + else if (a0 >= 35.0) kz = 10;
  1289 + else kz = 11;
  1290 + for (j=0;j<2;j++) {
  1291 + vv = 4.0*(j+v0)*(j+v0);
  1292 + cpz = cone;
  1293 + crp = cone;
  1294 + for (k=1;k<=kz;k++) {
  1295 + crp = -0.78125e-2*crp*(vv-pow(4.0*k-3.0,2.0))*
  1296 + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*z2);
  1297 + cpz += crp;
  1298 + }
  1299 + cqz = cone;
  1300 + crq = cone;
  1301 + for (k=1;k<=kz;k++) {
  1302 + crq = -0.78125e-2*crq*(vv-pow(4.0*k-1.0,2.0))*
  1303 + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*z2);
  1304 + cqz += crq;
  1305 + }
  1306 + cqz *= 0.125*(vv-1.0)/z1;
  1307 + zk = z1-(0.5*(j+v0)+0.25)*M_PI;
  1308 + ca0 = sqrt(M_2_PI/z1);
  1309 + cck = cos(zk);
  1310 + csk = sin(zk);
  1311 + if (j == 0) {
  1312 + cjv0 = ca0*(cpz*cck-cqz*csk);
  1313 + cyv0 = ca0*(cpz*csk+cqz+cck);
  1314 + }
  1315 + else {
  1316 + cjv1 = ca0*(cpz*cck-cqz*csk);
  1317 + cyv1 = ca0*(cpz*csk+cqz*cck);
  1318 + }
  1319 + }
  1320 + }
  1321 + if (a0 <= 12.0) {
  1322 + if (v0 != 0.0) {
  1323 + for (l=0;l<2;l++) {
  1324 + vl = v0+l;
  1325 + cjvl = cone;
  1326 + cr = cone;
  1327 + for (k=1;k<=40;k++) {
  1328 + cr *= -0.25*z2/(k*(k-vl));
  1329 + cjvl += cr;
  1330 + if (abs(cr) < abs(cjvl)*eps) break;
  1331 + }
  1332 + vg = 1.0-vl;
  1333 + gb = gamma(vg);
  1334 + cb = pow(2.0/z1,vl)/gb;
  1335 + if (l == 0) cju0 = cjvl*cb;
  1336 + else cju1 = cjvl*cb;
  1337 + }
  1338 + cyv0 = (cjv0*cos(pv0)-cju0)/sin(pv0);
  1339 + cyv1 = (cjv1*cos(pv1)-cju1)/sin(pv1);
  1340 + }
  1341 + else {
  1342 + cec = log(0.5*z1)+el;
  1343 + cs0 = czero;
  1344 + w0 = 0.0;
  1345 + cr0 = cone;
  1346 + for (k=1;k<=30;k++) {
  1347 + w0 += 1.0/k;
  1348 + cr0 *= -0.25*z2/(P)(k*k);
  1349 + cs0 += cr0*w0;
  1350 + }
  1351 + cyv0 = M_2_PI*(cec*cjv0-cs0);
  1352 + cs1 = cone;
  1353 + w1 = 0.0;
  1354 + cr1 = cone;
  1355 + for (k=1;k<=30;k++) {
  1356 + w1 += 1.0/k;
  1357 + cr1 *= -0.25*z2/(k*(k+1.0));
  1358 + cs1 += cr1*(2.0*w1+1.0/(k+1.0));
  1359 + }
  1360 + cyv1 = M_2_PI*(cec*cjv1-1.0/z1-0.25*z1*cs1);
  1361 + }
  1362 + }
  1363 + if (real(z) < 0.0) {
  1364 + cfac0 = exp(pv0*cii);
  1365 + cfac1 = exp(pv1*cii);
  1366 + if (imag(z) < 0.0) {
  1367 + cyv0 = cfac0*cyv0-(P)2.0*(complex<P>)cii*cos(pv0)*cjv0;
  1368 + cyv1 = cfac1*cyv1-(P)2.0*(complex<P>)cii*cos(pv1)*cjv1;
  1369 + cjv0 /= cfac0;
  1370 + cjv1 /= cfac1;
  1371 + }
  1372 + else if (imag(z) > 0.0) {
  1373 + cyv0 = cyv0/cfac0+(P)2.0*(complex<P>)cii*cos(pv0)*cjv0;
  1374 + cyv1 = cyv1/cfac1+(P)2.0*(complex<P>)cii*cos(pv1)*cjv1;
  1375 + cjv0 *= cfac0;
  1376 + cjv1 *= cfac1;
  1377 + }
  1378 + }
  1379 + cjv[0] = cjv0;
  1380 + cjv[1] = cjv1;
  1381 + if ((n >= 2) && (n <= (int)(0.25*a0))) {
  1382 + cf0 = cjv0;
  1383 + cf1 = cjv1;
  1384 + for (k=2;k<= n;k++) {
  1385 + cf = 2.0*(k+v0-1.0)*cf1/z-cf0;
  1386 + cjv[k] = cf;
  1387 + cf0 = cf1;
  1388 + cf1 = cf;
  1389 + }
  1390 + }
  1391 + else if (n >= 2) {
  1392 + m = msta1(a0,200);
  1393 + if (m < n) n = m;
  1394 + else m = msta2(a0,n,15);
  1395 + cf2 = czero;
  1396 + cf1 = complex<P>(1.0e-100,0.0);
  1397 + for (k=m;k>=0;k--) {
  1398 + cf = 2.0*(v0+k+1.0)*cf1/z-cf2;
  1399 + if (k <= n) cjv[k] = cf;
  1400 + cf2 = cf1;
  1401 + cf1 = cf;
  1402 + }
  1403 + if (abs(cjv0) > abs(cjv1)) cs = cjv0/cf;
  1404 + else cs = cjv1/cf2;
  1405 + for (k=0;k<=n;k++) {
  1406 + cjv[k] *= cs;
  1407 + }
  1408 + }
  1409 + cjvp[0] = v0*cjv[0]/z-cjv[1];
  1410 + for (k=1;k<=n;k++) {
  1411 + cjvp[k] = -(k+v0)*cjv[k]/z+cjv[k-1];
  1412 + }
  1413 + cyv[0] = cyv0;
  1414 + cyv[1] = cyv1;
  1415 + ya0 = abs(cyv0);
  1416 + lb = 0;
  1417 + cg0 = cyv0;
  1418 + cg1 = cyv1;
  1419 + for (k=2;k<=n;k++) {
  1420 + cyk = 2.0*(v0+k-1.0)*cg1/z-cg0;
  1421 + yak = abs(cyk);
  1422 + ya1 = abs(cg0);
  1423 + if ((yak < ya0) && (yak< ya1)) lb = k;
  1424 + cyv[k] = cyk;
  1425 + cg0 = cg1;
  1426 + cg1 = cyk;
  1427 + }
  1428 + lb0 = 0;
  1429 + if ((lb > 4) && (imag(z) != 0.0)) {
  1430 + while(lb != lb0) {
  1431 + ch2 = cone;
  1432 + ch1 = czero;
  1433 + lb0 = lb;
  1434 + for (k=lb;k>=1;k--) {
  1435 + ch0 = 2.0*(k+v0)*ch1/z-ch2;
  1436 + ch2 = ch1;
  1437 + ch1 = ch0;
  1438 + }
  1439 + cp12 = ch0;
  1440 + cp22 = ch2;
  1441 + ch2 = czero;
  1442 + ch1 = cone;
  1443 + for (k=lb;k>=1;k--) {
  1444 + ch0 = 2.0*(k+v0)*ch1/z-ch2;
  1445 + ch2 = ch1;
  1446 + ch1 = ch0;
  1447 + }
  1448 + cp11 = ch0;
  1449 + cp21 = ch2;
  1450 + if (lb == n)
  1451 + cjv[lb+1] = 2.0*(lb+v0)*cjv[lb]/z-cjv[lb-1];
  1452 + if (abs(cjv[0]) > abs(cjv[1])) {
  1453 + cyv[lb+1] = (cjv[lb+1]*cyv0-2.0*cp11/(M_PI*z))/cjv[0];
  1454 + cyv[lb] = (cjv[lb]*cyv0+2.0*cp12/(M_PI*z))/cjv[0];
  1455 + }
  1456 + else {
  1457 + cyv[lb+1] = (cjv[lb+1]*cyv1-2.0*cp21/(M_PI*z))/cjv[1];
  1458 + cyv[lb] = (cjv[lb]*cyv1+2.0*cp22/(M_PI*z))/cjv[1];
  1459 + }
  1460 + cyl2 = cyv[lb+1];
  1461 + cyl1 = cyv[lb];
  1462 + for (k=lb-1;k>=0;k--) {
  1463 + cylk = 2.0*(k+v0+1.0)*cyl1/z-cyl2;
  1464 + cyv[k] = cylk;
  1465 + cyl2 = cyl1;
  1466 + cyl1 = cylk;
  1467 + }
  1468 + cyl1 = cyv[lb];
  1469 + cyl2 = cyv[lb+1];
  1470 + for (k=lb+1;k<n;k++) {
  1471 + cylk = 2.0*(k+v0)*cyl2/z-cyl1;
  1472 + cyv[k+1] = cylk;
  1473 + cyl1 = cyl2;
  1474 + cyl2 = cylk;
  1475 + }
  1476 + for (k=2;k<=n;k++) {
  1477 + wa = abs(cyv[k]);
  1478 + if (wa < abs(cyv[k-1])) lb = k;
  1479 + }
  1480 + }
  1481 + }
  1482 + cyvp[0] = v0*cyv[0]/z-cyv[1];
  1483 + for (k=1;k<=n;k++) {
  1484 + cyvp[k] = cyv[k-1]-(k+v0)*cyv[k]/z;
  1485 + }
  1486 + vm = n+v0;
  1487 + return 0;
  1488 +}
  1489 +
  1490 +template<typename P>
  1491 +int cbessjyva_sph(int v,complex<P> z,P &vm,complex<P>*cjv,
  1492 + complex<P>*cyv,complex<P>*cjvp,complex<P>*cyvp)
  1493 +{
  1494 + //first, compute the bessel functions of fractional order
  1495 + cbessjyva<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
  1496 +
  1497 + //iterate through each and scale
  1498 + for(int n = 0; n<=v; n++)
  1499 + {
  1500 +
  1501 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  1502 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
  1503 +
  1504 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
  1505 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
  1506 + }
  1507 +
  1508 + return 0;
  1509 +
  1510 +}
  1511 +
  1512 +} //end namespace rts
  1513 +
  1514 +
  1515 +#endif
... ...
stim/math/vec3.h.orig 0 → 100644
  1 +#ifndef STIM_VEC3_H
  2 +#define STIM_VEC3_H
  3 +
  4 +
  5 +#include <stim/cuda/cudatools/callable.h>
  6 +#include <cmath>
  7 +
  8 +
  9 +namespace stim{
  10 +
  11 +
  12 +/// A class designed to act as a 3D vector with CUDA compatibility
  13 +template<typename T>
  14 +class vec3{
  15 +
  16 +protected:
  17 + T ptr[3];
  18 +
  19 +public:
  20 +
  21 + CUDA_CALLABLE vec3(){}
  22 +
  23 + CUDA_CALLABLE vec3(T v){
  24 + ptr[0] = ptr[1] = ptr[2] = v;
  25 + }
  26 +
  27 + CUDA_CALLABLE vec3(T x, T y, T z){
  28 + ptr[0] = x;
  29 + ptr[1] = y;
  30 + ptr[2] = z;
  31 + }
  32 +
  33 + //copy constructor
  34 + CUDA_CALLABLE vec3( const vec3<T>& other){
  35 + ptr[0] = other.ptr[0];
  36 + ptr[1] = other.ptr[1];
  37 + ptr[2] = other.ptr[2];
  38 + }
  39 +
  40 + //access an element using an index
  41 + CUDA_CALLABLE T& operator[](size_t idx){
  42 + return ptr[idx];
  43 + }
  44 +
  45 + CUDA_CALLABLE T* data(){
  46 + return ptr;
  47 + }
  48 +
  49 +/// Casting operator. Creates a new vector with a new type U.
  50 + template< typename U >
  51 + CUDA_CALLABLE operator vec3<U>(){
  52 + vec3<U> result;
  53 + result.ptr[0] = (U)ptr[0];
  54 + result.ptr[1] = (U)ptr[1];
  55 + result.ptr[2] = (U)ptr[2];
  56 +
  57 + return result;
  58 + }
  59 +
  60 + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
  61 + CUDA_CALLABLE T len_sq() const{
  62 + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
  63 + }
  64 +
  65 + /// computes the Euclidean length of the vector
  66 + CUDA_CALLABLE T len() const{
  67 + return sqrt(len_sq());
  68 + }
  69 +
  70 +
  71 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  72 + CUDA_CALLABLE vec3<T> cart2sph() const{
  73 + vec3<T> sph;
  74 + sph.ptr[0] = len();
  75 + sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
  76 + if(sph.ptr[0] == 0)
  77 + sph.ptr[2] = 0;
  78 + else
  79 + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
  80 + return sph;
  81 + }
  82 +
  83 + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi])
  84 + CUDA_CALLABLE vec3<T> sph2cart() const{
  85 + vec3<T> cart;
  86 + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
  87 + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
  88 + cart.ptr[2] = ptr[0] * std::cos(ptr[2]);
  89 +
  90 + return cart;
  91 + }
  92 +
  93 + /// Computes the normalized vector (where each coordinate is divided by the L2 norm)
  94 + CUDA_CALLABLE vec3<T> norm() const{
  95 + vec3<T> result;
  96 + T l = len(); //compute the vector length
  97 + return (*this) / l;
  98 + }
  99 +
  100 + /// Computes the cross product of a 3-dimensional vector
  101 + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{
  102 +
  103 + vec3<T> result;
  104 +
  105 + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
  106 + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
  107 + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
  108 +
  109 + return result;
  110 + }
  111 +
  112 + /// Compute the Euclidean inner (dot) product
  113 + CUDA_CALLABLE T dot(vec3<T> rhs) const{
  114 + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
  115 + }
  116 +
  117 + /// Arithmetic addition operator
  118 +
  119 + /// @param rhs is the right-hand-side operator for the addition
  120 + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
  121 + vec3<T> result;
  122 + result.ptr[0] = ptr[0] + rhs[0];
  123 + result.ptr[1] = ptr[1] + rhs[1];
  124 + result.ptr[2] = ptr[2] + rhs[2];
  125 + return result;
  126 + }
  127 +
  128 + /// Arithmetic addition to a scalar
  129 +
  130 + /// @param rhs is the right-hand-side operator for the addition
  131 + CUDA_CALLABLE vec3<T> operator+(T rhs) const{
  132 + vec3<T> result;
  133 + result.ptr[0] = ptr[0] + rhs;
  134 + result.ptr[1] = ptr[1] + rhs;
  135 + result.ptr[2] = ptr[2] + rhs;
  136 + return result;
  137 + }
  138 +
  139 + /// Arithmetic subtraction operator
  140 +
  141 + /// @param rhs is the right-hand-side operator for the subtraction
  142 + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
  143 + vec3<T> result;
  144 + result.ptr[0] = ptr[0] - rhs[0];
  145 + result.ptr[1] = ptr[1] - rhs[1];
  146 + result.ptr[2] = ptr[2] - rhs[2];
  147 + return result;
  148 + }
  149 + /// Arithmetic subtraction to a scalar
  150 +
  151 + /// @param rhs is the right-hand-side operator for the addition
  152 + CUDA_CALLABLE vec3<T> operator-(T rhs) const{
  153 + vec3<T> result;
  154 + result.ptr[0] = ptr[0] - rhs;
  155 + result.ptr[1] = ptr[1] - rhs;
  156 + result.ptr[2] = ptr[2] - rhs;
  157 + return result;
  158 + }
  159 +
  160 + /// Arithmetic scalar multiplication operator
  161 +
  162 + /// @param rhs is the right-hand-side operator for the subtraction
  163 + CUDA_CALLABLE vec3<T> operator*(T rhs) const{
  164 + vec3<T> result;
  165 + result.ptr[0] = ptr[0] * rhs;
  166 + result.ptr[1] = ptr[1] * rhs;
  167 + result.ptr[2] = ptr[2] * rhs;
  168 + return result;
  169 + }
  170 +
  171 + /// Arithmetic scalar division operator
  172 +
  173 + /// @param rhs is the right-hand-side operator for the subtraction
  174 + CUDA_CALLABLE vec3<T> operator/(T rhs) const{
  175 + return (*this) * ((T)1.0/rhs);
  176 + }
  177 +
  178 + /// Multiplication by a scalar, followed by assignment
  179 + CUDA_CALLABLE vec3<T> operator*=(T rhs){
  180 + ptr[0] = ptr[0] * rhs;
  181 + ptr[1] = ptr[1] * rhs;
  182 + ptr[2] = ptr[2] * rhs;
  183 + return *this;
  184 + }
  185 +
  186 + /// Addition and assignment
  187 + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
  188 + ptr[0] = ptr[0] + rhs;
  189 + ptr[1] = ptr[1] + rhs;
  190 + ptr[2] = ptr[2] + rhs;
  191 + return *this;
  192 + }
  193 +
  194 + /// Assign a scalar to all values
  195 + CUDA_CALLABLE vec3<T> & operator=(T rhs){
  196 + ptr[0] = ptr[0] = rhs;
  197 + ptr[1] = ptr[1] = rhs;
  198 + ptr[2] = ptr[2] = rhs;
  199 + return *this;
  200 + }
  201 +
  202 + /// Casting and assignment
  203 + template<typename Y>
  204 + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
  205 + ptr[0] = (T)rhs.ptr[0];
  206 + ptr[1] = (T)rhs.ptr[1];
  207 + ptr[2] = (T)rhs.ptr[2];
  208 + return *this;
  209 + }
  210 +
  211 + /// Unary minus (returns the negative of the vector)
  212 + CUDA_CALLABLE vec3<T> operator-() const{
  213 + vec3<T> result;
  214 + result.ptr[0] = -ptr[0];
  215 + result.ptr[1] = -ptr[1];
  216 + result.ptr[2] = -ptr[2];
  217 + return result;
  218 + }
  219 +
  220 +<<<<<<< HEAD
  221 +//#ifndef __NVCC__
  222 +=======
  223 +>>>>>>> 9f5c0d4a055a2a19e69a97db1441aa617f96180c
  224 + /// Outputs the vector as a string
  225 + std::string str() const{
  226 + std::stringstream ss;
  227 +
  228 + const size_t N = 3;
  229 +
  230 + ss<<"[";
  231 + for(size_t i=0; i<N; i++)
  232 + {
  233 + ss<<ptr[i];
  234 + if(i != N-1)
  235 + ss<<", ";
  236 + }
  237 + ss<<"]";
  238 +
  239 + return ss.str();
  240 + }
  241 +<<<<<<< HEAD
  242 +//#endif
  243 +=======
  244 +>>>>>>> 9f5c0d4a055a2a19e69a97db1441aa617f96180c
  245 +
  246 + size_t size(){ return 3; }
  247 +
  248 + }; //end class vec3
  249 +} //end namespace stim
  250 +
  251 +/// Multiply a vector by a constant when the vector is on the right hand side
  252 +template <typename T>
  253 +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){
  254 + return rhs * lhs;
  255 +}
  256 +
  257 +//stream operator
  258 +template<typename T>
  259 +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){
  260 + os<<rhs.str();
  261 + return os;
  262 +}
  263 +
  264 +#endif
... ...
stim/math/vec3_BACKUP_62876.h 0 → 100644
  1 +#ifndef STIM_VEC3_H
  2 +#define STIM_VEC3_H
  3 +
  4 +
  5 +#include <stim/cuda/cudatools/callable.h>
  6 +#include <cmath>
  7 +
  8 +
  9 +namespace stim{
  10 +
  11 +
  12 +/// A class designed to act as a 3D vector with CUDA compatibility
  13 +template<typename T>
  14 +class vec3{
  15 +
  16 +protected:
  17 + T ptr[3];
  18 +
  19 +public:
  20 +
  21 + CUDA_CALLABLE vec3(){}
  22 +
  23 + CUDA_CALLABLE vec3(T v){
  24 + ptr[0] = ptr[1] = ptr[2] = v;
  25 + }
  26 +
  27 + CUDA_CALLABLE vec3(T x, T y, T z){
  28 + ptr[0] = x;
  29 + ptr[1] = y;
  30 + ptr[2] = z;
  31 + }
  32 +
  33 + //copy constructor
  34 + CUDA_CALLABLE vec3( const vec3<T>& other){
  35 + ptr[0] = other.ptr[0];
  36 + ptr[1] = other.ptr[1];
  37 + ptr[2] = other.ptr[2];
  38 + }
  39 +
  40 + //access an element using an index
  41 + CUDA_CALLABLE T& operator[](size_t idx){
  42 + return ptr[idx];
  43 + }
  44 +
  45 + CUDA_CALLABLE T* data(){
  46 + return ptr;
  47 + }
  48 +
  49 +/// Casting operator. Creates a new vector with a new type U.
  50 + template< typename U >
  51 + CUDA_CALLABLE operator vec3<U>(){
  52 + vec3<U> result;
  53 + result.ptr[0] = (U)ptr[0];
  54 + result.ptr[1] = (U)ptr[1];
  55 + result.ptr[2] = (U)ptr[2];
  56 +
  57 + return result;
  58 + }
  59 +
  60 + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
  61 + CUDA_CALLABLE T len_sq() const{
  62 + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
  63 + }
  64 +
  65 + /// computes the Euclidean length of the vector
  66 + CUDA_CALLABLE T len() const{
  67 + return sqrt(len_sq());
  68 + }
  69 +
  70 +
  71 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  72 + CUDA_CALLABLE vec3<T> cart2sph() const{
  73 + vec3<T> sph;
  74 + sph.ptr[0] = len();
  75 + sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
  76 + if(sph.ptr[0] == 0)
  77 + sph.ptr[2] = 0;
  78 + else
  79 + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
  80 + return sph;
  81 + }
  82 +
  83 + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi])
  84 + CUDA_CALLABLE vec3<T> sph2cart() const{
  85 + vec3<T> cart;
  86 + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
  87 + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
  88 + cart.ptr[2] = ptr[0] * std::cos(ptr[2]);
  89 +
  90 + return cart;
  91 + }
  92 +
  93 + /// Computes the normalized vector (where each coordinate is divided by the L2 norm)
  94 + CUDA_CALLABLE vec3<T> norm() const{
  95 + vec3<T> result;
  96 + T l = len(); //compute the vector length
  97 + return (*this) / l;
  98 + }
  99 +
  100 + /// Computes the cross product of a 3-dimensional vector
  101 + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{
  102 +
  103 + vec3<T> result;
  104 +
  105 + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
  106 + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
  107 + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
  108 +
  109 + return result;
  110 + }
  111 +
  112 + /// Compute the Euclidean inner (dot) product
  113 + CUDA_CALLABLE T dot(vec3<T> rhs) const{
  114 + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
  115 + }
  116 +
  117 + /// Arithmetic addition operator
  118 +
  119 + /// @param rhs is the right-hand-side operator for the addition
  120 + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
  121 + vec3<T> result;
  122 + result.ptr[0] = ptr[0] + rhs[0];
  123 + result.ptr[1] = ptr[1] + rhs[1];
  124 + result.ptr[2] = ptr[2] + rhs[2];
  125 + return result;
  126 + }
  127 +
  128 + /// Arithmetic addition to a scalar
  129 +
  130 + /// @param rhs is the right-hand-side operator for the addition
  131 + CUDA_CALLABLE vec3<T> operator+(T rhs) const{
  132 + vec3<T> result;
  133 + result.ptr[0] = ptr[0] + rhs;
  134 + result.ptr[1] = ptr[1] + rhs;
  135 + result.ptr[2] = ptr[2] + rhs;
  136 + return result;
  137 + }
  138 +
  139 + /// Arithmetic subtraction operator
  140 +
  141 + /// @param rhs is the right-hand-side operator for the subtraction
  142 + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
  143 + vec3<T> result;
  144 + result.ptr[0] = ptr[0] - rhs[0];
  145 + result.ptr[1] = ptr[1] - rhs[1];
  146 + result.ptr[2] = ptr[2] - rhs[2];
  147 + return result;
  148 + }
  149 + /// Arithmetic subtraction to a scalar
  150 +
  151 + /// @param rhs is the right-hand-side operator for the addition
  152 + CUDA_CALLABLE vec3<T> operator-(T rhs) const{
  153 + vec3<T> result;
  154 + result.ptr[0] = ptr[0] - rhs;
  155 + result.ptr[1] = ptr[1] - rhs;
  156 + result.ptr[2] = ptr[2] - rhs;
  157 + return result;
  158 + }
  159 +
  160 + /// Arithmetic scalar multiplication operator
  161 +
  162 + /// @param rhs is the right-hand-side operator for the subtraction
  163 + CUDA_CALLABLE vec3<T> operator*(T rhs) const{
  164 + vec3<T> result;
  165 + result.ptr[0] = ptr[0] * rhs;
  166 + result.ptr[1] = ptr[1] * rhs;
  167 + result.ptr[2] = ptr[2] * rhs;
  168 + return result;
  169 + }
  170 +
  171 + /// Arithmetic scalar division operator
  172 +
  173 + /// @param rhs is the right-hand-side operator for the subtraction
  174 + CUDA_CALLABLE vec3<T> operator/(T rhs) const{
  175 + return (*this) * ((T)1.0/rhs);
  176 + }
  177 +
  178 + /// Multiplication by a scalar, followed by assignment
  179 + CUDA_CALLABLE vec3<T> operator*=(T rhs){
  180 + ptr[0] = ptr[0] * rhs;
  181 + ptr[1] = ptr[1] * rhs;
  182 + ptr[2] = ptr[2] * rhs;
  183 + return *this;
  184 + }
  185 +
  186 + /// Addition and assignment
  187 + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
  188 + ptr[0] = ptr[0] + rhs;
  189 + ptr[1] = ptr[1] + rhs;
  190 + ptr[2] = ptr[2] + rhs;
  191 + return *this;
  192 + }
  193 +
  194 + /// Assign a scalar to all values
  195 + CUDA_CALLABLE vec3<T> & operator=(T rhs){
  196 + ptr[0] = ptr[0] = rhs;
  197 + ptr[1] = ptr[1] = rhs;
  198 + ptr[2] = ptr[2] = rhs;
  199 + return *this;
  200 + }
  201 +
  202 + /// Casting and assignment
  203 + template<typename Y>
  204 + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
  205 + ptr[0] = (T)rhs.ptr[0];
  206 + ptr[1] = (T)rhs.ptr[1];
  207 + ptr[2] = (T)rhs.ptr[2];
  208 + return *this;
  209 + }
  210 +
  211 + /// Unary minus (returns the negative of the vector)
  212 + CUDA_CALLABLE vec3<T> operator-() const{
  213 + vec3<T> result;
  214 + result.ptr[0] = -ptr[0];
  215 + result.ptr[1] = -ptr[1];
  216 + result.ptr[2] = -ptr[2];
  217 + return result;
  218 + }
  219 +
  220 +<<<<<<< HEAD
  221 +//#ifndef __NVCC__
  222 +=======
  223 +>>>>>>> 9f5c0d4a055a2a19e69a97db1441aa617f96180c
  224 + /// Outputs the vector as a string
  225 + std::string str() const{
  226 + std::stringstream ss;
  227 +
  228 + const size_t N = 3;
  229 +
  230 + ss<<"[";
  231 + for(size_t i=0; i<N; i++)
  232 + {
  233 + ss<<ptr[i];
  234 + if(i != N-1)
  235 + ss<<", ";
  236 + }
  237 + ss<<"]";
  238 +
  239 + return ss.str();
  240 + }
  241 +<<<<<<< HEAD
  242 +//#endif
  243 +=======
  244 +>>>>>>> 9f5c0d4a055a2a19e69a97db1441aa617f96180c
  245 +
  246 + size_t size(){ return 3; }
  247 +
  248 + }; //end class vec3
  249 +} //end namespace stim
  250 +
  251 +/// Multiply a vector by a constant when the vector is on the right hand side
  252 +template <typename T>
  253 +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){
  254 + return rhs * lhs;
  255 +}
  256 +
  257 +//stream operator
  258 +template<typename T>
  259 +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){
  260 + os<<rhs.str();
  261 + return os;
  262 +}
  263 +
  264 +#endif
... ...
stim/math/vec3_BASE_62876.h 0 → 100644
  1 +#ifndef STIM_VEC3_H
  2 +#define STIM_VEC3_H
  3 +
  4 +
  5 +#include <stim/cuda/cudatools/callable.h>
  6 +
  7 +
  8 +namespace stim{
  9 +
  10 +
  11 +/// A class designed to act as a 3D vector with CUDA compatibility
  12 +template<typename T>
  13 +class vec3{
  14 +
  15 +protected:
  16 + T ptr[3];
  17 +
  18 +public:
  19 +
  20 + CUDA_CALLABLE vec3(){}
  21 +
  22 + CUDA_CALLABLE vec3(T v){
  23 + ptr[0] = ptr[1] = ptr[2] = v;
  24 + }
  25 +
  26 + CUDA_CALLABLE vec3(T x, T y, T z){
  27 + ptr[0] = x;
  28 + ptr[1] = y;
  29 + ptr[2] = z;
  30 + }
  31 +
  32 + //copy constructor
  33 + CUDA_CALLABLE vec3( const vec3<T>& other){
  34 + ptr[0] = other.ptr[0];
  35 + ptr[1] = other.ptr[1];
  36 + ptr[2] = other.ptr[2];
  37 + }
  38 +
  39 + //access an element using an index
  40 + CUDA_CALLABLE T& operator[](size_t idx){
  41 + return ptr[idx];
  42 + }
  43 +
  44 + CUDA_CALLABLE T* data(){
  45 + return ptr;
  46 + }
  47 +
  48 +/// Casting operator. Creates a new vector with a new type U.
  49 + template< typename U >
  50 + CUDA_CALLABLE operator vec3<U>(){
  51 + vec3<U> result;
  52 + result.ptr[0] = (U)ptr[0];
  53 + result.ptr[1] = (U)ptr[1];
  54 + result.ptr[2] = (U)ptr[2];
  55 +
  56 + return result;
  57 + }
  58 +
  59 + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
  60 + CUDA_CALLABLE T len_sq() const{
  61 + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
  62 + }
  63 +
  64 + /// computes the Euclidean length of the vector
  65 + CUDA_CALLABLE T len() const{
  66 + return sqrt(len_sq());
  67 + }
  68 +
  69 +
  70 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  71 + CUDA_CALLABLE vec3<T> cart2sph() const{
  72 + vec3<T> sph;
  73 + sph.ptr[0] = len();
  74 + sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
  75 + if(sph.ptr[0] == 0)
  76 + sph.ptr[2] = 0;
  77 + else
  78 + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
  79 + return sph;
  80 + }
  81 +
  82 + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi])
  83 + CUDA_CALLABLE vec3<T> sph2cart() const{
  84 + vec3<T> cart;
  85 + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
  86 + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
  87 + cart.ptr[2] = ptr[0] * std::cos(ptr[2]);
  88 +
  89 + return cart;
  90 + }
  91 +
  92 + /// Computes the normalized vector (where each coordinate is divided by the L2 norm)
  93 + CUDA_CALLABLE vec3<T> norm() const{
  94 + vec3<T> result;
  95 + T l = len(); //compute the vector length
  96 + return (*this) / l;
  97 + }
  98 +
  99 + /// Computes the cross product of a 3-dimensional vector
  100 + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{
  101 +
  102 + vec3<T> result;
  103 +
  104 + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
  105 + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
  106 + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
  107 +
  108 + return result;
  109 + }
  110 +
  111 + /// Compute the Euclidean inner (dot) product
  112 + CUDA_CALLABLE T dot(vec3<T> rhs) const{
  113 + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
  114 + }
  115 +
  116 + /// Arithmetic addition operator
  117 +
  118 + /// @param rhs is the right-hand-side operator for the addition
  119 + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
  120 + vec3<T> result;
  121 + result.ptr[0] = ptr[0] + rhs[0];
  122 + result.ptr[1] = ptr[1] + rhs[1];
  123 + result.ptr[2] = ptr[2] + rhs[2];
  124 + return result;
  125 + }
  126 +
  127 + /// Arithmetic addition to a scalar
  128 +
  129 + /// @param rhs is the right-hand-side operator for the addition
  130 + CUDA_CALLABLE vec3<T> operator+(T rhs) const{
  131 + vec3<T> result;
  132 + result.ptr[0] = ptr[0] + rhs;
  133 + result.ptr[1] = ptr[1] + rhs;
  134 + result.ptr[2] = ptr[2] + rhs;
  135 + return result;
  136 + }
  137 +
  138 + /// Arithmetic subtraction operator
  139 +
  140 + /// @param rhs is the right-hand-side operator for the subtraction
  141 + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
  142 + vec3<T> result;
  143 + result.ptr[0] = ptr[0] - rhs[0];
  144 + result.ptr[1] = ptr[1] - rhs[1];
  145 + result.ptr[2] = ptr[2] - rhs[2];
  146 + return result;
  147 + }
  148 + /// Arithmetic subtraction to a scalar
  149 +
  150 + /// @param rhs is the right-hand-side operator for the addition
  151 + CUDA_CALLABLE vec3<T> operator-(T rhs) const{
  152 + vec3<T> result;
  153 + result.ptr[0] = ptr[0] - rhs;
  154 + result.ptr[1] = ptr[1] - rhs;
  155 + result.ptr[2] = ptr[2] - rhs;
  156 + return result;
  157 + }
  158 +
  159 + /// Arithmetic scalar multiplication operator
  160 +
  161 + /// @param rhs is the right-hand-side operator for the subtraction
  162 + CUDA_CALLABLE vec3<T> operator*(T rhs) const{
  163 + vec3<T> result;
  164 + result.ptr[0] = ptr[0] * rhs;
  165 + result.ptr[1] = ptr[1] * rhs;
  166 + result.ptr[2] = ptr[2] * rhs;
  167 + return result;
  168 + }
  169 +
  170 + /// Arithmetic scalar division operator
  171 +
  172 + /// @param rhs is the right-hand-side operator for the subtraction
  173 + CUDA_CALLABLE vec3<T> operator/(T rhs) const{
  174 + return (*this) * ((T)1.0/rhs);
  175 + }
  176 +
  177 + /// Multiplication by a scalar, followed by assignment
  178 + CUDA_CALLABLE vec3<T> operator*=(T rhs){
  179 + ptr[0] = ptr[0] * rhs;
  180 + ptr[1] = ptr[1] * rhs;
  181 + ptr[2] = ptr[2] * rhs;
  182 + return *this;
  183 + }
  184 +
  185 + /// Addition and assignment
  186 + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
  187 + ptr[0] = ptr[0] + rhs;
  188 + ptr[1] = ptr[1] + rhs;
  189 + ptr[2] = ptr[2] + rhs;
  190 + return *this;
  191 + }
  192 +
  193 + /// Assign a scalar to all values
  194 + CUDA_CALLABLE vec3<T> & operator=(T rhs){
  195 + ptr[0] = ptr[0] = rhs;
  196 + ptr[1] = ptr[1] = rhs;
  197 + ptr[2] = ptr[2] = rhs;
  198 + return *this;
  199 + }
  200 +
  201 + /// Casting and assignment
  202 + template<typename Y>
  203 + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
  204 + ptr[0] = (T)rhs.ptr[0];
  205 + ptr[1] = (T)rhs.ptr[1];
  206 + ptr[2] = (T)rhs.ptr[2];
  207 + return *this;
  208 + }
  209 +
  210 + /// Unary minus (returns the negative of the vector)
  211 + CUDA_CALLABLE vec3<T> operator-() const{
  212 + vec3<T> result;
  213 + result.ptr[0] = -ptr[0];
  214 + result.ptr[1] = -ptr[1];
  215 + result.ptr[2] = -ptr[2];
  216 + return result;
  217 + }
  218 +
  219 +#ifndef __NVCC__
  220 + /// Outputs the vector as a string
  221 + std::string str() const{
  222 + std::stringstream ss;
  223 +
  224 + const size_t N = 3;
  225 +
  226 + ss<<"[";
  227 + for(size_t i=0; i<N; i++)
  228 + {
  229 + ss<<ptr[i];
  230 + if(i != N-1)
  231 + ss<<", ";
  232 + }
  233 + ss<<"]";
  234 +
  235 + return ss.str();
  236 + }
  237 +#endif
  238 +
  239 + size_t size(){ return 3; }
  240 +
  241 + }; //end class vec3
  242 +} //end namespace stim
  243 +
  244 +/// Multiply a vector by a constant when the vector is on the right hand side
  245 +template <typename T>
  246 +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){
  247 + return rhs * lhs;
  248 +}
  249 +
  250 +//stream operator
  251 +template<typename T>
  252 +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){
  253 + os<<rhs.str();
  254 + return os;
  255 +}
  256 +
  257 +#endif
... ...
stim/math/vec3_LOCAL_62876.h 0 → 100644
  1 +#ifndef STIM_VEC3_H
  2 +#define STIM_VEC3_H
  3 +
  4 +
  5 +#include <stim/cuda/cudatools/callable.h>
  6 +
  7 +
  8 +namespace stim{
  9 +
  10 +
  11 +/// A class designed to act as a 3D vector with CUDA compatibility
  12 +template<typename T>
  13 +class vec3{
  14 +
  15 +protected:
  16 + T ptr[3];
  17 +
  18 +public:
  19 +
  20 + CUDA_CALLABLE vec3(){}
  21 +
  22 + CUDA_CALLABLE vec3(T v){
  23 + ptr[0] = ptr[1] = ptr[2] = v;
  24 + }
  25 +
  26 + CUDA_CALLABLE vec3(T x, T y, T z){
  27 + ptr[0] = x;
  28 + ptr[1] = y;
  29 + ptr[2] = z;
  30 + }
  31 +
  32 + //copy constructor
  33 + CUDA_CALLABLE vec3( const vec3<T>& other){
  34 + ptr[0] = other.ptr[0];
  35 + ptr[1] = other.ptr[1];
  36 + ptr[2] = other.ptr[2];
  37 + }
  38 +
  39 + //access an element using an index
  40 + CUDA_CALLABLE T& operator[](size_t idx){
  41 + return ptr[idx];
  42 + }
  43 +
  44 + CUDA_CALLABLE T* data(){
  45 + return ptr;
  46 + }
  47 +
  48 +/// Casting operator. Creates a new vector with a new type U.
  49 + template< typename U >
  50 + CUDA_CALLABLE operator vec3<U>(){
  51 + vec3<U> result;
  52 + result.ptr[0] = (U)ptr[0];
  53 + result.ptr[1] = (U)ptr[1];
  54 + result.ptr[2] = (U)ptr[2];
  55 +
  56 + return result;
  57 + }
  58 +
  59 + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
  60 + CUDA_CALLABLE T len_sq() const{
  61 + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
  62 + }
  63 +
  64 + /// computes the Euclidean length of the vector
  65 + CUDA_CALLABLE T len() const{
  66 + return sqrt(len_sq());
  67 + }
  68 +
  69 +
  70 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  71 + CUDA_CALLABLE vec3<T> cart2sph() const{
  72 + vec3<T> sph;
  73 + sph.ptr[0] = len();
  74 + sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
  75 + if(sph.ptr[0] == 0)
  76 + sph.ptr[2] = 0;
  77 + else
  78 + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
  79 + return sph;
  80 + }
  81 +
  82 + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi])
  83 + CUDA_CALLABLE vec3<T> sph2cart() const{
  84 + vec3<T> cart;
  85 + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
  86 + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
  87 + cart.ptr[2] = ptr[0] * std::cos(ptr[2]);
  88 +
  89 + return cart;
  90 + }
  91 +
  92 + /// Computes the normalized vector (where each coordinate is divided by the L2 norm)
  93 + CUDA_CALLABLE vec3<T> norm() const{
  94 + vec3<T> result;
  95 + T l = len(); //compute the vector length
  96 + return (*this) / l;
  97 + }
  98 +
  99 + /// Computes the cross product of a 3-dimensional vector
  100 + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{
  101 +
  102 + vec3<T> result;
  103 +
  104 + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
  105 + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
  106 + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
  107 +
  108 + return result;
  109 + }
  110 +
  111 + /// Compute the Euclidean inner (dot) product
  112 + CUDA_CALLABLE T dot(vec3<T> rhs) const{
  113 + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
  114 + }
  115 +
  116 + /// Arithmetic addition operator
  117 +
  118 + /// @param rhs is the right-hand-side operator for the addition
  119 + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
  120 + vec3<T> result;
  121 + result.ptr[0] = ptr[0] + rhs[0];
  122 + result.ptr[1] = ptr[1] + rhs[1];
  123 + result.ptr[2] = ptr[2] + rhs[2];
  124 + return result;
  125 + }
  126 +
  127 + /// Arithmetic addition to a scalar
  128 +
  129 + /// @param rhs is the right-hand-side operator for the addition
  130 + CUDA_CALLABLE vec3<T> operator+(T rhs) const{
  131 + vec3<T> result;
  132 + result.ptr[0] = ptr[0] + rhs;
  133 + result.ptr[1] = ptr[1] + rhs;
  134 + result.ptr[2] = ptr[2] + rhs;
  135 + return result;
  136 + }
  137 +
  138 + /// Arithmetic subtraction operator
  139 +
  140 + /// @param rhs is the right-hand-side operator for the subtraction
  141 + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
  142 + vec3<T> result;
  143 + result.ptr[0] = ptr[0] - rhs[0];
  144 + result.ptr[1] = ptr[1] - rhs[1];
  145 + result.ptr[2] = ptr[2] - rhs[2];
  146 + return result;
  147 + }
  148 + /// Arithmetic subtraction to a scalar
  149 +
  150 + /// @param rhs is the right-hand-side operator for the addition
  151 + CUDA_CALLABLE vec3<T> operator-(T rhs) const{
  152 + vec3<T> result;
  153 + result.ptr[0] = ptr[0] - rhs;
  154 + result.ptr[1] = ptr[1] - rhs;
  155 + result.ptr[2] = ptr[2] - rhs;
  156 + return result;
  157 + }
  158 +
  159 + /// Arithmetic scalar multiplication operator
  160 +
  161 + /// @param rhs is the right-hand-side operator for the subtraction
  162 + CUDA_CALLABLE vec3<T> operator*(T rhs) const{
  163 + vec3<T> result;
  164 + result.ptr[0] = ptr[0] * rhs;
  165 + result.ptr[1] = ptr[1] * rhs;
  166 + result.ptr[2] = ptr[2] * rhs;
  167 + return result;
  168 + }
  169 +
  170 + /// Arithmetic scalar division operator
  171 +
  172 + /// @param rhs is the right-hand-side operator for the subtraction
  173 + CUDA_CALLABLE vec3<T> operator/(T rhs) const{
  174 + return (*this) * ((T)1.0/rhs);
  175 + }
  176 +
  177 + /// Multiplication by a scalar, followed by assignment
  178 + CUDA_CALLABLE vec3<T> operator*=(T rhs){
  179 + ptr[0] = ptr[0] * rhs;
  180 + ptr[1] = ptr[1] * rhs;
  181 + ptr[2] = ptr[2] * rhs;
  182 + return *this;
  183 + }
  184 +
  185 + /// Addition and assignment
  186 + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
  187 + ptr[0] = ptr[0] + rhs;
  188 + ptr[1] = ptr[1] + rhs;
  189 + ptr[2] = ptr[2] + rhs;
  190 + return *this;
  191 + }
  192 +
  193 + /// Assign a scalar to all values
  194 + CUDA_CALLABLE vec3<T> & operator=(T rhs){
  195 + ptr[0] = ptr[0] = rhs;
  196 + ptr[1] = ptr[1] = rhs;
  197 + ptr[2] = ptr[2] = rhs;
  198 + return *this;
  199 + }
  200 +
  201 + /// Casting and assignment
  202 + template<typename Y>
  203 + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
  204 + ptr[0] = (T)rhs.ptr[0];
  205 + ptr[1] = (T)rhs.ptr[1];
  206 + ptr[2] = (T)rhs.ptr[2];
  207 + return *this;
  208 + }
  209 +
  210 + /// Unary minus (returns the negative of the vector)
  211 + CUDA_CALLABLE vec3<T> operator-() const{
  212 + vec3<T> result;
  213 + result.ptr[0] = -ptr[0];
  214 + result.ptr[1] = -ptr[1];
  215 + result.ptr[2] = -ptr[2];
  216 + return result;
  217 + }
  218 +
  219 +//#ifndef __NVCC__
  220 + /// Outputs the vector as a string
  221 + std::string str() const{
  222 + std::stringstream ss;
  223 +
  224 + const size_t N = 3;
  225 +
  226 + ss<<"[";
  227 + for(size_t i=0; i<N; i++)
  228 + {
  229 + ss<<ptr[i];
  230 + if(i != N-1)
  231 + ss<<", ";
  232 + }
  233 + ss<<"]";
  234 +
  235 + return ss.str();
  236 + }
  237 +//#endif
  238 +
  239 + size_t size(){ return 3; }
  240 +
  241 + }; //end class vec3
  242 +} //end namespace stim
  243 +
  244 +/// Multiply a vector by a constant when the vector is on the right hand side
  245 +template <typename T>
  246 +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){
  247 + return rhs * lhs;
  248 +}
  249 +
  250 +//stream operator
  251 +template<typename T>
  252 +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){
  253 + os<<rhs.str();
  254 + return os;
  255 +}
  256 +
  257 +#endif
... ...
stim/math/vec3_REMOTE_62876.h 0 → 100644
  1 +#ifndef STIM_VEC3_H
  2 +#define STIM_VEC3_H
  3 +
  4 +
  5 +#include <stim/cuda/cudatools/callable.h>
  6 +#include <cmath>
  7 +
  8 +
  9 +namespace stim{
  10 +
  11 +
  12 +/// A class designed to act as a 3D vector with CUDA compatibility
  13 +template<typename T>
  14 +class vec3{
  15 +
  16 +protected:
  17 + T ptr[3];
  18 +
  19 +public:
  20 +
  21 + CUDA_CALLABLE vec3(){}
  22 +
  23 + CUDA_CALLABLE vec3(T v){
  24 + ptr[0] = ptr[1] = ptr[2] = v;
  25 + }
  26 +
  27 + CUDA_CALLABLE vec3(T x, T y, T z){
  28 + ptr[0] = x;
  29 + ptr[1] = y;
  30 + ptr[2] = z;
  31 + }
  32 +
  33 + //copy constructor
  34 + CUDA_CALLABLE vec3( const vec3<T>& other){
  35 + ptr[0] = other.ptr[0];
  36 + ptr[1] = other.ptr[1];
  37 + ptr[2] = other.ptr[2];
  38 + }
  39 +
  40 + //access an element using an index
  41 + CUDA_CALLABLE T& operator[](size_t idx){
  42 + return ptr[idx];
  43 + }
  44 +
  45 + CUDA_CALLABLE T* data(){
  46 + return ptr;
  47 + }
  48 +
  49 +/// Casting operator. Creates a new vector with a new type U.
  50 + template< typename U >
  51 + CUDA_CALLABLE operator vec3<U>(){
  52 + vec3<U> result;
  53 + result.ptr[0] = (U)ptr[0];
  54 + result.ptr[1] = (U)ptr[1];
  55 + result.ptr[2] = (U)ptr[2];
  56 +
  57 + return result;
  58 + }
  59 +
  60 + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
  61 + CUDA_CALLABLE T len_sq() const{
  62 + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
  63 + }
  64 +
  65 + /// computes the Euclidean length of the vector
  66 + CUDA_CALLABLE T len() const{
  67 + return sqrt(len_sq());
  68 + }
  69 +
  70 +
  71 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  72 + CUDA_CALLABLE vec3<T> cart2sph() const{
  73 + vec3<T> sph;
  74 + sph.ptr[0] = len();
  75 + sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
  76 + if(sph.ptr[0] == 0)
  77 + sph.ptr[2] = 0;
  78 + else
  79 + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
  80 + return sph;
  81 + }
  82 +
  83 + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi])
  84 + CUDA_CALLABLE vec3<T> sph2cart() const{
  85 + vec3<T> cart;
  86 + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
  87 + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
  88 + cart.ptr[2] = ptr[0] * std::cos(ptr[2]);
  89 +
  90 + return cart;
  91 + }
  92 +
  93 + /// Computes the normalized vector (where each coordinate is divided by the L2 norm)
  94 + CUDA_CALLABLE vec3<T> norm() const{
  95 + vec3<T> result;
  96 + T l = len(); //compute the vector length
  97 + return (*this) / l;
  98 + }
  99 +
  100 + /// Computes the cross product of a 3-dimensional vector
  101 + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{
  102 +
  103 + vec3<T> result;
  104 +
  105 + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
  106 + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
  107 + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
  108 +
  109 + return result;
  110 + }
  111 +
  112 + /// Compute the Euclidean inner (dot) product
  113 + CUDA_CALLABLE T dot(vec3<T> rhs) const{
  114 + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
  115 + }
  116 +
  117 + /// Arithmetic addition operator
  118 +
  119 + /// @param rhs is the right-hand-side operator for the addition
  120 + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
  121 + vec3<T> result;
  122 + result.ptr[0] = ptr[0] + rhs[0];
  123 + result.ptr[1] = ptr[1] + rhs[1];
  124 + result.ptr[2] = ptr[2] + rhs[2];
  125 + return result;
  126 + }
  127 +
  128 + /// Arithmetic addition to a scalar
  129 +
  130 + /// @param rhs is the right-hand-side operator for the addition
  131 + CUDA_CALLABLE vec3<T> operator+(T rhs) const{
  132 + vec3<T> result;
  133 + result.ptr[0] = ptr[0] + rhs;
  134 + result.ptr[1] = ptr[1] + rhs;
  135 + result.ptr[2] = ptr[2] + rhs;
  136 + return result;
  137 + }
  138 +
  139 + /// Arithmetic subtraction operator
  140 +
  141 + /// @param rhs is the right-hand-side operator for the subtraction
  142 + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
  143 + vec3<T> result;
  144 + result.ptr[0] = ptr[0] - rhs[0];
  145 + result.ptr[1] = ptr[1] - rhs[1];
  146 + result.ptr[2] = ptr[2] - rhs[2];
  147 + return result;
  148 + }
  149 + /// Arithmetic subtraction to a scalar
  150 +
  151 + /// @param rhs is the right-hand-side operator for the addition
  152 + CUDA_CALLABLE vec3<T> operator-(T rhs) const{
  153 + vec3<T> result;
  154 + result.ptr[0] = ptr[0] - rhs;
  155 + result.ptr[1] = ptr[1] - rhs;
  156 + result.ptr[2] = ptr[2] - rhs;
  157 + return result;
  158 + }
  159 +
  160 + /// Arithmetic scalar multiplication operator
  161 +
  162 + /// @param rhs is the right-hand-side operator for the subtraction
  163 + CUDA_CALLABLE vec3<T> operator*(T rhs) const{
  164 + vec3<T> result;
  165 + result.ptr[0] = ptr[0] * rhs;
  166 + result.ptr[1] = ptr[1] * rhs;
  167 + result.ptr[2] = ptr[2] * rhs;
  168 + return result;
  169 + }
  170 +
  171 + /// Arithmetic scalar division operator
  172 +
  173 + /// @param rhs is the right-hand-side operator for the subtraction
  174 + CUDA_CALLABLE vec3<T> operator/(T rhs) const{
  175 + return (*this) * ((T)1.0/rhs);
  176 + }
  177 +
  178 + /// Multiplication by a scalar, followed by assignment
  179 + CUDA_CALLABLE vec3<T> operator*=(T rhs){
  180 + ptr[0] = ptr[0] * rhs;
  181 + ptr[1] = ptr[1] * rhs;
  182 + ptr[2] = ptr[2] * rhs;
  183 + return *this;
  184 + }
  185 +
  186 + /// Addition and assignment
  187 + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
  188 + ptr[0] = ptr[0] + rhs;
  189 + ptr[1] = ptr[1] + rhs;
  190 + ptr[2] = ptr[2] + rhs;
  191 + return *this;
  192 + }
  193 +
  194 + /// Assign a scalar to all values
  195 + CUDA_CALLABLE vec3<T> & operator=(T rhs){
  196 + ptr[0] = ptr[0] = rhs;
  197 + ptr[1] = ptr[1] = rhs;
  198 + ptr[2] = ptr[2] = rhs;
  199 + return *this;
  200 + }
  201 +
  202 + /// Casting and assignment
  203 + template<typename Y>
  204 + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
  205 + ptr[0] = (T)rhs.ptr[0];
  206 + ptr[1] = (T)rhs.ptr[1];
  207 + ptr[2] = (T)rhs.ptr[2];
  208 + return *this;
  209 + }
  210 +
  211 + /// Unary minus (returns the negative of the vector)
  212 + CUDA_CALLABLE vec3<T> operator-() const{
  213 + vec3<T> result;
  214 + result.ptr[0] = -ptr[0];
  215 + result.ptr[1] = -ptr[1];
  216 + result.ptr[2] = -ptr[2];
  217 + return result;
  218 + }
  219 +
  220 + /// Outputs the vector as a string
  221 + std::string str() const{
  222 + std::stringstream ss;
  223 +
  224 + const size_t N = 3;
  225 +
  226 + ss<<"[";
  227 + for(size_t i=0; i<N; i++)
  228 + {
  229 + ss<<ptr[i];
  230 + if(i != N-1)
  231 + ss<<", ";
  232 + }
  233 + ss<<"]";
  234 +
  235 + return ss.str();
  236 + }
  237 +
  238 + size_t size(){ return 3; }
  239 +
  240 + }; //end class vec3
  241 +} //end namespace stim
  242 +
  243 +/// Multiply a vector by a constant when the vector is on the right hand side
  244 +template <typename T>
  245 +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){
  246 + return rhs * lhs;
  247 +}
  248 +
  249 +//stream operator
  250 +template<typename T>
  251 +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){
  252 + os<<rhs.str();
  253 + return os;
  254 +}
  255 +
  256 +#endif
... ...
stim/sampling/func1_from_symmetric2.h 0 → 100644
  1 +/// Reconstruct a 1D function from a 2D symmetric function. This function takes a 2D image f(x,y) as input and
  2 +/// builds a 1D function f(r) where r = sqrt(x^2 + y^2) to approximate this 2D function.
  3 +/// This is useful for several applications, such as:
  4 +/// 1) Calculating a 1D function from a noisy 2D image, when you know the 2D image is supposed to be symmetric
  5 +/// 2) Calculating the average value for every r = sqrt(x^2 + y^2)
  6 +
  7 +/// Given a set of function samples equally spaced by dx, calculate the two samples closest to x and the proximity ratio alpha.
  8 +/// This can be used to linearly interpolate between an array of equally spaced values. Given the query value x, the
  9 +/// interpolated value can be calculated as r = values[sample] * alpha + values[sample + 1] * (1 - alpha)
  10 +/// @param sample is the lowest bin closest to the query point x
  11 +/// @param alpha is the ratio of x between [sample, sample + 1]
  12 +/// @param dx is the spacing between values
  13 +/// @param x is the query point
  14 +template<typename T>
  15 +void lerp_alpha(T& sample, T& alpha, T dx, T x){
  16 + sample = std::floor(x/dx);
  17 + alpha = 1 - (x - (b * dx)) / dx;
  18 +}
  19 +
  20 +/// This function assumes that the input image is square, that the # of samples are odd, and that r=0 is at the center
  21 +/// @param fr is an array of X elements that will store the reconstructed function
  22 +/// @param dr is the spacing (in pixels) between samples in fr
  23 +template<typename T>
  24 +void cpu_func1_from_symmetric2(T* fr, T& dr, T* fxy, size_t X){
  25 +
  26 + if(X%2 == 0){ //the 2D function must be odd (a sample must be available for r=0)
  27 + std::err<<"Error, X = "<<X<<" must be odd."<<std::endl;
  28 + exit(1);
  29 + }
  30 + size_t C = X/2+1; //calculate the center pixel coordinate
  31 + size_t N = C * C; //number of values in the folded function
  32 +
  33 + // The first step is to fold the function 8 times to take advantage of symmetry in the grid
  34 + T* folded = (T*) malloc(sizeof(T) * N ); //allocate space for the folded function
  35 + memset(folded, 0, sizeof(T) * N);
  36 + char* count = (char*) malloc( N ); //allocate space for a counter for the folded function
  37 + memset(count, 0, sizeof(T) * N);
  38 + size_t xi, yi; //indices into the image f(xi, yi)
  39 + size_t xii, yii; //indices into the folded image
  40 + T v; //register to store the value at point (xi, yi)
  41 + for(xi = 0; xi < X; xi++){
  42 + for(yi = 0; yi < X; yi++){
  43 + v = fxy[yi * X + xi]; //retrieve f(x, y)
  44 +
  45 + xii = xi;
  46 + yii = yi; //initialize the indices into the folded image
  47 +
  48 + //fold the function along the x and y axes
  49 + if(xi > C) xii = 2 * C - xi - 1; //calculate the folded index of x
  50 + if(yi > C) yii = 2 * C - yi - 1; //calculate the folded index of y
  51 +
  52 + if(xii < yii) std::swap<T>(xii, yii); //fold the function again along the 45-degree line
  53 +
  54 + folded[yii * C + xii] += v; //add the value to the folded function
  55 + count[yii * C + xii] += 1; //add a counter to the counter table
  56 + }
  57 + }
  58 +
  59 + //divide out the counter to correct the folded function
  60 + for(size_t i = 0; i < N){
  61 + folded[i] /= (T)count[i]; //divide out the counter
  62 + }
  63 +
  64 + T max_r = sqrt(X * X + Y * Y); //calculate the maximum r value, which will be along the image diagonal
  65 + T dr = max_r / (X - 1); //spacing between samples in the output function f(r)
  66 +
  67 + T* fA = (T*) malloc( sizeof(T) * X); //allocate space for a counter function storing alpha weights
  68 + memset(fA, 0, sizeof(T) * X); //zero out the alpha array
  69 + memset(fr, 0, sizeof(T) * X); //zero out the output function
  70 +
  71 + T r; //register to store the value of r at each point
  72 + size_t sample;
  73 + T alpha;
  74 + for(xi = 0; xi < C; xi++){
  75 + for(yi = 0; yi < xi; yi++){
  76 + r = sqrt(xi*xi + yi*yi); //calculate the value of r for the current (x, y)
  77 + lerp_alpha(sample, alpha, dr, r); //calculate the lowest nearby sample index and the associated alpha weight
  78 + fr[sample] += folded[yi * C + xi] * alpha; //sum the weighted value from the folded function
  79 + fA[sample] += alpha; //sum the weight
  80 +
  81 + if(sample < X - 1){ //if we aren't dealing with the last bin
  82 + fr[sample + 1] += folded[yi * C + xi] * (1.0 - alpha); //calculate the weighted value for the second point
  83 + fA[sample + 1] += 1 - alpha; //add the second alpha value
  84 + }
  85 + }
  86 + }
  87 +
  88 + //divide out the alpha values
  89 + for(size_t i = 0; i < X; i++)
  90 + fr[i] /= fA[i];
  91 +
  92 + //free allocated memory
  93 + free(folded);
  94 + free(count);
  95 + free(fA);
  96 +}
0 97 \ No newline at end of file
... ...
stim/visualization/aabb3.h
... ... @@ -2,51 +2,31 @@
2 2 #define STIM_AABB3_H
3 3  
4 4 #include <stim/cuda/cudatools/callable.h>
  5 +#include <stim/visualization/aabbn.h>
5 6  
6 7 namespace stim{
7 8  
8   -/// Structure for a 3D axis aligned bounding box
  9 + template<typename T>
  10 + using aabb3 = aabbn<T, 3>;
  11 +/*/// Structure for a 3D axis aligned bounding box
9 12 template<typename T>
10   -struct aabb3{
11   -
12   -//protected:
13   -
14   - T low[3]; //top left corner position
15   - T high[3]; //dimensions along x and y and z
16   -
17   -//public:
18   -
19   - CUDA_CALLABLE aabb3(T x, T y, T z){ //initialize an axis aligned bounding box of size 0 at the given position
20   - low[0] = high[0] = x; //set the position to the user specified coordinates
21   - low[1] = high[1] = y;
22   - low[2] = high[2] = z;
  13 +struct aabb3 : public aabbn<T, 3>{
  14 +
  15 + aabb3() : aabbn() {}
  16 + aabb3(T x0, T y0, T z0, T x1, T y1, T z1){
  17 + low[0] = x0;
  18 + low[1] = y0;
  19 + low[2] = z0;
  20 + high[0] = x0;
  21 + high[1] = x1;
  22 + high[2] = x2;
23 23 }
24 24  
25   - //insert a point into the bounding box, growing the box appropriately
26   - CUDA_CALLABLE void insert(T x, T y, T z){
27   - if(x < low[0]) low[0] = x;
28   - if(y < low[1]) low[1] = y;
29   - if(z < low[2]) low[2] = z;
30   -
31   - if(x > high[0]) high[0] = x;
32   - if(y > high[1]) high[1] = y;
33   - if(z > high[2]) high[2] = z;
34   - }
35   -
36   - //trim the bounding box so that the lower bounds are (x, y, z)
37   - CUDA_CALLABLE void trim_low(T x, T y, T z){
38   - if(low[0] < x) low[0] = x;
39   - if(low[1] < y) low[1] = y;
40   - if(low[2] < z) low[2] = z;
41   - }
  25 + aabb3 aabbn<T, 3>() {
42 26  
43   - CUDA_CALLABLE void trim_high(T x, T y, T z){
44   - if(high[0] > x) high[0] = x;
45   - if(high[1] > y) high[1] = y;
46   - if(high[2] > z) high[2] = z;
47 27 }
48 28  
49   -};
  29 +};*/
50 30  
51 31 }
52 32  
... ...
stim/visualization/aabbn.h
1   -#ifndef STIM_AABB3_H
2   -#define STIM_AABB3_H
  1 +#ifndef STIM_AABBN_H
  2 +#define STIM_AABBN_H
3 3  
4 4 #include <vector>
5 5 #include <stim/cuda/cudatools/callable.h>
... ... @@ -20,16 +20,25 @@ struct aabbn{
20 20 low[d] = high[d] = i[d];
21 21 }
22 22  
23   -//public:
24   -
25   - //CUDA_CALLABLE aabbn(){ //initialize an axis aligned bounding box of size 0 at the given position
26   - // for(size_t d = 0; d < D; d++)
27   - // low[d] = high[d] = 0;
28   - //}
29 23 CUDA_CALLABLE aabbn() {}
30 24 CUDA_CALLABLE aabbn(T* i) {
31 25 init(i);
32 26 }
  27 +
  28 + CUDA_CALLABLE aabbn(T x0, T x1) {
  29 + low[0] = x0;
  30 + high[0] = x1;
  31 + }
  32 +
  33 + CUDA_CALLABLE aabbn(T x0, T y0, T x1, T y1) : aabbn(x0, x1) {
  34 + low[1] = y0;
  35 + high[1] = y1;
  36 + }
  37 +
  38 + CUDA_CALLABLE aabbn(T x0, T y0, T z0, T x1, T y1, T z1) : aabbn(x0, y0, x1, y1) {
  39 + low[2] = z0;
  40 + high[2] = z1;
  41 + }
33 42  
34 43  
35 44 //insert a point into the bounding box, growing the box appropriately
... ... @@ -66,6 +75,14 @@ struct aabbn{
66 75 return newbox;
67 76 }
68 77  
  78 + //translate the box along dimension d a distance of v
  79 + CUDA_CALLABLE void translate(size_t d, T v) {
  80 + for (size_t d = 0; d < D; d++) {
  81 + low[d] += v;
  82 + high[d] += v;
  83 + }
  84 + }
  85 +
69 86 };
70 87  
71 88 }
... ...