Program listing for file numerics/src/tools/op3x3.h

Program listing for file numerics/src/tools/op3x3.h#

   1#ifndef _op3x3_h_
   2#define _op3x3_h_
   3
   4#include <math.h>
   5#include <float.h>
   6#include <stdlib.h>
   7#include <stdio.h>
   8#include <assert.h>
   9
  10
  11#ifndef M_PI
  12#define M_PI        3.14159265358979323846264338327950288
  13#define M_PI_2      1.57079632679489661923132169163975144
  14#endif
  15
  16#ifndef MAYBE_UNUSED
  17#ifdef __GNUC__
  18#define MAYBE_UNUSED __attribute__((unused))
  19#else
  20#define MAYBE_UNUSED
  21#endif
  22#endif
  23
  24#ifndef WARN_RESULT_IGNORED
  25#ifdef __GNUC__
  26#define WARN_RESULT_IGNORED __attribute__ ((warn_unused_result))
  27#else
  28#define WARN_RESULT_IGNORED
  29#endif
  30#endif
  31
  32#ifdef __cplusplus
  33#undef restrict
  34#include <sys/cdefs.h>
  35#define restrict __restrict
  36#endif
  37
  38#define SOLVE_3X3_GEPP(MAT, X) \
  39{ \
  40  int res = solve_3x3_gepp(mat, X); \
  41  if (res) { printf("%s :: Call solve_3x3_gepp(" #MAT ", " #X " failed!. "\
  42                    "No pivot could be selected for column %d\n", __func__, res); } }
  43
  44
  45
  46#define OP3X3(EXPR)                             \
  47  do                                            \
  48  {                                             \
  49    EXPR;                                       \
  50    EXPR;                                       \
  51    EXPR;                                       \
  52    EXPR;                                       \
  53    EXPR;                                       \
  54    EXPR;                                       \
  55    EXPR;                                       \
  56    EXPR;                                       \
  57    EXPR;                                       \
  58  } while(0)                                    \
  59
  60
  61#define OP3(EXPR)                               \
  62  do                                            \
  63  {                                             \
  64    EXPR;                                       \
  65    EXPR;                                       \
  66    EXPR;                                       \
  67  } while(0)                                    \
  68
  69
  70#define SET3X3(V)                                                       \
  71  double* V##00 MAYBE_UNUSED = V++;                                     \
  72  double* V##10 MAYBE_UNUSED = V++;                                     \
  73  double* V##20 MAYBE_UNUSED = V++;                                     \
  74  double* V##01 MAYBE_UNUSED = V++;                                     \
  75  double* V##11 MAYBE_UNUSED = V++;                                     \
  76  double* V##21 MAYBE_UNUSED = V++;                                     \
  77  double* V##02 MAYBE_UNUSED = V++;                                     \
  78  double* V##12 MAYBE_UNUSED = V++;                                     \
  79  double* V##22 MAYBE_UNUSED = V++;
  80#define SET3X3MAYBE(V)                                       \
  81  double* V##00 MAYBE_UNUSED = 0;                            \
  82  double* V##10 MAYBE_UNUSED = 0;                            \
  83  double* V##20 MAYBE_UNUSED = 0;                            \
  84  double* V##01 MAYBE_UNUSED = 0;                            \
  85  double* V##11 MAYBE_UNUSED = 0;                            \
  86  double* V##21 MAYBE_UNUSED = 0;                            \
  87  double* V##02 MAYBE_UNUSED = 0;                            \
  88  double* V##12 MAYBE_UNUSED = 0;                            \
  89  double* V##22 MAYBE_UNUSED = 0;                            \
  90  if (V)                                        \
  91  {                                             \
  92    V##00 = V++;                                \
  93    V##10 = V++;                                \
  94    V##20 = V++;                                \
  95    V##01 = V++;                                \
  96    V##11 = V++;                                \
  97    V##21 = V++;                                \
  98    V##02 = V++;                                \
  99    V##12 = V++;                                \
 100    V##22 = V++;                                \
 101  }
 102
 103
 104#define SET3(V)                                 \
 105  double* V##0 MAYBE_UNUSED = V++;                           \
 106  double* V##1 MAYBE_UNUSED = V++;                           \
 107  double* V##2 MAYBE_UNUSED = V++;
 108
 109
 110#define SET3MAYBE(V)                                 \
 111  double* V##0 MAYBE_UNUSED = 0;                                  \
 112  double* V##1 MAYBE_UNUSED = 0;                                  \
 113  double* V##2 MAYBE_UNUSED = 0;                                  \
 114  if (V)                                             \
 115  {                                                  \
 116    V##0 = V++;                                      \
 117    V##1 = V++;                                      \
 118    V##2 = V++;                                      \
 119  }
 120
 121
 122static inline void cpy3x3(double* restrict a, double* restrict b)
 123{
 124  OP3X3(*b++ = *a++);
 125}
 126
 127
 128static inline void add3x3(double a[9], double b[9])
 129{
 130  OP3X3(*b++ += *a++);
 131}
 132
 133
 134static inline void sub3x3(double a[9], double b[9])
 135{
 136  OP3X3(*b++ -= *a++);
 137}
 138
 139
 140static inline void cpy3(double a[3], double b[3])
 141{
 142  OP3(*b++ = *a++);
 143}
 144
 145
 146static inline void add3(double a[3], double b[3])
 147{
 148  OP3(*b++ += *a++);
 149}
 150
 151
 152static inline void sub3(double a[3], double b[3])
 153{
 154  OP3(*b++ -= *a++);
 155}
 156
 157
 158static inline void scal3x3(double scal, double m[9])
 159{
 160  OP3X3(*m++ *= scal);
 161}
 162
 163
 164static inline void diag_scal3(double* restrict scal_coeffs, double* restrict v)
 165{
 166  OP3(*v++ *= *scal_coeffs++);
 167}
 168
 169
 170static inline void scal3(double scal, double* v)
 171{
 172  OP3(*v++ *= scal);
 173}
 174
 175
 176
 177static inline void cpytr3x3(double* restrict a, double* restrict b)
 178{
 179  SET3X3(a);
 180  SET3X3(b);
 181  *b00 = *a00;
 182  *b10 = *a01;
 183  *b20 = *a02;
 184  *b01 = *a10;
 185  *b11 = *a11;
 186  *b21 = *a12;
 187  *b02 = *a20;
 188  *b12 = *a21;
 189  *b22 = *a22;
 190}
 191
 192
 193static inline void mv3x3(double* restrict a, double* restrict v, double* restrict r)
 194{
 195
 196  double* pr;
 197
 198  pr = r;
 199
 200  *pr++ = *a++ * *v;
 201  *pr++ = *a++ * *v;
 202  *pr++ = *a++ * *v++;
 203
 204  pr = r;
 205
 206  *pr++ += *a++ * *v;
 207  *pr++ += *a++ * *v;
 208  *pr++ += *a++ * *v++;
 209
 210  pr = r;
 211
 212  *pr++ += *a++ * *v;
 213  *pr++ += *a++ * *v;
 214  *pr++ += *a++ * *v++;
 215}
 216
 217
 218static inline void mtv3x3(double* restrict a, double* restrict v, double* restrict r)
 219{
 220
 221  double* pv = v;
 222
 223  *r = *a++ * *pv++;
 224  *r += *a++ * *pv++;
 225  *r++ += *a++ * *pv;
 226
 227  pv = v;
 228
 229  *r = *a++ * *pv++;
 230  *r += *a++ * *pv++;
 231  *r++ += *a++ * *pv;
 232
 233  pv = v;
 234
 235  *r = *a++ * *pv++;
 236  *r += *a++ * *pv++;
 237  *r += *a * *pv;
 238}
 239
 240
 241static inline void mvp2x2(const double* restrict a, const double* restrict v, double* restrict r)
 242{
 243
 244  double* pr;
 245
 246  pr = r;
 247
 248  *pr++ += *a++ * *v;
 249  *pr++ += *a++ * *v++;
 250
 251  pr = r;
 252
 253  *pr++ += *a++ * *v;
 254  *pr++ += *a++ * *v++;
 255}
 256
 257
 258static inline void mvp3x3(const double* restrict a, const double* restrict v, double* restrict r)
 259{
 260
 261  double* pr;
 262
 263  pr = r;
 264
 265  *pr++ += *a++ * *v;
 266  *pr++ += *a++ * *v;
 267  *pr++ += *a++ * *v++;
 268
 269  pr = r;
 270
 271  *pr++ += *a++ * *v;
 272  *pr++ += *a++ * *v;
 273  *pr++ += *a++ * *v++;
 274
 275  pr = r;
 276
 277  *pr++ += *a++ * *v;
 278  *pr++ += *a++ * *v;
 279  *pr++ += *a++ * *v++;
 280}
 281
 282static inline void mvp5x5(const double* restrict a, const double* restrict v, double* restrict r)
 283{
 284
 285  double* pr;
 286
 287  pr = r;
 288
 289  *pr++ += *a++ * *v;
 290  *pr++ += *a++ * *v;
 291  *pr++ += *a++ * *v;
 292  *pr++ += *a++ * *v;
 293  *pr++ += *a++ * *v++;
 294
 295  pr = r;
 296
 297  *pr++ += *a++ * *v;
 298  *pr++ += *a++ * *v;
 299  *pr++ += *a++ * *v;
 300  *pr++ += *a++ * *v;
 301  *pr++ += *a++ * *v++;
 302
 303  pr = r;
 304
 305  *pr++ += *a++ * *v;
 306  *pr++ += *a++ * *v;
 307  *pr++ += *a++ * *v;
 308  *pr++ += *a++ * *v;
 309  *pr++ += *a++ * *v++;
 310
 311  pr = r;
 312
 313  *pr++ += *a++ * *v;
 314  *pr++ += *a++ * *v;
 315  *pr++ += *a++ * *v;
 316  *pr++ += *a++ * *v;
 317  *pr++ += *a++ * *v++;
 318
 319  pr = r;
 320
 321  *pr++ += *a++ * *v;
 322  *pr++ += *a++ * *v;
 323  *pr++ += *a++ * *v;
 324  *pr++ += *a++ * *v;
 325  *pr++ += *a++ * *v++;
 326
 327
 328}
 329
 330
 331static inline void mvp_alpha3x3(double alpha, const double* restrict a, const double* restrict v, double* restrict r)
 332{
 333
 334  double* pr;
 335
 336  pr = r;
 337
 338  *pr++ += alpha * *a++ * *v;
 339  *pr++ += alpha * *a++ * *v;
 340  *pr++ += alpha * *a++ * *v++;
 341
 342  pr = r;
 343
 344  *pr++ += alpha *  *a++ * *v;
 345  *pr++ += alpha *  *a++ * *v;
 346  *pr++ += alpha *  *a++ * *v++;
 347
 348  pr = r;
 349
 350  *pr++ += alpha *  *a++ * *v;
 351  *pr++ += alpha *  *a++ * *v;
 352  *pr++ += alpha *  *a++ * *v++;
 353}
 354
 355static inline void mvm3x3(double* restrict a, double* restrict v, double* restrict r)
 356{
 357
 358  double* pr;
 359
 360  pr = r;
 361
 362  *pr++ -= *a++ * *v;
 363  *pr++ -= *a++ * *v;
 364  *pr++ -= *a++ * *v++;
 365
 366  pr = r;
 367
 368  *pr++ -= *a++ * *v;
 369  *pr++ -= *a++ * *v;
 370  *pr++ -= *a++ * *v++;
 371
 372  pr = r;
 373
 374  *pr++ -= *a++ * *v;
 375  *pr++ -= *a++ * *v;
 376  *pr++ -= *a++ * *v++;
 377}
 378
 379
 380
 381static inline void mtvm3x3(double* restrict a, double* restrict v, double* restrict r)
 382{
 383
 384  double* pv = v;
 385
 386  *r -= *a++ * *pv++;
 387  *r -= *a++ * *pv++;
 388  *r++ -= *a++ * *pv;
 389
 390  pv = v;
 391
 392  *r -= *a++ * *pv++;
 393  *r -= *a++ * *pv++;
 394  *r++ -= *a++ * *pv;
 395
 396  pv = v;
 397
 398  *r -= *a++ * *pv++;
 399  *r -= *a++ * *pv++;
 400  *r -= *a * *pv;
 401}
 402
 403static inline void mm3x3(double* restrict a, double* restrict b, double* restrict c)
 404{
 405
 406  SET3X3(a);
 407  SET3X3(b);
 408  SET3X3(c);
 409
 410  *c00 = *a00 * *b00 + *a01 * *b10 + *a02 * *b20;
 411  *c01 = *a00 * *b01 + *a01 * *b11 + *a02 * *b21;
 412  *c02 = *a00 * *b02 + *a01 * *b12 + *a02 * *b22;
 413
 414  *c10 = *a10 * *b00 + *a11 * *b10 + *a12 * *b20;
 415  *c11 = *a10 * *b01 + *a11 * *b11 + *a12 * *b21;
 416  *c12 = *a10 * *b02 + *a11 * *b12 + *a12 * *b22;
 417
 418  *c20 = *a20 * *b00 + *a21 * *b10 + *a22 * *b20;
 419  *c21 = *a20 * *b01 + *a21 * *b11 + *a22 * *b21;
 420  *c22 = *a20 * *b02 + *a21 * *b12 + *a22 * *b22;
 421
 422}
 423
 424
 425static inline void mmp3x3(double* restrict a, double* restrict b, double* restrict c)
 426{
 427
 428  SET3X3(a);
 429  SET3X3(b);
 430  SET3X3(c);
 431
 432  *c00 += *a00 * *b00 + *a01 * *b10 + *a02 * *b20;
 433  *c01 += *a00 * *b01 + *a01 * *b11 + *a02 * *b21;
 434  *c02 += *a00 * *b02 + *a01 * *b12 + *a02 * *b22;
 435
 436  *c10 += *a10 * *b00 + *a11 * *b10 + *a12 * *b20;
 437  *c11 += *a10 * *b01 + *a11 * *b11 + *a12 * *b21;
 438  *c12 += *a10 * *b02 + *a11 * *b12 + *a12 * *b22;
 439
 440  *c20 += *a20 * *b00 + *a21 * *b10 + *a22 * *b20;
 441  *c21 += *a20 * *b01 + *a21 * *b11 + *a22 * *b21;
 442  *c22 += *a20 * *b02 + *a21 * *b12 + *a22 * *b22;
 443
 444}
 445
 446
 447static inline void mmm3x3(double* restrict a, double* restrict b, double* restrict c)
 448{
 449
 450  SET3X3(a);
 451  SET3X3(b);
 452  SET3X3(c);
 453
 454  *c00 -= *a00 * *b00 + *a01 * *b10 + *a02 * *b20;
 455  *c01 -= *a00 * *b01 + *a01 * *b11 + *a02 * *b21;
 456  *c02 -= *a00 * *b02 + *a01 * *b12 + *a02 * *b22;
 457
 458  *c10 -= *a10 * *b00 + *a11 * *b10 + *a12 * *b20;
 459  *c11 -= *a10 * *b01 + *a11 * *b11 + *a12 * *b21;
 460  *c12 -= *a10 * *b02 + *a11 * *b12 + *a12 * *b22;
 461
 462  *c20 -= *a20 * *b00 + *a21 * *b10 + *a22 * *b20;
 463  *c21 -= *a20 * *b01 + *a21 * *b11 + *a22 * *b21;
 464  *c22 -= *a20 * *b02 + *a21 * *b12 + *a22 * *b22;
 465
 466}
 467
 468
 469static inline double det3x3(double* a)
 470{
 471  SET3X3(a);
 472
 473  return
 474    *a00 * *a11 * *a22 + *a01 * *a12 * *a20 + *a02 * *a10 * *a21 -
 475    *a00 * *a12 * *a21 - *a01 * *a10 * *a22 - *a02 * *a11 * *a20;
 476}
 477
 478
 479
 480WARN_RESULT_IGNORED
 481static inline int solv3x3(double* restrict a, double* restrict x, double* restrict b)
 482{
 483
 484  SET3X3(a);
 485  double* b0 = b++;
 486  double* b1 = b++;
 487  double* b2 = b;
 488
 489  double det =
 490    *a00 * *a11 * *a22 + *a01 * *a12 * *a20 + *a02 * *a10 * *a21 -
 491    *a00 * *a12 * *a21 - *a01 * *a10 * *a22 - *a02 * *a11 * *a20;
 492
 493  if (fabs(det) > DBL_EPSILON)
 494  {
 495    double idet = 1.0 / det;
 496    *x++ = idet * (*a01 * *a12 * *b2 +  *a02 * *a21 * *b1 +
 497                   *a11 * *a22 * *b0 -  *a01 * *a22 * *b1 -
 498                   *a02 * *a11 * *b2 -  *a12 * *a21 * *b0);
 499    *x++ = idet * (*a00 * *a22 * *b1 +  *a02 * *a10 * *b2 +
 500                   *a12 * *a20 * *b0 -  *a00 * *a12 * *b2 -
 501                   *a02 * *a20 * *b1 -  *a10 * *a22 * *b0);
 502    *x   = idet * (*a00 * *a11 * *b2 +  *a01 * *a20 * *b1 +
 503                   *a10 * *a21 * *b0 -  *a00 * *a21 * *b1 -
 504                   *a01 * *a10 * *b2 -  *a11 * *a20 * *b0);
 505  }
 506  else
 507  {
 508    *x++ = NAN;
 509    *x++ = NAN;
 510    *x   = NAN;
 511    return 1;
 512  }
 513  return 0;
 514}
 515
 516
 517static inline int equal3x3(double* restrict a, double* restrict b)
 518{
 519  return *a++ == *b++ &&
 520         *a++ == *b++ &&
 521         *a++ == *b++ &&
 522         *a++ == *b++ &&
 523         *a++ == *b++ &&
 524         *a++ == *b++ &&
 525         *a++ == *b++ &&
 526         *a++ == *b++ &&
 527         *a == *b;
 528}
 529
 530
 531static inline int equal3(double* restrict a, double* restrict b)
 532{
 533  return *a++ == *b++ &&
 534         *a++ == *b++ &&
 535         *a == *b;
 536}
 537
 538
 539static inline double dot3(double* restrict a, double* restrict b)
 540{
 541  double r;
 542  r = *a++ * * b++;
 543  r += *a++ * * b++;
 544  r += *a * *b;
 545  return r;
 546}
 547
 548
 549static inline void cross3(double* restrict a, double* restrict b, double* restrict c)
 550{
 551  double* a0 = a++;
 552  double* a1 = a++;
 553  double* a2 = a;
 554  double* b0 = b++;
 555  double* b1 = b++;
 556  double* b2 = b;
 557
 558  *c++ = *a1 * *b2 - *a2 * *b1;
 559  *c++ = *a2 * *b0 - *a0 * *b2;
 560  *c   = *a0 * *b1 - *a1 * *b0;
 561}
 562
 563
 564static inline double hypot2(double* a)
 565{
 566  double r;
 567
 568  r = *a * *a;
 569  a++;
 570  r += *a * *a;
 571  return sqrt(r);
 572}
 573
 574
 575
 576static inline double hypot3(double* a)
 577{
 578  double r;
 579
 580  r = *a * *a;
 581  a++;
 582  r += *a * *a;
 583  a++;
 584  r += *a * *a;
 585  return sqrt(r);
 586}
 587static inline double hypot5(double* a)
 588{
 589  double r;
 590
 591  r = *a * *a;
 592  a++;
 593  r += *a * *a;
 594  a++;
 595  r += *a * *a;
 596  a++;
 597  r += *a * *a;
 598  a++;
 599  r += *a * *a;
 600
 601  return sqrt(r);
 602}
 603static inline double hypot9(double* a)
 604{
 605  double r;
 606
 607  r = *a * *a;
 608  a++;
 609  r += *a * *a;
 610  a++;
 611  r += *a * *a;
 612  a++;
 613  r += *a * *a;
 614  a++;
 615  r += *a * *a;
 616  a++;
 617  r += *a * *a;
 618  a++;
 619  r += *a * *a;
 620  a++;
 621  r += *a * *a;
 622  a++;
 623  r += *a * *a;
 624
 625  return sqrt(r);
 626}
 627
 628
 629
 630
 631static inline void extract3x3(int n, int i0, int j0, double* restrict a, double* restrict b)
 632{
 633
 634  int k0 = i0 + n * j0;
 635
 636  int nm3 = n - 3;
 637
 638  a += k0;
 639
 640  *b++ = *a++;
 641  *b++ = *a++;
 642  *b++ = *a++;
 643  a += nm3;
 644  *b++ = *a++;
 645  *b++ = *a++;
 646  *b++ = *a++;
 647  a += nm3;
 648  *b++ = *a++;
 649  *b++ = *a++;
 650  *b   = *a;
 651}
 652
 653
 654
 655static inline void insert3x3(int n, int i0, int j0, double* restrict a, double* restrict b)
 656{
 657
 658  int k0 = i0 + n * j0;
 659
 660  int nm3 = n - 3;
 661
 662  a += k0;
 663
 664  *a++ = *b++;
 665  *a++ = *b++;
 666  *a++ = *b++;
 667  a += nm3;
 668  *a++ = *b++;
 669  *a++ = *b++;
 670  *a++ = *b++;
 671  a += nm3;
 672  *a++ = *b++;
 673  *a++ = *b++;
 674  *a = *b;
 675}
 676
 677
 678void print3x3(double* mat);
 679
 680
 681
 682void print3(double* v);
 683
 684
 685WARN_RESULT_IGNORED
 686static inline int orthoBaseFromVector_old(double *Ax, double *Ay, double *Az,
 687                                      double *A1x, double *A1y, double *A1z,
 688                                      double *A2x, double *A2y, double *A2z)
 689{
 690  double normA = sqrt((*Ax) * (*Ax) + (*Ay) * (*Ay) + (*Az) * (*Az));
 691  if (normA == 0.)
 692  {
 693    (*Ax) = NAN;
 694    (*Ay) = NAN;
 695    (*Az) = NAN;
 696    (*A1x) = NAN;
 697    (*A1y) = NAN;
 698    (*A1z) = NAN;
 699    (*A2x) = NAN;
 700    (*A2y) = NAN;
 701    (*A2z) = NAN;
 702    return 1;
 703  }
 704  (*Ax) /= normA;
 705  (*Ay) /= normA;
 706  (*Az) /= normA;
 707
 708
 709  if (fabs(*Ax) > fabs(*Ay))
 710  {
 711    if (fabs((*Ax)) > fabs((*Az)))
 712    {
 713      (*A1x) = (*Ay);
 714      (*A1y) = -(*Ax);
 715      (*A1z) = 0;
 716    }
 717    else
 718    {
 719      (*A1z) = (*Ay);
 720      (*A1y) = -(*Az);
 721      (*A1x) = 0;
 722    }
 723  }
 724  else if (fabs(*Ay) > fabs(*Az))
 725  {
 726    (*A1y) = (*Ax);
 727    (*A1x) = -(*Ay);
 728    (*A1z) = 0;
 729  }
 730  else
 731  {
 732    (*A1z) = (*Ay);
 733    (*A1y) = -(*Az);
 734    (*A1x) = 0;
 735  }
 736  double normA1 = sqrt((*A1x) * (*A1x) + (*A1y) * (*A1y) + (*A1z) * (*A1z));
 737  (*A1x) /= normA1;
 738  (*A1y) /= normA1;
 739  (*A1z) /= normA1;
 740
 741  (*A2x) = *Ay * *A1z - *Az * *A1y;
 742  (*A2y) = *Az * *A1x - *Ax * *A1z;
 743  (*A2z) = *Ax * *A1y - *Ay * *A1x;
 744
 745
 746  assert(fabs(sqrt((*Ax) * (*Ax) + (*Ay) * (*Ay) + (*Az) * (*Az))-1.0) < 1e-14 );
 747  assert(fabs(sqrt((*A1x) * (*A1x) + (*A1y) * (*A1y) + (*A1z) * (*A1z))-1.0) < 1e-14 );
 748  assert(fabs(*Ax * *A1x + *Ay * *A1y + *Az * *A1z) < 1e-14 );
 749  assert(fabs(sqrt((*A2x) * (*A2x) + (*A2y) * (*A2y) + (*A2z) * (*A2z))-1.0) < 1e-14 );
 750  assert(fabs(*Ax * *A2x + *Ay * *A2y +  *Az * *A2z) < 1e-14 );
 751  assert(fabs(*A1x * *A2x +  *A1y * *A2y +  *A1z * *A2z)< 1e-14 );
 752
 753  return 0;
 754}
 755
 756
 757
 758
 759static inline int orthoBaseFromVector(double* Ax, double* Ay, double* Az, double* A1x,
 760                                       double* A1y, double* A1z, double* A2x, double* A2y,
 761                                       double* A2z)
 762
 763{
 764  double normA = sqrt((*Ax) * (*Ax) + (*Ay) * (*Ay) + (*Az) * (*Az));
 765  if (normA == 0.)
 766  {
 767    (*Ax) = NAN;
 768    (*Ay) = NAN;
 769    (*Az) = NAN;
 770    (*A1x) = NAN;
 771    (*A1y) = NAN;
 772    (*A1z) = NAN;
 773    (*A2x) = NAN;
 774    (*A2y) = NAN;
 775    (*A2z) = NAN;
 776    return 1;
 777  }
 778  (*Ax) /= normA;
 779  (*Ay) /= normA;
 780  (*Az) /= normA;
 781
 782  double sign = copysignf(1.0, *Az);
 783  const double  a = -1.0 / (sign + *Az);
 784  const double  b = *Ax * *Ay * a;
 785
 786  *A1x = 1.0+ sign * *Ax * * Ax * a;
 787  *A1y = sign  * b;
 788  *A1z = - sign * *Ax;
 789
 790  *A2x = b;
 791  *A2y = sign + *Ay * *Ay * a;
 792  *A2z = - *Ay;
 793
 794
 795  assert(fabs(sqrt((*A1x) * (*A1x) + (*A1y) * (*A1y) + (*A1z) * (*A1z))-1.0) < 1e-14 );
 796  assert(fabs(*Ax * *A1x + *Ay * *A1y + *Az * *A1z) < 1e-14 );
 797  assert(fabs(sqrt((*A2x) * (*A2x) + (*A2y) * (*A2y) + (*A2z) * (*A2z))-1.0) < 1e-14 );
 798  assert(fabs(*Ax * *A2x + *Ay * *A2y +  *Az * *A2z) < 1e-14 );
 799  assert(fabs(*A1x * *A2x +  *A1y * *A2y +  *A1z * *A2z)< 1e-14 );
 800  return 0;
 801}
 802
 803WARN_RESULT_IGNORED
 804static inline int solve_3x3_gepp(const double* restrict a, double* restrict b)
 805{
 806  double lp0, lp1, lp2, lm1, lm2, ln1, ln2;
 807  double bl, bm, bn;
 808  double factor1, factor2;
 809  double sol0, sol1, sol2;
 810  double alp0;
 811  double a0 = a[0];
 812  double a1 = a[1];
 813  double a2 = a[2];
 814  double aa0 = fabs(a0);
 815  double aa1 = fabs(a1);
 816  double aa2 = fabs(a2);
 817
 818  int pivot2, pivot1 = aa0 >= aa1 ? aa0 >= aa2 ? 0 : 20 : aa1 >= aa2 ? 10 : 20;
 819  int info = 0;
 820
 821
 822  switch (pivot1)
 823  {
 824    case 0:
 825      factor1 = a1/a0;
 826      factor2 = a2/a0;
 827      lp0 = a0;
 828      alp0 = fabs(a0);
 829      lp1 = a[3];
 830      lp2 = a[6];
 831      lm1 = a[4] - factor1*lp1;
 832      lm2 = a[7] - factor1*lp2;
 833      ln1 = a[5] - factor2*lp1;
 834      ln2 = a[8] - factor2*lp2;
 835      bl = b[0];
 836      bm = b[1] - factor1*bl;
 837      bn = b[2] - factor2*bl;
 838      break;
 839    case 10:
 840      factor1 = a0/a1;
 841      factor2 = a2/a1;
 842      lp0 = a1;
 843      alp0 = fabs(a1);
 844      lp1 = a[4];
 845      lp2 = a[7];
 846      lm1 = a[3] - factor1*lp1;
 847      lm2 = a[6] - factor1*lp2;
 848      ln1 = a[5] - factor2*lp1;
 849      ln2 = a[8] - factor2*lp2;
 850      bl = b[1];
 851      bm = b[0] - factor1*bl;
 852      bn = b[2] - factor2*bl;
 853      break;
 854    case 20:
 855      factor1 = a0/a2;
 856      factor2 = a1/a2;
 857      lp0 = a2;
 858      alp0 = fabs(a2);
 859      lp1 = a[5];
 860      lp2 = a[8];
 861      lm1 = a[3] - factor1*lp1;
 862      lm2 = a[6] - factor1*lp2;
 863      ln1 = a[4] - factor2*lp1;
 864      ln2 = a[7] - factor2*lp2;
 865      bl = b[2];
 866      bm = b[0] - factor1*bl;
 867      bn = b[1] - factor2*bl;
 868      break;
 869    default:
 870      exit(EXIT_FAILURE);
 871  }
 872
 873  if (alp0 <= DBL_EPSILON)
 874  {
 875    info = 1;
 876    return info;
 877  }
 878
 879  double alm1 = fabs(lm1);
 880  double aln1 = fabs(ln1);
 881  pivot2 = alm1 >= aln1 ? 0 : 1;
 882
 883
 884  switch (pivot2)
 885  {
 886    case 0:
 887      if (alm1 < DBL_EPSILON)
 888      {
 889        info = 1;
 890        return info;
 891      }
 892      factor1 = ln1/lm1;
 893      sol2 = (bn - factor1*bm)/(ln2 - factor1*lm2);
 894      sol1 = (bm - lm2*sol2)/lm1;
 895      break;
 896    case 1:
 897      if (aln1 < DBL_EPSILON)
 898      {
 899        info = 1;
 900        return info;
 901      }
 902      factor1 = lm1/ln1;
 903      sol2 = (bm - factor1*bn)/(lm2 - factor1*ln2);
 904      sol1 = (bn - ln2*sol2)/ln1;
 905      break;
 906    default:
 907      exit(EXIT_FAILURE);
 908  }
 909  sol0 = (bl - sol1*lp1 - sol2*lp2)/lp0;
 910
 911  b[0] = sol0;
 912  b[1] = sol1;
 913  b[2] = sol2;
 914
 915  return info;
 916}
 917
 918
 919#define mat_elem(a, y, x, n) (a + ((y) * (n) + (x)))
 920
 921static void swap_row(double *a, double *b, int r1, int r2, int n)
 922{
 923    double tmp, *p1, *p2;
 924    int i;
 925
 926    if (r1 == r2) return;
 927    for (i = 0; i < n; i++) {
 928            p1 = mat_elem(a, r1, i, n);
 929            p2 = mat_elem(a, r2, i, n);
 930            tmp = *p1, *p1 = *p2, *p2 = tmp;
 931    }
 932    tmp = b[r1], b[r1] = b[r2], b[r2] = tmp;
 933}
 934
 935static inline  void solve_nxn_gepp(int n, double *a, double *b, double *x)
 936{
 937#define A(y, x) (*mat_elem(a, y, x, n))
 938    int  j, col, row, max_row,dia;
 939    double max, tmp;
 940
 941    for (dia = 0; dia < n; dia++) {
 942            max_row = dia, max = A(dia, dia);
 943
 944            for (row = dia + 1; row < n; row++)
 945                    if ((tmp = fabs(A(row, dia))) > max)
 946                            max_row = row, max = tmp;
 947
 948            swap_row(a, b, dia, max_row, n);
 949
 950            for (row = dia + 1; row < n; row++) {
 951                    tmp = A(row, dia) / A(dia, dia);
 952                    for (col = dia+1; col < n; col++)
 953                            A(row, col) -= tmp * A(dia, col);
 954                    A(row, dia) = 0;
 955                    b[row] -= tmp * b[dia];
 956            }
 957    }
 958    for (row = n - 1; row >= 0; row--) {
 959            tmp = b[row];
 960            for (j = n - 1; j > row; j--)
 961                    tmp -= x[j] * A(row, j);
 962            x[row] = tmp / A(row, row);
 963    }
 964#undef A
 965}
 966
 967
 968WARN_RESULT_IGNORED
 969static inline int eig_3x3(double* restrict a, double* restrict b, double* restrict eig)
 970{
 971  SET3X3(a);
 972  SET3X3(b);
 973  double pi = M_PI;
 974  double p1 =  *a01* *a01 +  *a02* *a02 +  *a12* *a12;
 975  if (p1 == 0)
 976  {
 977
 978    eig[0] =  *a00;
 979    eig[1] =  *a11;
 980    eig[2] =  *a22;
 981  }
 982  else
 983  {
 984    double q = ( *a00+ *a11+ *a22)/3.0;
 985    double  p2 = ( *a00 - q)*( *a00 - q) + ( *a11 - q)*( *a11 - q) + ( *a22 - q)*( *a22 - q) + 2 * p1;
 986    double p = sqrt(p2 / 6.0);
 987    *b00 = (1 / p) * ( *a00 - q);
 988    *b11 = (1 / p) * ( *a11 - q);
 989    *b22 = (1 / p) * ( *a22 - q);
 990    *b01 = (1 / p) * ( *a01);
 991    *b02 = (1 / p) * ( *a02);
 992    *b10 = (1 / p) * ( *a10);
 993    *b12 = (1 / p) * ( *a12);
 994    *b20 = (1 / p) * ( *a20);
 995    *b21 = (1 / p) * ( *a21);
 996
 997    double det =
 998      *b00 * *b11 * *b22 + *b01 * *b12 * *b20 + *b02 * *b10 * *b21 -
 999      *b00 * *b12 * *b21 - *b01 * *b10 * *b22 - *b02 * *b11 * *b20;
1000    double r = det/2.0;
1001
1002
1003
1004    double phi;
1005    if (r <= -1)
1006      phi = pi / 3.0;
1007    else if (r >= 1)
1008      phi = 0.0;
1009    else
1010      phi = acos(r) / 3;
1011
1012
1013    eig[0] = q + 2 * p * cos(phi);
1014    eig[2] = q + 2 * p * cos(phi + (2*pi/3));
1015    eig[1] = 3 * q - eig[0] - eig[2];
1016  }
1017
1018  return 0;
1019}
1020
1021
1022
1023#endif