// Compile-time constants and rounding-helper macros used by the 3x3 SVD
// routines that follow.
//
// NOTE(review): this region appears to be missing lines (e.g. the body guarded
// by the first `#if`, and the `#else` / `#endif` of the second one). The
// comments below describe only what is visible here.

// Guard on a CUDA build being compiled by a CUDA compiler; the guarded content
// is not visible in this view.
32 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__)
// Integer constant 1065353216 == 0x3F800000, which is the IEEE-754 binary32
// bit pattern of 1.0f. Presumably meant to be used through an integer view of
// a float -- TODO confirm at the use sites.
39 #define gone 1065353216
// Integer constant whose value is the binary32 bit pattern of ~0.38268343
// (sin(pi/8) rounded to single precision).
40 #define gsine_pi_over_eight 1053028117
// Integer constant whose value is the binary32 bit pattern of ~0.92387953
// (cos(pi/8) rounded to single precision).
42 #define gcosine_pi_over_eight 1064076127
// Small floating-point threshold (double literal).
43 #define gtiny_number 1.e-20
// Double literal that matches 3 + 2*sqrt(2) to single precision.
44 #define gfour_gamma_squared 5.8284273147583007813
// CUDA compilation: each helper name expands to the intrinsic of the same
// name, so call sites can use one spelling on every build.
52 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__)
53 #define __fadd_rn(x, y) __fadd_rn(x, y)
54 #define __fsub_rn(x, y) __fsub_rn(x, y)
55 #define __frsqrt_rn(x) __frsqrt_rn(x)
57 #define __dadd_rn(x, y) __dadd_rn(x, y)
58 #define __dsub_rn(x, y) __dsub_rn(x, y)
// Double-precision reciprocal square root, composed here as the reciprocal
// (`__drcp_rn`) of the square root (`__dsqrt_rn`).
59 #define __drsqrt_rn(x) __drcp_rn(__dsqrt_rn(x))
// Definitions of the same helper names using plain C++ operators.
// NOTE(review): presumably these belong to an `#else` branch (non-CUDA build)
// that is not visible here -- confirm, otherwise they redefine the macros
// above.
// NOTE(review): macro parameters are not parenthesized in the expansions, so
// an argument containing a lower-precedence operator would expand incorrectly;
// callers should pass simple operands.
// NOTE(review): `1.0` is a double literal, so the division below is carried
// out in double precision even in the float-named helper.
62 #define __fadd_rn(x, y) (x + y)
63 #define __fsub_rn(x, y) (x - y)
64 #define __frsqrt_rn(x) (1.0 / sqrt(x))
66 #define __dadd_rn(x, y) (x + y)
67 #define __dsub_rn(x, y) (x - y)
68 #define __drsqrt_rn(x) (1.0 / sqrt(x))
// Type-neutral spellings of the same three helpers.
70 #define __add_rn(x, y) (x + y)
71 #define __sub_rn(x, y) (x - y)
72 #define __rsqrt_rn(x) (1.0 / sqrt(x))
80 template <
typename scalar_t>
86 template <
typename scalar_t>
94 const double *A_3x3,
double *U_3x3,
double *S_3x1,
double *V_3x3) {
95 double gsmall_number = 1.e-12;
97 un<double> Sa11, Sa21, Sa31, Sa12, Sa22, Sa32, Sa13, Sa23, Sa33;
98 un<double> Su11, Su21, Su31, Su12, Su22, Su32, Su13, Su23, Su33;
99 un<double> Sv11, Sv21, Sv31, Sv12, Sv22, Sv32, Sv13, Sv23, Sv33;
102 un<double> Ss11, Ss21, Ss31, Ss22, Ss32, Ss33;
119 Ss11.
f = Sa11.
f * Sa11.
f;
120 Stmp1.
f = Sa21.
f * Sa21.
f;
122 Stmp1.
f = Sa31.
f * Sa31.
f;
125 Ss21.
f = Sa12.
f * Sa11.
f;
126 Stmp1.
f = Sa22.
f * Sa21.
f;
128 Stmp1.
f = Sa32.
f * Sa31.
f;
131 Ss31.
f = Sa13.
f * Sa11.
f;
132 Stmp1.
f = Sa23.
f * Sa21.
f;
134 Stmp1.
f = Sa33.
f * Sa31.
f;
137 Ss22.
f = Sa12.
f * Sa12.
f;
138 Stmp1.
f = Sa22.
f * Sa22.
f;
140 Stmp1.
f = Sa32.
f * Sa32.
f;
143 Ss32.
f = Sa13.
f * Sa12.
f;
144 Stmp1.
f = Sa23.
f * Sa22.
f;
146 Stmp1.
f = Sa33.
f * Sa32.
f;
149 Ss33.
f = Sa13.
f * Sa13.
f;
150 Stmp1.
f = Sa23.
f * Sa23.
f;
152 Stmp1.
f = Sa33.
f * Sa33.
f;
163 for (
int i = 0; i < 4; i++) {
164 Ssh.
f = Ss21.
f * 0.5f;
167 Stmp2.
f = Ssh.
f * Ssh.
f;
169 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
170 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
172 Sch.
ui = Sch.
ui | Stmp2.
ui;
174 Stmp1.
f = Ssh.
f * Ssh.
f;
175 Stmp2.
f = Sch.
f * Sch.
f;
179 Ssh.
f = Stmp4.
f * Ssh.
f;
180 Sch.
f = Stmp4.
f * Sch.
f;
182 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
185 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
186 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
188 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
189 Sch.
ui = Sch.
ui | Stmp2.
ui;
191 Stmp1.
f = Ssh.
f * Ssh.
f;
192 Stmp2.
f = Sch.
f * Sch.
f;
194 Ss.
f = Sch.
f * Ssh.
f;
197 #ifdef DEBUG_JACOBI_CONJUGATE
198 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
206 Ss33.
f = Ss33.
f * Stmp3.
f;
207 Ss31.
f = Ss31.
f * Stmp3.
f;
208 Ss32.
f = Ss32.
f * Stmp3.
f;
209 Ss33.
f = Ss33.
f * Stmp3.
f;
211 Stmp1.
f = Ss.
f * Ss31.
f;
212 Stmp2.
f = Ss.
f * Ss32.
f;
213 Ss31.
f = Sc.
f * Ss31.
f;
214 Ss32.
f = Sc.
f * Ss32.
f;
218 Stmp2.
f = Ss.
f * Ss.
f;
219 Stmp1.
f = Ss22.
f * Stmp2.
f;
220 Stmp3.
f = Ss11.
f * Stmp2.
f;
221 Stmp4.
f = Sc.
f * Sc.
f;
222 Ss11.
f = Ss11.
f * Stmp4.
f;
223 Ss22.
f = Ss22.
f * Stmp4.
f;
228 Ss21.
f = Ss21.
f * Stmp4.
f;
229 Stmp4.
f = Sc.
f * Ss.
f;
230 Stmp2.
f = Stmp2.
f * Stmp4.
f;
231 Stmp5.
f = Stmp5.
f * Stmp4.
f;
236 #ifdef DEBUG_JACOBI_CONJUGATE
237 printf(
"%.20g\n", Ss11.
f);
238 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
239 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
246 Stmp1.
f = Ssh.
f * Sqvvx.
f;
247 Stmp2.
f = Ssh.
f * Sqvvy.
f;
248 Stmp3.
f = Ssh.
f * Sqvvz.
f;
249 Ssh.
f = Ssh.
f * Sqvs.
f;
251 Sqvs.
f = Sch.
f * Sqvs.
f;
252 Sqvvx.
f = Sch.
f * Sqvvx.
f;
253 Sqvvy.
f = Sch.
f * Sqvvy.
f;
254 Sqvvz.
f = Sch.
f * Sqvvz.
f;
261 #ifdef DEBUG_JACOBI_CONJUGATE
262 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.
f, Sqvvy.
f, Sqvvz.
f,
269 Ssh.
f = Ss32.
f * 0.5f;
272 Stmp2.
f = Ssh.
f * Ssh.
f;
274 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
275 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
277 Sch.
ui = Sch.
ui | Stmp2.
ui;
279 Stmp1.
f = Ssh.
f * Ssh.
f;
280 Stmp2.
f = Sch.
f * Sch.
f;
284 Ssh.
f = Stmp4.
f * Ssh.
f;
285 Sch.
f = Stmp4.
f * Sch.
f;
287 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
290 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
291 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
293 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
294 Sch.
ui = Sch.
ui | Stmp2.
ui;
296 Stmp1.
f = Ssh.
f * Ssh.
f;
297 Stmp2.
f = Sch.
f * Sch.
f;
299 Ss.
f = Sch.
f * Ssh.
f;
302 #ifdef DEBUG_JACOBI_CONJUGATE
303 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
312 Ss11.
f = Ss11.
f * Stmp3.
f;
313 Ss21.
f = Ss21.
f * Stmp3.
f;
314 Ss31.
f = Ss31.
f * Stmp3.
f;
315 Ss11.
f = Ss11.
f * Stmp3.
f;
317 Stmp1.
f = Ss.
f * Ss21.
f;
318 Stmp2.
f = Ss.
f * Ss31.
f;
319 Ss21.
f = Sc.
f * Ss21.
f;
320 Ss31.
f = Sc.
f * Ss31.
f;
324 Stmp2.
f = Ss.
f * Ss.
f;
325 Stmp1.
f = Ss33.
f * Stmp2.
f;
326 Stmp3.
f = Ss22.
f * Stmp2.
f;
327 Stmp4.
f = Sc.
f * Sc.
f;
328 Ss22.
f = Ss22.
f * Stmp4.
f;
329 Ss33.
f = Ss33.
f * Stmp4.
f;
334 Ss32.
f = Ss32.
f * Stmp4.
f;
335 Stmp4.
f = Sc.
f * Ss.
f;
336 Stmp2.
f = Stmp2.
f * Stmp4.
f;
337 Stmp5.
f = Stmp5.
f * Stmp4.
f;
342 #ifdef DEBUG_JACOBI_CONJUGATE
343 printf(
"%.20g\n", Ss11.
f);
344 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
345 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
352 Stmp1.
f = Ssh.
f * Sqvvx.
f;
353 Stmp2.
f = Ssh.
f * Sqvvy.
f;
354 Stmp3.
f = Ssh.
f * Sqvvz.
f;
355 Ssh.
f = Ssh.
f * Sqvs.
f;
357 Sqvs.
f = Sch.
f * Sqvs.
f;
358 Sqvvx.
f = Sch.
f * Sqvvx.
f;
359 Sqvvy.
f = Sch.
f * Sqvvy.
f;
360 Sqvvz.
f = Sch.
f * Sqvvz.
f;
367 #ifdef DEBUG_JACOBI_CONJUGATE
368 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.
f, Sqvvy.
f, Sqvvz.
f,
376 Ssh.
f = Ss31.
f * 0.5f;
379 Stmp2.
f = Ssh.
f * Ssh.
f;
381 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
382 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
384 Sch.
ui = Sch.
ui | Stmp2.
ui;
386 Stmp1.
f = Ssh.
f * Ssh.
f;
387 Stmp2.
f = Sch.
f * Sch.
f;
391 Ssh.
f = Stmp4.
f * Ssh.
f;
392 Sch.
f = Stmp4.
f * Sch.
f;
394 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
397 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
398 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
400 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
401 Sch.
ui = Sch.
ui | Stmp2.
ui;
403 Stmp1.
f = Ssh.
f * Ssh.
f;
404 Stmp2.
f = Sch.
f * Sch.
f;
406 Ss.
f = Sch.
f * Ssh.
f;
409 #ifdef DEBUG_JACOBI_CONJUGATE
410 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
419 Ss22.
f = Ss22.
f * Stmp3.
f;
420 Ss32.
f = Ss32.
f * Stmp3.
f;
421 Ss21.
f = Ss21.
f * Stmp3.
f;
422 Ss22.
f = Ss22.
f * Stmp3.
f;
424 Stmp1.
f = Ss.
f * Ss32.
f;
425 Stmp2.
f = Ss.
f * Ss21.
f;
426 Ss32.
f = Sc.
f * Ss32.
f;
427 Ss21.
f = Sc.
f * Ss21.
f;
431 Stmp2.
f = Ss.
f * Ss.
f;
432 Stmp1.
f = Ss11.
f * Stmp2.
f;
433 Stmp3.
f = Ss33.
f * Stmp2.
f;
434 Stmp4.
f = Sc.
f * Sc.
f;
435 Ss33.
f = Ss33.
f * Stmp4.
f;
436 Ss11.
f = Ss11.
f * Stmp4.
f;
441 Ss31.
f = Ss31.
f * Stmp4.
f;
442 Stmp4.
f = Sc.
f * Ss.
f;
443 Stmp2.
f = Stmp2.
f * Stmp4.
f;
444 Stmp5.
f = Stmp5.
f * Stmp4.
f;
449 #ifdef DEBUG_JACOBI_CONJUGATE
450 printf(
"%.20g\n", Ss11.
f);
451 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
452 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
459 Stmp1.
f = Ssh.
f * Sqvvx.
f;
460 Stmp2.
f = Ssh.
f * Sqvvy.
f;
461 Stmp3.
f = Ssh.
f * Sqvvz.
f;
462 Ssh.
f = Ssh.
f * Sqvs.
f;
464 Sqvs.
f = Sch.
f * Sqvs.
f;
465 Sqvvx.
f = Sch.
f * Sqvvx.
f;
466 Sqvvy.
f = Sch.
f * Sqvvy.
f;
467 Sqvvz.
f = Sch.
f * Sqvvz.
f;
480 Stmp2.
f = Sqvs.
f * Sqvs.
f;
481 Stmp1.
f = Sqvvx.
f * Sqvvx.
f;
483 Stmp1.
f = Sqvvy.
f * Sqvvy.
f;
485 Stmp1.
f = Sqvvz.
f * Sqvvz.
f;
489 Stmp4.
f = Stmp1.
f * 0.5f;
490 Stmp3.
f = Stmp1.
f * Stmp4.
f;
491 Stmp3.
f = Stmp1.
f * Stmp3.
f;
492 Stmp3.
f = Stmp2.
f * Stmp3.
f;
496 Sqvs.
f = Sqvs.
f * Stmp1.
f;
497 Sqvvx.
f = Sqvvx.
f * Stmp1.
f;
498 Sqvvy.
f = Sqvvy.
f * Stmp1.
f;
499 Sqvvz.
f = Sqvvz.
f * Stmp1.
f;
505 Stmp1.
f = Sqvvx.
f * Sqvvx.
f;
506 Stmp2.
f = Sqvvy.
f * Sqvvy.
f;
507 Stmp3.
f = Sqvvz.
f * Sqvvz.
f;
508 Sv11.
f = Sqvs.
f * Sqvs.
f;
520 Sv32.
f = Sqvs.
f * Stmp1.
f;
521 Sv13.
f = Sqvs.
f * Stmp2.
f;
522 Sv21.
f = Sqvs.
f * Stmp3.
f;
523 Stmp1.
f = Sqvvy.
f * Stmp1.
f;
524 Stmp2.
f = Sqvvz.
f * Stmp2.
f;
525 Stmp3.
f = Sqvvx.
f * Stmp3.
f;
539 Sa12.
f = Sv12.
f * Sa11.
f;
540 Sa13.
f = Sv13.
f * Sa11.
f;
541 Sa11.
f = Sv11.
f * Sa11.
f;
542 Stmp1.
f = Sv21.
f * Stmp2.
f;
544 Stmp1.
f = Sv31.
f * Stmp3.
f;
546 Stmp1.
f = Sv22.
f * Stmp2.
f;
548 Stmp1.
f = Sv32.
f * Stmp3.
f;
550 Stmp1.
f = Sv23.
f * Stmp2.
f;
552 Stmp1.
f = Sv33.
f * Stmp3.
f;
557 Sa22.
f = Sv12.
f * Sa21.
f;
558 Sa23.
f = Sv13.
f * Sa21.
f;
559 Sa21.
f = Sv11.
f * Sa21.
f;
560 Stmp1.
f = Sv21.
f * Stmp2.
f;
562 Stmp1.
f = Sv31.
f * Stmp3.
f;
564 Stmp1.
f = Sv22.
f * Stmp2.
f;
566 Stmp1.
f = Sv32.
f * Stmp3.
f;
568 Stmp1.
f = Sv23.
f * Stmp2.
f;
570 Stmp1.
f = Sv33.
f * Stmp3.
f;
575 Sa32.
f = Sv12.
f * Sa31.
f;
576 Sa33.
f = Sv13.
f * Sa31.
f;
577 Sa31.
f = Sv11.
f * Sa31.
f;
578 Stmp1.
f = Sv21.
f * Stmp2.
f;
580 Stmp1.
f = Sv31.
f * Stmp3.
f;
582 Stmp1.
f = Sv22.
f * Stmp2.
f;
584 Stmp1.
f = Sv32.
f * Stmp3.
f;
586 Stmp1.
f = Sv23.
f * Stmp2.
f;
588 Stmp1.
f = Sv33.
f * Stmp3.
f;
595 Stmp1.
f = Sa11.
f * Sa11.
f;
596 Stmp4.
f = Sa21.
f * Sa21.
f;
598 Stmp4.
f = Sa31.
f * Sa31.
f;
601 Stmp2.
f = Sa12.
f * Sa12.
f;
602 Stmp4.
f = Sa22.
f * Sa22.
f;
604 Stmp4.
f = Sa32.
f * Sa32.
f;
607 Stmp3.
f = Sa13.
f * Sa13.
f;
608 Stmp4.
f = Sa23.
f * Sa23.
f;
610 Stmp4.
f = Sa33.
f * Sa33.
f;
615 Stmp4.
ui = (Stmp1.
f < Stmp2.
f) ? 0xffffffff : 0;
616 Stmp5.
ui = Sa11.
ui ^ Sa12.
ui;
617 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
618 Sa11.
ui = Sa11.
ui ^ Stmp5.
ui;
619 Sa12.
ui = Sa12.
ui ^ Stmp5.
ui;
621 Stmp5.
ui = Sa21.
ui ^ Sa22.
ui;
622 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
623 Sa21.
ui = Sa21.
ui ^ Stmp5.
ui;
624 Sa22.
ui = Sa22.
ui ^ Stmp5.
ui;
626 Stmp5.
ui = Sa31.
ui ^ Sa32.
ui;
627 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
628 Sa31.
ui = Sa31.
ui ^ Stmp5.
ui;
629 Sa32.
ui = Sa32.
ui ^ Stmp5.
ui;
631 Stmp5.
ui = Sv11.
ui ^ Sv12.
ui;
632 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
633 Sv11.
ui = Sv11.
ui ^ Stmp5.
ui;
634 Sv12.
ui = Sv12.
ui ^ Stmp5.
ui;
636 Stmp5.
ui = Sv21.
ui ^ Sv22.
ui;
637 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
638 Sv21.
ui = Sv21.
ui ^ Stmp5.
ui;
639 Sv22.
ui = Sv22.
ui ^ Stmp5.
ui;
641 Stmp5.
ui = Sv31.
ui ^ Sv32.
ui;
642 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
643 Sv31.
ui = Sv31.
ui ^ Stmp5.
ui;
644 Sv32.
ui = Sv32.
ui ^ Stmp5.
ui;
646 Stmp5.
ui = Stmp1.
ui ^ Stmp2.
ui;
647 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
648 Stmp1.
ui = Stmp1.
ui ^ Stmp5.
ui;
649 Stmp2.
ui = Stmp2.
ui ^ Stmp5.
ui;
655 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
659 Sa12.
f = Sa12.
f * Stmp4.
f;
660 Sa22.
f = Sa22.
f * Stmp4.
f;
661 Sa32.
f = Sa32.
f * Stmp4.
f;
663 Sv12.
f = Sv12.
f * Stmp4.
f;
664 Sv22.
f = Sv22.
f * Stmp4.
f;
665 Sv32.
f = Sv32.
f * Stmp4.
f;
669 Stmp4.
ui = (Stmp1.
f < Stmp3.
f) ? 0xffffffff : 0;
670 Stmp5.
ui = Sa11.
ui ^ Sa13.
ui;
671 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
672 Sa11.
ui = Sa11.
ui ^ Stmp5.
ui;
673 Sa13.
ui = Sa13.
ui ^ Stmp5.
ui;
675 Stmp5.
ui = Sa21.
ui ^ Sa23.
ui;
676 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
677 Sa21.
ui = Sa21.
ui ^ Stmp5.
ui;
678 Sa23.
ui = Sa23.
ui ^ Stmp5.
ui;
680 Stmp5.
ui = Sa31.
ui ^ Sa33.
ui;
681 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
682 Sa31.
ui = Sa31.
ui ^ Stmp5.
ui;
683 Sa33.
ui = Sa33.
ui ^ Stmp5.
ui;
685 Stmp5.
ui = Sv11.
ui ^ Sv13.
ui;
686 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
687 Sv11.
ui = Sv11.
ui ^ Stmp5.
ui;
688 Sv13.
ui = Sv13.
ui ^ Stmp5.
ui;
690 Stmp5.
ui = Sv21.
ui ^ Sv23.
ui;
691 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
692 Sv21.
ui = Sv21.
ui ^ Stmp5.
ui;
693 Sv23.
ui = Sv23.
ui ^ Stmp5.
ui;
695 Stmp5.
ui = Sv31.
ui ^ Sv33.
ui;
696 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
697 Sv31.
ui = Sv31.
ui ^ Stmp5.
ui;
698 Sv33.
ui = Sv33.
ui ^ Stmp5.
ui;
700 Stmp5.
ui = Stmp1.
ui ^ Stmp3.
ui;
701 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
702 Stmp1.
ui = Stmp1.
ui ^ Stmp5.
ui;
703 Stmp3.
ui = Stmp3.
ui ^ Stmp5.
ui;
709 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
713 Sa11.
f = Sa11.
f * Stmp4.
f;
714 Sa21.
f = Sa21.
f * Stmp4.
f;
715 Sa31.
f = Sa31.
f * Stmp4.
f;
717 Sv11.
f = Sv11.
f * Stmp4.
f;
718 Sv21.
f = Sv21.
f * Stmp4.
f;
719 Sv31.
f = Sv31.
f * Stmp4.
f;
723 Stmp4.
ui = (Stmp2.
f < Stmp3.
f) ? 0xffffffff : 0;
724 Stmp5.
ui = Sa12.
ui ^ Sa13.
ui;
725 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
726 Sa12.
ui = Sa12.
ui ^ Stmp5.
ui;
727 Sa13.
ui = Sa13.
ui ^ Stmp5.
ui;
729 Stmp5.
ui = Sa22.
ui ^ Sa23.
ui;
730 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
731 Sa22.
ui = Sa22.
ui ^ Stmp5.
ui;
732 Sa23.
ui = Sa23.
ui ^ Stmp5.
ui;
734 Stmp5.
ui = Sa32.
ui ^ Sa33.
ui;
735 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
736 Sa32.
ui = Sa32.
ui ^ Stmp5.
ui;
737 Sa33.
ui = Sa33.
ui ^ Stmp5.
ui;
739 Stmp5.
ui = Sv12.
ui ^ Sv13.
ui;
740 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
741 Sv12.
ui = Sv12.
ui ^ Stmp5.
ui;
742 Sv13.
ui = Sv13.
ui ^ Stmp5.
ui;
744 Stmp5.
ui = Sv22.
ui ^ Sv23.
ui;
745 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
746 Sv22.
ui = Sv22.
ui ^ Stmp5.
ui;
747 Sv23.
ui = Sv23.
ui ^ Stmp5.
ui;
749 Stmp5.
ui = Sv32.
ui ^ Sv33.
ui;
750 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
751 Sv32.
ui = Sv32.
ui ^ Stmp5.
ui;
752 Sv33.
ui = Sv33.
ui ^ Stmp5.
ui;
754 Stmp5.
ui = Stmp2.
ui ^ Stmp3.
ui;
755 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
756 Stmp2.
ui = Stmp2.
ui ^ Stmp5.
ui;
757 Stmp3.
ui = Stmp3.
ui ^ Stmp5.
ui;
763 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
767 Sa13.
f = Sa13.
f * Stmp4.
f;
768 Sa23.
f = Sa23.
f * Stmp4.
f;
769 Sa33.
f = Sa33.
f * Stmp4.
f;
771 Sv13.
f = Sv13.
f * Stmp4.
f;
772 Sv23.
f = Sv23.
f * Stmp4.
f;
773 Sv33.
f = Sv33.
f * Stmp4.
f;
789 Ssh.
f = Sa21.
f * Sa21.
f;
790 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
791 Ssh.
ui = Ssh.
ui & Sa21.
ui;
795 Sch.
f =
max(Sch.
f, Sa11.
f);
796 Sch.
f =
max(Sch.
f, gsmall_number);
797 Stmp5.
ui = (Sa11.
f >= Stmp5.
f) ? 0xffffffff : 0;
799 Stmp1.
f = Sch.
f * Sch.
f;
800 Stmp2.
f = Ssh.
f * Ssh.
f;
804 Stmp4.
f = Stmp1.
f * 0.5f;
805 Stmp3.
f = Stmp1.
f * Stmp4.
f;
806 Stmp3.
f = Stmp1.
f * Stmp3.
f;
807 Stmp3.
f = Stmp2.
f * Stmp3.
f;
810 Stmp1.
f = Stmp1.
f * Stmp2.
f;
814 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
815 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
816 Sch.
ui = Stmp5.
ui & Sch.
ui;
817 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
818 Sch.
ui = Sch.
ui | Stmp1.
ui;
819 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
821 Stmp1.
f = Sch.
f * Sch.
f;
822 Stmp2.
f = Ssh.
f * Ssh.
f;
826 Stmp4.
f = Stmp1.
f * 0.5f;
827 Stmp3.
f = Stmp1.
f * Stmp4.
f;
828 Stmp3.
f = Stmp1.
f * Stmp3.
f;
829 Stmp3.
f = Stmp2.
f * Stmp3.
f;
833 Sch.
f = Sch.
f * Stmp1.
f;
834 Ssh.
f = Ssh.
f * Stmp1.
f;
836 Sc.
f = Sch.
f * Sch.
f;
837 Ss.
f = Ssh.
f * Ssh.
f;
839 Ss.
f = Ssh.
f * Sch.
f;
846 Stmp1.
f = Ss.
f * Sa11.
f;
847 Stmp2.
f = Ss.
f * Sa21.
f;
848 Sa11.
f = Sc.
f * Sa11.
f;
849 Sa21.
f = Sc.
f * Sa21.
f;
853 Stmp1.
f = Ss.
f * Sa12.
f;
854 Stmp2.
f = Ss.
f * Sa22.
f;
855 Sa12.
f = Sc.
f * Sa12.
f;
856 Sa22.
f = Sc.
f * Sa22.
f;
860 Stmp1.
f = Ss.
f * Sa13.
f;
861 Stmp2.
f = Ss.
f * Sa23.
f;
862 Sa13.
f = Sc.
f * Sa13.
f;
863 Sa23.
f = Sc.
f * Sa23.
f;
871 Stmp1.
f = Ss.
f * Su11.
f;
872 Stmp2.
f = Ss.
f * Su12.
f;
873 Su11.
f = Sc.
f * Su11.
f;
874 Su12.
f = Sc.
f * Su12.
f;
878 Stmp1.
f = Ss.
f * Su21.
f;
879 Stmp2.
f = Ss.
f * Su22.
f;
880 Su21.
f = Sc.
f * Su21.
f;
881 Su22.
f = Sc.
f * Su22.
f;
885 Stmp1.
f = Ss.
f * Su31.
f;
886 Stmp2.
f = Ss.
f * Su32.
f;
887 Su31.
f = Sc.
f * Su31.
f;
888 Su32.
f = Sc.
f * Su32.
f;
894 Ssh.
f = Sa31.
f * Sa31.
f;
895 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
896 Ssh.
ui = Ssh.
ui & Sa31.
ui;
900 Sch.
f =
max(Sch.
f, Sa11.
f);
901 Sch.
f =
max(Sch.
f, gsmall_number);
902 Stmp5.
ui = (Sa11.
f >= Stmp5.
f) ? 0xffffffff : 0;
904 Stmp1.
f = Sch.
f * Sch.
f;
905 Stmp2.
f = Ssh.
f * Ssh.
f;
909 Stmp4.
f = Stmp1.
f * 0.5;
910 Stmp3.
f = Stmp1.
f * Stmp4.
f;
911 Stmp3.
f = Stmp1.
f * Stmp3.
f;
912 Stmp3.
f = Stmp2.
f * Stmp3.
f;
915 Stmp1.
f = Stmp1.
f * Stmp2.
f;
919 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
920 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
921 Sch.
ui = Stmp5.
ui & Sch.
ui;
922 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
923 Sch.
ui = Sch.
ui | Stmp1.
ui;
924 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
926 Stmp1.
f = Sch.
f * Sch.
f;
927 Stmp2.
f = Ssh.
f * Ssh.
f;
931 Stmp4.
f = Stmp1.
f * 0.5f;
932 Stmp3.
f = Stmp1.
f * Stmp4.
f;
933 Stmp3.
f = Stmp1.
f * Stmp3.
f;
934 Stmp3.
f = Stmp2.
f * Stmp3.
f;
938 Sch.
f = Sch.
f * Stmp1.
f;
939 Ssh.
f = Ssh.
f * Stmp1.
f;
941 Sc.
f = Sch.
f * Sch.
f;
942 Ss.
f = Ssh.
f * Ssh.
f;
944 Ss.
f = Ssh.
f * Sch.
f;
951 Stmp1.
f = Ss.
f * Sa11.
f;
952 Stmp2.
f = Ss.
f * Sa31.
f;
953 Sa11.
f = Sc.
f * Sa11.
f;
954 Sa31.
f = Sc.
f * Sa31.
f;
958 Stmp1.
f = Ss.
f * Sa12.
f;
959 Stmp2.
f = Ss.
f * Sa32.
f;
960 Sa12.
f = Sc.
f * Sa12.
f;
961 Sa32.
f = Sc.
f * Sa32.
f;
965 Stmp1.
f = Ss.
f * Sa13.
f;
966 Stmp2.
f = Ss.
f * Sa33.
f;
967 Sa13.
f = Sc.
f * Sa13.
f;
968 Sa33.
f = Sc.
f * Sa33.
f;
976 Stmp1.
f = Ss.
f * Su11.
f;
977 Stmp2.
f = Ss.
f * Su13.
f;
978 Su11.
f = Sc.
f * Su11.
f;
979 Su13.
f = Sc.
f * Su13.
f;
983 Stmp1.
f = Ss.
f * Su21.
f;
984 Stmp2.
f = Ss.
f * Su23.
f;
985 Su21.
f = Sc.
f * Su21.
f;
986 Su23.
f = Sc.
f * Su23.
f;
990 Stmp1.
f = Ss.
f * Su31.
f;
991 Stmp2.
f = Ss.
f * Su33.
f;
992 Su31.
f = Sc.
f * Su31.
f;
993 Su33.
f = Sc.
f * Su33.
f;
999 Ssh.
f = Sa32.
f * Sa32.
f;
1000 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
1001 Ssh.
ui = Ssh.
ui & Sa32.
ui;
1005 Sch.
f =
max(Sch.
f, Sa22.
f);
1006 Sch.
f =
max(Sch.
f, gsmall_number);
1007 Stmp5.
ui = (Sa22.
f >= Stmp5.
f) ? 0xffffffff : 0;
1009 Stmp1.
f = Sch.
f * Sch.
f;
1010 Stmp2.
f = Ssh.
f * Ssh.
f;
1014 Stmp4.
f = Stmp1.
f * 0.5f;
1015 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1016 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1017 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1020 Stmp1.
f = Stmp1.
f * Stmp2.
f;
1024 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
1025 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
1026 Sch.
ui = Stmp5.
ui & Sch.
ui;
1027 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
1028 Sch.
ui = Sch.
ui | Stmp1.
ui;
1029 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1031 Stmp1.
f = Sch.
f * Sch.
f;
1032 Stmp2.
f = Ssh.
f * Ssh.
f;
1036 Stmp4.
f = Stmp1.
f * 0.5f;
1037 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1038 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1039 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1043 Sch.
f = Sch.
f * Stmp1.
f;
1044 Ssh.
f = Ssh.
f * Stmp1.
f;
1046 Sc.
f = Sch.
f * Sch.
f;
1047 Ss.
f = Ssh.
f * Ssh.
f;
1049 Ss.
f = Ssh.
f * Sch.
f;
1056 Stmp1.
f = Ss.
f * Sa21.
f;
1057 Stmp2.
f = Ss.
f * Sa31.
f;
1058 Sa21.
f = Sc.
f * Sa21.
f;
1059 Sa31.
f = Sc.
f * Sa31.
f;
1063 Stmp1.
f = Ss.
f * Sa22.
f;
1064 Stmp2.
f = Ss.
f * Sa32.
f;
1065 Sa22.
f = Sc.
f * Sa22.
f;
1066 Sa32.
f = Sc.
f * Sa32.
f;
1070 Stmp1.
f = Ss.
f * Sa23.
f;
1071 Stmp2.
f = Ss.
f * Sa33.
f;
1072 Sa23.
f = Sc.
f * Sa23.
f;
1073 Sa33.
f = Sc.
f * Sa33.
f;
1081 Stmp1.
f = Ss.
f * Su12.
f;
1082 Stmp2.
f = Ss.
f * Su13.
f;
1083 Su12.
f = Sc.
f * Su12.
f;
1084 Su13.
f = Sc.
f * Su13.
f;
1088 Stmp1.
f = Ss.
f * Su22.
f;
1089 Stmp2.
f = Ss.
f * Su23.
f;
1090 Su22.
f = Sc.
f * Su22.
f;
1091 Su23.
f = Sc.
f * Su23.
f;
1095 Stmp1.
f = Ss.
f * Su32.
f;
1096 Stmp2.
f = Ss.
f * Su33.
f;
1097 Su32.
f = Sc.
f * Su32.
f;
1098 Su33.
f = Sc.
f * Su33.
f;
1131 const float *A_3x3,
float *U_3x3,
float *S_3x1,
float *V_3x3) {
1132 float gsmall_number = 1.e-12;
1134 un<float> Sa11, Sa21, Sa31, Sa12, Sa22, Sa32, Sa13, Sa23, Sa33;
1135 un<float> Su11, Su21, Su31, Su12, Su22, Su32, Su13, Su23, Su33;
1136 un<float> Sv11, Sv21, Sv31, Sv12, Sv22, Sv32, Sv13, Sv23, Sv33;
1138 un<float> Stmp1, Stmp2, Stmp3, Stmp4, Stmp5;
1139 un<float> Ss11, Ss21, Ss31, Ss22, Ss32, Ss33;
1156 Ss11.
f = Sa11.
f * Sa11.
f;
1157 Stmp1.
f = Sa21.
f * Sa21.
f;
1159 Stmp1.
f = Sa31.
f * Sa31.
f;
1162 Ss21.
f = Sa12.
f * Sa11.
f;
1163 Stmp1.
f = Sa22.
f * Sa21.
f;
1165 Stmp1.
f = Sa32.
f * Sa31.
f;
1168 Ss31.
f = Sa13.
f * Sa11.
f;
1169 Stmp1.
f = Sa23.
f * Sa21.
f;
1171 Stmp1.
f = Sa33.
f * Sa31.
f;
1174 Ss22.
f = Sa12.
f * Sa12.
f;
1175 Stmp1.
f = Sa22.
f * Sa22.
f;
1177 Stmp1.
f = Sa32.
f * Sa32.
f;
1180 Ss32.
f = Sa13.
f * Sa12.
f;
1181 Stmp1.
f = Sa23.
f * Sa22.
f;
1183 Stmp1.
f = Sa33.
f * Sa32.
f;
1186 Ss33.
f = Sa13.
f * Sa13.
f;
1187 Stmp1.
f = Sa23.
f * Sa23.
f;
1189 Stmp1.
f = Sa33.
f * Sa33.
f;
1200 for (
int i = 0; i < 4; i++) {
1201 Ssh.
f = Ss21.
f * 0.5f;
1204 Stmp2.
f = Ssh.
f * Ssh.
f;
1206 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
1207 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
1209 Sch.
ui = Sch.
ui | Stmp2.
ui;
1211 Stmp1.
f = Ssh.
f * Ssh.
f;
1212 Stmp2.
f = Sch.
f * Sch.
f;
1216 Ssh.
f = Stmp4.
f * Ssh.
f;
1217 Sch.
f = Stmp4.
f * Sch.
f;
1219 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
1222 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
1223 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1225 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
1226 Sch.
ui = Sch.
ui | Stmp2.
ui;
1228 Stmp1.
f = Ssh.
f * Ssh.
f;
1229 Stmp2.
f = Sch.
f * Sch.
f;
1231 Ss.
f = Sch.
f * Ssh.
f;
1234 #ifdef DEBUG_JACOBI_CONJUGATE
1235 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
1243 Ss33.
f = Ss33.
f * Stmp3.
f;
1244 Ss31.
f = Ss31.
f * Stmp3.
f;
1245 Ss32.
f = Ss32.
f * Stmp3.
f;
1246 Ss33.
f = Ss33.
f * Stmp3.
f;
1248 Stmp1.
f = Ss.
f * Ss31.
f;
1249 Stmp2.
f = Ss.
f * Ss32.
f;
1250 Ss31.
f = Sc.
f * Ss31.
f;
1251 Ss32.
f = Sc.
f * Ss32.
f;
1255 Stmp2.
f = Ss.
f * Ss.
f;
1256 Stmp1.
f = Ss22.
f * Stmp2.
f;
1257 Stmp3.
f = Ss11.
f * Stmp2.
f;
1258 Stmp4.
f = Sc.
f * Sc.
f;
1259 Ss11.
f = Ss11.
f * Stmp4.
f;
1260 Ss22.
f = Ss22.
f * Stmp4.
f;
1265 Ss21.
f = Ss21.
f * Stmp4.
f;
1266 Stmp4.
f = Sc.
f * Ss.
f;
1267 Stmp2.
f = Stmp2.
f * Stmp4.
f;
1268 Stmp5.
f = Stmp5.
f * Stmp4.
f;
1273 #ifdef DEBUG_JACOBI_CONJUGATE
1274 printf(
"%.20g\n", Ss11.
f);
1275 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
1276 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
1283 Stmp1.
f = Ssh.
f * Sqvvx.
f;
1284 Stmp2.
f = Ssh.
f * Sqvvy.
f;
1285 Stmp3.
f = Ssh.
f * Sqvvz.
f;
1286 Ssh.
f = Ssh.
f * Sqvs.
f;
1288 Sqvs.
f = Sch.
f * Sqvs.
f;
1289 Sqvvx.
f = Sch.
f * Sqvvx.
f;
1290 Sqvvy.
f = Sch.
f * Sqvvy.
f;
1291 Sqvvz.
f = Sch.
f * Sqvvz.
f;
1298 #ifdef DEBUG_JACOBI_CONJUGATE
1299 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.
f, Sqvvy.
f, Sqvvz.
f,
1306 Ssh.
f = Ss32.
f * 0.5f;
1309 Stmp2.
f = Ssh.
f * Ssh.
f;
1311 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
1312 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
1314 Sch.
ui = Sch.
ui | Stmp2.
ui;
1316 Stmp1.
f = Ssh.
f * Ssh.
f;
1317 Stmp2.
f = Sch.
f * Sch.
f;
1321 Ssh.
f = Stmp4.
f * Ssh.
f;
1322 Sch.
f = Stmp4.
f * Sch.
f;
1324 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
1327 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
1328 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1330 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
1331 Sch.
ui = Sch.
ui | Stmp2.
ui;
1333 Stmp1.
f = Ssh.
f * Ssh.
f;
1334 Stmp2.
f = Sch.
f * Sch.
f;
1336 Ss.
f = Sch.
f * Ssh.
f;
1339 #ifdef DEBUG_JACOBI_CONJUGATE
1340 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
1349 Ss11.
f = Ss11.
f * Stmp3.
f;
1350 Ss21.
f = Ss21.
f * Stmp3.
f;
1351 Ss31.
f = Ss31.
f * Stmp3.
f;
1352 Ss11.
f = Ss11.
f * Stmp3.
f;
1354 Stmp1.
f = Ss.
f * Ss21.
f;
1355 Stmp2.
f = Ss.
f * Ss31.
f;
1356 Ss21.
f = Sc.
f * Ss21.
f;
1357 Ss31.
f = Sc.
f * Ss31.
f;
1361 Stmp2.
f = Ss.
f * Ss.
f;
1362 Stmp1.
f = Ss33.
f * Stmp2.
f;
1363 Stmp3.
f = Ss22.
f * Stmp2.
f;
1364 Stmp4.
f = Sc.
f * Sc.
f;
1365 Ss22.
f = Ss22.
f * Stmp4.
f;
1366 Ss33.
f = Ss33.
f * Stmp4.
f;
1371 Ss32.
f = Ss32.
f * Stmp4.
f;
1372 Stmp4.
f = Sc.
f * Ss.
f;
1373 Stmp2.
f = Stmp2.
f * Stmp4.
f;
1374 Stmp5.
f = Stmp5.
f * Stmp4.
f;
1379 #ifdef DEBUG_JACOBI_CONJUGATE
1380 printf(
"%.20g\n", Ss11.
f);
1381 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
1382 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
1389 Stmp1.
f = Ssh.
f * Sqvvx.
f;
1390 Stmp2.
f = Ssh.
f * Sqvvy.
f;
1391 Stmp3.
f = Ssh.
f * Sqvvz.
f;
1392 Ssh.
f = Ssh.
f * Sqvs.
f;
1394 Sqvs.
f = Sch.
f * Sqvs.
f;
1395 Sqvvx.
f = Sch.
f * Sqvvx.
f;
1396 Sqvvy.
f = Sch.
f * Sqvvy.
f;
1397 Sqvvz.
f = Sch.
f * Sqvvz.
f;
1404 #ifdef DEBUG_JACOBI_CONJUGATE
1405 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.
f, Sqvvy.
f, Sqvvz.
f,
1413 Ssh.
f = Ss31.
f * 0.5f;
1416 Stmp2.
f = Ssh.
f * Ssh.
f;
1418 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
1419 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
1421 Sch.
ui = Sch.
ui | Stmp2.
ui;
1423 Stmp1.
f = Ssh.
f * Ssh.
f;
1424 Stmp2.
f = Sch.
f * Sch.
f;
1428 Ssh.
f = Stmp4.
f * Ssh.
f;
1429 Sch.
f = Stmp4.
f * Sch.
f;
1431 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
1434 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
1435 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1437 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
1438 Sch.
ui = Sch.
ui | Stmp2.
ui;
1440 Stmp1.
f = Ssh.
f * Ssh.
f;
1441 Stmp2.
f = Sch.
f * Sch.
f;
1443 Ss.
f = Sch.
f * Ssh.
f;
1446 #ifdef DEBUG_JACOBI_CONJUGATE
1447 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
1456 Ss22.
f = Ss22.
f * Stmp3.
f;
1457 Ss32.
f = Ss32.
f * Stmp3.
f;
1458 Ss21.
f = Ss21.
f * Stmp3.
f;
1459 Ss22.
f = Ss22.
f * Stmp3.
f;
1461 Stmp1.
f = Ss.
f * Ss32.
f;
1462 Stmp2.
f = Ss.
f * Ss21.
f;
1463 Ss32.
f = Sc.
f * Ss32.
f;
1464 Ss21.
f = Sc.
f * Ss21.
f;
1468 Stmp2.
f = Ss.
f * Ss.
f;
1469 Stmp1.
f = Ss11.
f * Stmp2.
f;
1470 Stmp3.
f = Ss33.
f * Stmp2.
f;
1471 Stmp4.
f = Sc.
f * Sc.
f;
1472 Ss33.
f = Ss33.
f * Stmp4.
f;
1473 Ss11.
f = Ss11.
f * Stmp4.
f;
1478 Ss31.
f = Ss31.
f * Stmp4.
f;
1479 Stmp4.
f = Sc.
f * Ss.
f;
1480 Stmp2.
f = Stmp2.
f * Stmp4.
f;
1481 Stmp5.
f = Stmp5.
f * Stmp4.
f;
1486 #ifdef DEBUG_JACOBI_CONJUGATE
1487 printf(
"%.20g\n", Ss11.
f);
1488 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
1489 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
1496 Stmp1.
f = Ssh.
f * Sqvvx.
f;
1497 Stmp2.
f = Ssh.
f * Sqvvy.
f;
1498 Stmp3.
f = Ssh.
f * Sqvvz.
f;
1499 Ssh.
f = Ssh.
f * Sqvs.
f;
1501 Sqvs.
f = Sch.
f * Sqvs.
f;
1502 Sqvvx.
f = Sch.
f * Sqvvx.
f;
1503 Sqvvy.
f = Sch.
f * Sqvvy.
f;
1504 Sqvvz.
f = Sch.
f * Sqvvz.
f;
1517 Stmp2.
f = Sqvs.
f * Sqvs.
f;
1518 Stmp1.
f = Sqvvx.
f * Sqvvx.
f;
1520 Stmp1.
f = Sqvvy.
f * Sqvvy.
f;
1522 Stmp1.
f = Sqvvz.
f * Sqvvz.
f;
1526 Stmp4.
f = Stmp1.
f * 0.5f;
1527 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1528 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1529 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1533 Sqvs.
f = Sqvs.
f * Stmp1.
f;
1534 Sqvvx.
f = Sqvvx.
f * Stmp1.
f;
1535 Sqvvy.
f = Sqvvy.
f * Stmp1.
f;
1536 Sqvvz.
f = Sqvvz.
f * Stmp1.
f;
1542 Stmp1.
f = Sqvvx.
f * Sqvvx.
f;
1543 Stmp2.
f = Sqvvy.
f * Sqvvy.
f;
1544 Stmp3.
f = Sqvvz.
f * Sqvvz.
f;
1545 Sv11.
f = Sqvs.
f * Sqvs.
f;
1557 Sv32.
f = Sqvs.
f * Stmp1.
f;
1558 Sv13.
f = Sqvs.
f * Stmp2.
f;
1559 Sv21.
f = Sqvs.
f * Stmp3.
f;
1560 Stmp1.
f = Sqvvy.
f * Stmp1.
f;
1561 Stmp2.
f = Sqvvz.
f * Stmp2.
f;
1562 Stmp3.
f = Sqvvx.
f * Stmp3.
f;
1576 Sa12.
f = Sv12.
f * Sa11.
f;
1577 Sa13.
f = Sv13.
f * Sa11.
f;
1578 Sa11.
f = Sv11.
f * Sa11.
f;
1579 Stmp1.
f = Sv21.
f * Stmp2.
f;
1581 Stmp1.
f = Sv31.
f * Stmp3.
f;
1583 Stmp1.
f = Sv22.
f * Stmp2.
f;
1585 Stmp1.
f = Sv32.
f * Stmp3.
f;
1587 Stmp1.
f = Sv23.
f * Stmp2.
f;
1589 Stmp1.
f = Sv33.
f * Stmp3.
f;
1594 Sa22.
f = Sv12.
f * Sa21.
f;
1595 Sa23.
f = Sv13.
f * Sa21.
f;
1596 Sa21.
f = Sv11.
f * Sa21.
f;
1597 Stmp1.
f = Sv21.
f * Stmp2.
f;
1599 Stmp1.
f = Sv31.
f * Stmp3.
f;
1601 Stmp1.
f = Sv22.
f * Stmp2.
f;
1603 Stmp1.
f = Sv32.
f * Stmp3.
f;
1605 Stmp1.
f = Sv23.
f * Stmp2.
f;
1607 Stmp1.
f = Sv33.
f * Stmp3.
f;
1612 Sa32.
f = Sv12.
f * Sa31.
f;
1613 Sa33.
f = Sv13.
f * Sa31.
f;
1614 Sa31.
f = Sv11.
f * Sa31.
f;
1615 Stmp1.
f = Sv21.
f * Stmp2.
f;
1617 Stmp1.
f = Sv31.
f * Stmp3.
f;
1619 Stmp1.
f = Sv22.
f * Stmp2.
f;
1621 Stmp1.
f = Sv32.
f * Stmp3.
f;
1623 Stmp1.
f = Sv23.
f * Stmp2.
f;
1625 Stmp1.
f = Sv33.
f * Stmp3.
f;
1632 Stmp1.
f = Sa11.
f * Sa11.
f;
1633 Stmp4.
f = Sa21.
f * Sa21.
f;
1635 Stmp4.
f = Sa31.
f * Sa31.
f;
1638 Stmp2.
f = Sa12.
f * Sa12.
f;
1639 Stmp4.
f = Sa22.
f * Sa22.
f;
1641 Stmp4.
f = Sa32.
f * Sa32.
f;
1644 Stmp3.
f = Sa13.
f * Sa13.
f;
1645 Stmp4.
f = Sa23.
f * Sa23.
f;
1647 Stmp4.
f = Sa33.
f * Sa33.
f;
1652 Stmp4.
ui = (Stmp1.
f < Stmp2.
f) ? 0xffffffff : 0;
1653 Stmp5.
ui = Sa11.
ui ^ Sa12.
ui;
1654 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1655 Sa11.
ui = Sa11.
ui ^ Stmp5.
ui;
1656 Sa12.
ui = Sa12.
ui ^ Stmp5.
ui;
1658 Stmp5.
ui = Sa21.
ui ^ Sa22.
ui;
1659 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1660 Sa21.
ui = Sa21.
ui ^ Stmp5.
ui;
1661 Sa22.
ui = Sa22.
ui ^ Stmp5.
ui;
1663 Stmp5.
ui = Sa31.
ui ^ Sa32.
ui;
1664 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1665 Sa31.
ui = Sa31.
ui ^ Stmp5.
ui;
1666 Sa32.
ui = Sa32.
ui ^ Stmp5.
ui;
1668 Stmp5.
ui = Sv11.
ui ^ Sv12.
ui;
1669 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1670 Sv11.
ui = Sv11.
ui ^ Stmp5.
ui;
1671 Sv12.
ui = Sv12.
ui ^ Stmp5.
ui;
1673 Stmp5.
ui = Sv21.
ui ^ Sv22.
ui;
1674 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1675 Sv21.
ui = Sv21.
ui ^ Stmp5.
ui;
1676 Sv22.
ui = Sv22.
ui ^ Stmp5.
ui;
1678 Stmp5.
ui = Sv31.
ui ^ Sv32.
ui;
1679 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1680 Sv31.
ui = Sv31.
ui ^ Stmp5.
ui;
1681 Sv32.
ui = Sv32.
ui ^ Stmp5.
ui;
1683 Stmp5.
ui = Stmp1.
ui ^ Stmp2.
ui;
1684 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1685 Stmp1.
ui = Stmp1.
ui ^ Stmp5.
ui;
1686 Stmp2.
ui = Stmp2.
ui ^ Stmp5.
ui;
1692 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1696 Sa12.
f = Sa12.
f * Stmp4.
f;
1697 Sa22.
f = Sa22.
f * Stmp4.
f;
1698 Sa32.
f = Sa32.
f * Stmp4.
f;
1700 Sv12.
f = Sv12.
f * Stmp4.
f;
1701 Sv22.
f = Sv22.
f * Stmp4.
f;
1702 Sv32.
f = Sv32.
f * Stmp4.
f;
1706 Stmp4.
ui = (Stmp1.
f < Stmp3.
f) ? 0xffffffff : 0;
1707 Stmp5.
ui = Sa11.
ui ^ Sa13.
ui;
1708 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1709 Sa11.
ui = Sa11.
ui ^ Stmp5.
ui;
1710 Sa13.
ui = Sa13.
ui ^ Stmp5.
ui;
1712 Stmp5.
ui = Sa21.
ui ^ Sa23.
ui;
1713 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1714 Sa21.
ui = Sa21.
ui ^ Stmp5.
ui;
1715 Sa23.
ui = Sa23.
ui ^ Stmp5.
ui;
1717 Stmp5.
ui = Sa31.
ui ^ Sa33.
ui;
1718 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1719 Sa31.
ui = Sa31.
ui ^ Stmp5.
ui;
1720 Sa33.
ui = Sa33.
ui ^ Stmp5.
ui;
1722 Stmp5.
ui = Sv11.
ui ^ Sv13.
ui;
1723 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1724 Sv11.
ui = Sv11.
ui ^ Stmp5.
ui;
1725 Sv13.
ui = Sv13.
ui ^ Stmp5.
ui;
1727 Stmp5.
ui = Sv21.
ui ^ Sv23.
ui;
1728 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1729 Sv21.
ui = Sv21.
ui ^ Stmp5.
ui;
1730 Sv23.
ui = Sv23.
ui ^ Stmp5.
ui;
1732 Stmp5.
ui = Sv31.
ui ^ Sv33.
ui;
1733 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1734 Sv31.
ui = Sv31.
ui ^ Stmp5.
ui;
1735 Sv33.
ui = Sv33.
ui ^ Stmp5.
ui;
1737 Stmp5.
ui = Stmp1.
ui ^ Stmp3.
ui;
1738 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1739 Stmp1.
ui = Stmp1.
ui ^ Stmp5.
ui;
1740 Stmp3.
ui = Stmp3.
ui ^ Stmp5.
ui;
1746 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1750 Sa11.
f = Sa11.
f * Stmp4.
f;
1751 Sa21.
f = Sa21.
f * Stmp4.
f;
1752 Sa31.
f = Sa31.
f * Stmp4.
f;
1754 Sv11.
f = Sv11.
f * Stmp4.
f;
1755 Sv21.
f = Sv21.
f * Stmp4.
f;
1756 Sv31.
f = Sv31.
f * Stmp4.
f;
1760 Stmp4.
ui = (Stmp2.
f < Stmp3.
f) ? 0xffffffff : 0;
1761 Stmp5.
ui = Sa12.
ui ^ Sa13.
ui;
1762 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1763 Sa12.
ui = Sa12.
ui ^ Stmp5.
ui;
1764 Sa13.
ui = Sa13.
ui ^ Stmp5.
ui;
1766 Stmp5.
ui = Sa22.
ui ^ Sa23.
ui;
1767 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1768 Sa22.
ui = Sa22.
ui ^ Stmp5.
ui;
1769 Sa23.
ui = Sa23.
ui ^ Stmp5.
ui;
1771 Stmp5.
ui = Sa32.
ui ^ Sa33.
ui;
1772 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1773 Sa32.
ui = Sa32.
ui ^ Stmp5.
ui;
1774 Sa33.
ui = Sa33.
ui ^ Stmp5.
ui;
1776 Stmp5.
ui = Sv12.
ui ^ Sv13.
ui;
1777 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1778 Sv12.
ui = Sv12.
ui ^ Stmp5.
ui;
1779 Sv13.
ui = Sv13.
ui ^ Stmp5.
ui;
1781 Stmp5.
ui = Sv22.
ui ^ Sv23.
ui;
1782 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1783 Sv22.
ui = Sv22.
ui ^ Stmp5.
ui;
1784 Sv23.
ui = Sv23.
ui ^ Stmp5.
ui;
1786 Stmp5.
ui = Sv32.
ui ^ Sv33.
ui;
1787 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1788 Sv32.
ui = Sv32.
ui ^ Stmp5.
ui;
1789 Sv33.
ui = Sv33.
ui ^ Stmp5.
ui;
1791 Stmp5.
ui = Stmp2.
ui ^ Stmp3.
ui;
1792 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1793 Stmp2.
ui = Stmp2.
ui ^ Stmp5.
ui;
1794 Stmp3.
ui = Stmp3.
ui ^ Stmp5.
ui;
1800 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1804 Sa13.
f = Sa13.
f * Stmp4.
f;
1805 Sa23.
f = Sa23.
f * Stmp4.
f;
1806 Sa33.
f = Sa33.
f * Stmp4.
f;
1808 Sv13.
f = Sv13.
f * Stmp4.
f;
1809 Sv23.
f = Sv23.
f * Stmp4.
f;
1810 Sv33.
f = Sv33.
f * Stmp4.
f;
1826 Ssh.
f = Sa21.
f * Sa21.
f;
1827 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
1828 Ssh.
ui = Ssh.
ui & Sa21.
ui;
1832 Sch.
f =
max(Sch.
f, Sa11.
f);
1833 Sch.
f =
max(Sch.
f, gsmall_number);
1834 Stmp5.
ui = (Sa11.
f >= Stmp5.
f) ? 0xffffffff : 0;
1836 Stmp1.
f = Sch.
f * Sch.
f;
1837 Stmp2.
f = Ssh.
f * Ssh.
f;
1841 Stmp4.
f = Stmp1.
f * 0.5f;
1842 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1843 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1844 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1847 Stmp1.
f = Stmp1.
f * Stmp2.
f;
1851 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
1852 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
1853 Sch.
ui = Stmp5.
ui & Sch.
ui;
1854 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
1855 Sch.
ui = Sch.
ui | Stmp1.
ui;
1856 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1858 Stmp1.
f = Sch.
f * Sch.
f;
1859 Stmp2.
f = Ssh.
f * Ssh.
f;
1863 Stmp4.
f = Stmp1.
f * 0.5f;
1864 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1865 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1866 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1870 Sch.
f = Sch.
f * Stmp1.
f;
1871 Ssh.
f = Ssh.
f * Stmp1.
f;
1873 Sc.
f = Sch.
f * Sch.
f;
1874 Ss.
f = Ssh.
f * Ssh.
f;
1876 Ss.
f = Ssh.
f * Sch.
f;
1883 Stmp1.
f = Ss.
f * Sa11.
f;
1884 Stmp2.
f = Ss.
f * Sa21.
f;
1885 Sa11.
f = Sc.
f * Sa11.
f;
1886 Sa21.
f = Sc.
f * Sa21.
f;
1890 Stmp1.
f = Ss.
f * Sa12.
f;
1891 Stmp2.
f = Ss.
f * Sa22.
f;
1892 Sa12.
f = Sc.
f * Sa12.
f;
1893 Sa22.
f = Sc.
f * Sa22.
f;
1897 Stmp1.
f = Ss.
f * Sa13.
f;
1898 Stmp2.
f = Ss.
f * Sa23.
f;
1899 Sa13.
f = Sc.
f * Sa13.
f;
1900 Sa23.
f = Sc.
f * Sa23.
f;
1908 Stmp1.
f = Ss.
f * Su11.
f;
1909 Stmp2.
f = Ss.
f * Su12.
f;
1910 Su11.
f = Sc.
f * Su11.
f;
1911 Su12.
f = Sc.
f * Su12.
f;
1915 Stmp1.
f = Ss.
f * Su21.
f;
1916 Stmp2.
f = Ss.
f * Su22.
f;
1917 Su21.
f = Sc.
f * Su21.
f;
1918 Su22.
f = Sc.
f * Su22.
f;
1922 Stmp1.
f = Ss.
f * Su31.
f;
1923 Stmp2.
f = Ss.
f * Su32.
f;
1924 Su31.
f = Sc.
f * Su31.
f;
1925 Su32.
f = Sc.
f * Su32.
f;
1931 Ssh.
f = Sa31.
f * Sa31.
f;
1932 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
1933 Ssh.
ui = Ssh.
ui & Sa31.
ui;
1937 Sch.
f =
max(Sch.
f, Sa11.
f);
1938 Sch.
f =
max(Sch.
f, gsmall_number);
1939 Stmp5.
ui = (Sa11.
f >= Stmp5.
f) ? 0xffffffff : 0;
1941 Stmp1.
f = Sch.
f * Sch.
f;
1942 Stmp2.
f = Ssh.
f * Ssh.
f;
1946 Stmp4.
f = Stmp1.
f * 0.5;
1947 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1948 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1949 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1952 Stmp1.
f = Stmp1.
f * Stmp2.
f;
1956 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
1957 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
1958 Sch.
ui = Stmp5.
ui & Sch.
ui;
1959 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
1960 Sch.
ui = Sch.
ui | Stmp1.
ui;
1961 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1963 Stmp1.
f = Sch.
f * Sch.
f;
1964 Stmp2.
f = Ssh.
f * Ssh.
f;
1968 Stmp4.
f = Stmp1.
f * 0.5f;
1969 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1970 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1971 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1975 Sch.
f = Sch.
f * Stmp1.
f;
1976 Ssh.
f = Ssh.
f * Stmp1.
f;
1978 Sc.
f = Sch.
f * Sch.
f;
1979 Ss.
f = Ssh.
f * Ssh.
f;
1981 Ss.
f = Ssh.
f * Sch.
f;
1988 Stmp1.
f = Ss.
f * Sa11.
f;
1989 Stmp2.
f = Ss.
f * Sa31.
f;
1990 Sa11.
f = Sc.
f * Sa11.
f;
1991 Sa31.
f = Sc.
f * Sa31.
f;
1995 Stmp1.
f = Ss.
f * Sa12.
f;
1996 Stmp2.
f = Ss.
f * Sa32.
f;
1997 Sa12.
f = Sc.
f * Sa12.
f;
1998 Sa32.
f = Sc.
f * Sa32.
f;
2002 Stmp1.
f = Ss.
f * Sa13.
f;
2003 Stmp2.
f = Ss.
f * Sa33.
f;
2004 Sa13.
f = Sc.
f * Sa13.
f;
2005 Sa33.
f = Sc.
f * Sa33.
f;
2013 Stmp1.
f = Ss.
f * Su11.
f;
2014 Stmp2.
f = Ss.
f * Su13.
f;
2015 Su11.
f = Sc.
f * Su11.
f;
2016 Su13.
f = Sc.
f * Su13.
f;
2020 Stmp1.
f = Ss.
f * Su21.
f;
2021 Stmp2.
f = Ss.
f * Su23.
f;
2022 Su21.
f = Sc.
f * Su21.
f;
2023 Su23.
f = Sc.
f * Su23.
f;
2027 Stmp1.
f = Ss.
f * Su31.
f;
2028 Stmp2.
f = Ss.
f * Su33.
f;
2029 Su31.
f = Sc.
f * Su31.
f;
2030 Su33.
f = Sc.
f * Su33.
f;
2036 Ssh.
f = Sa32.
f * Sa32.
f;
2037 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
2038 Ssh.
ui = Ssh.
ui & Sa32.
ui;
2042 Sch.
f =
max(Sch.
f, Sa22.
f);
2043 Sch.
f =
max(Sch.
f, gsmall_number);
2044 Stmp5.
ui = (Sa22.
f >= Stmp5.
f) ? 0xffffffff : 0;
2046 Stmp1.
f = Sch.
f * Sch.
f;
2047 Stmp2.
f = Ssh.
f * Ssh.
f;
2051 Stmp4.
f = Stmp1.
f * 0.5f;
2052 Stmp3.
f = Stmp1.
f * Stmp4.
f;
2053 Stmp3.
f = Stmp1.
f * Stmp3.
f;
2054 Stmp3.
f = Stmp2.
f * Stmp3.
f;
2057 Stmp1.
f = Stmp1.
f * Stmp2.
f;
2061 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
2062 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
2063 Sch.
ui = Stmp5.
ui & Sch.
ui;
2064 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
2065 Sch.
ui = Sch.
ui | Stmp1.
ui;
2066 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
2068 Stmp1.
f = Sch.
f * Sch.
f;
2069 Stmp2.
f = Ssh.
f * Ssh.
f;
2073 Stmp4.
f = Stmp1.
f * 0.5f;
2074 Stmp3.
f = Stmp1.
f * Stmp4.
f;
2075 Stmp3.
f = Stmp1.
f * Stmp3.
f;
2076 Stmp3.
f = Stmp2.
f * Stmp3.
f;
2080 Sch.
f = Sch.
f * Stmp1.
f;
2081 Ssh.
f = Ssh.
f * Stmp1.
f;
2083 Sc.
f = Sch.
f * Sch.
f;
2084 Ss.
f = Ssh.
f * Ssh.
f;
2086 Ss.
f = Ssh.
f * Sch.
f;
2093 Stmp1.
f = Ss.
f * Sa21.
f;
2094 Stmp2.
f = Ss.
f * Sa31.
f;
2095 Sa21.
f = Sc.
f * Sa21.
f;
2096 Sa31.
f = Sc.
f * Sa31.
f;
2100 Stmp1.
f = Ss.
f * Sa22.
f;
2101 Stmp2.
f = Ss.
f * Sa32.
f;
2102 Sa22.
f = Sc.
f * Sa22.
f;
2103 Sa32.
f = Sc.
f * Sa32.
f;
2107 Stmp1.
f = Ss.
f * Sa23.
f;
2108 Stmp2.
f = Ss.
f * Sa33.
f;
2109 Sa23.
f = Sc.
f * Sa23.
f;
2110 Sa33.
f = Sc.
f * Sa33.
f;
2118 Stmp1.
f = Ss.
f * Su12.
f;
2119 Stmp2.
f = Ss.
f * Su13.
f;
2120 Su12.
f = Sc.
f * Su12.
f;
2121 Su13.
f = Sc.
f * Su13.
f;
2125 Stmp1.
f = Ss.
f * Su22.
f;
2126 Stmp2.
f = Ss.
f * Su23.
f;
2127 Su22.
f = Sc.
f * Su22.
f;
2128 Su23.
f = Sc.
f * Su23.
f;
2132 Stmp1.
f = Ss.
f * Su32.
f;
2133 Stmp2.
f = Ss.
f * Su33.
f;
2134 Su32.
f = Sc.
f * Su32.
f;
2135 Su33.
f = Sc.
f * Su33.
f;
2166 template <
typename scalar_t>
2168 const scalar_t *A_3x3,
2169 const scalar_t *B_3x1,
2180 const scalar_t epsilon = 1e-10;
2181 S[0] =
abs(S[0]) < epsilon ? 0 : 1.0 / S[0];
2182 S[1] =
abs(S[1]) < epsilon ? 0 : 1.0 / S[1];
2183 S[2] =
abs(S[2]) < epsilon ? 0 : 1.0 / S[2];
2190 S_UT[0] = U[0] * S[0];
2191 S_UT[1] = U[3] * S[0];
2192 S_UT[2] = U[6] * S[0];
2193 S_UT[3] = U[1] * S[1];
2194 S_UT[4] = U[4] * S[1];
2195 S_UT[5] = U[7] * S[1];
2196 S_UT[6] = U[2] * S[2];
2197 S_UT[7] = U[5] * S[2];
2198 S_UT[8] = U[8] * S[2];
2203 scalar_t Ainv[9] = {0};
#define CLOUDVIEWER_FORCE_INLINE
#define CLOUDVIEWER_DEVICE
#define gcosine_pi_over_eight
#define gsine_pi_over_eight
#define gfour_gamma_squared
__host__ __device__ int2 abs(int2 v)
static CLOUDVIEWER_DEVICE CLOUDVIEWER_FORCE_INLINE void matmul3x3_3x1(const scalar_t &m00, const scalar_t &m01, const scalar_t &m02, const scalar_t &m10, const scalar_t &m11, const scalar_t &m12, const scalar_t &m20, const scalar_t &m21, const scalar_t &m22, const scalar_t &v0, const scalar_t &v1, const scalar_t &v2, scalar_t &o0, scalar_t &o1, scalar_t &o2)
CLOUDVIEWER_DEVICE CLOUDVIEWER_FORCE_INLINE void svd3x3< float >(const float *A_3x3, float *U_3x3, float *S_3x1, float *V_3x3)
CLOUDVIEWER_DEVICE CLOUDVIEWER_FORCE_INLINE void svd3x3< double >(const double *A_3x3, double *U_3x3, double *S_3x1, double *V_3x3)
CLOUDVIEWER_DEVICE CLOUDVIEWER_FORCE_INLINE void solve_svd3x3(const scalar_t *A_3x3, const scalar_t *B_3x1, scalar_t *X_3x1)
CLOUDVIEWER_DEVICE CLOUDVIEWER_FORCE_INLINE void svd3x3(const scalar_t *A_3x3, scalar_t *U_3x3, scalar_t *S_3x1, scalar_t *V_3x3)
CLOUDVIEWER_DEVICE CLOUDVIEWER_FORCE_INLINE void matmul3x3_3x3(const scalar_t *A_3x3, const scalar_t *B_3x3, scalar_t *C_3x3)
Generic file read and write utility for python interface.