@@ -405,7 +405,7 @@ struct erf_helper<float16_t>
405405 float16_t x = abs_helper<float16_t>::__call (_x);
406406
407407 float16_t t = float16_t (1.f ) / (float16_t (1.f ) + p * x);
408- float16_t y = float16_t (1.f ) - (((a3 * t + a2) * t) + a1) * t * exp (-x * x);
408+ float16_t y = float16_t (1.f ) - (((a3 * t + a2) * t) + a1) * t * exp_helper<float16_t>:: __call (-x * x);
409409
410410 return _sign * y;
411411 }
@@ -451,90 +451,91 @@ struct erfInv_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointSc
451451 }
452452};
453453
454- template<>
455- struct erfInv_helper<float64_t>
456- {
457- static float64_t __call (NBL_CONST_REF_ARG (float64_t) _x)
458- {
459- float64_t x = clamp <float64_t>(_x, NBL_FP64_LITERAL (-0.99999 ), NBL_FP64_LITERAL (0.99999 ));
460-
461- float64_t w = float64_t (-log_helper<float32_t>::__call ((float32_t (NBL_FP64_LITERAL (1.0 )) - x) * float32_t (NBL_FP64_LITERAL (1.0 )) + x));
462- float64_t p;
463- if (w < 6.250000 )
464- {
465- w -= NBL_FP64_LITERAL (3.125000 );
466- p = NBL_FP64_LITERAL (-3. 6444120640178196996e-21 );
467- p = NBL_FP64_LITERAL (-1. 685059138182016589e-19 ) + p * w;
468- p = NBL_FP64_LITERAL (1. 2858480715256400167e-18 ) + p * w;
469- p = NBL_FP64_LITERAL (1. 115787767802518096e-17 ) + p * w;
470- p = NBL_FP64_LITERAL (-1. 333171662854620906e-16 ) + p * w;
471- p = NBL_FP64_LITERAL (2. 0972767875968561637e-17 ) + p * w;
472- p = NBL_FP64_LITERAL (6. 6376381343583238325e-15 ) + p * w;
473- p = NBL_FP64_LITERAL (-4. 0545662729752068639e-14 ) + p * w;
474- p = NBL_FP64_LITERAL (-8. 1519341976054721522e-14 ) + p * w;
475- p = NBL_FP64_LITERAL (2. 6335093153082322977e-12 ) + p * w;
476- p = NBL_FP64_LITERAL (-1. 2975133253453532498e-11 ) + p * w;
477- p = NBL_FP64_LITERAL (-5. 4154120542946279317e-11 ) + p * w;
478- p = NBL_FP64_LITERAL (1. 051212273321532285e-09 ) + p * w;
479- p = NBL_FP64_LITERAL (-4. 1126339803469836976e-09 ) + p * w;
480- p = NBL_FP64_LITERAL (-2. 9070369957882005086e-08 ) + p * w;
481- p = NBL_FP64_LITERAL (4. 2347877827932403518e-07 ) + p * w;
482- p = NBL_FP64_LITERAL (-1. 3654692000834678645e-06 ) + p * w;
483- p = NBL_FP64_LITERAL (-1. 3882523362786468719e-05 ) + p * w;
484- p = NBL_FP64_LITERAL (0.0001867342080340571352 ) + p * w;
485- p = NBL_FP64_LITERAL (-0.00074070253416626697512 ) + p * w;
486- p = NBL_FP64_LITERAL (-0.0060336708714301490533 ) + p * w;
487- p = NBL_FP64_LITERAL (0.24015818242558961693 ) + p * w;
488- p = NBL_FP64_LITERAL (1.6536545626831027356 ) + p * w;
489- }
490- else if (w < 16.000000 )
491- {
492- w = sqrt_helper<float64_t>::__call (w) - NBL_FP64_LITERAL (3.250000 );
493- p = NBL_FP64_LITERAL (2. 2137376921775787049e-09 );
494- p = NBL_FP64_LITERAL (9. 0756561938885390979e-08 ) + p * w;
495- p = NBL_FP64_LITERAL (-2. 7517406297064545428e-07 ) + p * w;
496- p = NBL_FP64_LITERAL (1. 8239629214389227755e-08 ) + p * w;
497- p = NBL_FP64_LITERAL (1. 5027403968909827627e-06 ) + p * w;
498- p = NBL_FP64_LITERAL (-4. 013867526981545969e-06 ) + p * w;
499- p = NBL_FP64_LITERAL (2. 9234449089955446044e-06 ) + p * w;
500- p = NBL_FP64_LITERAL (1. 2475304481671778723e-05 ) + p * w;
501- p = NBL_FP64_LITERAL (-4. 7318229009055733981e-05 ) + p * w;
502- p = NBL_FP64_LITERAL (6. 8284851459573175448e-05 ) + p * w;
503- p = NBL_FP64_LITERAL (2. 4031110387097893999e-05 ) + p * w;
504- p = NBL_FP64_LITERAL (-0.0003550375203628474796 ) + p * w;
505- p = NBL_FP64_LITERAL (0.00095328937973738049703 ) + p * w;
506- p = NBL_FP64_LITERAL (-0.0016882755560235047313 ) + p * w;
507- p = NBL_FP64_LITERAL (0.0024914420961078508066 ) + p * w;
508- p = NBL_FP64_LITERAL (-0.0037512085075692412107 ) + p * w;
509- p = NBL_FP64_LITERAL (0.005370914553590063617 ) + p * w;
510- p = NBL_FP64_LITERAL (1.0052589676941592334 ) + p * w;
511- p = NBL_FP64_LITERAL (3.0838856104922207635 ) + p * w;
512- }
513- else
514- {
515- w = sqrt_helper<float64_t>::__call (w) - NBL_FP64_LITERAL (5.000000 );
516- p = NBL_FP64_LITERAL (-2. 7109920616438573243e-11 );
517- p = NBL_FP64_LITERAL (-2. 5556418169965252055e-10 ) + p * w;
518- p = NBL_FP64_LITERAL (1. 5076572693500548083e-09 ) + p * w;
519- p = NBL_FP64_LITERAL (-3. 7894654401267369937e-09 ) + p * w;
520- p = NBL_FP64_LITERAL (7. 6157012080783393804e-09 ) + p * w;
521- p = NBL_FP64_LITERAL (-1. 4960026627149240478e-08 ) + p * w;
522- p = NBL_FP64_LITERAL (2. 9147953450901080826e-08 ) + p * w;
523- p = NBL_FP64_LITERAL (-6. 7711997758452339498e-08 ) + p * w;
524- p = NBL_FP64_LITERAL (2. 2900482228026654717e-07 ) + p * w;
525- p = NBL_FP64_LITERAL (-9. 9298272942317002539e-07 ) + p * w;
526- p = NBL_FP64_LITERAL (4. 5260625972231537039e-06 ) + p * w;
527- p = NBL_FP64_LITERAL (-1. 9681778105531670567e-05 ) + p * w;
528- p = NBL_FP64_LITERAL (7. 5995277030017761139e-05 ) + p * w;
529- p = NBL_FP64_LITERAL (-0.00021503011930044477347 ) + p * w;
530- p = NBL_FP64_LITERAL (-0.00013871931833623122026 ) + p * w;
531- p = NBL_FP64_LITERAL (1.0103004648645343977 ) + p * w;
532- p = NBL_FP64_LITERAL (4.8499064014085844221 ) + p * w;
533- }
534-
535- return p * x;
536- }
537- };
454+ // log doesn't accept float64_t
455+ // template<>
456+ // struct erfInv_helper<float64_t>
457+ // {
458+ // static float64_t __call(NBL_CONST_REF_ARG(float64_t) _x)
459+ // {
460+ // float64_t x = clamp<float64_t>(_x, NBL_FP64_LITERAL(-0.99999), NBL_FP64_LITERAL(0.99999));
461+
462+ // float64_t w = -log_helper<float64_t>::__call((NBL_FP64_LITERAL(1.0) - x) * (NBL_FP64_LITERAL(1.0) + x));
463+ // float64_t p;
464+ // if (w < 6.250000)
465+ // {
466+ // w -= NBL_FP64_LITERAL(3.125000);
467+ // p = NBL_FP64_LITERAL(-3.6444120640178196996e-21);
468+ // p = NBL_FP64_LITERAL(-1.685059138182016589e-19) + p * w;
469+ // p = NBL_FP64_LITERAL(1.2858480715256400167e-18) + p * w;
470+ // p = NBL_FP64_LITERAL(1.115787767802518096e-17) + p * w;
471+ // p = NBL_FP64_LITERAL(-1.333171662854620906e-16) + p * w;
472+ // p = NBL_FP64_LITERAL(2.0972767875968561637e-17) + p * w;
473+ // p = NBL_FP64_LITERAL(6.6376381343583238325e-15) + p * w;
474+ // p = NBL_FP64_LITERAL(-4.0545662729752068639e-14) + p * w;
475+ // p = NBL_FP64_LITERAL(-8.1519341976054721522e-14) + p * w;
476+ // p = NBL_FP64_LITERAL(2.6335093153082322977e-12) + p * w;
477+ // p = NBL_FP64_LITERAL(-1.2975133253453532498e-11) + p * w;
478+ // p = NBL_FP64_LITERAL(-5.4154120542946279317e-11) + p * w;
479+ // p = NBL_FP64_LITERAL(1.051212273321532285e-09) + p * w;
480+ // p = NBL_FP64_LITERAL(-4.1126339803469836976e-09) + p * w;
481+ // p = NBL_FP64_LITERAL(-2.9070369957882005086e-08) + p * w;
482+ // p = NBL_FP64_LITERAL(4.2347877827932403518e-07) + p * w;
483+ // p = NBL_FP64_LITERAL(-1.3654692000834678645e-06) + p * w;
484+ // p = NBL_FP64_LITERAL(-1.3882523362786468719e-05) + p * w;
485+ // p = NBL_FP64_LITERAL(0.0001867342080340571352) + p * w;
486+ // p = NBL_FP64_LITERAL(-0.00074070253416626697512) + p * w;
487+ // p = NBL_FP64_LITERAL(-0.0060336708714301490533) + p * w;
488+ // p = NBL_FP64_LITERAL(0.24015818242558961693) + p * w;
489+ // p = NBL_FP64_LITERAL(1.6536545626831027356) + p * w;
490+ // }
491+ // else if (w < 16.000000)
492+ // {
493+ // w = sqrt_helper<float64_t>::__call(w) - NBL_FP64_LITERAL(3.250000);
494+ // p = NBL_FP64_LITERAL(2.2137376921775787049e-09);
495+ // p = NBL_FP64_LITERAL(9.0756561938885390979e-08) + p * w;
496+ // p = NBL_FP64_LITERAL(-2.7517406297064545428e-07) + p * w;
497+ // p = NBL_FP64_LITERAL(1.8239629214389227755e-08) + p * w;
498+ // p = NBL_FP64_LITERAL(1.5027403968909827627e-06) + p * w;
499+ // p = NBL_FP64_LITERAL(-4.013867526981545969e-06) + p * w;
500+ // p = NBL_FP64_LITERAL(2.9234449089955446044e-06) + p * w;
501+ // p = NBL_FP64_LITERAL(1.2475304481671778723e-05) + p * w;
502+ // p = NBL_FP64_LITERAL(-4.7318229009055733981e-05) + p * w;
503+ // p = NBL_FP64_LITERAL(6.8284851459573175448e-05) + p * w;
504+ // p = NBL_FP64_LITERAL(2.4031110387097893999e-05) + p * w;
505+ // p = NBL_FP64_LITERAL(-0.0003550375203628474796) + p * w;
506+ // p = NBL_FP64_LITERAL(0.00095328937973738049703) + p * w;
507+ // p = NBL_FP64_LITERAL(-0.0016882755560235047313) + p * w;
508+ // p = NBL_FP64_LITERAL(0.0024914420961078508066) + p * w;
509+ // p = NBL_FP64_LITERAL(-0.0037512085075692412107) + p * w;
510+ // p = NBL_FP64_LITERAL(0.005370914553590063617) + p * w;
511+ // p = NBL_FP64_LITERAL(1.0052589676941592334) + p * w;
512+ // p = NBL_FP64_LITERAL(3.0838856104922207635) + p * w;
513+ // }
514+ // else
515+ // {
516+ // w = sqrt_helper<float64_t>::__call(w) - NBL_FP64_LITERAL(5.000000);
517+ // p = NBL_FP64_LITERAL(-2.7109920616438573243e-11);
518+ // p = NBL_FP64_LITERAL(-2.5556418169965252055e-10) + p * w;
519+ // p = NBL_FP64_LITERAL(1.5076572693500548083e-09) + p * w;
520+ // p = NBL_FP64_LITERAL(-3.7894654401267369937e-09) + p * w;
521+ // p = NBL_FP64_LITERAL(7.6157012080783393804e-09) + p * w;
522+ // p = NBL_FP64_LITERAL(-1.4960026627149240478e-08) + p * w;
523+ // p = NBL_FP64_LITERAL(2.9147953450901080826e-08) + p * w;
524+ // p = NBL_FP64_LITERAL(-6.7711997758452339498e-08) + p * w;
525+ // p = NBL_FP64_LITERAL(2.2900482228026654717e-07) + p * w;
526+ // p = NBL_FP64_LITERAL(-9.9298272942317002539e-07) + p * w;
527+ // p = NBL_FP64_LITERAL(4.5260625972231537039e-06) + p * w;
528+ // p = NBL_FP64_LITERAL(-1.9681778105531670567e-05) + p * w;
529+ // p = NBL_FP64_LITERAL(7.5995277030017761139e-05) + p * w;
530+ // p = NBL_FP64_LITERAL(-0.00021503011930044477347) + p * w;
531+ // p = NBL_FP64_LITERAL(-0.00013871931833623122026) + p * w;
532+ // p = NBL_FP64_LITERAL(1.0103004648645343977) + p * w;
533+ // p = NBL_FP64_LITERAL(4.8499064014085844221) + p * w;
534+ // }
535+
536+ // return p * x;
537+ // }
538+ // };
538539
539540#ifdef __HLSL_VERSION
540541// SPIR-V already defines specializations for builtin vector types
0 commit comments