@@ -258,9 +258,9 @@ LogBINV(::Val{10}, ::Type{Float32}) = 3.321928f0
# LogBU(Val{B}(), Float32): leading ("upper") Float32 part of -log_B(2) for B = 2, ℯ, 10
# (-1, about -log(2), about -log10(2)). Paired with LogBL, which holds the remaining low-order part.
258258LogBU (:: Val{2} , :: Type{Float32} ) = - 1.0f0
259259LogBU (:: Val{ℯ} , :: Type{Float32} ) = - 0.6931472f0
260260LogBU (:: Val{10} , :: Type{Float32} ) = - 0.30103f0
261- LogBL (base :: Val{2} , :: Type{Float32} ) = 0.0f0
262- LogBL (base :: Val{ℯ} , :: Type{Float32} ) = 1.9046542f-9
263- LogBL (base :: Val{10} , :: Type{Float32} ) = 1.4320989f-8
# LogBL(Val{B}(), Float32): low-order correction such that LogBU + LogBL ≈ -log_B(2)
# beyond Float32 precision. It is zero for base 2 because -log2(2) = -1 is exactly representable.
261+ LogBL (:: Val{2} , :: Type{Float32} ) = 0.0f0
262+ LogBL (:: Val{ℯ} , :: Type{Float32} ) = 1.9046542f-9
263+ LogBL (:: Val{10} , :: Type{Float32} ) = 1.4320989f-8
264264
# Dispatch aliases: a scalar float of the given width, or any-width AbstractSIMD
# whose element type is that float.
265265const FloatType64 = Union{Float64,AbstractSIMD{<: Any ,Float64}}
266266const FloatType32 = Union{Float32,AbstractSIMD{<: Any ,Float32}}
547547 # Because of the larger polynomial, this implementation works better on systems with 2 FMA units.
548548
549549 @inline function vexp2 (x:: AbstractSIMD{8,Float64} , :: True )
550- r = vsreduce (16.0 x, Val (0 )) * 0.0625
550+ # M = 64 >> 4 = 4
551+ # r = x - round(2^M * x)*2^-M
552+ r = vsreduce (x, Val (64 ))
551553 N_float = x - r
552554 expr = expm1b_kernel_16 (Val (2 ), r)
553555 inds = convert (UInt64, vsreduce (N_float, Val (1 )) * 16.0 )
@@ -635,18 +637,77 @@ end
635637 :: Val{B}
636638 ) where {B}
637639 N_float = vfmadd (x, LogBINV (Val {B} (), Float32), MAGIC_ROUND_CONST (Float32))
638- N = reinterpret (UInt32, N_float)
639640 N_float = (N_float - MAGIC_ROUND_CONST (Float32))
640-
641- r = fast_fma (N_float, LogBU (Val {B} (), Float32), x, fma_fast ())
642- r = fast_fma (N_float, LogBL (Val {B} (), Float32), r, fma_fast ())
643-
641+ r = fma (N_float, LogBU (Val {B} (), Float32), x)
642+ r = fma (N_float, LogBL (Val {B} (), Float32), r)
644643 small_part = expb_kernel (Val {B} (), r)
645- res = vscalef (small_part, N_float)
646- # twopk = N << 0x00000017
647- # res = reinterpret(Float32, twopk + small_part)
648- return res
644+ return vscalef (small_part, N_float)
649645 end
# Base-2 exponential for Float32 SIMD vectors (method selected by the trailing `True` argument).
# Splits x into N_float + r and combines the kernel result for r with a scaling by N_float.
# With immediate 0 the reduction scale is M = 0 >> 4 = 0, so following the formula noted for
# vsreduce elsewhere in this file (r = x - round(2^M * x) * 2^-M), r = x - round(x).
646+ @inline function vexp2 (x:: AbstractSIMD{<:Any,Float32} , :: True )
647+ r = vsreduce (x, Val (0 ))
# N_float is what was removed from x by the reduction (round(x) under the formula above).
648+ N_float = x - r
649+
# Base-2 kernel evaluated on the reduced argument only; presumably approximates 2^r — TODO confirm.
650+ small_part = expb_kernel (Val {2} (), r)
# NOTE(review): vscalef presumably computes small_part * 2^N_float — confirm against its definition.
651+ return vscalef (small_part, N_float)
652+ end
653+
654+ # @inline function vexp_test(x::AbstractSIMD{16,Float32})#, ::True)
655+ # xb = x * LogBINV(Val{ℯ}(), Float32)
656+ # # rs = xb - round(xb)
657+ # rs = vsreduce(xb, Val(0))
658+ # N_float = xb - rs
659+
660+ # # rs = x*log2(ℯ) - N_float
661+ # # r = fma(x, Float32(log2(ℯ)), - N_float)
662+
663+ # # rs = x*(l2_hi + l2_lo) - N_float
664+ # # rs = x*l2_hi - N_float + x*l2_lo
665+ # # r = fma(x, 1.925963f-8, rs)
666+ # # small_part = expb_kernel(Val{2}(), r)
667+ # # B = ℯ
668+ # # r = fma(N_float, LogBU(Val{B}(), Float32), x)
669+ # # r = fma(N_float, LogBL(Val{B}(), Float32), r)
670+ # # small_part = expb_kernel(Val{B}(), r)
671+
672+ # rv2 = fma(1.442695f0, x, -N_float)
673+ # rv2 = fma(1.925963f-8, x, rv2)
674+ # small_part = expb_kernel(Val{2}(), rv2)
675+
676+ # # @show rs r rs / r rv2
677+
678+ # # xb = x * log2(ℯ )
679+ # # rs = xb - N_float
680+ # # rs = x * log2(ℯ) - N_float
681+ # # vs, desired:
682+ # # r = x - N_float * log(2)
683+ # # r = x - N_float / log2(ℯ)
684+ # # r = rs / log2(ℯ)
685+ # # r = 0.6931471805599453f0 * rs
686+
687+ # # small_part = expb_kernel(Val{2}(), r)
688+ # return vscalef(small_part, N_float)
689+ # end
690+ # @inline vexp_test(x::AbstractSIMD{16}) = vexp_test(Float32(x))
691+ # @inline vexp_test(x::Vec{8}) = shufflevector(
692+ # vexp_test(
693+ # shufflevector(
694+ # x,
695+ # x,
696+ # Val((0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))
697+ # )
698+ # ),
699+ # Val((0, 1, 2, 3, 4, 5, 6, 7))
700+ # )
701+ # @inline vexp_test(x::Vec{4}) = shufflevector(
702+ # vexp_test(shufflevector(x, x, Val((0, 1, 2, 3, 4, 5, 6, 7)))),
703+ # Val((0, 1, 2, 3))
704+ # )
705+ # @inline vexp_test(x::Vec{2}) = shufflevector(
706+ # vexp_test(shufflevector(x, x, Val((0, 1, 2, 3)))),
707+ # Val((0, 1))
708+ # )
709+ # @inline vexp_test(x::VecUnroll) = VecUnroll(fmap(vexp_test, data(x)))
710+ # @inline vexp_test(x::Float32) = vexp_test(Vec(x))(1)
650711
651712else # if !((Sys.ARCH === :x86_64) | (Sys.ARCH === :i686))
652713 const target_trunc = identity
0 commit comments