Skip to content

Commit 76e23b2

Browse files
committed
improve exp2 implementations
1 parent d671ea7 commit 76e23b2

File tree

2 files changed

+75
-14
lines changed

2 files changed

+75
-14
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
name = "VectorizationBase"
22
uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
33
authors = ["Chris Elrod <elrodc@gmail.com>"]
4-
version = "0.21.67"
4+
version = "0.21.68"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/special/exp.jl

Lines changed: 74 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -258,9 +258,9 @@ LogBINV(::Val{10}, ::Type{Float32}) = 3.321928f0
258258
LogBU(::Val{2}, ::Type{Float32}) = -1.0f0
259259
LogBU(::Val{ℯ}, ::Type{Float32}) = -0.6931472f0
260260
LogBU(::Val{10}, ::Type{Float32}) = -0.30103f0
261-
LogBL(base::Val{2}, ::Type{Float32}) = 0.0f0
262-
LogBL(base::Val{ℯ}, ::Type{Float32}) = 1.9046542f-9
263-
LogBL(base::Val{10}, ::Type{Float32}) = 1.4320989f-8
261+
LogBL(::Val{2}, ::Type{Float32}) = 0.0f0
262+
LogBL(::Val{ℯ}, ::Type{Float32}) = 1.9046542f-9
263+
LogBL(::Val{10}, ::Type{Float32}) = 1.4320989f-8
264264

265265
const FloatType64 = Union{Float64,AbstractSIMD{<:Any,Float64}}
266266
const FloatType32 = Union{Float32,AbstractSIMD{<:Any,Float32}}
@@ -547,7 +547,9 @@ end
547547
# Because of the larger polynomial, this implementation works better on systems with 2 FMA units.
548548

549549
@inline function vexp2(x::AbstractSIMD{8,Float64}, ::True)
550-
r = vsreduce(16.0x, Val(0)) * 0.0625
550+
# M = 64 >> 4 = 4
551+
# r = x - round(2^M * x)*2^-M
552+
r = vsreduce(x, Val(64))
551553
N_float = x - r
552554
expr = expm1b_kernel_16(Val(2), r)
553555
inds = convert(UInt64, vsreduce(N_float, Val(1)) * 16.0)
@@ -635,18 +637,77 @@ end
635637
::Val{B}
636638
) where {B}
637639
N_float = vfmadd(x, LogBINV(Val{B}(), Float32), MAGIC_ROUND_CONST(Float32))
638-
N = reinterpret(UInt32, N_float)
639640
N_float = (N_float - MAGIC_ROUND_CONST(Float32))
640-
641-
r = fast_fma(N_float, LogBU(Val{B}(), Float32), x, fma_fast())
642-
r = fast_fma(N_float, LogBL(Val{B}(), Float32), r, fma_fast())
643-
641+
r = fma(N_float, LogBU(Val{B}(), Float32), x)
642+
r = fma(N_float, LogBL(Val{B}(), Float32), r)
644643
small_part = expb_kernel(Val{B}(), r)
645-
res = vscalef(small_part, N_float)
646-
# twopk = N << 0x00000017
647-
# res = reinterpret(Float32, twopk + small_part)
648-
return res
644+
return vscalef(small_part, N_float)
649645
end
646+
@inline function vexp2(x::AbstractSIMD{<:Any,Float32}, ::True)
647+
r = vsreduce(x, Val(0))
648+
N_float = x - r
649+
650+
small_part = expb_kernel(Val{2}(), r)
651+
return vscalef(small_part, N_float)
652+
end
653+
654+
# @inline function vexp_test(x::AbstractSIMD{16,Float32})#, ::True)
655+
# xb = x * LogBINV(Val{ℯ}(), Float32)
656+
# # rs = xb - round(xb)
657+
# rs = vsreduce(xb, Val(0))
658+
# N_float = xb - rs
659+
660+
# # rs = x*log2(ℯ) - N_float
661+
# # r = fma(x, Float32(log2(ℯ)), - N_float)
662+
663+
# # rs = x*(l2_hi + l2_lo) - N_float
664+
# # rs = x*l2_hi - N_float + x*l2_lo
665+
# # r = fma(x, 1.925963f-8, rs)
666+
# # small_part = expb_kernel(Val{2}(), r)
667+
# # B = ℯ
668+
# # r = fma(N_float, LogBU(Val{B}(), Float32), x)
669+
# # r = fma(N_float, LogBL(Val{B}(), Float32), r)
670+
# # small_part = expb_kernel(Val{B}(), r)
671+
672+
# rv2 = fma(1.442695f0, x, -N_float)
673+
# rv2 = fma(1.925963f-8, x, rv2)
674+
# small_part = expb_kernel(Val{2}(), rv2)
675+
676+
# # @show rs r rs / r rv2
677+
678+
# # xb = x * log2(ℯ )
679+
# # rs = xb - N_float
680+
# # rs = x * log2(ℯ) - N_float
681+
# # vs, desired:
682+
# # r = x - N_float * log(2)
683+
# # r = x - N_float / log2(ℯ)
684+
# # r = rs / log2(ℯ)
685+
# # r = 0.6931471805599453f0 * rs
686+
687+
# # small_part = expb_kernel(Val{2}(), r)
688+
# return vscalef(small_part, N_float)
689+
# end
690+
# @inline vexp_test(x::AbstractSIMD{16}) = vexp_test(Float32(x))
691+
# @inline vexp_test(x::Vec{8}) = shufflevector(
692+
# vexp_test(
693+
# shufflevector(
694+
# x,
695+
# x,
696+
# Val((0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))
697+
# )
698+
# ),
699+
# Val((0, 1, 2, 3, 4, 5, 6, 7))
700+
# )
701+
# @inline vexp_test(x::Vec{4}) = shufflevector(
702+
# vexp_test(shufflevector(x, x, Val((0, 1, 2, 3, 4, 5, 6, 7)))),
703+
# Val((0, 1, 2, 3))
704+
# )
705+
# @inline vexp_test(x::Vec{2}) = shufflevector(
706+
# vexp_test(shufflevector(x, x, Val((0, 1, 2, 3)))),
707+
# Val((0, 1))
708+
# )
709+
# @inline vexp_test(x::VecUnroll) = VecUnroll(fmap(vexp_test, data(x)))
710+
# @inline vexp_test(x::Float32) = vexp_test(Vec(x))(1)
650711

651712
else# if !((Sys.ARCH === :x86_64) | (Sys.ARCH === :i686))
652713
const target_trunc = identity

0 commit comments

Comments
 (0)