@@ -122,31 +122,89 @@ fpinttype(::Type{Float128}) = UInt128
122122# conversion
# Identity conversion: a value that is already a Float128 is returned unchanged.
function Float128(x::Float128)
    return x
end
124124
125- # # Float64
125+ # Float64
# Float64 -> Float128 through the `__extenddftf2` routine of `quadoplib`.
function Float128(x::Float64)
    widened = @ccall quadoplib.__extenddftf2(x::Cdouble)::Cfloat128
    return Float128(widened)
end

# Float128 -> Float64 through the `__trunctfdf2` routine of `quadoplib`.
function Float64(x::Float128)
    return @ccall quadoplib.__trunctfdf2(x::Cfloat128)::Cdouble
end
130130
131- # Int32
131+ # Float32
# Float32 -> Float128 through the `__extendsftf2` routine of `quadoplib`.
function Float128(x::Float32)
    widened = @ccall quadoplib.__extendsftf2(x::Cfloat)::Cfloat128
    return Float128(widened)
end

# Float128 -> Float32 through the `__trunctfsf2` routine of `quadoplib`.
function Float32(x::Float128)
    return @ccall quadoplib.__trunctfsf2(x::Cfloat128)::Cfloat
end
136+
137+ # integer -> Float128
# Int32 -> Float128 through the `__floatsitf` routine of `quadoplib`.
function Float128(x::Int32)
    converted = @ccall quadoplib.__floatsitf(x::Int32)::Cfloat128
    return Float128(converted)
end
134- Int32 (x:: Float128 ) =
135- @ccall (quadoplib. __fixtfsi (x:: Cfloat128 ):: Int32 )
136140
137- # UInt32
# UInt32 -> Float128 through the `__floatunsitf` routine of `quadoplib`.
function Float128(x::UInt32)
    converted = @ccall quadoplib.__floatunsitf(x::UInt32)::Cfloat128
    return Float128(converted)
end
140143
141- # Int64
# Int64 -> Float128 through the `__floatditf` routine of `quadoplib`.
function Float128(x::Int64)
    converted = @ccall quadoplib.__floatditf(x::Int64)::Cfloat128
    return Float128(converted)
end
144- Int64 (x:: Float128 ) =
145- @ccall (quadoplib. __fixtfdi (x:: Cfloat128 ):: Int64 )
146+
# UInt64 -> Float128 through the `__floatunditf` routine of `quadoplib`.
function Float128(x::UInt64)
    converted = @ccall quadoplib.__floatunditf(x::UInt64)::Cfloat128
    return Float128(converted)
end
149+
# 8- and 16-bit integers are widened to the 32-bit type of the same signedness and
# then converted with the corresponding 32-bit method.
for (Tnarrow, Twide) in ((Int16, Int32), (Int8, Int32), (UInt16, UInt32), (UInt8, UInt32))
    @eval Float128(x::$Tnarrow) = Float128($Twide(x))
end
154+
# Convert a `UInt128` to `Float128` by assembling the 128-bit pattern directly:
# a 15-bit biased exponent in bits 112..126 and a 112-bit stored significand below it.
# Values with more than 113 significant bits are rounded to nearest, ties to even.
function Float128(x::UInt128)
    x == 0 && return Float128(0.0)
    n = 128 - leading_zeros(x) # number of significant bits, i.e. ndigits0z(x,2)
    if n <= 113
        # Everything fits: left-align to 113 bits and drop the implicit leading bit.
        y = (x << (113 - n)) & significand_mask(Float128)
    else
        # Keep the 112 stored bits plus 1 extra bit used for rounding.
        y = (x >> (n - 114)) & 0x0001_ffff_ffff_ffff_ffff_ffff_ffff_ffff
        y = (y + 1) >> 1 # round, ties up (extra leading bit in case of next exponent)
        # Fix last bit to round to even. The mask has to be a full UInt128: a UInt64
        # mask is zero-extended and would wipe the upper 64 bits of the significand.
        y &= ~UInt128(trailing_zeros(x) == (n - 114))
    end
    # Biased exponent of 2^(n-1) is (n - 1) + 16383. Adding `y` (instead of or-ing)
    # lets a significand that rounded up to 2^112 carry into the exponent field.
    d = ((n + 16382) % UInt128) << 112
    d += y
    if Sys.iswindows()
        return reinterpret(Float128, d)
    else
        # Off Windows the value is built from its two 64-bit halves, passed as a
        # (low, high) tuple to the tuple-accepting Float128 constructor.
        y1 = reinterpret(Float64, UInt64(d >> 64))
        y2 = reinterpret(Float64, d % UInt64)
        return Float128((y2, y1))
    end
end
176+
# Convert an `Int128` to `Float128` by assembling the 128-bit pattern directly:
# sign in bit 127, a 15-bit biased exponent in bits 112..126 and a 112-bit stored
# significand below it. Magnitudes with more than 113 significant bits are rounded
# to nearest, ties to even.
function Float128(x::Int128)
    # Return a Float128 zero (not the Float64 literal 0.0) so the result type is stable.
    x == 0 && return Float128(0.0)
    s = reinterpret(UInt128, x) & sign_mask(Float128) # sign bit
    # Magnitude as an unsigned value; typemin(Int128) wraps to 2^127, which is correct.
    ux = abs(x) % UInt128
    n = 128 - leading_zeros(ux) # number of significant bits, i.e. ndigits0z(ux,2)
    if n <= 113
        # Everything fits: left-align to 113 bits and drop the implicit leading bit.
        y = (ux << (113 - n)) & significand_mask(Float128)
    else
        # Keep the 112 stored bits plus 1 extra bit used for rounding.
        y = (ux >> (n - 114)) & 0x0001_ffff_ffff_ffff_ffff_ffff_ffff_ffff
        y = (y + 1) >> 1 # round, ties up (extra leading bit in case of next exponent)
        # Fix last bit to round to even. The mask has to be a full UInt128: a UInt64
        # mask is zero-extended and would wipe the upper 64 bits of the significand.
        y &= ~UInt128(trailing_zeros(ux) == (n - 114))
    end
    # Biased exponent of 2^(n-1) is (n - 1) + 16383. Adding `y` (instead of or-ing)
    # lets a significand that rounded up to 2^112 carry into the exponent field.
    d = ((n + 16382) % UInt128) << 112
    d = s | (d + y)
    if Sys.iswindows()
        return reinterpret(Float128, d)
    else
        # Off Windows the value is built from its two 64-bit halves, passed as a
        # (low, high) tuple to the tuple-accepting Float128 constructor.
        y1 = reinterpret(Float64, UInt64(d >> 64))
        y2 = reinterpret(Float64, d % UInt64)
        return Float128((y2, y1))
    end
end
200+
# Float128 -> integer conversions require arithmetic, so they are defined below
146202
147203# Rational
# Rational -> Float128: convert numerator and denominator separately, then divide
# in Float128 arithmetic.
function Float128(x::Rational)
    num = Float128(numerator(x))
    den = Float128(denominator(x))
    return num / den
end
149205
# Bool -> Float128: `true` maps to one, `false` to zero.
function Float128(x::Bool)
    if x
        return Float128(1)
    else
        return Float128(0)
    end
end
207+
150208# Comparison
# Equality is delegated to `__eqtf2` from `quadoplib`; a zero status means equal.
function (==)(x::Float128, y::Float128)
    status = @ccall quadoplib.__eqtf2(x::Cfloat128, y::Cfloat128)::Cint
    return status == 0
end
@@ -168,6 +226,85 @@ Float128(x::Rational{T}) where T = Float128(numerator(x))/Float128(denominator(x
# Unary minus, delegated to `__negtf2` from `quadoplib`.
function (-)(x::Float128)
    negated = @ccall quadoplib.__negtf2(x::Cfloat128)::Cfloat128
    return Float128(negated)
end
170228
229+ # Float128 -> Integer
# Unchecked Float128 -> 32/64-bit integer conversions. Each method forwards directly
# to the matching `quadoplib` routine and performs no range check of its own.
function unsafe_trunc(::Type{Int32}, x::Float128)
    return @ccall quadoplib.__fixtfsi(x::Cfloat128)::Int32
end

function unsafe_trunc(::Type{Int64}, x::Float128)
    return @ccall quadoplib.__fixtfdi(x::Cfloat128)::Int64
end

function unsafe_trunc(::Type{UInt32}, x::Float128)
    return @ccall quadoplib.__fixunstfsi(x::Cfloat128)::UInt32
end

function unsafe_trunc(::Type{UInt64}, x::Float128)
    return @ccall quadoplib.__fixunstfdi(x::Cfloat128)::UInt64
end
241+
# Unchecked Float128 -> UInt128 conversion done on the raw bits: restore the implicit
# leading bit of the significand and shift it by the unbiased exponent. The sign bit
# is ignored and no range check is performed.
function unsafe_trunc(::Type{UInt128}, x::Float128)
    bits = reinterpret(UInt128, x)
    # Shift that places the 113-bit significand at its integer position.
    shift = (Int64(bits >> 112) & 0x07fff) - 16382 - 113
    mantissa = (bits & significand_mask(Float128)) | 0x0001_0000_0000_0000_0000_0000_0000_0000
    return shift <= 0 ? UInt128(mantissa >> -shift) : UInt128(mantissa) << shift
end
# Unchecked Float128 -> Int128 conversion: truncate the magnitude through the UInt128
# method, reinterpret it as signed, and give it the sign of `x`.
function unsafe_trunc(::Type{Int128}, x::Float128)
    magnitude = unsafe_trunc(UInt128, x) % Int128
    return copysign(magnitude, x)
end
# Truncation to an abstract integer type picks the native-width concrete type.
# `Unsigned` must map to `UInt` (as Base does for its own float types); forwarding it
# to `Int` would return a signed value and accept negative inputs.
trunc(::Type{Signed}, x::Float128) = trunc(Int, x)
trunc(::Type{Unsigned}, x::Float128) = trunc(UInt, x)
trunc(::Type{Integer}, x::Float128) = trunc(Int, x)
258+
# Checked Float128 -> integer conversions. For every listed integer type this generates
#   * `trunc(Ti, x)`: truncate toward zero, throwing `InexactError` if out of range;
#   * `Ti(x)`: exact conversion, throwing `InexactError` if `x` is out of range or is
#     not integer-valued.
# The range bounds are computed once, when the methods are generated, and spliced in
# as constants.
for Ti in (Int32, Int64, Int128, UInt32, UInt64, UInt128)
    let Tf = Float128
        if Ti <: Unsigned || sizeof(Ti) < sizeof(Tf)
            # Here `Tf(typemin(Ti))-1` is exact, so we can compare the lower-bound
            # directly. `Tf(typemax(Ti))+1` equals `typemax(Ti)+1`: it is exact for the
            # 32/64-bit types, and for `UInt128` both steps round to 2^128 (assuming
            # `Tf(typemax(Ti))` is correctly rounded).
            @eval begin
                function trunc(::Type{$Ti}, x::$Tf)
                    if $(Tf(typemin(Ti)) - one(Tf)) < x < $(Tf(typemax(Ti)) + one(Tf))
                        return unsafe_trunc($Ti, x)
                    else
                        throw(InexactError(:trunc, $Ti, x))
                    end
                end
                function (::Type{$Ti})(x::$Tf)
                    # Use the strict bound `< typemax+1` rather than `<= Tf(typemax(Ti))`:
                    # when `typemax(Ti)` is not representable (UInt128) the latter rounds
                    # up to 2^128 and would let an out-of-range value through.
                    if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)) + one(Tf))) &&
                       (round(x, RoundToZero) == x)
                        return unsafe_trunc($Ti, x)
                    else
                        throw(InexactError($(Expr(:quote, Ti.name.name)), $Ti, x))
                    end
                end
            end
        else
            # Here `eps(Tf(typemin(Ti))) > 1`, so the only value which can be truncated to
            # `Tf(typemin(Ti))` is itself. Similarly, `Tf(typemax(Ti))` is inexact and will
            # be rounded up. This assumes that `Tf(typemin(Ti)) > -Inf`, which is true for
            # these types, but not for `Float16` or larger integer types.
            @eval begin
                function trunc(::Type{$Ti}, x::$Tf)
                    if $(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))
                        return unsafe_trunc($Ti, x)
                    else
                        throw(InexactError(:trunc, $Ti, x))
                    end
                end
                function (::Type{$Ti})(x::$Tf)
                    if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))) &&
                       (round(x, RoundToZero) == x)
                        return unsafe_trunc($Ti, x)
                    else
                        throw(InexactError($(Expr(:quote, Ti.name.name)), $Ti, x))
                    end
                end
            end
        end
    end
end
305+
306+ # # math
307+
171308# # one argument
172309for f in (:acos , :acosh , :asin , :asinh , :atan , :atanh , :cosh , :cos ,
173310 :exp , :expm1 , :log , :log2 , :log10 , :log1p ,
180317
# Absolute value, delegated to `fabsq` from `libquadmath`.
function abs(x::Float128)
    magnitude = @ccall libquadmath.fabsq(x::Cfloat128)::Cfloat128
    return Float128(magnitude)
end

# Rounding to an integral value, delegated to `rintq` from `libquadmath`.
function round(x::Float128)
    rounded = @ccall libquadmath.rintq(x::Cfloat128)::Cfloat128
    return Float128(rounded)
end
# Directed rounding to an integral value.
round(x::Float128, r::RoundingMode{:Down}) = floor(x)
round(x::Float128, r::RoundingMode{:Up}) = ceil(x)
# Rounding toward zero is truncation. Forwarding to `round(x)` (`rintq`) would round
# according to the current rounding mode instead, e.g. giving 2 for 1.7.
round(x::Float128, r::RoundingMode{:ToZero}) =
    Float128(@ccall(libquadmath.truncq(x::Cfloat128)::Cfloat128))
183323
184324# # two argument
185325(^ )(x:: Float128 , y:: Float128 ) =
0 commit comments