Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions lib/std/fmt.zig
Original file line number Diff line number Diff line change
Expand Up @@ -540,10 +540,7 @@ pub fn parseIntSizeSuffix(buf: []const u8, digit_base: u8) ParseIntError!usize {
} else if (without_i.len != without_B.len) {
return error.InvalidCharacter;
}
const multiplier = math.powi(usize, magnitude_base, orders_of_magnitude) catch |err| switch (err) {
error.Underflow => unreachable,
error.Overflow => return error.Overflow,
};
const multiplier = try math.powi(usize, magnitude_base, orders_of_magnitude);
const number = try std.fmt.parseInt(usize, without_suffix, digit_base);
return math.mul(usize, number, multiplier);
}
Expand Down
8 changes: 6 additions & 2 deletions lib/std/math.zig
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,10 @@ pub fn approxEqAbs(comptime T: type, x: T, y: T, tolerance: T) bool {
if (x == y)
return true;

if (isNan(x) or isNan(y))
if (isNan(x) or isNan(y)) {
if (T == comptime_float) unreachable;
return false;
}

return @abs(x - y) <= tolerance;
}
Expand All @@ -109,8 +111,10 @@ pub fn approxEqRel(comptime T: type, x: T, y: T, tolerance: T) bool {
if (x == y)
return true;

if (isNan(x) or isNan(y))
if (isNan(x) or isNan(y)) {
if (T == comptime_float) unreachable;
return false;
}

return @abs(x - y) <= @max(@abs(x), @abs(y)) * tolerance;
}
Expand Down
1 change: 1 addition & 0 deletions lib/std/math/big/int_test.zig
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,7 @@ fn toFloat(comptime Float: type) !void {
);
}
test toFloat {
@setEvalBranchQuota(1_100);
if (builtin.zig_backend == .stage2_llvm) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/24191
try toFloat(f16);
try toFloat(f32);
Expand Down
107 changes: 60 additions & 47 deletions lib/std/math/float.zig
Original file line number Diff line number Diff line change
Expand Up @@ -117,21 +117,28 @@ pub fn FloatRepr(comptime Float: type) type {

/// Produces the raw mantissa bits that encode "1.0" for floating point type T.
///
/// Only the 80-bit format contributes a non-zero value here (a single bit at
/// position `floatFractionalBits(T)`); every other float type, as well as
/// `comptime_float`, yields 0. Exists so the f80 special case is written once.
inline fn mantissaOne(comptime T: type) comptime_int {
    if (T == comptime_float) return 0;
    const is_f80_layout = @typeInfo(T).float.bits == 80;
    if (!is_f80_layout) return 0;
    return 1 << floatFractionalBits(T);
}

/// Creates floating point type T from an unbiased exponent and raw mantissa.
///
/// `exponent` is offset by `floatExponentMax(T)` and shifted left by
/// `floatMantissaBits(T)`, then OR-ed with `mantissa` to form the raw bit
/// pattern. For a concrete float type the pattern is bit-cast directly to `T`.
/// For `comptime_float` the pattern is built as a 128-bit integer, bit-cast to
/// `f128`, and then coerced to `T`.
inline fn reconstructFloat(comptime T: type, comptime exponent: comptime_int, comptime mantissa: comptime_int) T {
    // UBits: unsigned integer wide enough for the raw bit pattern.
    // FBits: the concrete float type that pattern is reinterpreted as.
    const UBits, const FBits = switch (@typeInfo(T)) {
        .float => |float| .{ std.meta.Int(.unsigned, float.bits), T },
        .comptime_float => .{ std.meta.Int(.unsigned, 128), f128 },
        else => unreachable,
    };
    const biased_exponent = @as(UBits, exponent + floatExponentMax(T));
    return @as(T, @as(FBits, @bitCast((biased_exponent << floatMantissaBits(T)) | @as(UBits, mantissa))));
}

/// Returns the number of bits in the exponent of floating point type T.
pub inline fn floatExponentBits(comptime T: type) comptime_int {
comptime assert(@typeInfo(T) == .float);
const info = @typeInfo(T);
comptime assert(info == .float or info == .comptime_float);

return switch (@typeInfo(T).float.bits) {
if (info == .comptime_float) return 15;
return switch (info.float.bits) {
16 => 5,
32 => 8,
64 => 11,
Expand All @@ -143,9 +150,11 @@ pub inline fn floatExponentBits(comptime T: type) comptime_int {

/// Returns the number of bits in the mantissa of floating point type T.
pub inline fn floatMantissaBits(comptime T: type) comptime_int {
comptime assert(@typeInfo(T) == .float);
const info = @typeInfo(T);
comptime assert(info == .float or info == .comptime_float);

return switch (@typeInfo(T).float.bits) {
if (info == .comptime_float) return 112;
return switch (info.float.bits) {
16 => 10,
32 => 23,
64 => 52,
Expand All @@ -157,12 +166,14 @@ pub inline fn floatMantissaBits(comptime T: type) comptime_int {

/// Returns the number of fractional bits in the mantissa of floating point type T.
pub inline fn floatFractionalBits(comptime T: type) comptime_int {
comptime assert(@typeInfo(T) == .float);
const info = @typeInfo(T);
comptime assert(info == .float or info == .comptime_float);

// standard IEEE floats have an implicit 0.m or 1.m integer part
// f80 is special and has an explicitly stored bit in the MSB
// this function corresponds to `MANT_DIG - 1' from C
return switch (@typeInfo(T).float.bits) {
if (info == .comptime_float) return 112;
return switch (info.float.bits) {
16 => 10,
32 => 23,
64 => 52,
Expand Down Expand Up @@ -208,58 +219,58 @@ pub inline fn floatEps(comptime T: type) T {
/// Returns the local epsilon of floating point type T.
///
/// The result is `@abs(x - y)`, where `y` is `x` with the lowest bit of its
/// raw bit representation flipped. For `comptime_float` the computation goes
/// through the `f128` representation of `x`. Any non-float `T` is rejected
/// with a compile error.
pub inline fn floatEpsAt(comptime T: type, x: T) T {
    switch (@typeInfo(T)) {
        .float => |float| {
            // Reinterpret x as an unsigned integer of the same width so the
            // least significant bit can be toggled.
            const U = std.meta.Int(.unsigned, float.bits);
            const u: U = @bitCast(x);
            const y: T = @bitCast(u ^ 1);
            return @abs(x - y);
        },
        .comptime_float => {
            const u: u128 = @bitCast(@as(f128, x));
            const y: f128 = @bitCast(u ^ 1);
            return @as(comptime_float, @abs(x - y));
        },
        else => @compileError("floatEpsAt only supports floats"),
    }
}

/// Returns the inf value for a floating point `Type`.
///
/// The value is built with an exponent of `floatExponentMax(Type) + 1` and a
/// mantissa of `mantissaOne(Type)`. `comptime_float` and non-float types are
/// rejected with a compile error.
pub inline fn inf(comptime Type: type) Type {
    return switch (@typeInfo(Type)) {
        .float => reconstructFloat(Type, floatExponentMax(Type) + 1, mantissaOne(Type)),
        .comptime_float => @compileError("comptime_float cannot be infinity"),
        else => @compileError("unknown floating point type " ++ @typeName(Type)),
    };
}

/// Returns the canonical quiet NaN representation for a floating point `Type`.
///
/// The value uses an exponent of `floatExponentMax(Type) + 1` and sets the
/// highest fractional mantissa bit (bit `floatFractionalBits(Type) - 1`) on
/// top of `mantissaOne(Type)`. `comptime_float` and non-float types are
/// rejected with a compile error.
pub inline fn nan(comptime Type: type) Type {
    return switch (@typeInfo(Type)) {
        .float => reconstructFloat(
            Type,
            floatExponentMax(Type) + 1,
            mantissaOne(Type) | 1 << (floatFractionalBits(Type) - 1),
        ),
        .comptime_float => @compileError("comptime_float cannot be NaN"),
        else => @compileError("unknown floating point type " ++ @typeName(Type)),
    };
}

/// Returns a signalling NaN representation for a floating point `Type`.
///
/// The value uses an exponent of `floatExponentMax(Type) + 1` and sets the
/// second-highest fractional mantissa bit (bit `floatFractionalBits(Type) - 2`)
/// on top of `mantissaOne(Type)`. `comptime_float` and non-float types are
/// rejected with a compile error.
///
/// TODO: LLVM is known to miscompile on some architectures to quiet NaN -
/// this is tracked by https://github.com/ziglang/zig/issues/14366
pub inline fn snan(comptime Type: type) Type {
    return switch (@typeInfo(Type)) {
        .float => reconstructFloat(
            Type,
            floatExponentMax(Type) + 1,
            mantissaOne(Type) | 1 << (floatFractionalBits(Type) - 2),
        ),
        .comptime_float => @compileError("comptime_float cannot be NaN"),
        else => @compileError("unknown floating point type " ++ @typeName(Type)),
    };
}

fn floatBits(comptime Type: type) !void {
// (1 +) for the sign bit, since it is separate from the other bits
const size = 1 + floatExponentBits(Type) + floatMantissaBits(Type);
try expect(@bitSizeOf(Type) == size);
if (@typeInfo(Type) == .float)
try expect(@bitSizeOf(Type) == size)
else
try expect(128 == size);
try expect(floatFractionalBits(Type) <= floatMantissaBits(Type));

// for machine epsilon, assert expmin <= -prec <= expmax
Expand All @@ -273,6 +284,8 @@ test floatBits {
try floatBits(f80);
try floatBits(f128);
try floatBits(c_longdouble);
try floatBits(comptime_float);
try comptime floatBits(comptime_float);
}

test inf {
Expand All @@ -281,11 +294,11 @@ test inf {
const inf_u64: u64 = 0x7FF0000000000000;
const inf_u80: u80 = 0x7FFF8000000000000000;
const inf_u128: u128 = 0x7FFF0000000000000000000000000000;
try expectEqual(inf_u16, @as(u16, @bitCast(inf(f16))));
try expectEqual(inf_u32, @as(u32, @bitCast(inf(f32))));
try expectEqual(inf_u64, @as(u64, @bitCast(inf(f64))));
try expectEqual(inf_u80, @as(u80, @bitCast(inf(f80))));
try expectEqual(inf_u128, @as(u128, @bitCast(inf(f128))));
try expect(inf_u16 == @as(u16, @bitCast(inf(f16))));
try expect(inf_u32 == @as(u32, @bitCast(inf(f32))));
try expect(inf_u64 == @as(u64, @bitCast(inf(f64))));
try expect(inf_u80 == @as(u80, @bitCast(inf(f80))));
try expect(inf_u128 == @as(u128, @bitCast(inf(f128))));
}

test nan {
Expand All @@ -294,11 +307,11 @@ test nan {
const qnan_u64: u64 = 0x7FF8000000000000;
const qnan_u80: u80 = 0x7FFFC000000000000000;
const qnan_u128: u128 = 0x7FFF8000000000000000000000000000;
try expectEqual(qnan_u16, @as(u16, @bitCast(nan(f16))));
try expectEqual(qnan_u32, @as(u32, @bitCast(nan(f32))));
try expectEqual(qnan_u64, @as(u64, @bitCast(nan(f64))));
try expectEqual(qnan_u80, @as(u80, @bitCast(nan(f80))));
try expectEqual(qnan_u128, @as(u128, @bitCast(nan(f128))));
try expect(qnan_u16 == @as(u16, @bitCast(nan(f16))));
try expect(qnan_u32 == @as(u32, @bitCast(nan(f32))));
try expect(qnan_u64 == @as(u64, @bitCast(nan(f64))));
try expect(qnan_u80 == @as(u80, @bitCast(nan(f80))));
try expect(qnan_u128 == @as(u128, @bitCast(nan(f128))));
}

test snan {
Expand All @@ -307,9 +320,9 @@ test snan {
const snan_u64: u64 = 0x7FF4000000000000;
const snan_u80: u80 = 0x7FFFA000000000000000;
const snan_u128: u128 = 0x7FFF4000000000000000000000000000;
try expectEqual(snan_u16, @as(u16, @bitCast(snan(f16))));
try expectEqual(snan_u32, @as(u32, @bitCast(snan(f32))));
try expectEqual(snan_u64, @as(u64, @bitCast(snan(f64))));
try expectEqual(snan_u80, @as(u80, @bitCast(snan(f80))));
try expectEqual(snan_u128, @as(u128, @bitCast(snan(f128))));
try expect(snan_u16 == @as(u16, @bitCast(snan(f16))));
try expect(snan_u32 == @as(u32, @bitCast(snan(f32))));
try expect(snan_u64 == @as(u64, @bitCast(snan(f64))));
try expect(snan_u80 == @as(u80, @bitCast(snan(f80))));
try expect(snan_u128 == @as(u128, @bitCast(snan(f128))));
}
Loading
Loading