Add comptime_float support to std.math.float functions

Danielkonge · Danielkonge · commit ac0f590ae428 · 2025-11-08T19:23:02.000+01:00
diff --git a/lib/std/math/float.zig b/lib/std/math/float.zig
@@ -117,21 +117,28 @@ pub fn FloatRepr(comptime Float: type) type {
 
 /// Creates a raw "1.0" mantissa for floating point type T. Used to dedupe f80 logic.
 inline fn mantissaOne(comptime T: type) comptime_int {
+    if (T == comptime_float) return 0;
     return if (@typeInfo(T).float.bits == 80) 1 << floatFractionalBits(T) else 0;
 }
 
 /// Creates floating point type T from an unbiased exponent and raw mantissa.
 inline fn reconstructFloat(comptime T: type, comptime exponent: comptime_int, comptime mantissa: comptime_int) T {
-    const TBits = @Type(.{ .int = .{ .signedness = .unsigned, .bits = @bitSizeOf(T) } });
-    const biased_exponent = @as(TBits, exponent + floatExponentMax(T));
-    return @as(T, @bitCast((biased_exponent << floatMantissaBits(T)) | @as(TBits, mantissa)));
+    const UBits, const FBits = switch (@typeInfo(T)) {
+        .float => |float| .{ std.meta.Int(.unsigned, float.bits), T },
+        .comptime_float => .{ std.meta.Int(.unsigned, 128), f128 },
+        else => unreachable,
+    };
+    const biased_exponent = @as(UBits, exponent + floatExponentMax(T));
+    return @as(T, @as(FBits, @bitCast((biased_exponent << floatMantissaBits(T)) | @as(UBits, mantissa))));
 }
 
 /// Returns the number of bits in the exponent of floating point type T.
 pub inline fn floatExponentBits(comptime T: type) comptime_int {
-    comptime assert(@typeInfo(T) == .float);
+    const info = @typeInfo(T);
+    comptime assert(info == .float or info == .comptime_float);
 
-    return switch (@typeInfo(T).float.bits) {
+    if (info == .comptime_float) return 15;
+    return switch (info.float.bits) {
         16 => 5,
         32 => 8,
         64 => 11,
@@ -143,9 +150,11 @@ pub inline fn floatExponentBits(comptime T: type) comptime_int {
 
 /// Returns the number of bits in the mantissa of floating point type T.
 pub inline fn floatMantissaBits(comptime T: type) comptime_int {
-    comptime assert(@typeInfo(T) == .float);
+    const info = @typeInfo(T);
+    comptime assert(info == .float or info == .comptime_float);
 
-    return switch (@typeInfo(T).float.bits) {
+    if (info == .comptime_float) return 112;
+    return switch (info.float.bits) {
         16 => 10,
         32 => 23,
         64 => 52,
@@ -157,12 +166,14 @@ pub inline fn floatMantissaBits(comptime T: type) comptime_int {
 
 /// Returns the number of fractional bits in the mantissa of floating point type T.
 pub inline fn floatFractionalBits(comptime T: type) comptime_int {
-    comptime assert(@typeInfo(T) == .float);
+    const info = @typeInfo(T);
+    comptime assert(info == .float or info == .comptime_float);
 
     // standard IEEE floats have an implicit 0.m or 1.m integer part
     // f80 is special and has an explicitly stored bit in the MSB
     // this function corresponds to `MANT_DIG - 1' from C
-    return switch (@typeInfo(T).float.bits) {
+    if (info == .comptime_float) return 112;
+    return switch (info.float.bits) {
         16 => 10,
         32 => 23,
         64 => 52,
@@ -208,58 +219,58 @@ pub inline fn floatEps(comptime T: type) T {
 /// Returns the local epsilon of floating point type T.
 pub inline fn floatEpsAt(comptime T: type, x: T) T {
     switch (@typeInfo(T)) {
-        .float => |F| {
-            const U: type = @Type(.{ .int = .{ .signedness = .unsigned, .bits = F.bits } });
+        .float => |float| {
+            const U = std.meta.Int(.unsigned, float.bits);
             const u: U = @bitCast(x);
             const y: T = @bitCast(u ^ 1);
             return @abs(x - y);
         },
+        .comptime_float => {
+            const u: u128 = @bitCast(@as(f128, x));
+            const y: f128 = @bitCast(u ^ 1);
+            return @as(comptime_float, @abs(x - y));
+        },
         else => @compileError("floatEpsAt only supports floats"),
     }
 }
 
 /// Returns the inf value for a floating point `Type`.
 pub inline fn inf(comptime Type: type) Type {
-    const RuntimeType = switch (Type) {
-        else => Type,
-        comptime_float => f128, // any float type will do
+    return switch (@typeInfo(Type)) {
+        .float => reconstructFloat(Type, floatExponentMax(Type) + 1, mantissaOne(Type)),
+        .comptime_float => @compileError("comptime_float cannot be infinity"),
+        else => @compileError("unknown floating point type " ++ @typeName(Type)),
     };
-    return reconstructFloat(RuntimeType, floatExponentMax(RuntimeType) + 1, mantissaOne(RuntimeType));
 }
 
 /// Returns the canonical quiet NaN representation for a floating point `Type`.
 pub inline fn nan(comptime Type: type) Type {
-    const RuntimeType = switch (Type) {
-        else => Type,
-        comptime_float => f128, // any float type will do
+    return switch (@typeInfo(Type)) {
+        .float => reconstructFloat(Type, floatExponentMax(Type) + 1, mantissaOne(Type) | 1 << (floatFractionalBits(Type) - 1)),
+        .comptime_float => @compileError("comptime_float cannot be NaN"),
+        else => @compileError("unknown floating point type " ++ @typeName(Type)),
     };
-    return reconstructFloat(
-        RuntimeType,
-        floatExponentMax(RuntimeType) + 1,
-        mantissaOne(RuntimeType) | 1 << (floatFractionalBits(RuntimeType) - 1),
-    );
 }
 
 /// Returns a signalling NaN representation for a floating point `Type`.
 ///
 /// TODO: LLVM is known to miscompile on some architectures to quiet NaN -
 ///       this is tracked by https://github.com/ziglang/zig/issues/14366
 pub inline fn snan(comptime Type: type) Type {
-    const RuntimeType = switch (Type) {
-        else => Type,
-        comptime_float => f128, // any float type will do
+    return switch (@typeInfo(Type)) {
+        .float => reconstructFloat(Type, floatExponentMax(Type) + 1, mantissaOne(Type) | 1 << (floatFractionalBits(Type) - 2)),
+        .comptime_float => @compileError("comptime_float cannot be NaN"),
+        else => @compileError("unknown floating point type " ++ @typeName(Type)),
     };
-    return reconstructFloat(
-        RuntimeType,
-        floatExponentMax(RuntimeType) + 1,
-        mantissaOne(RuntimeType) | 1 << (floatFractionalBits(RuntimeType) - 2),
-    );
 }
 
 fn floatBits(comptime Type: type) !void {
     // (1 +) for the sign bit, since it is separate from the other bits
     const size = 1 + floatExponentBits(Type) + floatMantissaBits(Type);
-    try expect(@bitSizeOf(Type) == size);
+    if (@typeInfo(Type) == .float)
+        try expect(@bitSizeOf(Type) == size)
+    else
+        try expect(128 == size);
     try expect(floatFractionalBits(Type) <= floatMantissaBits(Type));
 
     // for machine epsilon, assert expmin <= -prec <= expmax
@@ -273,6 +284,8 @@ test floatBits {
     try floatBits(f80);
     try floatBits(f128);
     try floatBits(c_longdouble);
+    try floatBits(comptime_float);
+    try comptime floatBits(comptime_float);
 }
 
 test inf {
@@ -281,11 +294,11 @@ test inf {
     const inf_u64: u64 = 0x7FF0000000000000;
     const inf_u80: u80 = 0x7FFF8000000000000000;
     const inf_u128: u128 = 0x7FFF0000000000000000000000000000;
-    try expectEqual(inf_u16, @as(u16, @bitCast(inf(f16))));
-    try expectEqual(inf_u32, @as(u32, @bitCast(inf(f32))));
-    try expectEqual(inf_u64, @as(u64, @bitCast(inf(f64))));
-    try expectEqual(inf_u80, @as(u80, @bitCast(inf(f80))));
-    try expectEqual(inf_u128, @as(u128, @bitCast(inf(f128))));
+    try expect(inf_u16 == @as(u16, @bitCast(inf(f16))));
+    try expect(inf_u32 == @as(u32, @bitCast(inf(f32))));
+    try expect(inf_u64 == @as(u64, @bitCast(inf(f64))));
+    try expect(inf_u80 == @as(u80, @bitCast(inf(f80))));
+    try expect(inf_u128 == @as(u128, @bitCast(inf(f128))));
 }
 
 test nan {
@@ -294,11 +307,11 @@ test nan {
     const qnan_u64: u64 = 0x7FF8000000000000;
     const qnan_u80: u80 = 0x7FFFC000000000000000;
     const qnan_u128: u128 = 0x7FFF8000000000000000000000000000;
-    try expectEqual(qnan_u16, @as(u16, @bitCast(nan(f16))));
-    try expectEqual(qnan_u32, @as(u32, @bitCast(nan(f32))));
-    try expectEqual(qnan_u64, @as(u64, @bitCast(nan(f64))));
-    try expectEqual(qnan_u80, @as(u80, @bitCast(nan(f80))));
-    try expectEqual(qnan_u128, @as(u128, @bitCast(nan(f128))));
+    try expect(qnan_u16 == @as(u16, @bitCast(nan(f16))));
+    try expect(qnan_u32 == @as(u32, @bitCast(nan(f32))));
+    try expect(qnan_u64 == @as(u64, @bitCast(nan(f64))));
+    try expect(qnan_u80 == @as(u80, @bitCast(nan(f80))));
+    try expect(qnan_u128 == @as(u128, @bitCast(nan(f128))));
 }
 
 test snan {
@@ -307,9 +320,9 @@ test snan {
     const snan_u64: u64 = 0x7FF4000000000000;
     const snan_u80: u80 = 0x7FFFA000000000000000;
     const snan_u128: u128 = 0x7FFF4000000000000000000000000000;
-    try expectEqual(snan_u16, @as(u16, @bitCast(snan(f16))));
-    try expectEqual(snan_u32, @as(u32, @bitCast(snan(f32))));
-    try expectEqual(snan_u64, @as(u64, @bitCast(snan(f64))));
-    try expectEqual(snan_u80, @as(u80, @bitCast(snan(f80))));
-    try expectEqual(snan_u128, @as(u128, @bitCast(snan(f128))));
+    try expect(snan_u16 == @as(u16, @bitCast(snan(f16))));
+    try expect(snan_u32 == @as(u32, @bitCast(snan(f32))));
+    try expect(snan_u64 == @as(u64, @bitCast(snan(f64))));
+    try expect(snan_u80 == @as(u80, @bitCast(snan(f80))));
+    try expect(snan_u128 == @as(u128, @bitCast(snan(f128))));
 }