Commit `6b41beb370`

Jacob Young <jacobly0@users.noreply.github.com>

2025-06-14 20:32:11

big.int: implement float conversions

These conversion routines accept a `round` argument to control how the result is rounded and return whether the result is exact. Most callers wanted this functionality and had hacks around it being missing. Also delete `std.math.big.rational` because it was only being used for float conversion, and using rationals for that is a lot more complex than necessary. It also required an allocator, whereas the new integer routines only need to be passed enough memory to store the result.

master

1 parent 13392ad

Changed files (11)

lib

compiler

aro

Value.zig

std

math

big

zon

src

Sema

LowerZon.zig

Sema.zig

Value.zig

@@ -148,35 +148,25 @@ pub fn floatToInt(v: *Value, dest_ty: Type, comp: *Compilation) !FloatToIntChang
         return .out_of_range;
     }
 
-    const had_fraction = @rem(float_val, 1) != 0;
-    const is_negative = std.math.signbit(float_val);
-    const floored = @floor(@abs(float_val));
-
-    var rational = try std.math.big.Rational.init(comp.gpa);
-    defer rational.deinit();
-    rational.setFloat(f128, floored) catch |err| switch (err) {
-        error.NonFiniteFloat => {
-            v.* = .{};
-            return .overflow;
-        },
-        error.OutOfMemory => return error.OutOfMemory,
-    };
-
-    // The float is reduced in rational.setFloat, so we assert that denominator is equal to one
-    const big_one = BigIntConst{ .limbs = &.{1}, .positive = true };
-    assert(rational.q.toConst().eqlAbs(big_one));
-
-    if (is_negative) {
-        rational.negate();
-    }
-
     const signedness = dest_ty.signedness(comp);
     const bits: usize = @intCast(dest_ty.bitSizeof(comp).?);
 
-    // rational.p.truncate(rational.p.toConst(), signedness: Signedness, bit_count: usize)
-    const fits = rational.p.fitsInTwosComp(signedness, bits);
-    v.* = try intern(comp, .{ .int = .{ .big_int = rational.p.toConst() } });
-    try rational.p.truncate(&rational.p, signedness, bits);
+    // Scratch limbs sized for the larger of the float's integer value and the
+    // destination width, so `setFloat` and the in-place `truncate` both fit.
+    var big_int: std.math.big.int.Mutable = .{
+        .limbs = try comp.gpa.alloc(std.math.big.Limb, @max(
+            std.math.big.int.calcLimbLen(float_val),
+            std.math.big.int.calcTwosCompLimbCount(bits),
+        )),
+        .len = undefined,
+        .positive = undefined,
+    };
+    // Release the scratch limbs on every exit path, including a failed `intern`
+    // below; previously this allocation was never freed.
+    defer comp.gpa.free(big_int.limbs);
+    // Truncate toward zero; an inexact conversion means the float had a fraction.
+    const had_fraction = switch (big_int.setFloat(float_val, .trunc)) {
+        .inexact => true,
+        .exact => false,
+    };
+
+    const fits = big_int.toConst().fitsInTwosComp(signedness, bits);
+    v.* = try intern(comp, .{ .int = .{ .big_int = big_int.toConst() } });
+    big_int.truncate(big_int.toConst(), signedness, bits);
 
     if (!was_zero and v.isZero(comp)) return .nonzero_to_zero;
     if (!fits) return .out_of_range;

@@ -18,17 +18,28 @@ const Signedness = std.builtin.Signedness;
 const native_endian = builtin.cpu.arch.endian();
 
 /// Returns the number of limbs needed to store `scalar`, which must be a
-/// primitive integer value.
+/// primitive integer or float value.
 /// Note: A comptime-known upper bound of this value that may be used
 /// instead if `scalar` is not already comptime-known is
 /// `calcTwosCompLimbCount(@typeInfo(@TypeOf(scalar)).int.bits)`
 pub fn calcLimbLen(scalar: anytype) usize {
-    if (scalar == 0) {
-        return 1;
+    switch (@typeInfo(@TypeOf(scalar))) {
+        .int, .comptime_int => {
+            if (scalar == 0) return 1;
+            const w_value = @abs(scalar);
+            return @as(usize, @intCast(@divFloor(@as(Limb, @intCast(math.log2(w_value))), limb_bits) + 1));
+        },
+        .float => {
+            const repr: std.math.FloatRepr(@TypeOf(scalar)) = @bitCast(scalar);
+            return switch (repr.exponent) {
+                .denormal => 1,
+                else => return calcNonZeroTwosCompLimbCount(@as(usize, 2) + @max(repr.exponent.unbias(), 0)),
+                .infinite => 0,
+            };
+        },
+        .comptime_float => return calcLimbLen(@as(f128, scalar)),
+        else => @compileError("expected float or int, got " ++ @typeName(@TypeOf(scalar))),
     }
-
-    const w_value = @abs(scalar);
-    return @as(usize, @intCast(@divFloor(@as(Limb, @intCast(math.log2(w_value))), limb_bits) + 1));
 }
 
 pub fn calcToStringLimbsBufferLen(a_len: usize, base: u8) usize {
@@ -134,6 +145,22 @@ pub const TwosCompIntLimit = enum {
     max,
 };
 
+pub const Round = enum {
+    /// Round to the nearest representable value, with ties broken by the representation
+    /// that ends with a 0 bit.
+    nearest_even,
+    /// Round away from zero.
+    away,
+    /// Round towards zero.
+    trunc,
+    /// Round towards negative infinity.
+    floor,
+    /// Round towards positive infinity.
+    ceil,
+};
+
+pub const Exactness = enum { inexact, exact };
+
 /// An arbitrary-precision big integer, with a fixed set of mutable limbs.
 pub const Mutable = struct {
     /// Raw digits. These are:
@@ -155,6 +182,20 @@ pub const Mutable = struct {
         };
     }
 
+    pub const ConvertError = Const.ConvertError;
+
+    /// Convert `self` to `Int`.
+    ///
+    /// Returns an error if self cannot be narrowed into the requested type without truncation.
+    pub fn toInt(self: Mutable, comptime Int: type) ConvertError!Int {
+        return self.toConst().toInt(Int);
+    }
+
+    /// Convert `self` to `Float`.
+    pub fn toFloat(self: Mutable, comptime Float: type, round: Round) struct { Float, Exactness } {
+        return self.toConst().toFloat(Float, round);
+    }
+
     /// Returns true if `a == 0`.
     pub fn eqlZero(self: Mutable) bool {
         return self.toConst().eqlZero();
@@ -401,6 +442,65 @@ pub const Mutable = struct {
         }
     }
 
+    /// Sets `self` to the integer obtained by rounding the float `value` according to `round`.
+    /// Returns `.exact` when `value` was already an integer (so `round` had no effect), `.inexact` otherwise.
+    pub fn setFloat(self: *Mutable, value: anytype, round: Round) Exactness {
+        const Float = @TypeOf(value);
+        if (Float == comptime_float) return self.setFloat(@as(f128, value), round); // comptime floats are converted as f128
+        const abs_value = @abs(value);
+        if (abs_value < 1.0) { // |value| < 1: the result can only be -1, 0 or 1
+            if (abs_value == 0.0) { // covers both 0.0 and -0.0
+                self.set(0);
+                return .exact;
+            }
+            self.set(@as(i2, round: switch (round) {
+                .nearest_even => if (abs_value <= 0.5) 0 else continue :round .away, // a tie at 0.5 goes to 0, the even neighbour
+                .away => if (value < 0.0) -1 else 1,
+                .trunc => 0,
+                .floor => -@as(i2, @intFromBool(value < 0.0)), // -1 for negative values, 0 otherwise
+                .ceil => @intFromBool(value > 0.0), // 1 for positive values, 0 otherwise
+            }));
+            return .inexact; // a nonzero value below 1 in magnitude is never an integer
+        }
+        const Repr = std.math.FloatRepr(Float);
+        const repr: Repr = @bitCast(value);
+        const exponent = repr.exponent.unbias();
+        assert(exponent >= 0); // |value| >= 1 here, so the unbiased exponent is expected to be non-negative
+        const int_bit: Repr.Mantissa = 1 << (@bitSizeOf(Repr.Mantissa) - 1); // top bit of the mantissa type
+        const mantissa = int_bit | repr.mantissa; // stored mantissa with the top bit forced on
+        if (exponent >= @bitSizeOf(Repr.Normalized.Fraction)) { // no fractional bits remain: shift the mantissa up, always exact
+            self.set(mantissa);
+            self.shiftLeft(self.toConst(), @intCast(exponent - @bitSizeOf(Repr.Normalized.Fraction)));
+            self.positive = repr.sign == .positive;
+            return .exact;
+        }
+        self.set(mantissa >> @intCast(@bitSizeOf(Repr.Normalized.Fraction) - exponent)); // integer part of the magnitude (truncated)
+        const round_bits: Repr.Normalized.Fraction = @truncate(mantissa << @intCast(exponent)); // the bits shifted out above, left-aligned
+        if (round_bits == 0) { // nothing was discarded, so the value was an integer
+            self.positive = repr.sign == .positive;
+            return .exact;
+        }
+        round: switch (round) { // operates on the magnitude; the sign is applied afterwards
+            .nearest_even => {
+                const half: Repr.Normalized.Fraction = 1 << (@bitSizeOf(Repr.Normalized.Fraction) - 1);
+                if (round_bits >= half) self.addScalar(self.toConst(), 1); // at or above one half: round the magnitude up
+                if (round_bits == half) self.limbs[0] &= ~@as(Limb, 1); // exact tie: clear the low bit so the result is even
+            },
+            .away => self.addScalar(self.toConst(), 1),
+            .trunc => {},
+            .floor => switch (repr.sign) {
+                .positive => {},
+                .negative => continue :round .away, // towards -inf means a larger magnitude for negatives
+            },
+            .ceil => switch (repr.sign) {
+                .positive => continue :round .away, // towards +inf means a larger magnitude for positives
+                .negative => {},
+            },
+        }
+        self.positive = repr.sign == .positive;
+        return .inexact;
+    }
+
     /// r = a + scalar
     ///
     /// r and a may be aliases.
@@ -2117,25 +2217,25 @@ pub const Const = struct {
     /// Deprecated; use `toInt`.
     pub const to = toInt;
 
-    /// Convert self to integer type T.
+    /// Convert `self` to `Int`.
     ///
     /// Returns an error if self cannot be narrowed into the requested type without truncation.
-    pub fn toInt(self: Const, comptime T: type) ConvertError!T {
-        switch (@typeInfo(T)) {
+    pub fn toInt(self: Const, comptime Int: type) ConvertError!Int {
+        switch (@typeInfo(Int)) {
             .int => |info| {
                 // Make sure -0 is handled correctly.
                 if (self.eqlZero()) return 0;
 
-                const UT = std.meta.Int(.unsigned, info.bits);
+                const Unsigned = std.meta.Int(.unsigned, info.bits);
 
                 if (!self.fitsInTwosComp(info.signedness, info.bits)) {
                     return error.TargetTooSmall;
                 }
 
-                var r: UT = 0;
+                var r: Unsigned = 0;
 
-                if (@sizeOf(UT) <= @sizeOf(Limb)) {
-                    r = @as(UT, @intCast(self.limbs[0]));
+                if (@sizeOf(Unsigned) <= @sizeOf(Limb)) {
+                    r = @intCast(self.limbs[0]);
                 } else {
                     for (self.limbs[0..self.limbs.len], 0..) |_, ri| {
                         const limb = self.limbs[self.limbs.len - ri - 1];
@@ -2145,40 +2245,76 @@ pub const Const = struct {
                 }
 
                 if (info.signedness == .unsigned) {
-                    return if (self.positive) @as(T, @intCast(r)) else error.NegativeIntoUnsigned;
+                    return if (self.positive) @intCast(r) else error.NegativeIntoUnsigned;
                 } else {
                     if (self.positive) {
                         return @intCast(r);
                     } else {
-                        if (math.cast(T, r)) |ok| {
+                        if (math.cast(Int, r)) |ok| {
                             return -ok;
                         } else {
-                            return minInt(T);
+                            return minInt(Int);
                         }
                     }
                 }
             },
-            else => @compileError("expected int type, found '" ++ @typeName(T) ++ "'"),
+            else => @compileError("expected int type, found '" ++ @typeName(Int) ++ "'"),
         }
     }
 
-    /// Convert self to float type T.
-    pub fn toFloat(self: Const, comptime T: type) T {
-        if (self.limbs.len == 0) return 0;
-
-        const base = std.math.maxInt(std.math.big.Limb) + 1;
-        var result: f128 = 0;
-        var i: usize = self.limbs.len;
-        while (i != 0) {
-            i -= 1;
-            const limb: f128 = @floatFromInt(self.limbs[i]);
-            result = @mulAdd(f128, base, result, limb);
-        }
-        if (self.positive) {
-            return @floatCast(result);
-        } else {
-            return @floatCast(-result);
-        }
+    /// Convert `self` to `Float`, rounding according to `round`; also returns whether the result is exact.
+    pub fn toFloat(self: Const, comptime Float: type, round: Round) struct { Float, Exactness } {
+        if (Float == comptime_float) return self.toFloat(f128, round); // comptime floats are produced via f128
+        const normalized_abs: Const = .{
+            .limbs = self.limbs[0..llnormalize(self.limbs)], // presumably drops unused high zero limbs; confirm against `llnormalize`
+            .positive = true,
+        };
+        if (normalized_abs.eqlZero()) return .{ if (self.positive) 0.0 else -0.0, .exact }; // zero keeps the sign of `self`
+
+        const Repr = std.math.FloatRepr(Float);
+        var mantissa_limbs: [calcNonZeroTwosCompLimbCount(1 + @bitSizeOf(Repr.Mantissa))]Limb = undefined; // one bit more than the mantissa, for a rounding carry
+        var mantissa: Mutable = .{
+            .limbs = &mantissa_limbs,
+            .positive = undefined,
+            .len = undefined,
+        };
+        var exponent = normalized_abs.bitCountAbs() - 1; // bit count minus one; becomes the result's exponent below
+        const exactness: Exactness = exactness: {
+            if (exponent <= @bitSizeOf(Repr.Normalized.Fraction)) { // few enough bits: left-align without discarding any
+                mantissa.shiftLeft(normalized_abs, @intCast(@bitSizeOf(Repr.Normalized.Fraction) - exponent));
+                break :exactness .exact;
+            }
+            const shift: usize = @intCast(exponent - @bitSizeOf(Repr.Normalized.Fraction)); // number of low bits that do not fit
+            mantissa.shiftRight(normalized_abs, shift);
+            const final_limb_index = (shift - 1) / limb_bits; // limb holding the most significant discarded bit
+            const round_bits = normalized_abs.limbs[final_limb_index] << @truncate(-%shift) | // discarded bits of that limb, left-aligned
+                @intFromBool(!std.mem.allEqual(Limb, normalized_abs.limbs[0..final_limb_index], 0)); // low bit set if any lower limb is nonzero
+            if (round_bits == 0) break :exactness .exact; // only zero bits were discarded
+            round: switch (round) { // operates on the magnitude; the sign is applied when reconstructing
+                .nearest_even => {
+                    const half: Limb = 1 << (limb_bits - 1);
+                    if (round_bits >= half) mantissa.addScalar(mantissa.toConst(), 1); // at or above one half: round the magnitude up
+                    if (round_bits == half) mantissa.limbs[0] &= ~@as(Limb, 1); // exact tie: clear the low bit so the mantissa is even
+                },
+                .away => mantissa.addScalar(mantissa.toConst(), 1),
+                .trunc => {},
+                .floor => if (!self.positive) continue :round .away, // towards -inf grows the magnitude of negatives
+                .ceil => if (self.positive) continue :round .away, // towards +inf grows the magnitude of positives
+            }
+            break :exactness .inexact;
+        };
+        const normalized_res: Repr.Normalized = .{
+            .fraction = @truncate(mantissa.toInt(Repr.Mantissa) catch |err| switch (err) {
+                error.NegativeIntoUnsigned => unreachable,
+                error.TargetTooSmall => fraction: { // rounding carried past the top mantissa bit
+                    assert(mantissa.toConst().orderAgainstScalar(1 << @bitSizeOf(Repr.Mantissa)).compare(.eq)); // only an exact power of two can overflow here
+                    exponent += 1; // renormalize: one more exponent step, mantissa reset to just its top bit
+                    break :fraction 1 << (@bitSizeOf(Repr.Mantissa) - 1);
+                },
+            }),
+            .exponent = std.math.lossyCast(Repr.Normalized.Exponent, exponent),
+        };
+        return .{ normalized_res.reconstruct(if (self.positive) .positive else .negative), exactness };
     }
 
     /// To allow `std.fmt.format` to work with this type.
@@ -2739,16 +2875,16 @@ pub const Managed = struct {
     /// Deprecated; use `toInt`.
     pub const to = toInt;
 
-    /// Convert self to integer type T.
+    /// Convert `self` to `Int`.
     ///
     /// Returns an error if self cannot be narrowed into the requested type without truncation.
-    pub fn toInt(self: Managed, comptime T: type) ConvertError!T {
-        return self.toConst().toInt(T);
+    pub fn toInt(self: Managed, comptime Int: type) ConvertError!Int {
+        return self.toConst().toInt(Int);
     }
 
-    /// Convert self to float type T.
-    pub fn toFloat(self: Managed, comptime T: type) T {
-        return self.toConst().toFloat(T);
+    /// Convert `self` to `Float`.
+    pub fn toFloat(self: Managed, comptime Float: type, round: Round) struct { Float, Exactness } {
+        return self.toConst().toFloat(Float, round);
     }
 
     /// Set self from the string representation `value`.
@@ -3807,7 +3943,7 @@ fn llshr(r: []Limb, a: []const Limb, shift: usize) usize {
 
     // if the most significant limb becomes 0 after the shift
     const shrink = a[a.len - 1] >> bit_shift == 0;
-    std.debug.assert(r.len >= a.len - @intFromBool(!shrink));
+    std.debug.assert(r.len >= a.len - @intFromBool(shrink));
 
     var i: usize = 0;
     while (i < a.len - 1) : (i += 1) {
@@ -4240,7 +4376,7 @@ test {
 
 const testing_allocator = std.testing.allocator;
 test "llshl shift by whole number of limb" {
-    const padding = std.math.maxInt(Limb);
+    const padding = maxInt(Limb);
 
     var r: [10]Limb = @splat(padding);
 
@@ -4390,8 +4526,8 @@ test "llshr to 0" {
     try testOneShiftCase(.llshr, .{1,   &.{0}, &.{1}});
     try testOneShiftCase(.llshr, .{5,   &.{0}, &.{1}});
     try testOneShiftCase(.llshr, .{65,  &.{0}, &.{0, 1}});
-    try testOneShiftCase(.llshr, .{193, &.{0}, &.{0, 0, std.math.maxInt(Limb)}});
-    try testOneShiftCase(.llshr, .{193, &.{0}, &.{std.math.maxInt(Limb), 1, std.math.maxInt(Limb)}});
+    try testOneShiftCase(.llshr, .{193, &.{0}, &.{0, 0, maxInt(Limb)}});
+    try testOneShiftCase(.llshr, .{193, &.{0}, &.{maxInt(Limb), 1, maxInt(Limb)}});
     try testOneShiftCase(.llshr, .{193, &.{0}, &.{0xdeadbeef, 0xabcdefab, 0x1234}});
     // zig fmt: on
 }
@@ -4475,7 +4611,7 @@ fn testOneShiftCase(comptime function: enum { llshr, llshl }, case: Case) !void
 }
 
 fn testOneShiftCaseNoAliasing(func: fn ([]Limb, []const Limb, usize) usize, case: Case) !void {
-    const padding = std.math.maxInt(Limb);
+    const padding = maxInt(Limb);
     var r: [20]Limb = @splat(padding);
 
     const shift = case[0];
@@ -4492,7 +4628,7 @@ fn testOneShiftCaseNoAliasing(func: fn ([]Limb, []const Limb, usize) usize, case
 }
 
 fn testOneShiftCaseAliasing(func: fn ([]Limb, []const Limb, usize) usize, case: Case, shift_direction: isize) !void {
-    const padding = std.math.maxInt(Limb);
+    const padding = maxInt(Limb);
     var r: [60]Limb = @splat(padding);
     const base = 20;

@@ -17,6 +17,12 @@ const minInt = std.math.minInt;
 // They will still run on larger than this and should pass, but the multi-limb code-paths
 // may be untested in some cases.
 
+fn expectNormalized(expected: comptime_int, actual: std.math.big.int.Const) !void {
+    try testing.expectEqual(expected >= 0, actual.positive);
+    try testing.expectEqual(std.math.big.int.calcLimbLen(expected), actual.limbs.len);
+    try testing.expect(actual.orderAgainstScalar(expected).compare(.eq));
+}
+
 test "comptime_int set" {
     comptime var s = 0xefffffff00000001eeeeeeefaaaaaaab;
     var a = try Managed.initSet(testing.allocator, s);
@@ -85,6 +91,407 @@ test "to target too small error" {
     try testing.expectError(error.TargetTooSmall, a.toInt(u8));
 }
 
+fn setFloat(comptime Float: type) !void {
+    var res_limbs: [std.math.big.int.calcNonZeroTwosCompLimbCount(11)]Limb = undefined;
+    var res: Mutable = .{
+        .limbs = &res_limbs,
+        .len = undefined,
+        .positive = undefined,
+    };
+
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -0x1p10), .nearest_even));
+    try expectNormalized(-1 << 10, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -0x1p10), .away));
+    try expectNormalized(-1 << 10, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -0x1p10), .trunc));
+    try expectNormalized(-1 << 10, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -0x1p10), .floor));
+    try expectNormalized(-1 << 10, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -0x1p10), .ceil));
+    try expectNormalized(-1 << 10, res.toConst());
+
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -2.0), .nearest_even));
+    try expectNormalized(-2, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -2.0), .away));
+    try expectNormalized(-2, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -2.0), .trunc));
+    try expectNormalized(-2, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -2.0), .floor));
+    try expectNormalized(-2, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -2.0), .ceil));
+    try expectNormalized(-2, res.toConst());
+
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -1.5), .nearest_even));
+    try expectNormalized(-2, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -1.5), .away));
+    try expectNormalized(-2, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -1.5), .trunc));
+    try expectNormalized(-1, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -1.5), .floor));
+    try expectNormalized(-2, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -1.5), .ceil));
+    try expectNormalized(-1, res.toConst());
+
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -1.0), .nearest_even));
+    try expectNormalized(-1, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -1.0), .away));
+    try expectNormalized(-1, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -1.0), .trunc));
+    try expectNormalized(-1, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -1.0), .floor));
+    try expectNormalized(-1, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -1.0), .ceil));
+    try expectNormalized(-1, res.toConst());
+
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -0.75), .nearest_even));
+    try expectNormalized(-1, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -0.75), .away));
+    try expectNormalized(-1, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -0.75), .trunc));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -0.75), .floor));
+    try expectNormalized(-1, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -0.75), .ceil));
+    try expectNormalized(0, res.toConst());
+
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -0.5), .nearest_even));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -0.5), .away));
+    try expectNormalized(-1, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -0.5), .trunc));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -0.5), .floor));
+    try expectNormalized(-1, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -0.5), .ceil));
+    try expectNormalized(0, res.toConst());
+
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -0.25), .nearest_even));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -0.25), .away));
+    try expectNormalized(-1, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -0.25), .trunc));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -0.25), .floor));
+    try expectNormalized(-1, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, -0.25), .ceil));
+    try expectNormalized(0, res.toConst());
+
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -0.0), .nearest_even));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -0.0), .away));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -0.0), .trunc));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -0.0), .floor));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, -0.0), .ceil));
+    try expectNormalized(0, res.toConst());
+
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 0.0), .nearest_even));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 0.0), .away));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 0.0), .trunc));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 0.0), .floor));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 0.0), .ceil));
+    try expectNormalized(0, res.toConst());
+
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 0.25), .nearest_even));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 0.25), .away));
+    try expectNormalized(1, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 0.25), .trunc));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 0.25), .floor));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 0.25), .ceil));
+    try expectNormalized(1, res.toConst());
+
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 0.5), .nearest_even));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 0.5), .away));
+    try expectNormalized(1, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 0.5), .trunc));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 0.5), .floor));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 0.5), .ceil));
+    try expectNormalized(1, res.toConst());
+
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 0.75), .nearest_even));
+    try expectNormalized(1, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 0.75), .away));
+    try expectNormalized(1, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 0.75), .trunc));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 0.75), .floor));
+    try expectNormalized(0, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 0.75), .ceil));
+    try expectNormalized(1, res.toConst());
+
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 1.0), .nearest_even));
+    try expectNormalized(1, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 1.0), .away));
+    try expectNormalized(1, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 1.0), .trunc));
+    try expectNormalized(1, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 1.0), .floor));
+    try expectNormalized(1, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 1.0), .ceil));
+    try expectNormalized(1, res.toConst());
+
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 1.5), .nearest_even));
+    try expectNormalized(2, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 1.5), .away));
+    try expectNormalized(2, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 1.5), .trunc));
+    try expectNormalized(1, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 1.5), .floor));
+    try expectNormalized(1, res.toConst());
+    try testing.expectEqual(.inexact, res.setFloat(@as(Float, 1.5), .ceil));
+    try expectNormalized(2, res.toConst());
+
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 2.0), .nearest_even));
+    try expectNormalized(2, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 2.0), .away));
+    try expectNormalized(2, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 2.0), .trunc));
+    try expectNormalized(2, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 2.0), .floor));
+    try expectNormalized(2, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 2.0), .ceil));
+    try expectNormalized(2, res.toConst());
+
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 0x1p10), .nearest_even));
+    try expectNormalized(1 << 10, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 0x1p10), .away));
+    try expectNormalized(1 << 10, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 0x1p10), .trunc));
+    try expectNormalized(1 << 10, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 0x1p10), .floor));
+    try expectNormalized(1 << 10, res.toConst());
+    try testing.expectEqual(.exact, res.setFloat(@as(Float, 0x1p10), .ceil));
+    try expectNormalized(1 << 10, res.toConst());
+}
+test setFloat {
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
+
+    try setFloat(f16);
+    try setFloat(f32);
+    try setFloat(f64);
+    try setFloat(f80);
+    try setFloat(f128);
+    try setFloat(c_longdouble);
+    try setFloat(comptime_float);
+}
+
+fn toFloat(comptime Float: type) !void {
+    const Result = struct { Float, std.math.big.int.Exactness };
+    const fractional_bits = std.math.floatFractionalBits(Float);
+
+    var int_limbs: [
+        std.math.big.int.calcNonZeroTwosCompLimbCount(2 + fractional_bits)
+    ]Limb = undefined;
+    var int: Mutable = .{
+        .limbs = &int_limbs,
+        .len = undefined,
+        .positive = undefined,
+    };
+
+    int.set(-(1 << (fractional_bits + 1)) - 1);
+    try testing.expectEqual(
+        Result{ comptime -std.math.ldexp(@as(Float, 1), fractional_bits + 1), .inexact },
+        int.toFloat(Float, .nearest_even),
+    );
+    try testing.expectEqual(
+        Result{ comptime std.math.nextAfter(
+            Float,
+            -std.math.ldexp(@as(Float, 1), fractional_bits + 1),
+            -std.math.inf(Float),
+        ), .inexact },
+        int.toFloat(Float, .away),
+    );
+    try testing.expectEqual(
+        Result{ comptime -std.math.ldexp(@as(Float, 1), fractional_bits + 1), .inexact },
+        int.toFloat(Float, .trunc),
+    );
+    try testing.expectEqual(
+        Result{ comptime std.math.nextAfter(
+            Float,
+            -std.math.ldexp(@as(Float, 1), fractional_bits + 1),
+            -std.math.inf(Float),
+        ), .inexact },
+        int.toFloat(Float, .floor),
+    );
+    try testing.expectEqual(
+        Result{ comptime -std.math.ldexp(@as(Float, 1), fractional_bits + 1), .inexact },
+        int.toFloat(Float, .ceil),
+    );
+
+    int.set(-1 << (fractional_bits + 1));
+    try testing.expectEqual(
+        Result{ comptime -std.math.ldexp(@as(Float, 1), fractional_bits + 1), .exact },
+        int.toFloat(Float, .nearest_even),
+    );
+    try testing.expectEqual(
+        Result{ comptime -std.math.ldexp(@as(Float, 1), fractional_bits + 1), .exact },
+        int.toFloat(Float, .away),
+    );
+    try testing.expectEqual(
+        Result{ comptime -std.math.ldexp(@as(Float, 1), fractional_bits + 1), .exact },
+        int.toFloat(Float, .trunc),
+    );
+    try testing.expectEqual(
+        Result{ comptime -std.math.ldexp(@as(Float, 1), fractional_bits + 1), .exact },
+        int.toFloat(Float, .floor),
+    );
+    try testing.expectEqual(
+        Result{ comptime -std.math.ldexp(@as(Float, 1), fractional_bits + 1), .exact },
+        int.toFloat(Float, .ceil),
+    );
+
+    int.set(-(1 << (fractional_bits + 1)) + 1);
+    try testing.expectEqual(
+        Result{ comptime -std.math.ldexp(@as(Float, 1), fractional_bits + 1) + 1.0, .exact },
+        int.toFloat(Float, .nearest_even),
+    );
+    try testing.expectEqual(
+        Result{ comptime -std.math.ldexp(@as(Float, 1), fractional_bits + 1) + 1.0, .exact },
+        int.toFloat(Float, .away),
+    );
+    try testing.expectEqual(
+        Result{ comptime -std.math.ldexp(@as(Float, 1), fractional_bits + 1) + 1.0, .exact },
+        int.toFloat(Float, .trunc),
+    );
+    try testing.expectEqual(
+        Result{ comptime -std.math.ldexp(@as(Float, 1), fractional_bits + 1) + 1.0, .exact },
+        int.toFloat(Float, .floor),
+    );
+    try testing.expectEqual(
+        Result{ comptime -std.math.ldexp(@as(Float, 1), fractional_bits + 1) + 1.0, .exact },
+        int.toFloat(Float, .ceil),
+    );
+
+    int.set(-1 << 10);
+    try testing.expectEqual(Result{ -0x1p10, .exact }, int.toFloat(Float, .nearest_even));
+    try testing.expectEqual(Result{ -0x1p10, .exact }, int.toFloat(Float, .away));
+    try testing.expectEqual(Result{ -0x1p10, .exact }, int.toFloat(Float, .trunc));
+    try testing.expectEqual(Result{ -0x1p10, .exact }, int.toFloat(Float, .floor));
+    try testing.expectEqual(Result{ -0x1p10, .exact }, int.toFloat(Float, .ceil));
+
+    int.set(-1);
+    try testing.expectEqual(Result{ -1.0, .exact }, int.toFloat(Float, .nearest_even));
+    try testing.expectEqual(Result{ -1.0, .exact }, int.toFloat(Float, .away));
+    try testing.expectEqual(Result{ -1.0, .exact }, int.toFloat(Float, .trunc));
+    try testing.expectEqual(Result{ -1.0, .exact }, int.toFloat(Float, .floor));
+    try testing.expectEqual(Result{ -1.0, .exact }, int.toFloat(Float, .ceil));
+
+    int.set(0);
+    try testing.expectEqual(Result{ 0.0, .exact }, int.toFloat(Float, .nearest_even));
+    try testing.expectEqual(Result{ 0.0, .exact }, int.toFloat(Float, .away));
+    try testing.expectEqual(Result{ 0.0, .exact }, int.toFloat(Float, .trunc));
+    try testing.expectEqual(Result{ 0.0, .exact }, int.toFloat(Float, .floor));
+    try testing.expectEqual(Result{ 0.0, .exact }, int.toFloat(Float, .ceil));
+
+    int.set(1);
+    try testing.expectEqual(Result{ 1.0, .exact }, int.toFloat(Float, .nearest_even));
+    try testing.expectEqual(Result{ 1.0, .exact }, int.toFloat(Float, .away));
+    try testing.expectEqual(Result{ 1.0, .exact }, int.toFloat(Float, .trunc));
+    try testing.expectEqual(Result{ 1.0, .exact }, int.toFloat(Float, .floor));
+    try testing.expectEqual(Result{ 1.0, .exact }, int.toFloat(Float, .ceil));
+
+    int.set(1 << 10);
+    try testing.expectEqual(Result{ 0x1p10, .exact }, int.toFloat(Float, .nearest_even));
+    try testing.expectEqual(Result{ 0x1p10, .exact }, int.toFloat(Float, .away));
+    try testing.expectEqual(Result{ 0x1p10, .exact }, int.toFloat(Float, .trunc));
+    try testing.expectEqual(Result{ 0x1p10, .exact }, int.toFloat(Float, .floor));
+    try testing.expectEqual(Result{ 0x1p10, .exact }, int.toFloat(Float, .ceil));
+
+    int.set((1 << (fractional_bits + 1)) - 1);
+    try testing.expectEqual(
+        Result{ comptime std.math.ldexp(@as(Float, 1), fractional_bits + 1) - 1.0, .exact },
+        int.toFloat(Float, .nearest_even),
+    );
+    try testing.expectEqual(
+        Result{ comptime std.math.ldexp(@as(Float, 1), fractional_bits + 1) - 1.0, .exact },
+        int.toFloat(Float, .away),
+    );
+    try testing.expectEqual(
+        Result{ comptime std.math.ldexp(@as(Float, 1), fractional_bits + 1) - 1.0, .exact },
+        int.toFloat(Float, .trunc),
+    );
+    try testing.expectEqual(
+        Result{ comptime std.math.ldexp(@as(Float, 1), fractional_bits + 1) - 1.0, .exact },
+        int.toFloat(Float, .floor),
+    );
+    try testing.expectEqual(
+        Result{ comptime std.math.ldexp(@as(Float, 1), fractional_bits + 1) - 1.0, .exact },
+        int.toFloat(Float, .ceil),
+    );
+
+    int.set(1 << (fractional_bits + 1));
+    try testing.expectEqual(
+        Result{ comptime std.math.ldexp(@as(Float, 1), fractional_bits + 1), .exact },
+        int.toFloat(Float, .nearest_even),
+    );
+    try testing.expectEqual(
+        Result{ comptime std.math.ldexp(@as(Float, 1), fractional_bits + 1), .exact },
+        int.toFloat(Float, .away),
+    );
+    try testing.expectEqual(
+        Result{ comptime std.math.ldexp(@as(Float, 1), fractional_bits + 1), .exact },
+        int.toFloat(Float, .trunc),
+    );
+    try testing.expectEqual(
+        Result{ comptime std.math.ldexp(@as(Float, 1), fractional_bits + 1), .exact },
+        int.toFloat(Float, .floor),
+    );
+    try testing.expectEqual(
+        Result{ comptime std.math.ldexp(@as(Float, 1), fractional_bits + 1), .exact },
+        int.toFloat(Float, .ceil),
+    );
+
+    int.set((1 << (fractional_bits + 1)) + 1);
+    try testing.expectEqual(
+        Result{ comptime std.math.ldexp(@as(Float, 1), fractional_bits + 1), .inexact },
+        int.toFloat(Float, .nearest_even),
+    );
+    try testing.expectEqual(
+        Result{ comptime std.math.nextAfter(
+            Float,
+            std.math.ldexp(@as(Float, 1), fractional_bits + 1),
+            std.math.inf(Float),
+        ), .inexact },
+        int.toFloat(Float, .away),
+    );
+    try testing.expectEqual(
+        Result{ comptime std.math.ldexp(@as(Float, 1), fractional_bits + 1), .inexact },
+        int.toFloat(Float, .trunc),
+    );
+    try testing.expectEqual(
+        Result{ comptime std.math.ldexp(@as(Float, 1), fractional_bits + 1), .inexact },
+        int.toFloat(Float, .floor),
+    );
+    try testing.expectEqual(
+        Result{ comptime std.math.nextAfter(
+            Float,
+            std.math.ldexp(@as(Float, 1), fractional_bits + 1),
+            std.math.inf(Float),
+        ), .inexact },
+        int.toFloat(Float, .ceil),
+    );
+}
+// Drives the `toFloat` helper above with every float width under test.
+// `inline for` is required because the tuple elements are types, which only
+// exist at compile time; `try` stops at the first failing expectation.
+test toFloat {
+    inline for (.{ f16, f32, f64, f80, f128, c_longdouble }) |T| {
+        try toFloat(T);
+    }
+}
+
 test "normalize" {
     var a = try Managed.init(testing.allocator);
     defer a.deinit();

@@ -1,820 +0,0 @@
-const std = @import("../../std.zig");
-const builtin = @import("builtin");
-const debug = std.debug;
-const math = std.math;
-const mem = std.mem;
-const testing = std.testing;
-const Allocator = mem.Allocator;
-
-const Limb = std.math.big.Limb;
-const DoubleLimb = std.math.big.DoubleLimb;
-const Int = std.math.big.int.Managed;
-const IntConst = std.math.big.int.Const;
-
-/// An arbitrary-precision rational number.
-///
-/// Memory is allocated as needed for operations to ensure full precision is kept. The precision
-/// of a Rational is only bounded by memory.
-///
-/// Rational's are always normalized. That is, for a Rational r = p/q where p and q are integers,
-/// gcd(p, q) = 1 always.
-///
-/// TODO rework this to store its own allocator and use a non-managed big int, to avoid double
-/// allocator storage.
-pub const Rational = struct {
-    /// Numerator. Determines the sign of the Rational.
-    p: Int,
-
-    /// Denominator. Sign is ignored.
-    q: Int,
-
-    /// Create a new Rational. A small amount of memory will be allocated on initialization.
-    /// This will be 2 * Int.default_capacity.
-    pub fn init(a: Allocator) !Rational {
-        var p = try Int.init(a);
-        errdefer p.deinit();
-        return Rational{
-            .p = p,
-            .q = try Int.initSet(a, 1),
-        };
-    }
-
-    /// Frees all memory associated with a Rational.
-    pub fn deinit(self: *Rational) void {
-        self.p.deinit();
-        self.q.deinit();
-    }
-
-    /// Set a Rational from a primitive integer type.
-    pub fn setInt(self: *Rational, a: anytype) !void {
-        try self.p.set(a);
-        try self.q.set(1);
-    }
-
-    /// Set a Rational from a string of the form `A/B` where A and B are base-10 integers.
-    pub fn setFloatString(self: *Rational, str: []const u8) !void {
-        // TODO: Accept a/b fractions and exponent form
-        if (str.len == 0) {
-            return error.InvalidFloatString;
-        }
-
-        const State = enum {
-            Integer,
-            Fractional,
-        };
-
-        var state = State.Integer;
-        var point: ?usize = null;
-
-        var start: usize = 0;
-        if (str[0] == '-') {
-            start += 1;
-        }
-
-        for (str, 0..) |c, i| {
-            switch (state) {
-                State.Integer => {
-                    switch (c) {
-                        '.' => {
-                            state = State.Fractional;
-                            point = i;
-                        },
-                        '0'...'9' => {
-                            // okay
-                        },
-                        else => {
-                            return error.InvalidFloatString;
-                        },
-                    }
-                },
-                State.Fractional => {
-                    switch (c) {
-                        '0'...'9' => {
-                            // okay
-                        },
-                        else => {
-                            return error.InvalidFloatString;
-                        },
-                    }
-                },
-            }
-        }
-
-        // TODO: batch the multiplies by 10
-        if (point) |i| {
-            try self.p.setString(10, str[0..i]);
-
-            const base = IntConst{ .limbs = &[_]Limb{10}, .positive = true };
-            var local_buf: [@sizeOf(Limb) * Int.default_capacity]u8 align(@alignOf(Limb)) = undefined;
-            var fba = std.heap.FixedBufferAllocator.init(&local_buf);
-            const base_managed = try base.toManaged(fba.allocator());
-
-            var j: usize = start;
-            while (j < str.len - i - 1) : (j += 1) {
-                try self.p.ensureMulCapacity(self.p.toConst(), base);
-                try self.p.mul(&self.p, &base_managed);
-            }
-
-            try self.q.setString(10, str[i + 1 ..]);
-            try self.p.add(&self.p, &self.q);
-
-            try self.q.set(1);
-            var k: usize = i + 1;
-            while (k < str.len) : (k += 1) {
-                try self.q.mul(&self.q, &base_managed);
-            }
-
-            try self.reduce();
-        } else {
-            try self.p.setString(10, str[0..]);
-            try self.q.set(1);
-        }
-    }
-
-    /// Set a Rational from a floating-point value. The rational will have enough precision to
-    /// completely represent the provided float.
-    pub fn setFloat(self: *Rational, comptime T: type, f: T) !void {
-        // Translated from golang.go/src/math/big/rat.go.
-        debug.assert(@typeInfo(T) == .float);
-
-        const UnsignedInt = std.meta.Int(.unsigned, @typeInfo(T).float.bits);
-        const f_bits = @as(UnsignedInt, @bitCast(f));
-
-        const exponent_bits = math.floatExponentBits(T);
-        const exponent_bias = (1 << (exponent_bits - 1)) - 1;
-        const mantissa_bits = math.floatMantissaBits(T);
-
-        const exponent_mask = (1 << exponent_bits) - 1;
-        const mantissa_mask = (1 << mantissa_bits) - 1;
-
-        var exponent = @as(i16, @intCast((f_bits >> mantissa_bits) & exponent_mask));
-        var mantissa = f_bits & mantissa_mask;
-
-        switch (exponent) {
-            exponent_mask => {
-                return error.NonFiniteFloat;
-            },
-            0 => {
-                // denormal
-                exponent -= exponent_bias - 1;
-            },
-            else => {
-                // normal
-                mantissa |= 1 << mantissa_bits;
-                exponent -= exponent_bias;
-            },
-        }
-
-        var shift: i16 = mantissa_bits - exponent;
-
-        // factor out powers of two early from rational
-        while (mantissa & 1 == 0 and shift > 0) {
-            mantissa >>= 1;
-            shift -= 1;
-        }
-
-        try self.p.set(mantissa);
-        self.p.setSign(f >= 0);
-
-        try self.q.set(1);
-        if (shift >= 0) {
-            try self.q.shiftLeft(&self.q, @as(usize, @intCast(shift)));
-        } else {
-            try self.p.shiftLeft(&self.p, @as(usize, @intCast(-shift)));
-        }
-
-        try self.reduce();
-    }
-
-    /// Return a floating-point value that is the closest value to a Rational.
-    ///
-    /// The result may not be exact if the Rational is too precise or too large for the
-    /// target type.
-    pub fn toFloat(self: Rational, comptime T: type) !T {
-        // Translated from golang.go/src/math/big/rat.go.
-        // TODO: Indicate whether the result is not exact.
-        debug.assert(@typeInfo(T) == .float);
-
-        const fsize = @typeInfo(T).float.bits;
-        const BitReprType = std.meta.Int(.unsigned, fsize);
-
-        const msize = math.floatMantissaBits(T);
-        const msize1 = msize + 1;
-        const msize2 = msize1 + 1;
-
-        const esize = math.floatExponentBits(T);
-        const ebias = (1 << (esize - 1)) - 1;
-        const emin = 1 - ebias;
-
-        if (self.p.eqlZero()) {
-            return 0;
-        }
-
-        // 1. left-shift a or sub so that a/b is in [1 << msize1, 1 << (msize2 + 1)]
-        var exp = @as(isize, @intCast(self.p.bitCountTwosComp())) - @as(isize, @intCast(self.q.bitCountTwosComp()));
-
-        var a2 = try self.p.clone();
-        defer a2.deinit();
-
-        var b2 = try self.q.clone();
-        defer b2.deinit();
-
-        const shift = msize2 - exp;
-        if (shift >= 0) {
-            try a2.shiftLeft(&a2, @as(usize, @intCast(shift)));
-        } else {
-            try b2.shiftLeft(&b2, @as(usize, @intCast(-shift)));
-        }
-
-        // 2. compute quotient and remainder
-        var q = try Int.init(self.p.allocator);
-        defer q.deinit();
-
-        // unused
-        var r = try Int.init(self.p.allocator);
-        defer r.deinit();
-
-        try Int.divTrunc(&q, &r, &a2, &b2);
-
-        var mantissa = extractLowBits(q, BitReprType);
-        var have_rem = r.len() > 0;
-
-        // 3. q didn't fit in msize2 bits, redo division b2 << 1
-        if (mantissa >> msize2 == 1) {
-            if (mantissa & 1 == 1) {
-                have_rem = true;
-            }
-            mantissa >>= 1;
-            exp += 1;
-        }
-        if (mantissa >> msize1 != 1) {
-            // NOTE: This can be hit if the limb size is small (u8/16).
-            @panic("unexpected bits in result");
-        }
-
-        // 4. Rounding
-        if (emin - msize <= exp and exp <= emin) {
-            // denormal
-            const shift1 = @as(math.Log2Int(BitReprType), @intCast(emin - (exp - 1)));
-            const lost_bits = mantissa & ((@as(BitReprType, @intCast(1)) << shift1) - 1);
-            have_rem = have_rem or lost_bits != 0;
-            mantissa >>= shift1;
-            exp = 2 - ebias;
-        }
-
-        // round q using round-half-to-even
-        var exact = !have_rem;
-        if (mantissa & 1 != 0) {
-            exact = false;
-            if (have_rem or (mantissa & 2 != 0)) {
-                mantissa += 1;
-                if (mantissa >= 1 << msize2) {
-                    // 11...1 => 100...0
-                    mantissa >>= 1;
-                    exp += 1;
-                }
-            }
-        }
-        mantissa >>= 1;
-
-        const f = math.scalbn(@as(T, @floatFromInt(mantissa)), @as(i32, @intCast(exp - msize1)));
-        if (math.isInf(f)) {
-            exact = false;
-        }
-
-        return if (self.p.isPositive()) f else -f;
-    }
-
-    /// Set a rational from an integer ratio.
-    pub fn setRatio(self: *Rational, p: anytype, q: anytype) !void {
-        try self.p.set(p);
-        try self.q.set(q);
-
-        self.p.setSign(@intFromBool(self.p.isPositive()) ^ @intFromBool(self.q.isPositive()) == 0);
-        self.q.setSign(true);
-
-        try self.reduce();
-
-        if (self.q.eqlZero()) {
-            @panic("cannot set rational with denominator = 0");
-        }
-    }
-
-    /// Set a Rational directly from an Int.
-    pub fn copyInt(self: *Rational, a: Int) !void {
-        try self.p.copy(a.toConst());
-        try self.q.set(1);
-    }
-
-    /// Set a Rational directly from a ratio of two Int's.
-    pub fn copyRatio(self: *Rational, a: Int, b: Int) !void {
-        try self.p.copy(a.toConst());
-        try self.q.copy(b.toConst());
-
-        self.p.setSign(@intFromBool(self.p.isPositive()) ^ @intFromBool(self.q.isPositive()) == 0);
-        self.q.setSign(true);
-
-        try self.reduce();
-    }
-
-    /// Make a Rational positive.
-    pub fn abs(r: *Rational) void {
-        r.p.abs();
-    }
-
-    /// Negate the sign of a Rational.
-    pub fn negate(r: *Rational) void {
-        r.p.negate();
-    }
-
-    /// Efficiently swap a Rational with another. This swaps the limb pointers and a full copy is not
-    /// performed. The address of the limbs field will not be the same after this function.
-    pub fn swap(r: *Rational, other: *Rational) void {
-        r.p.swap(&other.p);
-        r.q.swap(&other.q);
-    }
-
-    /// Returns math.Order.lt, math.Order.eq, math.Order.gt if a < b, a == b or
-    /// a > b respectively.
-    pub fn order(a: Rational, b: Rational) !math.Order {
-        return cmpInternal(a, b, false);
-    }
-
-    /// Returns math.Order.lt, math.Order.eq, math.Order.gt if |a| < |b|, |a| ==
-    /// |b| or |a| > |b| respectively.
-    pub fn orderAbs(a: Rational, b: Rational) !math.Order {
-        return cmpInternal(a, b, true);
-    }
-
-    // p/q > x/y iff p*y > x*q
-    fn cmpInternal(a: Rational, b: Rational, is_abs: bool) !math.Order {
-        // TODO: Would a div compare algorithm of sorts be viable and quicker? Can we avoid
-        // the memory allocations here?
-        var q = try Int.init(a.p.allocator);
-        defer q.deinit();
-
-        var p = try Int.init(b.p.allocator);
-        defer p.deinit();
-
-        try q.mul(&a.p, &b.q);
-        try p.mul(&b.p, &a.q);
-
-        return if (is_abs) q.orderAbs(p) else q.order(p);
-    }
-
-    /// rma = a + b.
-    ///
-    /// rma, a and b may be aliases. However, it is more efficient if rma does not alias a or b.
-    ///
-    /// Returns an error if memory could not be allocated.
-    pub fn add(rma: *Rational, a: Rational, b: Rational) !void {
-        var r = rma;
-        var aliased = rma.p.limbs.ptr == a.p.limbs.ptr or rma.p.limbs.ptr == b.p.limbs.ptr;
-
-        var sr: Rational = undefined;
-        if (aliased) {
-            sr = try Rational.init(rma.p.allocator);
-            r = &sr;
-            aliased = true;
-        }
-        defer if (aliased) {
-            rma.swap(r);
-            r.deinit();
-        };
-
-        try r.p.mul(&a.p, &b.q);
-        try r.q.mul(&b.p, &a.q);
-        try r.p.add(&r.p, &r.q);
-
-        try r.q.mul(&a.q, &b.q);
-        try r.reduce();
-    }
-
-    /// rma = a - b.
-    ///
-    /// rma, a and b may be aliases. However, it is more efficient if rma does not alias a or b.
-    ///
-    /// Returns an error if memory could not be allocated.
-    pub fn sub(rma: *Rational, a: Rational, b: Rational) !void {
-        var r = rma;
-        var aliased = rma.p.limbs.ptr == a.p.limbs.ptr or rma.p.limbs.ptr == b.p.limbs.ptr;
-
-        var sr: Rational = undefined;
-        if (aliased) {
-            sr = try Rational.init(rma.p.allocator);
-            r = &sr;
-            aliased = true;
-        }
-        defer if (aliased) {
-            rma.swap(r);
-            r.deinit();
-        };
-
-        try r.p.mul(&a.p, &b.q);
-        try r.q.mul(&b.p, &a.q);
-        try r.p.sub(&r.p, &r.q);
-
-        try r.q.mul(&a.q, &b.q);
-        try r.reduce();
-    }
-
-    /// rma = a * b.
-    ///
-    /// rma, a and b may be aliases. However, it is more efficient if rma does not alias a or b.
-    ///
-    /// Returns an error if memory could not be allocated.
-    pub fn mul(r: *Rational, a: Rational, b: Rational) !void {
-        try r.p.mul(&a.p, &b.p);
-        try r.q.mul(&a.q, &b.q);
-        try r.reduce();
-    }
-
-    /// rma = a / b.
-    ///
-    /// rma, a and b may be aliases. However, it is more efficient if rma does not alias a or b.
-    ///
-    /// Returns an error if memory could not be allocated.
-    pub fn div(r: *Rational, a: Rational, b: Rational) !void {
-        if (b.p.eqlZero()) {
-            @panic("division by zero");
-        }
-
-        try r.p.mul(&a.p, &b.q);
-        try r.q.mul(&b.p, &a.q);
-        try r.reduce();
-    }
-
-    /// Invert the numerator and denominator fields of a Rational. p/q => q/p.
-    pub fn invert(r: *Rational) void {
-        Int.swap(&r.p, &r.q);
-    }
-
-    // reduce r/q such that gcd(r, q) = 1
-    fn reduce(r: *Rational) !void {
-        var a = try Int.init(r.p.allocator);
-        defer a.deinit();
-
-        const sign = r.p.isPositive();
-        r.p.abs();
-        try a.gcd(&r.p, &r.q);
-        r.p.setSign(sign);
-
-        const one = IntConst{ .limbs = &[_]Limb{1}, .positive = true };
-        if (a.toConst().order(one) != .eq) {
-            var unused = try Int.init(r.p.allocator);
-            defer unused.deinit();
-
-            // TODO: divexact would be useful here
-            // TODO: don't copy r.q for div
-            try Int.divTrunc(&r.p, &unused, &r.p, &a);
-            try Int.divTrunc(&r.q, &unused, &r.q, &a);
-        }
-    }
-};
-
-fn extractLowBits(a: Int, comptime T: type) T {
-    debug.assert(@typeInfo(T) == .int);
-
-    const t_bits = @typeInfo(T).int.bits;
-    const limb_bits = @typeInfo(Limb).int.bits;
-    if (t_bits <= limb_bits) {
-        return @as(T, @truncate(a.limbs[0]));
-    } else {
-        var r: T = 0;
-        comptime var i: usize = 0;
-
-        // Remainder is always 0 since if t_bits >= limb_bits -> Limb | T and both
-        // are powers of two.
-        inline while (i < t_bits / limb_bits) : (i += 1) {
-            r |= math.shl(T, a.limbs[i], i * limb_bits);
-        }
-
-        return r;
-    }
-}
-
-test extractLowBits {
-    var a = try Int.initSet(testing.allocator, 0x11112222333344441234567887654321);
-    defer a.deinit();
-
-    const a1 = extractLowBits(a, u8);
-    try testing.expect(a1 == 0x21);
-
-    const a2 = extractLowBits(a, u16);
-    try testing.expect(a2 == 0x4321);
-
-    const a3 = extractLowBits(a, u32);
-    try testing.expect(a3 == 0x87654321);
-
-    const a4 = extractLowBits(a, u64);
-    try testing.expect(a4 == 0x1234567887654321);
-
-    const a5 = extractLowBits(a, u128);
-    try testing.expect(a5 == 0x11112222333344441234567887654321);
-}
-
-test "set" {
-    var a = try Rational.init(testing.allocator);
-    defer a.deinit();
-
-    try a.setInt(5);
-    try testing.expect((try a.p.toInt(u32)) == 5);
-    try testing.expect((try a.q.toInt(u32)) == 1);
-
-    try a.setRatio(7, 3);
-    try testing.expect((try a.p.toInt(u32)) == 7);
-    try testing.expect((try a.q.toInt(u32)) == 3);
-
-    try a.setRatio(9, 3);
-    try testing.expect((try a.p.toInt(i32)) == 3);
-    try testing.expect((try a.q.toInt(i32)) == 1);
-
-    try a.setRatio(-9, 3);
-    try testing.expect((try a.p.toInt(i32)) == -3);
-    try testing.expect((try a.q.toInt(i32)) == 1);
-
-    try a.setRatio(9, -3);
-    try testing.expect((try a.p.toInt(i32)) == -3);
-    try testing.expect((try a.q.toInt(i32)) == 1);
-
-    try a.setRatio(-9, -3);
-    try testing.expect((try a.p.toInt(i32)) == 3);
-    try testing.expect((try a.q.toInt(i32)) == 1);
-}
-
-test "setFloat" {
-    var a = try Rational.init(testing.allocator);
-    defer a.deinit();
-
-    try a.setFloat(f64, 2.5);
-    try testing.expect((try a.p.toInt(i32)) == 5);
-    try testing.expect((try a.q.toInt(i32)) == 2);
-
-    try a.setFloat(f32, -2.5);
-    try testing.expect((try a.p.toInt(i32)) == -5);
-    try testing.expect((try a.q.toInt(i32)) == 2);
-
-    try a.setFloat(f32, 3.141593);
-
-    //                = 3.14159297943115234375
-    try testing.expect((try a.p.toInt(u32)) == 3294199);
-    try testing.expect((try a.q.toInt(u32)) == 1048576);
-
-    try a.setFloat(f64, 72.141593120712409172417410926841290461290467124);
-
-    //                = 72.1415931207124145885245525278151035308837890625
-    try testing.expect((try a.p.toInt(u128)) == 5076513310880537);
-    try testing.expect((try a.q.toInt(u128)) == 70368744177664);
-}
-
-test "setFloatString" {
-    var a = try Rational.init(testing.allocator);
-    defer a.deinit();
-
-    try a.setFloatString("72.14159312071241458852455252781510353");
-
-    //                  = 72.1415931207124145885245525278151035308837890625
-    try testing.expect((try a.p.toInt(u128)) == 7214159312071241458852455252781510353);
-    try testing.expect((try a.q.toInt(u128)) == 100000000000000000000000000000000000);
-}
-
-test "toFloat" {
-    var a = try Rational.init(testing.allocator);
-    defer a.deinit();
-
-    // = 3.14159297943115234375
-    try a.setRatio(3294199, 1048576);
-    try testing.expect((try a.toFloat(f64)) == 3.14159297943115234375);
-
-    // = 72.1415931207124145885245525278151035308837890625
-    try a.setRatio(5076513310880537, 70368744177664);
-    try testing.expect((try a.toFloat(f64)) == 72.141593120712409172417410926841290461290467124);
-}
-
-test "set/to Float round-trip" {
-    var a = try Rational.init(testing.allocator);
-    defer a.deinit();
-    var prng = std.Random.DefaultPrng.init(std.testing.random_seed);
-    const random = prng.random();
-    var i: usize = 0;
-    while (i < 512) : (i += 1) {
-        const r = random.float(f64);
-        try a.setFloat(f64, r);
-        try testing.expect((try a.toFloat(f64)) == r);
-    }
-}
-
-test "copy" {
-    var a = try Rational.init(testing.allocator);
-    defer a.deinit();
-
-    var b = try Int.initSet(testing.allocator, 5);
-    defer b.deinit();
-
-    try a.copyInt(b);
-    try testing.expect((try a.p.toInt(u32)) == 5);
-    try testing.expect((try a.q.toInt(u32)) == 1);
-
-    var c = try Int.initSet(testing.allocator, 7);
-    defer c.deinit();
-    var d = try Int.initSet(testing.allocator, 3);
-    defer d.deinit();
-
-    try a.copyRatio(c, d);
-    try testing.expect((try a.p.toInt(u32)) == 7);
-    try testing.expect((try a.q.toInt(u32)) == 3);
-
-    var e = try Int.initSet(testing.allocator, 9);
-    defer e.deinit();
-    var f = try Int.initSet(testing.allocator, 3);
-    defer f.deinit();
-
-    try a.copyRatio(e, f);
-    try testing.expect((try a.p.toInt(u32)) == 3);
-    try testing.expect((try a.q.toInt(u32)) == 1);
-}
-
-test "negate" {
-    var a = try Rational.init(testing.allocator);
-    defer a.deinit();
-
-    try a.setInt(-50);
-    try testing.expect((try a.p.toInt(i32)) == -50);
-    try testing.expect((try a.q.toInt(i32)) == 1);
-
-    a.negate();
-    try testing.expect((try a.p.toInt(i32)) == 50);
-    try testing.expect((try a.q.toInt(i32)) == 1);
-
-    a.negate();
-    try testing.expect((try a.p.toInt(i32)) == -50);
-    try testing.expect((try a.q.toInt(i32)) == 1);
-}
-
-test "abs" {
-    var a = try Rational.init(testing.allocator);
-    defer a.deinit();
-
-    try a.setInt(-50);
-    try testing.expect((try a.p.toInt(i32)) == -50);
-    try testing.expect((try a.q.toInt(i32)) == 1);
-
-    a.abs();
-    try testing.expect((try a.p.toInt(i32)) == 50);
-    try testing.expect((try a.q.toInt(i32)) == 1);
-
-    a.abs();
-    try testing.expect((try a.p.toInt(i32)) == 50);
-    try testing.expect((try a.q.toInt(i32)) == 1);
-}
-
-test "swap" {
-    var a = try Rational.init(testing.allocator);
-    defer a.deinit();
-    var b = try Rational.init(testing.allocator);
-    defer b.deinit();
-
-    try a.setRatio(50, 23);
-    try b.setRatio(17, 3);
-
-    try testing.expect((try a.p.toInt(u32)) == 50);
-    try testing.expect((try a.q.toInt(u32)) == 23);
-
-    try testing.expect((try b.p.toInt(u32)) == 17);
-    try testing.expect((try b.q.toInt(u32)) == 3);
-
-    a.swap(&b);
-
-    try testing.expect((try a.p.toInt(u32)) == 17);
-    try testing.expect((try a.q.toInt(u32)) == 3);
-
-    try testing.expect((try b.p.toInt(u32)) == 50);
-    try testing.expect((try b.q.toInt(u32)) == 23);
-}
-
-test "order" {
-    var a = try Rational.init(testing.allocator);
-    defer a.deinit();
-    var b = try Rational.init(testing.allocator);
-    defer b.deinit();
-
-    try a.setRatio(500, 231);
-    try b.setRatio(18903, 8584);
-    try testing.expect((try a.order(b)) == .lt);
-
-    try a.setRatio(890, 10);
-    try b.setRatio(89, 1);
-    try testing.expect((try a.order(b)) == .eq);
-}
-
-test "order/orderAbs with negative" {
-    var a = try Rational.init(testing.allocator);
-    defer a.deinit();
-    var b = try Rational.init(testing.allocator);
-    defer b.deinit();
-
-    try a.setRatio(1, 1);
-    try b.setRatio(-2, 1);
-    try testing.expect((try a.order(b)) == .gt);
-    try testing.expect((try a.orderAbs(b)) == .lt);
-}
-
-test "add single-limb" {
-    var a = try Rational.init(testing.allocator);
-    defer a.deinit();
-    var b = try Rational.init(testing.allocator);
-    defer b.deinit();
-
-    try a.setRatio(500, 231);
-    try b.setRatio(18903, 8584);
-    try testing.expect((try a.order(b)) == .lt);
-
-    try a.setRatio(890, 10);
-    try b.setRatio(89, 1);
-    try testing.expect((try a.order(b)) == .eq);
-}
-
-test "add" {
-    var a = try Rational.init(testing.allocator);
-    defer a.deinit();
-    var b = try Rational.init(testing.allocator);
-    defer b.deinit();
-    var r = try Rational.init(testing.allocator);
-    defer r.deinit();
-
-    try a.setRatio(78923, 23341);
-    try b.setRatio(123097, 12441414);
-    try a.add(a, b);
-
-    try r.setRatio(984786924199, 290395044174);
-    try testing.expect((try a.order(r)) == .eq);
-}
-
-test "sub" {
-    var a = try Rational.init(testing.allocator);
-    defer a.deinit();
-    var b = try Rational.init(testing.allocator);
-    defer b.deinit();
-    var r = try Rational.init(testing.allocator);
-    defer r.deinit();
-
-    try a.setRatio(78923, 23341);
-    try b.setRatio(123097, 12441414);
-    try a.sub(a, b);
-
-    try r.setRatio(979040510045, 290395044174);
-    try testing.expect((try a.order(r)) == .eq);
-}
-
-test "mul" {
-    var a = try Rational.init(testing.allocator);
-    defer a.deinit();
-    var b = try Rational.init(testing.allocator);
-    defer b.deinit();
-    var r = try Rational.init(testing.allocator);
-    defer r.deinit();
-
-    try a.setRatio(78923, 23341);
-    try b.setRatio(123097, 12441414);
-    try a.mul(a, b);
-
-    try r.setRatio(571481443, 17082061422);
-    try testing.expect((try a.order(r)) == .eq);
-}
-
-test "div" {
-    {
-        var a = try Rational.init(testing.allocator);
-        defer a.deinit();
-        var b = try Rational.init(testing.allocator);
-        defer b.deinit();
-        var r = try Rational.init(testing.allocator);
-        defer r.deinit();
-
-        try a.setRatio(78923, 23341);
-        try b.setRatio(123097, 12441414);
-        try a.div(a, b);
-
-        try r.setRatio(75531824394, 221015929);
-        try testing.expect((try a.order(r)) == .eq);
-    }
-
-    {
-        var a = try Rational.init(testing.allocator);
-        defer a.deinit();
-        var r = try Rational.init(testing.allocator);
-        defer r.deinit();
-
-        try a.setRatio(78923, 23341);
-        a.invert();
-
-        try r.setRatio(23341, 78923);
-        try testing.expect((try a.order(r)) == .eq);
-
-        try a.setRatio(-78923, 23341);
-        a.invert();
-
-        try r.setRatio(-23341, 78923);
-        try testing.expect((try a.order(r)) == .eq);
-    }
-}

@@ -1,7 +1,6 @@
 const std = @import("../std.zig");
 const assert = std.debug.assert;
 
-pub const Rational = @import("big/rational.zig").Rational;
 pub const int = @import("big/int.zig");
 pub const Limb = usize;
 const limb_info = @typeInfo(Limb).int;
@@ -18,7 +17,6 @@ comptime {
 
 test {
     _ = int;
-    _ = Rational;
     _ = Limb;
     _ = SignedLimb;
     _ = DoubleLimb;

@@ -4,6 +4,119 @@ const assert = std.debug.assert;
 const expect = std.testing.expect;
 const expectEqual = std.testing.expectEqual;
 
+pub const Sign = enum(u1) { positive, negative }; // One-bit sign tag: positive = 0, negative = 1.
+
+pub fn FloatRepr(comptime Float: type) type { // Returns a packed struct that can be @bitCast to `Float` (see `reconstruct`).
+    const fractional_bits = floatFractionalBits(Float);
+    const exponent_bits = floatExponentBits(Float);
+    return packed struct {
+        const Repr = @This();
+
+        mantissa: StoredMantissa, // declared first, so it occupies the lowest bits of the packed struct
+        exponent: BiasedExponent,
+        sign: Sign, // declared last, so it occupies the highest bit
+
+        pub const StoredMantissa = @Type(.{ .int = .{ // unsigned integer as wide as the `mantissa` field
+            .signedness = .unsigned,
+            .bits = floatMantissaBits(Float),
+        } });
+        pub const Mantissa = @Type(.{ .int = .{ // one bit wider than the fraction, so a leading 1 can be held explicitly
+            .signedness = .unsigned,
+            .bits = 1 + fractional_bits,
+        } });
+        pub const Exponent = @Type(.{ .int = .{ // signed (unbiased) exponent, same width as the stored exponent field
+            .signedness = .signed,
+            .bits = exponent_bits,
+        } });
+        pub const BiasedExponent = enum(@Type(.{ .int = .{ // the exponent field as stored, with named special values
+            .signedness = .unsigned,
+            .bits = exponent_bits,
+        } })) {
+            denormal = 0, // all-zero exponent field
+            min_normal = 1,
+            zero = (1 << (exponent_bits - 1)) - 1, // the bias: the stored value whose unbiased exponent is 0
+            max_normal = (1 << exponent_bits) - 2, // largest stored value below the all-ones pattern
+            infinite = (1 << exponent_bits) - 1, // all-ones exponent field; `classify` maps it to infinity or nan
+            _, // non-exhaustive: every other stored value is a valid, unnamed tag
+
+            pub const Int = @typeInfo(BiasedExponent).@"enum".tag_type;
+
+            pub fn unbias(biased: BiasedExponent) Exponent { // Removes the bias; `.denormal` and `.infinite` are asserted unreachable.
+                switch (biased) {
+                    .denormal => unreachable,
+                    else => return @bitCast(@intFromEnum(biased) -% @intFromEnum(BiasedExponent.zero)), // wrapping subtract of the bias, bits reinterpreted as signed
+                    .infinite => unreachable,
+                }
+            }
+
+            pub fn bias(unbiased: Exponent) BiasedExponent { // Wrapping-adds the bias to the exponent's bits and converts to the enum.
+                return @enumFromInt(@intFromEnum(BiasedExponent.zero) +% @as(Int, @bitCast(unbiased)));
+            }
+        };
+
+        pub const Normalized = struct { // A magnitude as fraction bits plus an exponent, with the leading 1 left implicit.
+            fraction: Fraction,
+            exponent: Normalized.Exponent,
+
+            pub const Fraction = @Type(.{ .int = .{
+                .signedness = .unsigned,
+                .bits = fractional_bits,
+            } });
+            pub const Exponent = @Type(.{ .int = .{ // one bit wider than the stored exponent field
+                .signedness = .signed,
+                .bits = 1 + exponent_bits,
+            } });
+
+            /// This currently truncates denormal values, which needs to be fixed before this can be used to
+            /// produce a rounded value.
+            pub fn reconstruct(normalized: Normalized, sign: Sign) Float {
+                if (normalized.exponent > BiasedExponent.max_normal.unbias()) return @bitCast(Repr{ // exponent above the normal range: return infinity with `sign`
+                    .mantissa = 0,
+                    .exponent = .infinite,
+                    .sign = sign,
+                });
+                const mantissa = @as(Mantissa, 1 << fractional_bits) | normalized.fraction; // place the leading 1 bit above the fraction
+                if (normalized.exponent < BiasedExponent.min_normal.unbias()) return @bitCast(Repr{ // exponent below the normal range: encode as a denormal
+                    .mantissa = @truncate(std.math.shr( // shift right by the exponent deficit; bits shifted out are dropped
+                        Mantissa,
+                        mantissa,
+                        BiasedExponent.min_normal.unbias() - normalized.exponent,
+                    )),
+                    .exponent = .denormal,
+                    .sign = sign,
+                });
+                return @bitCast(Repr{
+                    .mantissa = @truncate(mantissa),
+                    .exponent = .bias(@intCast(normalized.exponent)), // narrowing is in range because of the two checks above
+                    .sign = sign,
+                });
+            }
+        };
+
+        pub const Classified = union(enum) { normalized: Normalized, infinity, nan, invalid };
+        fn classify(repr: Repr) Classified { // Sorts `repr` by its exponent field into normalized / infinity / nan / invalid.
+            return switch (repr.exponent) {
+                .denormal => {
+                    const mantissa: Mantissa = repr.mantissa;
+                    const shift = @clz(mantissa); // NOTE(review): a zero mantissa makes this the full bit width of Mantissa - confirm zero is handled before this shift
+                    return .{ .normalized = .{
+                        .fraction = @truncate(mantissa << shift), // move the top set bit to Mantissa's top bit, then drop it
+                        .exponent = @as(Normalized.Exponent, comptime BiasedExponent.min_normal.unbias()) - shift, // each shifted-out leading zero lowers the exponent by one
+                    } };
+                },
+                else => if (repr.mantissa <= std.math.maxInt(Normalized.Fraction)) .{ .normalized = .{ // `.invalid` is only reachable when StoredMantissa is wider than Fraction
+                    .fraction = @intCast(repr.mantissa),
+                    .exponent = repr.exponent.unbias(),
+                } } else .invalid,
+                .infinite => switch (repr.mantissa) {
+                    0 => .infinity,
+                    else => .nan,
+                },
+            };
+        }
+    };
+}
+
 /// Creates a raw "1.0" mantissa for floating point type T. Used to dedupe f80 logic.
 inline fn mantissaOne(comptime T: type) comptime_int {
     return if (@typeInfo(T).float.bits == 80) 1 << floatFractionalBits(T) else 0;

@@ -593,7 +593,7 @@ const Parser = struct {
         switch (node.get(self.zoir)) {
             .int_literal => |int| switch (int) {
                 .small => |val| return @floatFromInt(val),
-                .big => |val| return val.toFloat(T),
+                .big => |val| return val.toFloat(T, .nearest_even)[0],
             },
             .float_literal => |val| return @floatCast(val),
             .pos_inf => return std.math.inf(T),

@@ -45,6 +45,7 @@ pub const rad_per_deg = 0.017453292519943295769236907684886127134428718885417254
 /// 180.0/pi
 pub const deg_per_rad = 57.295779513082320876798154814105170332405472466564321549160243861;
 
+pub const FloatRepr = float.FloatRepr;
 pub const floatExponentBits = float.floatExponentBits;
 pub const floatMantissaBits = float.floatMantissaBits;
 pub const floatFractionalBits = float.floatFractionalBits;

@@ -509,30 +509,23 @@ fn lowerInt(
             },
         },
         .float_literal => |val| {
-            // Check for fractional components
-            if (@rem(val, 1) != 0) {
-                return self.fail(
+            var big_int: std.math.big.int.Mutable = .{
+                .limbs = try self.sema.arena.alloc(std.math.big.Limb, std.math.big.int.calcLimbLen(val)),
+                .len = undefined,
+                .positive = undefined,
+            };
+            switch (big_int.setFloat(val, .trunc)) {
+                .inexact => return self.fail(
                     node,
                     "fractional component prevents float value '{}' from coercion to type '{}'",
                     .{ val, res_ty.fmt(self.sema.pt) },
-                );
+                ),
+                .exact => {},
             }
 
-            // Create a rational representation of the float
-            var rational = try std.math.big.Rational.init(self.sema.arena);
-            rational.setFloat(f128, val) catch |err| switch (err) {
-                error.NonFiniteFloat => unreachable,
-                error.OutOfMemory => return error.OutOfMemory,
-            };
-
-            // The float is reduced in rational.setFloat, so we assert that denominator is equal to
-            // one
-            const big_one = std.math.big.int.Const{ .limbs = &.{1}, .positive = true };
-            assert(rational.q.toConst().eqlAbs(big_one));
-
             // Check that the result is in range of the result type
             const int_info = res_ty.intInfo(self.sema.pt.zcu);
-            if (!rational.p.fitsInTwosComp(int_info.signedness, int_info.bits)) {
+            if (!big_int.toConst().fitsInTwosComp(int_info.signedness, int_info.bits)) {
                 return self.fail(
                     node,
                     "type '{}' cannot represent integer value '{}'",
@@ -543,7 +536,7 @@ fn lowerInt(
             return self.sema.pt.intern(.{
                 .int = .{
                     .ty = res_ty.toIntern(),
-                    .storage = .{ .big_int = rational.p.toConst() },
+                    .storage = .{ .big_int = big_int.toConst() },
                 },
             });
         },
@@ -584,7 +577,7 @@ fn lowerFloat(
     const value = switch (node.get(self.file.zoir.?)) {
         .int_literal => |int| switch (int) {
             .small => |val| try self.sema.pt.floatValue(res_ty, @as(f128, @floatFromInt(val))),
-            .big => |val| try self.sema.pt.floatValue(res_ty, val.toFloat(f128)),
+            .big => |val| try self.sema.pt.floatValue(res_ty, val.toFloat(f128, .nearest_even)[0]),
         },
         .float_literal => |val| try self.sema.pt.floatValue(res_ty, val),
         .char_literal => |val| try self.sema.pt.floatValue(res_ty, @as(f128, @floatFromInt(val))),

@@ -32843,24 +32843,21 @@ fn cmpNumeric(
             }
         }
         if (lhs_is_float) {
-            if (lhs_val.floatHasFraction(zcu)) {
-                switch (op) {
+            const float = lhs_val.toFloat(f128, zcu);
+            var big_int: std.math.big.int.Mutable = .{
+                .limbs = try sema.arena.alloc(std.math.big.Limb, std.math.big.int.calcLimbLen(float)),
+                .len = undefined,
+                .positive = undefined,
+            };
+            switch (big_int.setFloat(float, .away)) {
+                .inexact => switch (op) {
                     .eq => return .bool_false,
                     .neq => return .bool_true,
                     else => {},
-                }
-            }
-
-            var bigint = try float128IntPartToBigInt(sema.gpa, lhs_val.toFloat(f128, zcu));
-            defer bigint.deinit();
-            if (lhs_val.floatHasFraction(zcu)) {
-                if (lhs_is_signed) {
-                    try bigint.addScalar(&bigint, -1);
-                } else {
-                    try bigint.addScalar(&bigint, 1);
-                }
+                },
+                .exact => {},
             }
-            lhs_bits = bigint.toConst().bitCountTwosComp();
+            lhs_bits = big_int.toConst().bitCountTwosComp();
         } else {
             lhs_bits = lhs_val.intBitCountTwosComp(zcu);
         }
@@ -32890,24 +32887,21 @@ fn cmpNumeric(
             }
         }
         if (rhs_is_float) {
-            if (rhs_val.floatHasFraction(zcu)) {
-                switch (op) {
+            const float = rhs_val.toFloat(f128, zcu);
+            var big_int: std.math.big.int.Mutable = .{
+                .limbs = try sema.arena.alloc(std.math.big.Limb, std.math.big.int.calcLimbLen(float)),
+                .len = undefined,
+                .positive = undefined,
+            };
+            switch (big_int.setFloat(float, .away)) {
+                .inexact => switch (op) {
                     .eq => return .bool_false,
                     .neq => return .bool_true,
                     else => {},
-                }
-            }
-
-            var bigint = try float128IntPartToBigInt(sema.gpa, rhs_val.toFloat(f128, zcu));
-            defer bigint.deinit();
-            if (rhs_val.floatHasFraction(zcu)) {
-                if (rhs_is_signed) {
-                    try bigint.addScalar(&bigint, -1);
-                } else {
-                    try bigint.addScalar(&bigint, 1);
-                }
+                },
+                .exact => {},
             }
-            rhs_bits = bigint.toConst().bitCountTwosComp();
+            rhs_bits = big_int.toConst().bitCountTwosComp();
         } else {
             rhs_bits = rhs_val.intBitCountTwosComp(zcu);
         }
@@ -36955,31 +36949,6 @@ fn intFromFloat(
     return sema.intFromFloatScalar(block, src, val, int_ty, mode);
 }
 
-// float is expected to be finite and non-NaN
-fn float128IntPartToBigInt(
-    arena: Allocator,
-    float: f128,
-) !std.math.big.int.Managed {
-    const is_negative = std.math.signbit(float);
-    const floored = @floor(@abs(float));
-
-    var rational = try std.math.big.Rational.init(arena);
-    defer rational.q.deinit();
-    rational.setFloat(f128, floored) catch |err| switch (err) {
-        error.NonFiniteFloat => unreachable,
-        error.OutOfMemory => return error.OutOfMemory,
-    };
-
-    // The float is reduced in rational.setFloat, so we assert that denominator is equal to one
-    const big_one = std.math.big.int.Const{ .limbs = &.{1}, .positive = true };
-    assert(rational.q.toConst().eqlAbs(big_one));
-
-    if (is_negative) {
-        rational.negate();
-    }
-    return rational.p;
-}
-
 fn intFromFloatScalar(
     sema: *Sema,
     block: *Block,
@@ -36993,13 +36962,6 @@ fn intFromFloatScalar(
 
     if (val.isUndef(zcu)) return sema.failWithUseOfUndef(block, src);
 
-    if (mode == .exact and val.floatHasFraction(zcu)) return sema.fail(
-        block,
-        src,
-        "fractional component prevents float value '{}' from coercion to type '{}'",
-        .{ val.fmtValueSema(pt, sema), int_ty.fmt(pt) },
-    );
-
     const float = val.toFloat(f128, zcu);
     if (std.math.isNan(float)) {
         return sema.fail(block, src, "float value NaN cannot be stored in integer type '{}'", .{
@@ -37012,12 +36974,28 @@ fn intFromFloatScalar(
         });
     }
 
-    var big_int = try float128IntPartToBigInt(sema.arena, float);
-    defer big_int.deinit();
-
+    var big_int: std.math.big.int.Mutable = .{
+        .limbs = try sema.arena.alloc(std.math.big.Limb, std.math.big.int.calcLimbLen(float)),
+        .len = undefined,
+        .positive = undefined,
+    };
+    switch (big_int.setFloat(float, .trunc)) {
+        .inexact => switch (mode) {
+            .exact => return sema.fail(
+                block,
+                src,
+                "fractional component prevents float value '{}' from coercion to type '{}'",
+                .{ val.fmtValueSema(pt, sema), int_ty.fmt(pt) },
+            ),
+            .truncate => {},
+        },
+        .exact => {},
+    }
     const cti_result = try pt.intValue_big(.comptime_int, big_int.toConst());
+    if (int_ty.toIntern() == .comptime_int_type) return cti_result;
 
-    if (!(try sema.intFitsInType(cti_result, int_ty, null))) {
+    const int_info = int_ty.intInfo(zcu);
+    if (!big_int.toConst().fitsInTwosComp(int_info.signedness, int_info.bits)) {
         return sema.fail(block, src, "float value '{}' cannot be stored in integer type '{}'", .{
             val.fmtValueSema(pt, sema), int_ty.fmt(pt),
         });

@@ -898,7 +898,7 @@ pub fn readFromPackedMemory(
 pub fn toFloat(val: Value, comptime T: type, zcu: *const Zcu) T {
     return switch (zcu.intern_pool.indexToKey(val.toIntern())) {
         .int => |int| switch (int.storage) {
-            .big_int => |big_int| big_int.toFloat(T),
+            .big_int => |big_int| big_int.toFloat(T, .nearest_even)[0],
             inline .u64, .i64 => |x| {
                 if (T == f80) {
                     @panic("TODO we can't lower this properly on non-x86 llvm backend yet");
@@ -997,16 +997,6 @@ pub fn floatCast(val: Value, dest_ty: Type, pt: Zcu.PerThread) !Value {
     } }));
 }
 
-/// Asserts the value is a float
-pub fn floatHasFraction(self: Value, zcu: *const Zcu) bool {
-    return switch (zcu.intern_pool.indexToKey(self.toIntern())) {
-        .float => |float| switch (float.storage) {
-            inline else => |x| @rem(x, 1) != 0,
-        },
-        else => unreachable,
-    };
-}
-
 pub fn orderAgainstZero(lhs: Value, zcu: *Zcu) std.math.Order {
     return orderAgainstZeroInner(lhs, .normal, zcu, {}) catch unreachable;
 }
@@ -1557,17 +1547,13 @@ pub fn floatFromIntAdvanced(
 }
 
 pub fn floatFromIntScalar(val: Value, float_ty: Type, pt: Zcu.PerThread, comptime strat: ResolveStrat) !Value {
-    const zcu = pt.zcu;
-    return switch (zcu.intern_pool.indexToKey(val.toIntern())) {
+    return switch (pt.zcu.intern_pool.indexToKey(val.toIntern())) {
         .undef => try pt.undefValue(float_ty),
         .int => |int| switch (int.storage) {
-            .big_int => |big_int| {
-                const float = big_int.toFloat(f128);
-                return pt.floatValue(float_ty, float);
-            },
+            .big_int => |big_int| pt.floatValue(float_ty, big_int.toFloat(f128, .nearest_even)[0]),
             inline .u64, .i64 => |x| floatFromIntInner(x, float_ty, pt),
-            .lazy_align => |ty| return floatFromIntInner((try Type.fromInterned(ty).abiAlignmentInner(strat.toLazy(), pt.zcu, pt.tid)).scalar.toByteUnits() orelse 0, float_ty, pt),
-            .lazy_size => |ty| return floatFromIntInner((try Type.fromInterned(ty).abiSizeInner(strat.toLazy(), pt.zcu, pt.tid)).scalar, float_ty, pt),
+            .lazy_align => |ty| floatFromIntInner((try Type.fromInterned(ty).abiAlignmentInner(strat.toLazy(), pt.zcu, pt.tid)).scalar.toByteUnits() orelse 0, float_ty, pt),
+            .lazy_size => |ty| floatFromIntInner((try Type.fromInterned(ty).abiSizeInner(strat.toLazy(), pt.zcu, pt.tid)).scalar, float_ty, pt),
         },
         else => unreachable,
     };

Commit 6b41beb370

Commit `6b41beb370`