Commit 97b9facb98

matu3ba <matu3ba@users.noreply.github.com>
2023-02-24 19:27:02
compiler_rt: declutter int.zig, add mulXi3 tests (#14623)
- Combine mulXi3 routines for follow-up cleanup. - DRY up Dwords and Twords. - Rename both to HalveInt and use an instance of it. * Justification: not all processors have a 32-bit word size. * Remove test file from CMakeLists. * DRY things.
1 parent 5f70c36
lib/compiler_rt/common.zig
@@ -1,5 +1,6 @@
 const std = @import("std");
 const builtin = @import("builtin");
+const native_endian = builtin.cpu.arch.endian();
 
 pub const linkage: std.builtin.GlobalLinkage = if (builtin.is_test) .Internal else .Weak;
 /// Determines the symbol's visibility to other objects.
@@ -221,3 +222,20 @@ pub inline fn fneg(a: anytype) @TypeOf(a) {
     const negated = @bitCast(U, a) ^ sign_bit_mask;
     return @bitCast(F, negated);
 }
+
+/// Allows accessing the underlying bits of integer type `T` as two equally
+/// sized halves, `low` and `high`, that are either signed or unsigned integers.
+pub fn HalveInt(comptime T: type, comptime signed_half: bool) type {
+    return extern union {
+        pub const bits = @divExact(@typeInfo(T).Int.bits, 2);
+        pub const HalfTU = std.meta.Int(.unsigned, bits);
+        pub const HalfTS = std.meta.Int(.signed, bits);
+        pub const HalfT = if (signed_half) HalfTS else HalfTU;
+
+        all: T,
+        s: if (native_endian == .Little)
+            extern struct { low: HalfT, high: HalfT }
+        else
+            extern struct { high: HalfT, low: HalfT },
+    };
+}
lib/compiler_rt/int.zig
@@ -16,7 +16,6 @@ pub const panic = common.panic;
 comptime {
     @export(__divmodti4, .{ .name = "__divmodti4", .linkage = common.linkage, .visibility = common.visibility });
     @export(__udivmoddi4, .{ .name = "__udivmoddi4", .linkage = common.linkage, .visibility = common.visibility });
-    @export(__mulsi3, .{ .name = "__mulsi3", .linkage = common.linkage, .visibility = common.visibility });
     @export(__divmoddi4, .{ .name = "__divmoddi4", .linkage = common.linkage, .visibility = common.visibility });
     if (common.want_aeabi) {
         @export(__aeabi_idiv, .{ .name = "__aeabi_idiv", .linkage = common.linkage, .visibility = common.visibility });
@@ -663,59 +662,3 @@ fn test_one_umodsi3(a: u32, b: u32, expected_r: u32) !void {
     const r: u32 = __umodsi3(a, b);
     try testing.expect(r == expected_r);
 }
-
-pub fn __mulsi3(a: i32, b: i32) callconv(.C) i32 {
-    var ua = @bitCast(u32, a);
-    var ub = @bitCast(u32, b);
-    var r: u32 = 0;
-
-    while (ua > 0) {
-        if ((ua & 1) != 0) r +%= ub;
-        ua >>= 1;
-        ub <<= 1;
-    }
-
-    return @bitCast(i32, r);
-}
-
-fn test_one_mulsi3(a: i32, b: i32, result: i32) !void {
-    try testing.expectEqual(result, __mulsi3(a, b));
-}
-
-test "mulsi3" {
-    try test_one_mulsi3(0, 0, 0);
-    try test_one_mulsi3(0, 1, 0);
-    try test_one_mulsi3(1, 0, 0);
-    try test_one_mulsi3(0, 10, 0);
-    try test_one_mulsi3(10, 0, 0);
-    try test_one_mulsi3(0, maxInt(i32), 0);
-    try test_one_mulsi3(maxInt(i32), 0, 0);
-    try test_one_mulsi3(0, -1, 0);
-    try test_one_mulsi3(-1, 0, 0);
-    try test_one_mulsi3(0, -10, 0);
-    try test_one_mulsi3(-10, 0, 0);
-    try test_one_mulsi3(0, minInt(i32), 0);
-    try test_one_mulsi3(minInt(i32), 0, 0);
-    try test_one_mulsi3(1, 1, 1);
-    try test_one_mulsi3(1, 10, 10);
-    try test_one_mulsi3(10, 1, 10);
-    try test_one_mulsi3(1, maxInt(i32), maxInt(i32));
-    try test_one_mulsi3(maxInt(i32), 1, maxInt(i32));
-    try test_one_mulsi3(1, -1, -1);
-    try test_one_mulsi3(1, -10, -10);
-    try test_one_mulsi3(-10, 1, -10);
-    try test_one_mulsi3(1, minInt(i32), minInt(i32));
-    try test_one_mulsi3(minInt(i32), 1, minInt(i32));
-    try test_one_mulsi3(46340, 46340, 2147395600);
-    try test_one_mulsi3(-46340, 46340, -2147395600);
-    try test_one_mulsi3(46340, -46340, -2147395600);
-    try test_one_mulsi3(-46340, -46340, 2147395600);
-    try test_one_mulsi3(4194303, 8192, @truncate(i32, 34359730176));
-    try test_one_mulsi3(-4194303, 8192, @truncate(i32, -34359730176));
-    try test_one_mulsi3(4194303, -8192, @truncate(i32, -34359730176));
-    try test_one_mulsi3(-4194303, -8192, @truncate(i32, 34359730176));
-    try test_one_mulsi3(8192, 4194303, @truncate(i32, 34359730176));
-    try test_one_mulsi3(-8192, 4194303, @truncate(i32, -34359730176));
-    try test_one_mulsi3(8192, -4194303, @truncate(i32, -34359730176));
-    try test_one_mulsi3(-8192, -4194303, @truncate(i32, 34359730176));
-}
lib/compiler_rt/muldi3.zig
@@ -1,71 +0,0 @@
-//! Ported from
-//! https://github.com/llvm/llvm-project/blob/llvmorg-9.0.0/compiler-rt/lib/builtins/muldi3.c
-
-const std = @import("std");
-const builtin = @import("builtin");
-const native_endian = builtin.cpu.arch.endian();
-const common = @import("common.zig");
-
-pub const panic = common.panic;
-
-comptime {
-    if (common.want_aeabi) {
-        @export(__aeabi_lmul, .{ .name = "__aeabi_lmul", .linkage = common.linkage, .visibility = common.visibility });
-    } else {
-        @export(__muldi3, .{ .name = "__muldi3", .linkage = common.linkage, .visibility = common.visibility });
-    }
-}
-
-pub fn __muldi3(a: i64, b: i64) callconv(.C) i64 {
-    return mul(a, b);
-}
-
-fn __aeabi_lmul(a: i64, b: i64) callconv(.AAPCS) i64 {
-    return mul(a, b);
-}
-
-inline fn mul(a: i64, b: i64) i64 {
-    const x = dwords{ .all = a };
-    const y = dwords{ .all = b };
-    var r = dwords{ .all = muldsi3(x.s.low, y.s.low) };
-    r.s.high +%= x.s.high *% y.s.low +% x.s.low *% y.s.high;
-    return r.all;
-}
-
-const dwords = extern union {
-    all: i64,
-    s: switch (native_endian) {
-        .Little => extern struct {
-            low: u32,
-            high: u32,
-        },
-        .Big => extern struct {
-            high: u32,
-            low: u32,
-        },
-    },
-};
-
-fn muldsi3(a: u32, b: u32) i64 {
-    const bits_in_word_2 = @sizeOf(i32) * 8 / 2;
-    const lower_mask = (~@as(u32, 0)) >> bits_in_word_2;
-
-    var r: dwords = undefined;
-    r.s.low = (a & lower_mask) *% (b & lower_mask);
-    var t: u32 = r.s.low >> bits_in_word_2;
-    r.s.low &= lower_mask;
-    t += (a >> bits_in_word_2) *% (b & lower_mask);
-    r.s.low +%= (t & lower_mask) << bits_in_word_2;
-    r.s.high = t >> bits_in_word_2;
-    t = r.s.low >> bits_in_word_2;
-    r.s.low &= lower_mask;
-    t +%= (b >> bits_in_word_2) *% (a & lower_mask);
-    r.s.low +%= (t & lower_mask) << bits_in_word_2;
-    r.s.high +%= t >> bits_in_word_2;
-    r.s.high +%= (a >> bits_in_word_2) *% (b >> bits_in_word_2);
-    return r.all;
-}
-
-test {
-    _ = @import("muldi3_test.zig");
-}
lib/compiler_rt/muldi3_test.zig
@@ -1,51 +0,0 @@
-const __muldi3 = @import("muldi3.zig").__muldi3;
-const testing = @import("std").testing;
-
-fn test__muldi3(a: i64, b: i64, expected: i64) !void {
-    const x = __muldi3(a, b);
-    try testing.expect(x == expected);
-}
-
-test "muldi3" {
-    try test__muldi3(0, 0, 0);
-    try test__muldi3(0, 1, 0);
-    try test__muldi3(1, 0, 0);
-    try test__muldi3(0, 10, 0);
-    try test__muldi3(10, 0, 0);
-    try test__muldi3(0, 81985529216486895, 0);
-    try test__muldi3(81985529216486895, 0, 0);
-
-    try test__muldi3(0, -1, 0);
-    try test__muldi3(-1, 0, 0);
-    try test__muldi3(0, -10, 0);
-    try test__muldi3(-10, 0, 0);
-    try test__muldi3(0, -81985529216486895, 0);
-    try test__muldi3(-81985529216486895, 0, 0);
-
-    try test__muldi3(1, 1, 1);
-    try test__muldi3(1, 10, 10);
-    try test__muldi3(10, 1, 10);
-    try test__muldi3(1, 81985529216486895, 81985529216486895);
-    try test__muldi3(81985529216486895, 1, 81985529216486895);
-
-    try test__muldi3(1, -1, -1);
-    try test__muldi3(1, -10, -10);
-    try test__muldi3(-10, 1, -10);
-    try test__muldi3(1, -81985529216486895, -81985529216486895);
-    try test__muldi3(-81985529216486895, 1, -81985529216486895);
-
-    try test__muldi3(3037000499, 3037000499, 9223372030926249001);
-    try test__muldi3(-3037000499, 3037000499, -9223372030926249001);
-    try test__muldi3(3037000499, -3037000499, -9223372030926249001);
-    try test__muldi3(-3037000499, -3037000499, 9223372030926249001);
-
-    try test__muldi3(4398046511103, 2097152, 9223372036852678656);
-    try test__muldi3(-4398046511103, 2097152, -9223372036852678656);
-    try test__muldi3(4398046511103, -2097152, -9223372036852678656);
-    try test__muldi3(-4398046511103, -2097152, 9223372036852678656);
-
-    try test__muldi3(2097152, 4398046511103, 9223372036852678656);
-    try test__muldi3(-2097152, 4398046511103, -9223372036852678656);
-    try test__muldi3(2097152, -4398046511103, -9223372036852678656);
-    try test__muldi3(-2097152, -4398046511103, 9223372036852678656);
-}
lib/compiler_rt/multi3.zig
@@ -1,75 +0,0 @@
-//! Ported from git@github.com:llvm-project/llvm-project-20170507.git
-//! ae684fad6d34858c014c94da69c15e7774a633c3
-//! 2018-08-13
-
-const std = @import("std");
-const builtin = @import("builtin");
-const native_endian = builtin.cpu.arch.endian();
-const common = @import("common.zig");
-
-pub const panic = common.panic;
-
-comptime {
-    if (common.want_windows_v2u64_abi) {
-        @export(__multi3_windows_x86_64, .{ .name = "__multi3", .linkage = common.linkage, .visibility = common.visibility });
-    } else {
-        @export(__multi3, .{ .name = "__multi3", .linkage = common.linkage, .visibility = common.visibility });
-    }
-}
-
-pub fn __multi3(a: i128, b: i128) callconv(.C) i128 {
-    return mul(a, b);
-}
-
-const v2u64 = @Vector(2, u64);
-
-fn __multi3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
-    return @bitCast(v2u64, mul(@bitCast(i128, a), @bitCast(i128, b)));
-}
-
-inline fn mul(a: i128, b: i128) i128 {
-    const x = twords{ .all = a };
-    const y = twords{ .all = b };
-    var r = twords{ .all = mulddi3(x.s.low, y.s.low) };
-    r.s.high +%= x.s.high *% y.s.low +% x.s.low *% y.s.high;
-    return r.all;
-}
-
-fn mulddi3(a: u64, b: u64) i128 {
-    const bits_in_dword_2 = (@sizeOf(i64) * 8) / 2;
-    const lower_mask = ~@as(u64, 0) >> bits_in_dword_2;
-    var r: twords = undefined;
-    r.s.low = (a & lower_mask) *% (b & lower_mask);
-    var t: u64 = r.s.low >> bits_in_dword_2;
-    r.s.low &= lower_mask;
-    t +%= (a >> bits_in_dword_2) *% (b & lower_mask);
-    r.s.low +%= (t & lower_mask) << bits_in_dword_2;
-    r.s.high = t >> bits_in_dword_2;
-    t = r.s.low >> bits_in_dword_2;
-    r.s.low &= lower_mask;
-    t +%= (b >> bits_in_dword_2) *% (a & lower_mask);
-    r.s.low +%= (t & lower_mask) << bits_in_dword_2;
-    r.s.high +%= t >> bits_in_dword_2;
-    r.s.high +%= (a >> bits_in_dword_2) *% (b >> bits_in_dword_2);
-    return r.all;
-}
-
-const twords = extern union {
-    all: i128,
-    s: S,
-
-    const S = if (native_endian == .Little)
-        extern struct {
-            low: u64,
-            high: u64,
-        }
-    else
-        extern struct {
-            high: u64,
-            low: u64,
-        };
-};
-
-test {
-    _ = @import("multi3_test.zig");
-}
lib/compiler_rt/multi3_test.zig
@@ -1,53 +0,0 @@
-const __multi3 = @import("multi3.zig").__multi3;
-const testing = @import("std").testing;
-
-fn test__multi3(a: i128, b: i128, expected: i128) !void {
-    const x = __multi3(a, b);
-    try testing.expect(x == expected);
-}
-
-test "multi3" {
-    try test__multi3(0, 0, 0);
-    try test__multi3(0, 1, 0);
-    try test__multi3(1, 0, 0);
-    try test__multi3(0, 10, 0);
-    try test__multi3(10, 0, 0);
-    try test__multi3(0, 81985529216486895, 0);
-    try test__multi3(81985529216486895, 0, 0);
-
-    try test__multi3(0, -1, 0);
-    try test__multi3(-1, 0, 0);
-    try test__multi3(0, -10, 0);
-    try test__multi3(-10, 0, 0);
-    try test__multi3(0, -81985529216486895, 0);
-    try test__multi3(-81985529216486895, 0, 0);
-
-    try test__multi3(1, 1, 1);
-    try test__multi3(1, 10, 10);
-    try test__multi3(10, 1, 10);
-    try test__multi3(1, 81985529216486895, 81985529216486895);
-    try test__multi3(81985529216486895, 1, 81985529216486895);
-
-    try test__multi3(1, -1, -1);
-    try test__multi3(1, -10, -10);
-    try test__multi3(-10, 1, -10);
-    try test__multi3(1, -81985529216486895, -81985529216486895);
-    try test__multi3(-81985529216486895, 1, -81985529216486895);
-
-    try test__multi3(3037000499, 3037000499, 9223372030926249001);
-    try test__multi3(-3037000499, 3037000499, -9223372030926249001);
-    try test__multi3(3037000499, -3037000499, -9223372030926249001);
-    try test__multi3(-3037000499, -3037000499, 9223372030926249001);
-
-    try test__multi3(4398046511103, 2097152, 9223372036852678656);
-    try test__multi3(-4398046511103, 2097152, -9223372036852678656);
-    try test__multi3(4398046511103, -2097152, -9223372036852678656);
-    try test__multi3(-4398046511103, -2097152, 9223372036852678656);
-
-    try test__multi3(2097152, 4398046511103, 9223372036852678656);
-    try test__multi3(-2097152, 4398046511103, -9223372036852678656);
-    try test__multi3(2097152, -4398046511103, -9223372036852678656);
-    try test__multi3(-2097152, -4398046511103, 9223372036852678656);
-
-    try test__multi3(0x00000000000000B504F333F9DE5BE000, 0x000000000000000000B504F333F9DE5B, 0x7FFFFFFFFFFFF328DF915DA296E8A000);
-}
lib/compiler_rt/mulXi3.zig
@@ -0,0 +1,101 @@
+const builtin = @import("builtin");
+const std = @import("std");
+const testing = std.testing;
+const common = @import("common.zig");
+const native_endian = builtin.cpu.arch.endian();
+
+pub const panic = common.panic;
+
+comptime {
+    @export(__mulsi3, .{ .name = "__mulsi3", .linkage = common.linkage, .visibility = common.visibility });
+    if (common.want_aeabi) {
+        @export(__aeabi_lmul, .{ .name = "__aeabi_lmul", .linkage = common.linkage, .visibility = common.visibility });
+    } else {
+        @export(__muldi3, .{ .name = "__muldi3", .linkage = common.linkage, .visibility = common.visibility });
+    }
+    if (common.want_windows_v2u64_abi) {
+        @export(__multi3_windows_x86_64, .{ .name = "__multi3", .linkage = common.linkage, .visibility = common.visibility });
+    } else {
+        @export(__multi3, .{ .name = "__multi3", .linkage = common.linkage, .visibility = common.visibility });
+    }
+}
+
+pub fn __mulsi3(a: i32, b: i32) callconv(.C) i32 {
+    var ua = @bitCast(u32, a);
+    var ub = @bitCast(u32, b);
+    var r: u32 = 0;
+
+    while (ua > 0) {
+        if ((ua & 1) != 0) r +%= ub;
+        ua >>= 1;
+        ub <<= 1;
+    }
+
+    return @bitCast(i32, r);
+}
+
+pub fn __muldi3(a: i64, b: i64) callconv(.C) i64 {
+    return mulX(i64, a, b);
+}
+
+fn __aeabi_lmul(a: i64, b: i64) callconv(.AAPCS) i64 {
+    return mulX(i64, a, b);
+}
+
+inline fn mulX(comptime T: type, a: T, b: T) T {
+    const word_t = common.HalveInt(T, false);
+    const x = word_t{ .all = a };
+    const y = word_t{ .all = b };
+    var r = switch (T) {
+        i64, i128 => word_t{ .all = muldXi(word_t.HalfT, x.s.low, y.s.low) },
+        else => unreachable,
+    };
+    r.s.high +%= x.s.high *% y.s.low +% x.s.low *% y.s.high;
+    return r.all;
+}
+
+fn DoubleInt(comptime T: type) type {
+    return switch (T) {
+        u32 => i64,
+        u64 => i128,
+        i32 => i64,
+        i64 => i128,
+        else => unreachable,
+    };
+}
+
+fn muldXi(comptime T: type, a: T, b: T) DoubleInt(T) {
+    const DT = DoubleInt(T);
+    const word_t = common.HalveInt(DT, false);
+    const bits_in_word_2 = @sizeOf(T) * 8 / 2;
+    const lower_mask = (~@as(T, 0)) >> bits_in_word_2;
+
+    var r: word_t = undefined;
+    r.s.low = (a & lower_mask) *% (b & lower_mask);
+    var t: T = r.s.low >> bits_in_word_2;
+    r.s.low &= lower_mask;
+    t += (a >> bits_in_word_2) *% (b & lower_mask);
+    r.s.low +%= (t & lower_mask) << bits_in_word_2;
+    r.s.high = t >> bits_in_word_2;
+    t = r.s.low >> bits_in_word_2;
+    r.s.low &= lower_mask;
+    t +%= (b >> bits_in_word_2) *% (a & lower_mask);
+    r.s.low +%= (t & lower_mask) << bits_in_word_2;
+    r.s.high +%= t >> bits_in_word_2;
+    r.s.high +%= (a >> bits_in_word_2) *% (b >> bits_in_word_2);
+    return r.all;
+}
+
+pub fn __multi3(a: i128, b: i128) callconv(.C) i128 {
+    return mulX(i128, a, b);
+}
+
+const v2u64 = @Vector(2, u64);
+
+fn __multi3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
+    return @bitCast(v2u64, mulX(i128, @bitCast(i128, a), @bitCast(i128, b)));
+}
+
+test {
+    _ = @import("mulXi3_test.zig");
+}
lib/compiler_rt/mulXi3_test.zig
@@ -0,0 +1,147 @@
+const std = @import("std");
+const testing = std.testing;
+const mulXi3 = @import("mulXi3.zig");
+const maxInt = std.math.maxInt;
+const minInt = std.math.minInt;
+
+fn test_one_mulsi3(a: i32, b: i32, result: i32) !void {
+    try testing.expectEqual(result, mulXi3.__mulsi3(a, b));
+}
+
+fn test__muldi3(a: i64, b: i64, expected: i64) !void {
+    const x = mulXi3.__muldi3(a, b);
+    try testing.expect(x == expected);
+}
+
+fn test__multi3(a: i128, b: i128, expected: i128) !void {
+    const x = mulXi3.__multi3(a, b);
+    try testing.expect(x == expected);
+}
+
+test "mulsi3" {
+    try test_one_mulsi3(0, 0, 0);
+    try test_one_mulsi3(0, 1, 0);
+    try test_one_mulsi3(1, 0, 0);
+    try test_one_mulsi3(0, 10, 0);
+    try test_one_mulsi3(10, 0, 0);
+    try test_one_mulsi3(0, maxInt(i32), 0);
+    try test_one_mulsi3(maxInt(i32), 0, 0);
+    try test_one_mulsi3(0, -1, 0);
+    try test_one_mulsi3(-1, 0, 0);
+    try test_one_mulsi3(0, -10, 0);
+    try test_one_mulsi3(-10, 0, 0);
+    try test_one_mulsi3(0, minInt(i32), 0);
+    try test_one_mulsi3(minInt(i32), 0, 0);
+    try test_one_mulsi3(1, 1, 1);
+    try test_one_mulsi3(1, 10, 10);
+    try test_one_mulsi3(10, 1, 10);
+    try test_one_mulsi3(1, maxInt(i32), maxInt(i32));
+    try test_one_mulsi3(maxInt(i32), 1, maxInt(i32));
+    try test_one_mulsi3(1, -1, -1);
+    try test_one_mulsi3(1, -10, -10);
+    try test_one_mulsi3(-10, 1, -10);
+    try test_one_mulsi3(1, minInt(i32), minInt(i32));
+    try test_one_mulsi3(minInt(i32), 1, minInt(i32));
+    try test_one_mulsi3(46340, 46340, 2147395600);
+    try test_one_mulsi3(-46340, 46340, -2147395600);
+    try test_one_mulsi3(46340, -46340, -2147395600);
+    try test_one_mulsi3(-46340, -46340, 2147395600);
+    try test_one_mulsi3(4194303, 8192, @truncate(i32, 34359730176));
+    try test_one_mulsi3(-4194303, 8192, @truncate(i32, -34359730176));
+    try test_one_mulsi3(4194303, -8192, @truncate(i32, -34359730176));
+    try test_one_mulsi3(-4194303, -8192, @truncate(i32, 34359730176));
+    try test_one_mulsi3(8192, 4194303, @truncate(i32, 34359730176));
+    try test_one_mulsi3(-8192, 4194303, @truncate(i32, -34359730176));
+    try test_one_mulsi3(8192, -4194303, @truncate(i32, -34359730176));
+    try test_one_mulsi3(-8192, -4194303, @truncate(i32, 34359730176));
+}
+
+test "muldi3" {
+    try test__muldi3(0, 0, 0);
+    try test__muldi3(0, 1, 0);
+    try test__muldi3(1, 0, 0);
+    try test__muldi3(0, 10, 0);
+    try test__muldi3(10, 0, 0);
+    try test__muldi3(0, 81985529216486895, 0);
+    try test__muldi3(81985529216486895, 0, 0);
+
+    try test__muldi3(0, -1, 0);
+    try test__muldi3(-1, 0, 0);
+    try test__muldi3(0, -10, 0);
+    try test__muldi3(-10, 0, 0);
+    try test__muldi3(0, -81985529216486895, 0);
+    try test__muldi3(-81985529216486895, 0, 0);
+
+    try test__muldi3(1, 1, 1);
+    try test__muldi3(1, 10, 10);
+    try test__muldi3(10, 1, 10);
+    try test__muldi3(1, 81985529216486895, 81985529216486895);
+    try test__muldi3(81985529216486895, 1, 81985529216486895);
+
+    try test__muldi3(1, -1, -1);
+    try test__muldi3(1, -10, -10);
+    try test__muldi3(-10, 1, -10);
+    try test__muldi3(1, -81985529216486895, -81985529216486895);
+    try test__muldi3(-81985529216486895, 1, -81985529216486895);
+
+    try test__muldi3(3037000499, 3037000499, 9223372030926249001);
+    try test__muldi3(-3037000499, 3037000499, -9223372030926249001);
+    try test__muldi3(3037000499, -3037000499, -9223372030926249001);
+    try test__muldi3(-3037000499, -3037000499, 9223372030926249001);
+
+    try test__muldi3(4398046511103, 2097152, 9223372036852678656);
+    try test__muldi3(-4398046511103, 2097152, -9223372036852678656);
+    try test__muldi3(4398046511103, -2097152, -9223372036852678656);
+    try test__muldi3(-4398046511103, -2097152, 9223372036852678656);
+
+    try test__muldi3(2097152, 4398046511103, 9223372036852678656);
+    try test__muldi3(-2097152, 4398046511103, -9223372036852678656);
+    try test__muldi3(2097152, -4398046511103, -9223372036852678656);
+    try test__muldi3(-2097152, -4398046511103, 9223372036852678656);
+}
+
+test "multi3" {
+    try test__multi3(0, 0, 0);
+    try test__multi3(0, 1, 0);
+    try test__multi3(1, 0, 0);
+    try test__multi3(0, 10, 0);
+    try test__multi3(10, 0, 0);
+    try test__multi3(0, 81985529216486895, 0);
+    try test__multi3(81985529216486895, 0, 0);
+
+    try test__multi3(0, -1, 0);
+    try test__multi3(-1, 0, 0);
+    try test__multi3(0, -10, 0);
+    try test__multi3(-10, 0, 0);
+    try test__multi3(0, -81985529216486895, 0);
+    try test__multi3(-81985529216486895, 0, 0);
+
+    try test__multi3(1, 1, 1);
+    try test__multi3(1, 10, 10);
+    try test__multi3(10, 1, 10);
+    try test__multi3(1, 81985529216486895, 81985529216486895);
+    try test__multi3(81985529216486895, 1, 81985529216486895);
+
+    try test__multi3(1, -1, -1);
+    try test__multi3(1, -10, -10);
+    try test__multi3(-10, 1, -10);
+    try test__multi3(1, -81985529216486895, -81985529216486895);
+    try test__multi3(-81985529216486895, 1, -81985529216486895);
+
+    try test__multi3(3037000499, 3037000499, 9223372030926249001);
+    try test__multi3(-3037000499, 3037000499, -9223372030926249001);
+    try test__multi3(3037000499, -3037000499, -9223372030926249001);
+    try test__multi3(-3037000499, -3037000499, 9223372030926249001);
+
+    try test__multi3(4398046511103, 2097152, 9223372036852678656);
+    try test__multi3(-4398046511103, 2097152, -9223372036852678656);
+    try test__multi3(4398046511103, -2097152, -9223372036852678656);
+    try test__multi3(-4398046511103, -2097152, 9223372036852678656);
+
+    try test__multi3(2097152, 4398046511103, 9223372036852678656);
+    try test__multi3(-2097152, 4398046511103, -9223372036852678656);
+    try test__multi3(2097152, -4398046511103, -9223372036852678656);
+    try test__multi3(-2097152, -4398046511103, 9223372036852678656);
+
+    try test__multi3(0x00000000000000B504F333F9DE5BE000, 0x000000000000000000B504F333F9DE5B, 0x7FFFFFFFFFFFF328DF915DA296E8A000);
+}
lib/compiler_rt/shift.zig
@@ -1,7 +1,6 @@
 const std = @import("std");
 const builtin = @import("builtin");
 const Log2Int = std.math.Log2Int;
-const native_endian = builtin.cpu.arch.endian();
 const common = @import("common.zig");
 
 pub const panic = common.panic;
@@ -27,39 +26,24 @@ comptime {
     }
 }
 
-fn Dwords(comptime T: type, comptime signed_half: bool) type {
-    return extern union {
-        const bits = @divExact(@typeInfo(T).Int.bits, 2);
-        const HalfTU = std.meta.Int(.unsigned, bits);
-        const HalfTS = std.meta.Int(.signed, bits);
-        const HalfT = if (signed_half) HalfTS else HalfTU;
-
-        all: T,
-        s: if (native_endian == .Little)
-            extern struct { low: HalfT, high: HalfT }
-        else
-            extern struct { high: HalfT, low: HalfT },
-    };
-}
-
 // Arithmetic shift left: shift in 0 from right to left
 // Precondition: 0 <= b < T.bit_count
 inline fn ashlXi3(comptime T: type, a: T, b: i32) T {
-    const dwords = Dwords(T, false);
-    const S = Log2Int(dwords.HalfT);
+    const word_t = common.HalveInt(T, false);
+    const S = Log2Int(word_t.HalfT);
 
-    const input = dwords{ .all = a };
-    var output: dwords = undefined;
+    const input = word_t{ .all = a };
+    var output: word_t = undefined;
 
-    if (b >= dwords.bits) {
+    if (b >= word_t.bits) {
         output.s.low = 0;
-        output.s.high = input.s.low << @intCast(S, b - dwords.bits);
+        output.s.high = input.s.low << @intCast(S, b - word_t.bits);
     } else if (b == 0) {
         return a;
     } else {
         output.s.low = input.s.low << @intCast(S, b);
         output.s.high = input.s.high << @intCast(S, b);
-        output.s.high |= input.s.low >> @intCast(S, dwords.bits - b);
+        output.s.high |= input.s.low >> @intCast(S, word_t.bits - b);
     }
 
     return output.all;
@@ -68,24 +52,24 @@ inline fn ashlXi3(comptime T: type, a: T, b: i32) T {
 // Arithmetic shift right: shift in the sign bit from left to right
 // Precondition: 0 <= b < T.bit_count
 inline fn ashrXi3(comptime T: type, a: T, b: i32) T {
-    const dwords = Dwords(T, true);
-    const S = Log2Int(dwords.HalfT);
+    const word_t = common.HalveInt(T, true);
+    const S = Log2Int(word_t.HalfT);
 
-    const input = dwords{ .all = a };
-    var output: dwords = undefined;
+    const input = word_t{ .all = a };
+    var output: word_t = undefined;
 
-    if (b >= dwords.bits) {
-        output.s.high = input.s.high >> (dwords.bits - 1);
-        output.s.low = input.s.high >> @intCast(S, b - dwords.bits);
+    if (b >= word_t.bits) {
+        output.s.high = input.s.high >> (word_t.bits - 1);
+        output.s.low = input.s.high >> @intCast(S, b - word_t.bits);
     } else if (b == 0) {
         return a;
     } else {
         output.s.high = input.s.high >> @intCast(S, b);
-        output.s.low = input.s.high << @intCast(S, dwords.bits - b);
+        output.s.low = input.s.high << @intCast(S, word_t.bits - b);
         // Avoid sign-extension here
         output.s.low |= @bitCast(
-            dwords.HalfT,
-            @bitCast(dwords.HalfTU, input.s.low) >> @intCast(S, b),
+            word_t.HalfT,
+            @bitCast(word_t.HalfTU, input.s.low) >> @intCast(S, b),
         );
     }
 
@@ -95,20 +79,20 @@ inline fn ashrXi3(comptime T: type, a: T, b: i32) T {
 // Logical shift right: shift in 0 from left to right
 // Precondition: 0 <= b < T.bit_count
 inline fn lshrXi3(comptime T: type, a: T, b: i32) T {
-    const dwords = Dwords(T, false);
-    const S = Log2Int(dwords.HalfT);
+    const word_t = common.HalveInt(T, false);
+    const S = Log2Int(word_t.HalfT);
 
-    const input = dwords{ .all = a };
-    var output: dwords = undefined;
+    const input = word_t{ .all = a };
+    var output: word_t = undefined;
 
-    if (b >= dwords.bits) {
+    if (b >= word_t.bits) {
         output.s.high = 0;
-        output.s.low = input.s.high >> @intCast(S, b - dwords.bits);
+        output.s.low = input.s.high >> @intCast(S, b - word_t.bits);
     } else if (b == 0) {
         return a;
     } else {
         output.s.high = input.s.high >> @intCast(S, b);
-        output.s.low = input.s.high << @intCast(S, dwords.bits - b);
+        output.s.low = input.s.high << @intCast(S, word_t.bits - b);
         output.s.low |= input.s.low >> @intCast(S, b);
     }
 
lib/compiler_rt.zig
@@ -13,8 +13,7 @@ comptime {
     _ = @import("compiler_rt/shift.zig");
     _ = @import("compiler_rt/negXi2.zig");
     _ = @import("compiler_rt/int.zig");
-    _ = @import("compiler_rt/muldi3.zig");
-    _ = @import("compiler_rt/multi3.zig");
+    _ = @import("compiler_rt/mulXi3.zig");
     _ = @import("compiler_rt/divti3.zig");
     _ = @import("compiler_rt/udivti3.zig");
     _ = @import("compiler_rt/modti3.zig");
CMakeLists.txt
@@ -434,13 +434,12 @@ set(ZIG_STAGE2_SOURCES
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/log10.zig"
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/log2.zig"
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/modti3.zig"
+    "${CMAKE_SOURCE_DIR}/lib/compiler_rt/mulXi3.zig"
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/muldf3.zig"
-    "${CMAKE_SOURCE_DIR}/lib/compiler_rt/muldi3.zig"
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/mulf3.zig"
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/mulo.zig"
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/mulsf3.zig"
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/multf3.zig"
-    "${CMAKE_SOURCE_DIR}/lib/compiler_rt/multi3.zig"
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/mulxf3.zig"
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/negXi2.zig"
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/negv.zig"
@@ -613,7 +612,6 @@ set(ZIG_STAGE2_SOURCES
     "${CMAKE_SOURCE_DIR}/src/link/tapi.zig"
     "${CMAKE_SOURCE_DIR}/src/link/tapi/Tokenizer.zig"
     "${CMAKE_SOURCE_DIR}/src/link/tapi/parse.zig"
-    "${CMAKE_SOURCE_DIR}/src/link/tapi/parse/test.zig"
     "${CMAKE_SOURCE_DIR}/src/link/tapi/yaml.zig"
     "${CMAKE_SOURCE_DIR}/src/main.zig"
     "${CMAKE_SOURCE_DIR}/src/mingw.zig"
@@ -753,7 +751,7 @@ set(BUILD_ZIG2_ARGS
   --deps build_options
   -target "${HOST_TARGET_TRIPLE}"
 )
- 
+
 add_custom_command(
   OUTPUT "${ZIG2_C_SOURCE}"
   COMMAND zig1 ${BUILD_ZIG2_ARGS}
@@ -771,7 +769,7 @@ set(BUILD_COMPILER_RT_ARGS
   --deps build_options
   -target "${HOST_TARGET_TRIPLE}"
 )
- 
+
 add_custom_command(
   OUTPUT "${ZIG_COMPILER_RT_C_SOURCE}"
   COMMAND zig1 ${BUILD_COMPILER_RT_ARGS}