Commit f5dd6fb71a

Pavel Verigo <paul.verigo@gmail.com>
2024-07-20 13:21:46
stage2-wasm: @mulWithOverflow fixes + 128 bit signed
1 parent 56d535d
Changed files (2)
src
arch
test
behavior
src/arch/wasm/CodeGen.zig
@@ -2681,41 +2681,41 @@ fn binOpBigInt(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) Inner
         .@"and", .@"or", .xor => {
             const result = try func.allocStack(ty);
             try func.emitWValue(result);
-            const lhs_low_bit = try func.load(lhs, Type.u64, 0);
-            const rhs_low_bit = try func.load(rhs, Type.u64, 0);
-            const op_low_bit = try func.binOp(lhs_low_bit, rhs_low_bit, Type.u64, op);
-            try func.store(.stack, op_low_bit, Type.u64, result.offset());
+            const lhs_lsb = try func.load(lhs, Type.u64, 0);
+            const rhs_lsb = try func.load(rhs, Type.u64, 0);
+            const op_lsb = try func.binOp(lhs_lsb, rhs_lsb, Type.u64, op);
+            try func.store(.stack, op_lsb, Type.u64, result.offset());
 
             try func.emitWValue(result);
-            const lhs_high_bit = try func.load(lhs, Type.u64, 8);
-            const rhs_high_bit = try func.load(rhs, Type.u64, 8);
-            const op_high_bit = try func.binOp(lhs_high_bit, rhs_high_bit, Type.u64, op);
-            try func.store(.stack, op_high_bit, Type.u64, result.offset() + 8);
+            const lhs_msb = try func.load(lhs, Type.u64, 8);
+            const rhs_msb = try func.load(rhs, Type.u64, 8);
+            const op_msb = try func.binOp(lhs_msb, rhs_msb, Type.u64, op);
+            try func.store(.stack, op_msb, Type.u64, result.offset() + 8);
             return result;
         },
         .add, .sub => {
             const result = try func.allocStack(ty);
-            var lhs_low_bit = try (try func.load(lhs, Type.u64, 0)).toLocal(func, Type.u64);
-            defer lhs_low_bit.free(func);
-            var rhs_low_bit = try (try func.load(rhs, Type.u64, 0)).toLocal(func, Type.u64);
-            defer rhs_low_bit.free(func);
-            var low_op_res = try (try func.binOp(lhs_low_bit, rhs_low_bit, Type.u64, op)).toLocal(func, Type.u64);
-            defer low_op_res.free(func);
+            var lhs_lsb = try (try func.load(lhs, Type.u64, 0)).toLocal(func, Type.u64);
+            defer lhs_lsb.free(func);
+            var rhs_lsb = try (try func.load(rhs, Type.u64, 0)).toLocal(func, Type.u64);
+            defer rhs_lsb.free(func);
+            var op_lsb = try (try func.binOp(lhs_lsb, rhs_lsb, Type.u64, op)).toLocal(func, Type.u64);
+            defer op_lsb.free(func);
 
-            const lhs_high_bit = try func.load(lhs, Type.u64, 8);
-            const rhs_high_bit = try func.load(rhs, Type.u64, 8);
-            const high_op_res = try func.binOp(lhs_high_bit, rhs_high_bit, Type.u64, op);
+            const lhs_msb = try func.load(lhs, Type.u64, 8);
+            const rhs_msb = try func.load(rhs, Type.u64, 8);
+            const op_msb = try func.binOp(lhs_msb, rhs_msb, Type.u64, op);
 
             const lt = if (op == .add) blk: {
-                break :blk try func.cmp(low_op_res, rhs_low_bit, Type.u64, .lt);
+                break :blk try func.cmp(op_lsb, rhs_lsb, Type.u64, .lt);
             } else if (op == .sub) blk: {
-                break :blk try func.cmp(lhs_low_bit, rhs_low_bit, Type.u64, .lt);
+                break :blk try func.cmp(lhs_lsb, rhs_lsb, Type.u64, .lt);
             } else unreachable;
             const tmp = try func.intcast(lt, Type.u32, Type.u64);
-            var tmp_op = try (try func.binOp(high_op_res, tmp, Type.u64, op)).toLocal(func, Type.u64);
+            var tmp_op = try (try func.binOp(op_msb, tmp, Type.u64, op)).toLocal(func, Type.u64);
             defer tmp_op.free(func);
 
-            try func.store(result, low_op_res, Type.u64, 0);
+            try func.store(result, op_lsb, Type.u64, 0);
             try func.store(result, tmp_op, Type.u64, 8);
             return result;
         },
@@ -4419,16 +4419,16 @@ fn intcast(func: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerErro
             break :blk try (try func.intcast(operand, given, sign_ty)).toLocal(func, sign_ty);
         } else operand;
 
-        // store msb first
+        // store lsb first
         try func.store(.stack, lhs, Type.u64, 0 + stack_ptr.offset());
 
-        // For signed integers we shift msb by 63 (64bit integer - 1 sign bit) and store remaining value
+        // For signed integers we shift the lsb right by 63 (64-bit integer - 1 sign bit) as an i64 and store the result as the msb
         if (wanted.isSignedInt(mod)) {
             try func.emitWValue(stack_ptr);
             const shr = try func.binOp(lhs, .{ .imm64 = 63 }, Type.i64, .shr);
             try func.store(.stack, shr, Type.u64, 8 + stack_ptr.offset());
         } else {
-            // Ensure memory of lsb is zero'd
+            // Ensure memory of msb is zero'd
             try func.store(stack_ptr, .{ .imm64 = 0 }, Type.u64, 8);
         }
         return stack_ptr;
@@ -5529,17 +5529,17 @@ fn cmpBigInt(func: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op: std
         return func.fail("TODO: Support cmpBigInt for integer bitsize: '{d}'", .{operand_ty.bitSize(pt)});
     }
 
-    var lhs_high_bit = try (try func.load(lhs, Type.u64, 8)).toLocal(func, Type.u64);
-    defer lhs_high_bit.free(func);
-    var rhs_high_bit = try (try func.load(rhs, Type.u64, 8)).toLocal(func, Type.u64);
-    defer rhs_high_bit.free(func);
+    var lhs_msb = try (try func.load(lhs, Type.u64, 8)).toLocal(func, Type.u64);
+    defer lhs_msb.free(func);
+    var rhs_msb = try (try func.load(rhs, Type.u64, 8)).toLocal(func, Type.u64);
+    defer rhs_msb.free(func);
 
     switch (op) {
         .eq, .neq => {
-            const xor_high = try func.binOp(lhs_high_bit, rhs_high_bit, Type.u64, .xor);
-            const lhs_low_bit = try func.load(lhs, Type.u64, 0);
-            const rhs_low_bit = try func.load(rhs, Type.u64, 0);
-            const xor_low = try func.binOp(lhs_low_bit, rhs_low_bit, Type.u64, .xor);
+            const xor_high = try func.binOp(lhs_msb, rhs_msb, Type.u64, .xor);
+            const lhs_lsb = try func.load(lhs, Type.u64, 0);
+            const rhs_lsb = try func.load(rhs, Type.u64, 0);
+            const xor_low = try func.binOp(lhs_lsb, rhs_lsb, Type.u64, .xor);
             const or_result = try func.binOp(xor_high, xor_low, Type.u64, .@"or");
 
             switch (op) {
@@ -5551,11 +5551,11 @@ fn cmpBigInt(func: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op: std
         else => {
             const ty = if (operand_ty.isSignedInt(mod)) Type.i64 else Type.u64;
             // leave those value on top of the stack for '.select'
-            const lhs_low_bit = try func.load(lhs, Type.u64, 0);
-            const rhs_low_bit = try func.load(rhs, Type.u64, 0);
-            _ = try func.cmp(lhs_low_bit, rhs_low_bit, Type.u64, op);
-            _ = try func.cmp(lhs_high_bit, rhs_high_bit, ty, op);
-            _ = try func.cmp(lhs_high_bit, rhs_high_bit, ty, .eq);
+            const lhs_lsb = try func.load(lhs, Type.u64, 0);
+            const rhs_lsb = try func.load(rhs, Type.u64, 0);
+            _ = try func.cmp(lhs_lsb, rhs_lsb, Type.u64, op);
+            _ = try func.cmp(lhs_msb, rhs_msb, ty, op);
+            _ = try func.cmp(lhs_msb, rhs_msb, ty, .eq);
             try func.addTag(.select);
         },
     }
@@ -6106,11 +6106,11 @@ fn airMulWithOverflow(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
 
     const lhs = try func.resolveInst(extra.lhs);
     const rhs = try func.resolveInst(extra.rhs);
-    const lhs_ty = func.typeOf(extra.lhs);
+    const ty = func.typeOf(extra.lhs);
     const pt = func.pt;
     const mod = pt.zcu;
 
-    if (lhs_ty.zigTypeTag(mod) == .Vector) {
+    if (ty.zigTypeTag(mod) == .Vector) {
         return func.fail("TODO: Implement overflow arithmetic for vectors", .{});
     }
 
@@ -6119,7 +6119,7 @@ fn airMulWithOverflow(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     var overflow_bit = try func.ensureAllocLocal(Type.u1);
     defer overflow_bit.free(func);
 
-    const int_info = lhs_ty.intInfo(mod);
+    const int_info = ty.intInfo(mod);
     const wasm_bits = toWasmBits(int_info.bits) orelse {
         return func.fail("TODO: Implement `@mulWithOverflow` for integer bitsize: {d}", .{int_info.bits});
     };
@@ -6131,147 +6131,106 @@ fn airMulWithOverflow(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     };
 
     // for 32 bit integers we upcast it to a 64bit integer
-    const bin_op = if (int_info.bits == 32) blk: {
+    const mul = if (wasm_bits == 32) blk: {
         const new_ty = if (int_info.signedness == .signed) Type.i64 else Type.u64;
-        const lhs_upcast = try func.intcast(lhs, lhs_ty, new_ty);
-        const rhs_upcast = try func.intcast(rhs, lhs_ty, new_ty);
+        const lhs_upcast = try func.intcast(lhs, ty, new_ty);
+        const rhs_upcast = try func.intcast(rhs, ty, new_ty);
         const bin_op = try (try func.binOp(lhs_upcast, rhs_upcast, new_ty, .mul)).toLocal(func, new_ty);
-        if (int_info.signedness == .unsigned) {
-            const shr = try func.binOp(bin_op, .{ .imm64 = int_info.bits }, new_ty, .shr);
-            const wrap = try func.intcast(shr, new_ty, lhs_ty);
-            _ = try func.cmp(wrap, zero, lhs_ty, .neq);
-            try func.addLabel(.local_set, overflow_bit.local.value);
-            break :blk try func.intcast(bin_op, new_ty, lhs_ty);
-        } else {
-            const down_cast = try (try func.intcast(bin_op, new_ty, lhs_ty)).toLocal(func, lhs_ty);
-            var shr = try (try func.binOp(down_cast, .{ .imm32 = int_info.bits - 1 }, lhs_ty, .shr)).toLocal(func, lhs_ty);
-            defer shr.free(func);
-
-            const shr_res = try func.binOp(bin_op, .{ .imm64 = int_info.bits }, new_ty, .shr);
-            const down_shr_res = try func.intcast(shr_res, new_ty, lhs_ty);
-            _ = try func.cmp(down_shr_res, shr, lhs_ty, .neq);
-            try func.addLabel(.local_set, overflow_bit.local.value);
-            break :blk down_cast;
-        }
-    } else if (int_info.signedness == .signed and wasm_bits == 32) blk: {
-        const bin_op = try (try func.binOp(lhs, rhs, lhs_ty, .mul)).toLocal(func, lhs_ty);
-        const mul_abs = try func.wrapOperand(bin_op, lhs_ty);
-        _ = try func.cmp(mul_abs, bin_op, lhs_ty, .neq);
+        const res = try (try func.trunc(bin_op, ty, new_ty)).toLocal(func, ty);
+        const res_upcast = try func.intcast(res, ty, new_ty);
+        _ = try func.cmp(res_upcast, bin_op, new_ty, .neq);
         try func.addLabel(.local_set, overflow_bit.local.value);
-        break :blk try func.wrapOperand(bin_op, lhs_ty);
-    } else if (wasm_bits == 32) blk: {
-        var bin_op = try (try func.binOp(lhs, rhs, lhs_ty, .mul)).toLocal(func, lhs_ty);
-        defer bin_op.free(func);
-        const shift_imm: WValue = if (wasm_bits == 32)
-            .{ .imm32 = int_info.bits }
-        else
-            .{ .imm64 = int_info.bits };
-        const shr = try func.binOp(bin_op, shift_imm, lhs_ty, .shr);
-        _ = try func.cmp(shr, zero, lhs_ty, .neq);
-        try func.addLabel(.local_set, overflow_bit.local.value);
-        break :blk try func.wrapOperand(bin_op, lhs_ty);
-    } else if (int_info.bits == 64 and int_info.signedness == .unsigned) blk: {
-        const new_ty = Type.u128;
-        var lhs_upcast = try (try func.intcast(lhs, lhs_ty, new_ty)).toLocal(func, lhs_ty);
-        defer lhs_upcast.free(func);
-        var rhs_upcast = try (try func.intcast(rhs, lhs_ty, new_ty)).toLocal(func, lhs_ty);
-        defer rhs_upcast.free(func);
-        const bin_op = try func.binOp(lhs_upcast, rhs_upcast, new_ty, .mul);
-        const lsb = try func.load(bin_op, lhs_ty, 8);
-        _ = try func.cmp(lsb, zero, lhs_ty, .neq);
-        try func.addLabel(.local_set, overflow_bit.local.value);
-
-        break :blk try func.load(bin_op, lhs_ty, 0);
-    } else if (int_info.bits == 64 and int_info.signedness == .signed) blk: {
-        const shift_val: WValue = .{ .imm64 = 63 };
-        var lhs_shifted = try (try func.binOp(lhs, shift_val, lhs_ty, .shr)).toLocal(func, lhs_ty);
-        defer lhs_shifted.free(func);
-        var rhs_shifted = try (try func.binOp(rhs, shift_val, lhs_ty, .shr)).toLocal(func, lhs_ty);
-        defer rhs_shifted.free(func);
-
-        const bin_op = try func.callIntrinsic(
-            "__multi3",
-            &[_]InternPool.Index{.i64_type} ** 4,
-            Type.i128,
-            &.{ lhs, lhs_shifted, rhs, rhs_shifted },
-        );
-        const res = try func.allocLocal(lhs_ty);
-        const msb = try func.load(bin_op, lhs_ty, 0);
-        try func.addLabel(.local_tee, res.local.value);
-        const msb_shifted = try func.binOp(msb, shift_val, lhs_ty, .shr);
-        const lsb = try func.load(bin_op, lhs_ty, 8);
-        _ = try func.cmp(lsb, msb_shifted, lhs_ty, .neq);
+        break :blk res;
+    } else if (wasm_bits == 64) blk: {
+        const new_ty = if (int_info.signedness == .signed) Type.i128 else Type.u128;
+        const lhs_upcast = try func.intcast(lhs, ty, new_ty);
+        const rhs_upcast = try func.intcast(rhs, ty, new_ty);
+        const bin_op = try (try func.binOp(lhs_upcast, rhs_upcast, new_ty, .mul)).toLocal(func, new_ty);
+        const res = try (try func.trunc(bin_op, ty, new_ty)).toLocal(func, ty);
+        const res_upcast = try func.intcast(res, ty, new_ty);
+        _ = try func.cmp(res_upcast, bin_op, new_ty, .neq);
         try func.addLabel(.local_set, overflow_bit.local.value);
         break :blk res;
     } else if (int_info.bits == 128 and int_info.signedness == .unsigned) blk: {
-        var lhs_msb = try (try func.load(lhs, Type.u64, 0)).toLocal(func, Type.u64);
-        defer lhs_msb.free(func);
-        var lhs_lsb = try (try func.load(lhs, Type.u64, 8)).toLocal(func, Type.u64);
+        var lhs_lsb = try (try func.load(lhs, Type.u64, 0)).toLocal(func, Type.u64);
         defer lhs_lsb.free(func);
-        var rhs_msb = try (try func.load(rhs, Type.u64, 0)).toLocal(func, Type.u64);
-        defer rhs_msb.free(func);
-        var rhs_lsb = try (try func.load(rhs, Type.u64, 8)).toLocal(func, Type.u64);
+        var lhs_msb = try (try func.load(lhs, Type.u64, 8)).toLocal(func, Type.u64);
+        defer lhs_msb.free(func);
+        var rhs_lsb = try (try func.load(rhs, Type.u64, 0)).toLocal(func, Type.u64);
         defer rhs_lsb.free(func);
+        var rhs_msb = try (try func.load(rhs, Type.u64, 8)).toLocal(func, Type.u64);
+        defer rhs_msb.free(func);
 
-        const mul1 = try func.callIntrinsic(
+        const cross_1 = try func.callIntrinsic(
             "__multi3",
             &[_]InternPool.Index{.i64_type} ** 4,
             Type.i128,
-            &.{ lhs_lsb, zero, rhs_msb, zero },
+            &.{ lhs_msb, zero, rhs_lsb, zero },
         );
-        const mul2 = try func.callIntrinsic(
+        const cross_2 = try func.callIntrinsic(
             "__multi3",
             &[_]InternPool.Index{.i64_type} ** 4,
             Type.i128,
-            &.{ rhs_lsb, zero, lhs_msb, zero },
+            &.{ rhs_msb, zero, lhs_lsb, zero },
         );
-        const mul3 = try func.callIntrinsic(
+        const mul_lsb = try func.callIntrinsic(
             "__multi3",
             &[_]InternPool.Index{.i64_type} ** 4,
             Type.i128,
-            &.{ lhs_msb, zero, rhs_msb, zero },
+            &.{ rhs_lsb, zero, lhs_lsb, zero },
         );
 
-        const rhs_lsb_not_zero = try func.cmp(rhs_lsb, zero, Type.u64, .neq);
-        const lhs_lsb_not_zero = try func.cmp(lhs_lsb, zero, Type.u64, .neq);
-        const lsb_and = try func.binOp(rhs_lsb_not_zero, lhs_lsb_not_zero, Type.bool, .@"and");
-        const mul1_lsb = try func.load(mul1, Type.u64, 8);
-        const mul1_lsb_not_zero = try func.cmp(mul1_lsb, zero, Type.u64, .neq);
-        const lsb_or1 = try func.binOp(lsb_and, mul1_lsb_not_zero, Type.bool, .@"or");
-        const mul2_lsb = try func.load(mul2, Type.u64, 8);
-        const mul2_lsb_not_zero = try func.cmp(mul2_lsb, zero, Type.u64, .neq);
-        const lsb_or = try func.binOp(lsb_or1, mul2_lsb_not_zero, Type.bool, .@"or");
-
-        const mul1_msb = try func.load(mul1, Type.u64, 0);
-        const mul2_msb = try func.load(mul2, Type.u64, 0);
-        const mul_add1 = try func.binOp(mul1_msb, mul2_msb, Type.u64, .add);
-
-        var mul3_lsb = try (try func.load(mul3, Type.u64, 8)).toLocal(func, Type.u64);
-        defer mul3_lsb.free(func);
-        var mul_add2 = try (try func.binOp(mul_add1, mul3_lsb, Type.u64, .add)).toLocal(func, Type.u64);
-        defer mul_add2.free(func);
-        const mul_add_lt = try func.cmp(mul_add2, mul3_lsb, Type.u64, .lt);
+        const rhs_msb_not_zero = try func.cmp(rhs_msb, zero, Type.u64, .neq);
+        const lhs_msb_not_zero = try func.cmp(lhs_msb, zero, Type.u64, .neq);
+        const both_msb_not_zero = try func.binOp(rhs_msb_not_zero, lhs_msb_not_zero, Type.bool, .@"and");
+        const cross_1_msb = try func.load(cross_1, Type.u64, 8);
+        const cross_1_msb_not_zero = try func.cmp(cross_1_msb, zero, Type.u64, .neq);
+        const cond_1 = try func.binOp(both_msb_not_zero, cross_1_msb_not_zero, Type.bool, .@"or");
+        const cross_2_msb = try func.load(cross_2, Type.u64, 8);
+        const cross_2_msb_not_zero = try func.cmp(cross_2_msb, zero, Type.u64, .neq);
+        const cond_2 = try func.binOp(cond_1, cross_2_msb_not_zero, Type.bool, .@"or");
+
+        const cross_1_lsb = try func.load(cross_1, Type.u64, 0);
+        const cross_2_lsb = try func.load(cross_2, Type.u64, 0);
+        const cross_add = try func.binOp(cross_1_lsb, cross_2_lsb, Type.u64, .add);
+
+        var mul_lsb_msb = try (try func.load(mul_lsb, Type.u64, 8)).toLocal(func, Type.u64);
+        defer mul_lsb_msb.free(func);
+        var all_add = try (try func.binOp(cross_add, mul_lsb_msb, Type.u64, .add)).toLocal(func, Type.u64);
+        defer all_add.free(func);
+        const add_overflow = try func.cmp(all_add, mul_lsb_msb, Type.u64, .lt);
 
         // result for overflow bit
-        _ = try func.binOp(lsb_or, mul_add_lt, Type.bool, .@"or");
+        _ = try func.binOp(cond_2, add_overflow, Type.bool, .@"or");
         try func.addLabel(.local_set, overflow_bit.local.value);
 
         const tmp_result = try func.allocStack(Type.u128);
         try func.emitWValue(tmp_result);
-        const mul3_msb = try func.load(mul3, Type.u64, 0);
-        try func.store(.stack, mul3_msb, Type.u64, tmp_result.offset());
-        try func.store(tmp_result, mul_add2, Type.u64, 8);
+        const mul_lsb_lsb = try func.load(mul_lsb, Type.u64, 0);
+        try func.store(.stack, mul_lsb_lsb, Type.u64, tmp_result.offset());
+        try func.store(tmp_result, all_add, Type.u64, 8);
         break :blk tmp_result;
-    } else return func.fail("TODO: @mulWithOverflow for integers between 32 and 64 bits", .{});
-    var bin_op_local = try bin_op.toLocal(func, lhs_ty);
+    } else if (int_info.bits == 128 and int_info.signedness == .signed) blk: {
+        const overflow_ret = try func.allocStack(Type.i32);
+        const res = try func.callIntrinsic(
+            "__muloti4",
+            &[_]InternPool.Index{ .i128_type, .i128_type, .usize_type },
+            Type.i128,
+            &.{ lhs, rhs, overflow_ret },
+        );
+        _ = try func.load(overflow_ret, Type.i32, 0);
+        try func.addLabel(.local_set, overflow_bit.local.value);
+        break :blk res;
+    } else return func.fail("TODO: @mulWithOverflow for {}", .{ty.fmt(pt)});
+    var bin_op_local = try mul.toLocal(func, ty);
     defer bin_op_local.free(func);
 
-    const result_ptr = try func.allocStack(func.typeOfIndex(inst));
-    try func.store(result_ptr, bin_op_local, lhs_ty, 0);
-    const offset = @as(u32, @intCast(lhs_ty.abiSize(pt)));
-    try func.store(result_ptr, overflow_bit, Type.u1, offset);
+    const result = try func.allocStack(func.typeOfIndex(inst));
+    const offset: u32 = @intCast(ty.abiSize(pt));
+    try func.store(result, bin_op_local, ty, 0);
+    try func.store(result, overflow_bit, Type.u1, offset);
 
-    return func.finishAir(inst, result_ptr, &.{ extra.lhs, extra.rhs });
+    return func.finishAir(inst, result, &.{ extra.lhs, extra.rhs });
 }
 
 fn airMaxMin(func: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void {
@@ -6378,16 +6337,16 @@ fn airClz(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
             try func.addTag(.i32_wrap_i64);
         },
         128 => {
-            var lsb = try (try func.load(operand, Type.u64, 8)).toLocal(func, Type.u64);
-            defer lsb.free(func);
+            var msb = try (try func.load(operand, Type.u64, 8)).toLocal(func, Type.u64);
+            defer msb.free(func);
 
-            try func.emitWValue(lsb);
+            try func.emitWValue(msb);
             try func.addTag(.i64_clz);
             _ = try func.load(operand, Type.u64, 0);
             try func.addTag(.i64_clz);
             try func.emitWValue(.{ .imm64 = 64 });
             try func.addTag(.i64_add);
-            _ = try func.cmp(lsb, .{ .imm64 = 0 }, Type.u64, .neq);
+            _ = try func.cmp(msb, .{ .imm64 = 0 }, Type.u64, .neq);
             try func.addTag(.select);
             try func.addTag(.i32_wrap_i64);
         },
@@ -6438,10 +6397,10 @@ fn airCtz(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
             try func.addTag(.i32_wrap_i64);
         },
         128 => {
-            var msb = try (try func.load(operand, Type.u64, 0)).toLocal(func, Type.u64);
-            defer msb.free(func);
+            var lsb = try (try func.load(operand, Type.u64, 0)).toLocal(func, Type.u64);
+            defer lsb.free(func);
 
-            try func.emitWValue(msb);
+            try func.emitWValue(lsb);
             try func.addTag(.i64_ctz);
             _ = try func.load(operand, Type.u64, 8);
             if (wasm_bits != int_info.bits) {
@@ -6455,7 +6414,7 @@ fn airCtz(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
             } else {
                 try func.addTag(.i64_add);
             }
-            _ = try func.cmp(msb, .{ .imm64 = 0 }, Type.u64, .neq);
+            _ = try func.cmp(lsb, .{ .imm64 = 0 }, Type.u64, .neq);
             try func.addTag(.select);
             try func.addTag(.i32_wrap_i64);
         },
test/behavior/math.zig
@@ -918,37 +918,22 @@ test "small int addition" {
     try expect(ov[1] == 1);
 }
 
+fn testMulWithOverflow(comptime T: type, a: T, b: T, mul: T, bit: u1) !void {
+    const ov = @mulWithOverflow(a, b);
+    try expect(ov[0] == mul);
+    try expect(ov[1] == bit);
+}
+
 test "basic @mulWithOverflow" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
-    {
-        var a: u8 = 86;
-        _ = &a;
-        const ov = @mulWithOverflow(a, 3);
-        try expect(ov[0] == 2);
-        try expect(ov[1] == 1);
-    }
-    {
-        var a: u8 = 85;
-        _ = &a;
-        const ov = @mulWithOverflow(a, 3);
-        try expect(ov[0] == 255);
-        try expect(ov[1] == 0);
-    }
-
-    var a: u8 = 123;
-    _ = &a;
-    var b: u8 = 2;
-    var ov = @mulWithOverflow(a, b);
-    try expect(ov[0] == 246);
-    try expect(ov[1] == 0);
+    try testMulWithOverflow(u8, 86, 3, 2, 1);
+    try testMulWithOverflow(u8, 85, 3, 255, 0);
 
-    b = 4;
-    ov = @mulWithOverflow(a, b);
-    try expect(ov[0] == 236);
-    try expect(ov[1] == 1);
+    try testMulWithOverflow(u8, 123, 2, 246, 0);
+    try testMulWithOverflow(u8, 123, 4, 236, 1);
 }
 
 test "extensive @mulWithOverflow" {
@@ -956,173 +941,38 @@ test "extensive @mulWithOverflow" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
-    {
-        var a: u5 = 3;
-        _ = &a;
-        var b: u5 = 10;
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 30);
-        try expect(ov[1] == 0);
-
-        b = 11;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 1);
-        try expect(ov[1] == 1);
-    }
-
-    {
-        var a: i5 = 3;
-        _ = &a;
-        var b: i5 = -5;
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == -15);
-        try expect(ov[1] == 0);
-
-        b = -6;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 14);
-        try expect(ov[1] == 1);
-    }
-
-    {
-        var a: u8 = 3;
-        _ = &a;
-        var b: u8 = 85;
-
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 255);
-        try expect(ov[1] == 0);
-
-        b = 86;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 2);
-        try expect(ov[1] == 1);
-    }
-
-    {
-        var a: i8 = 3;
-        _ = &a;
-        var b: i8 = -42;
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == -126);
-        try expect(ov[1] == 0);
-
-        b = -43;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 127);
-        try expect(ov[1] == 1);
-    }
-
-    {
-        var a: u14 = 3;
-        _ = &a;
-        var b: u14 = 0x1555;
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 0x3fff);
-        try expect(ov[1] == 0);
-
-        b = 0x1556;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 2);
-        try expect(ov[1] == 1);
-    }
-
-    {
-        var a: i14 = 3;
-        _ = &a;
-        var b: i14 = -0xaaa;
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == -0x1ffe);
-        try expect(ov[1] == 0);
+    try testMulWithOverflow(u5, 3, 10, 30, 0);
+    try testMulWithOverflow(u5, 3, 11, 1, 1);
+    try testMulWithOverflow(i5, 3, -5, -15, 0);
+    try testMulWithOverflow(i5, 3, -6, 14, 1);
 
-        b = -0xaab;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 0x1fff);
-    }
+    try testMulWithOverflow(u8, 3, 85, 255, 0);
+    try testMulWithOverflow(u8, 3, 86, 2, 1);
+    try testMulWithOverflow(i8, 3, -42, -126, 0);
+    try testMulWithOverflow(i8, 3, -43, 127, 1);
 
-    {
-        var a: u16 = 3;
-        _ = &a;
-        var b: u16 = 0x5555;
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 0xffff);
-        try expect(ov[1] == 0);
+    try testMulWithOverflow(u14, 3, 0x1555, 0x3fff, 0);
+    try testMulWithOverflow(u14, 3, 0x1556, 2, 1);
+    try testMulWithOverflow(i14, 3, -0xaaa, -0x1ffe, 0);
+    try testMulWithOverflow(i14, 3, -0xaab, 0x1fff, 1);
 
-        b = 0x5556;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 2);
-        try expect(ov[1] == 1);
-    }
+    try testMulWithOverflow(u16, 3, 0x5555, 0xffff, 0);
+    try testMulWithOverflow(u16, 3, 0x5556, 2, 1);
+    try testMulWithOverflow(i16, 3, -0x2aaa, -0x7ffe, 0);
+    try testMulWithOverflow(i16, 3, -0x2aab, 0x7fff, 1);
 
-    {
-        var a: i16 = 3;
-        _ = &a;
-        var b: i16 = -0x2aaa;
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == -0x7ffe);
-        try expect(ov[1] == 0);
+    try testMulWithOverflow(u30, 3, 0x15555555, 0x3fffffff, 0);
+    try testMulWithOverflow(u30, 3, 0x15555556, 2, 1);
+    try testMulWithOverflow(i30, 3, -0xaaaaaaa, -0x1ffffffe, 0);
+    try testMulWithOverflow(i30, 3, -0xaaaaaab, 0x1fffffff, 1);
 
-        b = -0x2aab;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 0x7fff);
-        try expect(ov[1] == 1);
-    }
+    try testMulWithOverflow(u32, 3, 0x55555555, 0xffffffff, 0);
+    try testMulWithOverflow(u32, 3, 0x55555556, 2, 1);
+    try testMulWithOverflow(i32, 3, -0x2aaaaaaa, -0x7ffffffe, 0);
+    try testMulWithOverflow(i32, 3, -0x2aaaaaab, 0x7fffffff, 1);
 
-    {
-        var a: u30 = 3;
-        _ = &a;
-        var b: u30 = 0x15555555;
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 0x3fffffff);
-        try expect(ov[1] == 0);
-
-        b = 0x15555556;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 2);
-        try expect(ov[1] == 1);
-    }
-
-    {
-        var a: i30 = 3;
-        _ = &a;
-        var b: i30 = -0xaaaaaaa;
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == -0x1ffffffe);
-        try expect(ov[1] == 0);
-
-        b = -0xaaaaaab;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 0x1fffffff);
-        try expect(ov[1] == 1);
-    }
-
-    {
-        var a: u32 = 3;
-        _ = &a;
-        var b: u32 = 0x55555555;
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 0xffffffff);
-        try expect(ov[1] == 0);
-
-        b = 0x55555556;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 2);
-        try expect(ov[1] == 1);
-    }
-
-    {
-        var a: i32 = 3;
-        _ = &a;
-        var b: i32 = -0x2aaaaaaa;
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == -0x7ffffffe);
-        try expect(ov[1] == 0);
-
-        b = -0x2aaaaaab;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 0x7fffffff);
-        try expect(ov[1] == 1);
-    }
+    try testMulWithOverflow(u31, 1 << 30, 1 << 30, 0, 1);
+    try testMulWithOverflow(i31, minInt(i31), minInt(i31), 0, 1);
 }
 
 test "@mulWithOverflow bitsize > 32" {
@@ -1131,118 +981,49 @@ test "@mulWithOverflow bitsize > 32" {
     // aarch64 fails on a release build of the compiler.
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
 
-    {
-        var a: u40 = 3;
-        var b: u40 = 0x55_5555_5555;
-        var ov = @mulWithOverflow(a, b);
+    try testMulWithOverflow(u40, 3, 0x55_5555_5555, 0xff_ffff_ffff, 0);
+    try testMulWithOverflow(u40, 3, 0x55_5555_5556, 2, 1);
+    try testMulWithOverflow(u40, 0x10_0000_0000, 0x10_0000_0000, 0, 1);
 
-        try expect(ov[0] == 0xff_ffff_ffff);
-        try expect(ov[1] == 0);
+    try testMulWithOverflow(i40, 3, -0x2a_aaaa_aaaa, -0x7f_ffff_fffe, 0);
+    try testMulWithOverflow(i40, 3, -0x2a_aaaa_aaab, 0x7f_ffff_ffff, 1);
+    try testMulWithOverflow(i40, 6, -0x2a_aaaa_aaab, -2, 1);
+    try testMulWithOverflow(i40, 0x08_0000_0000, -0x08_0000_0001, -0x8_0000_0000, 1);
 
-        // Check that overflow bits in the low-word of wide-multiplications are checked too.
-        // Intermediate result is less than 2**64
-        b = 0x55_5555_5556;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 2);
-        try expect(ov[1] == 1);
-
-        // Check that overflow bits in the high-word of wide-multiplications are checked too.
-        // Intermediate result is more than 2**64 and bits 40..64 are not set.
-        a = 0x10_0000_0000;
-        b = 0x10_0000_0000;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 0);
-        try expect(ov[1] == 1);
-    }
+    try testMulWithOverflow(u62, 3, 0x1555555555555555, 0x3fffffffffffffff, 0);
+    try testMulWithOverflow(u62, 3, 0x1555555555555556, 2, 1);
+    try testMulWithOverflow(i62, 3, -0xaaaaaaaaaaaaaaa, -0x1ffffffffffffffe, 0);
+    try testMulWithOverflow(i62, 3, -0xaaaaaaaaaaaaaab, 0x1fffffffffffffff, 1);
 
-    {
-        var a: i40 = 3;
-        var b: i40 = -0x2a_aaaa_aaaa;
-        var ov = @mulWithOverflow(a, b);
+    try testMulWithOverflow(u64, 3, 0x5555555555555555, 0xffffffffffffffff, 0);
+    try testMulWithOverflow(u64, 3, 0x5555555555555556, 2, 1);
+    try testMulWithOverflow(i64, 3, -0x2aaaaaaaaaaaaaaa, -0x7ffffffffffffffe, 0);
+    try testMulWithOverflow(i64, 3, -0x2aaaaaaaaaaaaaab, 0x7fffffffffffffff, 1);
 
-        try expect(ov[0] == -0x7f_ffff_fffe);
-        try expect(ov[1] == 0);
-
-        // Check that the sign bit is properly checked
-        b = -0x2a_aaaa_aaab;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 0x7f_ffff_ffff);
-        try expect(ov[1] == 1);
-
-        // Check that the low-order bits above the sign are checked.
-        a = 6;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == -2);
-        try expect(ov[1] == 1);
-
-        // Check that overflow bits in the high-word of wide-multiplications are checked too.
-        // high parts and sign of low-order bits are all 1.
-        a = 0x08_0000_0000;
-        b = -0x08_0000_0001;
-        ov = @mulWithOverflow(a, b);
-
-        try expect(ov[0] == -0x8_0000_0000);
-        try expect(ov[1] == 1);
-    }
-
-    {
-        var a: u62 = 3;
-        _ = &a;
-        var b: u62 = 0x1555555555555555;
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 0x3fffffffffffffff);
-        try expect(ov[1] == 0);
-
-        b = 0x1555555555555556;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 2);
-        try expect(ov[1] == 1);
-    }
-
-    {
-        var a: i62 = 3;
-        _ = &a;
-        var b: i62 = -0xaaaaaaaaaaaaaaa;
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == -0x1ffffffffffffffe);
-        try expect(ov[1] == 0);
-
-        b = -0xaaaaaaaaaaaaaab;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 0x1fffffffffffffff);
-        try expect(ov[1] == 1);
-    }
+    try testMulWithOverflow(u63, 1 << 62, 1 << 62, 0, 1);
+    try testMulWithOverflow(i63, minInt(i63), minInt(i63), 0, 1);
+}
 
-    {
-        var a: u64 = 3;
-        _ = &a;
-        var b: u64 = 0x5555555555555555;
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 0xffffffffffffffff);
-        try expect(ov[1] == 0);
+test "@mulWithOverflow bitsize 128 bits" {
+    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
 
-        b = 0x5555555555555556;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 2);
-        try expect(ov[1] == 1);
-    }
+    try testMulWithOverflow(u128, 3, 0x5555555555555555_5555555555555555, 0xffffffffffffffff_ffffffffffffffff, 0);
+    try testMulWithOverflow(u128, 3, 0x5555555555555555_5555555555555556, 2, 1);
 
-    {
-        var a: i64 = 3;
-        _ = &a;
-        var b: i64 = -0x2aaaaaaaaaaaaaaa;
-        var ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == -0x7ffffffffffffffe);
-        try expect(ov[1] == 0);
+    try testMulWithOverflow(u128, 1 << 100, 1 << 27, 1 << 127, 0);
+    try testMulWithOverflow(u128, maxInt(u128), maxInt(u128), 1, 1);
+    try testMulWithOverflow(u128, 1 << 100, 1 << 28, 0, 1);
+    try testMulWithOverflow(u128, 1 << 127, 1 << 127, 0, 1);
 
-        b = -0x2aaaaaaaaaaaaaab;
-        ov = @mulWithOverflow(a, b);
-        try expect(ov[0] == 0x7fffffffffffffff);
-        try expect(ov[1] == 1);
-    }
+    try testMulWithOverflow(i128, 3, -0x2aaaaaaaaaaaaaaa_aaaaaaaaaaaaaaaa, -0x7fffffffffffffff_fffffffffffffffe, 0);
+    try testMulWithOverflow(i128, 3, -0x2aaaaaaaaaaaaaaa_aaaaaaaaaaaaaaab, 0x7fffffffffffffff_ffffffffffffffff, 1);
+    try testMulWithOverflow(i128, -1, -1, 1, 0);
+    try testMulWithOverflow(i128, minInt(i128), minInt(i128), 0, 1);
 }
 
 test "@mulWithOverflow u256" {