Commit 9edfccb9a7

Jacob Young <jacobly0@users.noreply.github.com>
2025-06-01 05:14:48
Legalize: implement scalarization of overflow intrinsics
1 parent ec579aa
lib/std/simd.zig
@@ -455,7 +455,6 @@ pub fn prefixScan(comptime op: std.builtin.ReduceOp, comptime hop: isize, vec: a
 }
 
 test "vector prefix scan" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
     if ((builtin.cpu.arch == .armeb or builtin.cpu.arch == .thumbeb) and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/22060
     if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/21893
     if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .hexagon) return error.SkipZigTest;
src/Air/Legalize.zig
@@ -33,6 +33,10 @@ pub const Feature = enum {
     scalarize_mod_optimized,
     scalarize_max,
     scalarize_min,
+    scalarize_add_with_overflow,
+    scalarize_sub_with_overflow,
+    scalarize_mul_with_overflow,
+    scalarize_shl_with_overflow,
     scalarize_bit_and,
     scalarize_bit_or,
     scalarize_shr,
@@ -129,6 +133,10 @@ pub const Feature = enum {
             .mod_optimized => .scalarize_mod_optimized,
             .max => .scalarize_max,
             .min => .scalarize_min,
+            .add_with_overflow => .scalarize_add_with_overflow,
+            .sub_with_overflow => .scalarize_sub_with_overflow,
+            .mul_with_overflow => .scalarize_mul_with_overflow,
+            .shl_with_overflow => .scalarize_shl_with_overflow,
             .bit_and => .scalarize_bit_and,
             .bit_or => .scalarize_bit_or,
             .shr => .scalarize_shr,
@@ -279,10 +287,15 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
             },
             .ptr_add,
             .ptr_sub,
-            .add_with_overflow,
+            => {},
+            inline .add_with_overflow,
             .sub_with_overflow,
             .mul_with_overflow,
             .shl_with_overflow,
+            => |air_tag| if (l.features.contains(comptime .scalarize(air_tag))) {
+                const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
+                if (ty_pl.ty.toType().fieldType(0, zcu).isVector(zcu)) continue :inst l.replaceInst(inst, .block, try l.scalarizeOverflowBlockPayload(inst));
+            },
             .alloc,
             => {},
             .inferred_alloc,
@@ -518,7 +531,7 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
                 switch (vector_ty.vectorLen(zcu)) {
                     0 => unreachable,
                     1 => continue :inst l.replaceInst(inst, .bitcast, .{ .ty_op = .{
-                        .ty = Air.internedToRef(vector_ty.scalarType(zcu).toIntern()),
+                        .ty = Air.internedToRef(vector_ty.childType(zcu).toIntern()),
                         .operand = reduce.operand,
                     } }),
                     else => break :done,
@@ -646,7 +659,7 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime form:
                             .ty_op => loop.block.add(l, .{
                                 .tag = orig.tag,
                                 .data = .{ .ty_op = .{
-                                    .ty = Air.internedToRef(orig.data.ty_op.ty.toType().scalarType(zcu).toIntern()),
+                                    .ty = Air.internedToRef(res_ty.childType(zcu).toIntern()),
                                     .operand = loop.block.add(l, .{
                                         .tag = .array_elem_val,
                                         .data = .{ .bin_op = .{
@@ -745,7 +758,7 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime form:
                                     .shuffle_two => unwrapped.operand_a,
                                 };
                                 const operand_a_len = l.typeOf(operand_a).vectorLen(zcu);
-                                const elem_ty = unwrapped.result_ty.scalarType(zcu);
+                                const elem_ty = res_ty.childType(zcu);
                                 var res_elem: Result = .init(l, elem_ty, &loop.block);
                                 res_elem.block = .init(loop.block.stealCapacity(extra_insts));
                                 {
@@ -945,7 +958,7 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime form:
                             },
                             .select => {
                                 const extra = l.extraData(Air.Bin, orig.data.pl_op.payload).data;
-                                var res_elem: Result = .init(l, l.typeOf(extra.lhs).scalarType(zcu), &loop.block);
+                                var res_elem: Result = .init(l, l.typeOf(extra.lhs).childType(zcu), &loop.block);
                                 res_elem.block = .init(loop.block.stealCapacity(extra_insts));
                                 {
                                     var select_cond_br: CondBr = .init(l, res_elem.block.add(l, .{
@@ -1043,6 +1056,176 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime form:
         .payload = try l.addBlockBody(res_block.body()),
     } };
 }
+fn scalarizeOverflowBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data { // Builds the `.block` payload that replaces a vector `*_with_overflow` instruction with a loop applying the same operation one element at a time.
+    const pt = l.pt;
+    const zcu = pt.zcu;
+
+    const orig = l.air_instructions.get(@intFromEnum(orig_inst)); // snapshot of the original instruction (its tag and `ty_pl` payload are reused below)
+    const res_ty = l.typeOfIndex(orig_inst); // result type of the original instruction; fields 0 and 1 are accessed below
+    const wrapped_res_ty = res_ty.fieldType(0, zcu); // field 0: the vector of wrapped results
+    const wrapped_res_scalar_ty = wrapped_res_ty.childType(zcu); // element type of that vector
+    const res_len = wrapped_res_ty.vectorLen(zcu); // number of elements to process
+
+    var inst_buf: [21]Air.Inst.Index = undefined; // NOTE(review): presumably sized to exactly the number of instructions emitted below; keep in sync when adding or removing any
+    try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); // reserve room up front; the `add` calls below are not error-checked
+
+    var res_block: Block = .init(&inst_buf);
+    {
+        const res_alloc_inst = res_block.add(l, .{ // temporary holding the whole result, filled in element by element
+            .tag = .alloc,
+            .data = .{ .ty = try pt.singleMutPtrType(res_ty) },
+        });
+        const ptr_wrapped_res_inst = res_block.add(l, .{ // pointer to field 0 of the temporary (wrapped results)
+            .tag = .struct_field_ptr_index_0,
+            .data = .{ .ty_op = .{
+                .ty = Air.internedToRef((try pt.singleMutPtrType(wrapped_res_ty)).toIntern()),
+                .operand = res_alloc_inst.toRef(),
+            } },
+        });
+        const ptr_overflow_res_inst = res_block.add(l, .{ // pointer to field 1 of the temporary (overflow bits)
+            .tag = .struct_field_ptr_index_1,
+            .data = .{ .ty_op = .{
+                .ty = Air.internedToRef((try pt.singleMutPtrType(res_ty.fieldType(1, zcu))).toIntern()),
+                .operand = res_alloc_inst.toRef(),
+            } },
+        });
+        const index_alloc_inst = res_block.add(l, .{ // `usize` loop counter
+            .tag = .alloc,
+            .data = .{ .ty = .ptr_usize },
+        });
+        _ = res_block.add(l, .{ // counter starts at zero
+            .tag = .store,
+            .data = .{ .bin_op = .{
+                .lhs = index_alloc_inst.toRef(),
+                .rhs = .zero_usize,
+            } },
+        });
+
+        var loop: Loop = .init(l, &res_block);
+        loop.block = .init(res_block.stealRemainingCapacity()); // the loop body takes over the rest of `inst_buf`
+        {
+            const cur_index_inst = loop.block.add(l, .{ // current element index
+                .tag = .load,
+                .data = .{ .ty_op = .{
+                    .ty = .usize_type,
+                    .operand = index_alloc_inst.toRef(),
+                } },
+            });
+            const extra = l.extraData(Air.Bin, orig.data.ty_pl.payload).data; // lhs/rhs operands of the original instruction
+            const res_elem = loop.block.add(l, .{ // the same overflow operation applied to a single element pair
+                .tag = orig.tag,
+                .data = .{ .ty_pl = .{
+                    .ty = Air.internedToRef(try zcu.intern_pool.getTupleType(zcu.gpa, pt.tid, .{ // scalar result type: tuple of (element type, u1)
+                        .types = &.{ wrapped_res_scalar_ty.toIntern(), .u1_type },
+                        .values = &(.{.none} ** 2),
+                    })),
+                    .payload = try l.addExtra(Air.Bin, .{
+                        .lhs = loop.block.add(l, .{ // lhs[index]
+                            .tag = .array_elem_val,
+                            .data = .{ .bin_op = .{
+                                .lhs = extra.lhs,
+                                .rhs = cur_index_inst.toRef(),
+                            } },
+                        }).toRef(),
+                        .rhs = loop.block.add(l, .{ // rhs[index]
+                            .tag = .array_elem_val,
+                            .data = .{ .bin_op = .{
+                                .lhs = extra.rhs,
+                                .rhs = cur_index_inst.toRef(),
+                            } },
+                        }).toRef(),
+                    }),
+                } },
+            });
+            _ = loop.block.add(l, .{ // overflow_vector[index] = res_elem[1]
+                .tag = .vector_store_elem,
+                .data = .{ .vector_store_elem = .{
+                    .vector_ptr = ptr_overflow_res_inst.toRef(),
+                    .payload = try l.addExtra(Air.Bin, .{
+                        .lhs = cur_index_inst.toRef(),
+                        .rhs = loop.block.add(l, .{
+                            .tag = .struct_field_val,
+                            .data = .{ .ty_pl = .{
+                                .ty = .u1_type,
+                                .payload = try l.addExtra(Air.StructField, .{
+                                    .struct_operand = res_elem.toRef(),
+                                    .field_index = 1,
+                                }),
+                            } },
+                        }).toRef(),
+                    }),
+                } },
+            });
+            _ = loop.block.add(l, .{ // wrapped_vector[index] = res_elem[0]
+                .tag = .vector_store_elem,
+                .data = .{ .vector_store_elem = .{
+                    .vector_ptr = ptr_wrapped_res_inst.toRef(),
+                    .payload = try l.addExtra(Air.Bin, .{
+                        .lhs = cur_index_inst.toRef(),
+                        .rhs = loop.block.add(l, .{
+                            .tag = .struct_field_val,
+                            .data = .{ .ty_pl = .{
+                                .ty = Air.internedToRef(wrapped_res_scalar_ty.toIntern()),
+                                .payload = try l.addExtra(Air.StructField, .{
+                                    .struct_operand = res_elem.toRef(),
+                                    .field_index = 0,
+                                }),
+                            } },
+                        }).toRef(),
+                    }),
+                } },
+            });
+
+            var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp( // the check comes after the body, so the body runs at least once
+                l,
+                .lt,
+                cur_index_inst.toRef(),
+                try pt.intRef(.usize, res_len - 1), // NOTE(review): assumes res_len >= 1 (no zero-length vectors reach here) — confirm
+                .{},
+            )).toRef(), &loop.block, .{});
+            loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity());
+            {
+                _ = loop_cond_br.then_block.add(l, .{ // more elements remain: index += 1
+                    .tag = .store,
+                    .data = .{ .bin_op = .{
+                        .lhs = index_alloc_inst.toRef(),
+                        .rhs = loop_cond_br.then_block.add(l, .{
+                            .tag = .add,
+                            .data = .{ .bin_op = .{
+                                .lhs = cur_index_inst.toRef(),
+                                .rhs = .one_usize,
+                            } },
+                        }).toRef(),
+                    } },
+                });
+                _ = loop_cond_br.then_block.add(l, .{ // jump back to the top of the loop
+                    .tag = .repeat,
+                    .data = .{ .repeat = .{ .loop_inst = loop.inst } },
+                });
+            }
+            loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity());
+            _ = loop_cond_br.else_block.add(l, .{ // last element done: load the filled temporary and break to `orig_inst` with it
+                .tag = .br,
+                .data = .{ .br = .{
+                    .block_inst = orig_inst, // the caller replaces `orig_inst` with a `.block` using the payload returned here
+                    .operand = loop_cond_br.else_block.add(l, .{
+                        .tag = .load,
+                        .data = .{ .ty_op = .{
+                            .ty = Air.internedToRef(res_ty.toIntern()),
+                            .operand = res_alloc_inst.toRef(),
+                        } },
+                    }).toRef(),
+                } },
+            });
+            try loop_cond_br.finish(l);
+        }
+        try loop.finish(l);
+    }
+    return .{ .ty_pl = .{ // block payload: same result type as the original instruction, plus the body built above
+        .ty = Air.internedToRef(res_ty.toIntern()),
+        .payload = try l.addBlockBody(res_block.body()),
+    } };
+}
 
 fn safeIntcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
     const pt = l.pt;
@@ -1535,8 +1718,9 @@ fn addBlockBody(l: *Legalize, body: []const Air.Inst.Index) Error!u32 {
     return @intCast(l.air_extra.items.len);
 }
 
-// inline to propagate comptime `tag`s
-inline fn replaceInst(l: *Legalize, inst: Air.Inst.Index, tag: Air.Inst.Tag, data: Air.Inst.Data) Air.Inst.Tag {
+/// Returns `tag` to remind the caller to `continue :inst` the result.
+/// This is inline to propagate the comptime-known `tag`.
+inline fn replaceInst(l: *Legalize, inst: Air.Inst.Index, comptime tag: Air.Inst.Tag, data: Air.Inst.Data) Air.Inst.Tag {
     const orig_ty = if (std.debug.runtime_safety) l.typeOfIndex(inst) else {};
     l.air_instructions.set(@intFromEnum(inst), .{ .tag = tag, .data = data });
     if (std.debug.runtime_safety) assert(l.typeOfIndex(inst).toIntern() == orig_ty.toIntern());
src/arch/x86_64/CodeGen.zig
@@ -53,6 +53,10 @@ pub fn legalizeFeatures(target: *const std.Target) *const Air.Legalize.Features
             .scalarize_div_exact_optimized = use_old,
             .scalarize_max = use_old,
             .scalarize_min = use_old,
+            .scalarize_add_with_overflow = true,
+            .scalarize_sub_with_overflow = true,
+            .scalarize_mul_with_overflow = true,
+            .scalarize_shl_with_overflow = true,
             .scalarize_bit_and = use_old,
             .scalarize_bit_or = use_old,
             .scalarize_shr = true,
@@ -283,7 +287,7 @@ pub const MCValue = union(enum) {
     /// Payload is a frame address.
     lea_frame: bits.FrameAddr,
     /// Supports integer_per_element abi
-    elementwise_regs_then_frame: packed struct { regs: u3, frame_off: i29, frame_index: FrameIndex },
+    elementwise_args: packed struct { regs: u3, frame_off: i29, frame_index: FrameIndex },
     /// This indicates that we have already allocated a frame index for this instruction,
     /// but it has not been spilled there yet in the current control flow.
     /// Payload is a frame index.
@@ -305,7 +309,7 @@ pub const MCValue = union(enum) {
             .lea_direct,
             .lea_got,
             .lea_frame,
-            .elementwise_regs_then_frame,
+            .elementwise_args,
             .reserved_frame,
             .air_ref,
             => false,
@@ -420,7 +424,7 @@ pub const MCValue = union(enum) {
             .lea_direct,
             .lea_got,
             .lea_frame,
-            .elementwise_regs_then_frame,
+            .elementwise_args,
             .reserved_frame,
             .air_ref,
             => unreachable, // not in memory
@@ -454,7 +458,7 @@ pub const MCValue = union(enum) {
             .load_got,
             .load_frame,
             .load_symbol,
-            .elementwise_regs_then_frame,
+            .elementwise_args,
             .reserved_frame,
             .air_ref,
             => unreachable, // not dereferenceable
@@ -474,7 +478,7 @@ pub const MCValue = union(enum) {
             .unreach,
             .dead,
             .undef,
-            .elementwise_regs_then_frame,
+            .elementwise_args,
             .reserved_frame,
             .air_ref,
             => unreachable, // not valid
@@ -528,7 +532,7 @@ pub const MCValue = union(enum) {
             .load_got,
             .lea_got,
             .lea_frame,
-            .elementwise_regs_then_frame,
+            .elementwise_args,
             .reserved_frame,
             .lea_symbol,
             => unreachable,
@@ -612,7 +616,7 @@ pub const MCValue = union(enum) {
             .load_got => |pl| try writer.print("[got:{d}]", .{pl}),
             .lea_got => |pl| try writer.print("got:{d}", .{pl}),
             .load_frame => |pl| try writer.print("[{} + 0x{x}]", .{ pl.index, pl.off }),
-            .elementwise_regs_then_frame => |pl| try writer.print("elementwise:{d}:[{} + 0x{x}]", .{
+            .elementwise_args => |pl| try writer.print("elementwise:{d}:[{} + 0x{x}]", .{
                 pl.regs, pl.frame_index, pl.frame_off,
             }),
             .lea_frame => |pl| try writer.print("{} + 0x{x}", .{ pl.index, pl.off }),
@@ -645,7 +649,7 @@ const InstTracking = struct {
             .lea_symbol,
             => result,
             .dead,
-            .elementwise_regs_then_frame,
+            .elementwise_args,
             .reserved_frame,
             .air_ref,
             => unreachable,
@@ -754,7 +758,7 @@ const InstTracking = struct {
             .register_overflow,
             .register_mask,
             .indirect,
-            .elementwise_regs_then_frame,
+            .elementwise_args,
             .air_ref,
             => unreachable,
         }
@@ -168424,7 +168428,7 @@ fn load(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerE
         .register_quadruple,
         .register_overflow,
         .register_mask,
-        .elementwise_regs_then_frame,
+        .elementwise_args,
         .reserved_frame,
         => unreachable, // not a valid pointer
         .immediate,
@@ -168642,7 +168646,7 @@ fn store(
         .register_quadruple,
         .register_overflow,
         .register_mask,
-        .elementwise_regs_then_frame,
+        .elementwise_args,
         .reserved_frame,
         => unreachable, // not a valid pointer
         .immediate,
@@ -169128,7 +169132,7 @@ fn genUnOpMir(self: *CodeGen, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv:
         .lea_direct,
         .lea_got,
         .lea_frame,
-        .elementwise_regs_then_frame,
+        .elementwise_args,
         .reserved_frame,
         .air_ref,
         => unreachable, // unmodifiable destination
@@ -170799,7 +170803,7 @@ fn genBinOp(
                         .load_got,
                         .lea_got,
                         .lea_frame,
-                        .elementwise_regs_then_frame,
+                        .elementwise_args,
                         .reserved_frame,
                         .air_ref,
                         => unreachable,
@@ -171982,7 +171986,7 @@ fn genBinOpMir(
         .lea_got,
         .lea_frame,
         .lea_symbol,
-        .elementwise_regs_then_frame,
+        .elementwise_args,
         .reserved_frame,
         .air_ref,
         => unreachable, // unmodifiable destination
@@ -172018,7 +172022,7 @@ fn genBinOpMir(
                     .undef,
                     .register_overflow,
                     .register_mask,
-                    .elementwise_regs_then_frame,
+                    .elementwise_args,
                     .reserved_frame,
                     => unreachable,
                     .register,
@@ -172178,7 +172182,7 @@ fn genBinOpMir(
                 .undef,
                 .register_overflow,
                 .register_mask,
-                .elementwise_regs_then_frame,
+                .elementwise_args,
                 .reserved_frame,
                 .air_ref,
                 => unreachable,
@@ -172274,7 +172278,7 @@ fn genBinOpMir(
                     .undef,
                     .register_overflow,
                     .register_mask,
-                    .elementwise_regs_then_frame,
+                    .elementwise_args,
                     .reserved_frame,
                     .air_ref,
                     => unreachable,
@@ -172405,7 +172409,7 @@ fn genIntMulComplexOpMir(self: *CodeGen, dst_ty: Type, dst_mcv: MCValue, src_mcv
         .lea_direct,
         .lea_got,
         .lea_frame,
-        .elementwise_regs_then_frame,
+        .elementwise_args,
         .reserved_frame,
         .air_ref,
         => unreachable, // unmodifiable destination
@@ -172437,7 +172441,7 @@ fn genIntMulComplexOpMir(self: *CodeGen, dst_ty: Type, dst_mcv: MCValue, src_mcv
                 .register_quadruple,
                 .register_overflow,
                 .register_mask,
-                .elementwise_regs_then_frame,
+                .elementwise_args,
                 .reserved_frame,
                 .air_ref,
                 => unreachable,
@@ -172557,7 +172561,7 @@ fn airArg(self: *CodeGen, inst: Air.Inst.Index) !void {
                 try self.genCopy(arg_ty, dst_mcv, src_mcv, .{});
                 break :result dst_mcv;
             },
-            .elementwise_regs_then_frame => |regs_frame_addr| {
+            .elementwise_args => |regs_frame_addr| {
                 try self.spillEflagsIfOccupied();
 
                 const fn_info = zcu.typeToFunc(self.fn_type).?;
@@ -172661,7 +172665,7 @@ fn genLocalDebugInfo(
         .arg, .dbg_arg_inline, .dbg_var_val => |tag| {
             switch (mcv) {
                 .none => try self.asmAir(.dbg_local, inst),
-                .unreach, .dead, .elementwise_regs_then_frame, .reserved_frame, .air_ref => unreachable,
+                .unreach, .dead, .elementwise_args, .reserved_frame, .air_ref => unreachable,
                 .immediate => |imm| try self.asmAirImmediate(.dbg_local, inst, .u(imm)),
                 .lea_frame => |frame_addr| try self.asmAirFrameAddress(.dbg_local, inst, frame_addr),
                 .lea_symbol => |sym_off| try self.asmAirImmediate(.dbg_local, inst, .rel(sym_off)),
@@ -172684,7 +172688,7 @@ fn genLocalDebugInfo(
         },
         .dbg_var_ptr => switch (mcv) {
             else => unreachable,
-            .unreach, .dead, .elementwise_regs_then_frame, .reserved_frame, .air_ref => unreachable,
+            .unreach, .dead, .elementwise_args, .reserved_frame, .air_ref => unreachable,
             .lea_frame => |frame_addr| try self.asmAirMemory(.dbg_local, inst, .{
                 .base = .{ .frame = frame_addr.index },
                 .mod = .{ .rm = .{
@@ -172853,7 +172857,7 @@ fn genCall(self: *CodeGen, info: union(enum) {
                 try self.genCopy(arg_ty, dst_arg, src_arg, opts);
                 try self.freeValue(src_arg);
             },
-            .elementwise_regs_then_frame => |regs_frame_addr| {
+            .elementwise_args => |regs_frame_addr| {
                 const index_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
                 const index_lock = self.register_manager.lockRegAssumeUnused(index_reg);
                 defer self.register_manager.unlockReg(index_lock);
@@ -172962,7 +172966,7 @@ fn genCall(self: *CodeGen, info: union(enum) {
             .indirect => |reg_off| try self.genSetReg(reg_off.reg, .usize, .{
                 .lea_frame = .{ .index = frame_index, .off = -reg_off.off },
             }, .{}),
-            .elementwise_regs_then_frame => |regs_frame_addr| {
+            .elementwise_args => |regs_frame_addr| {
                 const src_mem: Memory = if (src_arg.isBase()) try src_arg.mem(self, .{ .size = .dword }) else .{
                     .base = .{ .reg = try self.copyToTmpRegister(
                         .usize,
@@ -173350,7 +173354,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v
                                     .lea_got,
                                     .lea_frame,
                                     .lea_symbol,
-                                    .elementwise_regs_then_frame,
+                                    .elementwise_args,
                                     .reserved_frame,
                                     .air_ref,
                                     => unreachable,
@@ -173405,7 +173409,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v
                                     .lea_direct,
                                     .lea_got,
                                     .lea_frame,
-                                    .elementwise_regs_then_frame,
+                                    .elementwise_args,
                                     .reserved_frame,
                                     .air_ref,
                                     => unreachable,
@@ -173810,7 +173814,7 @@ fn isNull(self: *CodeGen, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue)
         .lea_direct,
         .lea_got,
         .lea_symbol,
-        .elementwise_regs_then_frame,
+        .elementwise_args,
         .reserved_frame,
         .air_ref,
         => unreachable,
@@ -175867,7 +175871,7 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C
         .lea_got,
         .lea_frame,
         .lea_symbol,
-        .elementwise_regs_then_frame,
+        .elementwise_args,
         .reserved_frame,
         .air_ref,
         => unreachable, // unmodifiable destination
@@ -175878,7 +175882,7 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C
             .dead,
             .undef,
             .register_overflow,
-            .elementwise_regs_then_frame,
+            .elementwise_args,
             .reserved_frame,
             => unreachable,
             .immediate,
@@ -176056,7 +176060,7 @@ fn genSetReg(
         .none,
         .unreach,
         .dead,
-        .elementwise_regs_then_frame,
+        .elementwise_args,
         .reserved_frame,
         => unreachable,
         .undef => if (opts.safety) switch (dst_reg.class()) {
@@ -176593,7 +176597,7 @@ fn genSetMem(
         .none,
         .unreach,
         .dead,
-        .elementwise_regs_then_frame,
+        .elementwise_args,
         .reserved_frame,
         => unreachable,
         .undef => if (opts.safety) try self.genInlineMemset(
@@ -180885,7 +180889,7 @@ fn resolveCallingConventionValues(
 
                         result.stack_byte_count =
                             std.mem.alignForward(u31, result.stack_byte_count, frame_elem_align);
-                        arg_mcv[arg_mcv_i] = .{ .elementwise_regs_then_frame = .{
+                        arg_mcv[arg_mcv_i] = .{ .elementwise_args = .{
                             .regs = remaining_param_int_regs,
                             .frame_off = @intCast(result.stack_byte_count),
                             .frame_index = stack_frame_base,
@@ -181510,7 +181514,7 @@ const Temp = struct {
                 .load_got,
                 .lea_got,
                 .lea_frame,
-                .elementwise_regs_then_frame,
+                .elementwise_args,
                 .reserved_frame,
                 .air_ref,
                 => false,
@@ -181945,7 +181949,7 @@ const Temp = struct {
             .register_quadruple,
             .register_overflow,
             .register_mask,
-            .elementwise_regs_then_frame,
+            .elementwise_args,
             .reserved_frame,
             .air_ref,
             => unreachable, // not a valid pointer
@@ -186669,19 +186673,46 @@ const Temp = struct {
             if (cg.reused_operands.isSet(op_index)) continue;
             try cg.processDeath(op_ref.toIndexAllowNone() orelse continue);
         }
-        if (cg.liveness.isUnused(inst)) try temp.die(cg) else switch (temp.unwrap(cg)) {
-            .ref, .err_ret_trace => {
-                const result = try cg.allocRegOrMem(inst, true);
-                try cg.genCopy(cg.typeOfIndex(inst), result, temp.tracking(cg).short, .{});
-                tracking_log.debug("{} => {} (birth)", .{ inst, result });
-                cg.inst_tracking.putAssumeCapacityNoClobber(inst, .init(result));
-            },
-            .temp => |temp_index| {
-                const temp_tracking = temp_index.tracking(cg);
-                tracking_log.debug("{} => {} (birth)", .{ inst, temp_tracking.short });
-                cg.inst_tracking.putAssumeCapacityNoClobber(inst, .init(temp_tracking.short));
-                assert(cg.reuseTemp(inst, temp_index.toIndex(), temp_tracking));
-            },
+        if (cg.liveness.isUnused(inst)) try temp.die(cg) else {
+            switch (temp.unwrap(cg)) {
+                .ref, .err_ret_trace => {
+                    const temp_mcv = temp.tracking(cg).short;
+                    const result = result: switch (temp_mcv) {
+                        .none, .unreach, .dead, .elementwise_args, .reserved_frame, .air_ref => unreachable,
+                        .undef, .immediate, .lea_frame => temp_mcv,
+                        .eflags,
+                        .register,
+                        .register_pair,
+                        .register_triple,
+                        .register_quadruple,
+                        .register_offset,
+                        .register_overflow,
+                        .register_mask,
+                        .memory,
+                        .load_symbol,
+                        .lea_symbol,
+                        .indirect,
+                        .load_direct,
+                        .lea_direct,
+                        .load_got,
+                        .lea_got,
+                        .load_frame,
+                        => {
+                            const result = try cg.allocRegOrMem(inst, true);
+                            try cg.genCopy(cg.typeOfIndex(inst), result, temp_mcv, .{});
+                            break :result result;
+                        },
+                    };
+                    tracking_log.debug("{} => {} (birth)", .{ inst, result });
+                    cg.inst_tracking.putAssumeCapacityNoClobber(inst, .init(result));
+                },
+                .temp => |temp_index| {
+                    const temp_tracking = temp_index.tracking(cg);
+                    tracking_log.debug("{} => {} (birth)", .{ inst, temp_tracking.short });
+                    cg.inst_tracking.putAssumeCapacityNoClobber(inst, .init(temp_tracking.short));
+                    assert(cg.reuseTemp(inst, temp_index.toIndex(), temp_tracking));
+                },
+            }
         }
         for (0.., op_refs, op_temps) |op_index, op_ref, op_temp| {
             if (op_temp.index != temp.index) continue;
test/behavior/x86_64/binary.zig
@@ -5244,31 +5244,34 @@ test min {
     try test_min.testFloatVectors();
 }
 
-inline fn addWithOverflow(comptime Type: type, lhs: Type, rhs: Type) struct { Type, u1 } {
+inline fn addWithOverflow(comptime Type: type, lhs: Type, rhs: Type) struct { Type, ChangeScalar(Type, u1) } {
     return @addWithOverflow(lhs, rhs);
 }
 test addWithOverflow {
     const test_add_with_overflow = binary(addWithOverflow, .{});
     try test_add_with_overflow.testInts();
+    try test_add_with_overflow.testIntVectors();
 }
 
-inline fn subWithOverflow(comptime Type: type, lhs: Type, rhs: Type) struct { Type, u1 } {
+inline fn subWithOverflow(comptime Type: type, lhs: Type, rhs: Type) struct { Type, ChangeScalar(Type, u1) } {
     return @subWithOverflow(lhs, rhs);
 }
 test subWithOverflow {
     const test_sub_with_overflow = binary(subWithOverflow, .{});
     try test_sub_with_overflow.testInts();
+    try test_sub_with_overflow.testIntVectors();
 }
 
-inline fn mulWithOverflow(comptime Type: type, lhs: Type, rhs: Type) struct { Type, u1 } {
+inline fn mulWithOverflow(comptime Type: type, lhs: Type, rhs: Type) struct { Type, ChangeScalar(Type, u1) } {
     return @mulWithOverflow(lhs, rhs);
 }
 test mulWithOverflow {
     const test_mul_with_overflow = binary(mulWithOverflow, .{});
     try test_mul_with_overflow.testInts();
+    try test_mul_with_overflow.testIntVectors();
 }
 
-inline fn shlWithOverflow(comptime Type: type, lhs: Type, rhs: Type) struct { Type, u1 } {
+inline fn shlWithOverflow(comptime Type: type, lhs: Type, rhs: Type) struct { Type, ChangeScalar(Type, u1) } {
     const bit_cast_rhs: AsSignedness(Type, .unsigned) = @bitCast(rhs);
     const truncate_rhs: Log2Int(Type) = @truncate(bit_cast_rhs);
     return @shlWithOverflow(lhs, if (comptime cast(Log2Int(Scalar(Type)), @bitSizeOf(Scalar(Type)))) |bits| truncate_rhs % splat(Log2Int(Type), bits) else truncate_rhs);
@@ -5276,6 +5279,7 @@ inline fn shlWithOverflow(comptime Type: type, lhs: Type, rhs: Type) struct { Ty
 test shlWithOverflow {
     const test_shl_with_overflow = binary(shlWithOverflow, .{});
     try test_shl_with_overflow.testInts();
+    try test_shl_with_overflow.testIntVectors();
 }
 
 inline fn equal(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs == rhs) {
test/behavior/floatop.zig
@@ -14,9 +14,11 @@ fn epsForType(comptime T: type) T {
 }
 
 test "add f16" {
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .f16c)) return error.SkipZigTest;
+
     try testAdd(f16);
     try comptime testAdd(f16);
 }
@@ -123,10 +125,12 @@ fn testMul(comptime T: type) !void {
 
 test "cmp f16" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.cpu.arch.isArm() and builtin.target.abi.float() == .soft) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/21234
 
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .f16c)) return error.SkipZigTest;
+
     try testCmp(f16);
     try comptime testCmp(f16);
 }
@@ -338,9 +342,11 @@ test "different sized float comparisons" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .f16c)) return error.SkipZigTest;
+
     try testDifferentSizedFloatComparisons();
     try comptime testDifferentSizedFloatComparisons();
 }
@@ -386,10 +392,12 @@ test "@sqrt f16" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
 
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .f16c)) return error.SkipZigTest;
+
     try testSqrt(f16);
     try comptime testSqrt(f16);
 }
@@ -1129,9 +1137,11 @@ test "@abs f16" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .f16c)) return error.SkipZigTest;
+
     try testFabs(f16);
     try comptime testFabs(f16);
 }
@@ -1263,9 +1273,11 @@ test "@floor f32/f64" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest;
+
     try testFloor(f32);
     try comptime testFloor(f32);
     try testFloor(f64);
@@ -1329,7 +1341,9 @@ test "@floor with vectors" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
+
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest;
 
     try testFloorWithVectors();
     try comptime testFloorWithVectors();
@@ -1360,9 +1374,11 @@ test "@ceil f32/f64" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest;
+
     try testCeil(f32);
     try comptime testCeil(f32);
     try testCeil(f64);
@@ -1426,7 +1442,9 @@ test "@ceil with vectors" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
+
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest;
 
     try testCeilWithVectors();
     try comptime testCeilWithVectors();
@@ -1457,9 +1475,11 @@ test "@trunc f32/f64" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest;
+
     try testTrunc(f32);
     try comptime testTrunc(f32);
     try testTrunc(f64);
@@ -1523,7 +1543,9 @@ test "@trunc with vectors" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
+
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest;
 
     try testTruncWithVectors();
     try comptime testTruncWithVectors();
@@ -1543,9 +1565,11 @@ test "neg f16" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .f16c)) return error.SkipZigTest;
+
     if (builtin.os.tag == .freebsd) {
         // TODO file issue to track this failure
         return error.SkipZigTest;
test/behavior/math.zig
@@ -471,10 +471,12 @@ test "division" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
 
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .f16c)) return error.SkipZigTest;
+
     try testIntDivision();
     try comptime testIntDivision();
 
@@ -1619,10 +1621,10 @@ test "vector integer addition" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {
@@ -1926,7 +1928,9 @@ test "float vector division of comptime zero by runtime nan is nan" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
+
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest;
 
     const ct_zero: @Vector(1, f32) = .{0};
     var rt_nan: @Vector(1, f32) = .{math.nan(f32)};
test/behavior/maximum_minimum.zig
@@ -204,7 +204,6 @@ test "@min/@max notices vector bounds" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
 
     var x: @Vector(2, u16) = .{ 140, 40 };
     const y: @Vector(2, u64) = .{ 5, 100 };
test/behavior/muladd.zig
@@ -6,10 +6,12 @@ test "@mulAdd" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
 
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .fma)) return error.SkipZigTest;
+
     try comptime testMulAdd();
     try testMulAdd();
 }
@@ -137,10 +139,12 @@ test "vector f32" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
 
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .fma)) return error.SkipZigTest;
+
     try comptime vector32();
     try vector32();
 }
@@ -163,10 +167,12 @@ test "vector f64" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
 
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .fma)) return error.SkipZigTest;
+
     try comptime vector64();
     try vector64();
 }
test/behavior/vector.zig
@@ -75,12 +75,12 @@ test "vector bin compares with mem.eql" {
 
 test "vector int operators" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {
@@ -248,9 +248,11 @@ test "array to vector with element type coercion" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .f16c)) return error.SkipZigTest;
+
     const S = struct {
         fn doTheTest() !void {
             var foo: f16 = 3.14;
@@ -285,11 +287,11 @@ test "peer type resolution with coercible element types" {
 
 test "tuple to vector" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {
@@ -682,12 +684,12 @@ test "vector bitwise not operator" {
 
 test "vector shift operators" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTestShift(x: anytype, y: anytype) !void {
@@ -1036,12 +1038,12 @@ test "saturating shift-left" {
 
 test "multiplication-assignment operator with an array operand" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {
@@ -1058,7 +1060,6 @@ test "multiplication-assignment operator with an array operand" {
 
 test "@addWithOverflow" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -1109,7 +1110,6 @@ test "@addWithOverflow" {
 
 test "@subWithOverflow" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -1144,7 +1144,6 @@ test "@subWithOverflow" {
 
 test "@mulWithOverflow" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -1168,7 +1167,6 @@ test "@mulWithOverflow" {
 
 test "@shlWithOverflow" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -1307,7 +1305,7 @@ test "zero multiplicand" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
 
     const zeros = @Vector(2, u32){ 0.0, 0.0 };
     var ones = @Vector(2, u32){ 1.0, 1.0 };
test/cases/safety/memmove_len_mismatch.zig
@@ -15,5 +15,5 @@ pub fn main() !void {
     return error.TestFailed;
 }
 // run
-// backend=llvm
+// backend=stage2,llvm
 // target=native
test/cases/safety/slice_cast_change_len_0.zig
@@ -23,4 +23,5 @@ pub fn panic(message: []const u8, _: ?*std.builtin.StackTrace, _: ?usize) noretu
 const std = @import("std");
 
 // run
-// backend=llvm
+// backend=stage2,llvm
+// target=x86_64-linux
test/cases/safety/slice_cast_change_len_1.zig
@@ -23,4 +23,5 @@ pub fn panic(message: []const u8, _: ?*std.builtin.StackTrace, _: ?usize) noretu
 const std = @import("std");
 
 // run
-// backend=llvm
+// backend=stage2,llvm
+// target=x86_64-linux
test/cases/safety/slice_cast_change_len_2.zig
@@ -23,4 +23,5 @@ pub fn panic(message: []const u8, _: ?*std.builtin.StackTrace, _: ?usize) noretu
 const std = @import("std");
 
 // run
-// backend=llvm
+// backend=stage2,llvm
+// target=x86_64-linux
test/cases/safety/truncating vector cast.zig
@@ -17,5 +17,5 @@ pub fn main() !void {
 }
 
 // run
-// backend=llvm
+// backend=stage2,llvm
 // target=native
test/cases/safety/unsigned-signed vector cast.zig
@@ -17,5 +17,5 @@ pub fn main() !void {
 }
 
 // run
-// backend=llvm
+// backend=stage2,llvm
 // target=native
test/cases/safety/vector integer addition overflow.zig
@@ -18,5 +18,5 @@ fn add(a: @Vector(4, i32), b: @Vector(4, i32)) @Vector(4, i32) {
     return a + b;
 }
 // run
-// backend=llvm
+// backend=stage2,llvm
 // target=native
test/cases/safety/vector integer multiplication overflow.zig
@@ -18,5 +18,5 @@ fn mul(a: @Vector(4, u8), b: @Vector(4, u8)) @Vector(4, u8) {
     return a * b;
 }
 // run
-// backend=llvm
+// backend=stage2,llvm
 // target=native
test/cases/safety/vector integer negation overflow.zig
@@ -18,5 +18,5 @@ fn neg(a: @Vector(4, i16)) @Vector(4, i16) {
     return -a;
 }
 // run
-// backend=llvm
+// backend=stage2,llvm
 // target=native
test/cases/safety/vector integer subtraction overflow.zig
@@ -18,5 +18,5 @@ fn sub(a: @Vector(4, u32), b: @Vector(4, u32)) @Vector(4, u32) {
     return a - b;
 }
 // run
-// backend=llvm
+// backend=stage2,llvm
 // target=native