Commit 4ea18c22f9

Jacob Young <jacobly0@users.noreply.github.com>
2025-02-14 08:20:43
x86_64: rewrite array access
1 parent 9f87aac
Changed files (3)
src
test
behavior
x86_64
src/arch/x86_64/CodeGen.zig
@@ -2418,7 +2418,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
 }
 
 fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
-    @setEvalBranchQuota(12_300);
+    @setEvalBranchQuota(12_400);
     const pt = cg.pt;
     const zcu = pt.zcu;
     const ip = &zcu.intern_pool;
@@ -2486,8 +2486,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
             .reduce_optimized => try cg.airReduce(inst),
             .aggregate_init   => try cg.airAggregateInit(inst),
             .prefetch         => try cg.airPrefetch(inst),
-
-            .array_elem_val      => try cg.airArrayElemVal(inst),
             // zig fmt: on
 
             .arg => if (cg.debug_output != .none) {
@@ -15150,7 +15148,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     },
                     .dst_temps = .{ .{ .rc = .general_purpose }, .unused },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .dst0p, .leaa(.src0, .add_src0_elem_size_times_src1), ._, ._ },
+                        .{ ._, ._, .lea, .dst0p, .leaa(.src0, .add_src0_elem_size_mul_src1), ._, ._ },
                     } },
                 }, .{
                     .dst_constraints = .{ .{ .elem_size_is = 1 }, .any },
@@ -15264,7 +15262,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     },
                     .dst_temps = .{ .{ .rc = .general_purpose }, .unused },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .dst0p, .leaa(.src0, .sub_src0_elem_size_times_src1), ._, ._ },
+                        .{ ._, ._, .lea, .dst0p, .leaa(.src0, .sub_src0_elem_size_mul_src1), ._, ._ },
                     } },
                 }, .{
                     .dst_constraints = .{ .{ .elem_size_is = 1 }, .any },
@@ -52951,6 +52949,200 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 try ops[0].toOffset(0, cg);
                 try ops[0].finish(inst, &.{ty_op.operand}, &ops, cg);
             },
+            .array_elem_val => if (use_old) try cg.airArrayElemVal(inst) else {
+                const bin_op = air_datas[@intFromEnum(inst)].bin_op;
+                const array_ty = cg.typeOf(bin_op.lhs);
+                const res_ty = array_ty.elemType2(zcu);
+                var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
+                var res: [1]Temp = undefined;
+                cg.select(&res, &.{res_ty}, &ops, comptime &.{ .{
+                    .src_constraints = .{ .{ .bool_vec = .dword }, .any, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .imm32, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .c }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .bt, .src0d, .ua(.none, .add_src1_rem_32), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .bool_vec = .dword }, .any, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .c }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .bt, .src0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .bool_vec = .qword }, .any, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .imm32, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .c }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .bt, .src0q, .ua(.none, .add_src1_rem_64), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .bool_vec = .qword }, .any, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .c }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .bt, .src0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_bool_vec, .any, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .imm32, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .c }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .bt, .mema(.src0d, .add_src1_div_8_down_4), .ua(.none, .add_src1_rem_32), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_bool_vec, .any, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .c }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .bt, .src0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{ .{ .int = .byte }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .simm32, .none } },
+                    },
+                    .dst_temps = .{ .{ .rc = .general_purpose }, .unused },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .dst0d, .mema(.src0b, .add_src0_elem_size_mul_src1), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{ .{ .int = .byte }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .rc = .general_purpose }, .unused },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .dst0d, .memi(.src0b, .src1), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{ .{ .int = .word }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .simm32, .none } },
+                    },
+                    .dst_temps = .{ .{ .rc = .general_purpose }, .unused },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .dst0d, .mema(.src0w, .add_src0_elem_size_mul_src1), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{ .{ .int = .word }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .rc = .general_purpose }, .unused },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .dst0d, .memsi(.src0w, .@"2", .src1), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{ .{ .int = .dword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .simm32, .none } },
+                    },
+                    .dst_temps = .{ .{ .rc = .general_purpose }, .unused },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .dst0d, .mema(.src0d, .add_src0_elem_size_mul_src1), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{ .{ .int = .dword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .rc = .general_purpose }, .unused },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .dst0d, .memsi(.src0d, .@"4", .src1), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{ .{ .int = .qword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .simm32, .none } },
+                    },
+                    .dst_temps = .{ .{ .rc = .general_purpose }, .unused },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .dst0q, .mema(.src0q, .add_src0_elem_size_mul_src1), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .dst_constraints = .{ .{ .int = .qword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .rc = .general_purpose }, .unused },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .dst0q, .memsi(.src0q, .@"8", .src1), ._, ._ },
+                    } },
+                } }) catch |err| switch (err) {
+                    error.SelectFailed => {
+                        const elem_size = res_ty.abiSize(zcu);
+                        const base = try cg.tempAllocReg(.usize, abi.RegisterClass.gp);
+                        while (try ops[0].toBase(false, cg) or
+                            try ops[1].toRegClass(true, .general_purpose, cg))
+                        {}
+                        const base_reg = base.tracking(cg).short.register.to64();
+                        const rhs_reg = ops[1].tracking(cg).short.register.to64();
+                        if (!std.math.isPowerOfTwo(elem_size)) {
+                            try cg.spillEflagsIfOccupied();
+                            try cg.asmRegisterRegisterImmediate(
+                                .{ .i_, .mul },
+                                rhs_reg,
+                                rhs_reg,
+                                .u(elem_size),
+                            );
+                            try cg.asmRegisterMemory(
+                                .{ ._, .lea },
+                                base_reg,
+                                try ops[0].tracking(cg).short.mem(cg, .{ .index = rhs_reg }),
+                            );
+                        } else if (elem_size > 8) {
+                            try cg.spillEflagsIfOccupied();
+                            try cg.asmRegisterImmediate(
+                                .{ ._l, .sh },
+                                rhs_reg,
+                                .u(std.math.log2_int(u64, elem_size)),
+                            );
+                            try cg.asmRegisterMemory(
+                                .{ ._, .lea },
+                                base_reg,
+                                try ops[0].tracking(cg).short.mem(cg, .{ .index = rhs_reg }),
+                            );
+                        } else try cg.asmRegisterMemory(
+                            .{ ._, .lea },
+                            base_reg,
+                            try ops[0].tracking(cg).short.mem(cg, .{
+                                .index = rhs_reg,
+                                .scale = .fromFactor(@intCast(elem_size)),
+                            }),
+                        );
+                        // Hack around Sema insanity: lhs could be an arbitrarily large comptime-known array
+                        // which could easily get spilled by the upcoming `load`, which would infinite recurse
+                        // since spilling an array requires the same operation that triggered the spill.
+                        try ops[0].die(cg);
+                        ops[0] = base;
+                        res[0] = try ops[0].load(res_ty, .{}, cg);
+                    },
+                    else => |e| return e,
+                };
+                try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
+            },
             .slice_elem_val, .ptr_elem_val => |air_tag| if (use_old) switch (air_tag) {
                 else => unreachable,
                 .slice_elem_val => try cg.airSliceElemVal(inst),
@@ -52968,7 +53160,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     },
                     .dst_temps = .{ .{ .rc = .general_purpose }, .unused },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .movzx, .dst0d, .leaa(.src0b, .add_src0_elem_size_times_src1), ._, ._ },
+                        .{ ._, ._, .movzx, .dst0d, .leaa(.src0b, .add_src0_elem_size_mul_src1), ._, ._ },
                     } },
                 }, .{
                     .dst_constraints = .{ .{ .int = .byte }, .any },
@@ -52986,7 +53178,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     },
                     .dst_temps = .{ .{ .rc = .general_purpose }, .unused },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .movzx, .dst0d, .leaa(.src0w, .add_src0_elem_size_times_src1), ._, ._ },
+                        .{ ._, ._, .movzx, .dst0d, .leaa(.src0w, .add_src0_elem_size_mul_src1), ._, ._ },
                     } },
                 }, .{
                     .dst_constraints = .{ .{ .int = .word }, .any },
@@ -53004,7 +53196,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     },
                     .dst_temps = .{ .{ .rc = .general_purpose }, .unused },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .dst0d, .leaa(.src0d, .add_src0_elem_size_times_src1), ._, ._ },
+                        .{ ._, ._, .mov, .dst0d, .leaa(.src0d, .add_src0_elem_size_mul_src1), ._, ._ },
                     } },
                 }, .{
                     .dst_constraints = .{ .{ .int = .dword }, .any },
@@ -53022,7 +53214,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     },
                     .dst_temps = .{ .{ .rc = .general_purpose }, .unused },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .dst0q, .leaa(.src0q, .add_src0_elem_size_times_src1), ._, ._ },
+                        .{ ._, ._, .mov, .dst0q, .leaa(.src0q, .add_src0_elem_size_mul_src1), ._, ._ },
                     } },
                 }, .{
                     .required_features = .{ .@"64bit", null, null, null },
@@ -53040,8 +53232,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         while (true) for (&ops) |*op| {
                             if (try op.toRegClass(true, .general_purpose, cg)) break;
                         } else break;
-                        const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64();
-                        const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64();
+                        const lhs_reg = ops[0].tracking(cg).short.register.to64();
+                        const rhs_reg = ops[1].tracking(cg).short.register.to64();
                         if (!std.math.isPowerOfTwo(elem_size)) {
                             try cg.spillEflagsIfOccupied();
                             try cg.asmRegisterRegisterImmediate(
@@ -53052,7 +53244,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             );
                             try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
                                 .base = .{ .reg = lhs_reg },
-                                .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
+                                .mod = .{ .rm = .{ .index = rhs_reg } },
                             });
                         } else if (elem_size > 8) {
                             try cg.spillEflagsIfOccupied();
@@ -53063,12 +53255,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             );
                             try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
                                 .base = .{ .reg = lhs_reg },
-                                .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
+                                .mod = .{ .rm = .{ .index = rhs_reg } },
                             });
                         } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
                             .base = .{ .reg = lhs_reg },
                             .mod = .{ .rm = .{
-                                .size = .qword,
                                 .index = rhs_reg,
                                 .scale = .fromFactor(@intCast(elem_size)),
                             } },
@@ -53095,8 +53286,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     while (true) for (&ops) |*op| {
                         if (try op.toRegClass(true, .general_purpose, cg)) break;
                     } else break;
-                    const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64();
-                    const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64();
+                    const lhs_reg = ops[0].tracking(cg).short.register.to64();
+                    const rhs_reg = ops[1].tracking(cg).short.register.to64();
                     if (!std.math.isPowerOfTwo(elem_size)) {
                         try cg.spillEflagsIfOccupied();
                         try cg.asmRegisterRegisterImmediate(
@@ -53107,7 +53298,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         );
                         try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
                             .base = .{ .reg = lhs_reg },
-                            .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
+                            .mod = .{ .rm = .{ .index = rhs_reg } },
                         });
                     } else if (elem_size > 8) {
                         try cg.spillEflagsIfOccupied();
@@ -53118,12 +53309,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         );
                         try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
                             .base = .{ .reg = lhs_reg },
-                            .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
+                            .mod = .{ .rm = .{ .index = rhs_reg } },
                         });
                     } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
                         .base = .{ .reg = lhs_reg },
                         .mod = .{ .rm = .{
-                            .size = .qword,
                             .index = rhs_reg,
                             .scale = .fromFactor(@intCast(elem_size)),
                         } },
@@ -75183,7 +75373,7 @@ fn airErrUnionPayloadPtrSet(self: *CodeGen, inst: Air.Inst.Index) !void {
             registerAlias(dst_reg, dst_abi_size),
             .{
                 .base = .{ .reg = src_reg },
-                .mod = .{ .rm = .{ .size = .qword, .disp = pl_off } },
+                .mod = .{ .rm = .{ .disp = pl_off } },
             },
         );
         break :result .{ .register = dst_reg };
@@ -75446,7 +75636,7 @@ fn airPtrSliceLenPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
         registerAlias(dst_reg, dst_abi_size),
         .{
             .base = .{ .reg = src_reg },
-            .mod = .{ .rm = .{ .size = .qword, .disp = 8 } },
+            .mod = .{ .rm = .{ .disp = 8 } },
         },
     );
 
@@ -75700,7 +75890,7 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
                 try self.asmRegisterMemory(
                     .{ ._, .lea },
                     addr_reg,
-                    .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } },
+                    .{ .base = .{ .frame = frame_index } },
                 );
             },
             .load_frame => |frame_addr| try self.asmRegisterMemory(
@@ -75708,7 +75898,7 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
                 addr_reg,
                 .{
                     .base = .{ .frame = frame_addr.index },
-                    .mod = .{ .rm = .{ .size = .qword, .disp = frame_addr.off } },
+                    .mod = .{ .rm = .{ .disp = frame_addr.off } },
                 },
             ),
             .memory,
@@ -76717,7 +76907,6 @@ fn airBitReverse(self: *CodeGen, inst: Air.Inst.Index) !void {
             .{
                 .base = .{ .reg = dst.to64() },
                 .mod = .{ .rm = .{
-                    .size = .qword,
                     .index = tmp.to64(),
                     .scale = .@"4",
                 } },
@@ -76744,7 +76933,6 @@ fn airBitReverse(self: *CodeGen, inst: Air.Inst.Index) !void {
             .{
                 .base = .{ .reg = tmp.to64() },
                 .mod = .{ .rm = .{
-                    .size = .qword,
                     .index = dst.to64(),
                     .scale = .@"2",
                 } },
@@ -85591,7 +85779,6 @@ fn genSetReg(
                     dst_reg.to64(),
                     .{
                         .base = .{ .reloc = sym_off.sym_index },
-                        .mod = .{ .rm = .{ .size = .qword } },
                     },
                 );
                 if (sym_off.off != 0) try self.asmRegisterMemory(
@@ -85599,10 +85786,7 @@ fn genSetReg(
                     dst_reg.to64(),
                     .{
                         .base = .{ .reg = dst_reg.to64() },
-                        .mod = .{ .rm = .{
-                            .size = .qword,
-                            .disp = sym_off.off,
-                        } },
+                        .mod = .{ .rm = .{ .disp = sym_off.off } },
                     },
                 );
             },
@@ -85816,18 +86000,12 @@ fn genSetMem(
                     const src_reg = registerAlias(reg_off.reg, abi_size);
                     try self.asmRegisterMemory(.{ ._, .lea }, src_reg, .{
                         .base = .{ .reg = src_reg },
-                        .mod = .{ .rm = .{
-                            .size = .qword,
-                            .disp = reg_off.off,
-                        } },
+                        .mod = .{ .rm = .{ .disp = reg_off.off } },
                     });
                     try self.genSetMem(base, disp, ty, .{ .register = reg_off.reg }, opts);
                     return self.asmRegisterMemory(.{ ._, .lea }, src_reg, .{
                         .base = .{ .reg = src_reg },
-                        .mod = .{ .rm = .{
-                            .size = .qword,
-                            .disp = -reg_off.off,
-                        } },
+                        .mod = .{ .rm = .{ .disp = -reg_off.off } },
                     });
                 },
                 else => |e| return e,
@@ -87185,10 +87363,7 @@ fn airErrorName(self: *CodeGen, inst: Air.Inst.Index) !void {
         start_reg.to64(),
         .{
             .base = .{ .reg = addr_reg.to64() },
-            .mod = .{ .rm = .{
-                .size = .dword,
-                .index = start_reg.to64(),
-            } },
+            .mod = .{ .rm = .{ .index = start_reg.to64() } },
         },
     );
     try self.asmRegisterMemory(
@@ -87196,10 +87371,7 @@ fn airErrorName(self: *CodeGen, inst: Air.Inst.Index) !void {
         end_reg.to32(),
         .{
             .base = .{ .reg = end_reg.to64() },
-            .mod = .{ .rm = .{
-                .size = .byte,
-                .disp = -1,
-            } },
+            .mod = .{ .rm = .{ .disp = -1 } },
         },
     );
 
@@ -89375,17 +89547,11 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void {
                     try self.genSetReg(addr_reg, ptr_anyopaque_ty, reg_save_area, .{});
                     if (!unused) try self.asmRegisterMemory(.{ ._, .lea }, addr_reg, .{
                         .base = .{ .reg = addr_reg },
-                        .mod = .{ .rm = .{
-                            .size = .qword,
-                            .index = offset_reg.to64(),
-                        } },
+                        .mod = .{ .rm = .{ .index = offset_reg.to64() } },
                     });
                     try self.asmRegisterMemory(.{ ._, .lea }, offset_reg, .{
                         .base = .{ .reg = offset_reg.to64() },
-                        .mod = .{ .rm = .{
-                            .size = .qword,
-                            .disp = 8,
-                        } },
+                        .mod = .{ .rm = .{ .disp = 8 } },
                     });
                     try self.genCopy(.c_uint, gp_offset, .{ .register = offset_reg }, .{});
                     const done_reloc = try self.asmJmpReloc(undefined);
@@ -89394,10 +89560,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void {
                     try self.genSetReg(addr_reg, ptr_anyopaque_ty, overflow_arg_area, .{});
                     try self.asmRegisterMemory(.{ ._, .lea }, offset_reg.to64(), .{
                         .base = .{ .reg = addr_reg },
-                        .mod = .{ .rm = .{
-                            .size = .qword,
-                            .disp = @intCast(@max(promote_ty.abiSize(zcu), 8)),
-                        } },
+                        .mod = .{ .rm = .{ .disp = @intCast(@max(promote_ty.abiSize(zcu), 8)) } },
                     });
                     try self.genCopy(
                         ptr_anyopaque_ty,
@@ -89423,17 +89586,11 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void {
                     try self.genSetReg(addr_reg, ptr_anyopaque_ty, reg_save_area, .{});
                     if (!unused) try self.asmRegisterMemory(.{ ._, .lea }, addr_reg, .{
                         .base = .{ .reg = addr_reg },
-                        .mod = .{ .rm = .{
-                            .size = .qword,
-                            .index = offset_reg.to64(),
-                        } },
+                        .mod = .{ .rm = .{ .index = offset_reg.to64() } },
                     });
                     try self.asmRegisterMemory(.{ ._, .lea }, offset_reg, .{
                         .base = .{ .reg = offset_reg.to64() },
-                        .mod = .{ .rm = .{
-                            .size = .qword,
-                            .disp = 16,
-                        } },
+                        .mod = .{ .rm = .{ .disp = 16 } },
                     });
                     try self.genCopy(.c_uint, fp_offset, .{ .register = offset_reg }, .{});
                     const done_reloc = try self.asmJmpReloc(undefined);
@@ -89442,10 +89599,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void {
                     try self.genSetReg(addr_reg, ptr_anyopaque_ty, overflow_arg_area, .{});
                     try self.asmRegisterMemory(.{ ._, .lea }, offset_reg.to64(), .{
                         .base = .{ .reg = addr_reg },
-                        .mod = .{ .rm = .{
-                            .size = .qword,
-                            .disp = @intCast(@max(promote_ty.abiSize(zcu), 8)),
-                        } },
+                        .mod = .{ .rm = .{ .disp = @intCast(@max(promote_ty.abiSize(zcu), 8)) } },
                     });
                     try self.genCopy(
                         ptr_anyopaque_ty,
@@ -90505,10 +90659,7 @@ const Temp = struct {
                 new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
                 try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{
                     .base = .{ .reg = reg.to64() },
-                    .mod = .{ .rm = .{
-                        .size = .qword,
-                        .disp = off,
-                    } },
+                    .mod = .{ .rm = .{ .disp = off } },
                 });
             },
             .register_offset => |reg_off| {
@@ -90517,10 +90668,7 @@ const Temp = struct {
                 new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
                 try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{
                     .base = .{ .reg = reg_off.reg.to64() },
-                    .mod = .{ .rm = .{
-                        .size = .qword,
-                        .disp = reg_off.off + off,
-                    } },
+                    .mod = .{ .rm = .{ .disp = reg_off.off + off } },
                 });
             },
             .lea_symbol => |sym_off| new_temp_index.tracking(cg).* = .init(.{ .lea_symbol = .{
@@ -90627,10 +90775,7 @@ const Temp = struct {
                 new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
                 try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{
                     .base = .{ .reg = reg_off.reg.to64() },
-                    .mod = .{ .rm = .{
-                        .size = .qword,
-                        .disp = reg_off.off + @as(u31, limb_index) * 8,
-                    } },
+                    .mod = .{ .rm = .{ .disp = reg_off.off + @as(u31, limb_index) * 8 } },
                 });
             },
             .load_symbol => |sym_off| {
@@ -93462,13 +93607,14 @@ const Select = struct {
                 elem_size,
                 src0_elem_size,
                 dst0_elem_size,
-                src0_elem_size_times_src1,
+                src0_elem_size_mul_src1,
+                src1,
                 log2_src0_elem_size,
                 smin,
                 smax,
                 umax,
             },
-            op: enum(u2) { mul, div, rem_8_mul },
+            op: enum(u2) { mul, div, div_8_down, rem_8_mul },
             rhs: Memory.Scale,
 
             const none: Adjust = .{ .sign = .pos, .lhs = .none, .op = .mul, .rhs = .@"1" };
@@ -93512,8 +93658,11 @@ const Select = struct {
             const add_8_src0_elem_size: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size, .op = .mul, .rhs = .@"8" };
             const add_src0_elem_size_div_8: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size, .op = .div, .rhs = .@"8" };
             const sub_src0_elem_size: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size, .op = .mul, .rhs = .@"1" };
-            const add_src0_elem_size_times_src1: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size_times_src1, .op = .mul, .rhs = .@"1" };
-            const sub_src0_elem_size_times_src1: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size_times_src1, .op = .mul, .rhs = .@"1" };
+            const add_src0_elem_size_mul_src1: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size_mul_src1, .op = .mul, .rhs = .@"1" };
+            const sub_src0_elem_size_mul_src1: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size_mul_src1, .op = .mul, .rhs = .@"1" };
+            const add_src1_div_8_down_4: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .div_8_down, .rhs = .@"4" };
+            const add_src1_rem_32: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .rem_8_mul, .rhs = .@"4" };
+            const add_src1_rem_64: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .rem_8_mul, .rhs = .@"8" };
             const add_log2_src0_elem_size: Adjust = .{ .sign = .pos, .lhs = .log2_src0_elem_size, .op = .mul, .rhs = .@"1" };
             const add_dst0_elem_size: Adjust = .{ .sign = .pos, .lhs = .dst0_elem_size, .op = .mul, .rhs = .@"1" };
             const add_elem_limbs: Adjust = .{ .sign = .pos, .lhs = .elem_limbs, .op = .mul, .rhs = .@"1" };
@@ -94086,8 +94235,9 @@ const Select = struct {
                 .elem_size => @intCast(op.base.ref.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)),
                 .src0_elem_size => @intCast(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)),
                 .dst0_elem_size => @intCast(Select.Operand.Ref.dst0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)),
-                .src0_elem_size_times_src1 => @intCast(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) *
+                .src0_elem_size_mul_src1 => @intCast(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) *
                     Select.Operand.Ref.src1.valueOf(s).immediate),
+                .src1 => @intCast(Select.Operand.Ref.src1.valueOf(s).immediate),
                 .log2_src0_elem_size => @intCast(std.math.log2(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu))),
                 .smin => @as(SignedImm, std.math.minInt(SignedImm)) >> @truncate(
                     -%op.base.ref.typeOf(s).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu),
@@ -94107,6 +94257,7 @@ const Select = struct {
                     break :op_res op_res[0];
                 },
                 .div => @shrExact(lhs, rhs),
+                .div_8_down => lhs >> 3 & @as(SignedImm, -1) << rhs,
                 .rem_8_mul => lhs & (@as(SignedImm, 1) << @intCast(@as(u3, 3) + rhs)) - 1,
             };
             return switch (op.flags.adjust.sign) {
src/arch/x86_64/Lower.zig
@@ -431,7 +431,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
                                 _ = lower.reloc(.{ .linker_tlsld = sym_index }, 0);
                                 lower.result_insts[lower.result_insts_len] = try .new(.none, .lea, &.{
                                     .{ .reg = .rdi },
-                                    .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) },
+                                    .{ .mem = Memory.initRip(.none, 0) },
                                 }, lower.target);
                                 lower.result_insts_len += 1;
                                 _ = lower.reloc(.{
@@ -443,7 +443,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
                                 lower.result_insts_len += 1;
                                 _ = lower.reloc(.{ .linker_dtpoff = sym_index }, 0);
                                 emit_mnemonic = .lea;
-                                break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{
+                                break :op .{ .mem = Memory.initSib(.none, .{
                                     .base = .{ .reg = .rax },
                                     .disp = std.math.minInt(i32),
                                 }) };
@@ -456,7 +456,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
                                 lower.result_insts_len += 1;
                                 _ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
                                 emit_mnemonic = .lea;
-                                break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{
+                                break :op .{ .mem = Memory.initSib(.none, .{
                                     .base = .{ .reg = .rax },
                                     .disp = std.math.minInt(i32),
                                 }) };
@@ -465,10 +465,10 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
 
                         _ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
                         if (lower.pic) switch (mnemonic) {
-                            .lea => {
-                                if (elf_sym.flags.is_extern_ptr) emit_mnemonic = .mov;
-                                break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) };
-                            },
+                            .lea => if (elf_sym.flags.is_extern_ptr) {
+                                emit_mnemonic = .mov;
+                                break :op .{ .mem = Memory.initRip(.ptr, 0) };
+                            } else break :op .{ .mem = Memory.initRip(.none, 0) },
                             .mov => {
                                 if (elf_sym.flags.is_extern_ptr) {
                                     const reg = ops[0].reg;
@@ -505,7 +505,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
                             _ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
                             lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
                                 .{ .reg = .rdi },
-                                .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) },
+                                .{ .mem = Memory.initRip(.ptr, 0) },
                             }, lower.target);
                             lower.result_insts_len += 1;
                             lower.result_insts[lower.result_insts_len] = try .new(.none, .call, &.{
@@ -518,10 +518,10 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
 
                         _ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
                         break :op switch (mnemonic) {
-                            .lea => {
-                                if (macho_sym.flags.is_extern_ptr) emit_mnemonic = .mov;
-                                break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) };
-                            },
+                            .lea => if (macho_sym.flags.is_extern_ptr) {
+                                emit_mnemonic = .mov;
+                                break :op .{ .mem = Memory.initRip(.ptr, 0) };
+                            } else break :op .{ .mem = Memory.initRip(.none, 0) },
                             .mov => {
                                 if (macho_sym.flags.is_extern_ptr) {
                                     const reg = ops[0].reg;
test/behavior/x86_64/mem.zig
@@ -1,4 +1,4 @@
-fn access(comptime array: anytype) !void {
+fn accessSlice(comptime array: anytype) !void {
     var slice: []const @typeInfo(@TypeOf(array)).array.child = undefined;
     slice = &array;
     inline for (0.., &array) |ct_index, *elem| {
@@ -20,18 +20,153 @@ fn access(comptime array: anytype) !void {
         if (slice[rt_index] != elem.*) return error.Unexpected;
     }
 }
-test access {
-    try access([3]u8{ 0xdb, 0xef, 0xbd });
-    try access([3]u16{ 0x340e, 0x3654, 0x88d7 });
-    try access([3]u32{ 0xd424c2c0, 0x2d6ac466, 0x5a0cfaba });
-    try access([3]u64{
+test accessSlice { // Runs accessSlice over three-element arrays of u8, u16, u32, u64, and u128.
+    try accessSlice([3]u8{ 0xdb, 0xef, 0xbd });
+    try accessSlice([3]u16{ 0x340e, 0x3654, 0x88d7 });
+    try accessSlice([3]u32{ 0xd424c2c0, 0x2d6ac466, 0x5a0cfaba });
+    try accessSlice([3]u64{
         0x9327a4f5221666a6,
         0x5c34d3ddd84a8b12,
         0xbae087f39f649260,
     });
-    try access([3]u128{
+    try accessSlice([3]u128{
         0x601cf010065444d4d42d5536dd9b95db,
         0xa03f592fcaa22d40af23a0c735531e3c,
         0x5da44907b31602b95c2d93f0b582ceab,
     });
 }
+
+fn accessVector(comptime init: anytype) !void { // Checks that indexing a vector through a `var` index matches indexing through a comptime index; returns error.Unexpected on any mismatch.
+    const Vector = @TypeOf(init); // must be a vector type: its length is read from @typeInfo(Vector).vector.len below
+    var vector: Vector = undefined;
+    vector = init; // assigned after declaration, presumably so the value is not treated as comptime-known (TODO confirm)
+    inline for (0..@typeInfo(Vector).vector.len) |ct_index| { // unrolled, so ct_index is comptime-known in every iteration
+        var rt_index: usize = undefined;
+        rt_index = ct_index; // same position as ct_index, but held in a `var`
+        if (&vector[rt_index] != &vector[ct_index]) return error.Unexpected; // element addresses must agree
+        if (vector[rt_index] != vector[ct_index]) return error.Unexpected; // element values must agree
+    }
+}
+test accessVector { // Bool vectors of length 1, 2, 3, 5, then 7-9, 15-17, 31-33, 63-65; then 8-element vectors of u8 through u256.
+    try accessVector(@Vector(1, bool){
+        false,
+    });
+    try accessVector(@Vector(2, bool){
+        false, true,
+    });
+    try accessVector(@Vector(3, bool){
+        true, true, false,
+    });
+    try accessVector(@Vector(5, bool){
+        true, false, true, false, true,
+    });
+    try accessVector(@Vector(7, bool){
+        true, false, true, true, true, false, true,
+    });
+    try accessVector(@Vector(8, bool){
+        false, true, false, true, false, false, false, true,
+    });
+    try accessVector(@Vector(9, bool){
+        true, true, false, true, false, false, false, false,
+        true,
+    });
+    try accessVector(@Vector(15, bool){
+        false, true, true,  true,  false, true,  false, false,
+        true,  true, false, false, true,  false, false,
+    });
+    try accessVector(@Vector(16, bool){
+        true,  true, false, true,  false, false, false, false,
+        false, true, true,  false, false, false, true,  true,
+    });
+    try accessVector(@Vector(17, bool){
+        true,  false, true, true,  false, true,  false, true,
+        true,  true,  true, false, false, false, true,  true,
+        false,
+    });
+    try accessVector(@Vector(31, bool){
+        true,  false, true,  true,  false, true,  true,  true,
+        false, true,  false, true,  false, true,  true,  true,
+        false, false, true,  false, false, false, false, true,
+        true,  true,  true,  false, false, false, false,
+    });
+    try accessVector(@Vector(32, bool){
+        true,  true,  false, false, false, true, true,  true,
+        false, true,  true,  true,  false, true, false, true,
+        false, true,  false, true,  false, true, true,  false,
+        false, false, false, false, false, true, true,  true,
+    });
+    try accessVector(@Vector(33, bool){
+        true,  false, false, false, false, true,  true,  true,
+        false, false, true,  false, true,  true,  false, true,
+        true,  true,  false, true,  true,  false, false, false,
+        false, true,  false, false, false, true,  true,  false,
+        false,
+    });
+    try accessVector(@Vector(63, bool){
+        false, false, true,  true,  true,  false, true,  true,
+        true,  false, true,  true,  true,  false, true,  false,
+        true,  true,  false, true,  false, true,  true,  true,
+        false, false, true,  false, false, false, false, true,
+        true,  true,  true,  true,  false, true,  false, true,
+        true,  true,  false, false, true,  false, false, true,
+        false, true,  false, false, false, false, true,  true,
+        false, true,  false, false, true,  true,  true,
+    });
+    try accessVector(@Vector(64, bool){
+        false, false, true,  true,  true,  false, true,  true,
+        true,  false, true,  true,  false, true,  true,  false,
+        false, false, false, false, true,  true,  false, true,
+        true,  true,  true,  true,  false, false, false, true,
+        true,  false, true,  true,  false, false, true,  false,
+        false, true,  true,  false, true,  true,  false, false,
+        true,  true,  false, true,  false, true,  true,  true,
+        false, true,  true,  false, false, false, false, false,
+    });
+    try accessVector(@Vector(65, bool){
+        false, false, true,  true,  true,  true,  true,  true,
+        true,  false, false, false, false, true,  true,  false,
+        true,  false, true,  true,  true,  false, false, false,
+        true,  false, true,  true,  false, true,  true,  true,
+        true,  true,  false, true,  true,  false, true,  false,
+        false, true,  false, true,  false, false, true,  false,
+        true,  false, true,  true,  true,  false, true,  true,
+        false, false, true,  true,  true,  true,  false, false,
+        true,
+    });
+    try accessVector(@Vector(8, u8){ // from here on: unsigned integer elements, 8 per vector, widths 8 to 256 bits
+        0x60, 0xf7, 0xf4, 0xb0, 0x05, 0xd3, 0x06, 0x78,
+    });
+    try accessVector(@Vector(8, u16){
+        0x9c91, 0xfb8b, 0x7f80, 0x8304, 0x6e52, 0xd8ef, 0x37fc, 0x7851,
+    });
+    try accessVector(@Vector(8, u32){
+        0x688b88e2, 0x68e2b7a2, 0x87574680, 0xab4f0769,
+        0x75472bb5, 0xa791f2ae, 0xeb2ed416, 0x5f05ce82,
+    });
+    try accessVector(@Vector(8, u64){
+        0xdefd1ddffaedf818, 0x91c78a29d3d59890,
+        0x842aaf8fd3c7b785, 0x970a07b8f9f4a6b3,
+        0x21b2425d1a428246, 0xea50e41174a7977b,
+        0x08d0f1c4f5978b74, 0x8dc88a7fd85e0e67,
+    });
+    try accessVector(@Vector(8, u128){
+        0x6f2cbde1fb219b1e73d7f774d10f0d94,
+        0x7c1412616cda20436d7106691d8ba4cc,
+        0x4ee940b50e97675b3b35d7872a35b5ad,
+        0x6d994fb8caa1b2fac48acbb68fa2d2f1,
+        0xdee698c7ec8de9b5940903e3fc665b63,
+        0x0751491a509e4a1ce8cfa6d62fe9e74c,
+        0x3d880f0a927ce3bfc2682b72070fcd50,
+        0x82f0eec62881598699eeb93fbb456e95,
+    });
+    try accessVector(@Vector(8, u256){
+        0x6ee4f35fe624d365952f73960791238ac781bfba782abc7866a691063e43ce48,
+        0xb006491f54a9c9292458a5835b7d5f4cfa18136f175eef0a13bb8adf5c3dc061,
+        0xd6e25ca1bc5685fc52609e261b9065bc05a8662e9291660033dd7f6d98e562b3,
+        0x992c5e54e0e6331dac258996be7dae9b2a2eff323a39043ba8d2721420dc5f5c,
+        0x257313f45fb3556d0fc323d5f38c953e9a093fe2278655312b6a5b64aab9d901,
+        0x6c8ad2182b9a3b2b19c2c9b152956b383d0fee2e3fbd5b02ed72227446a7b221,
+        0xd80cafc2252b289793799675e43f97ba4a5448c7b57e1544a464687b435efc7b,
+        0xfcb480f2d70afd53c4689dd3f5db7638c24302f2a6a15f738167db090d91fb28,
+    });
+}