Commit 60cdacaff2
Changed files (1)
src/arch/x86_64/CodeGen.zig
@@ -2291,7 +2291,7 @@ fn genBodyBlock(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}
fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
- @setEvalBranchQuota(29_400);
+ @setEvalBranchQuota(29_500);
const pt = cg.pt;
const zcu = pt.zcu;
const ip = &zcu.intern_pool;
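
`genBody` evaluates the instruction-selection tables below at compile time, so each added pattern consumes eval-branch budget and the quota creeps up with the table. A minimal sketch of the mechanism (not from this commit):

```zig
// Any comptime evaluation that exceeds the default budget of 1000
// backwards branches must raise the quota explicitly, as genBody does.
fn bigComptimeTable() void {
    @setEvalBranchQuota(29_500);
    comptime var i: usize = 0;
    inline while (i < 10_000) : (i += 1) {}
}
```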
@@ -86774,52 +86774,313 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
const is_non_err = try cg.tempInit(.bool, .{ .eflags = .e });
try is_non_err.finish(inst, &.{un_op}, &ops, cg);
},
- .load => fallback: {
+ .load => {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
const val_ty = ty_op.ty.toType();
- const ptr_ty = cg.typeOf(ty_op.operand);
- const ptr_info = ptr_ty.ptrInfo(zcu);
- if (ptr_info.packed_offset.host_size > 0 and
- (ptr_info.flags.vector_index == .none or val_ty.toIntern() == .bool_type))
- break :fallback try cg.airLoad(inst);
var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
- const res = try ops[0].load(val_ty, .{
- .disp = switch (ptr_info.flags.vector_index) {
- .none => 0,
- .runtime => unreachable,
- else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)),
+ var res: [1]Temp = undefined;
+ cg.select(&res, &.{val_ty}, &ops, comptime &.{ .{
+ .src_constraints = .{ .{ .ptr_bool_vec_elem = .byte }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_gpr, .none, .none } },
},
- }, cg);
- try res.finish(inst, &.{ty_op.operand}, &ops, cg);
+ .extra_temps = .{
+ .{ .type = .u8, .kind = .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .cc = .c }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
+ .{ ._, ._, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .ptr_bool_vec_elem = .word }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_gpr, .none, .none } },
+ },
+ .dst_temps = .{ .{ .cc = .c }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .bt, .lea(.src0w), .ua(.src0, .add_vector_index), ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .ptr_any_bool_vec_elem, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_gpr, .none, .none } },
+ },
+ .dst_temps = .{ .{ .cc = .c }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .bt, .leaa(.src0d, .add_vector_index_div_8_down_4), .ua(.src0, .add_vector_index_rem_32), ._, ._ },
+ } },
+ } }) catch |err| switch (err) {
+ error.SelectFailed => res[0] = try ops[0].load(val_ty, .{
+ .disp = switch (cg.typeOf(ty_op.operand).ptrInfo(zcu).flags.vector_index) {
+ .none => 0,
+ .runtime => unreachable,
+ else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)),
+ },
+ }, cg),
+ else => |e| return e,
+ };
+ try res[0].finish(inst, &.{ty_op.operand}, &ops, cg);
},
.ret => try cg.airRet(inst, false),
.ret_safe => try cg.airRet(inst, true),
.ret_load => try cg.airRetLoad(inst),
- .store, .store_safe => |air_tag| fallback: {
+ .store, .store_safe => |air_tag| {
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
- const ptr_ty = cg.typeOf(bin_op.lhs);
- const ptr_info = ptr_ty.ptrInfo(zcu);
- const val_ty = cg.typeOf(bin_op.rhs);
- if (ptr_info.packed_offset.host_size > 0 and
- (ptr_info.flags.vector_index == .none or val_ty.toIntern() == .bool_type))
- break :fallback try cg.airStore(inst, switch (air_tag) {
- else => unreachable,
- .store => false,
- .store_safe => true,
- });
var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
- try ops[0].store(&ops[1], .{
- .disp = switch (ptr_info.flags.vector_index) {
- .none => 0,
- .runtime => unreachable,
- else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)),
+ cg.select(&.{}, &.{}, &ops, comptime &.{ .{
+ .src_constraints = .{ .{ .ptr_bool_vec_elem = .byte }, .bool, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_gpr, .{ .imm = 0 }, .none } },
},
- .safe = switch (air_tag) {
- else => unreachable,
- .store => false,
- .store_safe => true,
+ .extra_temps = .{
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
},
- }, cg);
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
+ .{ ._, ._r, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
+ .{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .ptr_bool_vec_elem = .byte }, .bool, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_gpr, .{ .imm = 1 }, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
+ .{ ._, ._s, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
+ .{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, null, null, null },
+ .src_constraints = .{ .{ .ptr_bool_vec_elem = .byte }, .bool, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_gpr, .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ },
+ .{ ._, ._r, .bt, .tmp1d, .ua(.src0, .add_vector_index), ._, ._ },
+ .{ ._, ._s, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
+ .{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
+ .{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .ptr_bool_vec_elem = .byte }, .bool, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_gpr, .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
+ .{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
+ .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._r, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
+ .{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
+ .{ .@"0:", ._s, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
+ .{ .@"1:", ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .ptr_bool_vec_elem = .word }, .bool, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_gpr, .{ .imm = 0 }, .none } },
+ },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._r, .bt, .lea(.src0w), .ua(.src0, .add_vector_index), ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .ptr_bool_vec_elem = .word }, .bool, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_gpr, .{ .imm = 1 }, .none } },
+ },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._s, .bt, .lea(.src0w), .ua(.src0, .add_vector_index), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, null, null, null },
+ .src_constraints = .{ .{ .ptr_bool_vec_elem = .word }, .bool, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_gpr, .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .tmp0d, .lea(.src0w), ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ },
+ .{ ._, ._r, .bt, .tmp1d, .ua(.src0, .add_vector_index), ._, ._ },
+ .{ ._, ._s, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
+ .{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
+ .{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .lea(.src0w), .tmp0w, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .ptr_bool_vec_elem = .word }, .bool, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_gpr, .to_gpr, .none } },
+ },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._r, .bt, .lea(.src0w), .ua(.src0, .add_vector_index), ._, ._ },
+ .{ ._, ._mp, .j, .@"0f", ._, ._, ._ },
+ .{ .@"1:", ._s, .bt, .lea(.src0w), .ua(.src0, .add_vector_index), ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .ptr_any_bool_vec_elem, .bool, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_gpr, .{ .imm = 0 }, .none } },
+ },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._r, .bt, .leaa(.src0d, .add_vector_index_div_8_down_4), .ua(.src0, .add_vector_index_rem_32), ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .ptr_any_bool_vec_elem, .bool, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_gpr, .{ .imm = 1 }, .none } },
+ },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._s, .bt, .leaa(.src0d, .add_vector_index_div_8_down_4), .ua(.src0, .add_vector_index_rem_32), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, null, null, null },
+ .src_constraints = .{ .ptr_any_bool_vec_elem, .bool, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_gpr, .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .leaa(.src0d, .add_vector_index_div_8_down_4), ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ },
+ .{ ._, ._r, .bt, .tmp1d, .ua(.src0, .add_vector_index_rem_32), ._, ._ },
+ .{ ._, ._s, .bt, .tmp0d, .ua(.src0, .add_vector_index_rem_32), ._, ._ },
+ .{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
+ .{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .leaa(.src0d, .add_vector_index_div_8_down_4), .tmp0d, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .ptr_any_bool_vec_elem, .bool, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_gpr, .to_gpr, .none } },
+ },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._r, .bt, .leaa(.src0d, .add_vector_index_div_8_down_4), .ua(.src0, .add_vector_index_rem_32), ._, ._ },
+ .{ ._, ._mp, .j, .@"0f", ._, ._, ._ },
+ .{ .@"1:", ._s, .bt, .leaa(.src0d, .add_vector_index_div_8_down_4), .ua(.src0, .add_vector_index_rem_32), ._, ._ },
+ } },
+ } }) catch |err| switch (err) {
+ error.SelectFailed => try ops[0].store(&ops[1], .{
+ .disp = switch (cg.typeOf(bin_op.lhs).ptrInfo(zcu).flags.vector_index) {
+ .none => 0,
+ .runtime => unreachable,
+ else => |vector_index| @intCast(cg.typeOf(bin_op.rhs).abiSize(zcu) * @intFromEnum(vector_index)),
+ },
+ .safe = switch (air_tag) {
+ else => unreachable,
+ .store => false,
+ .store_safe => true,
+ },
+ }, cg),
+ else => |e| return e,
+ };
for (ops) |op| try op.die(cg);
},
.unreach => {},
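
Both rewrites route comptime-indexed element accesses of packed bool vectors through the table-driven `select` mechanism rather than the `airLoad`/`airStore` helpers deleted later in this commit. A load becomes a single `bt`, with the result materialized from the carry flag (`.dst_temps = .{ .{ .cc = .c }, .unused }`); since `bt` has no byte-sized form, a byte host is first zero-extended with `movzx`. Stores of a known `false`/`true` select `btr`/`bts` directly, runtime `bool` values take a branch-free `cmov` between both candidate results when the CPU supports it (`.required_features = .{ .cmov, ... }`) and a `test`+branch sequence otherwise, and anything unmatched (`error.SelectFailed`) still falls back to the operands' generic `load`/`store` helpers. A hedged Zig restatement of the store semantics (helper names invented):

```zig
const std = @import("std");

// Constant stores pick one instruction up front (btr for `false`,
// bts for `true`): clear or set a single bit of the host unit.
fn storeBitConst(host: *u8, comptime index: u3, comptime value: bool) void {
    const mask = @as(u8, 1) << index;
    if (value) {
        host.* |= mask; // bts
    } else {
        host.* &= ~mask; // btr
    }
}

// Runtime values use the branch-free cmov pattern: compute both candidate
// results, then select on the low bit of the value (test + cmovz).
fn storeBitCmov(host: *u8, index: u3, value: bool) void {
    const cleared = host.* & ~(@as(u8, 1) << index); // btr result
    const set = host.* | (@as(u8, 1) << index); // bts result
    host.* = if (value) set else cleared;
}

test "packed bool element stores" {
    var byte: u8 = 0b0000_0100;
    storeBitConst(&byte, 0, true);
    storeBitCmov(&byte, 2, false);
    try std.testing.expectEqual(@as(u8, 0b0000_0001), byte);
}
```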
@@ -100863,7 +101124,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .cc = .c }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .bt, .src0d, .ua(.none, .add_src1_rem_32), ._, ._ },
+ .{ ._, ._, .bt, .src0d, .ua(.none, .add_src1), ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .bool_vec = .dword }, .any, .any },
@@ -100884,7 +101145,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .cc = .c }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .bt, .src0q, .ua(.none, .add_src1_rem_64), ._, ._ },
+ .{ ._, ._, .bt, .src0q, .ua(.none, .add_src1), ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
@@ -174481,114 +174742,6 @@ fn reuseOperandAdvanced(
return true;
}
-fn packedLoad(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
- const pt = self.pt;
- const zcu = pt.zcu;
-
- const ptr_info = ptr_ty.ptrInfo(zcu);
- const val_ty: Type = .fromInterned(ptr_info.child);
- if (!val_ty.hasRuntimeBitsIgnoreComptime(zcu)) return;
- const val_abi_size: u32 = @intCast(val_ty.abiSize(zcu));
-
- const val_bit_size: u32 = @intCast(val_ty.bitSize(zcu));
- const ptr_bit_off = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) {
- .none => 0,
- .runtime => unreachable,
- else => |vector_index| @intFromEnum(vector_index) * val_bit_size,
- };
- if (ptr_bit_off % 8 == 0) {
- {
- const mat_ptr_mcv: MCValue = switch (ptr_mcv) {
- .immediate, .register, .register_offset, .lea_frame => ptr_mcv,
- else => .{ .register = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
- };
- const mat_ptr_lock = switch (mat_ptr_mcv) {
- .register => |mat_ptr_reg| self.register_manager.lockReg(mat_ptr_reg),
- else => null,
- };
- defer if (mat_ptr_lock) |lock| self.register_manager.unlockReg(lock);
-
- try self.load(dst_mcv, ptr_ty, mat_ptr_mcv.offset(@intCast(@divExact(ptr_bit_off, 8))));
- }
-
- if (val_abi_size * 8 > val_bit_size) {
- if (dst_mcv.isRegister()) {
- try self.truncateRegister(val_ty, dst_mcv.getReg().?);
- } else {
- const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- const hi_mcv = dst_mcv.address().offset(@intCast(val_bit_size / 64 * 8)).deref();
- try self.genSetReg(tmp_reg, .usize, hi_mcv, .{});
- try self.truncateRegister(val_ty, tmp_reg);
- try self.genCopy(.usize, hi_mcv, .{ .register = tmp_reg }, .{});
- }
- }
- return;
- }
-
- if (val_abi_size > 8) return self.fail("TODO implement packed load of {f}", .{val_ty.fmt(pt)});
-
- const limb_abi_size: u31 = @min(val_abi_size, 8);
- const limb_abi_bits = limb_abi_size * 8;
- const val_byte_off: i32 = @intCast(ptr_bit_off / limb_abi_bits * limb_abi_size);
- const val_bit_off = ptr_bit_off % limb_abi_bits;
- const val_extra_bits = self.regExtraBits(val_ty);
-
- const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
- const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
- defer self.register_manager.unlockReg(ptr_lock);
-
- const dst_reg = switch (dst_mcv) {
- .register => |reg| reg,
- else => try self.register_manager.allocReg(null, abi.RegisterClass.gp),
- };
- const dst_lock = self.register_manager.lockReg(dst_reg);
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
- const load_abi_size =
- if (val_bit_off < val_extra_bits) val_abi_size else val_abi_size * 2;
- if (load_abi_size <= 8) {
- const load_reg = registerAlias(dst_reg, load_abi_size);
- try self.asmRegisterMemory(.{ ._, .mov }, load_reg, .{
- .base = .{ .reg = ptr_reg },
- .mod = .{ .rm = .{
- .size = .fromSize(load_abi_size),
- .disp = val_byte_off,
- } },
- });
- try self.spillEflagsIfOccupied();
- try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, .u(val_bit_off));
- } else {
- const tmp_reg =
- registerAlias(try self.register_manager.allocReg(null, abi.RegisterClass.gp), val_abi_size);
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- const dst_alias = registerAlias(dst_reg, val_abi_size);
- try self.asmRegisterMemory(.{ ._, .mov }, dst_alias, .{
- .base = .{ .reg = ptr_reg },
- .mod = .{ .rm = .{
- .size = .fromSize(val_abi_size),
- .disp = val_byte_off,
- } },
- });
- try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg, .{
- .base = .{ .reg = ptr_reg },
- .mod = .{ .rm = .{
- .size = .fromSize(val_abi_size),
- .disp = val_byte_off + limb_abi_size,
- } },
- });
- try self.spillEflagsIfOccupied();
- try self.asmRegisterRegisterImmediate(.{ ._rd, .sh }, dst_alias, tmp_reg, .u(val_bit_off));
- }
-
- if (val_extra_bits > 0) try self.truncateRegister(val_ty, dst_reg);
- try self.genCopy(val_ty, dst_mcv, .{ .register = dst_reg }, .{});
-}
-
fn load(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
const pt = self.pt;
const zcu = pt.zcu;
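
`packedLoad` is now dead: its aligned cases are covered by the `bt` patterns above and the rest by the fallback path. For reference, the deleted bit-offset path loaded one limb (`mov` + `shr`) or two (`shrd`) and shifted the field down before truncating; a hedged restatement of the two-limb case (names invented, width capped below 64):

```zig
// Extract `width` bits starting `bit_off` bits into the limb pair
// (lo, hi), mirroring the deleted shrd path.
fn extractField(lo: u64, hi: u64, bit_off: u6, comptime width: u6) u64 {
    const shifted = blk: {
        if (bit_off == 0) break :blk lo;
        const back: u6 = @intCast(@as(u7, 64) - bit_off);
        break :blk (lo >> bit_off) | (hi << back); // shrd
    };
    return shifted & ((@as(u64, 1) << width) - 1); // truncate to the field
}
```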
@@ -174636,174 +174789,6 @@ fn load(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerE
}
}
-fn airLoad(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const elem_ty = self.typeOfIndex(inst);
- const result: MCValue = result: {
- if (!elem_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;
-
- try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
- const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
- defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
-
- const ptr_ty = self.typeOf(ty_op.operand);
- const elem_size = elem_ty.abiSize(zcu);
-
- const elem_rs = self.regSetForType(elem_ty);
- const ptr_rs = self.regSetForType(ptr_ty);
-
- const ptr_mcv = try self.resolveInst(ty_op.operand);
- const dst_mcv = if (elem_size <= 8 and std.math.isPowerOfTwo(elem_size) and
- elem_rs.supersetOf(ptr_rs) and self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv))
- // The MCValue that holds the pointer can be re-used as the value.
- ptr_mcv
- else
- try self.allocRegOrMem(inst, true);
-
- const ptr_info = ptr_ty.ptrInfo(zcu);
- if (ptr_info.flags.vector_index != .none or ptr_info.packed_offset.host_size > 0) {
- try self.packedLoad(dst_mcv, ptr_ty, ptr_mcv);
- } else {
- try self.load(dst_mcv, ptr_ty, ptr_mcv);
- }
-
- if (elem_ty.isAbiInt(zcu) and elem_size * 8 > elem_ty.bitSize(zcu)) {
- const high_mcv: MCValue = switch (dst_mcv) {
- .register => |dst_reg| .{ .register = dst_reg },
- .register_pair => |dst_regs| .{ .register = dst_regs[1] },
- else => dst_mcv.address().offset(@intCast((elem_size - 1) / 8 * 8)).deref(),
- };
- const high_reg = if (high_mcv.isRegister())
- high_mcv.getReg().?
- else
- try self.copyToTmpRegister(.usize, high_mcv);
- const high_lock = self.register_manager.lockReg(high_reg);
- defer if (high_lock) |lock| self.register_manager.unlockReg(lock);
-
- try self.truncateRegister(elem_ty, high_reg);
- if (!high_mcv.isRegister()) try self.genCopy(
- if (elem_size <= 8) elem_ty else .usize,
- high_mcv,
- .{ .register = high_reg },
- .{},
- );
- }
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ptr_info = ptr_ty.ptrInfo(zcu);
- const src_ty: Type = .fromInterned(ptr_info.child);
- if (!src_ty.hasRuntimeBitsIgnoreComptime(zcu)) return;
-
- const limb_abi_size: u16 = @min(ptr_info.packed_offset.host_size, 8);
- const limb_abi_bits = limb_abi_size * 8;
- const limb_ty = try pt.intType(.unsigned, limb_abi_bits);
-
- const src_bit_size = src_ty.bitSize(zcu);
- const ptr_bit_off = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) {
- .none => 0,
- .runtime => unreachable,
- else => |vector_index| @intFromEnum(vector_index) * src_bit_size,
- };
- const src_byte_off: i32 = @intCast(ptr_bit_off / limb_abi_bits * limb_abi_size);
- const src_bit_off = ptr_bit_off % limb_abi_bits;
-
- const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
- const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
- defer self.register_manager.unlockReg(ptr_lock);
-
- const mat_src_mcv: MCValue = mat_src_mcv: switch (src_mcv) {
- .register => if (src_bit_size > 64) {
- const frame_index = try self.allocFrameIndex(.initSpill(src_ty, self.pt.zcu));
- try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, src_mcv, .{});
- break :mat_src_mcv .{ .load_frame = .{ .index = frame_index } };
- } else src_mcv,
- else => src_mcv,
- };
-
- var limb_i: u16 = 0;
- while (limb_i * limb_abi_bits < src_bit_off + src_bit_size) : (limb_i += 1) {
- const part_bit_off = if (limb_i == 0) src_bit_off else 0;
- const part_bit_size =
- @min(src_bit_off + src_bit_size - limb_i * limb_abi_bits, limb_abi_bits) - part_bit_off;
- const limb_mem: Memory = .{
- .base = .{ .reg = ptr_reg },
- .mod = .{ .rm = .{
- .size = .fromSize(limb_abi_size),
- .disp = src_byte_off + limb_i * limb_abi_size,
- } },
- };
-
- const part_mask = (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - part_bit_size)) <<
- @intCast(part_bit_off);
- const part_mask_not = part_mask ^ (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - limb_abi_bits));
- if (limb_abi_size <= 4) {
- try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, .u(part_mask_not));
- } else if (std.math.cast(i32, @as(i64, @bitCast(part_mask_not)))) |small| {
- try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, .s(small));
- } else {
- const part_mask_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- try self.asmRegisterImmediate(.{ ._, .mov }, part_mask_reg, .u(part_mask_not));
- try self.asmMemoryRegister(.{ ._, .@"and" }, limb_mem, part_mask_reg);
- }
-
- if (src_bit_size <= 64) {
- const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const tmp_mcv = MCValue{ .register = tmp_reg };
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- try self.genSetReg(tmp_reg, limb_ty, mat_src_mcv, .{});
- switch (limb_i) {
- 0 => try self.genShiftBinOpMir(
- .{ ._l, .sh },
- limb_ty,
- tmp_mcv,
- .u8,
- .{ .immediate = src_bit_off },
- ),
- 1 => try self.genShiftBinOpMir(
- .{ ._r, .sh },
- limb_ty,
- tmp_mcv,
- .u8,
- .{ .immediate = limb_abi_bits - src_bit_off },
- ),
- else => unreachable,
- }
- try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask });
- try self.asmMemoryRegister(
- .{ ._, .@"or" },
- limb_mem,
- registerAlias(tmp_reg, limb_abi_size),
- );
- } else if (src_bit_size <= 128 and src_bit_off == 0) {
- const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const tmp_mcv = MCValue{ .register = tmp_reg };
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- try self.genSetReg(tmp_reg, limb_ty, switch (limb_i) {
- 0 => mat_src_mcv,
- else => mat_src_mcv.address().offset(limb_i * limb_abi_size).deref(),
- }, .{});
- try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask });
- try self.asmMemoryRegister(
- .{ ._, .@"or" },
- limb_mem,
- registerAlias(tmp_reg, limb_abi_size),
- );
- } else return self.fail("TODO: implement packed store of {f}", .{src_ty.fmt(pt)});
- }
-}
-
fn store(
self: *CodeGen,
ptr_ty: Type,
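
`airLoad` and `packedStore` go the same way, and the `airStore` dispatcher follows in the next hunk; note that the old entry points eagerly spilled `.rdi`, `.rsi`, and `.rcx`, whereas the select patterns declare per-pattern clobbers (`.clobbers = .{ .eflags = true }`). The deleted `packedStore` implemented a general field store as a per-limb read-modify-write: mask the destination bits out with `and`, merge the shifted source in with `or`. As a sketch (hypothetical helper, width capped below 64):

```zig
// Read-modify-write one limb:
// limb = (limb & ~mask) | ((value << off) & mask).
fn storeField(limb: *u64, value: u64, bit_off: u6, comptime width: u6) void {
    const mask = ((@as(u64, 1) << width) - 1) << bit_off;
    limb.* = (limb.* & ~mask) | ((value << bit_off) & mask);
}
```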
@@ -174857,35 +174842,6 @@ fn store(
}
}
-fn airStore(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-
- result: {
- if (!safety and (try self.resolveInst(bin_op.rhs)) == .undef) break :result;
-
- try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
- const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
- defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
-
- const ptr_ty = self.typeOf(bin_op.lhs);
- const ptr_info = ptr_ty.ptrInfo(zcu);
- const is_packed = ptr_info.flags.vector_index != .none or ptr_info.packed_offset.host_size > 0;
- if (is_packed) try self.spillEflagsIfOccupied();
-
- const src_mcv = try self.resolveInst(bin_op.rhs);
- const ptr_mcv = try self.resolveInst(bin_op.lhs);
-
- if (is_packed) {
- try self.packedStore(ptr_ty, ptr_mcv, src_mcv);
- } else {
- try self.store(ptr_ty, ptr_mcv, src_mcv, .{ .safety = safety });
- }
- }
- return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
-}
-
fn genUnOp(self: *CodeGen, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: Air.Inst.Ref) !MCValue {
const pt = self.pt;
const zcu = pt.zcu;
@@ -192171,6 +192127,8 @@ const Select = struct {
exact_bool_vec: u16,
ptr_any_bool_vec,
ptr_bool_vec: Memory.Size,
+ ptr_any_bool_vec_elem,
+ ptr_bool_vec_elem: Memory.Size,
remainder_bool_vec: OfIsSizes,
exact_remainder_bool_vec: struct { of: Memory.Size, is: u16 },
signed_int_vec: Memory.Size,
@@ -192273,6 +192231,22 @@ const Select = struct {
.vector_type => |vector_type| vector_type.child == .bool_type and size.bitSize(cg.target) >= vector_type.len,
else => false,
},
+ .ptr_any_bool_vec_elem => {
+ const ptr_info = ty.ptrInfo(zcu);
+ return switch (ptr_info.flags.vector_index) {
+ .none => false,
+ .runtime => unreachable,
+ else => ptr_info.child == .bool_type,
+ };
+ },
+ .ptr_bool_vec_elem => |size| {
+ const ptr_info = ty.ptrInfo(zcu);
+ return switch (ptr_info.flags.vector_index) {
+ .none => false,
+ .runtime => unreachable,
+ else => ptr_info.child == .bool_type and size.bitSize(cg.target) >= ptr_info.packed_offset.host_size,
+ };
+ },
.remainder_bool_vec => |of_is| ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and
of_is.is.bitSize(cg.target) >= (ty.vectorLen(zcu) - 1) % of_is.of.bitSize(cg.target) + 1,
.exact_remainder_bool_vec => |of_is| ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and
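
The two new constraints mirror the existing `ptr_bool_vec`/`ptr_any_bool_vec` pair but match an *element* pointer, i.e. one whose `vector_index` is a comptime index; the sized variant additionally bounds the host unit by the given memory size, which is what lets the byte and word patterns above operate on the host directly. Restated as a standalone predicate (a sketch; field shapes follow the surrounding code):

```zig
// `size_bits == null` encodes the `ptr_any_bool_vec_elem` variant.
fn isPtrBoolVecElem(ptr_info: anytype, size_bits: ?u64) bool {
    switch (ptr_info.flags.vector_index) {
        .none => return false, // pointer to the whole vector, not an element
        .runtime => unreachable, // runtime indices never reach select
        else => {},
    }
    if (ptr_info.child != .bool_type) return false;
    const bits = size_bits orelse return true; // the `any` variant
    return bits >= ptr_info.packed_offset.host_size;
}
```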
@@ -193266,7 +193240,7 @@ const Select = struct {
ref: Ref,
scale: Memory.Scale = .@"1",
} = .{ .ref = .none },
- unused: u3 = 0,
+ unused: u2 = 0,
},
imm: i32 = 0,
@@ -193279,9 +193253,9 @@ const Select = struct {
lea,
mem,
};
- const Adjust = packed struct(u10) {
+ const Adjust = packed struct(u11) {
sign: enum(u1) { neg, pos },
- lhs: enum(u5) {
+ lhs: enum(u6) {
none,
ptr_size,
ptr_bit_size,
@@ -193303,6 +193277,7 @@ const Select = struct {
src0_elem_size,
dst0_elem_size,
src0_elem_size_mul_src1,
+ vector_index,
src1,
src1_sub_bit_size,
log2_src0_elem_size,
@@ -193373,9 +193348,13 @@ const Select = struct {
const sub_src0_elem_size: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size, .op = .mul, .rhs = .@"1" };
const add_src0_elem_size_mul_src1: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size_mul_src1, .op = .mul, .rhs = .@"1" };
const sub_src0_elem_size_mul_src1: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size_mul_src1, .op = .mul, .rhs = .@"1" };
+ const add_vector_index: Adjust = .{ .sign = .pos, .lhs = .vector_index, .op = .mul, .rhs = .@"1" };
+ const add_vector_index_rem_32: Adjust = .{ .sign = .pos, .lhs = .vector_index, .op = .rem_8_mul, .rhs = .@"4" };
+ const add_vector_index_div_8_down_4: Adjust = .{ .sign = .pos, .lhs = .vector_index, .op = .div_8_down, .rhs = .@"4" };
const add_dst0_elem_size: Adjust = .{ .sign = .pos, .lhs = .dst0_elem_size, .op = .mul, .rhs = .@"1" };
const sub_dst0_elem_size: Adjust = .{ .sign = .neg, .lhs = .dst0_elem_size, .op = .mul, .rhs = .@"1" };
const add_src1_div_8_down_4: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .div_8_down, .rhs = .@"4" };
+ const add_src1: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .mul, .rhs = .@"1" };
const add_src1_rem_32: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .rem_8_mul, .rhs = .@"4" };
const add_src1_rem_64: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .rem_8_mul, .rhs = .@"8" };
const add_src1_sub_bit_size: Adjust = .{ .sign = .pos, .lhs = .src1_sub_bit_size, .op = .mul, .rhs = .@"1" };
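
To feed those patterns, `Adjust` gains a `vector_index` source in the widened `lhs` enum (the packed struct grows from `u10` to `u11`, compensated by shrinking the neighboring `unused` padding from `u3` to `u2`), plus a plain `add_src1`. The two derived forms encode the dword-granular addressing of the `ptr_any_bool_vec_elem` patterns: for a bit index `i`, `add_vector_index_div_8_down_4` is the byte offset of the containing dword and `add_vector_index_rem_32` the bit position inside it, so together they always recombine to `i`:

```zig
const std = @import("std");

// For bit index i, address the dword at (i / 8) rounded down to a
// multiple of 4, then test bit i % 32 within it.
fn dwordByteOffset(i: u32) u32 {
    return (i / 8) & ~@as(u32, 3); // add_vector_index_div_8_down_4
}
fn dwordBitOffset(i: u32) u32 {
    return i % 32; // add_vector_index_rem_32, i.e. rem (8 * 4)
}

test "dword addressing recombines losslessly" {
    var i: u32 = 0;
    while (i < 256) : (i += 1) {
        try std.testing.expectEqual(i, dwordByteOffset(i) * 8 + dwordBitOffset(i));
    }
}
```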
@@ -194258,6 +194237,10 @@ const Select = struct {
.dst0_elem_size => @intCast(Select.Operand.Ref.dst0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)),
.src0_elem_size_mul_src1 => @intCast(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) *
Select.Operand.Ref.src1.valueOf(s).immediate),
+ .vector_index => switch (op.flags.base.ref.typeOf(s).ptrInfo(s.cg.pt.zcu).flags.vector_index) {
+ .none, .runtime => unreachable,
+ else => |vector_index| @intFromEnum(vector_index),
+ },
.src1 => @intCast(Select.Operand.Ref.src1.valueOf(s).immediate),
.src1_sub_bit_size => @as(SignedImm, @intCast(Select.Operand.Ref.src1.valueOf(s).immediate)) -
@as(SignedImm, @intCast(s.cg.nonBoolScalarBitSize(op.flags.base.ref.typeOf(s)))),
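
Finally, the evaluator resolves `.vector_index` from the base operand's pointer type at select time; `.none` and `.runtime` are unreachable here because the new constraints filtered them out before any adjust is computed. The user-visible effect, as a hedged end-to-end sketch (hypothetical source):

```zig
const std = @import("std");

// A comptime-indexed element pointer into a packed bool vector: the
// load should lower to `bt`, the store to `bts`/`btr`.
fn flip(v: *@Vector(16, bool)) void {
    const p = &v.*[9]; // pointer type carries vector_index = 9
    p.* = !p.*;
}

test "flip one lane" {
    var v: @Vector(16, bool) = @splat(false);
    flip(&v);
    try std.testing.expect(v[9]);
}
```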