Commit c4b93555b0
Changed files (12)
src/arch/x86_64/abi.zig
@@ -242,8 +242,12 @@ pub fn classifySystemV(ty: Type, zcu: *Zcu, target: std.Target, ctx: Context) [8
.sse, .sseup, .sseup, .sseup,
.sseup, .sseup, .sseup, .none,
};
- // LLVM always returns vectors byval
- if (bits <= 512 or ctx == .ret) return .{
+ if (bits <= 512 or (ctx == .ret and bits <= @as(u64, if (std.Target.x86.featureSetHas(target.cpu.features, .avx512f))
+ 2048
+ else if (std.Target.x86.featureSetHas(target.cpu.features, .avx))
+ 1024
+ else
+ 512))) return .{
.sse, .sseup, .sseup, .sseup,
.sseup, .sseup, .sseup, .sseup,
};
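A hedged reading of the hunk above: vector returns now stay in .sse/.sseup registers up to four registers of the widest vector size the CPU provides (4 × zmm = 2048 bits with avx512f, 4 × ymm = 1024 with avx, 4 × xmm = 512 otherwise), replacing the old "LLVM always returns vectors byval" rule. A minimal standalone sketch of the feature probe — the function name is illustrative, not from the commit:

    const std = @import("std");

    /// Widest vector return, in bits, that classifySystemV keeps in
    /// .sse/.sseup registers per the hunk above.
    fn maxSseReturnBits(target: std.Target) u64 {
        if (std.Target.x86.featureSetHas(target.cpu.features, .avx512f)) return 2048;
        if (std.Target.x86.featureSetHas(target.cpu.features, .avx)) return 1024;
        return 512;
    }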
@@ -416,7 +420,7 @@ pub const SysV = struct {
pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 };
pub const c_abi_sse_param_regs = sse_avx_regs[0..8].*;
pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx };
- pub const c_abi_sse_return_regs = sse_avx_regs[0..2].*;
+ pub const c_abi_sse_return_regs = sse_avx_regs[0..4].*;
};
pub const Win64 = struct {
@@ -496,7 +500,7 @@ pub fn getCAbiSseReturnRegs(cc: std.builtin.CallingConvention) []const Register
}
const gp_regs = [_]Register{
- .rax, .rcx, .rdx, .rbx, .rsi, .rdi, .r8, .r9, .r10, .r11, .r12, .r13, .r14, .r15,
+ .rax, .rdx, .rbx, .rcx, .rsi, .rdi, .r8, .r9, .r10, .r11, .r12, .r13, .r14, .r15,
};
const x87_regs = [_]Register{
.st0, .st1, .st2, .st3, .st4, .st5, .st6, .st7,
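The growth of `c_abi_sse_return_regs` from two to four registers matches the wider return classification above. A hedged illustration — the example function is hypothetical, assuming an AVX target and the self-hosted backend:

    // A 1024-bit result can now come back in ymm0..ymm3 instead of
    // through a hidden result pointer.
    fn addWide(a: @Vector(32, f32), b: @Vector(32, f32)) callconv(.c) @Vector(32, f32) {
        return a + b;
    }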
src/arch/x86_64/CodeGen.zig
@@ -135,6 +135,8 @@ const Owner = union(enum) {
}
};
+const MaskKind = enum { sign, all };
+
pub const MCValue = union(enum) {
/// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc.
/// TODO Look into deleting this tag and using `dead` instead, since every use
@@ -156,10 +158,16 @@ pub const MCValue = union(enum) {
register: Register,
/// The value is split across two registers.
register_pair: [2]Register,
+ /// The value is split across three registers.
+ register_triple: [3]Register,
+ /// The value is split across four registers.
+ register_quadruple: [4]Register,
/// The value is a constant offset from the value in a register.
register_offset: bits.RegisterOffset,
/// The value is a tuple { wrapped, overflow } where wrapped value is stored in the GP register.
register_overflow: struct { reg: Register, eflags: Condition },
+ /// The value is a bool vector stored in a vector register with a different scalar type.
+ register_mask: struct { reg: Register, kind: MaskKind, inverted: bool, scalar: Memory.Size },
/// The value is in memory at a hard-coded address.
/// If the type is a pointer, it means the pointer address is stored at this memory location.
memory: u64,
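`register_mask` is the interesting new tag here: it tracks a `@Vector(N, bool)` whose lanes live widened in a vector register, the way x86 compare instructions naturally produce them. A hedged sketch of the tracking for `v == w` on `@Vector(4, u32)` lowered via `pcmpeqd` — the register choice is hypothetical:

    // True lanes come out as all-ones, so kind == .all; a `!=` can reuse the
    // same compare by flipping `inverted` instead of emitting an extra pxor.
    const mask: MCValue = .{ .register_mask = .{
        .reg = .xmm0,
        .kind = .all,
        .inverted = false,
        .scalar = .dword,
    } };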
@@ -195,7 +203,7 @@ pub const MCValue = union(enum) {
/// Payload is a frame address.
lea_frame: bits.FrameAddr,
/// Supports integer_per_element abi
- elementwise_regs_then_frame: packed struct { regs: u3 = 0, frame_off: i29 = 0, frame_index: FrameIndex },
+ elementwise_regs_then_frame: packed struct { regs: u3, frame_off: i29, frame_index: FrameIndex },
/// This indicates that we have already allocated a frame index for this instruction,
/// but it has not been spilled there yet in the current control flow.
/// Payload is a frame index.
@@ -210,6 +218,7 @@ pub const MCValue = union(enum) {
.undef,
.immediate,
.register_offset,
+ .register_mask,
.eflags,
.register_overflow,
.lea_symbol,
@@ -223,6 +232,8 @@ pub const MCValue = union(enum) {
=> false,
.register,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.memory,
.load_symbol,
.load_got,
@@ -234,13 +245,20 @@ pub const MCValue = union(enum) {
};
}
- fn isMemory(mcv: MCValue) bool {
+ fn isBase(mcv: MCValue) bool {
return switch (mcv) {
.memory, .indirect, .load_frame => true,
else => false,
};
}
+ fn isMemory(mcv: MCValue) bool {
+ return switch (mcv) {
+ .memory, .indirect, .load_frame, .load_symbol => true,
+ else => false,
+ };
+ }
+
fn isImmediate(mcv: MCValue) bool {
return switch (mcv) {
.immediate => true,
@@ -268,16 +286,23 @@ pub const MCValue = union(enum) {
.register => |reg| reg,
.register_offset, .indirect => |ro| ro.reg,
.register_overflow => |ro| ro.reg,
+ .register_mask => |rm| rm.reg,
else => null,
};
}
fn getRegs(mcv: *const MCValue) []const Register {
return switch (mcv.*) {
- .register => |*reg| @as(*const [1]Register, reg),
- .register_pair => |*regs| regs,
- .register_offset, .indirect => |*ro| @as(*const [1]Register, &ro.reg),
- .register_overflow => |*ro| @as(*const [1]Register, &ro.reg),
+ .register => |*reg| reg[0..1],
+ inline .register_pair,
+ .register_triple,
+ .register_quadruple,
+ => |*regs| regs,
+ inline .register_offset,
+ .indirect,
+ .register_overflow,
+ .register_mask,
+ => |*pl| (&pl.reg)[0..1],
else => &.{},
};
}
@@ -300,8 +325,11 @@ pub const MCValue = union(enum) {
.eflags,
.register,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.register_offset,
.register_overflow,
+ .register_mask,
.lea_symbol,
.lea_direct,
.lea_got,
@@ -332,7 +360,10 @@ pub const MCValue = union(enum) {
.undef,
.eflags,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.register_overflow,
+ .register_mask,
.memory,
.indirect,
.load_direct,
@@ -367,7 +398,10 @@ pub const MCValue = union(enum) {
=> unreachable, // not valid
.eflags,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.register_overflow,
+ .register_mask,
.memory,
.indirect,
.load_direct,
@@ -404,8 +438,11 @@ pub const MCValue = union(enum) {
.eflags,
.register,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.register_offset,
.register_overflow,
+ .register_mask,
.load_direct,
.lea_direct,
.load_got,
@@ -472,10 +509,11 @@ pub const MCValue = union(enum) {
.memory => |pl| try writer.print("[ds:0x{x}]", .{pl}),
inline .eflags, .register => |pl| try writer.print("{s}", .{@tagName(pl)}),
.register_pair => |pl| try writer.print("{s}:{s}", .{ @tagName(pl[1]), @tagName(pl[0]) }),
+ .register_triple => |pl| try writer.print("{s}:{s}:{s}", .{ @tagName(pl[2]), @tagName(pl[1]), @tagName(pl[0]) }),
+ .register_quadruple => |pl| try writer.print("{s}:{s}:{s}:{s}", .{ @tagName(pl[3]), @tagName(pl[2]), @tagName(pl[1]), @tagName(pl[0]) }),
.register_offset => |pl| try writer.print("{s} + 0x{x}", .{ @tagName(pl.reg), pl.off }),
- .register_overflow => |pl| try writer.print("{s}:{s}", .{
- @tagName(pl.eflags), @tagName(pl.reg),
- }),
+ .register_overflow => |pl| try writer.print("{s}:{s}", .{ @tagName(pl.eflags), @tagName(pl.reg) }),
+ .register_mask => |pl| try writer.print("mask({s},{}):{s}", .{ @tagName(pl.kind), pl.scalar, @tagName(pl.reg) }),
.load_symbol => |pl| try writer.print("[sym:{} + 0x{x}]", .{ pl.sym_index, pl.off }),
.lea_symbol => |pl| try writer.print("sym:{} + 0x{x}", .{ pl.sym_index, pl.off }),
.indirect => |pl| try writer.print("[{s} + 0x{x}]", .{ @tagName(pl.reg), pl.off }),
@@ -526,8 +564,11 @@ const InstTracking = struct {
.eflags,
.register,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.register_offset,
.register_overflow,
+ .register_mask,
.indirect,
=> .none,
}, .short = result };
@@ -545,17 +586,17 @@ const InstTracking = struct {
return self.short.getCondition();
}
- fn spill(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void {
+ fn spill(self: *InstTracking, cg: *CodeGen, inst: Air.Inst.Index) !void {
if (std.meta.eql(self.long, self.short)) return; // Already spilled
// Allocate or reuse frame index
switch (self.long) {
- .none => self.long = try function.allocRegOrMem(inst, false),
+ .none => self.long = try cg.allocRegOrMem(inst, false),
.load_frame => {},
.reserved_frame => |index| self.long = .{ .load_frame = .{ .index = index } },
else => unreachable,
}
tracking_log.debug("spill {} from {} to {}", .{ inst, self.short, self.long });
- try function.genCopy(function.typeOfIndex(inst), self.long, self.short, .{});
+ try cg.genCopy(cg.typeOfIndex(inst), self.long, self.short, .{});
}
fn reuseFrame(self: *InstTracking) void {
@@ -584,8 +625,11 @@ const InstTracking = struct {
.eflags,
.register,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.register_offset,
.register_overflow,
+ .register_mask,
.indirect,
.elementwise_regs_then_frame,
.reserved_frame,
@@ -630,8 +674,11 @@ const InstTracking = struct {
.eflags,
.register,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.register_offset,
.register_overflow,
+ .register_mask,
.indirect,
.elementwise_regs_then_frame,
.air_ref,
@@ -735,13 +782,13 @@ const InstTracking = struct {
}
pub fn format(
- self: InstTracking,
+ tracking: InstTracking,
comptime _: []const u8,
_: std.fmt.FormatOptions,
writer: anytype,
) @TypeOf(writer).Error!void {
- if (!std.meta.eql(self.long, self.short)) try writer.print("|{}| ", .{self.long});
- try writer.print("{}", .{self.short});
+ if (!std.meta.eql(tracking.long, tracking.short)) try writer.print("|{}| ", .{tracking.long});
+ try writer.print("{}", .{tracking.short});
}
};
@@ -2259,44 +2306,44 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
try self.asmPseudo(.pseudo_dbg_leave_block_none);
}
-fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
- const pt = self.pt;
+fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
+ const pt = cg.pt;
const zcu = pt.zcu;
const ip = &zcu.intern_pool;
- const air_tags = self.air.instructions.items(.tag);
- const air_datas = self.air.instructions.items(.data);
- const use_old = self.target.ofmt == .coff;
+ const air_tags = cg.air.instructions.items(.tag);
+ const air_datas = cg.air.instructions.items(.data);
+ const use_old = cg.target.ofmt == .coff;
- self.arg_index = 0;
+ cg.arg_index = 0;
for (body) |inst| switch (air_tags[@intFromEnum(inst)]) {
.arg => {
- wip_mir_log.debug("{}", .{self.fmtAir(inst)});
- verbose_tracking_log.debug("{}", .{self.fmtTracking()});
+ wip_mir_log.debug("{}", .{cg.fmtAir(inst)});
+ verbose_tracking_log.debug("{}", .{cg.fmtTracking()});
- self.reused_operands = .initEmpty();
- try self.inst_tracking.ensureUnusedCapacity(self.gpa, 1);
+ cg.reused_operands = .initEmpty();
+ try cg.inst_tracking.ensureUnusedCapacity(cg.gpa, 1);
- try self.airArg(inst);
+ try cg.airArg(inst);
- self.resetTemps();
- self.checkInvariantsAfterAirInst();
+ cg.resetTemps();
+ cg.checkInvariantsAfterAirInst();
},
else => break,
};
- if (self.arg_index == 0) try self.airDbgVarArgs();
- self.arg_index = 0;
+ if (cg.arg_index == 0) try cg.airDbgVarArgs();
+ cg.arg_index = 0;
for (body) |inst| {
- if (self.liveness.isUnused(inst) and !self.air.mustLower(inst, ip)) continue;
- wip_mir_log.debug("{}", .{self.fmtAir(inst)});
- verbose_tracking_log.debug("{}", .{self.fmtTracking()});
+ if (cg.liveness.isUnused(inst) and !cg.air.mustLower(inst, ip)) continue;
+ wip_mir_log.debug("{}", .{cg.fmtAir(inst)});
+ verbose_tracking_log.debug("{}", .{cg.fmtTracking()});
- self.reused_operands = .initEmpty();
- try self.inst_tracking.ensureUnusedCapacity(self.gpa, 1);
+ cg.reused_operands = .initEmpty();
+ try cg.inst_tracking.ensureUnusedCapacity(cg.gpa, 1);
switch (air_tags[@intFromEnum(inst)]) {
// zig fmt: off
.not,
- => |tag| try self.airUnOp(inst, tag),
+ => |air_tag| try cg.airUnOp(inst, air_tag),
.add,
.add_wrap,
@@ -2306,22 +2353,22 @@ fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.bool_or,
.min,
.max,
- => |tag| try self.airBinOp(inst, tag),
+ => |air_tag| try cg.airBinOp(inst, air_tag),
- .ptr_add, .ptr_sub => |tag| try self.airPtrArithmetic(inst, tag),
+ .ptr_add, .ptr_sub => |air_tag| try cg.airPtrArithmetic(inst, air_tag),
- .shr, .shr_exact => try self.airShlShrBinOp(inst),
- .shl, .shl_exact => try self.airShlShrBinOp(inst),
+ .shr, .shr_exact => try cg.airShlShrBinOp(inst),
+ .shl, .shl_exact => try cg.airShlShrBinOp(inst),
- .mul => try self.airMulDivBinOp(inst),
- .mul_wrap => try self.airMulDivBinOp(inst),
- .rem => try self.airMulDivBinOp(inst),
- .mod => try self.airMulDivBinOp(inst),
+ .mul => try cg.airMulDivBinOp(inst),
+ .mul_wrap => try cg.airMulDivBinOp(inst),
+ .rem => try cg.airMulDivBinOp(inst),
+ .mod => try cg.airMulDivBinOp(inst),
- .add_sat => try self.airAddSat(inst),
- .sub_sat => try self.airSubSat(inst),
- .mul_sat => try self.airMulSat(inst),
- .shl_sat => try self.airShlSat(inst),
+ .add_sat => try cg.airAddSat(inst),
+ .sub_sat => try cg.airSubSat(inst),
+ .mul_sat => try cg.airMulSat(inst),
+ .shl_sat => try cg.airShlSat(inst),
.sin,
.cos,
@@ -2332,98 +2379,97 @@ fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.log2,
.log10,
.round,
- => |tag| try self.airUnaryMath(inst, tag),
-
- .floor => try self.airRound(inst, .{ .mode = .down, .precision = .inexact }),
- .ceil => try self.airRound(inst, .{ .mode = .up, .precision = .inexact }),
- .trunc_float => try self.airRound(inst, .{ .mode = .zero, .precision = .inexact }),
- .sqrt => try self.airSqrt(inst),
- .neg => try self.airFloatSign(inst),
-
- .abs => try self.airAbs(inst),
-
- .add_with_overflow => try self.airAddSubWithOverflow(inst),
- .sub_with_overflow => try self.airAddSubWithOverflow(inst),
- .mul_with_overflow => try self.airMulWithOverflow(inst),
- .shl_with_overflow => try self.airShlWithOverflow(inst),
-
- .div_float, .div_trunc, .div_floor, .div_exact => try self.airMulDivBinOp(inst),
-
- .cmp_lt => try self.airCmp(inst, .lt),
- .cmp_lte => try self.airCmp(inst, .lte),
- .cmp_eq => try self.airCmp(inst, .eq),
- .cmp_gte => try self.airCmp(inst, .gte),
- .cmp_gt => try self.airCmp(inst, .gt),
- .cmp_neq => try self.airCmp(inst, .neq),
-
- .cmp_vector => try self.airCmpVector(inst),
- .cmp_lt_errors_len => try self.airCmpLtErrorsLen(inst),
-
- .bitcast => try self.airBitCast(inst),
- .fptrunc => try self.airFptrunc(inst),
- .fpext => try self.airFpext(inst),
- .intcast => try self.airIntCast(inst),
- .trunc => try self.airTrunc(inst),
- .is_non_null => try self.airIsNonNull(inst),
- .is_null => try self.airIsNull(inst),
- .is_non_err => try self.airIsNonErr(inst),
- .is_err => try self.airIsErr(inst),
- .load => try self.airLoad(inst),
- .store => try self.airStore(inst, false),
- .store_safe => try self.airStore(inst, true),
- .struct_field_val => try self.airStructFieldVal(inst),
- .float_from_int => try self.airFloatFromInt(inst),
- .int_from_float => try self.airIntFromFloat(inst),
- .cmpxchg_strong => try self.airCmpxchg(inst),
- .cmpxchg_weak => try self.airCmpxchg(inst),
- .atomic_rmw => try self.airAtomicRmw(inst),
- .atomic_load => try self.airAtomicLoad(inst),
- .memcpy => try self.airMemcpy(inst),
- .memset => try self.airMemset(inst, false),
- .memset_safe => try self.airMemset(inst, true),
- .set_union_tag => try self.airSetUnionTag(inst),
- .get_union_tag => try self.airGetUnionTag(inst),
- .clz => try self.airClz(inst),
- .ctz => try self.airCtz(inst),
- .popcount => try self.airPopCount(inst),
- .byte_swap => try self.airByteSwap(inst),
- .bit_reverse => try self.airBitReverse(inst),
- .tag_name => try self.airTagName(inst),
- .error_name => try self.airErrorName(inst),
- .splat => try self.airSplat(inst),
- .select => try self.airSelect(inst),
- .shuffle => try self.airShuffle(inst),
- .reduce => try self.airReduce(inst),
- .aggregate_init => try self.airAggregateInit(inst),
- .union_init => try self.airUnionInit(inst),
- .prefetch => try self.airPrefetch(inst),
- .mul_add => try self.airMulAdd(inst),
-
- .atomic_store_unordered => try self.airAtomicStore(inst, .unordered),
- .atomic_store_monotonic => try self.airAtomicStore(inst, .monotonic),
- .atomic_store_release => try self.airAtomicStore(inst, .release),
- .atomic_store_seq_cst => try self.airAtomicStore(inst, .seq_cst),
-
- .array_elem_val => try self.airArrayElemVal(inst),
- .slice_elem_val => try self.airSliceElemVal(inst),
- .ptr_elem_val => try self.airPtrElemVal(inst),
-
- .optional_payload => try self.airOptionalPayload(inst),
- .unwrap_errunion_err => try self.airUnwrapErrUnionErr(inst),
- .unwrap_errunion_payload => try self.airUnwrapErrUnionPayload(inst),
- .err_return_trace => try self.airErrReturnTrace(inst),
- .set_err_return_trace => try self.airSetErrReturnTrace(inst),
- .save_err_return_trace_index=> try self.airSaveErrReturnTraceIndex(inst),
-
- .wrap_optional => try self.airWrapOptional(inst),
- .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst),
- .wrap_errunion_err => try self.airWrapErrUnionErr(inst),
+ => |air_tag| try cg.airUnaryMath(inst, air_tag),
+
+ .floor => try cg.airRound(inst, .{ .mode = .down, .precision = .inexact }),
+ .ceil => try cg.airRound(inst, .{ .mode = .up, .precision = .inexact }),
+ .trunc_float => try cg.airRound(inst, .{ .mode = .zero, .precision = .inexact }),
+ .sqrt => try cg.airSqrt(inst),
+ .neg => try cg.airFloatSign(inst),
+
+ .abs => try cg.airAbs(inst),
+
+ .add_with_overflow => try cg.airAddSubWithOverflow(inst),
+ .sub_with_overflow => try cg.airAddSubWithOverflow(inst),
+ .mul_with_overflow => try cg.airMulWithOverflow(inst),
+ .shl_with_overflow => try cg.airShlWithOverflow(inst),
+
+ .div_float, .div_trunc, .div_floor, .div_exact => try cg.airMulDivBinOp(inst),
+
+ .cmp_lt => try cg.airCmp(inst, .lt),
+ .cmp_lte => try cg.airCmp(inst, .lte),
+ .cmp_eq => try cg.airCmp(inst, .eq),
+ .cmp_gte => try cg.airCmp(inst, .gte),
+ .cmp_gt => try cg.airCmp(inst, .gt),
+ .cmp_neq => try cg.airCmp(inst, .neq),
+
+ .cmp_lt_errors_len => try cg.airCmpLtErrorsLen(inst),
+
+ .bitcast => try cg.airBitCast(inst),
+ .fptrunc => try cg.airFptrunc(inst),
+ .fpext => try cg.airFpext(inst),
+ .intcast => try cg.airIntCast(inst),
+ .trunc => try cg.airTrunc(inst),
+ .is_non_null => try cg.airIsNonNull(inst),
+ .is_null => try cg.airIsNull(inst),
+ .is_non_err => try cg.airIsNonErr(inst),
+ .is_err => try cg.airIsErr(inst),
+ .load => try cg.airLoad(inst),
+ .store => try cg.airStore(inst, false),
+ .store_safe => try cg.airStore(inst, true),
+ .struct_field_val => try cg.airStructFieldVal(inst),
+ .float_from_int => try cg.airFloatFromInt(inst),
+ .int_from_float => try cg.airIntFromFloat(inst),
+ .cmpxchg_strong => try cg.airCmpxchg(inst),
+ .cmpxchg_weak => try cg.airCmpxchg(inst),
+ .atomic_rmw => try cg.airAtomicRmw(inst),
+ .atomic_load => try cg.airAtomicLoad(inst),
+ .memcpy => try cg.airMemcpy(inst),
+ .memset => try cg.airMemset(inst, false),
+ .memset_safe => try cg.airMemset(inst, true),
+ .set_union_tag => try cg.airSetUnionTag(inst),
+ .get_union_tag => try cg.airGetUnionTag(inst),
+ .clz => try cg.airClz(inst),
+ .ctz => try cg.airCtz(inst),
+ .popcount => try cg.airPopCount(inst),
+ .byte_swap => try cg.airByteSwap(inst),
+ .bit_reverse => try cg.airBitReverse(inst),
+ .tag_name => try cg.airTagName(inst),
+ .error_name => try cg.airErrorName(inst),
+ .splat => try cg.airSplat(inst),
+ .select => try cg.airSelect(inst),
+ .shuffle => try cg.airShuffle(inst),
+ .reduce => try cg.airReduce(inst),
+ .aggregate_init => try cg.airAggregateInit(inst),
+ .union_init => try cg.airUnionInit(inst),
+ .prefetch => try cg.airPrefetch(inst),
+ .mul_add => try cg.airMulAdd(inst),
+
+ .atomic_store_unordered => try cg.airAtomicStore(inst, .unordered),
+ .atomic_store_monotonic => try cg.airAtomicStore(inst, .monotonic),
+ .atomic_store_release => try cg.airAtomicStore(inst, .release),
+ .atomic_store_seq_cst => try cg.airAtomicStore(inst, .seq_cst),
+
+ .array_elem_val => try cg.airArrayElemVal(inst),
+ .slice_elem_val => try cg.airSliceElemVal(inst),
+ .ptr_elem_val => try cg.airPtrElemVal(inst),
+
+ .optional_payload => try cg.airOptionalPayload(inst),
+ .unwrap_errunion_err => try cg.airUnwrapErrUnionErr(inst),
+ .unwrap_errunion_payload => try cg.airUnwrapErrUnionPayload(inst),
+ .err_return_trace => try cg.airErrReturnTrace(inst),
+ .set_err_return_trace => try cg.airSetErrReturnTrace(inst),
+ .save_err_return_trace_index=> try cg.airSaveErrReturnTraceIndex(inst),
+
+ .wrap_optional => try cg.airWrapOptional(inst),
+ .wrap_errunion_payload => try cg.airWrapErrUnionPayload(inst),
+ .wrap_errunion_err => try cg.airWrapErrUnionErr(inst),
// zig fmt: on
.add_safe,
.sub_safe,
.mul_safe,
- => return self.fail("TODO implement safety_checked_instructions", .{}),
+ => return cg.fail("TODO implement safety_checked_instructions", .{}),
.add_optimized,
.sub_optimized,
.mul_optimized,
@@ -2440,214 +2486,612 @@ fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.cmp_gte_optimized,
.cmp_gt_optimized,
.cmp_neq_optimized,
- .cmp_vector_optimized,
.reduce_optimized,
.int_from_float_optimized,
- => return self.fail("TODO implement optimized float mode", .{}),
+ => return cg.fail("TODO implement optimized float mode", .{}),
- .arg => try self.airDbgArg(inst),
- .alloc => if (use_old) try self.airAlloc(inst) else {
- var slot = try self.tempFromValue(self.typeOfIndex(inst), .{ .lea_frame = .{
- .index = try self.allocMemPtr(inst),
+ .arg => try cg.airDbgArg(inst),
+ .alloc => if (use_old) try cg.airAlloc(inst) else {
+ var slot = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .lea_frame = .{
+ .index = try cg.allocMemPtr(inst),
} });
- try slot.moveTo(inst, self);
+ try slot.moveTo(inst, cg);
},
.inferred_alloc => unreachable,
.inferred_alloc_comptime => unreachable,
- .ret_ptr => if (use_old) try self.airRetPtr(inst) else {
- var slot = switch (self.ret_mcv.long) {
+ .ret_ptr => if (use_old) try cg.airRetPtr(inst) else {
+ var slot = switch (cg.ret_mcv.long) {
else => unreachable,
- .none => try self.tempFromValue(self.typeOfIndex(inst), .{ .lea_frame = .{
- .index = try self.allocMemPtr(inst),
+ .none => try cg.tempFromValue(cg.typeOfIndex(inst), .{ .lea_frame = .{
+ .index = try cg.allocMemPtr(inst),
} }),
.load_frame => slot: {
- var slot = try self.tempFromValue(self.typeOfIndex(inst), self.ret_mcv.long);
- try slot.toOffset(self.ret_mcv.short.indirect.off, self);
+ var slot = try cg.tempFromValue(cg.typeOfIndex(inst), cg.ret_mcv.long);
+ try slot.toOffset(cg.ret_mcv.short.indirect.off, cg);
break :slot slot;
},
};
- try slot.moveTo(inst, self);
+ try slot.moveTo(inst, cg);
},
- .assembly => try self.airAsm(inst),
- inline .bit_and, .bit_or, .xor => |tag| if (use_old) try self.airBinOp(inst, tag) else {
+ .assembly => try cg.airAsm(inst),
+ .bit_and, .bit_or, .xor => |air_tag| if (use_old) try cg.airBinOp(inst, air_tag) else {
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
- var ops = try self.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
- try self.spillEflagsIfOccupied();
- const mir_tag: Mir.Inst.Tag = switch (tag) {
- else => unreachable,
- .bit_and => .@"and",
- .bit_or => .@"or",
- .xor => .xor,
- };
+ var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
var res: [1]Temp = undefined;
- try self.select(&res, &.{ &ops[0], &ops[1] }, &.{
- .{
- .required_features = &.{.avx2},
- .mir_tag = .{ .vp_, mir_tag },
- .patterns = &.{
- .{ .ops = &.{ .ymm, .ymm, .mem } },
- .{ .ops = &.{ .ymm, .mem, .ymm }, .commute = .{ 1, 2 } },
- .{ .ops = &.{ .ymm, .ymm, .ymm } },
- },
- },
- .{
- .required_features = &.{.avx},
- .mir_tag = .{ .vp_, mir_tag },
- .patterns = &.{
- .{ .ops = &.{ .xmm, .xmm, .mem } },
- .{ .ops = &.{ .xmm, .mem, .xmm }, .commute = .{ 1, 2 } },
- .{ .ops = &.{ .xmm, .xmm, .xmm } },
- },
- },
- .{
- .required_features = &.{.sse2},
- .mir_tag = .{ .p_, mir_tag },
- .patterns = &.{
- .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .mem } },
- .{ .ops = &.{ .xmm, .mem, .{ .implicit = 0 } } },
- .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .xmm } },
- .{ .ops = &.{ .xmm, .xmm, .{ .implicit = 0 } } },
- },
- },
- .{
- .required_features = &.{.mmx},
- .mir_tag = .{ .p_, mir_tag },
- .patterns = &.{
- .{ .ops = &.{ .mm, .{ .implicit = 0 }, .mem } },
- .{ .ops = &.{ .mm, .mem, .{ .implicit = 0 } } },
- .{ .ops = &.{ .mm, .{ .implicit = 0 }, .mm } },
- .{ .ops = &.{ .mm, .mm, .{ .implicit = 0 } } },
- },
- },
- .{
- .mir_tag = .{ ._, mir_tag },
- .patterns = &.{
- .{ .ops = &.{ .mem, .{ .implicit = 0 }, .simm32 } },
- .{ .ops = &.{ .mem, .simm32, .{ .implicit = 0 } } },
- .{ .ops = &.{ .mem, .{ .implicit = 0 }, .gpr } },
- .{ .ops = &.{ .mem, .gpr, .{ .implicit = 0 } } },
- .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .simm32 } },
- .{ .ops = &.{ .gpr, .simm32, .{ .implicit = 0 } } },
- .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .mem } },
- .{ .ops = &.{ .gpr, .mem, .{ .implicit = 0 } } },
- .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .gpr } },
- .{ .ops = &.{ .gpr, .gpr, .{ .implicit = 0 } } },
- },
- },
+ try cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, pattern_sets: switch (air_tag) {
+ else => unreachable,
+ inline .bit_and, .bit_or, .xor => |ct_air_tag| {
+ const mir_tag: Mir.Inst.Tag = switch (ct_air_tag) {
+ else => unreachable,
+ .bit_and => .@"and",
+ .bit_or => .@"or",
+ .xor => .xor,
+ };
+ break :pattern_sets &.{
+ .{
+ .required_features = &.{.avx2},
+ .mir_tag = .{ .vp_, mir_tag },
+ .patterns = &.{
+ .{ .ops = &.{ .ymm, .ymm, .mem } },
+ .{ .ops = &.{ .ymm, .mem, .ymm }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .ymm, .ymm, .ymm } },
+ },
+ },
+ .{
+ .required_features = &.{.avx},
+ .mir_tag = .{ .vp_, mir_tag },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm, .xmm, .mem } },
+ .{ .ops = &.{ .xmm, .mem, .xmm }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .xmm, .xmm, .xmm } },
+ },
+ },
+ .{
+ .required_features = &.{.sse2},
+ .mir_tag = .{ .p_, mir_tag },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .mem } },
+ .{ .ops = &.{ .xmm, .mem, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .xmm } },
+ .{ .ops = &.{ .xmm, .xmm, .{ .implicit = 0 } } },
+ },
+ },
+ .{
+ .required_features = &.{.mmx},
+ .mir_tag = .{ .p_, mir_tag },
+ .patterns = &.{
+ .{ .ops = &.{ .mm, .{ .implicit = 0 }, .mem } },
+ .{ .ops = &.{ .mm, .mem, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .mm, .{ .implicit = 0 }, .mm } },
+ .{ .ops = &.{ .mm, .mm, .{ .implicit = 0 } } },
+ },
+ },
+ .{
+ .clobbers = .{ .eflags = true },
+ .mir_tag = .{ ._, mir_tag },
+ .patterns = &.{
+ .{ .ops = &.{ .mem, .{ .implicit = 0 }, .simm32 } },
+ .{ .ops = &.{ .mem, .simm32, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .mem, .{ .implicit = 0 }, .gpr } },
+ .{ .ops = &.{ .mem, .gpr, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .simm32 } },
+ .{ .ops = &.{ .gpr, .simm32, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .mem } },
+ .{ .ops = &.{ .gpr, .mem, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .gpr } },
+ .{ .ops = &.{ .gpr, .gpr, .{ .implicit = 0 } } },
+ },
+ },
- .{
- .required_features = &.{.avx2},
- .loop = .bitwise,
- .mir_tag = .{ .vp_, mir_tag },
- .patterns = &.{
- .{ .ops = &.{ .ymm_limb, .{ .explicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .ymm_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
- .{ .ops = &.{ .ymm_limb, .ymm_limb, .mem_limb } },
- },
- },
- .{
- .required_features = &.{.avx},
- .loop = .bitwise,
- .mir_tag = .{ .vp_, mir_tag },
- .patterns = &.{
- .{ .ops = &.{ .xmm_limb, .{ .explicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .xmm_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
- .{ .ops = &.{ .xmm_limb, .xmm_limb, .mem_limb } },
- },
- },
- .{
- .required_features = &.{.sse2},
- .loop = .bitwise,
- .mir_tag = .{ .p_, mir_tag },
- .patterns = &.{
- .{ .ops = &.{ .xmm_limb, .{ .implicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .xmm_limb, .mem_limb, .{ .implicit = 0 } } },
- },
- },
- .{
- .required_features = &.{.mmx},
- .loop = .bitwise,
- .mir_tag = .{ .p_, mir_tag },
- .patterns = &.{
- .{ .ops = &.{ .mm_limb, .{ .implicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .mm_limb, .mem_limb, .{ .implicit = 0 } } },
- },
- },
- .{
- .loop = .bitwise,
- .mir_tag = .{ ._, mir_tag },
- .patterns = &.{
- .{ .ops = &.{ .mem_limb, .{ .implicit = 0 }, .gpr_limb } },
- .{ .ops = &.{ .mem_limb, .gpr_limb, .{ .implicit = 0 } } },
- .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .gpr_limb, .mem_limb, .{ .implicit = 0 } } },
- .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .gpr_limb } },
- },
+ .{
+ .required_features = &.{.avx2},
+ .loop = .bitwise,
+ .mir_tag = .{ .vp_, mir_tag },
+ .patterns = &.{
+ .{ .ops = &.{ .ymm_limb, .{ .explicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .ymm_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .ymm_limb, .ymm_limb, .mem_limb } },
+ .{ .ops = &.{ .ymm_limb, .ymm_limb, .ymm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.avx},
+ .loop = .bitwise,
+ .mir_tag = .{ .vp_, mir_tag },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_limb, .{ .explicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .xmm_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .xmm_limb, .xmm_limb, .mem_limb } },
+ .{ .ops = &.{ .xmm_limb, .xmm_limb, .xmm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.sse2},
+ .loop = .bitwise,
+ .mir_tag = .{ .p_, mir_tag },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_limb, .{ .implicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .xmm_limb, .mem_limb, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .xmm_limb, .{ .implicit = 0 }, .xmm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.mmx},
+ .loop = .bitwise,
+ .mir_tag = .{ .p_, mir_tag },
+ .patterns = &.{
+ .{ .ops = &.{ .mm_limb, .{ .implicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .mm_limb, .mem_limb, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .mm_limb, .{ .implicit = 0 }, .mm_limb } },
+ },
+ },
+ .{
+ .clobbers = .{ .eflags = true },
+ .loop = .bitwise,
+ .mir_tag = .{ ._, mir_tag },
+ .patterns = &.{
+ .{ .ops = &.{ .mem_limb, .{ .implicit = 0 }, .gpr_limb } },
+ .{ .ops = &.{ .mem_limb, .gpr_limb, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .gpr_limb, .mem_limb, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .gpr_limb } },
+ },
+ },
+ };
},
- });
- if (ops[0].index != res[0].index) try ops[0].die(self);
- if (ops[1].index != res[0].index) try ops[1].die(self);
- try res[0].moveTo(inst, self);
+ }, .{});
+ if (ops[0].index != res[0].index) try ops[0].die(cg);
+ if (ops[1].index != res[0].index) try ops[1].die(cg);
+ try res[0].moveTo(inst, cg);
},
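    // Hedged sketch (not part of the commit): the rework above builds the
    // pattern sets inside an inline switch so `mir_tag` is comptime-known,
    // and adds `.loop = .bitwise` limb patterns for big integers.
    // Illustrative source that would flow through these tables on an AVX2
    // target, matching `.{ .ops = &.{ .ymm, .ymm, .ymm } }` to select vpand:
    fn andVec(a: @Vector(8, u32), b: @Vector(8, u32)) @Vector(8, u32) {
        return a & b;
    }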
- .block => if (use_old) try self.airBlock(inst) else {
+ .block => if (use_old) try cg.airBlock(inst) else {
const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
- const extra = self.air.extraData(Air.Block, ty_pl.payload);
- try self.asmPseudo(.pseudo_dbg_enter_block_none);
- try self.lowerBlock(inst, @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len]));
- try self.asmPseudo(.pseudo_dbg_leave_block_none);
+ const extra = cg.air.extraData(Air.Block, ty_pl.payload);
+ try cg.asmPseudo(.pseudo_dbg_enter_block_none);
+ try cg.lowerBlock(inst, @ptrCast(cg.air.extra[extra.end..][0..extra.data.body_len]));
+ try cg.asmPseudo(.pseudo_dbg_leave_block_none);
},
- .loop => if (use_old) try self.airLoop(inst) else {
+ .loop => if (use_old) try cg.airLoop(inst) else {
const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
- const extra = self.air.extraData(Air.Block, ty_pl.payload);
- self.scope_generation += 1;
- try self.loops.putNoClobber(self.gpa, inst, .{
- .state = try self.saveState(),
- .target = @intCast(self.mir_instructions.len),
+ const extra = cg.air.extraData(Air.Block, ty_pl.payload);
+ cg.scope_generation += 1;
+ try cg.loops.putNoClobber(cg.gpa, inst, .{
+ .state = try cg.saveState(),
+ .target = @intCast(cg.mir_instructions.len),
});
- defer assert(self.loops.remove(inst));
- try self.genBodyBlock(@ptrCast(self.air.extra[extra.end..][0..extra.data.body_len]));
+ defer assert(cg.loops.remove(inst));
+ try cg.genBodyBlock(@ptrCast(cg.air.extra[extra.end..][0..extra.data.body_len]));
},
- .repeat => if (use_old) try self.airRepeat(inst) else {
+ .repeat => if (use_old) try cg.airRepeat(inst) else {
const repeat = air_datas[@intFromEnum(inst)].repeat;
- const loop = self.loops.get(repeat.loop_inst).?;
- try self.restoreState(loop.state, &.{}, .{
+ const loop = cg.loops.get(repeat.loop_inst).?;
+ try cg.restoreState(loop.state, &.{}, .{
.emit_instructions = true,
.update_tracking = false,
.resurrect = false,
.close_scope = true,
});
- _ = try self.asmJmpReloc(loop.target);
+ _ = try cg.asmJmpReloc(loop.target);
},
- .br => try self.airBr(inst),
- .trap => try self.asmOpOnly(.{ ._, .ud2 }),
- .breakpoint => try self.asmOpOnly(.{ ._, .int3 }),
- .ret_addr => if (use_old) try self.airRetAddr(inst) else {
- var slot = try self.tempFromValue(self.typeOfIndex(inst), .{ .load_frame = .{
+ .br => try cg.airBr(inst),
+ .trap => try cg.asmOpOnly(.{ ._, .ud2 }),
+ .breakpoint => try cg.asmOpOnly(.{ ._, .int3 }),
+ .ret_addr => if (use_old) try cg.airRetAddr(inst) else {
+ var slot = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .load_frame = .{
.index = .ret_addr,
} });
- while (try slot.toAnyReg(self)) {}
- try slot.moveTo(inst, self);
+ while (try slot.toAnyReg(cg)) {}
+ try slot.moveTo(inst, cg);
},
- .frame_addr => if (use_old) try self.airFrameAddress(inst) else {
- var slot = try self.tempFromValue(self.typeOfIndex(inst), .{ .lea_frame = .{
+ .frame_addr => if (use_old) try cg.airFrameAddress(inst) else {
+ var slot = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .lea_frame = .{
.index = .base_ptr,
} });
- try slot.moveTo(inst, self);
+ try slot.moveTo(inst, cg);
+ },
+ .call => try cg.airCall(inst, .auto),
+ .call_always_tail => try cg.airCall(inst, .always_tail),
+ .call_never_tail => try cg.airCall(inst, .never_tail),
+ .call_never_inline => try cg.airCall(inst, .never_inline),
+
+ .cmp_vector, .cmp_vector_optimized => if (use_old) try cg.airCmpVector(inst) else fallback: {
+ const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
+ const extra = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data;
+ switch (extra.compareOperator()) {
+ .eq, .neq => {},
+ else => break :fallback try cg.airCmpVector(inst),
+ }
+ var ops = try cg.tempsFromOperands(inst, .{ extra.lhs, extra.rhs });
+ var res: [1]Temp = undefined;
+ switch (extra.compareOperator()) {
+ .lt => unreachable,
+ .lte => unreachable,
+ .eq, .neq => |cmp_op| try cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, &.{
+ .{
+ .required_features = &.{.avx2},
+ .scalar = .{ .any_int = .byte },
+ .mir_tag = .{ .vp_b, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .ymm_mask, .ymm, .mem } },
+ .{ .ops = &.{ .ymm_mask, .mem, .ymm }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .ymm_mask, .ymm, .ymm } },
+ },
+ },
+ .{
+ .required_features = &.{.avx2},
+ .scalar = .{ .any_int = .word },
+ .mir_tag = .{ .vp_w, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .ymm_mask, .ymm, .mem } },
+ .{ .ops = &.{ .ymm_mask, .mem, .ymm }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .ymm_mask, .ymm, .ymm } },
+ },
+ },
+ .{
+ .required_features = &.{.avx2},
+ .scalar = .{ .any_int = .dword },
+ .mir_tag = .{ .vp_d, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .ymm_mask, .ymm, .mem } },
+ .{ .ops = &.{ .ymm_mask, .mem, .ymm }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .ymm_mask, .ymm, .ymm } },
+ },
+ },
+ .{
+ .required_features = &.{.avx2},
+ .scalar = .{ .any_int = .qword },
+ .mir_tag = .{ .vp_q, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .ymm_mask, .ymm, .mem } },
+ .{ .ops = &.{ .ymm_mask, .mem, .ymm }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .ymm_mask, .ymm, .ymm } },
+ },
+ },
+ .{
+ .required_features = &.{.avx},
+ .scalar = .{ .any_int = .byte },
+ .mir_tag = .{ .vp_b, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask, .xmm, .mem } },
+ .{ .ops = &.{ .xmm_mask, .mem, .xmm }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .xmm_mask, .xmm, .xmm } },
+ },
+ },
+ .{
+ .required_features = &.{.avx},
+ .scalar = .{ .any_int = .word },
+ .mir_tag = .{ .vp_w, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask, .xmm, .mem } },
+ .{ .ops = &.{ .xmm_mask, .mem, .xmm }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .xmm_mask, .xmm, .xmm } },
+ },
+ },
+ .{
+ .required_features = &.{.avx},
+ .scalar = .{ .any_int = .dword },
+ .mir_tag = .{ .vp_d, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask, .xmm, .mem } },
+ .{ .ops = &.{ .xmm_mask, .mem, .xmm }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .xmm_mask, .xmm, .xmm } },
+ },
+ },
+ .{
+ .required_features = &.{.avx},
+ .scalar = .{ .any_int = .qword },
+ .mir_tag = .{ .vp_q, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask, .xmm, .mem } },
+ .{ .ops = &.{ .xmm_mask, .mem, .xmm }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .xmm_mask, .xmm, .xmm } },
+ },
+ },
+ .{
+ .required_features = &.{.sse2},
+ .scalar = .{ .any_int = .byte },
+ .mir_tag = .{ .p_b, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .mem } },
+ .{ .ops = &.{ .xmm_mask, .mem, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .xmm } },
+ .{ .ops = &.{ .xmm_mask, .xmm, .{ .implicit = 0 } } },
+ },
+ },
+ .{
+ .required_features = &.{.sse2},
+ .scalar = .{ .any_int = .word },
+ .mir_tag = .{ .p_w, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .mem } },
+ .{ .ops = &.{ .xmm_mask, .mem, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .xmm } },
+ .{ .ops = &.{ .xmm_mask, .xmm, .{ .implicit = 0 } } },
+ },
+ },
+ .{
+ .required_features = &.{.sse2},
+ .scalar = .{ .any_int = .dword },
+ .mir_tag = .{ .p_d, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .mem } },
+ .{ .ops = &.{ .xmm_mask, .mem, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .xmm } },
+ .{ .ops = &.{ .xmm_mask, .xmm, .{ .implicit = 0 } } },
+ },
+ },
+ .{
+ .required_features = &.{.sse4_1},
+ .scalar = .{ .any_int = .qword },
+ .mir_tag = .{ .p_q, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .mem } },
+ .{ .ops = &.{ .xmm_mask, .mem, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .xmm } },
+ .{ .ops = &.{ .xmm_mask, .xmm, .{ .implicit = 0 } } },
+ },
+ },
+ .{
+ .required_features = &.{.mmx},
+ .scalar = .{ .any_int = .byte },
+ .mir_tag = .{ .p_b, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mem } },
+ .{ .ops = &.{ .mm_mask, .mem, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mm } },
+ .{ .ops = &.{ .mm_mask, .mm, .{ .implicit = 0 } } },
+ },
+ },
+ .{
+ .required_features = &.{.mmx},
+ .scalar = .{ .any_int = .word },
+ .mir_tag = .{ .p_w, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mem } },
+ .{ .ops = &.{ .mm_mask, .mem, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mm } },
+ .{ .ops = &.{ .mm_mask, .mm, .{ .implicit = 0 } } },
+ },
+ },
+ .{
+ .required_features = &.{.mmx},
+ .scalar = .{ .any_int = .dword },
+ .mir_tag = .{ .p_d, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mem } },
+ .{ .ops = &.{ .mm_mask, .mem, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mm } },
+ .{ .ops = &.{ .mm_mask, .mm, .{ .implicit = 0 } } },
+ },
+ },
+ .{
+ .scalar = .bool,
+ .clobbers = .{ .eflags = true },
+ .invert_result = true,
+ .mir_tag = .{ ._, .xor },
+ .patterns = &.{
+ .{ .ops = &.{ .mem, .{ .implicit = 0 }, .simm32 } },
+ .{ .ops = &.{ .mem, .simm32, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .mem, .{ .implicit = 0 }, .gpr } },
+ .{ .ops = &.{ .mem, .gpr, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .simm32 } },
+ .{ .ops = &.{ .gpr, .simm32, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .mem } },
+ .{ .ops = &.{ .gpr, .mem, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .gpr } },
+ .{ .ops = &.{ .gpr, .gpr, .{ .implicit = 0 } } },
+ },
+ },
+
+ .{
+ .required_features = &.{.avx2},
+ .scalar = .{ .any_int = .byte },
+ .loop = .elementwise,
+ .mir_tag = .{ .vp_b, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } },
+ .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.avx2},
+ .scalar = .{ .any_int = .word },
+ .loop = .elementwise,
+ .mir_tag = .{ .vp_w, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } },
+ .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.avx2},
+ .scalar = .{ .any_int = .dword },
+ .loop = .elementwise,
+ .mir_tag = .{ .vp_d, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } },
+ .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.avx2},
+ .scalar = .{ .any_int = .qword },
+ .loop = .elementwise,
+ .mir_tag = .{ .vp_q, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } },
+ .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.avx},
+ .scalar = .{ .any_int = .byte },
+ .loop = .elementwise,
+ .mir_tag = .{ .vp_b, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } },
+ .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.avx},
+ .scalar = .{ .any_int = .word },
+ .loop = .elementwise,
+ .mir_tag = .{ .vp_w, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } },
+ .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.avx},
+ .scalar = .{ .any_int = .dword },
+ .loop = .elementwise,
+ .mir_tag = .{ .vp_d, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } },
+ .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.avx},
+ .scalar = .{ .any_int = .qword },
+ .loop = .elementwise,
+ .mir_tag = .{ .vp_q, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
+ .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } },
+ .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.sse2},
+ .scalar = .{ .any_int = .byte },
+ .loop = .elementwise,
+ .mir_tag = .{ .p_b, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.sse2},
+ .scalar = .{ .any_int = .word },
+ .loop = .elementwise,
+ .mir_tag = .{ .p_w, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.sse2},
+ .scalar = .{ .any_int = .dword },
+ .loop = .elementwise,
+ .mir_tag = .{ .p_d, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.sse4_1},
+ .scalar = .{ .any_int = .qword },
+ .loop = .elementwise,
+ .mir_tag = .{ .p_q, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.mmx},
+ .scalar = .{ .any_int = .byte },
+ .loop = .elementwise,
+ .mir_tag = .{ .p_b, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.mmx},
+ .scalar = .{ .any_int = .word },
+ .loop = .elementwise,
+ .mir_tag = .{ .p_w, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } },
+ },
+ },
+ .{
+ .required_features = &.{.mmx},
+ .scalar = .{ .any_int = .dword },
+ .loop = .elementwise,
+ .mir_tag = .{ .p_d, .cmpeq },
+ .patterns = &.{
+ .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } },
+ },
+ },
+ .{
+ .scalar = .bool,
+ .clobbers = .{ .eflags = true },
+ .invert_result = true,
+ .loop = .elementwise,
+ .mir_tag = .{ ._, .xor },
+ .patterns = &.{
+ .{ .ops = &.{ .mem_limb, .{ .implicit = 0 }, .gpr_limb } },
+ .{ .ops = &.{ .mem_limb, .gpr_limb, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .mem_limb } },
+ .{ .ops = &.{ .gpr_limb, .mem_limb, .{ .implicit = 0 } } },
+ .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .gpr_limb } },
+ },
+ },
+ }, .{ .invert_result = switch (cmp_op) {
+ .eq => false,
+ .neq => true,
+ else => unreachable,
+ } }),
+ .gte => unreachable,
+ .gt => unreachable,
+ }
+ if (ops[0].index != res[0].index) try ops[0].die(cg);
+ if (ops[1].index != res[0].index) try ops[1].die(cg);
+ try res[0].moveTo(inst, cg);
},
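    // Hedged sketch (not part of the commit): eq/neq vector compares now go
    // through select() with `*_mask` destinations, yielding `register_mask`
    // results; `neq` reuses the `eq` patterns via `.invert_result = true`
    // instead of a separate negation. Illustrative source (assuming SSE2),
    // matching the `.p_d, .cmpeq` pattern set above:
    fn eqLanes(a: @Vector(4, i32), b: @Vector(4, i32)) @Vector(4, bool) {
        return a == b;
    }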
- .call => try self.airCall(inst, .auto),
- .call_always_tail => try self.airCall(inst, .always_tail),
- .call_never_tail => try self.airCall(inst, .never_tail),
- .call_never_inline => try self.airCall(inst, .never_inline),
-
- .cond_br => try self.airCondBr(inst),
- .switch_br => try self.airSwitchBr(inst),
- .loop_switch_br => try self.airLoopSwitchBr(inst),
- .switch_dispatch => try self.airSwitchDispatch(inst),
- .@"try", .try_cold => try self.airTry(inst),
- .try_ptr, .try_ptr_cold => try self.airTryPtr(inst),
- .dbg_stmt => if (use_old) try self.airDbgStmt(inst) else {
+
+ .cond_br => try cg.airCondBr(inst),
+ .switch_br => try cg.airSwitchBr(inst),
+ .loop_switch_br => try cg.airLoopSwitchBr(inst),
+ .switch_dispatch => try cg.airSwitchDispatch(inst),
+ .@"try", .try_cold => try cg.airTry(inst),
+ .try_ptr, .try_ptr_cold => try cg.airTryPtr(inst),
+ .dbg_stmt => if (use_old) try cg.airDbgStmt(inst) else {
const dbg_stmt = air_datas[@intFromEnum(inst)].dbg_stmt;
- _ = try self.addInst(.{
+ _ = try cg.addInst(.{
.tag = .pseudo,
.ops = .pseudo_dbg_line_line_column,
.data = .{ .line_column = .{
@@ -2656,50 +3100,50 @@ fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
});
},
- .dbg_empty_stmt => if (use_old) try self.airDbgEmptyStmt() else {
- if (self.mir_instructions.len > 0) {
- const prev_mir_op = &self.mir_instructions.items(.ops)[self.mir_instructions.len - 1];
+ .dbg_empty_stmt => if (use_old) try cg.airDbgEmptyStmt() else {
+ if (cg.mir_instructions.len > 0) {
+ const prev_mir_op = &cg.mir_instructions.items(.ops)[cg.mir_instructions.len - 1];
if (prev_mir_op.* == .pseudo_dbg_line_stmt_line_column)
prev_mir_op.* = .pseudo_dbg_line_line_column;
}
- try self.asmOpOnly(.{ ._, .nop });
+ try cg.asmOpOnly(.{ ._, .nop });
},
- .dbg_inline_block => if (use_old) try self.airDbgInlineBlock(inst) else {
+ .dbg_inline_block => if (use_old) try cg.airDbgInlineBlock(inst) else {
const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
- const extra = self.air.extraData(Air.DbgInlineBlock, ty_pl.payload);
- const old_inline_func = self.inline_func;
- defer self.inline_func = old_inline_func;
- self.inline_func = extra.data.func;
- _ = try self.addInst(.{
+ const extra = cg.air.extraData(Air.DbgInlineBlock, ty_pl.payload);
+ const old_inline_func = cg.inline_func;
+ defer cg.inline_func = old_inline_func;
+ cg.inline_func = extra.data.func;
+ _ = try cg.addInst(.{
.tag = .pseudo,
.ops = .pseudo_dbg_enter_inline_func,
.data = .{ .func = extra.data.func },
});
- try self.lowerBlock(inst, @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len]));
- _ = try self.addInst(.{
+ try cg.lowerBlock(inst, @ptrCast(cg.air.extra[extra.end..][0..extra.data.body_len]));
+ _ = try cg.addInst(.{
.tag = .pseudo,
.ops = .pseudo_dbg_leave_inline_func,
.data = .{ .func = old_inline_func },
});
},
- .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline => if (use_old) try self.airDbgVar(inst) else {
+ .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline => if (use_old) try cg.airDbgVar(inst) else {
const pl_op = air_datas[@intFromEnum(inst)].pl_op;
- var ops = try self.tempsFromOperands(inst, .{pl_op.operand});
- try self.genLocalDebugInfo(inst, ops[0].tracking(self).short);
- try ops[0].die(self);
+ var ops = try cg.tempsFromOperands(inst, .{pl_op.operand});
+ try cg.genLocalDebugInfo(inst, ops[0].tracking(cg).short);
+ try ops[0].die(cg);
},
- .is_null_ptr => if (use_old) try self.airIsNullPtr(inst) else {
+ .is_null_ptr => if (use_old) try cg.airIsNullPtr(inst) else {
const un_op = air_datas[@intFromEnum(inst)].un_op;
- const opt_ty = self.typeOf(un_op).childType(zcu);
+ const opt_ty = cg.typeOf(un_op).childType(zcu);
const opt_repr_is_pl = opt_ty.optionalReprIsPayload(zcu);
const opt_child_ty = opt_ty.optionalChild(zcu);
const opt_child_abi_size: u31 = @intCast(opt_child_ty.abiSize(zcu));
- var ops = try self.tempsFromOperands(inst, .{un_op});
- if (!opt_repr_is_pl) try ops[0].toOffset(opt_child_abi_size, self);
- while (try ops[0].toLea(self)) {}
- try self.asmMemoryImmediate(
+ var ops = try cg.tempsFromOperands(inst, .{un_op});
+ if (!opt_repr_is_pl) try ops[0].toOffset(opt_child_abi_size, cg);
+ while (try ops[0].toLea(cg)) {}
+ try cg.asmMemoryImmediate(
.{ ._, .cmp },
- try ops[0].tracking(self).short.deref().mem(self, .{ .size = if (!opt_repr_is_pl)
+ try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = if (!opt_repr_is_pl)
.byte
else if (opt_child_ty.isSlice(zcu))
.qword
@@ -2707,22 +3151,22 @@ fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.fromSize(opt_child_abi_size) }),
.u(0),
);
- var is_null = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .e });
- try ops[0].die(self);
- try is_null.moveTo(inst, self);
+ var is_null = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .eflags = .e });
+ try ops[0].die(cg);
+ try is_null.moveTo(inst, cg);
},
- .is_non_null_ptr => if (use_old) try self.airIsNonNullPtr(inst) else {
+ .is_non_null_ptr => if (use_old) try cg.airIsNonNullPtr(inst) else {
const un_op = air_datas[@intFromEnum(inst)].un_op;
- const opt_ty = self.typeOf(un_op).childType(zcu);
+ const opt_ty = cg.typeOf(un_op).childType(zcu);
const opt_repr_is_pl = opt_ty.optionalReprIsPayload(zcu);
const opt_child_ty = opt_ty.optionalChild(zcu);
const opt_child_abi_size: u31 = @intCast(opt_child_ty.abiSize(zcu));
- var ops = try self.tempsFromOperands(inst, .{un_op});
- if (!opt_repr_is_pl) try ops[0].toOffset(opt_child_abi_size, self);
- while (try ops[0].toLea(self)) {}
- try self.asmMemoryImmediate(
+ var ops = try cg.tempsFromOperands(inst, .{un_op});
+ if (!opt_repr_is_pl) try ops[0].toOffset(opt_child_abi_size, cg);
+ while (try ops[0].toLea(cg)) {}
+ try cg.asmMemoryImmediate(
.{ ._, .cmp },
- try ops[0].tracking(self).short.deref().mem(self, .{ .size = if (!opt_repr_is_pl)
+ try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = if (!opt_repr_is_pl)
.byte
else if (opt_child_ty.isSlice(zcu))
.qword
@@ -2730,244 +3174,244 @@ fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.fromSize(opt_child_abi_size) }),
.u(0),
);
- var is_non_null = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .ne });
- try ops[0].die(self);
- try is_non_null.moveTo(inst, self);
+ var is_non_null = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .eflags = .ne });
+ try ops[0].die(cg);
+ try is_non_null.moveTo(inst, cg);
},
- .is_err_ptr => if (use_old) try self.airIsErrPtr(inst) else {
+ .is_err_ptr => if (use_old) try cg.airIsErrPtr(inst) else {
const un_op = air_datas[@intFromEnum(inst)].un_op;
- const eu_ty = self.typeOf(un_op).childType(zcu);
+ const eu_ty = cg.typeOf(un_op).childType(zcu);
const eu_err_ty = eu_ty.errorUnionSet(zcu);
const eu_pl_ty = eu_ty.errorUnionPayload(zcu);
const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu));
- var ops = try self.tempsFromOperands(inst, .{un_op});
- try ops[0].toOffset(eu_err_off, self);
- while (try ops[0].toLea(self)) {}
- try self.asmMemoryImmediate(
+ var ops = try cg.tempsFromOperands(inst, .{un_op});
+ try ops[0].toOffset(eu_err_off, cg);
+ while (try ops[0].toLea(cg)) {}
+ try cg.asmMemoryImmediate(
.{ ._, .cmp },
- try ops[0].tracking(self).short.deref().mem(self, .{ .size = self.memSize(eu_err_ty) }),
+ try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(eu_err_ty) }),
.u(0),
);
- var is_err = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .ne });
- try ops[0].die(self);
- try is_err.moveTo(inst, self);
+ var is_err = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .eflags = .ne });
+ try ops[0].die(cg);
+ try is_err.moveTo(inst, cg);
},
- .is_non_err_ptr => if (use_old) try self.airIsNonErrPtr(inst) else {
+ .is_non_err_ptr => if (use_old) try cg.airIsNonErrPtr(inst) else {
const un_op = air_datas[@intFromEnum(inst)].un_op;
- const eu_ty = self.typeOf(un_op).childType(zcu);
+ const eu_ty = cg.typeOf(un_op).childType(zcu);
const eu_err_ty = eu_ty.errorUnionSet(zcu);
const eu_pl_ty = eu_ty.errorUnionPayload(zcu);
const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu));
- var ops = try self.tempsFromOperands(inst, .{un_op});
- try ops[0].toOffset(eu_err_off, self);
- while (try ops[0].toLea(self)) {}
- try self.asmMemoryImmediate(
+ var ops = try cg.tempsFromOperands(inst, .{un_op});
+ try ops[0].toOffset(eu_err_off, cg);
+ while (try ops[0].toLea(cg)) {}
+ try cg.asmMemoryImmediate(
.{ ._, .cmp },
- try ops[0].tracking(self).short.deref().mem(self, .{ .size = self.memSize(eu_err_ty) }),
+ try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(eu_err_ty) }),
.u(0),
);
- var is_non_err = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .e });
- try ops[0].die(self);
- try is_non_err.moveTo(inst, self);
+ var is_non_err = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .eflags = .e });
+ try ops[0].die(cg);
+ try is_non_err.moveTo(inst, cg);
},
- .int_from_ptr => if (use_old) try self.airIntFromPtr(inst) else {
+ .int_from_ptr => if (use_old) try cg.airIntFromPtr(inst) else {
const un_op = air_datas[@intFromEnum(inst)].un_op;
- var ops = try self.tempsFromOperands(inst, .{un_op});
- try ops[0].toLimb(0, self);
- try ops[0].moveTo(inst, self);
+ var ops = try cg.tempsFromOperands(inst, .{un_op});
+ try ops[0].toLimb(0, cg);
+ try ops[0].moveTo(inst, cg);
},
- .int_from_bool => if (use_old) try self.airIntFromBool(inst) else {
+ .int_from_bool => if (use_old) try cg.airIntFromBool(inst) else {
const un_op = air_datas[@intFromEnum(inst)].un_op;
- var ops = try self.tempsFromOperands(inst, .{un_op});
- try ops[0].moveTo(inst, self);
+ var ops = try cg.tempsFromOperands(inst, .{un_op});
+ try ops[0].moveTo(inst, cg);
},
- .ret => try self.airRet(inst, false),
- .ret_safe => try self.airRet(inst, true),
- .ret_load => try self.airRetLoad(inst),
+ .ret => try cg.airRet(inst, false),
+ .ret_safe => try cg.airRet(inst, true),
+ .ret_load => try cg.airRetLoad(inst),
.unreach => {},
- .optional_payload_ptr => if (use_old) try self.airOptionalPayloadPtr(inst) else {
+ .optional_payload_ptr => if (use_old) try cg.airOptionalPayloadPtr(inst) else {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
- var ops = try self.tempsFromOperands(inst, .{ty_op.operand});
- try ops[0].moveTo(inst, self);
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+ try ops[0].moveTo(inst, cg);
},
- .optional_payload_ptr_set => if (use_old) try self.airOptionalPayloadPtrSet(inst) else {
+ .optional_payload_ptr_set => if (use_old) try cg.airOptionalPayloadPtrSet(inst) else {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
- const opt_ty = self.typeOf(ty_op.operand).childType(zcu);
- var ops = try self.tempsFromOperands(inst, .{ty_op.operand});
+ const opt_ty = cg.typeOf(ty_op.operand).childType(zcu);
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
if (!opt_ty.optionalReprIsPayload(zcu)) {
const opt_child_ty = opt_ty.optionalChild(zcu);
const opt_child_abi_size: i32 = @intCast(opt_child_ty.abiSize(zcu));
- try ops[0].toOffset(opt_child_abi_size, self);
- var has_value = try self.tempFromValue(Type.bool, .{ .immediate = 1 });
- try ops[0].store(&has_value, self);
- try has_value.die(self);
- try ops[0].toOffset(-opt_child_abi_size, self);
+ try ops[0].toOffset(opt_child_abi_size, cg);
+ var has_value = try cg.tempFromValue(Type.bool, .{ .immediate = 1 });
+ try ops[0].store(&has_value, cg);
+ try has_value.die(cg);
+ try ops[0].toOffset(-opt_child_abi_size, cg);
}
- try ops[0].moveTo(inst, self);
+ try ops[0].moveTo(inst, cg);
},
- .unwrap_errunion_payload_ptr => if (use_old) try self.airUnwrapErrUnionPayloadPtr(inst) else {
+ .unwrap_errunion_payload_ptr => if (use_old) try cg.airUnwrapErrUnionPayloadPtr(inst) else {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
- const eu_ty = self.typeOf(ty_op.operand).childType(zcu);
+ const eu_ty = cg.typeOf(ty_op.operand).childType(zcu);
const eu_pl_ty = eu_ty.errorUnionPayload(zcu);
const eu_pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(eu_pl_ty, zcu));
- var ops = try self.tempsFromOperands(inst, .{ty_op.operand});
- try ops[0].toOffset(eu_pl_off, self);
- try ops[0].moveTo(inst, self);
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+ try ops[0].toOffset(eu_pl_off, cg);
+ try ops[0].moveTo(inst, cg);
},
- .unwrap_errunion_err_ptr => if (use_old) try self.airUnwrapErrUnionErrPtr(inst) else {
+ .unwrap_errunion_err_ptr => if (use_old) try cg.airUnwrapErrUnionErrPtr(inst) else {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
- const eu_ty = self.typeOf(ty_op.operand).childType(zcu);
+ const eu_ty = cg.typeOf(ty_op.operand).childType(zcu);
const eu_pl_ty = eu_ty.errorUnionPayload(zcu);
const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu));
- var ops = try self.tempsFromOperands(inst, .{ty_op.operand});
- try ops[0].toOffset(eu_err_off, self);
- var err = try ops[0].load(eu_ty.errorUnionSet(zcu), self);
- try ops[0].die(self);
- try err.moveTo(inst, self);
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+ try ops[0].toOffset(eu_err_off, cg);
+ var err = try ops[0].load(eu_ty.errorUnionSet(zcu), cg);
+ try ops[0].die(cg);
+ try err.moveTo(inst, cg);
},
- .errunion_payload_ptr_set => if (use_old) try self.airErrUnionPayloadPtrSet(inst) else {
+ .errunion_payload_ptr_set => if (use_old) try cg.airErrUnionPayloadPtrSet(inst) else {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
- const eu_ty = self.typeOf(ty_op.operand).childType(zcu);
+ const eu_ty = cg.typeOf(ty_op.operand).childType(zcu);
const eu_err_ty = eu_ty.errorUnionSet(zcu);
const eu_pl_ty = eu_ty.errorUnionPayload(zcu);
const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu));
const eu_pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(eu_pl_ty, zcu));
- var ops = try self.tempsFromOperands(inst, .{ty_op.operand});
- try ops[0].toOffset(eu_err_off, self);
- var no_err = try self.tempFromValue(eu_err_ty, .{ .immediate = 0 });
- try ops[0].store(&no_err, self);
- try no_err.die(self);
- try ops[0].toOffset(eu_pl_off - eu_err_off, self);
- try ops[0].moveTo(inst, self);
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+ try ops[0].toOffset(eu_err_off, cg);
+ var no_err = try cg.tempFromValue(eu_err_ty, .{ .immediate = 0 });
+ try ops[0].store(&no_err, cg);
+ try no_err.die(cg);
+ try ops[0].toOffset(eu_pl_off - eu_err_off, cg);
+ try ops[0].moveTo(inst, cg);
},
- .struct_field_ptr => if (use_old) try self.airStructFieldPtr(inst) else {
+ .struct_field_ptr => if (use_old) try cg.airStructFieldPtr(inst) else {
const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
- const extra = self.air.extraData(Air.StructField, ty_pl.payload).data;
- var ops = try self.tempsFromOperands(inst, .{extra.struct_operand});
- try ops[0].toOffset(self.fieldOffset(
- self.typeOf(extra.struct_operand),
- self.typeOfIndex(inst),
+ const extra = cg.air.extraData(Air.StructField, ty_pl.payload).data;
+ var ops = try cg.tempsFromOperands(inst, .{extra.struct_operand});
+ try ops[0].toOffset(cg.fieldOffset(
+ cg.typeOf(extra.struct_operand),
+ cg.typeOfIndex(inst),
extra.field_index,
- ), self);
- try ops[0].moveTo(inst, self);
+ ), cg);
+ try ops[0].moveTo(inst, cg);
},
- .struct_field_ptr_index_0 => if (use_old) try self.airStructFieldPtrIndex(inst, 0) else {
+ .struct_field_ptr_index_0 => if (use_old) try cg.airStructFieldPtrIndex(inst, 0) else {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
- var ops = try self.tempsFromOperands(inst, .{ty_op.operand});
- try ops[0].toOffset(self.fieldOffset(
- self.typeOf(ty_op.operand),
- self.typeOfIndex(inst),
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+ try ops[0].toOffset(cg.fieldOffset(
+ cg.typeOf(ty_op.operand),
+ cg.typeOfIndex(inst),
0,
- ), self);
- try ops[0].moveTo(inst, self);
+ ), cg);
+ try ops[0].moveTo(inst, cg);
},
- .struct_field_ptr_index_1 => if (use_old) try self.airStructFieldPtrIndex(inst, 1) else {
+ .struct_field_ptr_index_1 => if (use_old) try cg.airStructFieldPtrIndex(inst, 1) else {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
- var ops = try self.tempsFromOperands(inst, .{ty_op.operand});
- try ops[0].toOffset(self.fieldOffset(
- self.typeOf(ty_op.operand),
- self.typeOfIndex(inst),
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+ try ops[0].toOffset(cg.fieldOffset(
+ cg.typeOf(ty_op.operand),
+ cg.typeOfIndex(inst),
1,
- ), self);
- try ops[0].moveTo(inst, self);
+ ), cg);
+ try ops[0].moveTo(inst, cg);
},
- .struct_field_ptr_index_2 => if (use_old) try self.airStructFieldPtrIndex(inst, 2) else {
+ .struct_field_ptr_index_2 => if (use_old) try cg.airStructFieldPtrIndex(inst, 2) else {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
- var ops = try self.tempsFromOperands(inst, .{ty_op.operand});
- try ops[0].toOffset(self.fieldOffset(
- self.typeOf(ty_op.operand),
- self.typeOfIndex(inst),
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+ try ops[0].toOffset(cg.fieldOffset(
+ cg.typeOf(ty_op.operand),
+ cg.typeOfIndex(inst),
2,
- ), self);
- try ops[0].moveTo(inst, self);
+ ), cg);
+ try ops[0].moveTo(inst, cg);
},
- .struct_field_ptr_index_3 => if (use_old) try self.airStructFieldPtrIndex(inst, 3) else {
+ .struct_field_ptr_index_3 => if (use_old) try cg.airStructFieldPtrIndex(inst, 3) else {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
- var ops = try self.tempsFromOperands(inst, .{ty_op.operand});
- try ops[0].toOffset(self.fieldOffset(
- self.typeOf(ty_op.operand),
- self.typeOfIndex(inst),
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+ try ops[0].toOffset(cg.fieldOffset(
+ cg.typeOf(ty_op.operand),
+ cg.typeOfIndex(inst),
3,
- ), self);
- try ops[0].moveTo(inst, self);
+ ), cg);
+ try ops[0].moveTo(inst, cg);
},
- .slice => if (use_old) try self.airSlice(inst) else {
+ .slice => if (use_old) try cg.airSlice(inst) else {
const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
- const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
- var ops = try self.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
- try ops[0].toPair(&ops[1], self);
- try ops[0].moveTo(inst, self);
+ const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data;
+ var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
+ try ops[0].toPair(&ops[1], cg);
+ try ops[0].moveTo(inst, cg);
},
- .slice_len => if (use_old) try self.airSliceLen(inst) else {
+ .slice_len => if (use_old) try cg.airSliceLen(inst) else {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
- var ops = try self.tempsFromOperands(inst, .{ty_op.operand});
- try ops[0].toLimb(1, self);
- try ops[0].moveTo(inst, self);
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+ try ops[0].toLimb(1, cg);
+ try ops[0].moveTo(inst, cg);
},
- .slice_ptr => if (use_old) try self.airSlicePtr(inst) else {
+ .slice_ptr => if (use_old) try cg.airSlicePtr(inst) else {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
- var ops = try self.tempsFromOperands(inst, .{ty_op.operand});
- try ops[0].toLimb(0, self);
- try ops[0].moveTo(inst, self);
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+ try ops[0].toLimb(0, cg);
+ try ops[0].moveTo(inst, cg);
},
- .ptr_slice_len_ptr => if (use_old) try self.airPtrSliceLenPtr(inst) else {
+ .ptr_slice_len_ptr => if (use_old) try cg.airPtrSliceLenPtr(inst) else {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
- var ops = try self.tempsFromOperands(inst, .{ty_op.operand});
- try ops[0].toOffset(8, self);
- try ops[0].moveTo(inst, self);
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+ try ops[0].toOffset(8, cg);
+ try ops[0].moveTo(inst, cg);
},
- .ptr_slice_ptr_ptr => if (use_old) try self.airPtrSlicePtrPtr(inst) else {
+ .ptr_slice_ptr_ptr => if (use_old) try cg.airPtrSlicePtrPtr(inst) else {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
- var ops = try self.tempsFromOperands(inst, .{ty_op.operand});
- try ops[0].toOffset(0, self);
- try ops[0].moveTo(inst, self);
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+ try ops[0].toOffset(0, cg);
+ try ops[0].moveTo(inst, cg);
},
.slice_elem_ptr, .ptr_elem_ptr => |tag| if (use_old) switch (tag) {
else => unreachable,
- .slice_elem_ptr => try self.airSliceElemPtr(inst),
- .ptr_elem_ptr => try self.airPtrElemPtr(inst),
+ .slice_elem_ptr => try cg.airSliceElemPtr(inst),
+ .ptr_elem_ptr => try cg.airPtrElemPtr(inst),
} else {
const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
- const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
- var ops = try self.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
+ const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data;
+ var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
switch (tag) {
else => unreachable,
- .slice_elem_ptr => try ops[0].toLimb(0, self),
+ .slice_elem_ptr => try ops[0].toLimb(0, cg),
.ptr_elem_ptr => {},
}
- const dst_ty = self.typeOfIndex(inst);
+ const dst_ty = cg.typeOfIndex(inst);
if (dst_ty.ptrInfo(zcu).flags.vector_index == .none) zero_offset: {
const elem_size = dst_ty.childType(zcu).abiSize(zcu);
if (elem_size == 0) break :zero_offset;
while (true) for (&ops) |*op| {
- if (try op.toAnyReg(self)) break;
+ if (try op.toAnyReg(cg)) break;
} else break;
- const lhs_reg = ops[0].unwrap(self).temp.tracking(self).short.register.to64();
- const rhs_reg = ops[1].unwrap(self).temp.tracking(self).short.register.to64();
+ const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64();
+ const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64();
if (!std.math.isPowerOfTwo(elem_size)) {
- try self.spillEflagsIfOccupied();
- try self.asmRegisterRegisterImmediate(
+ try cg.spillEflagsIfOccupied();
+ try cg.asmRegisterRegisterImmediate(
.{ .i_, .mul },
rhs_reg,
rhs_reg,
.u(elem_size),
);
- try self.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
+ try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
.base = .{ .reg = lhs_reg },
.mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
});
} else if (elem_size > 8) {
- try self.spillEflagsIfOccupied();
- try self.asmRegisterImmediate(
+ try cg.spillEflagsIfOccupied();
+ try cg.asmRegisterImmediate(
.{ ._l, .sh },
rhs_reg,
.u(std.math.log2_int(u64, elem_size)),
);
- try self.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
+ try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
.base = .{ .reg = lhs_reg },
.mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
});
- } else try self.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
+ } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
.base = .{ .reg = lhs_reg },
.mod = .{ .rm = .{
.size = .qword,
            .index = rhs_reg,
            .scale = .fromFactor(@intCast(elem_size)),
} },
});
}
- try ops[1].die(self);
- try ops[0].moveTo(inst, self);
+ try ops[1].die(cg);
+ try ops[0].moveTo(inst, cg);
},
- .array_to_slice => if (use_old) try self.airArrayToSlice(inst) else {
+ .array_to_slice => if (use_old) try cg.airArrayToSlice(inst) else {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
- var ops = try self.tempsFromOperands(inst, .{ty_op.operand});
- var len = try self.tempFromValue(Type.usize, .{
- .immediate = self.typeOf(ty_op.operand).childType(zcu).arrayLen(zcu),
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+ var len = try cg.tempFromValue(Type.usize, .{
+ .immediate = cg.typeOf(ty_op.operand).childType(zcu).arrayLen(zcu),
});
- try ops[0].toPair(&len, self);
- try ops[0].moveTo(inst, self);
+ try ops[0].toPair(&len, cg);
+ try ops[0].moveTo(inst, cg);
},
- .error_set_has_value => return self.fail("TODO implement error_set_has_value", .{}),
- .field_parent_ptr => if (use_old) try self.airFieldParentPtr(inst) else {
+ .error_set_has_value => return cg.fail("TODO implement error_set_has_value", .{}),
+ .field_parent_ptr => if (use_old) try cg.airFieldParentPtr(inst) else {
const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
- const extra = self.air.extraData(Air.FieldParentPtr, ty_pl.payload).data;
- var ops = try self.tempsFromOperands(inst, .{extra.field_ptr});
- try ops[0].toOffset(-self.fieldOffset(
- self.typeOfIndex(inst),
- self.typeOf(extra.field_ptr),
+ const extra = cg.air.extraData(Air.FieldParentPtr, ty_pl.payload).data;
+ var ops = try cg.tempsFromOperands(inst, .{extra.field_ptr});
+ try ops[0].toOffset(-cg.fieldOffset(
+ cg.typeOfIndex(inst),
+ cg.typeOf(extra.field_ptr),
extra.field_index,
- ), self);
- try ops[0].moveTo(inst, self);
+ ), cg);
+ try ops[0].moveTo(inst, cg);
},
- .is_named_enum_value => return self.fail("TODO implement is_named_enum_value", .{}),
+ .is_named_enum_value => return cg.fail("TODO implement is_named_enum_value", .{}),
.wasm_memory_size => unreachable,
.wasm_memory_grow => unreachable,
.addrspace_cast => {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
- var ops = try self.tempsFromOperands(inst, .{ty_op.operand});
- try ops[0].moveTo(inst, self);
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+ try ops[0].moveTo(inst, cg);
},
- .vector_store_elem => return self.fail("TODO implement vector_store_elem", .{}),
+ .vector_store_elem => return cg.fail("TODO implement vector_store_elem", .{}),
- .c_va_arg => try self.airVaArg(inst),
- .c_va_copy => try self.airVaCopy(inst),
- .c_va_end => try self.airVaEnd(inst),
- .c_va_start => try self.airVaStart(inst),
+ .c_va_arg => try cg.airVaArg(inst),
+ .c_va_copy => try cg.airVaCopy(inst),
+ .c_va_end => try cg.airVaEnd(inst),
+ .c_va_start => try cg.airVaStart(inst),
.work_item_id => unreachable,
.work_group_size => unreachable,
.work_group_id => unreachable,
}
- self.resetTemps();
- self.checkInvariantsAfterAirInst();
+ cg.resetTemps();
+ cg.checkInvariantsAfterAirInst();
}
- verbose_tracking_log.debug("{}", .{self.fmtTracking()});
+ verbose_tracking_log.debug("{}", .{cg.fmtTracking()});
}
fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void {
@@ -3117,12 +3561,16 @@ fn freeReg(self: *CodeGen, reg: Register) !void {
fn freeValue(self: *CodeGen, value: MCValue) !void {
switch (value) {
.register => |reg| try self.freeReg(reg),
- .register_pair => |regs| for (regs) |reg| try self.freeReg(reg),
+ inline .register_pair,
+ .register_triple,
+ .register_quadruple,
+ => |regs| for (regs) |reg| try self.freeReg(reg),
.register_offset, .indirect => |reg_off| try self.freeReg(reg_off.reg),
.register_overflow => |reg_ov| {
try self.freeReg(reg_ov.reg);
self.eflags_inst = null;
},
+ .register_mask => |reg_mask| try self.freeReg(reg_mask.reg),
.eflags => self.eflags_inst = null,
else => {}, // TODO process stack allocation death
}
@@ -3323,11 +3771,11 @@ fn allocRegOrMemAdvanced(self: *CodeGen, ty: Type, inst: ?Air.Inst.Index, reg_ok
},
.vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
.float => switch (ty.childType(zcu).floatBits(self.target.*)) {
- 16, 32, 64, 128 => if (self.hasFeature(.avx)) 32 else 16,
+ 16, 32, 64, 128 => self.vectorSize(.float),
80 => break :need_mem,
else => unreachable,
},
- else => if (self.hasFeature(.avx)) 32 else 16,
+ else => self.vectorSize(.int),
},
else => 8,
})) {
@@ -3374,6 +3822,28 @@ fn regSetForType(self: *CodeGen, ty: Type) RegisterManager.RegisterBitSet {
return regSetForRegClass(self.regClassForType(ty));
}
+fn vectorSize(cg: *CodeGen, kind: enum { int, float }) u6 {
+ return if (cg.hasFeature(switch (kind) {
+ .int => .avx2,
+ .float => .avx,
+ })) 32 else if (cg.hasFeature(.sse)) 16 else 8;
+}
+
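Note: AVX widened only the floating-point operations to 256 bits; 256-bit integer operations require AVX2, which is why the two kinds consult different features. A standalone sketch of the same mapping, under assumed-equivalent semantics (`vectorSizeSketch` is a hypothetical mirror using the `std.Target.x86.featureSetHas` API this commit also uses in abi.zig):

const std = @import("std");

// Hypothetical standalone mirror of CodeGen.vectorSize: the widest usable
// vector register, in bytes, for the given element kind on the selected CPU.
fn vectorSizeSketch(features: std.Target.Cpu.Feature.Set, kind: enum { int, float }) u6 {
    const has = std.Target.x86.featureSetHas;
    return if (has(features, switch (kind) {
        .int => .avx2, // 256-bit integer ops arrived with AVX2
        .float => .avx, // AVX already covers 256-bit float ops
    })) 32 else if (has(features, .sse)) 16 else 8;
}
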
+fn limbType(cg: *CodeGen, ty: Type) !Type {
+    const pt = cg.pt;
+    const zcu = pt.zcu;
+    const vector_size = cg.vectorSize(if (ty.isRuntimeFloat()) .float else .int);
+    const scalar_ty = ty.scalarType(zcu);
+    const scalar_size = scalar_ty.abiSize(zcu);
+    // A scalar wider than the widest vector register cannot be grouped into
+    // vector limbs; fall back to the scalar type itself.
+    if (scalar_size > vector_size) return scalar_ty;
+    return pt.vectorType(.{
+        .len = @intCast(@divExact(vector_size, scalar_size)),
+        .child = scalar_ty.toIntern(),
+    });
+}
+
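A worked example of the limb arithmetic, assuming an AVX target where `vectorSize(.float)` reports 32 bytes: an f64 scalar occupies 8 bytes, so a large f64 vector is processed in `@Vector(4, f64)` limbs.

const std = @import("std");

test "limb lane count (illustrative)" {
    const vector_size: u64 = 32; // bytes per AVX register
    const scalar_size: u64 = 8; // bytes per f64
    // 32 / 8 = 4 lanes per limb, i.e. @Vector(4, f64).
    try std.testing.expectEqual(@as(u64, 4), @divExact(vector_size, scalar_size));
}
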
const State = struct {
registers: RegisterManager.TrackedRegisters,
reg_tracking: [RegisterManager.RegisterBitSet.bit_length]InstTracking,
@@ -3639,7 +4109,7 @@ fn airFptrunc(self: *CodeGen, inst: Air.Inst.Index) !void {
}
} else {
assert(src_bits == 64 and dst_bits == 32);
- if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
+ if (self.hasFeature(.avx)) if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
.{ .v_ss, .cvtsd2 },
dst_reg,
dst_reg,
@@ -3652,7 +4122,7 @@ fn airFptrunc(self: *CodeGen, inst: Air.Inst.Index) !void {
src_mcv.getReg().?
else
try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
- ) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ ) else if (src_mcv.isBase()) try self.asmRegisterMemory(
.{ ._ss, .cvtsd2 },
dst_reg,
try src_mcv.mem(self, .{ .size = .qword }),
@@ -3751,7 +4221,7 @@ fn airFpext(self: *CodeGen, inst: Air.Inst.Index) !void {
} else {
assert(src_bits == 32 and dst_bits == 64);
if (self.hasFeature(.avx)) switch (vec_len) {
- 1 => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
+ 1 => if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
.{ .v_sd, .cvtss2 },
dst_alias,
dst_alias,
@@ -3765,7 +4235,7 @@ fn airFpext(self: *CodeGen, inst: Air.Inst.Index) !void {
else
try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
),
- 2...4 => if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ 2...4 => if (src_mcv.isBase()) try self.asmRegisterMemory(
.{ .v_pd, .cvtps2 },
dst_alias,
try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }),
@@ -3778,7 +4248,7 @@ fn airFpext(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
),
else => break :result null,
- } else if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ } else if (src_mcv.isBase()) try self.asmRegisterMemory(
switch (vec_len) {
1 => .{ ._sd, .cvtss2 },
2 => .{ ._pd, .cvtps2 },
@@ -3827,7 +4297,7 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void {
if (dst_ty.isVector(zcu)) {
const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));
const max_abi_size = @max(dst_abi_size, src_abi_size);
- if (max_abi_size > @as(u32, if (self.hasFeature(.avx2)) 32 else 16)) break :result null;
+ if (max_abi_size > self.vectorSize(.int)) break :result null;
const has_avx = self.hasFeature(.avx);
const dst_elem_abi_size = dst_ty.childType(zcu).abiSize(zcu);
@@ -3919,7 +4389,7 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void {
const dst_reg = dst_mcv.getReg().?;
const dst_alias = registerAlias(dst_reg, dst_abi_size);
- if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ if (src_mcv.isBase()) try self.asmRegisterMemory(
mir_tag,
dst_alias,
try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }),
@@ -4017,7 +4487,7 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void {
const src_limbs_len = std.math.divCeil(u16, src_int_info.bits, 64) catch unreachable;
const dst_limbs_len = std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable;
- const high_mcv: MCValue = if (dst_mcv.isMemory())
+ const high_mcv: MCValue = if (dst_mcv.isBase())
dst_mcv.address().offset((src_limbs_len - 1) * 8).deref()
else
.{ .register = dst_mcv.register_pair[1] };
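The limb counts (`src_limbs_len`, `dst_limbs_len`) follow directly from `divCeil(bits, 64)`: a 128-bit integer occupies two 64-bit limbs, a 129-bit one occupies three.

const std = @import("std");

test "64-bit limb count (illustrative)" {
    try std.testing.expectEqual(@as(u16, 2), std.math.divCeil(u16, 128, 64) catch unreachable);
    try std.testing.expectEqual(@as(u16, 3), std.math.divCeil(u16, 129, 64) catch unreachable);
}
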
@@ -4392,7 +4862,7 @@ fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void {
else => null,
};
defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
- if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory(
+ if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
.{ ._, .mov },
tmp_reg,
try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
@@ -4416,7 +4886,7 @@ fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void {
else => null,
};
defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
- if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory(
+ if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
.{ ._, .xor },
tmp_reg,
try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
@@ -4518,7 +4988,7 @@ fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void {
for (tmp_regs, dst_regs) |tmp_reg, dst_reg|
try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_reg);
- if (mat_rhs_mcv.isMemory()) {
+ if (mat_rhs_mcv.isBase()) {
try self.asmRegisterMemory(
.{ ._, .add },
tmp_regs[0],
@@ -4793,7 +5263,7 @@ fn airMulSat(self: *CodeGen, inst: Air.Inst.Index) !void {
else => null,
};
defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
- if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory(
+ if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
.{ ._, .mov },
tmp_reg,
try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
@@ -4817,7 +5287,7 @@ fn airMulSat(self: *CodeGen, inst: Air.Inst.Index) !void {
else => null,
};
defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
- if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory(
+ if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
.{ ._, .xor },
tmp_reg,
try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
@@ -5348,7 +5818,7 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
};
defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
- if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory(
+ if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
.{ ._, .mov },
.rax,
try mat_lhs_mcv.mem(self, .{ .size = .qword }),
@@ -5357,7 +5827,7 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
.rax,
mat_lhs_mcv.register_pair[0],
);
- if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory(
+ if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
.{ ._, .mov },
tmp_regs[0],
try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
@@ -5370,14 +5840,14 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.asmSetccRegister(.nz, tmp_regs[1].to8());
try self.asmRegisterRegister(.{ .i_, .mul }, tmp_regs[0], .rax);
try self.asmSetccRegister(.o, tmp_regs[2].to8());
- if (mat_rhs_mcv.isMemory())
+ if (mat_rhs_mcv.isBase())
try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .{ .size = .qword }))
else
try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]);
try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]);
try self.asmSetccRegister(.c, tmp_regs[3].to8());
try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[2].to8(), tmp_regs[3].to8());
- if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory(
+ if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
.{ ._, .mov },
tmp_regs[0],
try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
@@ -5394,7 +5864,7 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
tmp_regs[3].to8(),
);
try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8());
- if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory(
+ if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
.{ .i_, .mul },
tmp_regs[0],
try mat_rhs_mcv.mem(self, .{ .size = .qword }),
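The sequence above is the textbook 128-bit multiply built from 64-bit halves: `mul` yields the low product in rdx:rax, the two `imul` cross terms are added into the high limb, and the `setcc` results are OR-ed into the overflow bit. A semantic model of that reading, not instruction-for-instruction (`mul128WithOverflow` is a hypothetical helper):

fn mul128WithOverflow(a: u128, b: u128) struct { u128, bool } {
    const lo_a: u64 = @truncate(a);
    const hi_a: u64 = @truncate(a >> 64);
    const lo_b: u64 = @truncate(b);
    const hi_b: u64 = @truncate(b >> 64);
    // A nonzero hi*hi term contributes at 2^128 and always overflows.
    var overflow = hi_a != 0 and hi_b != 0;
    const cross_a = @mulWithOverflow(lo_a, hi_b); // first cross term
    const cross_b = @mulWithOverflow(hi_a, lo_b); // second cross term
    const low = @as(u128, lo_a) * @as(u128, lo_b); // mul: rdx:rax
    const hi_1 = @addWithOverflow(@as(u64, @truncate(low >> 64)), cross_a[0]);
    const hi_2 = @addWithOverflow(hi_1[0], cross_b[0]);
    overflow = overflow or cross_a[1] != 0 or cross_b[1] != 0 or hi_1[1] != 0 or hi_2[1] != 0;
    return .{ (@as(u128, hi_2[0]) << 64) | @as(u64, @truncate(low)), overflow };
}
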
@@ -6512,7 +6982,7 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
const index_ty = self.typeOf(bin_op.rhs);
const index_mcv = try self.resolveInst(bin_op.rhs);
- const index_lock: ?RegisterLock = switch (index_mcv) {
+ const index_lock = switch (index_mcv) {
.register => |reg| self.register_manager.lockRegAssumeUnused(reg),
else => null,
};
@@ -6520,48 +6990,102 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.spillEflagsIfOccupied();
if (array_ty.isVector(zcu) and elem_ty.bitSize(zcu) == 1) {
- const index_reg = switch (index_mcv) {
- .register => |reg| reg,
- else => try self.copyToTmpRegister(index_ty, index_mcv),
+ const array_mat_mcv: MCValue = switch (array_mcv) {
+ else => array_mcv,
+ .register_mask => .{ .register = try self.copyToTmpRegister(array_ty, array_mcv) },
};
- switch (array_mcv) {
+ const array_mat_lock = switch (array_mat_mcv) {
+ .register => |reg| self.register_manager.lockReg(reg),
+ else => null,
+ };
+ defer if (array_mat_lock) |lock| self.register_manager.unlockReg(lock);
+
+ switch (array_mat_mcv) {
.register => |array_reg| switch (array_reg.class()) {
- .general_purpose => try self.asmRegisterRegister(
- .{ ._, .bt },
- array_reg.to64(),
- index_reg.to64(),
- ),
+ .general_purpose => switch (index_mcv) {
+ .immediate => |index_imm| try self.asmRegisterImmediate(
+ .{ ._, .bt },
+ array_reg.to64(),
+ .u(index_imm),
+ ),
+ else => try self.asmRegisterRegister(
+ .{ ._, .bt },
+ array_reg.to64(),
+ switch (index_mcv) {
+ .register => |index_reg| index_reg,
+ else => try self.copyToTmpRegister(index_ty, index_mcv),
+ }.to64(),
+ ),
+ },
.sse => {
const frame_index = try self.allocFrameIndex(.initType(array_ty, zcu));
- try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array_mcv, .{});
- try self.asmMemoryRegister(
- .{ ._, .bt },
- .{
- .base = .{ .frame = frame_index },
- .mod = .{ .rm = .{ .size = .qword } },
- },
- index_reg.to64(),
- );
+ try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array_mat_mcv, .{});
+ switch (index_mcv) {
+ .immediate => |index_imm| try self.asmMemoryImmediate(
+ .{ ._, .bt },
+ .{
+ .base = .{ .frame = frame_index },
+ .mod = .{ .rm = .{ .size = .qword } },
+ },
+ .u(index_imm),
+ ),
+ else => try self.asmMemoryRegister(
+ .{ ._, .bt },
+ .{
+ .base = .{ .frame = frame_index },
+ .mod = .{ .rm = .{ .size = .qword } },
+ },
+ switch (index_mcv) {
+ .register => |index_reg| index_reg,
+ else => try self.copyToTmpRegister(index_ty, index_mcv),
+ }.to64(),
+ ),
+ }
},
else => unreachable,
},
- .load_frame => try self.asmMemoryRegister(
- .{ ._, .bt },
- try array_mcv.mem(self, .{ .size = .qword }),
- index_reg.to64(),
- ),
- .memory, .load_symbol, .load_direct, .load_got, .load_tlv => try self.asmMemoryRegister(
- .{ ._, .bt },
- .{
- .base = .{
- .reg = try self.copyToTmpRegister(Type.usize, array_mcv.address()),
+ .load_frame => switch (index_mcv) {
+ .immediate => |index_imm| try self.asmMemoryImmediate(
+ .{ ._, .bt },
+ try array_mat_mcv.mem(self, .{ .size = .qword }),
+ .u(index_imm),
+ ),
+ else => try self.asmMemoryRegister(
+ .{ ._, .bt },
+ try array_mat_mcv.mem(self, .{ .size = .qword }),
+ switch (index_mcv) {
+ .register => |index_reg| index_reg,
+ else => try self.copyToTmpRegister(index_ty, index_mcv),
+ }.to64(),
+ ),
+ },
+ .memory, .load_symbol, .load_direct, .load_got, .load_tlv => switch (index_mcv) {
+ .immediate => |index_imm| try self.asmMemoryImmediate(
+ .{ ._, .bt },
+ .{
+ .base = .{
+ .reg = try self.copyToTmpRegister(Type.usize, array_mat_mcv.address()),
+ },
+ .mod = .{ .rm = .{ .size = .qword } },
},
- .mod = .{ .rm = .{ .size = .qword } },
- },
- index_reg.to64(),
- ),
+ .u(index_imm),
+ ),
+ else => try self.asmMemoryRegister(
+ .{ ._, .bt },
+ .{
+ .base = .{
+ .reg = try self.copyToTmpRegister(Type.usize, array_mat_mcv.address()),
+ },
+ .mod = .{ .rm = .{ .size = .qword } },
+ },
+ switch (index_mcv) {
+ .register => |index_reg| index_reg,
+ else => try self.copyToTmpRegister(index_ty, index_mcv),
+ }.to64(),
+ ),
+ },
else => return self.fail("TODO airArrayElemVal for {s} of {}", .{
- @tagName(array_mcv), array_ty.fmt(pt),
+ @tagName(array_mat_mcv), array_ty.fmt(pt),
}),
}
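Every one of these paths funnels into `bt`, which copies the indexed bit into CF for a later `setcc`; the rewrite merely lets a comptime-known index use the immediate form instead of occupying a register. A scalar model of the in-range case (illustrative; hardware `bt` with a register index against memory additionally treats the index as a bit-string offset):

fn bitTest(word: u64, index: u6) bool {
    return ((word >> index) & 1) != 0; // CF := bit `index` of `word`
}
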
@@ -6856,6 +7380,15 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void {
const src_bits: u31 = @intCast(src_ty.bitSize(zcu));
const has_lzcnt = self.hasFeature(.lzcnt);
if (src_bits > @as(u32, if (has_lzcnt) 128 else 64)) {
+ const src_frame_addr: bits.FrameAddr = src_frame_addr: switch (src_mcv) {
+ .load_frame => |src_frame_addr| src_frame_addr,
+ else => {
+ const src_frame_addr = try self.allocFrameIndex(.initSpill(src_ty, zcu));
+ try self.genSetMem(.{ .frame = src_frame_addr }, 0, src_ty, src_mcv, .{});
+ break :src_frame_addr .{ .index = src_frame_addr };
+ },
+ };
+
const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable;
const extra_bits = abi_size * 8 - src_bits;
@@ -6881,22 +7414,22 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.asmRegister(.{ ._, .dec }, index_reg.to32());
}
try self.asmMemoryImmediate(.{ ._, .cmp }, .{
- .base = .{ .frame = src_mcv.load_frame.index },
+ .base = .{ .frame = src_frame_addr.index },
.mod = .{ .rm = .{
.size = .qword,
.index = index_reg.to64(),
.scale = .@"8",
- .disp = src_mcv.load_frame.off,
+ .disp = src_frame_addr.off,
} },
}, .u(0));
_ = try self.asmJccReloc(.e, loop);
try self.asmRegisterMemory(.{ ._, .bsr }, dst_reg.to64(), .{
- .base = .{ .frame = src_mcv.load_frame.index },
+ .base = .{ .frame = src_frame_addr.index },
.mod = .{ .rm = .{
.size = .qword,
.index = index_reg.to64(),
.scale = .@"8",
- .disp = src_mcv.load_frame.off,
+ .disp = src_frame_addr.off,
} },
});
self.performReloc(zero);
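That loop scans limbs from the most significant end and applies `bsr` to the first nonzero one; semantically (illustrative sketch, `clzBig` is a hypothetical helper):

fn clzBig(limbs: []const u64) usize {
    var i = limbs.len;
    while (i > 0) {
        i -= 1; // scan from the most significant limb downward
        if (limbs[i] != 0) return (limbs.len - 1 - i) * 64 + @clz(limbs[i]);
    }
    return limbs.len * 64; // all-zero input
}
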
@@ -6935,7 +7468,7 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void {
.{ ._, .lzcnt },
Type.u64,
dst_mcv,
- if (mat_src_mcv.isMemory())
+ if (mat_src_mcv.isBase())
mat_src_mcv
else
.{ .register = mat_src_mcv.register_pair[0] },
@@ -6945,7 +7478,7 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void {
.{ ._, .lzcnt },
Type.u64,
tmp_mcv,
- if (mat_src_mcv.isMemory())
+ if (mat_src_mcv.isBase())
mat_src_mcv.address().offset(8).deref()
else
.{ .register = mat_src_mcv.register_pair[1] },
@@ -7053,6 +7586,15 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void {
const src_bits: u31 = @intCast(src_ty.bitSize(zcu));
const has_bmi = self.hasFeature(.bmi);
if (src_bits > @as(u32, if (has_bmi) 128 else 64)) {
+ const src_frame_addr: bits.FrameAddr = src_frame_addr: switch (src_mcv) {
+ .load_frame => |src_frame_addr| src_frame_addr,
+ else => {
+ const src_frame_addr = try self.allocFrameIndex(.initSpill(src_ty, zcu));
+ try self.genSetMem(.{ .frame = src_frame_addr }, 0, src_ty, src_mcv, .{});
+ break :src_frame_addr .{ .index = src_frame_addr };
+ },
+ };
+
const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable;
const extra_bits = abi_size * 8 - src_bits;
@@ -7079,22 +7621,22 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.asmRegisterImmediate(.{ ._, .cmp }, index_reg.to32(), .u(limbs_len));
const zero = try self.asmJccReloc(.nb, undefined);
try self.asmMemoryImmediate(.{ ._, .cmp }, .{
- .base = .{ .frame = src_mcv.load_frame.index },
+ .base = .{ .frame = src_frame_addr.index },
.mod = .{ .rm = .{
.size = .qword,
.index = index_reg.to64(),
.scale = .@"8",
- .disp = src_mcv.load_frame.off,
+ .disp = src_frame_addr.off,
} },
}, .u(0));
_ = try self.asmJccReloc(.e, loop);
try self.asmRegisterMemory(.{ ._, .bsf }, dst_reg.to64(), .{
- .base = .{ .frame = src_mcv.load_frame.index },
+ .base = .{ .frame = src_frame_addr.index },
.mod = .{ .rm = .{
.size = .qword,
.index = index_reg.to64(),
.scale = .@"8",
- .disp = src_mcv.load_frame.off,
+ .disp = src_frame_addr.off,
} },
});
self.performReloc(zero);
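Symmetrically, the `ctz` loop scans upward from the least significant limb and applies `bsf` to the first nonzero one (illustrative sketch):

fn ctzBig(limbs: []const u64) usize {
    for (limbs, 0..) |limb, i| if (limb != 0) return i * 64 + @ctz(limb);
    return limbs.len * 64; // all-zero input
}
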
@@ -7131,11 +7673,11 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void {
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
- const lo_mat_src_mcv: MCValue = if (mat_src_mcv.isMemory())
+ const lo_mat_src_mcv: MCValue = if (mat_src_mcv.isBase())
mat_src_mcv
else
.{ .register = mat_src_mcv.register_pair[0] };
- const hi_mat_src_mcv: MCValue = if (mat_src_mcv.isMemory())
+ const hi_mat_src_mcv: MCValue = if (mat_src_mcv.isBase())
mat_src_mcv.address().offset(8).deref()
else
.{ .register = mat_src_mcv.register_pair[1] };
@@ -7224,13 +7766,13 @@ fn airPopCount(self: *CodeGen, inst: Air.Inst.Index) !void {
const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs);
defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);
- try self.genPopCount(tmp_regs[0], Type.usize, if (mat_src_mcv.isMemory())
+ try self.genPopCount(tmp_regs[0], Type.usize, if (mat_src_mcv.isBase())
mat_src_mcv
else
.{ .register = mat_src_mcv.register_pair[0] }, false);
const src_info = src_ty.intInfo(zcu);
const hi_ty = try pt.intType(src_info.signedness, (src_info.bits - 1) % 64 + 1);
- try self.genPopCount(tmp_regs[1], hi_ty, if (mat_src_mcv.isMemory())
+ try self.genPopCount(tmp_regs[1], hi_ty, if (mat_src_mcv.isBase())
mat_src_mcv.address().offset(8).deref()
else
.{ .register = mat_src_mcv.register_pair[1] }, false);
@@ -7388,7 +7930,7 @@ fn genByteSwap(
defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
for (dst_regs, 0..) |dst_reg, limb_index| {
- if (src_mcv.isMemory()) {
+ if (src_mcv.isBase()) {
try self.asmRegisterMemory(
.{ ._, if (has_movbe) .movbe else .mov },
dst_reg.to64(),
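`movbe` loads and byte-swaps in a single instruction; without it each limb is loaded with `mov` and swapped separately. One limb of the operation as a scalar model (illustrative; assumes the limb is stored little-endian):

const std = @import("std");

fn loadSwapped(bytes: *const [8]u8) u64 {
    return @byteSwap(std.mem.readInt(u64, bytes, .little));
}
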
@@ -7706,7 +8248,7 @@ fn floatSign(self: *CodeGen, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Ty
.abs => try vec_ty.maxInt(pt, vec_ty),
else => unreachable,
});
- const sign_mem: Memory = if (sign_mcv.isMemory())
+ const sign_mem: Memory = if (sign_mcv.isBase())
try sign_mcv.mem(self, .{ .size = .fromSize(abi_size) })
else
.{
@@ -7888,7 +8430,7 @@ fn genRound(self: *CodeGen, ty: Type, dst_reg: Register, src_mcv: MCValue, mode:
const abi_size: u32 = @intCast(ty.abiSize(pt.zcu));
const dst_alias = registerAlias(dst_reg, abi_size);
switch (mir_tag[0]) {
- .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ .v_ss, .v_sd => if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
mir_tag,
dst_alias,
dst_alias,
@@ -7904,7 +8446,7 @@ fn genRound(self: *CodeGen, ty: Type, dst_reg: Register, src_mcv: MCValue, mode:
try self.copyToTmpRegister(ty, src_mcv), abi_size),
.u(@as(u5, @bitCast(mode))),
),
- else => if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
+ else => if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate(
mir_tag,
dst_alias,
try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
@@ -8105,7 +8647,7 @@ fn airAbs(self: *CodeGen, inst: Air.Inst.Index) !void {
else
try self.register_manager.allocReg(inst, self.regSetForType(ty));
const dst_alias = registerAlias(dst_reg, abi_size);
- if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ if (src_mcv.isBase()) try self.asmRegisterMemory(
mir_tag,
dst_alias,
try src_mcv.mem(self, .{ .size = self.memSize(ty) }),
@@ -8212,7 +8754,7 @@ fn airSqrt(self: *CodeGen, inst: Air.Inst.Index) !void {
},
2...8 => {
const wide_reg = registerAlias(dst_reg, abi_size * 2);
- if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ if (src_mcv.isBase()) try self.asmRegisterMemory(
.{ .v_ps, .cvtph2 },
wide_reg,
try src_mcv.mem(self, .{ .size = .fromSize(
@@ -8257,7 +8799,7 @@ fn airSqrt(self: *CodeGen, inst: Air.Inst.Index) !void {
else => unreachable,
}) orelse return self.fail("TODO implement airSqrt for {}", .{ty.fmt(pt)});
switch (mir_tag[0]) {
- .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
+ .v_ss, .v_sd => if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
mir_tag,
dst_reg,
dst_reg,
@@ -8271,7 +8813,7 @@ fn airSqrt(self: *CodeGen, inst: Air.Inst.Index) !void {
else
try self.copyToTmpRegister(ty, src_mcv), abi_size),
),
- else => if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ else => if (src_mcv.isBase()) try self.asmRegisterMemory(
mir_tag,
dst_reg,
try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
@@ -8339,7 +8881,7 @@ fn reuseOperandAdvanced(
return false;
switch (mcv) {
- .register, .register_pair, .register_overflow => for (mcv.getRegs()) |reg| {
+ .register, .register_pair, .register_overflow, .register_mask => for (mcv.getRegs()) |reg| {
// If it's in the registers table, need to associate the register(s) with the
// new instruction.
if (maybe_tracked_inst) |tracked_inst| {
@@ -8486,7 +9028,10 @@ fn load(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerE
.undef,
.eflags,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.register_overflow,
+ .register_mask,
.elementwise_regs_then_frame,
.reserved_frame,
=> unreachable, // not a valid pointer
@@ -8694,7 +9239,10 @@ fn store(
.undef,
.eflags,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.register_overflow,
+ .register_mask,
.elementwise_regs_then_frame,
.reserved_frame,
=> unreachable, // not a valid pointer
@@ -8986,7 +9534,7 @@ fn airStructFieldVal(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.genCopy(field_ty, dst_mcv, off_mcv, .{});
break :dst dst_mcv;
};
- if (field_abi_size * 8 > field_bit_size and dst_mcv.isMemory()) {
+ if (field_abi_size * 8 > field_bit_size and dst_mcv.isBase()) {
const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
@@ -9194,6 +9742,7 @@ fn genUnOpMir(self: *CodeGen, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv:
.register_offset,
.eflags,
.register_overflow,
+ .register_mask,
.lea_symbol,
.lea_direct,
.lea_got,
@@ -9204,7 +9753,7 @@ fn genUnOpMir(self: *CodeGen, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv:
.air_ref,
=> unreachable, // unmodifiable destination
.register => |dst_reg| try self.asmRegister(mir_tag, registerAlias(dst_reg, abi_size)),
- .register_pair => unreachable, // unimplemented
+ .register_pair, .register_triple, .register_quadruple => unreachable, // unimplemented
.memory, .load_symbol, .load_got, .load_direct, .load_tlv => {
const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
@@ -9974,27 +10523,27 @@ fn genMulDivBinOp(
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
- if (mat_lhs_mcv.isMemory())
+ if (mat_lhs_mcv.isBase())
try self.asmRegisterMemory(.{ ._, .mov }, .rax, try mat_lhs_mcv.mem(self, .{ .size = .qword }))
else
try self.asmRegisterRegister(.{ ._, .mov }, .rax, mat_lhs_mcv.register_pair[0]);
- if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory(
+ if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
.{ ._, .mov },
tmp_reg,
try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_rhs_mcv.register_pair[1]);
try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, .rax);
- if (mat_rhs_mcv.isMemory())
+ if (mat_rhs_mcv.isBase())
try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .{ .size = .qword }))
else
try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]);
try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg);
- if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory(
+ if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
.{ ._, .mov },
tmp_reg,
try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_lhs_mcv.register_pair[1]);
- if (mat_rhs_mcv.isMemory())
+ if (mat_rhs_mcv.isBase())
try self.asmRegisterMemory(.{ .i_, .mul }, tmp_reg, try mat_rhs_mcv.mem(self, .{ .size = .qword }))
else
try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, mat_rhs_mcv.register_pair[0]);
@@ -10414,7 +10963,7 @@ fn genBinOp(
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
- if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
.{ .vp_w, .insr },
dst_reg,
dst_reg,
@@ -10439,7 +10988,7 @@ fn genBinOp(
64 => .{ .v_sd, .add },
else => unreachable,
};
- if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
+ if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
mir_tag,
dst_reg,
dst_reg,
@@ -10459,7 +11008,7 @@ fn genBinOp(
64 => .{ ._sd, .add },
else => unreachable,
};
- if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ if (src_mcv.isBase()) try self.asmRegisterMemory(
mir_tag,
dst_reg,
try src_mcv.mem(self, .{ .size = .fromBitSize(float_bits) }),
@@ -10514,7 +11063,7 @@ fn genBinOp(
};
if (sse_op and ((lhs_ty.scalarType(zcu).isRuntimeFloat() and
lhs_ty.scalarType(zcu).floatBits(self.target.*) == 80) or
- lhs_ty.abiSize(zcu) > @as(u6, if (self.hasFeature(.avx)) 32 else 16)))
+ lhs_ty.abiSize(zcu) > self.vectorSize(.float)))
return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(air_tag), lhs_ty.fmt(pt) });
const maybe_mask_reg = switch (air_tag) {
@@ -10731,7 +11280,7 @@ fn genBinOp(
};
try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_regs[1]);
- if (src_mcv.isMemory()) {
+ if (src_mcv.isBase()) {
try self.asmRegisterMemory(
.{ ._, .cmp },
dst_regs[0],
@@ -10828,8 +11377,11 @@ fn genBinOp(
.immediate,
.eflags,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.register_offset,
.register_overflow,
+ .register_mask,
.load_symbol,
.lea_symbol,
.load_direct,
@@ -10909,7 +11461,7 @@ fn genBinOp(
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
- if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
.{ .vp_w, .insr },
dst_reg,
dst_reg,
@@ -11355,7 +11907,7 @@ fn genBinOp(
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
- if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
.{ .vp_w, .insr },
dst_reg,
dst_reg,
@@ -11402,7 +11954,7 @@ fn genBinOp(
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
- if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
+ if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate(
.{ .vp_d, .insr },
dst_reg,
try src_mcv.mem(self, .{ .size = .dword }),
@@ -11454,7 +12006,7 @@ fn genBinOp(
defer self.register_manager.unlockReg(tmp_lock);
try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
- if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ if (src_mcv.isBase()) try self.asmRegisterMemory(
.{ .v_ps, .cvtph2 },
tmp_reg,
try src_mcv.mem(self, .{ .size = .qword }),
@@ -11497,7 +12049,7 @@ fn genBinOp(
defer self.register_manager.unlockReg(tmp_lock);
try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), dst_reg);
- if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ if (src_mcv.isBase()) try self.asmRegisterMemory(
.{ .v_ps, .cvtph2 },
tmp_reg,
try src_mcv.mem(self, .{ .size = .xword }),
@@ -11659,7 +12211,7 @@ fn genBinOp(
else => if (self.hasFeature(.avx)) {
const lhs_reg =
if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
- if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
+ if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
mir_tag,
dst_reg,
lhs_reg,
@@ -11678,7 +12230,7 @@ fn genBinOp(
);
} else {
assert(copied_to_dst);
- if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ if (src_mcv.isBase()) try self.asmRegisterMemory(
mir_tag,
dst_reg,
try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) {
@@ -11705,7 +12257,7 @@ fn genBinOp(
if (self.hasFeature(.avx)) {
const lhs_reg =
if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
- if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
mir_tag,
dst_reg,
lhs_reg,
@@ -11726,7 +12278,7 @@ fn genBinOp(
);
} else {
assert(copied_to_dst);
- if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
+ if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate(
mir_tag,
dst_reg,
try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) {
@@ -11937,7 +12489,7 @@ fn genBinOp(
=> {
const unsigned_ty = try lhs_ty.toUnsigned(pt);
const not_mcv = try self.genTypedValue(try unsigned_ty.maxInt(pt, unsigned_ty));
- const not_mem: Memory = if (not_mcv.isMemory())
+ const not_mem: Memory = if (not_mcv.isBase())
try not_mcv.mem(self, .{ .size = .fromSize(abi_size) })
else
.{ .base = .{
@@ -12017,6 +12569,7 @@ fn genBinOpMir(
.immediate,
.eflags,
.register_overflow,
+ .register_mask,
.lea_direct,
.lea_got,
.lea_tlv,
@@ -12026,9 +12579,9 @@ fn genBinOpMir(
.reserved_frame,
.air_ref,
=> unreachable, // unmodifiable destination
- .register, .register_pair, .register_offset => {
+ .register, .register_pair, .register_triple, .register_quadruple, .register_offset => {
switch (dst_mcv) {
- .register, .register_pair => {},
+ .register, .register_pair, .register_triple, .register_quadruple => {},
.register_offset => |ro| assert(ro.off == 0),
else => unreachable,
}
@@ -12057,10 +12610,15 @@ fn genBinOpMir(
.dead,
.undef,
.register_overflow,
+ .register_mask,
.elementwise_regs_then_frame,
.reserved_frame,
=> unreachable,
- .register, .register_pair => try self.asmRegisterRegister(
+ .register,
+ .register_pair,
+ .register_triple,
+ .register_quadruple,
+ => try self.asmRegisterRegister(
mir_limb_tag,
dst_alias,
registerAlias(src_mcv.getRegs()[dst_reg_i], limb_abi_size),
@@ -12216,6 +12774,7 @@ fn genBinOpMir(
.dead,
.undef,
.register_overflow,
+ .register_mask,
.elementwise_regs_then_frame,
.reserved_frame,
.air_ref,
@@ -12224,6 +12783,8 @@ fn genBinOpMir(
.eflags,
.register,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.register_offset,
.indirect,
.lea_direct,
@@ -12311,6 +12872,7 @@ fn genBinOpMir(
.dead,
.undef,
.register_overflow,
+ .register_mask,
.elementwise_regs_then_frame,
.reserved_frame,
.air_ref,
@@ -12364,6 +12926,8 @@ fn genBinOpMir(
},
.register,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.register_offset,
.eflags,
.memory,
@@ -12382,7 +12946,7 @@ fn genBinOpMir(
const src_limb_mcv: MCValue = if (src_info) |info| .{
.indirect = .{ .reg = info.addr_reg, .off = off },
} else switch (resolved_src_mcv) {
- .register, .register_pair => .{
+ .register, .register_pair, .register_triple, .register_quadruple => .{
.register = resolved_src_mcv.getRegs()[limb_i],
},
.eflags,
@@ -12438,6 +13002,7 @@ fn genIntMulComplexOpMir(self: *CodeGen, dst_ty: Type, dst_mcv: MCValue, src_mcv
.eflags,
.register_offset,
.register_overflow,
+ .register_mask,
.lea_symbol,
.lea_direct,
.lea_got,
@@ -12462,7 +13027,10 @@ fn genIntMulComplexOpMir(self: *CodeGen, dst_ty: Type, dst_mcv: MCValue, src_mcv
.dead,
.undef,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.register_overflow,
+ .register_mask,
.elementwise_regs_then_frame,
.reserved_frame,
.air_ref,
@@ -12539,7 +13107,7 @@ fn genIntMulComplexOpMir(self: *CodeGen, dst_ty: Type, dst_mcv: MCValue, src_mcv
),
}
},
- .register_pair => unreachable, // unimplemented
+ .register_pair, .register_triple, .register_quadruple => unreachable, // unimplemented
.memory, .indirect, .load_symbol, .load_direct, .load_got, .load_tlv, .load_frame => {
const tmp_reg = try self.copyToTmpRegister(dst_ty, dst_mcv);
const tmp_mcv = MCValue{ .register = tmp_reg };
@@ -12892,7 +13460,7 @@ fn genCall(self: *CodeGen, info: union(enum) {
const index_lock = self.register_manager.lockRegAssumeUnused(index_reg);
defer self.register_manager.unlockReg(index_lock);
- const src_mem: Memory = if (src_arg.isMemory()) try src_arg.mem(self, .{ .size = .dword }) else .{
+ const src_mem: Memory = if (src_arg.isBase()) try src_arg.mem(self, .{ .size = .dword }) else .{
.base = .{ .reg = try self.copyToTmpRegister(
Type.usize,
switch (src_arg) {
@@ -12984,7 +13552,7 @@ fn genCall(self: *CodeGen, info: union(enum) {
.lea_frame = .{ .index = frame_index, .off = -reg_off.off },
}, .{}),
.elementwise_regs_then_frame => |regs_frame_addr| {
- const src_mem: Memory = if (src_arg.isMemory()) try src_arg.mem(self, .{ .size = .dword }) else .{
+ const src_mem: Memory = if (src_arg.isBase()) try src_arg.mem(self, .{ .size = .dword }) else .{
.base = .{ .reg = try self.copyToTmpRegister(
Type.usize,
switch (src_arg) {
@@ -13100,6 +13668,8 @@ fn airRet(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void {
.none => {},
.register,
.register_pair,
+ .register_triple,
+ .register_quadruple,
=> try self.genCopy(ret_ty, self.ret_mcv.short, .{ .air_ref = un_op }, .{ .safety = safety }),
.indirect => |reg_off| {
try self.register_manager.getReg(reg_off.reg, null);
@@ -13226,7 +13796,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v
const temp_lhs_lock = self.register_manager.lockRegAssumeUnused(temp_lhs_reg);
defer self.register_manager.unlockReg(temp_lhs_lock);
- if (lhs_mcv.isMemory()) try self.asmRegisterMemory(
+ if (lhs_mcv.isBase()) try self.asmRegisterMemory(
.{ ._, .mov },
temp_lhs_reg.to8(),
try lhs_mcv.address().offset(payload_abi_size).deref().mem(self, .{ .size = .byte }),
@@ -13240,7 +13810,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v
}
const payload_compare = payload_compare: {
- if (rhs_mcv.isMemory()) {
+ if (rhs_mcv.isBase()) {
const rhs_mem =
try rhs_mcv.address().offset(payload_abi_size).deref().mem(self, .{ .size = .byte });
try self.asmMemoryRegister(.{ ._, .@"test" }, rhs_mem, temp_lhs_reg.to8());
@@ -13291,13 +13861,13 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v
} else .may_flip;
const flipped = switch (may_flip) {
- .may_flip => !lhs_mcv.isRegister() and !lhs_mcv.isMemory(),
+ .may_flip => !lhs_mcv.isRegister() and !lhs_mcv.isBase(),
.must_flip => true,
.must_not_flip => false,
};
const unmat_dst_mcv = if (flipped) rhs_mcv else lhs_mcv;
const dst_mcv = if (unmat_dst_mcv.isRegister() or
- (abi_size <= 8 and unmat_dst_mcv.isMemory())) unmat_dst_mcv else dst: {
+ (abi_size <= 8 and unmat_dst_mcv.isBase())) unmat_dst_mcv else dst: {
const dst_mcv = try self.allocTempRegOrMem(ty, true);
try self.genCopy(ty, dst_mcv, unmat_dst_mcv, .{});
break :dst dst_mcv;
@@ -13335,6 +13905,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v
.register,
.register_offset,
.register_overflow,
+ .register_mask,
.indirect,
.lea_direct,
.lea_got,
@@ -13345,7 +13916,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v
.reserved_frame,
.air_ref,
=> unreachable,
- .register_pair, .load_frame => null,
+ .register_pair, .register_triple, .register_quadruple, .load_frame => null,
.memory, .load_symbol, .load_got, .load_direct, .load_tlv => dst: {
switch (resolved_dst_mcv) {
.memory => |addr| if (std.math.cast(
@@ -13396,6 +13967,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v
.register,
.register_offset,
.register_overflow,
+ .register_mask,
.indirect,
.lea_symbol,
.lea_direct,
@@ -13406,7 +13978,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v
.reserved_frame,
.air_ref,
=> unreachable,
- .register_pair, .load_frame => null,
+ .register_pair, .register_triple, .register_quadruple, .load_frame => null,
.memory, .load_symbol, .load_got, .load_direct, .load_tlv => src: {
switch (resolved_src_mcv) {
.memory => |addr| if (std.math.cast(
@@ -13457,7 +14029,10 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v
try self.genSetReg(tmp_reg, Type.usize, if (dst_info) |info| .{
.indirect = .{ .reg = info.addr_reg, .off = off },
} else switch (resolved_dst_mcv) {
- .register_pair => |dst_regs| .{ .register = dst_regs[limb_i] },
+ inline .register_pair,
+ .register_triple,
+ .register_quadruple,
+ => |dst_regs| .{ .register = dst_regs[limb_i] },
.memory => |dst_addr| .{
.memory = @bitCast(@as(i64, @bitCast(dst_addr)) + off),
},
@@ -13479,9 +14054,10 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v
if (src_info) |info| .{
.indirect = .{ .reg = info.addr_reg, .off = off },
} else switch (resolved_src_mcv) {
- .register_pair => |src_regs| .{
- .register = src_regs[limb_i],
- },
+ inline .register_pair,
+ .register_triple,
+ .register_quadruple,
+ => |src_regs| .{ .register = src_regs[limb_i] },
.memory => |src_addr| .{
.memory = @bitCast(@as(i64, @bitCast(src_addr)) + off),
},
@@ -13539,7 +14115,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v
const tmp2_lock = self.register_manager.lockRegAssumeUnused(tmp2_reg);
defer self.register_manager.unlockReg(tmp2_lock);
- if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
.{ .vp_w, .insr },
tmp1_reg,
dst_reg.to128(),
@@ -13840,8 +14416,11 @@ fn isNull(self: *CodeGen, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue)
.undef,
.immediate,
.eflags,
+ .register_triple,
+ .register_quadruple,
.register_offset,
.register_overflow,
+ .register_mask,
.lea_direct,
.lea_got,
.lea_tlv,
@@ -15481,6 +16060,7 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C
.immediate,
.eflags,
.register_overflow,
+ .register_mask,
.lea_direct,
.lea_got,
.lea_tlv,
@@ -15510,7 +16090,7 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C
.off = -dst_reg_off.off,
} },
}, opts),
- .register_pair => |dst_regs| {
+ inline .register_pair, .register_triple, .register_quadruple => |dst_regs| {
const src_info: ?struct { addr_reg: Register, addr_lock: RegisterLock } = switch (src_mcv) {
.register_pair, .memory, .indirect, .load_frame => null,
.load_symbol, .load_direct, .load_got, .load_tlv => src: {
@@ -15535,9 +16115,12 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C
defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock);
var part_disp: i32 = 0;
- for (dst_regs, try self.splitType(ty), 0..) |dst_reg, dst_ty, part_i| {
+ for (dst_regs, try self.splitType(dst_regs.len, ty), 0..) |dst_reg, dst_ty, part_i| {
try self.genSetReg(dst_reg, dst_ty, switch (src_mcv) {
- .register_pair => |src_regs| .{ .register = src_regs[part_i] },
+ inline .register_pair,
+ .register_triple,
+ .register_quadruple,
+ => |src_regs| .{ .register = src_regs[part_i] },
.memory, .indirect, .load_frame => src_mcv.address().offset(part_disp).deref(),
.load_symbol, .load_direct, .load_got, .load_tlv => .{ .indirect = .{
.reg = src_info.?.addr_reg,
@@ -15733,7 +16316,10 @@ fn genSetReg(
},
.ip => unreachable,
},
- .register_pair => |src_regs| try self.genSetReg(dst_reg, ty, .{ .register = src_regs[0] }, opts),
+ inline .register_pair,
+ .register_triple,
+ .register_quadruple,
+ => |src_regs| try self.genSetReg(dst_reg, ty, .{ .register = src_regs[0] }, opts),
.register_offset,
.indirect,
.load_frame,
@@ -15770,6 +16356,47 @@ fn genSetReg(
},
else => unreachable,
}),
+ .register_mask => |src_reg_mask| {
+ assert(src_reg_mask.reg.class() == .sse);
+ const has_avx = self.hasFeature(.avx);
+ const bits_reg = switch (dst_reg.class()) {
+ .general_purpose => dst_reg,
+ else => try self.register_manager.allocReg(null, abi.RegisterClass.gp),
+ };
+ const bits_lock = self.register_manager.lockReg(bits_reg);
+ defer if (bits_lock) |lock| self.register_manager.unlockReg(lock);
+
+ const pack_reg = switch (src_reg_mask.scalar) {
+ else => src_reg_mask.reg,
+ .word => try self.register_manager.allocReg(null, abi.RegisterClass.sse),
+ };
+ const pack_lock = self.register_manager.lockReg(pack_reg);
+ defer if (pack_lock) |lock| self.register_manager.unlockReg(lock);
+
+ var mask_size: u32 = @intCast(ty.vectorLen(zcu) * @divExact(src_reg_mask.scalar.bitSize(), 8));
+ switch (src_reg_mask.scalar) {
+ else => {},
+ .word => {
+ const src_alias = registerAlias(src_reg_mask.reg, mask_size);
+ const pack_alias = registerAlias(pack_reg, mask_size);
+ if (has_avx) {
+ try self.asmRegisterRegisterRegister(.{ .vp_b, .ackssw }, pack_alias, src_alias, src_alias);
+ } else {
+ try self.asmRegisterRegister(.{ ._, .movdqa }, pack_alias, src_alias);
+ try self.asmRegisterRegister(.{ .p_b, .ackssw }, pack_alias, pack_alias);
+ }
+ mask_size = std.math.divCeil(u32, mask_size, 2) catch unreachable;
+ },
+ }
+ try self.asmRegisterRegister(.{ switch (src_reg_mask.scalar) {
+ .byte, .word => if (has_avx) .vp_b else .p_b,
+ .dword => if (has_avx) .v_ps else ._ps,
+ .qword => if (has_avx) .v_pd else ._pd,
+ else => unreachable,
+ }, .movmsk }, bits_reg.to32(), registerAlias(pack_reg, mask_size));
+ if (src_reg_mask.inverted) try self.asmRegister(.{ ._, .not }, registerAlias(bits_reg, abi_size));
+ try self.genSetReg(dst_reg, ty, .{ .register = bits_reg }, .{});
+ },
.memory, .load_symbol, .load_direct, .load_got, .load_tlv => {
switch (src_mcv) {
.memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr|
@@ -15998,9 +16625,9 @@ fn genSetMem(
src_alias,
);
},
- .register_pair => |src_regs| {
+ inline .register_pair, .register_triple, .register_quadruple => |src_regs| {
var part_disp: i32 = disp;
- for (try self.splitType(ty), src_regs) |src_ty, src_reg| {
+ for (try self.splitType(src_regs.len, ty), src_regs) |src_ty, src_reg| {
try self.genSetMem(base, part_disp, src_ty, .{ .register = src_reg }, opts);
part_disp += @intCast(src_ty.abiSize(zcu));
}
@@ -16065,6 +16692,13 @@ fn genSetMem(
try self.genSetMem(base, disp, ty, .{ .register = src_reg }, opts);
},
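+ // A register mask has no direct store form; materialize the bitmask into
+ // a temporary GPR first, then store that register.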
+ .register_mask => {
+ const src_reg = try self.copyToTmpRegister(ty, src_mcv);
+ const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
+ defer self.register_manager.unlockReg(src_lock);
+
+ try self.genSetMem(base, disp, ty, .{ .register = src_reg }, opts);
+ },
.memory,
.indirect,
.load_direct,
@@ -16283,7 +16917,7 @@ fn airBitCast(self: *CodeGen, inst: Air.Inst.Index) !void {
const dst_mcv = try self.allocRegOrMem(inst, true);
try self.genCopy(switch (std.math.order(dst_ty.abiSize(zcu), src_ty.abiSize(zcu))) {
.lt => dst_ty,
- .eq => if (!dst_mcv.isMemory() or src_mcv.isMemory()) dst_ty else src_ty,
+ .eq => if (!dst_mcv.isBase() or src_mcv.isBase()) dst_ty else src_ty,
.gt => src_ty,
}, dst_mcv, src_mcv, .{});
break :dst dst_mcv;
@@ -16729,7 +17363,7 @@ fn atomicOp(
});
try self.genSetReg(sse_reg, val_ty, .{ .register = .rax }, .{});
switch (mir_tag[0]) {
- .v_ss, .v_sd => if (val_mcv.isMemory()) try self.asmRegisterRegisterMemory(
+ .v_ss, .v_sd => if (val_mcv.isBase()) try self.asmRegisterRegisterMemory(
mir_tag,
sse_reg.to128(),
sse_reg.to128(),
@@ -16743,7 +17377,7 @@ fn atomicOp(
else
try self.copyToTmpRegister(val_ty, val_mcv)).to128(),
),
- ._ss, ._sd => if (val_mcv.isMemory()) try self.asmRegisterMemory(
+ ._ss, ._sd => if (val_mcv.isBase()) try self.asmRegisterMemory(
mir_tag,
sse_reg.to128(),
try val_mcv.mem(self, .{ .size = self.memSize(val_ty) }),
@@ -17443,7 +18077,7 @@ fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void {
defer self.register_manager.unlockReg(dst_lock);
const src_mcv = try self.resolveInst(ty_op.operand);
- if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ if (src_mcv.isBase()) try self.asmRegisterMemory(
mir_tag,
registerAlias(dst_reg, @intCast(vector_ty.abiSize(zcu))),
try src_mcv.mem(self, .{ .size = self.memSize(scalar_ty) }),
@@ -17519,7 +18153,7 @@ fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void {
const src_mcv = try self.resolveInst(ty_op.operand);
if (self.hasFeature(.avx)) {
const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
- if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ if (src_mcv.isBase()) try self.asmRegisterMemory(
.{ .v_ss, .broadcast },
dst_reg.to128(),
try src_mcv.mem(self, .{ .size = .dword }),
@@ -17556,7 +18190,7 @@ fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void {
5...8 => if (self.hasFeature(.avx)) {
const src_mcv = try self.resolveInst(ty_op.operand);
const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
- if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ if (src_mcv.isBase()) try self.asmRegisterMemory(
.{ .v_ss, .broadcast },
dst_reg.to256(),
try src_mcv.mem(self, .{ .size = .dword }),
@@ -17602,7 +18236,7 @@ fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void {
const src_mcv = try self.resolveInst(ty_op.operand);
const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
if (self.hasFeature(.sse3)) {
- if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ if (src_mcv.isBase()) try self.asmRegisterMemory(
if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
dst_reg.to128(),
try src_mcv.mem(self, .{ .size = .qword }),
@@ -17627,7 +18261,7 @@ fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void {
3...4 => if (self.hasFeature(.avx)) {
const src_mcv = try self.resolveInst(ty_op.operand);
const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
- if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ if (src_mcv.isBase()) try self.asmRegisterMemory(
.{ .v_sd, .broadcast },
dst_reg.to256(),
try src_mcv.mem(self, .{ .size = .qword }),
@@ -17670,7 +18304,7 @@ fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void {
2 => if (self.hasFeature(.avx)) {
const src_mcv = try self.resolveInst(ty_op.operand);
const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
- if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ if (src_mcv.isBase()) try self.asmRegisterMemory(
.{ .v_f128, .broadcast },
dst_reg.to256(),
try src_mcv.mem(self, .{ .size = .xword }),
@@ -17779,7 +18413,7 @@ fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void {
else => unreachable,
}, .broadcast },
mask_alias,
- if (pred_mcv.isMemory()) try pred_mcv.mem(self, .{ .size = .byte }) else .{
+ if (pred_mcv.isBase()) try pred_mcv.mem(self, .{ .size = .byte }) else .{
.base = .{ .reg = (try self.copyToTmpRegister(
Type.usize,
pred_mcv.address(),
@@ -17973,7 +18607,7 @@ fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.genSetReg(dst_reg, ty, rhs_mcv, .{});
break :rhs dst_alias;
};
- if (lhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryRegister(
+ if (lhs_mcv.isBase()) try self.asmRegisterRegisterMemoryRegister(
mir_tag,
dst_alias,
rhs_alias,
@@ -17989,7 +18623,7 @@ fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.copyToTmpRegister(ty, lhs_mcv), abi_size),
mask_alias,
);
- } else if (has_blend) if (lhs_mcv.isMemory()) try self.asmRegisterMemoryRegister(
+ } else if (has_blend) if (lhs_mcv.isBase()) try self.asmRegisterMemoryRegister(
mir_tag,
dst_alias,
try lhs_mcv.mem(self, .{ .size = self.memSize(ty) }),
@@ -18014,7 +18648,7 @@ fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void {
},
}) orelse return self.fail("TODO implement airSelect for {}", .{ty.fmt(pt)});
try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_alias, mask_alias);
- if (rhs_mcv.isMemory()) try self.asmRegisterMemory(
+ if (rhs_mcv.isBase()) try self.asmRegisterMemory(
.{ mir_fixes, .andn },
mask_alias,
try rhs_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
@@ -18093,10 +18727,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
for ([_]Mir.Inst.Tag{ .unpckl, .unpckh }) |variant| unpck: {
if (elem_abi_size > 8) break :unpck;
- if (dst_abi_size > @as(u32, if (if (elem_abi_size >= 4)
- has_avx
- else
- self.hasFeature(.avx2)) 32 else 16)) break :unpck;
+ if (dst_abi_size > self.vectorSize(if (elem_abi_size >= 4) .float else .int)) break :unpck;
var sources: [2]?u1 = @splat(null);
for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
@@ -18154,7 +18785,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
},
else => unreachable,
} };
- if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemory(
+ if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemory(
mir_tag,
dst_alias,
registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
@@ -18167,7 +18798,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
rhs_mcv.getReg().?
else
try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
- ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemory(
+ ) else if (rhs_mcv.isBase()) try self.asmRegisterMemory(
mir_tag,
dst_alias,
try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
@@ -18184,7 +18815,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
pshufd: {
if (elem_abi_size != 4) break :pshufd;
- if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :pshufd;
+ if (max_abi_size > self.vectorSize(.float)) break :pshufd;
var control: u8 = 0b00_00_00_00;
var sources: [1]?u1 = @splat(null);
@@ -18216,7 +18847,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
const dst_alias = registerAlias(dst_reg, max_abi_size);
- if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
+ if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate(
.{ if (has_avx) .vp_d else .p_d, .shuf },
dst_alias,
try src_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
@@ -18235,7 +18866,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
shufps: {
if (elem_abi_size != 4) break :shufps;
- if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :shufps;
+ if (max_abi_size > self.vectorSize(.float)) break :shufps;
var control: u8 = 0b00_00_00_00;
var sources: [2]?u1 = @splat(null);
@@ -18272,7 +18903,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
const dst_reg = dst_mcv.getReg().?;
const dst_alias = registerAlias(dst_reg, max_abi_size);
- if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
.{ .v_ps, .shuf },
dst_alias,
registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
@@ -18287,7 +18918,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
else
try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
.u(control),
- ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
+ ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
.{ ._ps, .shuf },
dst_alias,
try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
@@ -18306,7 +18937,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
shufpd: {
if (elem_abi_size != 8) break :shufpd;
- if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :shufpd;
+ if (max_abi_size > self.vectorSize(.float)) break :shufpd;
var control: u4 = 0b0_0_0_0;
var sources: [2]?u1 = @splat(null);
@@ -18339,7 +18970,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
const dst_reg = dst_mcv.getReg().?;
const dst_alias = registerAlias(dst_reg, max_abi_size);
- if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
.{ .v_pd, .shuf },
dst_alias,
registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
@@ -18354,7 +18985,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
else
try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
.u(control),
- ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
+ ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
.{ ._pd, .shuf },
dst_alias,
try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
@@ -18373,7 +19004,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
blend: {
if (elem_abi_size < 2) break :blend;
- if (dst_abi_size > @as(u32, if (has_avx) 32 else 16)) break :blend;
+ if (dst_abi_size > self.vectorSize(.float)) break :blend;
if (!self.hasFeature(.sse4_1)) break :blend;
var control: u8 = 0b0_0_0_0_0_0_0_0;
@@ -18409,7 +19040,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
const rhs_mcv = try self.resolveInst(extra.b);
const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
- if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
.{ .vp_d, .blend },
registerAlias(dst_reg, dst_abi_size),
registerAlias(lhs_reg, dst_abi_size),
@@ -18461,7 +19092,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv);
const dst_reg = dst_mcv.getReg().?;
- if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
.{ .vp_w, .blend },
registerAlias(dst_reg, dst_abi_size),
registerAlias(if (lhs_mcv.isRegister())
@@ -18482,7 +19113,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
else
try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
.u(expanded_control),
- ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
+ ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
.{ .p_w, .blend },
registerAlias(dst_reg, dst_abi_size),
try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
@@ -18518,7 +19149,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv);
const dst_reg = dst_mcv.getReg().?;
- if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
switch (elem_abi_size) {
4 => .{ .v_ps, .blend },
8, 16 => .{ .v_pd, .blend },
@@ -18547,7 +19178,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
else
try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
.u(expanded_control),
- ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
+ ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
switch (elem_abi_size) {
4 => .{ ._ps, .blend },
8, 16 => .{ ._pd, .blend },
@@ -18573,10 +19204,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
}
blendv: {
- if (dst_abi_size > @as(u32, if (if (elem_abi_size >= 4)
- has_avx
- else
- self.hasFeature(.avx2)) 32 else 16)) break :blendv;
+ if (dst_abi_size > self.vectorSize(if (elem_abi_size >= 4) .float else .int)) break :blendv;
const select_mask_elem_ty = try pt.intType(.unsigned, elem_abi_size * 8);
const select_mask_ty = try pt.vectorType(.{
@@ -18637,7 +19265,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
const dst_reg = dst_mcv.getReg().?;
const dst_alias = registerAlias(dst_reg, dst_abi_size);
- if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryRegister(
+ if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryRegister(
mir_tag,
dst_alias,
if (lhs_mcv.isRegister())
@@ -18658,7 +19286,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
else
try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
select_mask_alias,
- ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryRegister(
+ ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryRegister(
mir_tag,
dst_alias,
try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
@@ -18701,7 +19329,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
else
.p_;
try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_alias, mask_alias);
- if (lhs_mcv.isMemory()) try self.asmRegisterMemory(
+ if (lhs_mcv.isBase()) try self.asmRegisterMemory(
.{ mir_fixes, .andn },
mask_alias,
try lhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
@@ -18851,22 +19479,61 @@ fn airReduce(self: *CodeGen, inst: Air.Inst.Index) !void {
if (operand_ty.isVector(zcu) and operand_ty.childType(zcu).toIntern() == .bool_type) {
try self.spillEflagsIfOccupied();
- const operand_mcv = try self.resolveInst(reduce.operand);
- const mask_len = (std.math.cast(u6, operand_ty.vectorLen(zcu)) orelse
- return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(pt)}));
- const mask = (@as(u64, 1) << mask_len) - 1;
const abi_size: u32 = @intCast(operand_ty.abiSize(zcu));
+ const operand_mcv = try self.resolveInst(reduce.operand);
+ const mask_len = operand_ty.vectorLen(zcu);
+ const mask_len_minus_one = (std.math.cast(u6, mask_len - 1) orelse {
+ const acc_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
+ const acc_lock = self.register_manager.lockRegAssumeUnused(acc_reg);
+ defer self.register_manager.unlockReg(acc_lock);
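+ // More than 64 lanes cannot be tested with a single immediate; fold the
+ // operand into the accumulator 8 bytes at a time with or/and, then test
+ // the accumulator once below.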
+ var limb_offset: i31 = 0;
+ while (limb_offset < abi_size) : (limb_offset += 8) {
+ try self.asmRegisterMemory(
+ .{ ._, if (limb_offset == 0) .mov else switch (reduce.operation) {
+ .Or => .@"or",
+ .And => .@"and",
+ else => return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(pt)}),
+ } },
+ acc_reg.to64(),
+ try operand_mcv.mem(self, .{
+ .size = .qword,
+ .disp = limb_offset,
+ }),
+ );
+ }
+ switch (reduce.operation) {
+ .Or => {
+ try self.asmRegisterRegister(.{ ._, .@"test" }, acc_reg.to64(), acc_reg.to64());
+ break :result .{ .eflags = .nz };
+ },
+ .And => {
+ try self.asmRegisterImmediate(.{ ._, .cmp }, acc_reg.to64(), .s(-1));
+ break :result .{ .eflags = .z };
+ },
+ else => unreachable,
+ }
+ });
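+ // ~mask_len_minus_one == 63 - (mask_len - 1) == 64 - mask_len, so this
+ // sets exactly the mask_len low bits without the shift overflowing when
+ // mask_len == 64 (e.g. mask_len == 3 gives maxInt(u64) >> 61 == 0b111).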
+ const mask = @as(u64, std.math.maxInt(u64)) >> ~mask_len_minus_one;
switch (reduce.operation) {
.Or => {
- if (operand_mcv.isMemory()) try self.asmMemoryImmediate(
+ if (operand_mcv.isBase()) try self.asmMemoryImmediate(
.{ ._, .@"test" },
try operand_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
- .u(mask),
- ) else {
- const operand_reg = registerAlias(if (operand_mcv.isRegister())
- operand_mcv.getReg().?
+ if (mask_len < abi_size * 8)
+ .u(mask)
else
- try self.copyToTmpRegister(operand_ty, operand_mcv), abi_size);
+ .s(-1),
+ ) else {
+ const operand_reg = registerAlias(operand_reg: {
+ if (operand_mcv.isRegister()) {
+ const operand_reg = operand_mcv.getReg().?;
+ if (operand_reg.class() == .general_purpose) break :operand_reg operand_reg;
+ }
+ break :operand_reg try self.copyToTmpRegister(operand_ty, operand_mcv);
+ }, abi_size);
+ const operand_lock = self.register_manager.lockReg(operand_reg);
+ defer if (operand_lock) |lock| self.register_manager.unlockReg(lock);
+
if (mask_len < abi_size * 8) try self.asmRegisterImmediate(
.{ ._, .@"test" },
operand_reg,
@@ -18880,7 +19547,10 @@ fn airReduce(self: *CodeGen, inst: Air.Inst.Index) !void {
break :result .{ .eflags = .nz };
},
.And => {
- const tmp_reg = try self.copyToTmpRegister(operand_ty, operand_mcv);
+ const tmp_reg = registerAlias(
+ try self.copyToTmpRegister(operand_ty, operand_mcv),
+ abi_size,
+ );
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
@@ -19184,7 +19854,7 @@ fn airMulAdd(self: *CodeGen, inst: Air.Inst.Index) !void {
if (unused.isSet(0) and mcv.isRegister() and self.reuseOperand(inst, op, op_index, mcv.*)) {
order[op_index] = 1;
unused.unset(0);
- } else if (unused.isSet(2) and mcv.isMemory()) {
+ } else if (unused.isSet(2) and mcv.isBase()) {
order[op_index] = 3;
unused.unset(2);
}
@@ -19531,7 +20201,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void {
const dst_lock = self.register_manager.lockReg(dst_reg);
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
- if (self.hasFeature(.avx)) if (promote_mcv.isMemory()) try self.asmRegisterRegisterMemory(
+ if (self.hasFeature(.avx)) if (promote_mcv.isBase()) try self.asmRegisterRegisterMemory(
.{ .v_ss, .cvtsd2 },
dst_reg,
dst_reg,
@@ -19544,7 +20214,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void {
promote_mcv.getReg().?
else
try self.copyToTmpRegister(promote_ty, promote_mcv)).to128(),
- ) else if (promote_mcv.isMemory()) try self.asmRegisterMemory(
+ ) else if (promote_mcv.isBase()) try self.asmRegisterMemory(
.{ ._ss, .cvtsd2 },
dst_reg,
try promote_mcv.mem(self, .{ .size = .qword }),
@@ -19751,7 +20421,7 @@ fn resolveCallingConventionValues(
// TODO: is this even possible for C calling convention?
result.return_value = .init(.none);
} else {
- var ret_tracking: [2]InstTracking = undefined;
+ var ret_tracking: [4]InstTracking = undefined;
var ret_tracking_i: usize = 0;
const classes = switch (resolved_cc) {
@@ -19771,14 +20441,17 @@ fn resolveCallingConventionValues(
ret_tracking_i += 1;
},
.sse, .float, .float_combine, .win_i128 => {
- const ret_sse_reg = registerAlias(
- abi.getCAbiSseReturnRegs(resolved_cc)[ret_sse_reg_i],
- @intCast(ret_ty.abiSize(zcu)),
- );
- ret_sse_reg_i += 1;
-
- ret_tracking[ret_tracking_i] = .init(.{ .register = ret_sse_reg });
- ret_tracking_i += 1;
+ const ret_sse_regs = abi.getCAbiSseReturnRegs(resolved_cc);
+ const abi_size: u32 = @intCast(ret_ty.abiSize(zcu));
+ const reg_size = @min(abi_size, self.vectorSize(.float));
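+ // Large values are split across successive sse return registers, e.g. a
+ // 64-byte vector spans four regs at a 16-byte vector size or two regs at
+ // 32 bytes (illustrative; vectorSize(.float) picks the actual width).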
+ var byte_offset: u32 = 0;
+ while (byte_offset < abi_size) : (byte_offset += reg_size) {
+ const ret_sse_reg = registerAlias(ret_sse_regs[ret_sse_reg_i], reg_size);
+ ret_sse_reg_i += 1;
+
+ ret_tracking[ret_tracking_i] = .init(.{ .register = ret_sse_reg });
+ ret_tracking_i += 1;
+ }
},
.sseup => assert(ret_tracking[ret_tracking_i - 1].short.register.class() == .sse),
.x87 => {
@@ -19811,6 +20484,17 @@ fn resolveCallingConventionValues(
ret_tracking[0].short.register,
ret_tracking[1].short.register,
} }),
+ 3 => .init(.{ .register_triple = .{
+ ret_tracking[0].short.register,
+ ret_tracking[1].short.register,
+ ret_tracking[2].short.register,
+ } }),
+ 4 => .init(.{ .register_quadruple = .{
+ ret_tracking[0].short.register,
+ ret_tracking[1].short.register,
+ ret_tracking[2].short.register,
+ ret_tracking[3].short.register,
+ } }),
};
}
@@ -19826,7 +20510,7 @@ fn resolveCallingConventionValues(
else => unreachable,
}
- var arg_mcv: [2]MCValue = undefined;
+ var arg_mcv: [4]MCValue = undefined;
var arg_mcv_i: usize = 0;
const classes = switch (resolved_cc) {
@@ -19834,15 +20518,13 @@ fn resolveCallingConventionValues(
.x86_64_win => &.{abi.classifyWindows(ty, zcu)},
else => unreachable,
};
- for (classes) |class| switch (class) {
+ classes: for (classes) |class| switch (class) {
.integer => {
const param_int_regs = abi.getCAbiIntParamRegs(resolved_cc);
if (param_int_reg_i >= param_int_regs.len) break;
- const param_int_reg = registerAlias(
- abi.getCAbiIntParamRegs(resolved_cc)[param_int_reg_i],
- @intCast(@min(ty.abiSize(zcu), 8)),
- );
+ const param_int_reg =
+ registerAlias(param_int_regs[param_int_reg_i], @intCast(@min(ty.abiSize(zcu), 8)));
param_int_reg_i += 1;
arg_mcv[arg_mcv_i] = .{ .register = param_int_reg };
@@ -19850,16 +20532,18 @@ fn resolveCallingConventionValues(
},
.sse, .float, .float_combine => {
const param_sse_regs = abi.getCAbiSseParamRegs(resolved_cc);
- if (param_sse_reg_i >= param_sse_regs.len) break;
+ const abi_size: u32 = @intCast(ty.abiSize(zcu));
+ const reg_size = @min(abi_size, self.vectorSize(.float));
+ var byte_offset: u32 = 0;
+ while (byte_offset < abi_size) : (byte_offset += reg_size) {
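+ // Ran out of sse parameter registers partway through this argument;
+ // abandon register classification for it entirely.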
+ if (param_sse_reg_i >= param_sse_regs.len) break :classes;
- const param_sse_reg = registerAlias(
- abi.getCAbiSseParamRegs(resolved_cc)[param_sse_reg_i],
- @intCast(ty.abiSize(zcu)),
- );
- param_sse_reg_i += 1;
+ const param_sse_reg = registerAlias(param_sse_regs[param_sse_reg_i], reg_size);
+ param_sse_reg_i += 1;
- arg_mcv[arg_mcv_i] = .{ .register = param_sse_reg };
- arg_mcv_i += 1;
+ arg_mcv[arg_mcv_i] = .{ .register = param_sse_reg };
+ arg_mcv_i += 1;
+ }
},
.sseup => assert(arg_mcv[arg_mcv_i - 1].register.class() == .sse),
.x87, .x87up, .complex_x87, .memory, .win_i128 => switch (resolved_cc) {
@@ -19908,7 +20592,21 @@ fn resolveCallingConventionValues(
arg.* = switch (arg_mcv_i) {
else => unreachable,
1 => arg_mcv[0],
- 2 => .{ .register_pair = .{ arg_mcv[0].register, arg_mcv[1].register } },
+ 2 => .{ .register_pair = .{
+ arg_mcv[0].register,
+ arg_mcv[1].register,
+ } },
+ 3 => .{ .register_triple = .{
+ arg_mcv[0].register,
+ arg_mcv[1].register,
+ arg_mcv[2].register,
+ } },
+ 4 => .{ .register_quadruple = .{
+ arg_mcv[0].register,
+ arg_mcv[1].register,
+ arg_mcv[2].register,
+ arg_mcv[3].register,
+ } },
};
continue;
}
@@ -20052,32 +20750,40 @@ fn memSize(self: *CodeGen, ty: Type) Memory.Size {
};
}
-fn splitType(self: *CodeGen, ty: Type) ![2]Type {
+fn splitType(self: *CodeGen, comptime parts_len: usize, ty: Type) ![parts_len]Type {
const pt = self.pt;
const zcu = pt.zcu;
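+ // A vector whose length is a multiple of parts_len splits into equal
+ // sub-vectors, e.g. splitType(2, @Vector(8, f32)) would yield two
+ // @Vector(4, f32) parts (illustrative case).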
+ var parts: [parts_len]Type = undefined;
+ if (ty.isVector(zcu)) if (std.math.divExact(u32, ty.vectorLen(zcu), parts_len)) |vec_len| return .{
+ try pt.vectorType(.{ .len = vec_len, .child = ty.scalarType(zcu).toIntern() }),
+ } ** parts_len else |err| switch (err) {
+ error.DivisionByZero => unreachable,
+ error.UnexpectedRemainder => {},
+ };
const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .other), .none);
- var parts: [2]Type = undefined;
- if (classes.len == 2) for (&parts, classes, 0..) |*part, class, part_i| {
+ if (classes.len == parts_len) for (&parts, classes, 0..) |*part, class, part_i| {
part.* = switch (class) {
- .integer => switch (part_i) {
- 0 => Type.u64,
- 1 => part: {
- const elem_size = ty.abiAlignment(zcu).minStrict(.@"8").toByteUnits().?;
- const elem_ty = try pt.intType(.unsigned, @intCast(elem_size * 8));
- break :part switch (@divExact(ty.abiSize(zcu) - 8, elem_size)) {
- 1 => elem_ty,
- else => |len| try pt.arrayType(.{ .len = len, .child = elem_ty.toIntern() }),
- };
- },
- else => unreachable,
+ .integer => if (part_i < parts_len - 1)
+ Type.u64
+ else part: {
+ const elem_size = ty.abiAlignment(zcu).minStrict(.@"8").toByteUnits().?;
+ const elem_ty = try pt.intType(.unsigned, @intCast(elem_size * 8));
+ break :part switch (@divExact(ty.abiSize(zcu) - part_i * 8, elem_size)) {
+ 1 => elem_ty,
+ else => |array_len| try pt.arrayType(.{ .len = array_len, .child = elem_ty.toIntern() }),
+ };
},
.float => Type.f32,
.float_combine => try pt.arrayType(.{ .len = 2, .child = .f32_type }),
.sse => Type.f64,
else => break,
};
- } else if (parts[0].abiSize(zcu) + parts[1].abiSize(zcu) == ty.abiSize(zcu)) return parts;
- return self.fail("TODO implement splitType for {}", .{ty.fmt(pt)});
+ } else {
+ var part_sizes: u64 = 0;
+ for (parts) |part| part_sizes += part.abiSize(zcu);
+ if (part_sizes == ty.abiSize(zcu)) return parts;
+ };
+ return self.fail("TODO implement splitType({d}, {})", .{ parts_len, ty.fmt(pt) });
}
/// Truncates the value in the register in place.
@@ -20297,6 +21003,7 @@ const Temp = struct {
.immediate,
.eflags,
.register_offset,
+ .register_mask,
.memory,
.load_symbol,
.lea_symbol,
@@ -20314,6 +21021,8 @@ const Temp = struct {
=> false,
.register,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.register_overflow,
=> true,
.load_frame => |frame_addr| !frame_addr.index.isNamed(),
@@ -20330,7 +21039,7 @@ const Temp = struct {
cg.temp_type[@intFromEnum(new_temp_index)] = Type.usize;
cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
switch (temp.tracking(cg).short) {
- else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }),
+ else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
.register => |reg| {
const new_reg =
try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
@@ -20434,7 +21143,7 @@ const Temp = struct {
const new_temp_index = cg.next_temp_index;
cg.temp_type[@intFromEnum(new_temp_index)] = Type.usize;
switch (temp.tracking(cg).short) {
- else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }),
+ else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
.immediate => |imm| {
assert(limb_index == 0);
new_temp_index.tracking(cg).* = .init(.{ .immediate = imm });
@@ -20635,6 +21344,17 @@ const Temp = struct {
first_temp.* = result_temp;
}
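+ /// Retags a register temp as a register_mask in place; emits no code and
+ /// only rewrites the tracked MCValue with the mask kind, inversion, and
+ /// per-lane scalar size.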
+ fn asMask(temp: Temp, kind: MaskKind, inverted: bool, scalar: Memory.Size, cg: *CodeGen) void {
+ assert(scalar != .none);
+ const mcv = &temp.unwrap(cg).temp.tracking(cg).short;
+ mcv.* = .{ .register_mask = .{
+ .reg = mcv.register,
+ .kind = kind,
+ .inverted = inverted,
+ .scalar = scalar,
+ } };
+ }
+
fn toLea(temp: *Temp, cg: *CodeGen) !bool {
switch (temp.tracking(cg).short) {
.none,
@@ -20643,7 +21363,10 @@ const Temp = struct {
.undef,
.eflags,
.register_pair,
+ .register_triple,
+ .register_quadruple,
.register_overflow,
+ .register_mask,
.elementwise_regs_then_frame,
.reserved_frame,
.air_ref,
@@ -20677,10 +21400,7 @@ const Temp = struct {
fn toBase(temp: *Temp, cg: *CodeGen) !bool {
const temp_tracking = temp.tracking(cg);
- switch (temp_tracking.short) {
- else => {},
- .indirect, .load_frame => return false,
- }
+ if (temp_tracking.short.isBase()) return false;
const new_temp_index = cg.next_temp_index;
cg.temp_type[@intFromEnum(new_temp_index)] = temp.typeOf(cg);
const new_reg =
@@ -20697,7 +21417,7 @@ const Temp = struct {
const val_abi_size: u32 = @intCast(val_ty.abiSize(cg.pt.zcu));
const val = try cg.tempAlloc(val_ty);
switch (val.tracking(cg).short) {
- else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }),
+ else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
.register => |val_reg| {
while (try ptr.toLea(cg)) {}
switch (val_reg.class()) {
@@ -20706,7 +21426,7 @@ const Temp = struct {
registerAlias(val_reg, val_abi_size),
try ptr.tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(val_ty) }),
),
- else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }),
+ else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
}
},
.load_frame => |val_frame_addr| {
@@ -20724,7 +21444,7 @@ const Temp = struct {
const val_ty = val.typeOf(cg);
const val_abi_size: u32 = @intCast(val_ty.abiSize(cg.pt.zcu));
val: switch (val.tracking(cg).short) {
- else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }),
+ else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
.immediate => |imm| if (std.math.cast(i32, imm)) |s| {
while (try ptr.toLea(cg)) {}
try cg.asmMemoryImmediate(
@@ -20742,7 +21462,7 @@ const Temp = struct {
try ptr.tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(val_ty) }),
registerAlias(val_reg, val_abi_size),
),
- else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }),
+ else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
}
},
}
@@ -20876,6 +21596,7 @@ fn reuseTemp(
.register_pair,
.register_offset,
.register_overflow,
+ .register_mask,
.indirect,
=> for (tracking.short.getRegs()) |tracked_reg| {
if (RegisterManager.indexOfRegIntoTracked(tracked_reg)) |tracked_index| {
@@ -20913,6 +21634,26 @@ fn tempAllocReg(cg: *CodeGen, ty: Type, rc: RegisterManager.RegisterBitSet) !Tem
return .{ .index = temp_index.toIndex() };
}
+fn tempAllocRegPair(cg: *CodeGen, ty: Type, rc: RegisterManager.RegisterBitSet) !Temp {
+ const temp_index = cg.next_temp_index;
+ temp_index.tracking(cg).* = .init(
+ .{ .register_pair = try cg.register_manager.allocRegs(2, temp_index.toIndex(), rc) },
+ );
+ cg.temp_type[@intFromEnum(temp_index)] = ty;
+ cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
+ return .{ .index = temp_index.toIndex() };
+}
+
+fn tempAllocMem(cg: *CodeGen, ty: Type) !Temp {
+ const temp_index = cg.next_temp_index;
+ temp_index.tracking(cg).* = .init(
+ try cg.allocRegOrMemAdvanced(ty, temp_index.toIndex(), false),
+ );
+ cg.temp_type[@intFromEnum(temp_index)] = ty;
+ cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
+ return .{ .index = temp_index.toIndex() };
+}
+
fn tempFromValue(cg: *CodeGen, ty: Type, value: MCValue) !Temp {
const temp_index = cg.next_temp_index;
temp_index.tracking(cg).* = .init(value);
@@ -20993,30 +21734,23 @@ const Operand = union(enum) {
inst: Mir.Inst.Index,
};
-const SelectLoop = struct {
- element_reloc: Mir.Inst.Index,
- element_offset: union(enum) {
- unused,
- known: u31,
- temp: Temp,
- },
- element_size: ?u13,
- limb_reloc: Mir.Inst.Index,
- limb_offset: union(enum) {
- unused,
- known: u31,
- temp: Temp,
- },
- limb_size: ?u8,
- remaining_size: ?u64,
-};
-
const Pattern = struct {
ops: []const Op,
commute: struct { u8, u8 } = .{ 0, 0 },
const Set = struct {
required_features: []const std.Target.x86.Feature = &.{},
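+ /// Constrains the scalar type of every source temp; the payload bounds
+ /// the scalar's bit width (exact for floats, an upper bound for ints).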
+ scalar: union(enum) {
+ any,
+ bool,
+ float: Memory.Size,
+ any_int: Memory.Size,
+ signed_int: Memory.Size,
+ unsigned_int: Memory.Size,
+ any_float_or_int: Memory.Size,
+ } = .any,
+ clobbers: struct { eflags: bool = false } = .{},
+ invert_result: bool = false,
loop: enum {
/// only execute the instruction once
once,
@@ -21050,6 +21784,18 @@ const Pattern = struct {
xmm,
/// any 256-bit sse register
ymm,
+ /// a 64-bit mmx register mask
+ mm_mask,
+ /// a 128-bit sse register mask
+ xmm_mask,
+ /// a 256-bit sse register mask
+ ymm_mask,
+ /// a 64-bit mmx register sign mask
+ mm_sign_mask,
+ /// a 128-bit sse register sign mask
+ xmm_sign_mask,
+ /// a 256-bit sse register sign mask
+ ymm_sign_mask,
/// any memory
mem,
/// a limb stored in a gpr
@@ -21062,41 +21808,82 @@ const Pattern = struct {
ymm_limb,
/// a limb stored in memory
mem_limb,
+ /// a limb stored in a 64-bit mmx register mask
+ mm_mask_limb,
+ /// a limb stored in a 128-bit sse register masuk
+ xmm_mask_limb,
+ /// a limb stored in a 256-bit sse register masuk
+ ymm_mask_limb,
/// specific immediate
imm: i8,
/// any immediate sign-extended from 32 bits
simm32,
+ /// a temp general purpose register containing all ones
+ umax_gpr,
+ /// a temp 64-bit mmx register containing all ones
+ umax_mm,
+ /// a temp 128-bit sse register containing all ones
+ umax_xmm,
+ /// a temp 256-bit sse register containing all ones
+ umax_ymm,
fn matches(op: Op, is_mut: bool, temp: Temp, cg: *CodeGen) bool {
- const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu);
- return switch (op) {
+ switch (op) {
.implicit, .explicit => unreachable,
+ else => {},
+ // temp is undefined
+ .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => return true,
+ }
+ const temp_ty = temp.typeOf(cg);
+ const abi_size = temp_ty.abiSize(cg.pt.zcu);
+ return switch (op) {
+ .implicit, .explicit, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable,
.gpr => abi_size <= 8 and switch (temp.tracking(cg).short) {
.register => |reg| reg.class() == .general_purpose,
.register_offset => |reg_off| reg_off.reg.class() == .general_purpose and
reg_off.off == 0,
- else => cg.regClassForType(temp.typeOf(cg)) == .general_purpose,
+ else => cg.regClassForType(temp_ty) == .general_purpose,
},
- .mm => abi_size <= 8 and switch (temp.tracking(cg).short) {
+ .mm, .mm_mask, .mm_sign_mask => abi_size <= 8 and switch (temp.tracking(cg).short) {
.register => |reg| reg.class() == .mmx,
.register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0,
- else => cg.regClassForType(temp.typeOf(cg)) == .mmx,
+ else => cg.regClassForType(temp_ty) == .mmx,
},
- .xmm => abi_size > 8 and abi_size <= 16 and switch (temp.tracking(cg).short) {
+ .xmm, .xmm_mask, .xmm_sign_mask => abi_size > 8 and abi_size <= 16 and switch (temp.tracking(cg).short) {
.register => |reg| reg.class() == .sse,
.register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0,
- else => cg.regClassForType(temp.typeOf(cg)) == .sse,
+ else => cg.regClassForType(temp_ty) == .sse,
},
- .ymm => abi_size > 16 and abi_size <= 32 and switch (temp.tracking(cg).short) {
+ .ymm, .ymm_mask, .ymm_sign_mask => abi_size > 16 and abi_size <= 32 and switch (temp.tracking(cg).short) {
.register => |reg| reg.class() == .sse,
.register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0,
- else => cg.regClassForType(temp.typeOf(cg)) == .sse,
+ else => cg.regClassForType(temp_ty) == .sse,
},
.mem, .mem_limb => (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory(),
- .gpr_limb => abi_size > 8,
- .mm_limb => abi_size > 8 and (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory() and cg.regClassForType(temp.typeOf(cg)) == .mmx,
- .xmm_limb => abi_size > 16 and (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory(),
- .ymm_limb => abi_size > 32 and (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory(),
+ .gpr_limb => abi_size > 8 and switch (temp.tracking(cg).short) {
+ inline .register_pair, .register_triple, .register_quadruple => |regs| for (regs) |reg| {
+ if (reg.class() != .general_purpose) break false;
+ } else true,
+ else => |mcv| mcv.isMemory(),
+ },
+ .mm_limb, .mm_mask_limb => abi_size > 8 and switch (temp.tracking(cg).short) {
+ inline .register_pair, .register_triple, .register_quadruple => |regs| for (regs) |reg| {
+ if (reg.class() != .mmx) break false;
+ } else true,
+ else => |mcv| mcv.isMemory() and cg.regClassForType(temp_ty) == .mmx,
+ },
+ .xmm_limb, .xmm_mask_limb => abi_size > 16 and switch (temp.tracking(cg).short) {
+ inline .register_pair, .register_triple, .register_quadruple => |regs| for (regs) |reg| {
+ if (reg.class() != .sse) break false;
+ } else true,
+ else => |mcv| mcv.isMemory(),
+ },
+ .ymm_limb, .ymm_mask_limb => abi_size > 32 and switch (temp.tracking(cg).short) {
+ inline .register_pair, .register_triple, .register_quadruple => |regs| for (regs) |reg| {
+ if (reg.class() != .sse) break false;
+ } else true,
+ else => |mcv| mcv.isMemory(),
+ },
.imm => |specific_imm| if (is_mut) unreachable else switch (temp.tracking(cg).short) {
.immediate => |imm| @as(i64, @bitCast(imm)) == specific_imm,
else => false,
@@ -21109,78 +21896,176 @@ const Pattern = struct {
}
};
};
-fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_sets: []const Pattern.Set) !void {
- var loop: SelectLoop = .{
+const SelectOptions = struct {
+ invert_result: bool = false,
+};
+fn select(
+ cg: *CodeGen,
+ dst_temps: []Temp,
+ dst_tys: []const Type,
+ src_temps: []Temp,
+ pattern_sets: []const Pattern.Set,
+ opts: SelectOptions,
+) !void {
+ var loop: struct {
+ element_reloc: Mir.Inst.Index,
+ element_offset: Offset,
+ element_size: ?u13,
+ limb_reloc: Mir.Inst.Index,
+ limb_offset: Offset,
+ limb_size: ?u8,
+ mask_limb_temp: ?Temp,
+ mask_limb_offset: Offset,
+ mask_limb_offset_lock: ?RegisterLock,
+ mask_limb_bit_size: ?u7,
+ mask_store_temp: ?Temp,
+ mask_store_reg: ?Register,
+ mask_store_bit_size: ?u7,
+ remaining_size: ?u64,
+
+ const Offset = union(enum) {
+ unused,
+ known: u31,
+ temp: Temp,
+ };
+ } = .{
.element_reloc = undefined,
.element_offset = .unused,
.element_size = null,
.limb_reloc = undefined,
.limb_offset = .unused,
.limb_size = null,
+ .mask_limb_temp = null,
+ .mask_limb_offset = .unused,
+ .mask_limb_offset_lock = null,
+ .mask_limb_bit_size = null,
+ .mask_store_temp = null,
+ .mask_store_reg = null,
+ .mask_store_bit_size = null,
.remaining_size = null,
};
var extra_temps: [4]?Temp = @splat(null);
pattern_sets: for (pattern_sets) |pattern_set| {
for (pattern_set.required_features) |required_feature| if (!cg.hasFeature(required_feature)) continue :pattern_sets;
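+ // Skip the whole pattern set unless every source temp's scalar type
+ // satisfies its scalar constraint.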
+ for (src_temps) |src_temp| switch (pattern_set.scalar) {
+ .any => {},
+ .bool => if (src_temp.typeOf(cg).scalarType(cg.pt.zcu).toIntern() != .bool_type) continue :pattern_sets,
+ .float => |size| {
+ const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu);
+ if (!scalar_ty.isRuntimeFloat()) continue :pattern_sets;
+ if (scalar_ty.floatBits(cg.target.*) != size.bitSize()) continue :pattern_sets;
+ },
+ .any_int => |size| {
+ const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu);
+ if (!scalar_ty.isAbiInt(cg.pt.zcu)) continue :pattern_sets;
+ if (scalar_ty.intInfo(cg.pt.zcu).bits > size.bitSize()) continue :pattern_sets;
+ },
+ .signed_int => |size| {
+ const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu);
+ if (!scalar_ty.isAbiInt(cg.pt.zcu)) continue :pattern_sets;
+ const scalar_info = scalar_ty.intInfo(cg.pt.zcu);
+ if (scalar_info.signedness != .signed) continue :pattern_sets;
+ if (scalar_info.bits > size.bitSize()) continue :pattern_sets;
+ },
+ .unsigned_int => |size| {
+ const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu);
+ if (!scalar_ty.isAbiInt(cg.pt.zcu)) continue :pattern_sets;
+ const scalar_info = scalar_ty.intInfo(cg.pt.zcu);
+ if (scalar_info.signedness != .unsigned) continue :pattern_sets;
+ if (scalar_info.bits > size.bitSize()) continue :pattern_sets;
+ },
+ .any_float_or_int => |size| {
+ const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu);
+ if (scalar_ty.isRuntimeFloat()) {
+ if (scalar_ty.floatBits(cg.target.*) != size.bitSize()) continue :pattern_sets;
+ } else if (scalar_ty.isAbiInt(cg.pt.zcu)) {
+ if (scalar_ty.intInfo(cg.pt.zcu).bits > size.bitSize()) continue :pattern_sets;
+ } else continue :pattern_sets;
+ },
+ };
patterns: for (pattern_set.patterns) |pattern| {
for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| {
const ref_src_op, const is_mut = switch (src_op) {
- .implicit, .explicit => |op_index| .{ pattern.ops[op_index], true },
+ .implicit, .explicit => |linked_index| .{ pattern.ops[linked_index], true },
else => .{ src_op, false },
};
- if (!ref_src_op.matches(is_mut, src_temp.*, cg)) continue :patterns;
+ if (!ref_src_op.matches(is_mut, src_temp, cg)) continue :patterns;
}
- while (true) for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| {
- if (switch (switch (src_op) {
- .implicit, .explicit => |op_index| pattern.ops[op_index],
+
+ while (true) for (src_temps, pattern.ops[dst_temps.len..]) |*src_temp, src_op| {
+ if (changed: switch (switch (src_op) {
+ .implicit, .explicit => |linked_index| pattern.ops[linked_index],
else => src_op,
}) {
.implicit, .explicit => unreachable,
.gpr => try src_temp.toRegClass(.general_purpose, cg),
- .mm => try src_temp.toRegClass(.mmx, cg),
- .xmm, .ymm => try src_temp.toRegClass(.sse, cg),
- .mem, .imm, .simm32 => false,
- .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb, .mem_limb => switch (src_temp.tracking(cg).short) {
- .register_pair => false,
+ .mm, .mm_mask, .mm_sign_mask => try src_temp.toRegClass(.mmx, cg),
+ .xmm,
+ .ymm,
+ .xmm_mask,
+ .ymm_mask,
+ .xmm_sign_mask,
+ .ymm_sign_mask,
+ => try src_temp.toRegClass(.sse, cg),
+ .mem => try src_temp.toBase(cg),
+ .imm, .simm32 => false,
+ .gpr_limb,
+ .mm_limb,
+ .xmm_limb,
+ .ymm_limb,
+ .mem_limb,
+ => switch (src_temp.tracking(cg).short) {
+ .register_pair, .register_triple, .register_quadruple => false,
else => try src_temp.toBase(cg),
},
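+ // Without bmi2 (shlx), the variable shifts used to accumulate mask limbs
+ // must take their count in cl, so claim and lock rcx up front.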
+ .mm_mask_limb,
+ .xmm_mask_limb,
+ .ymm_mask_limb,
+ => if (!cg.hasFeature(.bmi2) and !cg.register_manager.isKnownRegFree(.rcx)) {
+ try cg.register_manager.getKnownReg(.rcx, null);
+ loop.mask_limb_offset_lock = cg.register_manager.lockKnownRegAssumeUnused(.rcx);
+ break :changed true;
+ } else false,
+ .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => false,
}) break;
} else break;
+
+ var dst_is_linked: std.StaticBitSet(4) = .initEmpty();
var mir_ops_len = dst_temps.len;
for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| {
- const ref_src_op, const extra_temp = op: switch (src_op) {
- .implicit => |op_index| {
- dst_temps[op_index] = if (src_temp.isMut(cg))
- src_temp.*
- else
- try cg.tempAlloc(src_temp.typeOf(cg));
- break :op .{ pattern.ops[op_index], &extra_temps[op_index] };
- },
- .explicit => |op_index| {
- dst_temps[op_index] = if (src_temp.isMut(cg))
- src_temp.*
- else
- try cg.tempAlloc(src_temp.typeOf(cg));
- defer mir_ops_len += 1;
- break :op .{ pattern.ops[op_index], &extra_temps[mir_ops_len] };
- },
- else => {
- defer mir_ops_len += 1;
- break :op .{ src_op, &extra_temps[mir_ops_len] };
+ defer mir_ops_len += @intFromBool(src_op != .implicit);
+ const linked_src_op, const extra_temp = op: switch (src_op) {
+ .implicit, .explicit => |linked_index| {
+ if (src_temp.isMut(cg)) {
+ dst_temps[linked_index] = src_temp;
+ dst_is_linked.set(linked_index);
+ }
+ break :op .{ pattern.ops[linked_index], &extra_temps[linked_index] };
},
+ else => .{ src_op, &extra_temps[mir_ops_len] },
};
- const limb_size: u8, const rc = switch (ref_src_op) {
+ const limb_size: u8, const rc = switch (linked_src_op) {
else => continue,
.gpr_limb => .{ 8, abi.RegisterClass.gp },
- .mm_limb => .{ 8, @panic("TODO") },
- .xmm_limb => .{ 16, abi.RegisterClass.sse },
- .ymm_limb => .{ 32, abi.RegisterClass.sse },
+ .mm_limb, .mm_mask_limb => .{ 8, @panic("TODO") },
+ .xmm_limb, .xmm_mask_limb => .{ 16, abi.RegisterClass.sse },
+ .ymm_limb, .ymm_mask_limb => .{ 32, abi.RegisterClass.sse },
+ .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => {
+ assert(extra_temp.* == null);
+ extra_temp.* = try cg.tempAllocReg(Type.usize, switch (linked_src_op) {
+ else => unreachable,
+ .umax_gpr => abi.RegisterClass.gp,
+ .umax_mm => @panic("TODO"),
+ .umax_xmm, .umax_ymm => abi.RegisterClass.sse,
+ });
+ continue;
+ },
};
assert(loop.limb_size == null or loop.limb_size == limb_size);
loop.limb_size = limb_size;
loop.remaining_size = loop.remaining_size orelse src_temp.typeOf(cg).abiSize(cg.pt.zcu);
switch (src_temp.tracking(cg).short) {
- .register_pair => switch (loop.limb_offset) {
+ .register_pair, .register_triple, .register_quadruple => switch (loop.limb_offset) {
.unused, .temp => loop.limb_offset = .{ .known = 0 },
.known => {},
},
@@ -21189,25 +22074,130 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set
.unused => loop.limb_offset = .{ .temp = undefined },
.known, .temp => {},
}
+ assert(extra_temp.* == null);
extra_temp.* = try cg.tempAllocReg(Type.usize, rc);
},
}
}
+ for (
+ 0..,
+ dst_temps,
+ pattern.ops[0..dst_temps.len],
+ dst_tys,
+ extra_temps[0..dst_temps.len],
+ ) |dst_index, *dst_temp, dst_op, dst_ty, *extra_temp| switch (dst_op) {
+ else => if (!dst_is_linked.isSet(dst_index)) {
+ dst_temp.* = dst_temp: switch (dst_op) {
+ .implicit => unreachable,
+ .explicit => |linked_index| dst_temps[linked_index],
+ .gpr => try cg.tempAllocReg(dst_ty, abi.RegisterClass.gp),
+ .mm, .mm_mask, .mm_sign_mask => @panic("TODO"),
+ .xmm, .xmm_mask, .xmm_sign_mask => try cg.tempAllocReg(dst_ty, abi.RegisterClass.sse),
+ .ymm, .ymm_mask, .ymm_sign_mask => try cg.tempAllocReg(dst_ty, abi.RegisterClass.sse),
+ .mem => @panic("TODO"),
+ .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => {
+ if (extra_temp.* == null) extra_temp.* = try cg.tempAllocReg(Type.usize, switch (dst_op) {
+ else => unreachable,
+ .gpr_limb => abi.RegisterClass.gp,
+ .mm_limb => @panic("TODO"),
+ .xmm_limb, .ymm_limb => abi.RegisterClass.sse,
+ });
+ break :dst_temp try cg.tempAlloc(dst_ty);
+ },
+ .mem_limb => try cg.tempAlloc(dst_ty),
+ .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => unreachable, // already checked
+ .imm, .simm32, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, // unmodifiable destination
+ };
+ },
+ .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => {
+ const scalar_size = @divExact(switch (pattern_set.scalar) {
+ .any, .bool => unreachable,
+ .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size,
+ }.bitSize(), 8);
+ const mask_bit_size = @divExact(loop.remaining_size.?, scalar_size);
+ const mask_limb_bit_size: u7 = @intCast(@divExact(loop.limb_size.?, scalar_size));
+ assert(loop.mask_limb_bit_size == null or loop.mask_limb_bit_size == mask_limb_bit_size);
+ loop.mask_limb_bit_size = mask_limb_bit_size;
+ const mask_store_bit_size = mask_store_bit_size: {
+ // Try to match limb size so that no shifting will be needed.
+ if (mask_limb_bit_size % 8 == 0) break :mask_store_bit_size mask_limb_bit_size;
+ // If abi size <= 8 the entire value can be stored at once,
+ // enabling store forwarding and minimizing store buffer usage.
+ // Otherwise, we will be performing shifts that need to wrap at
+ // store size, which for x86 requires 32 or 64, so just pick 64
+ // for the same reasons as above.
+ break :mask_store_bit_size @min(mask_bit_size, 64);
+ };
+ assert(loop.mask_store_bit_size == null or loop.mask_store_bit_size == mask_store_bit_size);
+ loop.mask_store_bit_size = mask_store_bit_size;
+ loop.mask_limb_offset = loop.limb_offset;
+ if (extra_temp.* == null) extra_temp.* = try cg.tempAllocReg(Type.usize, switch (dst_op) {
+ else => unreachable,
+ .mm_mask_limb => @panic("TODO"),
+ .xmm_mask_limb, .ymm_mask_limb => abi.RegisterClass.sse,
+ });
+ if (loop.mask_limb_temp == null) loop.mask_limb_temp = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp);
+ if (mask_limb_bit_size < mask_store_bit_size and loop.mask_store_reg == null) {
+ loop.mask_store_temp = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp);
+ loop.mask_store_reg = loop.mask_store_temp.?.tracking(cg).short.register;
+ }
+ dst_temp.* = if (mask_store_bit_size < mask_bit_size)
+ try cg.tempAllocMem(dst_ty)
+ else if (loop.mask_store_temp) |mask_store_temp| dst_temp: {
+ loop.mask_store_temp = null;
+ break :dst_temp mask_store_temp;
+ } else try cg.tempAlloc(dst_ty);
+ },
+ };
+ switch (loop.mask_limb_offset) {
+ .unused, .known => {},
+ .temp => |*mask_limb_offset| {
+ if (cg.hasFeature(.bmi2)) {
+ assert(loop.mask_limb_offset_lock == null);
+ mask_limb_offset.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp);
+ } else {
+ if (loop.mask_limb_offset_lock) |lock| cg.register_manager.unlockReg(lock);
+ loop.mask_limb_offset_lock = null;
+ mask_limb_offset.* = try cg.tempFromValue(Type.usize, .{ .register = .rcx });
+ }
+ if (loop.mask_store_reg) |mask_store_reg| {
+ const mask_store_alias = registerAlias(
+ mask_store_reg,
+ @min(std.math.divCeil(u7, loop.mask_store_bit_size.?, 8) catch unreachable, 4),
+ );
+ try cg.spillEflagsIfOccupied();
+ try cg.asmRegisterRegister(.{ ._, .xor }, mask_store_alias, mask_store_alias);
+ }
+ },
+ }
switch (loop.element_offset) {
.unused, .known => {},
.temp => |*element_offset| {
element_offset.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp);
const element_offset_reg = element_offset.tracking(cg).short.register;
+ try cg.spillEflagsIfOccupied();
try cg.asmRegisterRegister(.{ ._, .xor }, element_offset_reg.to32(), element_offset_reg.to32());
loop.element_reloc = @intCast(cg.mir_instructions.len);
},
}
+ switch (loop.limb_offset) {
+ .unused, .known => {},
+ .temp => |*limb_offset| limb_offset.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp),
+ }
while (true) {
+ switch (loop.mask_limb_offset) {
+ .unused, .known => {},
+ .temp => |mask_limb_offset| {
+ const mask_limb_offset_reg = mask_limb_offset.tracking(cg).short.register.to32();
+ try cg.spillEflagsIfOccupied();
+ try cg.asmRegisterRegister(.{ ._, .xor }, mask_limb_offset_reg, mask_limb_offset_reg);
+ },
+ }
switch (loop.limb_offset) {
.unused, .known => {},
- .temp => |*limb_offset| {
- limb_offset.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp);
- const limb_offset_reg = limb_offset.tracking(cg).short.register;
+ .temp => |limb_offset| {
+ const limb_offset_reg = limb_offset.tracking(cg).short.register.to32();
+ try cg.spillEflagsIfOccupied();
try cg.asmRegisterRegister(.{ ._, .xor }, limb_offset_reg.to32(), limb_offset_reg.to32());
loop.limb_reloc = @intCast(cg.mir_instructions.len);
},
@@ -21216,56 +22206,67 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set
var mir_ops: [4]Operand = @splat(.none);
mir_ops_len = dst_temps.len;
for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| {
- const mir_op, const ref_src_op, const extra_temp = op: switch (src_op) {
- .implicit => |op_index| .{ &mir_ops[op_index], pattern.ops[op_index], &extra_temps[op_index] },
- .explicit => |op_index| {
- defer mir_ops_len += 1;
- break :op .{ &mir_ops[mir_ops_len], pattern.ops[op_index], &extra_temps[mir_ops_len] };
- },
- else => {
- defer mir_ops_len += 1;
- break :op .{ &mir_ops[mir_ops_len], src_op, &extra_temps[mir_ops_len] };
- },
+ defer mir_ops_len += @intFromBool(src_op != .implicit);
+ const mir_op, const linked_src_op, const extra_temp = switch (src_op) {
+ .implicit => |linked_index| .{ &mir_ops[linked_index], pattern.ops[linked_index], extra_temps[linked_index] },
+ .explicit => |linked_index| .{ &mir_ops[mir_ops_len], pattern.ops[linked_index], extra_temps[linked_index] },
+ else => .{ &mir_ops[mir_ops_len], src_op, extra_temps[mir_ops_len] },
};
- const src_mcv = src_temp.tracking(cg).short;
- switch (ref_src_op) {
- else => {},
- .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => switch (src_mcv) {
- .register_pair => {},
- else => try cg.asmRegisterMemory(
- switch (ref_src_op) {
- else => unreachable,
- .gpr_limb => .{ ._, .mov },
- .mm_limb => .{ ._q, .mov },
- .xmm_limb, .ymm_limb => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu },
+ const src_mcv = switch (linked_src_op) {
+ else => src_temp,
+ // src_temp is undefined
+ .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => extra_temp.?,
+ }.tracking(cg).short;
+ copy_limb: switch (src_mcv) {
+ .register_pair, .register_triple, .register_quadruple => {},
+ else => try cg.asmRegisterMemory(
+ switch (linked_src_op) {
+ else => break :copy_limb,
+ .gpr_limb => .{ ._, .mov },
+ .mm_limb, .mm_mask_limb => .{ ._q, .mov },
+ .xmm_limb,
+ .ymm_limb,
+ .xmm_mask_limb,
+ .ymm_mask_limb,
+ => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu },
+ },
+ registerAlias(extra_temp.?.tracking(cg).short.register, loop.limb_size.?),
+ try src_mcv.mem(cg, switch (loop.limb_offset) {
+ .unused => unreachable,
+ .known => |limb_offset| .{
+ .size = .fromSize(loop.limb_size.?),
+ .disp = limb_offset,
},
- registerAlias(extra_temp.*.?.tracking(cg).short.register, loop.limb_size.?),
- try src_mcv.mem(cg, switch (loop.limb_offset) {
- .unused => unreachable,
- .known => |limb_offset| .{
- .size = .fromSize(loop.limb_size.?),
- .disp = limb_offset,
- },
- .temp => |limb_offset| .{
- .size = .fromSize(loop.limb_size.?),
- .index = limb_offset.tracking(cg).short.register.to64(),
- },
- }),
- ),
- },
+ .temp => |limb_offset| .{
+ .size = .fromSize(loop.limb_size.?),
+ .index = limb_offset.tracking(cg).short.register.to64(),
+ },
+ }),
+ ),
}
- mir_op.* = switch (ref_src_op) {
+ mir_op.* = switch (linked_src_op) {
.implicit, .explicit => unreachable,
.gpr => .{ .reg = registerAlias(
src_mcv.register,
@intCast(src_temp.typeOf(cg).abiSize(cg.pt.zcu)),
) },
- .mm => .{ .reg = src_mcv.register },
- .xmm => .{ .reg = src_mcv.register.to128() },
- .ymm => .{ .reg = src_mcv.register.to256() },
+ .umax_gpr => .{ .reg = src_mcv.register.to64() }, // TODO: use other op size?
+ .mm, .mm_mask, .mm_sign_mask, .umax_mm => .{ .reg = src_mcv.register },
+ .xmm, .xmm_mask, .xmm_sign_mask, .umax_xmm => .{ .reg = src_mcv.register.to128() },
+ .ymm, .ymm_mask, .ymm_sign_mask, .umax_ymm => .{ .reg = src_mcv.register.to256() },
.mem => .{ .mem = try src_mcv.mem(cg, .{ .size = cg.memSize(src_temp.typeOf(cg)) }) },
- .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => switch (src_mcv) {
- .register_pair => |src_regs| switch (loop.limb_offset) {
+ .gpr_limb,
+ .mm_limb,
+ .xmm_limb,
+ .ymm_limb,
+ .mm_mask_limb,
+ .xmm_mask_limb,
+ .ymm_mask_limb,
+ => switch (src_mcv) {
+ inline .register_pair,
+ .register_triple,
+ .register_quadruple,
+ => |src_regs| switch (loop.limb_offset) {
.unused => unreachable,
.known => |limb_offset| .{ .reg = registerAlias(
src_regs[@divExact(limb_offset, loop.limb_size.?)],
@@ -21274,12 +22275,12 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set
.temp => unreachable,
},
else => .{ .reg = registerAlias(
- extra_temp.*.?.tracking(cg).short.register,
+ extra_temp.?.tracking(cg).short.register,
loop.limb_size.?,
) },
},
.mem_limb => .{ .mem = switch (src_mcv) {
- .register_pair => unreachable,
+ .register_pair, .register_triple, .register_quadruple => unreachable,
else => switch (loop.limb_offset) {
.unused => unreachable,
.known => |limb_offset| try src_mcv.mem(cg, .{
@@ -21309,76 +22310,53 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set
.u(@as(u32, @intCast(src_mcv.immediate))) },
},
};
+ switch (src_op) {
+ else => {},
+ .explicit => |linked_index| mir_ops[linked_index] = mir_op.*,
+ }
}
for (
+ mir_ops[0..dst_temps.len],
pattern.ops[0..dst_temps.len],
dst_temps,
- mir_ops[0..dst_temps.len],
+ dst_tys,
extra_temps[0..dst_temps.len],
- ) |dst_op, *dst_temp, *mir_op, *extra_temp| {
+ ) |*mir_op, dst_op, dst_temp, dst_ty, extra_temp| {
if (mir_op.* != .none) continue;
- const ty = src_temps[0].typeOf(cg);
- switch (dst_op) {
+ mir_op.* = switch (dst_op) {
.implicit => unreachable,
- .explicit => |op_index| {
- dst_temp.* = dst_temps[op_index];
- mir_op.* = mir_ops[op_index];
- },
- .gpr => {
- dst_temp.* = try cg.tempAllocReg(ty, abi.RegisterClass.gp);
- mir_op.* = .{ .reg = registerAlias(
- dst_temp.tracking(cg).short.register,
- @intCast(ty.abiSize(cg.pt.zcu)),
- ) };
- },
- .mm => @panic("TODO"),
- .xmm => {
- dst_temp.* = try cg.tempAllocReg(ty, abi.RegisterClass.sse);
- mir_op.* = .{ .reg = dst_temp.tracking(cg).short.register.to128() };
- },
- .ymm => {
- dst_temp.* = try cg.tempAllocReg(ty, abi.RegisterClass.sse);
- mir_op.* = .{ .reg = dst_temp.tracking(cg).short.register.to256() };
- },
+ .explicit => |linked_index| mir_ops[linked_index],
+ .gpr => .{ .reg = registerAlias(
+ dst_temp.tracking(cg).short.register,
+ @intCast(dst_ty.abiSize(cg.pt.zcu)),
+ ) },
+ .mm, .mm_mask, .mm_sign_mask => @panic("TODO"),
+ .xmm, .xmm_mask, .xmm_sign_mask => .{ .reg = dst_temp.tracking(cg).short.register.to128() },
+ .ymm, .ymm_mask, .ymm_sign_mask => .{ .reg = dst_temp.tracking(cg).short.register.to256() },
.mem => @panic("TODO"),
- .gpr_limb => {
- dst_temp.* = try cg.tempAlloc(ty);
- extra_temp.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp);
- mir_op.* = .{ .reg = extra_temp.*.?.tracking(cg).short.register.to64() };
- },
- .mm_limb => {
- dst_temp.* = try cg.tempAlloc(ty);
- extra_temp.* = try cg.tempAllocReg(Type.usize, @panic("TODO"));
- mir_op.* = .{ .reg = extra_temp.*.?.tracking(cg).short.register };
- },
- .xmm_limb => {
- dst_temp.* = try cg.tempAlloc(ty);
- extra_temp.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.sse);
- mir_op.* = .{ .reg = extra_temp.*.?.tracking(cg).short.register.to128() };
- },
- .ymm_limb => {
- dst_temp.* = try cg.tempAlloc(ty);
- extra_temp.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.sse);
- mir_op.* = .{ .reg = extra_temp.*.?.tracking(cg).short.register.to256() };
- },
- .mem_limb => {
- dst_temp.* = try cg.tempAlloc(ty);
- mir_op.* = .{ .mem = try dst_temp.tracking(cg).short.mem(cg, switch (loop.limb_offset) {
- .unused => unreachable,
- .known => |limb_offset| .{
- .size = .fromSize(loop.limb_size.?),
- .disp = limb_offset,
- },
- .temp => |limb_offset| .{
- .size = .fromSize(loop.limb_size.?),
- .index = limb_offset.tracking(cg).short.register.to64(),
- },
- }) };
- },
- .imm, .simm32 => unreachable, // unmodifiable destination
- }
+ .gpr_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to64() },
+ .mm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register },
+ .xmm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to128() },
+ .ymm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to256() },
+ .mem_limb => .{ .mem = try dst_temp.tracking(cg).short.mem(cg, switch (loop.limb_offset) {
+ .unused => unreachable,
+ .known => |limb_offset| .{
+ .size = .fromSize(loop.limb_size.?),
+ .disp = limb_offset,
+ },
+ .temp => |limb_offset| .{
+ .size = .fromSize(loop.limb_size.?),
+ .index = limb_offset.tracking(cg).short.register.to64(),
+ },
+ }) },
+ .mm_mask_limb => .{ .reg = extra_temp.?.tracking(cg).short.register },
+ .xmm_mask_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to128() },
+ .ymm_mask_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to256() },
+ .imm, .simm32, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, // unmodifiable destination
+ };
}
std.mem.swap(Operand, &mir_ops[pattern.commute[0]], &mir_ops[pattern.commute[1]]);
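+ // Spill any value tracked in eflags before emitting an instruction that clobbers the flags.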
+ if (pattern_set.clobbers.eflags) try cg.spillEflagsIfOccupied();
cg.asmOps(pattern_set.mir_tag, mir_ops) catch |err| switch (err) {
error.InvalidInstruction => {
const fixes = @tagName(pattern_set.mir_tag[0]);
@@ -21398,42 +22376,223 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set
},
else => |e| return e,
};
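+ // The result is inverted when exactly one of the call site and the selected pattern requests inversion.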
+ const invert_result = opts.invert_result != pattern_set.invert_result;
for (
extra_temps[0..dst_temps.len],
pattern.ops[0..dst_temps.len],
+ mir_ops[0..dst_temps.len],
dst_temps,
- ) |maybe_extra_temp, dst_op, dst_temp| if (maybe_extra_temp) |extra_temp| switch (dst_op) {
- else => {},
- .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => switch (dst_temp.tracking(cg).short) {
- .register_pair => |dst_regs| switch (loop.limb_offset) {
- .unused => unreachable,
- .known => |limb_offset| try cg.asmRegisterRegister(
- .{ ._, .mov },
- dst_regs[@divExact(limb_offset, loop.limb_size.?)].to64(),
- extra_temp.tracking(cg).short.register.to64(),
+ ) |extra_temp, dst_op, mir_op, dst_temp| switch (dst_op) {
+ else => if (invert_result) {
+ try cg.spillEflagsIfOccupied();
+ cg.asmOps(
+ .{ ._, .not },
+ .{ mir_op, .none, .none, .none },
+ ) catch |err| switch (err) {
+ error.InvalidInstruction => return cg.fail(
+ "invalid instruction: 'not {s} none none none'",
+ .{@tagName(mir_op)},
),
- .temp => unreachable,
- },
- else => |dst_mcv| try cg.asmMemoryRegister(
- switch (dst_op) {
- else => unreachable,
- .gpr_limb => .{ ._, .mov },
- .mm_limb => .{ ._q, .mov },
- .xmm_limb, .ymm_limb => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu },
+ else => |e| return e,
+ };
+ },
+ .mm_mask,
+ .xmm_mask,
+ .ymm_mask,
+ .mm_sign_mask,
+ .xmm_sign_mask,
+ .ymm_sign_mask,
+ => dst_temp.asMask(switch (dst_op) {
+ else => unreachable,
+ .mm_mask, .xmm_mask, .ymm_mask => .all,
+ .mm_sign_mask, .xmm_sign_mask, .ymm_sign_mask => .sign,
+ }, invert_result, switch (pattern_set.scalar) {
+ .any, .bool => unreachable,
+ .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size,
+ }, cg),
+ .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => if (extra_temp) |limb_temp|
+ switch (dst_temp.tracking(cg).short) {
+ inline .register_pair,
+ .register_triple,
+ .register_quadruple,
+ => |dst_regs| switch (loop.limb_offset) {
+ .unused => unreachable,
+ .known => |limb_offset| try cg.asmRegisterRegister(
+ .{ ._, .mov },
+ dst_regs[@divExact(limb_offset, loop.limb_size.?)].to64(),
+ limb_temp.tracking(cg).short.register.to64(),
+ ),
+ .temp => unreachable,
},
- try dst_mcv.mem(cg, switch (loop.limb_offset) {
+ else => |dst_mcv| try cg.asmMemoryRegister(
+ switch (dst_op) {
+ else => unreachable,
+ .gpr_limb => .{ ._, .mov },
+ .mm_limb => .{ ._q, .mov },
+ .xmm_limb, .ymm_limb => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu },
+ },
+ try dst_mcv.mem(cg, switch (loop.limb_offset) {
+ .unused => unreachable,
+ .known => |limb_offset| .{
+ .size = .fromSize(loop.limb_size.?),
+ .disp = limb_offset,
+ },
+ .temp => |limb_offset| .{
+ .size = .fromSize(loop.limb_size.?),
+ .index = limb_offset.tracking(cg).short.register.to64(),
+ },
+ }),
+ registerAlias(limb_temp.tracking(cg).short.register, loop.limb_size.?),
+ ),
+ },
+ .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => {
+ const scalar_size = switch (pattern_set.scalar) {
+ .any, .bool => unreachable,
+ .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size,
+ };
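+ // pmovmskb extracts one bit per byte, so word-sized compare results are saturating-packed down to bytes first.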
+ switch (scalar_size) {
+ else => {},
+ .word => if (cg.hasFeature(.avx)) try cg.asmRegisterRegisterRegister(
+ .{ .vp_b, .ackssw },
+ mir_op.reg,
+ mir_op.reg,
+ mir_op.reg,
+ ) else try cg.asmRegisterRegister(
+ .{ .p_b, .ackssw },
+ mir_op.reg,
+ mir_op.reg,
+ ),
+ }
+ const mask_store_size: u4 =
+ @intCast(std.math.divCeil(u7, loop.mask_store_bit_size.?, 8) catch unreachable);
+ const mask_limb_reg = registerAlias(
+ loop.mask_limb_temp.?.tracking(cg).short.register,
+ mask_store_size,
+ );
+ try cg.asmRegisterRegister(switch (scalar_size) {
+ else => unreachable,
+ .byte, .word => .{ if (cg.hasFeature(.avx)) .vp_b else .p_b, .movmsk },
+ .dword => .{ if (cg.hasFeature(.avx)) .v_ps else ._ps, .movmsk },
+ .qword => .{ if (cg.hasFeature(.avx)) .v_pd else ._pd, .movmsk },
+ }, mask_limb_reg.to32(), mir_op.reg);
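+ // When inverting, flip only the limb's live bits if they will be merged into a store register; otherwise a plain not suffices.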
+ if (invert_result) if (loop.mask_store_reg) |_| {
+ try cg.spillEflagsIfOccupied();
+ try cg.asmRegisterImmediate(
+ .{ ._, .xor },
+ registerAlias(mask_limb_reg, @min(mask_store_size, 4)),
+ .u((@as(u32, 1) << @intCast(loop.mask_limb_bit_size.?)) - 1),
+ );
+ } else try cg.asmRegister(.{ ._, .not }, mask_limb_reg);
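+ // Shift the limb's mask bits into position and accumulate them into the store register with or.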
+ if (loop.mask_store_reg) |mask_store_reg| {
+ const mask_store_alias = registerAlias(mask_store_reg, mask_store_size);
+ switch (loop.mask_limb_offset) {
.unused => unreachable,
- .known => |limb_offset| .{
- .size = .fromSize(loop.limb_size.?),
- .disp = limb_offset,
+ .known => |mask_limb_offset| switch (mask_limb_offset & (loop.mask_store_bit_size.? - 1)) {
+ 0 => try cg.asmRegisterRegister(.{ ._, .mov }, mask_store_alias, mask_limb_reg),
+ else => |shl_count| {
+ try cg.spillEflagsIfOccupied();
+ try cg.asmRegisterImmediate(.{ ._l, .sh }, mask_limb_reg, .u(shl_count));
+ try cg.spillEflagsIfOccupied();
+ try cg.asmRegisterRegister(.{ ._, .@"or" }, mask_store_alias, mask_limb_reg);
+ },
},
- .temp => |limb_offset| .{
- .size = .fromSize(loop.limb_size.?),
- .index = limb_offset.tracking(cg).short.register.to64(),
+ .temp => |mask_limb_offset| {
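+ // With BMI2, shlx takes its count from any register and leaves eflags untouched, so no flags spill is needed.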
+ if (cg.hasFeature(.bmi2)) {
+ const shlx_size = @max(mask_store_size, 4);
+ const shlx_mask_limb_reg = registerAlias(mask_limb_reg, shlx_size);
+ try cg.asmRegisterRegisterRegister(
+ .{ ._lx, .sh },
+ shlx_mask_limb_reg,
+ shlx_mask_limb_reg,
+ registerAlias(mask_limb_offset.tracking(cg).short.register, shlx_size),
+ );
+ } else {
+ try cg.spillEflagsIfOccupied();
+ try cg.asmRegisterRegister(
+ .{ ._l, .sh },
+ mask_limb_reg,
+ mask_limb_offset.tracking(cg).short.register.to8(),
+ );
+ }
+ try cg.spillEflagsIfOccupied();
+ try cg.asmRegisterRegister(.{ ._, .@"or" }, mask_store_alias, mask_limb_reg);
},
- }),
- registerAlias(extra_temp.tracking(cg).short.register, loop.limb_size.?),
- ),
+ }
+ }
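+ // Advance the mask offset; whenever a full store unit has been filled, flush it to the in-memory destination.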
+ const dst_mcv = dst_temp.tracking(cg).short;
+ switch (loop.mask_limb_offset) {
+ .unused => unreachable,
+ .known => |*mask_limb_offset| {
+ mask_limb_offset.* += loop.mask_limb_bit_size.?;
+ if (mask_limb_offset.* & (loop.mask_store_bit_size.? - 1) == 0) {
+ switch (dst_mcv) {
+ .register => {},
+ else => try cg.asmMemoryRegister(
+ .{ ._, .mov },
+ try dst_mcv.mem(cg, .{
+ .size = .fromSize(mask_store_size),
+ .disp = @divExact(mask_limb_offset.*, 8) - mask_store_size,
+ }),
+ registerAlias(loop.mask_store_reg orelse mask_limb_reg, mask_store_size),
+ ),
+ }
+ if (loop.mask_store_reg) |mask_store_reg| {
+ const mask_store_alias = registerAlias(mask_store_reg, @min(mask_store_size, 4));
+ try cg.asmRegisterRegister(.{ ._, .xor }, mask_store_alias, mask_store_alias);
+ }
+ }
+ },
+ .temp => |mask_limb_offset| {
+ const mask_limb_offset_reg = mask_limb_offset.tracking(cg).short.register.to32();
+ if (loop.mask_store_reg) |mask_store_reg| {
+ try cg.asmRegisterMemory(.{ ._, .lea }, mask_limb_offset_reg, .{
+ .base = .{ .reg = mask_limb_offset_reg.to64() },
+ .mod = .{ .rm = .{
+ .size = .qword,
+ .disp = loop.mask_limb_bit_size.?,
+ } },
+ });
+ switch (dst_mcv) {
+ .register => {},
+ else => {
+ try cg.spillEflagsIfOccupied();
+ try cg.asmRegisterImmediate(
+ .{ ._, .@"test" },
+ mask_limb_offset_reg,
+ .u(loop.mask_store_bit_size.? - 1),
+ );
+ const skip_store_reloc = try cg.asmJccReloc(.nz, undefined);
+ const mask_store_offset_reg = mask_limb_reg.to32();
+ try cg.asmRegisterRegister(.{ ._, .mov }, mask_store_offset_reg, mask_limb_offset_reg);
+ try cg.asmRegisterImmediate(.{ ._r, .sh }, mask_store_offset_reg, .u(3));
+ try cg.asmMemoryRegister(.{ ._, .mov }, try dst_mcv.mem(cg, .{
+ .size = .fromSize(mask_store_size),
+ .index = mask_store_offset_reg.to64(),
+ .disp = -@as(i8, mask_store_size),
+ }), registerAlias(mask_store_reg, mask_store_size));
+ const mask_store_alias = registerAlias(mask_store_reg, @min(mask_store_size, 4));
+ try cg.asmRegisterRegister(.{ ._, .xor }, mask_store_alias, mask_store_alias);
+ cg.performReloc(skip_store_reloc);
+ },
+ }
+ } else {
+ switch (dst_mcv) {
+ .register => {},
+ else => try cg.asmMemoryRegister(.{ ._, .mov }, try dst_mcv.mem(cg, .{
+ .size = .fromSize(mask_store_size),
+ .index = mask_limb_offset_reg.to64(),
+ }), mask_limb_reg),
+ }
+ try cg.asmRegisterMemory(.{ ._, .lea }, mask_limb_offset_reg, .{
+ .base = .{ .reg = mask_limb_offset_reg.to64() },
+ .mod = .{ .rm = .{
+ .size = .qword,
+ .disp = mask_store_size,
+ } },
+ });
+ }
+ },
+ }
},
};
switch (pattern_set.loop) {
@@ -21442,7 +22601,7 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set
.limbwise_carry => @panic("TODO"),
.limbwise_pairs_forward => @panic("TODO"),
.limbwise_pairs_reverse => @panic("TODO"),
- .elementwise => @panic("TODO"),
+ .elementwise => {},
}
switch (loop.limb_offset) {
.unused => break,
@@ -21452,6 +22611,11 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set
if (loop.remaining_size.? < loop.limb_size.? or
(loop.element_size != null and limb_offset.* >= loop.element_size.?))
{
+ switch (loop.mask_limb_offset) {
+ .unused => {},
+ .known => |*mask_limb_offset| mask_limb_offset.* = 0,
+ .temp => unreachable,
+ }
limb_offset.* = 0;
break;
}
@@ -21465,6 +22629,7 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set
.disp = loop.limb_size.?,
} },
});
+ try cg.spillEflagsIfOccupied();
try cg.asmRegisterImmediate(
.{ ._, .cmp },
limb_offset_reg.to32(),
@@ -21476,6 +22641,12 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set
},
}
}
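+ // The mask bookkeeping temporaries are dead once the elementwise loop completes.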
+ if (loop.mask_limb_temp) |mask_limb_temp| try mask_limb_temp.die(cg);
+ if (loop.mask_store_temp) |mask_store_temp| try mask_store_temp.die(cg);
+ switch (loop.mask_limb_offset) {
+ .unused, .known => {},
+ .temp => |mask_limb_offset| try mask_limb_offset.die(cg),
+ }
switch (loop.element_offset) {
.unused => break :pattern_sets,
.known => |*element_offset| {
src/arch/x86_64/Disassembler.zig
@@ -223,7 +223,7 @@ pub fn next(dis: *Disassembler) Error!?Instruction {
.op3 = op3,
});
},
- .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable, // TODO
+ .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => unreachable, // TODO
}
}
src/arch/x86_64/encoder.zig
@@ -403,7 +403,7 @@ pub const Instruction = struct {
else => {
const mem_op = switch (data.op_en) {
.m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
- .rm, .rmi, .rm0, .vmi => inst.ops[1],
+ .rm, .rmi, .rm0, .vmi, .rmv => inst.ops[1],
.rvm, .rvmr, .rvmi => inst.ops[2],
else => unreachable,
};
@@ -412,7 +412,7 @@ pub const Instruction = struct {
const rm = switch (data.op_en) {
.m, .mi, .m1, .mc, .vmi => enc.modRmExt(),
.mr, .mri, .mrc => inst.ops[1].reg.lowEnc(),
- .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0].reg.lowEnc(),
+ .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0].reg.lowEnc(),
.mvr => inst.ops[2].reg.lowEnc(),
else => unreachable,
};
@@ -422,7 +422,7 @@ pub const Instruction = struct {
const op = switch (data.op_en) {
.m, .mi, .m1, .mc, .vmi => .none,
.mr, .mri, .mrc => inst.ops[1],
- .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0],
+ .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0],
.mvr => inst.ops[2],
else => unreachable,
};
@@ -493,7 +493,7 @@ pub const Instruction = struct {
}
else
null,
- .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable,
+ .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => unreachable,
};
if (segment_override) |seg| {
legacy.setSegmentOverride(seg);
@@ -512,9 +512,9 @@ pub const Instruction = struct {
switch (op_en) {
.zo, .i, .zi, .fd, .td, .d => {},
.o, .oi => rex.b = inst.ops[0].reg.isExtended(),
- .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0 => {
+ .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .rmv => {
const r_op = switch (op_en) {
- .rm, .rmi, .rm0 => inst.ops[0],
+ .rm, .rmi, .rm0, .rmv => inst.ops[0],
.mr, .mri, .mrc => inst.ops[1],
else => .none,
};
@@ -546,9 +546,9 @@ pub const Instruction = struct {
switch (op_en) {
.zo, .i, .zi, .fd, .td, .d => {},
.o, .oi => vex.b = inst.ops[0].reg.isExtended(),
- .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr => {
+ .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => {
const r_op = switch (op_en) {
- .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0],
+ .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0],
.mr, .mri, .mrc => inst.ops[1],
.mvr => inst.ops[2],
.m, .mi, .m1, .mc, .vmi => .none,
@@ -557,7 +557,7 @@ pub const Instruction = struct {
vex.r = r_op.isBaseExtended();
const b_x_op = switch (op_en) {
- .rm, .rmi, .rm0, .vmi => inst.ops[1],
+ .rm, .rmi, .rm0, .vmi, .rmv => inst.ops[1],
.m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
.rvm, .rvmr, .rvmi => inst.ops[2],
else => unreachable,
@@ -588,6 +588,7 @@ pub const Instruction = struct {
else => {},
.vmi => vex.v = inst.ops[0].reg,
.rvm, .rvmr, .rvmi => vex.v = inst.ops[1].reg,
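+ // .rmv places the third operand, the shift count register, in VEX.vvvv, as sarx/shlx/shrx require.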
+ .rmv => vex.v = inst.ops[2].reg,
}
try encoder.vex(vex);
src/arch/x86_64/Encoding.zig
@@ -177,7 +177,7 @@ pub fn format(
try writer.print("+{s} ", .{tag});
},
.m, .mi, .m1, .mc, .vmi => try writer.print("/{d} ", .{encoding.modRmExt()}),
- .mr, .rm, .rmi, .mri, .mrc, .rm0, .rvm, .rvmr, .rvmi, .mvr => try writer.writeAll("/r "),
+ .mr, .rm, .rmi, .mri, .mrc, .rm0, .rvm, .rvmr, .rvmi, .mvr, .rmv => try writer.writeAll("/r "),
}
switch (encoding.data.op_en) {
@@ -202,7 +202,7 @@ pub fn format(
try writer.print("{s} ", .{tag});
},
.rvmr => try writer.writeAll("/is4 "),
- .zo, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .rvm, .mvr => {},
+ .zo, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .rvm, .mvr, .rmv => {},
}
try writer.print("{s} ", .{@tagName(encoding.mnemonic)});
@@ -260,10 +260,10 @@ pub const Mnemonic = enum {
neg, nop, not,
@"or",
pause, pop, popcnt, popfq, push, pushfq,
- rcl, rcr, ret, rol, ror,
- sal, sar, sbb,
+ rcl, rcr, ret, rol, ror, rorx,
+ sal, sar, sarx, sbb,
scas, scasb, scasd, scasq, scasw,
- shl, shld, shr, shrd, sub, syscall,
+ shl, shld, shlx, shr, shrd, shrx, sub, syscall,
seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae,
setnb, setnbe, setnc, setne, setng, setnge, setnl, setnle, setno, setnp, setns,
setnz, seto, setp, setpe, setpo, sets, setz,
@@ -444,7 +444,7 @@ pub const OpEn = enum {
fd, td,
m1, mc, mi, mr, rm,
rmi, mri, mrc,
- rm0, vmi, rvm, rvmr, rvmi, mvr,
+ rm0, vmi, rvm, rvmr, rvmi, mvr, rmv,
// zig fmt: on
};
@@ -808,6 +808,7 @@ pub const Feature = enum {
avx,
avx2,
bmi,
+ bmi2,
f16c,
fma,
lzcnt,
src/arch/x86_64/encodings.zig
@@ -1287,6 +1287,16 @@ pub const table = [_]Entry{
.{ .sha256rnds2, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x0f, 0x38, 0xcb }, 0, .none, .sha },
// AVX
+ .{ .rorx, .rmi, &.{ .r32, .rm32, .imm8 }, &.{ 0xf2, 0x0f, 0x3a, 0xf0 }, 0, .vex_lz_w0, .bmi2 },
+ .{ .rorx, .rmi, &.{ .r64, .rm64, .imm8 }, &.{ 0xf2, 0x0f, 0x3a, 0xf0 }, 0, .vex_lz_w1, .bmi2 },
+
+ .{ .sarx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
+ .{ .shlx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
+ .{ .shrx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
+ .{ .sarx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
+ .{ .shlx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
+ .{ .shrx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
+
.{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx },
.{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx },
src/arch/x86_64/Mir.zig
@@ -29,10 +29,14 @@ pub const Inst = struct {
_l,
/// ___ Left Double
_ld,
+ /// ___ Left Without Affecting Flags
+ _lx,
/// ___ Right
_r,
/// ___ Right Double
_rd,
+ /// ___ Right Without Affecting Flags
+ _rx,
/// ___ Above
_a,
@@ -401,9 +405,11 @@ pub const Inst = struct {
ret,
/// Rotate left
/// Rotate right
+ /// Rotate right logical without affecting flags
ro,
/// Arithmetic shift left
/// Arithmetic shift right
+ /// Arithmetic shift right without affecting flags
sa,
/// Integer subtraction with borrow
sbb,
@@ -417,6 +423,8 @@ pub const Inst = struct {
/// Double precision shift left
/// Logical shift right
/// Double precision shift right
+ /// Shift left logical without affecting flags
+ /// Shift right logical without affecting flags
sh,
/// Subtract
/// Subtract packed integers
src/Air.zig
@@ -1229,7 +1229,7 @@ pub const VectorCmp = struct {
op: u32,
pub fn compareOperator(self: VectorCmp) std.math.CompareOperator {
- return @as(std.math.CompareOperator, @enumFromInt(@as(u3, @truncate(self.op))));
+ return @enumFromInt(@as(u3, @intCast(self.op)));
}
pub fn encodeOp(compare_operator: std.math.CompareOperator) u32 {
@@ -1274,11 +1274,11 @@ pub const Cmpxchg = struct {
flags: u32,
pub fn successOrder(self: Cmpxchg) std.builtin.AtomicOrder {
- return @as(std.builtin.AtomicOrder, @enumFromInt(@as(u3, @truncate(self.flags))));
+ return @enumFromInt(@as(u3, @truncate(self.flags)));
}
pub fn failureOrder(self: Cmpxchg) std.builtin.AtomicOrder {
- return @as(std.builtin.AtomicOrder, @enumFromInt(@as(u3, @truncate(self.flags >> 3))));
+ return @enumFromInt(@as(u3, @intCast(self.flags >> 3)));
}
};
@@ -1289,11 +1289,11 @@ pub const AtomicRmw = struct {
flags: u32,
pub fn ordering(self: AtomicRmw) std.builtin.AtomicOrder {
- return @as(std.builtin.AtomicOrder, @enumFromInt(@as(u3, @truncate(self.flags))));
+ return @enumFromInt(@as(u3, @truncate(self.flags)));
}
pub fn op(self: AtomicRmw) std.builtin.AtomicRmwOp {
- return @as(std.builtin.AtomicRmwOp, @enumFromInt(@as(u4, @truncate(self.flags >> 3))));
+ return @enumFromInt(@as(u4, @intCast(self.flags >> 3)));
}
};
src/register_manager.zig
@@ -112,6 +112,9 @@ pub fn RegisterManager(
pub fn indexOfRegIntoTracked(reg: Register) ?TrackedIndex {
return indexOfReg(tracked_registers, reg);
}
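+ /// Like `indexOfRegIntoTracked`, but for a comptime-known register, so the lookup is resolved at compile time.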
+ pub inline fn indexOfKnownRegIntoTracked(comptime reg: Register) ?TrackedIndex {
+ return comptime indexOfRegIntoTracked(reg);
+ }
pub fn regAtTrackedIndex(tracked_index: TrackedIndex) Register {
return tracked_registers[tracked_index];
@@ -124,6 +127,9 @@ pub fn RegisterManager(
pub fn isRegFree(self: Self, reg: Register) bool {
return self.isRegIndexFree(indexOfRegIntoTracked(reg) orelse return true);
}
+ pub fn isKnownRegFree(self: Self, comptime reg: Register) bool {
+ return self.isRegIndexFree(indexOfKnownRegIntoTracked(reg) orelse return true);
+ }
/// Returns whether this register was allocated in the course
/// of this function.
@@ -143,6 +149,9 @@ pub fn RegisterManager(
pub fn isRegLocked(self: Self, reg: Register) bool {
return self.isRegIndexLocked(indexOfRegIntoTracked(reg) orelse return false);
}
+ pub fn isKnownRegLocked(self: Self, comptime reg: Register) bool {
+ return self.isRegIndexLocked(indexOfKnownRegIntoTracked(reg) orelse return false);
+ }
pub const RegisterLock = struct { tracked_index: TrackedIndex };
@@ -176,6 +185,9 @@ pub fn RegisterManager(
pub fn lockRegAssumeUnused(self: *Self, reg: Register) RegisterLock {
return self.lockRegIndexAssumeUnused(indexOfRegIntoTracked(reg) orelse unreachable);
}
+ pub fn lockKnownRegAssumeUnused(self: *Self, comptime reg: Register) RegisterLock {
+ return self.lockRegIndexAssumeUnused(indexOfKnownRegIntoTracked(reg) orelse unreachable);
+ }
/// Like `lockReg` but locks multiple registers.
pub fn lockRegs(
@@ -366,7 +378,7 @@ pub fn RegisterManager(
comptime reg: Register,
inst: ?Air.Inst.Index,
) AllocationError!void {
- return self.getRegIndex((comptime indexOfRegIntoTracked(reg)) orelse return, inst);
+ return self.getRegIndex(indexOfKnownRegIntoTracked(reg) orelse return, inst);
}
/// Allocates the specified register with the specified
test/behavior/x86_64/math.zig
@@ -0,0 +1,230 @@
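+//! Exercises binary operations with every register/memory/immediate operand combination across scalar and vector types.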
+fn testBinary(comptime op: anytype) !void {
+ const testType = struct {
+ fn testType(comptime Type: type, comptime imm_lhs: Type, comptime imm_rhs: Type) !void {
+ const expected = op(Type, imm_lhs, imm_rhs);
+ try struct {
+ fn testOne(actual: @TypeOf(expected)) !void {
+ if (switch (@typeInfo(@TypeOf(expected))) {
+ else => actual != expected,
+ .vector => @reduce(.Or, actual != expected),
+ }) return error.Unexpected;
+ }
+ noinline fn testOps(mem_lhs: Type, mem_rhs: Type) !void {
+ var reg_lhs = mem_lhs;
+ var reg_rhs = mem_rhs;
+ _ = .{ &reg_lhs, &reg_rhs };
+ try testOne(op(Type, reg_lhs, reg_rhs));
+ try testOne(op(Type, reg_lhs, mem_rhs));
+ try testOne(op(Type, reg_lhs, imm_rhs));
+ try testOne(op(Type, mem_lhs, reg_rhs));
+ try testOne(op(Type, mem_lhs, mem_rhs));
+ try testOne(op(Type, mem_lhs, imm_rhs));
+ try testOne(op(Type, imm_lhs, reg_rhs));
+ try testOne(op(Type, imm_lhs, mem_rhs));
+ }
+ }.testOps(imm_lhs, imm_rhs);
+ }
+ }.testType;
+
+ try testType(u8, 0xbb, 0x43);
+ try testType(u16, 0xb8bf, 0x626d);
+ try testType(u32, 0x80d7a2c6, 0xbff6a402);
+ try testType(u64, 0x71138bc6b4a38898, 0x1bc4043de9438c7b);
+ try testType(u128, 0xe05fc132ef2cd8affee00a907f0a851f, 0x29f912a72cfc6a7c6973426a9636da9a);
+
+ try testType(@Vector(16, u8), .{
+ 0xea, 0x80, 0xbb, 0xe8, 0x74, 0x81, 0xc8, 0x66, 0x7b, 0x41, 0x90, 0xcb, 0x30, 0x70, 0x4b, 0x0f,
+ }, .{
+ 0x61, 0x26, 0xbe, 0x47, 0x00, 0x9c, 0x55, 0xa5, 0x59, 0xf0, 0xb2, 0x20, 0x30, 0xaf, 0x82, 0x3e,
+ });
+ try testType(@Vector(32, u8), .{
+ 0xa1, 0x88, 0xc4, 0xf4, 0x77, 0x0b, 0xf5, 0xbb, 0x09, 0x03, 0xbf, 0xf5, 0xcc, 0x7f, 0x6b, 0x2a,
+ 0x4c, 0x05, 0x37, 0xc9, 0x8a, 0xcb, 0x91, 0x23, 0x09, 0x5f, 0xb8, 0x99, 0x4a, 0x75, 0x26, 0xe4,
+ }, .{
+ 0xff, 0x0f, 0x99, 0x49, 0xa6, 0x25, 0xa7, 0xd4, 0xc9, 0x2f, 0x97, 0x6a, 0x01, 0xd6, 0x6e, 0x41,
+ 0xa4, 0xb5, 0x3c, 0x03, 0xea, 0x82, 0x9c, 0x5f, 0xac, 0x07, 0x16, 0x15, 0x1c, 0x64, 0x25, 0x2f,
+ });
+ try testType(@Vector(64, u8), .{
+ 0xaa, 0x08, 0xeb, 0xb2, 0xd7, 0x89, 0x0f, 0x98, 0xda, 0x9f, 0xa6, 0x4e, 0x3c, 0xce, 0x1b, 0x1b,
+ 0x9e, 0x5f, 0x2b, 0xd6, 0x59, 0x26, 0x47, 0x05, 0x2a, 0xb7, 0xd1, 0x10, 0xde, 0xd9, 0x84, 0x00,
+ 0x07, 0xc0, 0xaa, 0x6e, 0xfa, 0x3b, 0x97, 0x85, 0xa8, 0x42, 0xd7, 0xa5, 0x90, 0xe6, 0x10, 0x1a,
+ 0x47, 0x84, 0xe1, 0x3e, 0xb0, 0x70, 0x26, 0x3f, 0xea, 0x24, 0xb8, 0x5f, 0xe3, 0xe3, 0x4c, 0xed,
+ }, .{
+ 0x3b, 0xc5, 0xe0, 0x3d, 0x4f, 0x2e, 0x1d, 0xa9, 0xf7, 0x7b, 0xc7, 0xc1, 0x48, 0xc6, 0xe5, 0x9e,
+ 0x4d, 0xa8, 0x21, 0x37, 0xa1, 0x1a, 0x95, 0x69, 0x89, 0x2f, 0x15, 0x07, 0x3d, 0x7b, 0x69, 0x89,
+ 0xea, 0x87, 0xf0, 0x94, 0x67, 0xf2, 0x3d, 0x04, 0x96, 0x8a, 0xd6, 0x70, 0x7c, 0x16, 0xe7, 0x62,
+ 0xf0, 0x8d, 0x96, 0x65, 0xd1, 0x4a, 0x35, 0x3e, 0x7a, 0x67, 0xa6, 0x1f, 0x37, 0x66, 0xe3, 0x45,
+ });
+ try testType(@Vector(128, u8), .{
+ 0xa1, 0xd0, 0x7b, 0xf9, 0x7b, 0x77, 0x7b, 0x3d, 0x2d, 0x68, 0xc2, 0x7b, 0xb0, 0xb8, 0xd4, 0x7c,
+ 0x1a, 0x1f, 0xd2, 0x92, 0x3e, 0xcb, 0xc1, 0x6b, 0xb9, 0x4d, 0xf1, 0x67, 0x58, 0x8e, 0x77, 0xa6,
+ 0xb9, 0xdf, 0x10, 0x6f, 0xbe, 0xe3, 0x33, 0xb6, 0x93, 0x77, 0x80, 0xef, 0x09, 0x9d, 0x61, 0x40,
+ 0xa2, 0xf4, 0x52, 0x18, 0x9d, 0xe4, 0xb0, 0xaf, 0x0a, 0xa7, 0x0b, 0x09, 0x67, 0x38, 0x71, 0x04,
+ 0x72, 0xa1, 0xd2, 0xfd, 0xf8, 0xf0, 0xa7, 0x23, 0x24, 0x5b, 0x7d, 0xfb, 0x43, 0xba, 0x6c, 0xc4,
+ 0x83, 0x46, 0x0e, 0x4d, 0x6c, 0x92, 0xab, 0x4f, 0xd2, 0x70, 0x9d, 0xfe, 0xce, 0xf8, 0x05, 0x9f,
+ 0x98, 0x36, 0x9c, 0x90, 0x9a, 0xd0, 0xb5, 0x76, 0x16, 0xe8, 0x25, 0xc2, 0xbd, 0x91, 0xab, 0xf9,
+ 0x6f, 0x6c, 0xc5, 0x60, 0xe5, 0x30, 0xf2, 0xb7, 0x59, 0xc4, 0x9c, 0xdd, 0xdf, 0x04, 0x65, 0xd9,
+ }, .{
+ 0xed, 0xe1, 0x8a, 0xf6, 0xf3, 0x8b, 0xfd, 0x1d, 0x3c, 0x87, 0xbf, 0xfe, 0x04, 0x52, 0x15, 0x82,
+ 0x0b, 0xb0, 0xcf, 0xcf, 0xf8, 0x03, 0x9c, 0xef, 0xc1, 0x76, 0x7e, 0xe3, 0xe9, 0xa8, 0x18, 0x90,
+ 0xd4, 0xc4, 0x91, 0x15, 0x68, 0x7f, 0x65, 0xd8, 0xe1, 0xb3, 0x23, 0xc2, 0x7d, 0x84, 0x3b, 0xaf,
+ 0x74, 0x69, 0x07, 0x2a, 0x1b, 0x5f, 0x0e, 0x44, 0x0d, 0x2b, 0x9c, 0x82, 0x41, 0xf9, 0x7f, 0xb5,
+ 0xc4, 0xd9, 0xcb, 0xd3, 0xc5, 0x31, 0x8b, 0x5f, 0xda, 0x09, 0x9b, 0x29, 0xa3, 0xb7, 0x13, 0x0d,
+ 0x55, 0x9b, 0x59, 0x33, 0x2a, 0x59, 0x3a, 0x44, 0x1f, 0xd3, 0x40, 0x4e, 0xde, 0x2c, 0xe4, 0x16,
+ 0xfd, 0xc3, 0x02, 0x74, 0xaa, 0x65, 0xfd, 0xc8, 0x2a, 0x8a, 0xdb, 0xae, 0x44, 0x28, 0x62, 0xa4,
+ 0x56, 0x4f, 0xf1, 0xaa, 0x0a, 0x0f, 0xdb, 0x1b, 0xc8, 0x45, 0x9b, 0x12, 0xb4, 0x1a, 0xe4, 0xa3,
+ });
+
+ try testType(@Vector(8, u16), .{
+ 0xcf61, 0xb121, 0x3cf1, 0x3e9f, 0x43a7, 0x8d69, 0x96f5, 0xc11e,
+ }, .{
+ 0xee30, 0x82f0, 0x270b, 0x1498, 0x4c60, 0x6e72, 0x0b64, 0x02d4,
+ });
+ try testType(@Vector(16, u16), .{
+ 0x9191, 0xd23e, 0xf844, 0xd84a, 0xe907, 0xf1e8, 0x712d, 0x90af,
+ 0x6541, 0x3fa6, 0x92eb, 0xe35a, 0xc0c9, 0xcb47, 0xb790, 0x4453,
+ }, .{
+ 0x21c3, 0x4039, 0x9b71, 0x60bd, 0xcd7f, 0x2ec8, 0x50ba, 0xe810,
+ 0xebd4, 0x06e5, 0xed18, 0x2f66, 0x7e31, 0xe282, 0xad63, 0xb25e,
+ });
+ try testType(@Vector(32, u16), .{
+ 0x6b6a, 0x30a9, 0xc267, 0x2231, 0xbf4c, 0x00bc, 0x9c2c, 0x2928,
+ 0xecad, 0x82df, 0xcfb0, 0xa4e5, 0x909b, 0x1b05, 0xaf40, 0x1fd9,
+ 0xcec6, 0xd8dc, 0xd4b5, 0x6d59, 0x8e3f, 0x4d8a, 0xb83a, 0x808e,
+ 0x47e2, 0x5782, 0x59bf, 0xcefc, 0x5179, 0x3f48, 0x93dc, 0x66d2,
+ }, .{
+ 0x1be8, 0xe98c, 0xf9b3, 0xb008, 0x2f8d, 0xf087, 0xc9b9, 0x75aa,
+ 0xbd16, 0x9540, 0xc5bd, 0x2b2c, 0xd43f, 0x9394, 0x3e1d, 0xf695,
+ 0x167d, 0xff7a, 0xf09d, 0xdff8, 0xdfa2, 0xc779, 0x70b7, 0x01bd,
+ 0x46b3, 0x995a, 0xb7bc, 0xa79d, 0x5542, 0x961e, 0x37cd, 0x9c2a,
+ });
+ try testType(@Vector(64, u16), .{
+ 0x6b87, 0xfd84, 0x436b, 0xe345, 0xfb82, 0x81fc, 0x0992, 0x45f9,
+ 0x5527, 0x1f6d, 0xda46, 0x6a16, 0xf6e1, 0x8fb7, 0x3619, 0xdfe3,
+ 0x64ce, 0x8ac6, 0x3ae8, 0x30e3, 0xec3b, 0x4ba7, 0x02a4, 0xa694,
+ 0x8e68, 0x8f0c, 0x5e30, 0x0e55, 0x6538, 0x9852, 0xea35, 0x7be2,
+ 0xdabd, 0x57e6, 0x5b38, 0x0fb2, 0x2604, 0x85e7, 0x6595, 0x8de9,
+ 0x49b1, 0xe9a2, 0x3758, 0xa4d9, 0x505b, 0xc9d3, 0xddc5, 0x9a43,
+ 0xfd44, 0x50f5, 0x379e, 0x03b6, 0x6375, 0x692f, 0x5586, 0xc717,
+ 0x94dd, 0xee06, 0xb32d, 0x0bb9, 0x0e35, 0x5f8f, 0x0ba4, 0x19a8,
+ }, .{
+ 0xbeeb, 0x3e54, 0x6486, 0x5167, 0xe432, 0x57cf, 0x9cac, 0x922e,
+ 0xd2f8, 0x5614, 0x2e7f, 0x19cf, 0x9a07, 0x0524, 0x168f, 0x4464,
+ 0x4def, 0x83ce, 0x97b4, 0xf269, 0xda5f, 0x28c1, 0x9cc3, 0xfa7c,
+ 0x25a0, 0x912d, 0x25b2, 0xd60d, 0xcd82, 0x0e03, 0x40cc, 0xc9dc,
+ 0x18eb, 0xc609, 0xb06d, 0x29e0, 0xf3c7, 0x997b, 0x8ca2, 0xa750,
+ 0xc9bc, 0x8f0e, 0x3916, 0xd905, 0x94f8, 0x397f, 0x98b5, 0xc61d,
+ 0x05db, 0x3e7a, 0xf750, 0xe8de, 0x3225, 0x81d9, 0x612e, 0x0a7e,
+ 0x2c02, 0xff5b, 0x19ca, 0xbbf5, 0x870e, 0xc9ca, 0x47bb, 0xcfcc,
+ });
+
+ try testType(@Vector(4, u32), .{
+ 0x234d576e, 0x4151cc9c, 0x39f558e4, 0xba935a32,
+ }, .{
+ 0x398f2a9d, 0x4540f093, 0x9225551c, 0x3bac865b,
+ });
+ try testType(@Vector(8, u32), .{
+ 0xb8336635, 0x2fc3182c, 0x27a00123, 0x71587fbe,
+ 0x9cbc65d2, 0x6f4bb0e6, 0x362594ce, 0x9971df38,
+ }, .{
+ 0x5727e734, 0x972b0313, 0xff25f5dc, 0x924f8e55,
+ 0x04920a61, 0xa1c3b334, 0xf52df4b6, 0x5ef72ecc,
+ });
+ try testType(@Vector(16, u32), .{
+ 0xfb566f9e, 0x9ad4691a, 0x5b5f9ec0, 0x5a572d2a,
+ 0x8f2f226b, 0x2dfc7e33, 0x9fb07e32, 0x9d672a2e,
+ 0xbedc3cee, 0x6872428d, 0xbc73a9fd, 0xd4d5f055,
+ 0x69c1e9ee, 0x65038deb, 0x1449061a, 0x48412ec2,
+ }, .{
+ 0x96cbe946, 0x3f24f60b, 0xaeacdc53, 0x7611a8b4,
+ 0x031a67a8, 0x52a26828, 0x75646f4b, 0xb75902c3,
+ 0x1f881f08, 0x834e02a4, 0x5e5b40eb, 0xc75c264d,
+ 0xa8251e09, 0x28e46bbd, 0x12cb1f31, 0x9a2af615,
+ });
+ try testType(@Vector(32, u32), .{
+ 0x131bbb7b, 0xa7311026, 0x9d5e59a0, 0x99b090d6,
+ 0xfe969e2e, 0x04547697, 0x357d3250, 0x43be6d7a,
+ 0x16ecf5c5, 0xf60febcc, 0x1d1e2602, 0x138a96d2,
+ 0x9117ba72, 0x9f185b32, 0xc10e23fd, 0x3e6b7fd8,
+ 0x4dc9be70, 0x2ee30047, 0xaffeab60, 0x7172d362,
+ 0x6154bfcf, 0x5388dc3e, 0xd6e5a76e, 0x8b782f2d,
+ 0xacbef4a2, 0x843aca71, 0x25d8ab5c, 0xe1a63a39,
+ 0xc26212e5, 0x0847b84b, 0xb53541e5, 0x0c8e44db,
+ }, .{
+ 0x4ad92822, 0x715b623f, 0xa5bed8a7, 0x937447a9,
+ 0x7ecb38eb, 0x0a2f3dfc, 0x96f467a2, 0xec882793,
+ 0x41a8707f, 0xf7310656, 0x76217b80, 0x2058e5fc,
+ 0x26682154, 0x87313e31, 0x4bdc480a, 0x193572ff,
+ 0x60b03c75, 0x0fe45908, 0x56c73703, 0xdb86554c,
+ 0xdda2dd7d, 0x34371b27, 0xe4e6ad50, 0x422d1828,
+ 0x1de3801b, 0xdce268d3, 0x20af9ec8, 0x188a591f,
+ 0xf080e943, 0xc8718d14, 0x3f920382, 0x18d101b5,
+ });
+
+ // TODO: implement fallback for pcmpeqq
+ if (!comptime @import("std").Target.x86.featureSetHas(@import("builtin").cpu.features, .sse4_1)) return;
+
+ try testType(@Vector(2, u64), .{
+ 0x4cd89a317b03d430, 0x28998f61842f63a9,
+ }, .{
+ 0x6c34db64af0e217e, 0x57aa5d02cd45dceb,
+ });
+ try testType(@Vector(4, u64), .{
+ 0x946cf7e7484691c9, 0xf4fc5be2a762fcbf,
+ 0x71cc83bc25abaf14, 0xc69cef44c6f833a1,
+ }, .{
+ 0x9f90cbd6c3ce1d4e, 0x182f65295dff4e84,
+ 0x4dfe62c59fed0040, 0x18402347c1db1999,
+ });
+ try testType(@Vector(8, u64), .{
+ 0x92c6281333943e2c, 0xa97750504668efb5,
+ 0x234be51057c0181f, 0xefbc1f407f3df4fb,
+ 0x8da6cc7c39cebb94, 0xb408f7e56feee497,
+ 0x2363f1f8821592ed, 0x01716e800c0619e1,
+ }, .{
+ 0xa617426684147e7e, 0x7542da7ebe093a7b,
+ 0x3f21d99ac57606b7, 0x65cd36d697d22de4,
+ 0xed23d6bdf176c844, 0x2d4573f100ff7b58,
+ 0x4968f4d21b49f8ab, 0xf5d9a205d453e933,
+ });
+ try testType(@Vector(16, u64), .{
+ 0x2f61a4ee66177b4a, 0xf13b286b279f6a93,
+ 0x36b46beb63665318, 0x74294dbde0da98d2,
+ 0x3aa872ba60b936eb, 0xe8f698b36e62600b,
+ 0x9e8930c21a6a1a76, 0x876998b09b8eb03c,
+ 0xa0244771a2ec0adb, 0xb4c72bff3d3ac1a2,
+ 0xd70677210830eced, 0x6622abc1734dd72d,
+ 0x157e2bb0d57d6596, 0x2aac8192fb7ef973,
+ 0xc4a0ca92f34d7b13, 0x04300f8ad1845246,
+ }, .{
+ 0xeaf71dcf0eb76f5d, 0x0e84b1b63dc97139,
+ 0x0f64cc38d23c94a1, 0x12775cf0816349b7,
+ 0xfdcf13387ba48d54, 0xf8d3c672cacd8779,
+ 0xe728c1f5eb56ab1e, 0x05931a34877f7a69,
+ 0x1861a763c8dafd1f, 0x4ac97573ecd5739f,
+ 0x3384414c9bf77b8c, 0x32c15bbd04a5ddc4,
+ 0xbfd88aee1d82ed32, 0x20e91c15b701059a,
+ 0xed533d18f8657f3f, 0x1ddd7cd7f6bab957,
+ });
+}
+
+inline fn bitAnd(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs & rhs) {
+ return lhs & rhs;
+}
+test bitAnd {
+ try testBinary(bitAnd);
+}
+
+inline fn bitOr(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs | rhs) {
+ return lhs | rhs;
+}
+test bitOr {
+ try testBinary(bitOr);
+}
+
+inline fn bitXor(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs ^ rhs) {
+ return lhs ^ rhs;
+}
+test bitXor {
+ try testBinary(bitXor);
+}
test/behavior/x86_64.zig
@@ -0,0 +1,9 @@
+//! CodeGen tests for the x86_64 backend.
+
+const builtin = @import("builtin");
+
+test {
+ if (builtin.zig_backend != .stage2_x86_64) return error.SkipZigTest;
+ if (builtin.object_format == .coff) return error.SkipZigTest;
+ _ = @import("x86_64/math.zig");
+}
test/behavior.zig
@@ -110,6 +110,8 @@ test {
_ = @import("behavior/widening.zig");
_ = @import("behavior/abs.zig");
+ _ = @import("behavior/x86_64.zig");
+
if (builtin.cpu.arch == .wasm32) {
_ = @import("behavior/wasm.zig");
}