Commit a1828ebcda
Changed files (8)
src/arch/x86_64/bits.zig
@@ -479,8 +479,8 @@ pub const RegisterOffset = struct { reg: Register, off: i32 = 0 };
pub const SymbolOffset = struct { sym_index: u32, off: i32 = 0 };
pub const Memory = struct {
- base: Base,
- mod: Mod,
+ base: Base = .none,
+ mod: Mod = .{ .rm = .{} },
pub const Base = union(enum(u2)) {
none,
@@ -503,7 +503,7 @@ pub const Memory = struct {
off: u64,
pub const Rm = struct {
- size: Size,
+ size: Size = .none,
index: Register = .none,
scale: Scale = .@"1",
disp: i32 = 0,
@@ -512,6 +512,7 @@ pub const Memory = struct {
pub const Size = enum(u4) {
none,
+ ptr,
byte,
word,
dword,
@@ -548,9 +549,10 @@ pub const Memory = struct {
};
}
- pub fn bitSize(s: Size) u64 {
+ pub fn bitSize(s: Size, target: *const std.Target) u64 {
return switch (s) {
.none => 0,
+ .ptr => target.ptrBitWidth(),
.byte => 8,
.word => 16,
.dword => 32,
@@ -569,8 +571,11 @@ pub const Memory = struct {
writer: anytype,
) @TypeOf(writer).Error!void {
if (s == .none) return;
- try writer.writeAll(@tagName(s));
- try writer.writeAll(" ptr");
+ if (s != .ptr) {
+ try writer.writeAll(@tagName(s));
+ try writer.writeByte(' ');
+ }
+ try writer.writeAll("ptr");
}
};
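
For reference, a minimal standalone sketch of the `.ptr` size introduced above. `Size` is re-declared here purely for illustration (it is not the compiler's exported type), and the `writeSize` helper is a hypothetical stand-in for the format method in the diff; only the `.ptr => target.ptrBitWidth()` arm and the bare `"ptr"` printing mirror the actual change.

```zig
const std = @import("std");

// Illustration-only re-declaration; mirrors the shape of Memory.Size above.
const Size = enum(u4) {
    none,
    ptr,
    byte,
    word,
    dword,
    qword,

    pub fn bitSize(s: Size, target: *const std.Target) u64 {
        return switch (s) {
            .none => 0,
            // New: a pointer's size is only known once the target is.
            .ptr => target.ptrBitWidth(),
            .byte => 8,
            .word => 16,
            .dword => 32,
            .qword => 64,
        };
    }
};

// Hypothetical helper showing the new printing rule: `.ptr` prints as just
// "ptr", while sized variants keep the "<size> ptr" prefix form.
fn writeSize(s: Size, writer: anytype) !void {
    if (s == .none) return;
    if (s != .ptr) {
        try writer.writeAll(@tagName(s));
        try writer.writeByte(' ');
    }
    try writer.writeAll("ptr");
}

test "ptr follows the target pointer width" {
    const native = @import("builtin").target;
    try std.testing.expectEqual(
        @as(u64, native.ptrBitWidth()),
        Size.ptr.bitSize(&native),
    );
}
```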
src/arch/x86_64/CodeGen.zig
@@ -465,7 +465,7 @@ pub const MCValue = union(enum) {
} },
} else .{ .base = .{ .reg = .ds }, .mod = .{ .off = addr } },
.indirect => |reg_off| .{
- .base = .{ .reg = reg_off.reg },
+ .base = .{ .reg = registerAlias(reg_off.reg, @divExact(function.target.ptrBitWidth(), 8)) },
.mod = .{ .rm = .{
.size = mod_rm.size,
.index = mod_rm.index,
@@ -986,6 +986,7 @@ pub fn generate(
.air = function.air,
.lower = .{
.bin_file = bin_file,
+ .target = function.target,
.allocator = gpa,
.mir = mir,
.cc = cc,
@@ -1074,6 +1075,7 @@ pub fn generateLazy(
.air = function.air,
.lower = .{
.bin_file = bin_file,
+ .target = function.target,
.allocator = gpa,
.mir = mir,
.cc = abi.resolveCallingConvention(.auto, function.target.*),
@@ -1154,6 +1156,7 @@ fn formatWipMir(
const mod = comp.root_mod;
var lower: Lower = .{
.bin_file = data.self.bin_file,
+ .target = data.self.target,
.allocator = data.self.gpa,
.mir = .{
.instructions = data.self.mir_instructions.slice(),
@@ -2514,7 +2517,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
var res: [1]Temp = undefined;
- cg.select2(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Mir.Inst.Tag, switch (air_tag) {
+ cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Mir.Inst.Tag, switch (air_tag) {
else => unreachable,
.bit_and => .@"and",
.bit_or => .@"or",
@@ -2530,7 +2533,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.dst_temps = .{.{ .rc = .sse }},
.each = .{ .once = &.{
- .{ .vp_, mir_tag, .ydst0, .ysrc0, .ysrc1, .none },
+ .{ ._, .vp_, mir_tag, .dst0y, .src0y, .src1y, ._ },
} },
}, .{
.required_features = .{ .avx, null },
@@ -2541,7 +2544,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.dst_temps = .{.{ .rc = .sse }},
.each = .{ .once = &.{
- .{ .v_pd, mir_tag, .ydst0, .ysrc0, .ysrc1, .none },
+ .{ ._, .v_pd, mir_tag, .dst0y, .src0y, .src1y, ._ },
} },
}, .{
.required_features = .{ .avx, null },
@@ -2552,7 +2555,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.dst_temps = .{.{ .rc = .sse }},
.each = .{ .once = &.{
- .{ .vp_, mir_tag, .xdst0, .xsrc0, .xsrc1, .none },
+ .{ ._, .vp_, mir_tag, .dst0x, .src0x, .src1x, ._ },
} },
}, .{
.required_features = .{ .sse2, null },
@@ -2561,9 +2564,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
.{ .src = .{ .mut_xmm, .xmm } },
},
- .dst_temps = .{.{ .src = 0 }},
+ .dst_temps = .{.{ .ref = .src0 }},
.each = .{ .once = &.{
- .{ .p_, mir_tag, .xdst0, .xsrc1, .none, .none },
+ .{ ._, .p_, mir_tag, .dst0x, .src1x, ._, ._ },
} },
}, .{
.required_features = .{ .sse, null },
@@ -2572,9 +2575,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
.{ .src = .{ .mut_xmm, .xmm } },
},
- .dst_temps = .{.{ .src = 0 }},
+ .dst_temps = .{.{ .ref = .src0 }},
.each = .{ .once = &.{
- .{ ._ps, mir_tag, .xdst0, .xsrc1, .none, .none },
+ .{ ._, ._ps, mir_tag, .dst0x, .src1x, ._, ._ },
} },
}, .{
.required_features = .{ .mmx, null },
@@ -2583,12 +2586,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } },
.{ .src = .{ .mut_mm, .mm } },
},
- .dst_temps = .{.{ .src = 0 }},
+ .dst_temps = .{.{ .ref = .src0 }},
.each = .{ .once = &.{
- .{ .p_, mir_tag, .rdst0, .rsrc1, .none, .none },
+ .{ ._, .p_, mir_tag, .dst0q, .src1q, ._, ._ },
} },
}, .{
- .constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8 } },
.{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } },
@@ -2601,12 +2604,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .src = .{ .mut_gpr, .gpr } },
},
.clobbers = .{ .eflags = true },
- .dst_temps = .{.{ .src = 0 }},
+ .dst_temps = .{.{ .ref = .src0 }},
.each = .{ .once = &.{
- .{ ._, mir_tag, .dst0b, .src1b, .none, .none },
+ .{ ._, ._, mir_tag, .dst0b, .src1b, ._, ._ },
} },
}, .{
- .constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm16 } },
.{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } },
@@ -2619,12 +2622,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .src = .{ .mut_gpr, .gpr } },
},
.clobbers = .{ .eflags = true },
- .dst_temps = .{.{ .src = 0 }},
+ .dst_temps = .{.{ .ref = .src0 }},
.each = .{ .once = &.{
- .{ ._, mir_tag, .dst0w, .src1w, .none, .none },
+ .{ ._, ._, mir_tag, .dst0w, .src1w, ._, ._ },
} },
}, .{
- .constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm32 } },
.{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } },
@@ -2637,13 +2640,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .src = .{ .mut_gpr, .gpr } },
},
.clobbers = .{ .eflags = true },
- .dst_temps = .{.{ .src = 0 }},
+ .dst_temps = .{.{ .ref = .src0 }},
.each = .{ .once = &.{
- .{ ._, mir_tag, .edst0, .esrc1, .none, .none },
+ .{ ._, ._, mir_tag, .dst0d, .src1d, ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null },
- .constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
.patterns = &.{
.{ .src = .{ .mut_mem, .simm32 } },
.{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } },
@@ -2656,9 +2659,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .src = .{ .mut_gpr, .gpr } },
},
.clobbers = .{ .eflags = true },
- .dst_temps = .{.{ .src = 0 }},
+ .dst_temps = .{.{ .ref = .src0 }},
.each = .{ .once = &.{
- .{ ._, mir_tag, .rdst0, .rsrc1, .none, .none },
+ .{ ._, ._, mir_tag, .dst0q, .src1q, ._, ._ },
} },
}, .{
.required_features = .{ .avx2, null },
@@ -2669,14 +2672,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .sse } },
.unused,
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{.mem},
.each = .{ .limb = .{
- .of = .ysrc0,
+ .of = .src0y,
.body = &.{
- .{ .v_, .movdqu, .ytmp1, .{ .src_limb = 0 }, .none, .none },
- .{ .vp_, mir_tag, .ytmp1, .ytmp1, .{ .src_limb = 1 }, .none },
- .{ .v_, .movdqu, .{ .dst_limb = 0 }, .ytmp1, .none, .none },
+ .{ ._, .v_dqu, .mov, .tmp1y, .limb(.src0y), ._, ._ },
+ .{ ._, .vp_, mir_tag, .tmp1y, .tmp1y, .limb(.src1y), ._ },
+ .{ ._, .v_dqu, .mov, .limb(.dst0y), .tmp1y, ._, ._ },
},
} },
}, .{
@@ -2688,14 +2694,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .sse } },
.unused,
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{.mem},
.each = .{ .limb = .{
- .of = .ysrc0,
+ .of = .src0y,
.body = &.{
- .{ .v_pd, .movu, .ytmp1, .{ .src_limb = 0 }, .none, .none },
- .{ .v_pd, mir_tag, .ytmp1, .ytmp1, .{ .src_limb = 1 }, .none },
- .{ .v_pd, .movu, .{ .dst_limb = 0 }, .ytmp1, .none, .none },
+ .{ ._, .v_pd, .movu, .tmp1y, .limb(.src0y), ._, ._ },
+ .{ ._, .v_pd, mir_tag, .tmp1y, .tmp1y, .limb(.src1y), ._ },
+ .{ ._, .v_pd, .movu, .limb(.dst0y), .tmp1y, ._, ._ },
},
} },
}, .{
@@ -2707,14 +2716,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .sse } },
.unused,
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{.mem},
.each = .{ .limb = .{
- .of = .xsrc0,
+ .of = .src0x,
.body = &.{
- .{ .v_, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none },
- .{ .vp_, mir_tag, .xtmp1, .xtmp1, .{ .src_limb = 1 }, .none },
- .{ .v_, .movdqu, .{ .dst_limb = 0 }, .xtmp1, .none, .none },
+ .{ ._, .v_dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ },
+ .{ ._, .vp_, mir_tag, .tmp1x, .tmp1x, .limb(.src1x), ._ },
+ .{ ._, .v_dqu, .mov, .limb(.dst0x), .tmp1x, ._, ._ },
},
} },
}, .{
@@ -2726,14 +2738,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .sse } },
.unused,
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{.mem},
.each = .{ .limb = .{
- .of = .xsrc0,
+ .of = .src0x,
.body = &.{
- .{ ._, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none },
- .{ .p_, mir_tag, .xtmp1, .{ .src_limb = 1 }, .none, .none },
- .{ ._, .movdqu, .{ .dst_limb = 0 }, .xtmp1, .none, .none },
+ .{ ._, ._dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ },
+ .{ ._, .p_, mir_tag, .tmp1x, .limb(.src1x), ._, ._ },
+ .{ ._, ._dqu, .mov, .limb(.dst0x), .tmp1x, ._, ._ },
},
} },
}, .{
@@ -2745,14 +2760,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .sse } },
.unused,
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{.mem},
.each = .{ .limb = .{
- .of = .xsrc0,
+ .of = .src0x,
.body = &.{
- .{ ._ps, .movu, .xtmp1, .{ .src_limb = 0 }, .none, .none },
- .{ ._ps, mir_tag, .xtmp1, .{ .src_limb = 1 }, .none, .none },
- .{ ._ps, .movu, .{ .dst_limb = 0 }, .xtmp1, .none, .none },
+ .{ ._, ._ps, .movu, .tmp1x, .limb(.src0x), ._, ._ },
+ .{ ._, ._ps, mir_tag, .tmp1x, .limb(.src1x), ._, ._ },
+ .{ ._, ._ps, .movu, .limb(.dst0x), .tmp1x, ._, ._ },
},
} },
}, .{
@@ -2764,33 +2782,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .mmx } },
.unused,
- },
- .dst_temps = .{.mem},
- .each = .{ .limb = .{
- .of = .rsrc0,
- .body = &.{
- .{ ._q, .mov, .rtmp1, .{ .src_limb = 0 }, .none, .none },
- .{ .p_, mir_tag, .rtmp1, .{ .src_limb = 1 }, .none, .none },
- .{ ._q, .mov, .{ .dst_limb = 0 }, .rtmp1, .none, .none },
- },
- } },
- }, .{
- .required_features = .{ .@"64bit", null },
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
.unused,
},
.dst_temps = .{.mem},
.each = .{ .limb = .{
- .of = .rsrc0,
+ .of = .src0q,
.body = &.{
- .{ ._, .mov, .rtmp1, .{ .src_limb = 0 }, .none, .none },
- .{ ._, mir_tag, .rtmp1, .{ .src_limb = 1 }, .none, .none },
- .{ ._, .mov, .{ .dst_limb = 0 }, .rtmp1, .none, .none },
+ .{ ._, ._q, .mov, .tmp1q, .limb(.src0q), ._, ._ },
+ .{ ._, .p_, mir_tag, .tmp1q, .limb(.src1q), ._, ._ },
+ .{ ._, ._q, .mov, .limb(.dst0q), .tmp1q, ._, ._ },
},
} },
}, .{
@@ -2799,21 +2801,24 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.extra_temps = .{
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
.unused,
},
.dst_temps = .{.mem},
.each = .{ .limb = .{
- .of = .esrc0,
+ .of = .src0p,
.body = &.{
- .{ ._, .mov, .etmp1, .{ .src_limb = 0 }, .none, .none },
- .{ ._, mir_tag, .etmp1, .{ .src_limb = 1 }, .none, .none },
- .{ ._, .mov, .{ .dst_limb = 0 }, .etmp1, .none, .none },
+ .{ ._, ._, .mov, .tmp1p, .limb(.src0p), ._, ._ },
+ .{ ._, ._, mir_tag, .tmp1p, .limb(.src1p), ._, ._ },
+ .{ ._, ._, .mov, .limb(.dst0p), .tmp1p, ._, ._ },
},
} },
} },
}) catch |err2| switch (err2) {
- error.Select2Failed => return cg.fail("failed to select2 {s} {} {} {}", .{
+ error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
@tagName(air_tag),
cg.typeOf(bin_op.lhs).fmt(pt),
ops[0].tracking(cg),
@@ -2875,7 +2880,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.call_never_tail => try cg.airCall(inst, .never_tail),
.call_never_inline => try cg.airCall(inst, .never_inline),
- .cmp_vector, .cmp_vector_optimized => if (use_old) try cg.airCmpVector(inst) else fallback: {
+ .cmp_vector, .cmp_vector_optimized => |air_tag| if (use_old) try cg.airCmpVector(inst) else fallback: {
const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
const extra = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data;
switch (extra.compareOperator()) {
@@ -2887,7 +2892,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
switch (extra.compareOperator()) {
.lt => unreachable,
.lte => unreachable,
- .eq, .neq => |cmp_op| cg.select2(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (cmp_op) {
+ .eq, .neq => |cmp_op| cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (cmp_op) {
else => unreachable,
.eq => .e,
.neq => .ne,
@@ -2895,7 +2900,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
else => unreachable,
inline .e, .ne => |cc| comptime &.{ .{
.required_features = .{ .avx2, null },
- .constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
.patterns = &.{
.{ .src = .{ .ymm, .mem } },
.{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } },
@@ -2911,11 +2916,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.scalar = .byte,
} } }},
.each = .{ .once = &.{
- .{ .vp_b, .cmpeq, .ydst0, .ysrc0, .ysrc1, .none },
+ .{ ._, .vp_b, .cmpeq, .dst0y, .src0y, .src1y, ._ },
} },
}, .{
.required_features = .{ .avx2, null },
- .constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
.patterns = &.{
.{ .src = .{ .ymm, .mem } },
.{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } },
@@ -2931,11 +2936,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.scalar = .word,
} } }},
.each = .{ .once = &.{
- .{ .vp_w, .cmpeq, .ydst0, .ysrc0, .ysrc1, .none },
+ .{ ._, .vp_w, .cmpeq, .dst0y, .src0y, .src1y, ._ },
} },
}, .{
.required_features = .{ .avx2, null },
- .constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
.patterns = &.{
.{ .src = .{ .ymm, .mem } },
.{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } },
@@ -2951,11 +2956,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.scalar = .dword,
} } }},
.each = .{ .once = &.{
- .{ .vp_d, .cmpeq, .ydst0, .ysrc0, .ysrc1, .none },
+ .{ ._, .vp_d, .cmpeq, .dst0y, .src0y, .src1y, ._ },
} },
}, .{
.required_features = .{ .avx2, null },
- .constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
.patterns = &.{
.{ .src = .{ .ymm, .mem } },
.{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } },
@@ -2971,11 +2976,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.scalar = .qword,
} } }},
.each = .{ .once = &.{
- .{ .vp_q, .cmpeq, .ydst0, .ysrc0, .ysrc1, .none },
+ .{ ._, .vp_q, .cmpeq, .dst0y, .src0y, .src1y, ._ },
} },
}, .{
.required_features = .{ .avx, null },
- .constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
.patterns = &.{
.{ .src = .{ .xmm, .mem } },
.{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } },
@@ -2991,11 +2996,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.scalar = .byte,
} } }},
.each = .{ .once = &.{
- .{ .vp_b, .cmpeq, .xdst0, .xsrc0, .xsrc1, .none },
+ .{ ._, .vp_b, .cmpeq, .dst0x, .src0x, .src1x, ._ },
} },
}, .{
.required_features = .{ .avx, null },
- .constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
.patterns = &.{
.{ .src = .{ .xmm, .mem } },
.{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } },
@@ -3011,11 +3016,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.scalar = .word,
} } }},
.each = .{ .once = &.{
- .{ .vp_w, .cmpeq, .xdst0, .xsrc0, .xsrc1, .none },
+ .{ ._, .vp_w, .cmpeq, .dst0x, .src0x, .src1x, ._ },
} },
}, .{
.required_features = .{ .avx, null },
- .constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
.patterns = &.{
.{ .src = .{ .xmm, .mem } },
.{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } },
@@ -3031,11 +3036,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.scalar = .dword,
} } }},
.each = .{ .once = &.{
- .{ .vp_d, .cmpeq, .xdst0, .xsrc0, .xsrc1, .none },
+ .{ ._, .vp_d, .cmpeq, .dst0x, .src0x, .src1x, ._ },
} },
}, .{
.required_features = .{ .avx, null },
- .constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
.patterns = &.{
.{ .src = .{ .xmm, .mem } },
.{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } },
@@ -3051,17 +3056,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.scalar = .qword,
} } }},
.each = .{ .once = &.{
- .{ .vp_q, .cmpeq, .xdst0, .xsrc0, .xsrc1, .none },
+ .{ ._, .vp_q, .cmpeq, .dst0x, .src0x, .src1x, ._ },
} },
}, .{
.required_features = .{ .sse2, null },
- .constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
.patterns = &.{
.{ .src = .{ .mut_xmm, .mem } },
.{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
.{ .src = .{ .mut_xmm, .xmm } },
},
- .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{
+ .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
.kind = .all,
.inverted = switch (cc) {
else => unreachable,
@@ -3071,17 +3076,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.scalar = .byte,
} } }},
.each = .{ .once = &.{
- .{ .p_b, .cmpeq, .xdst0, .xsrc1, .none, .none },
+ .{ ._, .p_b, .cmpeq, .dst0x, .src1x, ._, ._ },
} },
}, .{
.required_features = .{ .sse2, null },
- .constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
.patterns = &.{
.{ .src = .{ .mut_xmm, .mem } },
.{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
.{ .src = .{ .mut_xmm, .xmm } },
},
- .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{
+ .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
.kind = .all,
.inverted = switch (cc) {
else => unreachable,
@@ -3091,17 +3096,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.scalar = .word,
} } }},
.each = .{ .once = &.{
- .{ .p_w, .cmpeq, .xdst0, .xsrc1, .none, .none },
+ .{ ._, .p_w, .cmpeq, .dst0x, .src1x, ._, ._ },
} },
}, .{
.required_features = .{ .sse2, null },
- .constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
.patterns = &.{
.{ .src = .{ .mut_xmm, .mem } },
.{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
.{ .src = .{ .mut_xmm, .xmm } },
},
- .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{
+ .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
.kind = .all,
.inverted = switch (cc) {
else => unreachable,
@@ -3111,17 +3116,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.scalar = .dword,
} } }},
.each = .{ .once = &.{
- .{ .p_d, .cmpeq, .xdst0, .xsrc1, .none, .none },
+ .{ ._, .p_d, .cmpeq, .dst0x, .src1x, ._, ._ },
} },
}, .{
.required_features = .{ .sse4_1, null },
- .constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
.patterns = &.{
.{ .src = .{ .mut_xmm, .mem } },
.{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
.{ .src = .{ .mut_xmm, .xmm } },
},
- .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{
+ .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
.kind = .all,
.inverted = switch (cc) {
else => unreachable,
@@ -3131,17 +3136,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.scalar = .qword,
} } }},
.each = .{ .once = &.{
- .{ .p_q, .cmpeq, .xdst0, .xsrc1, .none, .none },
+ .{ ._, .p_q, .cmpeq, .dst0x, .src1x, ._, ._ },
} },
}, .{
.required_features = .{ .mmx, null },
- .constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
.patterns = &.{
.{ .src = .{ .mut_mm, .mem } },
.{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } },
.{ .src = .{ .mut_mm, .mm } },
},
- .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{
+ .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
.kind = .all,
.inverted = switch (cc) {
else => unreachable,
@@ -3151,17 +3156,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.scalar = .byte,
} } }},
.each = .{ .once = &.{
- .{ .p_b, .cmpeq, .rdst0, .rsrc1, .none, .none },
+ .{ ._, .p_b, .cmpeq, .dst0q, .src1q, ._, ._ },
} },
}, .{
.required_features = .{ .mmx, null },
- .constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
.patterns = &.{
.{ .src = .{ .mut_mm, .mem } },
.{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } },
.{ .src = .{ .mut_mm, .mm } },
},
- .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{
+ .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
.kind = .all,
.inverted = switch (cc) {
else => unreachable,
@@ -3171,17 +3176,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.scalar = .word,
} } }},
.each = .{ .once = &.{
- .{ .p_w, .cmpeq, .rdst0, .rsrc1, .none, .none },
+ .{ ._, .p_w, .cmpeq, .dst0q, .src1q, ._, ._ },
} },
}, .{
.required_features = .{ .mmx, null },
- .constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
.patterns = &.{
.{ .src = .{ .mut_mm, .mem } },
.{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } },
.{ .src = .{ .mut_mm, .mm } },
},
- .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{
+ .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
.kind = .all,
.inverted = switch (cc) {
else => unreachable,
@@ -3191,10 +3196,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.scalar = .dword,
} } }},
.each = .{ .once = &.{
- .{ .p_d, .cmpeq, .rdst0, .rsrc1, .none, .none },
+ .{ ._, .p_d, .cmpeq, .dst0q, .src1q, ._, ._ },
} },
}, .{
- .constraints = .{ .{ .bool_vec = .byte }, .{ .bool_vec = .byte } },
+ .src_constraints = .{ .{ .bool_vec = .byte }, .{ .bool_vec = .byte } },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8 } },
.{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } },
@@ -3207,19 +3212,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .src = .{ .mut_gpr, .gpr } },
},
.clobbers = .{ .eflags = true },
- .dst_temps = .{.{ .src = 0 }},
+ .dst_temps = .{.{ .ref = .src0 }},
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, .xor, .dst0b, .src1b, .none, .none },
- .{ ._, .not, .dst0b, .none, .none, .none },
+ .{ ._, ._, .xor, .dst0b, .src1b, ._, ._ },
+ .{ ._, ._, .not, .dst0b, ._, ._, ._ },
},
.ne => &.{
- .{ ._, .xor, .dst0b, .src1b, .none, .none },
+ .{ ._, ._, .xor, .dst0b, .src1b, ._, ._ },
},
} },
}, .{
- .constraints = .{ .{ .bool_vec = .word }, .{ .bool_vec = .word } },
+ .src_constraints = .{ .{ .bool_vec = .word }, .{ .bool_vec = .word } },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm16 } },
.{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } },
@@ -3232,19 +3237,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .src = .{ .mut_gpr, .gpr } },
},
.clobbers = .{ .eflags = true },
- .dst_temps = .{.{ .src = 0 }},
+ .dst_temps = .{.{ .ref = .src0 }},
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, .xor, .dst0w, .src1w, .none, .none },
- .{ ._, .not, .dst0w, .none, .none, .none },
+ .{ ._, ._, .xor, .dst0w, .src1w, ._, ._ },
+ .{ ._, ._, .not, .dst0w, ._, ._, ._ },
},
.ne => &.{
- .{ ._, .xor, .dst0w, .src1w, .none, .none },
+ .{ ._, ._, .xor, .dst0w, .src1w, ._, ._ },
},
} },
}, .{
- .constraints = .{ .{ .bool_vec = .dword }, .{ .bool_vec = .dword } },
+ .src_constraints = .{ .{ .bool_vec = .dword }, .{ .bool_vec = .dword } },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm32 } },
.{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } },
@@ -3257,20 +3262,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .src = .{ .mut_gpr, .gpr } },
},
.clobbers = .{ .eflags = true },
- .dst_temps = .{.{ .src = 0 }},
+ .dst_temps = .{.{ .ref = .src0 }},
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, .xor, .edst0, .esrc1, .none, .none },
- .{ ._, .not, .edst0, .none, .none, .none },
+ .{ ._, ._, .xor, .dst0d, .src1d, ._, ._ },
+ .{ ._, ._, .not, .dst0d, ._, ._, ._ },
},
.ne => &.{
- .{ ._, .xor, .edst0, .esrc1, .none, .none },
+ .{ ._, ._, .xor, .dst0d, .src1d, ._, ._ },
},
} },
}, .{
.required_features = .{ .@"64bit", null },
- .constraints = .{ .{ .bool_vec = .qword }, .{ .bool_vec = .qword } },
+ .src_constraints = .{ .{ .bool_vec = .qword }, .{ .bool_vec = .qword } },
.patterns = &.{
.{ .src = .{ .mut_mem, .simm32 } },
.{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } },
@@ -3283,326 +3288,1665 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .src = .{ .mut_gpr, .gpr } },
},
.clobbers = .{ .eflags = true },
- .dst_temps = .{.{ .src = 0 }},
+ .dst_temps = .{.{ .ref = .src0 }},
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, .xor, .rdst0, .rsrc1, .none, .none },
- .{ ._, .not, .rdst0, .none, .none, .none },
+ .{ ._, ._, .xor, .dst0q, .src1q, ._, ._ },
+ .{ ._, ._, .not, .dst0q, ._, ._, ._ },
},
.ne => &.{
- .{ ._, .xor, .rdst0, .rsrc1, .none, .none },
+ .{ ._, ._, .xor, .dst0q, .src1q, ._, ._ },
},
} },
- } },
- }) catch |err2| switch (err2) {
- error.Select2Failed => cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, &.{
- .{
- .required_features = &.{.avx2},
- .scalar = .{ .any_int = .byte },
- .loop = .elementwise,
- .mir_tag = .{ .vp_b, .cmpeq },
- .patterns = &.{
- .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
- .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } },
- .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } },
- },
+ }, .{
+ .src_constraints = .{ .any_bool_vec, .any_bool_vec },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .required_features = &.{.avx2},
- .scalar = .{ .any_int = .word },
- .loop = .elementwise,
- .mir_tag = .{ .vp_w, .cmpeq },
- .patterns = &.{
- .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
- .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } },
- .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } },
- },
+ .clobbers = .{ .eflags = true },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
},
- .{
- .required_features = &.{.avx2},
- .scalar = .{ .any_int = .dword },
- .loop = .elementwise,
- .mir_tag = .{ .vp_d, .cmpeq },
- .patterns = &.{
- .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
- .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } },
- .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } },
+ .dst_temps = .{.mem},
+ .each = .{ .once = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .not, .tmp1p, ._, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
},
- },
- .{
- .required_features = &.{.avx2},
- .scalar = .{ .any_int = .qword },
- .loop = .elementwise,
- .mir_tag = .{ .vp_q, .cmpeq },
- .patterns = &.{
- .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
- .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } },
- .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } },
+ .ne => &.{
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
},
+ } },
+ }, .{
+ .required_features = .{ .avx2, null },
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .required_features = &.{.avx},
- .scalar = .{ .any_int = .byte },
- .loop = .elementwise,
- .mir_tag = .{ .vp_b, .cmpeq },
- .patterns = &.{
- .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
- .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } },
- .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } },
- },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
},
- .{
- .required_features = &.{.avx},
- .scalar = .{ .any_int = .word },
- .loop = .elementwise,
- .mir_tag = .{ .vp_w, .cmpeq },
- .patterns = &.{
- .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
- .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } },
- .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } },
+ .dst_temps = .{.mem},
+ .each = .{ .limb_and_mask_limb = .{
+ .of = .src0y,
+ .of_mask = .dst0b,
+ .body = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ },
+ .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0d), .tmp2d, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ },
+ .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
+ .{ ._, ._, .not, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0d), .tmp2d, ._, ._ },
+ },
},
+ } },
+ }, .{
+ .required_features = .{ .avx2, null },
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .required_features = &.{.avx},
- .scalar = .{ .any_int = .dword },
- .loop = .elementwise,
- .mir_tag = .{ .vp_d, .cmpeq },
- .patterns = &.{
- .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
- .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } },
- .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .limb_and_mask_limb = .{
+ .of = .src0y,
+ .of_mask = .dst0w,
+ .body = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ },
+ .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ },
+ .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ },
+ .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ },
+ .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
+ .{ ._, ._, .not, .tmp2w, ._, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ },
+ },
},
+ } },
+ }, .{
+ .required_features = .{ .avx2, null },
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .required_features = &.{.avx},
- .scalar = .{ .any_int = .qword },
- .loop = .elementwise,
- .mir_tag = .{ .vp_q, .cmpeq },
- .patterns = &.{
- .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } },
- .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } },
- .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .limb_and_mask_limb = .{
+ .of = .src0y,
+ .of_mask = .dst0d,
+ .body = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ },
+ .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ },
+ .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0b), .tmp2b, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ },
+ .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ },
+ .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ },
+ .{ ._, ._, .not, .tmp2b, ._, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0b), .tmp2b, ._, ._ },
+ },
},
+ } },
+ }, .{
+ .required_features = .{ .avx2, null },
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .required_features = &.{.sse2},
- .scalar = .{ .any_int = .byte },
- .loop = .elementwise,
- .mir_tag = .{ .p_b, .cmpeq },
- .patterns = &.{
- .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } },
- .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .reg = .rcx } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .limb_and_mask_limb = .{
+ .of = .src0y,
+ .of_mask = .dst0q,
+ .body = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, .v_dqu, .mov, .tmp4y, .limb(.src0y), ._, ._ },
+ .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .limb(.src1y), ._ },
+ .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, .v_dqu, .mov, .tmp4y, .limb(.src0y), ._, ._ },
+ .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .limb(.src1y), ._ },
+ .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ },
+ .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ },
},
+ } },
+ }, .{
+ .required_features = .{ .avx, null },
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .required_features = &.{.sse2},
- .scalar = .{ .any_int = .word },
- .loop = .elementwise,
- .mir_tag = .{ .p_w, .cmpeq },
- .patterns = &.{
- .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } },
- .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .limb_and_mask_limb = .{
+ .of = .src0x,
+ .of_mask = .dst0b,
+ .body = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, .v_dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ },
+ .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .limb(.src1x), ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, .v_dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ },
+ .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .limb(.src1x), ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ ._, ._, .not, .tmp2w, ._, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ },
+ },
},
+ } },
+ }, .{
+ .required_features = .{ .avx, null },
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .required_features = &.{.sse2},
- .scalar = .{ .any_int = .dword },
- .loop = .elementwise,
- .mir_tag = .{ .p_d, .cmpeq },
- .patterns = &.{
- .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } },
- .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .limb_and_mask_limb = .{
+ .of = .src0x,
+ .of_mask = .dst0w,
+ .body = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, .v_dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ },
+ .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .limb(.src1x), ._ },
+ .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, .v_dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ },
+ .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .limb(.src1x), ._ },
+ .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ ._, ._, .not, .tmp2b, ._, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ },
+ },
},
+ } },
+ }, .{
+ .required_features = .{ .avx, null },
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .required_features = &.{.sse4_1},
- .scalar = .{ .any_int = .qword },
- .loop = .elementwise,
- .mir_tag = .{ .p_q, .cmpeq },
- .patterns = &.{
- .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } },
- .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .reg = .rcx } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .limb_and_mask_limb = .{
+ .of = .src0x,
+ .of_mask = .dst0d,
+ .body = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, .v_dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ },
+ .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .limb(.src1x), ._ },
+ .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, .v_dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ },
+ .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .limb(.src1x), ._ },
+ .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ },
},
+ } },
+ }, .{
+ .required_features = .{ .avx, null },
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .required_features = &.{.mmx},
- .scalar = .{ .any_int = .byte },
- .loop = .elementwise,
- .mir_tag = .{ .p_b, .cmpeq },
- .patterns = &.{
- .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } },
- .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .reg = .rcx } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .limb_and_mask_limb = .{
+ .of = .src0x,
+ .of_mask = .dst0q,
+ .body = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, .v_dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ },
+ .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .limb(.src1x), ._ },
+ .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, .v_dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ },
+ .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .limb(.src1x), ._ },
+ .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._, .xor, .tmp3b, .i(0b11), ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ },
},
+ } },
+ }, .{
+ .required_features = .{ .sse2, null },
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .required_features = &.{.mmx},
- .scalar = .{ .any_int = .word },
- .loop = .elementwise,
- .mir_tag = .{ .p_w, .cmpeq },
- .patterns = &.{
- .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } },
- .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .limb_and_mask_limb = .{
+ .of = .src0x,
+ .of_mask = .dst0b,
+ .body = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ },
+ .{ ._, .p_b, .cmpeq, .tmp3x, .limb(.src1x), ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, ._dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ },
+ .{ ._, .p_b, .cmpeq, .tmp3x, .limb(.src1x), ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ ._, ._, .not, .tmp2w, ._, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ },
+ },
},
+ } },
+ }, .{
+ .required_features = .{ .sse2, null },
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .required_features = &.{.mmx},
- .scalar = .{ .any_int = .dword },
- .loop = .elementwise,
- .mir_tag = .{ .p_d, .cmpeq },
- .patterns = &.{
- .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } },
- .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .limb_and_mask_limb = .{
+ .of = .src0x,
+ .of_mask = .dst0w,
+ .body = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ },
+ .{ ._, .p_w, .cmpeq, .tmp3x, .limb(.src1x), ._, ._ },
+ .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, ._dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ },
+ .{ ._, .p_w, .cmpeq, .tmp3x, .limb(.src1x), ._, ._ },
+ .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ ._, ._, .not, .tmp2b, ._, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ },
+ },
},
+ } },
+ }, .{
+ .required_features = .{ .sse2, null },
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .scalar = .bool,
- .clobbers = .{ .eflags = true },
- .invert_result = true,
- .loop = .elementwise,
- .mir_tag = .{ ._, .xor },
- .patterns = &.{
- .{ .ops = &.{ .mem_limb, .{ .implicit = 0 }, .gpr_limb } },
- .{ .ops = &.{ .mem_limb, .gpr_limb, .{ .implicit = 0 } } },
- .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .mem_limb } },
- .{ .ops = &.{ .gpr_limb, .mem_limb, .{ .implicit = 0 } } },
- .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .gpr_limb } },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .reg = .rcx } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .limb_and_mask_limb = .{
+ .of = .src0x,
+ .of_mask = .dst0d,
+ .body = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ },
+ .{ ._, .p_q, .cmpeq, .tmp4x, .limb(.src1x), ._, ._ },
+ .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, ._dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ },
+ .{ ._, .p_d, .cmpeq, .tmp4x, .limb(.src1x), ._, ._ },
+ .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ },
},
+ } },
+ }, .{
+ .required_features = .{ .sse4_1, null },
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .scalar = .{ .any_int = .byte },
- .clobbers = .{ .eflags = true },
- .loop = .elementwise,
- .mir_tag = .{ ._, .cmp },
- .patterns = &.{
- .{ .ops = &.{ .cc_elem, .mem_elem, .gpr_elem } },
- .{ .ops = &.{ .cc_elem, .gpr_elem, .mem_elem } },
- .{ .ops = &.{ .cc_elem, .gpr_elem, .gpr_elem } },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .reg = .rcx } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .limb_and_mask_limb = .{
+ .of = .src0x,
+ .of_mask = .dst0q,
+ .body = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ },
+ .{ ._, .p_q, .cmpeq, .tmp4x, .limb(.src1x), ._, ._ },
+ .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, ._dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ },
+ .{ ._, .p_q, .cmpeq, .tmp4x, .limb(.src1x), ._, ._ },
+ .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._, .xor, .tmp3b, .i(0b11), ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ },
},
+ } },
+ }, .{
+ .required_features = .{ .mmx, null },
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .scalar = .{ .any_int = .word },
- .clobbers = .{ .eflags = true },
- .loop = .elementwise,
- .mir_tag = .{ ._, .cmp },
- .patterns = &.{
- .{ .ops = &.{ .cc_elem, .mem_elem, .gpr_elem } },
- .{ .ops = &.{ .cc_elem, .gpr_elem, .mem_elem } },
- .{ .ops = &.{ .cc_elem, .gpr_elem, .gpr_elem } },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .mmx } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .limb_and_mask_limb = .{
+ .of = .src0q,
+ .of_mask = .dst0b,
+ .body = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._dqu, .mov, .tmp3q, .limb(.src0q), ._, ._ },
+ .{ ._, .p_b, .cmpeq, .tmp3q, .limb(.src1q), ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, ._dqu, .mov, .tmp3q, .limb(.src0q), ._, ._ },
+ .{ ._, .p_b, .cmpeq, .tmp3q, .limb(.src1q), ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ },
+ .{ ._, ._, .not, .tmp2w, ._, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ },
+ },
},
+ } },
+ }, .{
+ .required_features = .{ .mmx, null },
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .scalar = .{ .any_int = .dword },
- .clobbers = .{ .eflags = true },
- .loop = .elementwise,
- .mir_tag = .{ ._, .cmp },
- .patterns = &.{
- .{ .ops = &.{ .cc_elem, .mem_elem, .gpr_elem } },
- .{ .ops = &.{ .cc_elem, .gpr_elem, .mem_elem } },
- .{ .ops = &.{ .cc_elem, .gpr_elem, .gpr_elem } },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .mmx } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .limb_and_mask_limb = .{
+ .of = .src0q,
+ .of_mask = .dst0w,
+ .body = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._dqu, .mov, .tmp3q, .limb(.src0q), ._, ._ },
+ .{ ._, .p_w, .cmpeq, .tmp3q, .limb(.src1q), ._, ._ },
+ .{ ._, .p_b, .ackssw, .tmp3q, .tmp3q, ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, ._dqu, .mov, .tmp3q, .limb(.src0q), ._, ._ },
+ .{ ._, .p_w, .cmpeq, .tmp3q, .limb(.src1q), ._, ._ },
+ .{ ._, .p_b, .ackssw, .tmp3q, .tmp3q, ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ },
+ .{ ._, ._, .not, .tmp2b, ._, ._, ._ },
+ .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ },
+ },
},
+ } },
+ }, .{
+ .required_features = .{ .mmx, null },
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- .{
- .scalar = .{ .any_int = .qword },
- .clobbers = .{ .eflags = true },
- .loop = .elementwise,
- .mir_tag = .{ ._, .cmp },
- .patterns = &.{
- .{ .ops = &.{ .cc_elem, .mem_elem, .gpr_elem } },
- .{ .ops = &.{ .cc_elem, .gpr_elem, .mem_elem } },
- .{ .ops = &.{ .cc_elem, .gpr_elem, .gpr_elem } },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .reg = .rcx } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .mmx } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .limb_and_mask_limb = .{
+ .of = .src0q,
+ .of_mask = .dst0d,
+ .body = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._dqu, .mov, .tmp4q, .limb(.src0q), ._, ._ },
+ .{ ._, .p_q, .cmpeq, .tmp4q, .limb(.src1q), ._, ._ },
+ .{ ._, ._ps, .movmsk, .tmp3d, .tmp4q, ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, ._dqu, .mov, .tmp4q, .limb(.src0q), ._, ._ },
+ .{ ._, .p_q, .cmpeq, .tmp4q, .limb(.src1q), ._, ._ },
+ .{ ._, ._ps, .movmsk, .tmp3d, .tmp4q, ._, ._ },
+ .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ },
},
+ } },
+ }, .{
+ .required_features = .{ .slow_incdec, null },
+ .dst_constraints = .{.{ .bool_vec = .byte }},
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp2b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
}, .{
- .cc = .e,
- .invert_result = switch (cmp_op) {
- .eq => false,
- .neq => true,
- else => unreachable,
+ .dst_constraints = .{.{ .bool_vec = .byte }},
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
},
- }) catch |err| switch (err) {
- error.SelectFailed => return cg.fail("failed to select", .{}),
- else => |e| return e,
- },
- else => |e| return e,
- },
- .gte => unreachable,
- .gt => unreachable,
- }
- if (ops[0].index != res[0].index) try ops[0].die(cg);
- if (ops[1].index != res[0].index) try ops[1].die(cg);
- try res[0].moveTo(inst, cg);
- },
-
- .cmp_lt,
- .cmp_lt_optimized,
- .cmp_lte,
- .cmp_lte_optimized,
- .cmp_gte,
- .cmp_gte_optimized,
- .cmp_gt,
- .cmp_gt_optimized,
- => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) {
- else => unreachable,
- .cmp_lt, .cmp_lt_optimized => .lt,
- .cmp_lte, .cmp_lte_optimized => .lte,
- .cmp_gte, .cmp_gte_optimized => .gte,
- .cmp_gt, .cmp_gt_optimized => .gt,
- }) else {
- const bin_op = air_datas[@intFromEnum(inst)].bin_op;
- const scalar_ty = cg.typeOf(bin_op.lhs).scalarType(zcu);
- const signedness = if (scalar_ty.isAbiInt(zcu))
- scalar_ty.intInfo(zcu).signedness
- else
- .unsigned;
- var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
- var res: [1]Temp = undefined;
- cg.select2(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (signedness) {
- .signed => switch (air_tag) {
- else => unreachable,
- .cmp_lt, .cmp_lt_optimized => .l,
- .cmp_lte, .cmp_lte_optimized => .le,
- .cmp_gte, .cmp_gte_optimized => .ge,
- .cmp_gt, .cmp_gt_optimized => .g,
- },
- .unsigned => switch (air_tag) {
- else => unreachable,
- .cmp_lt, .cmp_lt_optimized => .b,
- .cmp_lte, .cmp_lte_optimized => .be,
- .cmp_gte, .cmp_gte_optimized => .ae,
- .cmp_gt, .cmp_gt_optimized => .a,
- },
- })) {
- else => unreachable,
- inline .l, .le, .ge, .g, .b, .be, .ae, .a => |cc| comptime &.{ .{
- .constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
- .patterns = &.{
- .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .imm8, .gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } },
- },
- .clobbers = .{ .eflags = true },
- .dst_temps = .{.{ .cc = cc.commute() }},
- .each = .{ .once = &.{
- .{ ._, .cmp, .src0b, .src1b, .none, .none },
- } },
- }, .{
- .constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp2b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
+ .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .slow_incdec, null },
+ .dst_constraints = .{.{ .bool_vec = .byte }},
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp2w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .dst_constraints = .{.{ .bool_vec = .byte }},
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp2w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
+ .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .slow_incdec, null },
+ .dst_constraints = .{.{ .bool_vec = .byte }},
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp2d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .dst_constraints = .{.{ .bool_vec = .byte }},
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp2d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
+ .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .slow_incdec },
+ .dst_constraints = .{.{ .bool_vec = .byte }},
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null },
+ .dst_constraints = .{.{ .bool_vec = .byte }},
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
+ .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
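+ // Elements wider than a register: xor the limbs pairwise and or
+ // them into an accumulator, then one test/setcc emits the bit.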
+ .required_features = .{ .slow_incdec, null },
+ .dst_constraints = .{.{ .bool_vec = .byte }},
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ },
+ .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+ .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
+ .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
+ .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .i(1), ._, ._ },
+ .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ },
+ .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .dst_constraints = .{.{ .bool_vec = .byte }},
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ },
+ .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+ .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
+ .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
+ .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ },
+ .{ ._, ._, .dec, .tmp2d, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
+ .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
+ .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ },
+ .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
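+ // The entries below repeat the same loops for result masks of
+ // up to 32 elements, accumulated in a dword register.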
+ .required_features = .{ .slow_incdec, null },
+ .dst_constraints = .{.{ .bool_vec = .dword }},
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .dst_constraints = .{.{ .bool_vec = .dword }},
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
+ .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .slow_incdec, null },
+ .dst_constraints = .{.{ .bool_vec = .dword }},
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .dst_constraints = .{.{ .bool_vec = .dword }},
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
+ .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .slow_incdec, null },
+ .dst_constraints = .{.{ .bool_vec = .dword }},
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .dst_constraints = .{.{ .bool_vec = .dword }},
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
+ .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .slow_incdec },
+ .dst_constraints = .{.{ .bool_vec = .dword }},
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null },
+ .dst_constraints = .{.{ .bool_vec = .dword }},
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
+ .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .slow_incdec, null },
+ .dst_constraints = .{.{ .bool_vec = .dword }},
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ },
+ .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+ .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
+ .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
+ .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .i(1), ._, ._ },
+ .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ },
+ .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .dst_constraints = .{.{ .bool_vec = .dword }},
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ },
+ .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+ .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
+ .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
+ .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ },
+ .{ ._, ._, .dec, .tmp2d, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2b, ._, ._ },
+ .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
+ .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ },
+ .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
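+ // Result masks of up to 64 elements need a qword accumulator,
+ // so these variants additionally require the 64bit feature.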
+ .required_features = .{ .@"64bit", .slow_incdec },
+ .dst_constraints = .{.{ .bool_vec = .qword }},
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null },
+ .dst_constraints = .{.{ .bool_vec = .qword }},
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
+ .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .slow_incdec },
+ .dst_constraints = .{.{ .bool_vec = .qword }},
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null },
+ .dst_constraints = .{.{ .bool_vec = .qword }},
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
+ .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .slow_incdec },
+ .dst_constraints = .{.{ .bool_vec = .qword }},
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null },
+ .dst_constraints = .{.{ .bool_vec = .qword }},
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
+ .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .slow_incdec },
+ .dst_constraints = .{.{ .bool_vec = .qword }},
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null },
+ .dst_constraints = .{.{ .bool_vec = .qword }},
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
+ .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .slow_incdec },
+ .dst_constraints = .{.{ .bool_vec = .qword }},
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ },
+ .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+ .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
+ .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
+ .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .i(1), ._, ._ },
+ .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ },
+ .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null },
+ .dst_constraints = .{.{ .bool_vec = .qword }},
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ },
+ .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+ .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
+ .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
+ .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ },
+ .{ ._, ._, .dec, .tmp2d, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
+ .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
+ .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ },
+ .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
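+ // Masks too wide for a register: collect usize-sized chunks of
+ // result bits in tmp2 and store each completed chunk to dst0.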
+ .required_features = .{ .slow_incdec, null },
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .reg = .ecx } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+ .{ ._, ._, .mov, .tmp4b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp4b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp3p, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2p, .tmp3p, ._, ._ },
+ .{ ._, ._, .add, .tmp1d, .i(1), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .ia(-1, .tmp2, .add_bit_size), ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_access_size), .tmp2p, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .i(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .ia(-1, .tmp2, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0p, .tmp3), .tmp2p, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .reg = .ecx } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+ .{ ._, ._, .mov, .tmp4b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp4b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp3p, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2p, .tmp3p, ._, ._ },
+ .{ ._, ._, .inc, .tmp1d, ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .ia(-1, .tmp2, .add_bit_size), ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_access_size), .tmp2p, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"1:", ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .ia(-1, .tmp2, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0p, .tmp3), .tmp2p, ._, ._ },
+ } },
+ } },
+ }) catch |err2| switch (err2) {
+ error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
+ @tagName(air_tag),
+ cg.typeOf(extra.lhs).fmt(pt),
+ ops[0].tracking(cg),
+ ops[1].tracking(cg),
+ }),
+ else => |e| return e,
+ },
+ .gte => unreachable,
+ .gt => unreachable,
+ }
+ if (ops[0].index != res[0].index) try ops[0].die(cg);
+ if (ops[1].index != res[0].index) try ops[1].die(cg);
+ try res[0].moveTo(inst, cg);
+ },
+
+ .cmp_lt,
+ .cmp_lt_optimized,
+ .cmp_lte,
+ .cmp_lte_optimized,
+ .cmp_gte,
+ .cmp_gte_optimized,
+ .cmp_gt,
+ .cmp_gt_optimized,
+ => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) {
+ else => unreachable,
+ .cmp_lt, .cmp_lt_optimized => .lt,
+ .cmp_lte, .cmp_lte_optimized => .lte,
+ .cmp_gte, .cmp_gte_optimized => .gte,
+ .cmp_gt, .cmp_gt_optimized => .gt,
+ }) else {
+ const bin_op = air_datas[@intFromEnum(inst)].bin_op;
+ const scalar_ty = cg.typeOf(bin_op.lhs).scalarType(zcu);
+ const signedness = if (scalar_ty.isAbiInt(zcu))
+ scalar_ty.intInfo(zcu).signedness
+ else
+ .unsigned;
+ var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
+ var res: [1]Temp = undefined;
+ cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (signedness) {
+ .signed => switch (air_tag) {
+ else => unreachable,
+ .cmp_lt, .cmp_lt_optimized => .l,
+ .cmp_lte, .cmp_lte_optimized => .le,
+ .cmp_gte, .cmp_gte_optimized => .ge,
+ .cmp_gt, .cmp_gt_optimized => .g,
+ },
+ .unsigned => switch (air_tag) {
+ else => unreachable,
+ .cmp_lt, .cmp_lt_optimized => .b,
+ .cmp_lte, .cmp_lte_optimized => .be,
+ .cmp_gte, .cmp_gte_optimized => .ae,
+ .cmp_gt, .cmp_gt_optimized => .a,
+ },
+ })) {
+ else => unreachable,
+ inline .l, .le, .ge, .g, .b, .be, .ae, .a => |cc| comptime &.{ .{
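+ // Patterns tagged .commute swap the cmp operands, so the
+ // destination condition must be commuted to match.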
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .patterns = &.{
+ .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .imm8, .gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } },
+ },
+ .clobbers = .{ .eflags = true },
+ .dst_temps = .{.{ .cc = cc.commute() }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
.patterns = &.{
.{ .src = .{ .mem, .imm8 } },
.{ .src = .{ .gpr, .imm8 } },
@@ -3612,10 +4956,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
.each = .{ .once = &.{
- .{ ._, .cmp, .src0b, .src1b, .none, .none },
+ .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
} },
}, .{
- .constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
.patterns = &.{
.{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } },
.{ .src = .{ .imm16, .gpr }, .commute = .{ 0, 1 } },
@@ -3624,10 +4968,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc.commute() }},
.each = .{ .once = &.{
- .{ ._, .cmp, .src0w, .src1w, .none, .none },
+ .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
} },
}, .{
- .constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
.patterns = &.{
.{ .src = .{ .mem, .imm16 } },
.{ .src = .{ .gpr, .imm16 } },
@@ -3637,10 +4981,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
.each = .{ .once = &.{
- .{ ._, .cmp, .src0w, .src1w, .none, .none },
+ .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
} },
}, .{
- .constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
.patterns = &.{
.{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } },
.{ .src = .{ .imm32, .gpr }, .commute = .{ 0, 1 } },
@@ -3649,10 +4993,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc.commute() }},
.each = .{ .once = &.{
- .{ ._, .cmp, .esrc0, .esrc1, .none, .none },
+ .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
} },
}, .{
- .constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
.patterns = &.{
.{ .src = .{ .mem, .imm32 } },
.{ .src = .{ .gpr, .imm32 } },
@@ -3662,11 +5006,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
.each = .{ .once = &.{
- .{ ._, .cmp, .esrc0, .esrc1, .none, .none },
+ .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null },
- .constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
.patterns = &.{
.{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } },
.{ .src = .{ .simm32, .gpr }, .commute = .{ 0, 1 } },
@@ -3675,11 +5019,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc.commute() }},
.each = .{ .once = &.{
- .{ ._, .cmp, .rsrc0, .rsrc1, .none, .none },
+ .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null },
- .constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
.patterns = &.{
.{ .src = .{ .mem, .simm32 } },
.{ .src = .{ .gpr, .simm32 } },
@@ -3689,60 +5033,38 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
.each = .{ .once = &.{
- .{ ._, .cmp, .rsrc0, .rsrc1, .none, .none },
+ .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
} },
}, .{
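+ // Wide ordered compare: shr restores the saved borrow, sbb
+ // subtracts the next pair of limbs, setc re-saves the borrow, and
+ // the setcc after the last limb produces the final result.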
- .required_features = .{ .@"64bit", null },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
- .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .bool, .kind = .{ .rc = .general_purpose } },
.unused,
- },
- .clobbers = .{ .eflags = true },
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .limb = .{
- .of = .rsrc0,
- .header = &.{
- .{ ._, .xor, .tmp1b, .tmp1b, .none, .none },
- },
- .body = &.{
- .{ ._, .mov, .rtmp0, .{ .src_limb = 0 }, .none, .none },
- .{ ._r, .sh, .tmp1b, .{ .simm32 = 1 }, .none, .none },
- .{ ._, .sbb, .rtmp0, .{ .src_limb = 1 }, .none, .none },
- .{ ._c, .set, .tmp1b, .none, .none, .none },
- .{ .fromCondition(cc), .set, .dst0b, .none, .none, .none },
- },
- } },
- }, .{
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .bool, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
.unused,
},
.clobbers = .{ .eflags = true },
.dst_temps = .{.{ .rc = .general_purpose }},
.each = .{ .limb = .{
- .of = .esrc0,
+ .of = .src0p,
.header = &.{
- .{ ._, .xor, .tmp1b, .tmp1b, .none, .none },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
},
.body = &.{
- .{ ._, .mov, .etmp0, .{ .src_limb = 0 }, .none, .none },
- .{ ._r, .sh, .tmp1b, .{ .simm32 = 1 }, .none, .none },
- .{ ._, .sbb, .etmp0, .{ .src_limb = 1 }, .none, .none },
- .{ ._c, .set, .tmp1b, .none, .none, .none },
- .{ .fromCondition(cc), .set, .dst0b, .none, .none, .none },
+ .{ ._, ._, .mov, .tmp0p, .limb(.src0p), ._, ._ },
+ .{ ._, ._r, .sh, .tmp1b, .i(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp0p, .limb(.src1p), ._, ._ },
+ .{ ._, ._c, .set, .tmp1b, ._, ._, ._ },
+ .{ ._, .fromCondition(cc), .set, .dst0b, ._, ._, ._ },
},
} },
} },
}) catch |err| switch (err) {
- error.Select2Failed => return cg.fail("failed to select2 {s} {} {} {}", .{
+ error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
@tagName(air_tag),
cg.typeOf(bin_op.lhs).fmt(pt),
ops[0].tracking(cg),
@@ -3767,7 +5089,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
});
var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
var res: [1]Temp = undefined;
- cg.select2(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (air_tag) {
+ cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (air_tag) {
else => unreachable,
.cmp_eq, .cmp_eq_optimized => .e,
.cmp_neq, .cmp_neq_optimized => .ne,
@@ -3775,52 +5097,73 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
else => unreachable,
inline .e, .ne => |cc| comptime &.{ .{
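+ // Vector (in)equality: xor the operands and test the result for
+ // all-zero; ptest where available, movmskb-based checks otherwise.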
.required_features = .{ .avx2, null },
- .constraints = .{ .any_int, .any_int },
+ .src_constraints = .{ .any_int, .any_int },
.patterns = &.{
.{ .src = .{ .ymm, .mem } },
.{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } },
.{ .src = .{ .ymm, .ymm } },
},
.clobbers = .{ .eflags = true },
- .extra_temps = .{ .{ .kind = .{ .rc = .sse } }, .unused, .unused },
+ .extra_temps = .{
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
.dst_temps = .{.{ .cc = cc }},
.each = .{ .once = &.{
- .{ .vp_, .xor, .ytmp0, .ysrc0, .ysrc1, .none },
- .{ .vp_, .@"test", .ytmp0, .ytmp0, .none, .none },
+ .{ ._, .vp_, .xor, .tmp0y, .src0y, .src1y, ._ },
+ .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ },
} },
}, .{
.required_features = .{ .avx, null },
- .constraints = .{ .any_int, .any_int },
+ .src_constraints = .{ .any_int, .any_int },
.patterns = &.{
.{ .src = .{ .ymm, .mem } },
.{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } },
.{ .src = .{ .ymm, .ymm } },
},
.clobbers = .{ .eflags = true },
- .extra_temps = .{ .{ .kind = .{ .rc = .sse } }, .unused, .unused },
+ .extra_temps = .{
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
.dst_temps = .{.{ .cc = cc }},
.each = .{ .once = &.{
- .{ .v_pd, .xor, .ytmp0, .ysrc0, .ysrc1, .none },
- .{ .vp_, .@"test", .ytmp0, .ytmp0, .none, .none },
+ .{ ._, .v_pd, .xor, .tmp0y, .src0y, .src1y, ._ },
+ .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ },
} },
}, .{
.required_features = .{ .avx, null },
- .constraints = .{ .any_int, .any_int },
+ .src_constraints = .{ .any_int, .any_int },
.patterns = &.{
.{ .src = .{ .xmm, .mem } },
.{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } },
.{ .src = .{ .xmm, .xmm } },
},
.clobbers = .{ .eflags = true },
- .extra_temps = .{ .{ .kind = .{ .rc = .sse } }, .unused, .unused },
+ .extra_temps = .{
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
.dst_temps = .{.{ .cc = cc }},
.each = .{ .once = &.{
- .{ .vp_, .xor, .xtmp0, .xsrc0, .xsrc1, .none },
- .{ .vp_, .@"test", .xtmp0, .xtmp0, .none, .none },
+ .{ ._, .vp_, .xor, .tmp0x, .src0x, .src1x, ._ },
+ .{ ._, .vp_, .@"test", .tmp0x, .tmp0x, ._, ._ },
} },
}, .{
.required_features = .{ .sse4_1, null },
- .constraints = .{ .any_int, .any_int },
+ .src_constraints = .{ .any_int, .any_int },
.patterns = &.{
.{ .src = .{ .mut_xmm, .mem } },
.{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
@@ -3829,12 +5172,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
.each = .{ .once = &.{
- .{ .p_, .xor, .xsrc0, .xsrc1, .none, .none },
- .{ .p_, .@"test", .xsrc0, .xsrc0, .none, .none },
+ .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ },
+ .{ ._, .p_, .@"test", .src0x, .src0x, ._, ._ },
} },
}, .{
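+ // SSE2 without ptest: pcmpeqb the xor result against zero, then
+ // xor the movmskb mask with 0xffff so ZF is set exactly when all
+ // bytes matched.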
.required_features = .{ .sse2, null },
- .constraints = .{ .any_int, .any_int },
+ .src_constraints = .{ .any_int, .any_int },
.patterns = &.{
.{ .src = .{ .mut_xmm, .mem } },
.{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
@@ -3845,18 +5188,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .sse } },
.unused,
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{.{ .cc = cc }},
.each = .{ .once = &.{
- .{ .p_, .xor, .xtmp1, .xtmp1, .none, .none },
- .{ .p_, .xor, .xsrc0, .xsrc1, .none, .none },
- .{ .p_b, .cmpeq, .xtmp1, .xsrc0, .none, .none },
- .{ .p_b, .movmsk, .etmp0, .xtmp1, .none, .none },
- .{ ._, .xor, .etmp0, .{ .simm32 = std.math.maxInt(u16) }, .none, .none },
+ .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
+ .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ },
+ .{ ._, .p_b, .cmpeq, .tmp1x, .src0x, ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .i(0xffff), ._, ._ },
} },
}, .{
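+ // Same check on 64-bit MMX registers: movmskb yields 8 bits, so
+ // the all-equal mask to cancel is 0xff.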
.required_features = .{ .sse2, .mmx },
- .constraints = .{ .any_int, .any_int },
+ .src_constraints = .{ .any_int, .any_int },
.patterns = &.{
.{ .src = .{ .mut_mm, .mem } },
.{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } },
@@ -3867,17 +5213,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .mmx } },
.unused,
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{.{ .cc = cc }},
.each = .{ .once = &.{
- .{ .p_, .xor, .rtmp1, .rtmp1, .none, .none },
- .{ .p_, .xor, .rsrc0, .rsrc1, .none, .none },
- .{ .p_b, .cmpeq, .rtmp1, .rsrc0, .none, .none },
- .{ .p_b, .movmsk, .etmp0, .rtmp1, .none, .none },
- .{ ._, .xor, .etmp0, .{ .simm32 = std.math.maxInt(u8) }, .none, .none },
+ .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ },
+ .{ ._, .p_, .xor, .src0q, .src1q, ._, ._ },
+ .{ ._, .p_b, .cmpeq, .tmp1q, .src0q, ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .i(0xff), ._, ._ },
} },
}, .{
- .constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
.patterns = &.{
.{ .src = .{ .mem, .imm8 } },
.{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } },
@@ -3890,10 +5239,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
.each = .{ .once = &.{
- .{ ._, .cmp, .src0b, .src1b, .none, .none },
+ .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
} },
}, .{
- .constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
.patterns = &.{
.{ .src = .{ .mem, .imm16 } },
.{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } },
@@ -3906,10 +5255,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
.each = .{ .once = &.{
- .{ ._, .cmp, .src0w, .src1w, .none, .none },
+ .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
} },
}, .{
- .constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
.patterns = &.{
.{ .src = .{ .mem, .imm32 } },
.{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } },
@@ -3922,11 +5271,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
.each = .{ .once = &.{
- .{ ._, .cmp, .esrc0, .esrc1, .none, .none },
+ .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null },
- .constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
.patterns = &.{
.{ .src = .{ .mem, .simm32 } },
.{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } },
@@ -3939,7 +5288,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
.each = .{ .once = &.{
- .{ ._, .cmp, .rsrc0, .rsrc1, .none, .none },
+ .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
} },
}, .{
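+ // Multi-limb vectors: or every per-limb xor of the operands into
+ // a vector accumulator and test it once after the loop.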
.required_features = .{ .avx2, null },
@@ -3950,20 +5299,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .sse } },
.{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{.{ .cc = cc }},
.each = .{ .limb = .{
- .of = .ysrc0,
+ .of = .src0y,
.header = &.{
- .{ .vp_, .xor, .ytmp2, .ytmp2, .ytmp2, .none },
+ .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .tmp2y, ._ },
},
.body = &.{
- .{ .v_, .movdqu, .ytmp1, .{ .src_limb = 0 }, .none, .none },
- .{ .vp_, .xor, .ytmp1, .ytmp1, .{ .src_limb = 1 }, .none },
- .{ .vp_, .@"or", .ytmp2, .ytmp2, .ytmp1, .none },
+ .{ ._, .v_dqu, .mov, .tmp1y, .limb(.src0y), ._, ._ },
+ .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .limb(.src1y), ._ },
+ .{ ._, .vp_, .@"or", .tmp2y, .tmp2y, .tmp1y, ._ },
},
.trailer = &.{
- .{ .vp_, .@"test", .ytmp2, .ytmp2, .none, .none },
+ .{ ._, .vp_, .@"test", .tmp2y, .tmp2y, ._, ._ },
},
} },
}, .{
@@ -3975,20 +5327,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .sse } },
.{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{.{ .cc = cc }},
.each = .{ .limb = .{
- .of = .ysrc0,
+ .of = .src0y,
.header = &.{
- .{ .v_pd, .xor, .ytmp2, .ytmp2, .ytmp2, .none },
+ .{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .tmp2y, ._ },
},
.body = &.{
- .{ .v_pd, .movu, .ytmp1, .{ .src_limb = 0 }, .none, .none },
- .{ .v_pd, .xor, .ytmp1, .ytmp1, .{ .src_limb = 1 }, .none },
- .{ .v_pd, .@"or", .ytmp2, .ytmp2, .ytmp1, .none },
+ .{ ._, .v_pd, .movu, .tmp1y, .limb(.src0y), ._, ._ },
+ .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .limb(.src1y), ._ },
+ .{ ._, .v_pd, .@"or", .tmp2y, .tmp2y, .tmp1y, ._ },
},
.trailer = &.{
- .{ .vp_, .@"test", .ytmp2, .ytmp2, .none, .none },
+ .{ ._, .vp_, .@"test", .tmp2y, .tmp2y, ._, ._ },
},
} },
}, .{
@@ -4000,20 +5355,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .sse } },
.{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{.{ .cc = cc }},
.each = .{ .limb = .{
- .of = .xsrc0,
+ .of = .src0x,
.header = &.{
- .{ .vp_, .xor, .xtmp2, .xtmp2, .xtmp2, .none },
+ .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .tmp2x, ._ },
},
.body = &.{
- .{ .v_, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none },
- .{ .vp_, .xor, .xtmp1, .xtmp1, .{ .src_limb = 1 }, .none },
- .{ .vp_, .@"or", .xtmp2, .xtmp2, .xtmp1, .none },
+ .{ ._, .v_dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ },
+ .{ ._, .vp_, .xor, .tmp1x, .tmp1x, .limb(.src1x), ._ },
+ .{ ._, .vp_, .@"or", .tmp2x, .tmp2x, .tmp1x, ._ },
},
.trailer = &.{
- .{ .vp_, .@"test", .xtmp2, .xtmp2, .none, .none },
+ .{ ._, .vp_, .@"test", .tmp2x, .tmp2x, ._, ._ },
},
} },
}, .{
@@ -4025,20 +5383,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .sse } },
.{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{.{ .cc = cc }},
.each = .{ .limb = .{
- .of = .xsrc0,
+ .of = .src0x,
.header = &.{
- .{ .vp_, .xor, .xtmp2, .xtmp2, .xtmp2, .none },
+ .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .tmp2x, ._ },
},
.body = &.{
- .{ .v_, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none },
- .{ .vp_, .xor, .xtmp1, .xtmp1, .{ .src_limb = 1 }, .none },
- .{ .vp_, .@"or", .xtmp2, .xtmp2, .xtmp1, .none },
+ .{ ._, .v_dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ },
+ .{ ._, .vp_, .xor, .tmp1x, .tmp1x, .limb(.src1x), ._ },
+ .{ ._, .vp_, .@"or", .tmp2x, .tmp2x, .tmp1x, ._ },
},
.trailer = &.{
- .{ .vp_, .@"test", .xtmp2, .xtmp2, .none, .none },
+ .{ ._, .vp_, .@"test", .tmp2x, .tmp2x, ._, ._ },
},
} },
}, .{
@@ -4050,20 +5411,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .sse } },
.{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{.{ .cc = cc }},
.each = .{ .limb = .{
- .of = .xsrc0,
+ .of = .src0x,
.header = &.{
- .{ .p_, .xor, .xtmp2, .xtmp2, .none, .none },
+ .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ },
},
.body = &.{
- .{ ._, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none },
- .{ .p_, .xor, .xtmp1, .{ .src_limb = 1 }, .none, .none },
- .{ .p_, .@"or", .xtmp2, .xtmp1, .none, .none },
+ .{ ._, ._dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ },
+ .{ ._, .p_, .xor, .tmp1x, .limb(.src1x), ._, ._ },
+ .{ ._, .p_, .@"or", .tmp2x, .tmp1x, ._, ._ },
},
.trailer = &.{
- .{ .p_, .@"test", .xtmp2, .xtmp2, .none, .none },
+ .{ ._, .p_, .@"test", .tmp2x, .tmp2x, ._, ._ },
},
} },
}, .{
@@ -4075,23 +5439,26 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .sse } },
.{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{.{ .cc = cc }},
.each = .{ .limb = .{
- .of = .xsrc0,
+ .of = .src0x,
.header = &.{
- .{ .p_, .xor, .xtmp2, .xtmp2, .none, .none },
+ .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ },
},
.body = &.{
- .{ ._, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none },
- .{ .p_, .xor, .xtmp1, .{ .src_limb = 1 }, .none, .none },
- .{ .p_, .@"or", .xtmp2, .xtmp1, .none, .none },
+ .{ ._, ._dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ },
+ .{ ._, .p_, .xor, .tmp1x, .limb(.src1x), ._, ._ },
+ .{ ._, .p_, .@"or", .tmp2x, .tmp1x, ._, ._ },
},
.trailer = &.{
- .{ .p_, .xor, .xtmp1, .xtmp1, .none, .none },
- .{ .p_b, .cmpeq, .xtmp2, .xtmp1, .none, .none },
- .{ .p_b, .movmsk, .etmp0, .xtmp2, .none, .none },
- .{ ._, .xor, .etmp0, .{ .simm32 = std.math.maxInt(u16) }, .none, .none },
+ .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
+ .{ ._, .p_b, .cmpeq, .tmp2x, .tmp1x, ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp0d, .tmp2x, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .i(0xffff), ._, ._ },
},
} },
}, .{
@@ -4103,77 +5470,58 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .mmx } },
.{ .kind = .{ .rc = .mmx } },
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{.{ .cc = cc }},
.each = .{ .limb = .{
- .of = .rsrc0,
+ .of = .src0q,
.header = &.{
- .{ .p_, .xor, .rtmp2, .rtmp2, .none, .none },
+ .{ ._, .p_, .xor, .tmp2q, .tmp2q, ._, ._ },
},
.body = &.{
- .{ ._q, .mov, .rtmp1, .{ .src_limb = 0 }, .none, .none },
- .{ .p_, .xor, .rtmp1, .{ .src_limb = 1 }, .none, .none },
- .{ .p_, .@"or", .rtmp2, .rtmp1, .none, .none },
+ .{ ._, ._q, .mov, .tmp1q, .limb(.src0q), ._, ._ },
+ .{ ._, .p_, .xor, .tmp1q, .limb(.src1q), ._, ._ },
+ .{ ._, .p_, .@"or", .tmp2q, .tmp1q, ._, ._ },
},
.trailer = &.{
- .{ .p_, .xor, .rtmp1, .rtmp1, .none, .none },
- .{ .p_b, .cmpeq, .rtmp2, .rtmp1, .none, .none },
- .{ .p_b, .movmsk, .etmp0, .rtmp2, .none, .none },
- .{ ._, .xor, .etmp0, .{ .simm32 = std.math.maxInt(u8) }, .none, .none },
+ .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ },
+ .{ ._, .p_b, .cmpeq, .tmp2q, .tmp1q, ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp0d, .tmp2q, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .i(0xff), ._, ._ },
},
} },
}, .{
- .required_features = .{ .@"64bit", null },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
- },
- .dst_temps = .{.{ .cc = cc }},
- .each = .{ .limb = .{
- .of = .rsrc0,
- .header = &.{
- .{ ._, .xor, .rtmp2, .rtmp2, .none, .none },
- },
- .body = &.{
- .{ ._, .mov, .rtmp1, .{ .src_limb = 0 }, .none, .none },
- .{ ._, .xor, .rtmp1, .{ .src_limb = 1 }, .none, .none },
- .{ ._, .@"or", .rtmp2, .rtmp1, .none, .none },
- },
- .trailer = &.{
- .{ ._, .@"test", .rtmp2, .rtmp2, .none, .none },
- },
- } },
- }, .{
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{.{ .cc = cc }},
.each = .{ .limb = .{
- .of = .esrc0,
+ .of = .src0p,
.header = &.{
- .{ ._, .xor, .etmp2, .etmp2, .none, .none },
+ .{ ._, ._, .xor, .tmp2p, .tmp2p, ._, ._ },
},
.body = &.{
- .{ ._, .mov, .etmp1, .{ .src_limb = 0 }, .none, .none },
- .{ ._, .xor, .etmp1, .{ .src_limb = 1 }, .none, .none },
- .{ ._, .@"or", .etmp2, .etmp1, .none, .none },
+ .{ ._, ._, .mov, .tmp1p, .limb(.src0p), ._, ._ },
+ .{ ._, ._, .xor, .tmp1p, .limb(.src1p), ._, ._ },
+ .{ ._, ._, .@"or", .tmp2p, .tmp1p, ._, ._ },
},
.trailer = &.{
- .{ ._, .@"test", .etmp2, .etmp2, .none, .none },
+ .{ ._, ._, .@"test", .tmp2p, .tmp2p, ._, ._ },
},
} },
} },
}) catch |err| switch (err) {
- error.Select2Failed => return cg.fail("failed to select2 {s} {} {} {}", .{
+ error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
@tagName(air_tag),
cg.typeOf(bin_op.lhs).fmt(pt),
ops[0].tracking(cg),
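
The `.limb` templates above all share one shape for wide equality compares: the header zeroes an accumulator, the body XORs each pair of source limbs and ORs the result into the accumulator, and the trailer tests the accumulator so the `.cc` destination can consume the flags. The surviving generic pattern uses `p`-suffixed (pointer-width) operands, which is why the separate `.@"64bit"`-gated copy is deleted. A minimal scalar sketch of the same idea, assuming usize limbs (function name hypothetical, not part of the commit):

    const std = @import("std");

    /// Sketch only: scalar equivalent of the limbwise header/body/trailer
    /// templates above, not the generated machine code.
    fn bigIntEql(lhs: []const usize, rhs: []const usize) bool {
        std.debug.assert(lhs.len == rhs.len);
        var acc: usize = 0; // header: xor tmp2, tmp2
        for (lhs, rhs) |l, r| {
            // body: mov tmp1, limb(src0); xor tmp1, limb(src1); or tmp2, tmp1
            acc |= l ^ r;
        }
        // trailer: test tmp2, tmp2 sets ZF for the `.cc` destination
        return acc == 0;
    }
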
@@ -16456,7 +17804,7 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void {
} },
} };
} else {
- if (mnem_size) |size| if (reg.bitSize() != size.bitSize())
+ if (mnem_size) |size| if (reg.bitSize() != size.bitSize(self.target))
return self.fail("invalid register size: '{s}'", .{op_str});
op.* = .{ .reg = reg };
}
@@ -16524,14 +17872,14 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void {
} else if (std.mem.startsWith(u8, op_str, "$")) {
if (std.fmt.parseInt(i32, op_str["$".len..], 0)) |s| {
if (mnem_size) |size| {
- const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - (size.bitSize() - 1));
+ const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - (size.bitSize(self.target) - 1));
if ((if (s < 0) ~s else s) > max)
return self.fail("invalid immediate size: '{s}'", .{op_str});
}
op.* = .{ .imm = .s(s) };
} else |_| if (std.fmt.parseInt(u64, op_str["$".len..], 0)) |u| {
if (mnem_size) |size| {
- const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - size.bitSize());
+ const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - size.bitSize(self.target));
if (u > max)
return self.fail("invalid immediate size: '{s}'", .{op_str});
}
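
A quick check of the bound computation above, for a dword operand (a sketch, not part of the commit): the signed path shifts by 64 - (32 - 1) = 33 and the unsigned path by 64 - 32 = 32, and `(if (s < 0) ~s else s)` maps a negative n to -n - 1, so minInt(i32) is still accepted.

    const std = @import("std");

    test "immediate bounds for a dword operand" {
        const bits = 32; // what size.bitSize(self.target) yields for .dword
        const signed_max = @as(u64, std.math.maxInt(u64)) >> (64 - (bits - 1));
        const unsigned_max = @as(u64, std.math.maxInt(u64)) >> (64 - bits);
        try std.testing.expectEqual(@as(u64, std.math.maxInt(i32)), signed_max);
        try std.testing.expectEqual(@as(u64, std.math.maxInt(u32)), unsigned_max);
        // ~@as(i32, std.math.minInt(i32)) == std.math.maxInt(i32), so the most
        // negative simm32 still passes the `> max` rejection test.
    }
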
@@ -16827,10 +18175,13 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool)
else
.{ ._q, .mov } },
9...16 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ .{ if (aligned) .v_dqa else .v_dqu, .mov }
+ else if (self.hasFeature(.sse2))
+ .{ if (aligned) ._dqa else ._dqu, .mov }
+ else
+ .{ ._ps, if (aligned) .mova else .movu } },
17...32 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
+ return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
else => {},
} else switch (abi_size) {
4 => return .{ .move = if (self.hasFeature(.avx))
@@ -16842,12 +18193,13 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool)
else
.{ ._sd, .mov } },
9...16 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
- else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
- 17...32 => if (self.hasFeature(.avx)) return .{ .move = if (aligned)
- .{ .v_pd, .mova }
+ .{ .v_pd, if (aligned) .mova else .movu }
+ else if (self.hasFeature(.sse2))
+ .{ ._pd, if (aligned) .mova else .movu }
else
- .{ .v_pd, .movu } },
+ .{ ._ps, if (aligned) .mova else .movu } },
+ 17...32 => if (self.hasFeature(.avx))
+ return .{ .move = .{ .v_pd, if (aligned) .mova else .movu } },
else => {},
}
},
@@ -16868,8 +18220,11 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool)
else
.{ ._sd, .mov } },
128 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ .{ if (aligned) .v_dqa else .v_dqu, .mov }
+ else if (self.hasFeature(.sse2))
+ .{ if (aligned) ._dqa else ._dqu, .mov }
+ else
+ .{ ._ps, if (aligned) .mova else .movu } },
else => {},
},
.vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
@@ -16883,65 +18238,62 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool)
.int => switch (ty.childType(zcu).intInfo(zcu).bits) {
1...8 => switch (ty.vectorLen(zcu)) {
1...16 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ .{ if (aligned) .v_dqa else .v_dqu, .mov }
+ else if (self.hasFeature(.sse2))
+ .{ if (aligned) ._dqa else ._dqu, .mov }
+ else
+ .{ ._ps, if (aligned) .mova else .movu } },
17...32 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
+ return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
else => {},
},
9...16 => switch (ty.vectorLen(zcu)) {
1...8 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ .{ if (aligned) .v_dqa else .v_dqu, .mov }
+ else if (self.hasFeature(.sse2))
+ .{ if (aligned) ._dqa else ._dqu, .mov }
+ else
+ .{ ._ps, if (aligned) .mova else .movu } },
9...16 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
+ return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
else => {},
},
17...32 => switch (ty.vectorLen(zcu)) {
1...4 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ .{ if (aligned) .v_dqa else .v_dqu, .mov }
+ else if (self.hasFeature(.sse2))
+ .{ if (aligned) ._dqa else ._dqu, .mov }
+ else
+ .{ ._ps, if (aligned) .mova else .movu } },
5...8 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
+ return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
else => {},
},
33...64 => switch (ty.vectorLen(zcu)) {
1...2 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ .{ if (aligned) .v_dqa else .v_dqu, .mov }
+ else if (self.hasFeature(.sse2))
+ .{ if (aligned) ._dqa else ._dqu, .mov }
+ else
+ .{ ._ps, if (aligned) .mova else .movu } },
3...4 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
+ return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
else => {},
},
65...128 => switch (ty.vectorLen(zcu)) {
1 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ .{ if (aligned) .v_dqa else .v_dqu, .mov }
+ else if (self.hasFeature(.sse2))
+ .{ if (aligned) ._dqa else ._dqu, .mov }
+ else
+ .{ ._ps, if (aligned) .mova else .movu } },
2 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
+ return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
else => {},
},
129...256 => switch (ty.vectorLen(zcu)) {
1 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
+ return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
else => {},
},
else => {},
@@ -16949,13 +18301,13 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool)
.pointer, .optional => if (ty.childType(zcu).isPtrAtRuntime(zcu))
switch (ty.vectorLen(zcu)) {
1...2 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ .{ if (aligned) .v_dqa else .v_dqu, .mov }
+ else if (self.hasFeature(.sse2))
+ .{ if (aligned) ._dqa else ._dqu, .mov }
+ else
+ .{ ._ps, if (aligned) .mova else .movu } },
3...4 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
+ return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
else => {},
}
else
@@ -16963,46 +18315,42 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool)
.float => switch (ty.childType(zcu).floatBits(self.target.*)) {
16 => switch (ty.vectorLen(zcu)) {
1...8 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ .{ if (aligned) .v_dqa else .v_dqu, .mov }
+ else if (self.hasFeature(.sse2))
+ .{ if (aligned) ._dqa else ._dqu, .mov }
+ else
+ .{ ._ps, if (aligned) .mova else .movu } },
9...16 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
+ return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
else => {},
},
32 => switch (ty.vectorLen(zcu)) {
1...4 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
- else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+ .{ .v_ps, if (aligned) .mova else .movu }
+ else
+ .{ ._ps, if (aligned) .mova else .movu } },
5...8 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_ps, .mova }
- else
- .{ .v_ps, .movu } },
+ return .{ .move = .{ .v_ps, if (aligned) .mova else .movu } },
else => {},
},
64 => switch (ty.vectorLen(zcu)) {
1...2 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
- else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
+ .{ .v_pd, if (aligned) .mova else .movu }
+ else
+ .{ ._pd, if (aligned) .mova else .movu } },
3...4 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_pd, .mova }
- else
- .{ .v_pd, .movu } },
+ return .{ .move = .{ .v_pd, if (aligned) .mova else .movu } },
else => {},
},
128 => switch (ty.vectorLen(zcu)) {
1 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ .{ if (aligned) .v_dqa else .v_dqu, .mov }
+ else if (self.hasFeature(.sse2))
+ .{ if (aligned) ._dqa else ._dqu, .mov }
+ else
+ .{ ._ps, if (aligned) .mova else .movu } },
2 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
+ return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
else => {},
},
else => {},
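
The feature ladder introduced throughout moveStrategy is the same in every arm: `movdqa`/`movdqu` are SSE2 instructions, so SSE-only targets fall back to `movaps`/`movups` (the `._ps` fixes), which move the same 16 bytes of integer data. A sketch of the selection, reusing the surrounding `Mir.Inst.FixedTag` naming (helper name hypothetical):

    // Sketch only: the 128-bit move selection repeated above.
    fn int128MoveTag(has_avx: bool, has_sse2: bool, aligned: bool) Mir.Inst.FixedTag {
        if (has_avx) return .{ if (aligned) .v_dqa else .v_dqu, .mov };
        if (has_sse2) return .{ if (aligned) ._dqa else ._dqu, .mov };
        // movaps/movups predate SSE2 and are bitwise-exact for integer data,
        // though they may incur a domain-crossing penalty on some CPUs.
        return .{ ._ps, if (aligned) .mova else .movu };
    }
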
@@ -17211,7 +18559,7 @@ fn genSetReg(
src_reg,
),
.x87, .mmx, .ip => unreachable,
- .sse => try self.asmRegisterRegister(
+ .sse => if (self.hasFeature(.sse2)) try self.asmRegisterRegister(
switch (abi_size) {
1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
@@ -17219,7 +18567,20 @@ fn genSetReg(
},
registerAlias(dst_reg, @max(abi_size, 4)),
src_reg.to128(),
- ),
+ ) else {
+ const frame_index = try self.allocFrameIndex(.init(.{
+ .size = 4,
+ .alignment = .@"4",
+ }));
+ try self.asmMemoryRegister(.{ ._ss, .mov }, .{
+ .base = .{ .frame = frame_index },
+ .mod = .{ .rm = .{ .size = .dword } },
+ }, src_reg.to128());
+ try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(dst_reg, abi_size), .{
+ .base = .{ .frame = frame_index },
+ .mod = .{ .rm = .{ .size = .fromSize(abi_size) } },
+ });
+ },
},
.segment => try self.asmRegisterRegister(
.{ ._, .mov },
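
The new frame-index bounce in the `.sse` arm above exists because `movd`/`movq` between an XMM register and a general-purpose register are themselves SSE2 instructions. With only SSE available, the value leaves the XMM register through a 4-byte stack slot instead: roughly `movss dword ptr [slot], src_xmm` followed by `mov r32, dword ptr [slot]` (illustrative mnemonics, slot and register names hypothetical).
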
@@ -17264,17 +18625,17 @@ fn genSetReg(
.sse => try self.asmRegisterRegister(
@as(?Mir.Inst.FixedTag, switch (ty.scalarType(zcu).zigTypeTag(zcu)) {
else => switch (abi_size) {
- 1...16 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa },
- 17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null,
+ 1...16 => if (self.hasFeature(.avx)) .{ .v_dqa, .mov } else .{ ._dqa, .mov },
+ 17...32 => if (self.hasFeature(.avx)) .{ .v_dqa, .mov } else null,
else => null,
},
.float => switch (ty.scalarType(zcu).floatBits(self.target.*)) {
16, 128 => switch (abi_size) {
2...16 => if (self.hasFeature(.avx))
- .{ .v_, .movdqa }
+ .{ .v_dqa, .mov }
else
- .{ ._, .movdqa },
- 17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null,
+ .{ ._dqa, .mov },
+ 17...32 => if (self.hasFeature(.avx)) .{ .v_dqa, .mov } else null,
else => null,
},
32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova },
@@ -17346,7 +18707,7 @@ fn genSetReg(
const pack_lock = self.register_manager.lockReg(pack_reg);
defer if (pack_lock) |lock| self.register_manager.unlockReg(lock);
- var mask_size: u32 = @intCast(ty.vectorLen(zcu) * @divExact(src_reg_mask.info.scalar.bitSize(), 8));
+ var mask_size: u32 = @intCast(ty.vectorLen(zcu) * @divExact(src_reg_mask.info.scalar.bitSize(self.target), 8));
switch (src_reg_mask.info.scalar) {
else => {},
.word => {
@@ -17355,7 +18716,7 @@ fn genSetReg(
if (has_avx) {
try self.asmRegisterRegisterRegister(.{ .vp_b, .ackssw }, pack_alias, src_alias, src_alias);
} else {
- try self.asmRegisterRegister(.{ ._, .movdqa }, pack_alias, src_alias);
+ try self.asmRegisterRegister(.{ ._dqa, .mov }, pack_alias, src_alias);
try self.asmRegisterRegister(.{ .p_b, .ackssw }, pack_alias, pack_alias);
}
mask_size = std.math.divCeil(u32, mask_size, 2) catch unreachable;
@@ -17592,7 +18953,10 @@ fn genSetMem(
})).write(
self,
.{ .base = base, .mod = .{ .rm = .{
- .size = .fromBitSize(@min(self.memSize(ty).bitSize(), src_alias.bitSize())),
+ .size = .fromBitSize(@min(
+ self.memSize(ty).bitSize(self.target),
+ src_alias.bitSize(),
+ )),
.disp = disp,
} } },
src_alias,
@@ -22460,36 +23824,6 @@ const Temp = struct {
try cg.asmOpOnly(.{ .@"rep _sb", .mov });
}
- // i, m, r
- fn add(lhs: *Temp, rhs: *Temp, cg: *CodeGen) !Temp {
- const res_index = cg.next_temp_index;
- var res: Temp = .{ .index = res_index.toIndex() };
- try cg.select(&.{ &res, lhs, rhs }, .{ ._, .add }, &.{
- .{ .ops = &.{ .{ .match = 1 }, .r, .i } },
- .{ .ops = &.{ .{ .match = 1 }, .m, .i } },
- .{ .ops = &.{ .{ .match = 1 }, .r, .m } },
- .{ .ops = &.{ .{ .match = 1 }, .m, .r } },
- .{ .ops = &.{ .{ .match = 1 }, .r, .r } },
- });
- cg.next_temp_index = @enumFromInt(@intFromEnum(res_index) + 1);
- cg.temp_type[@intFromEnum(res_index)] = lhs.typeOf(cg);
- return res;
- }
-
- fn mul(lhs: *Temp, rhs: *Temp, cg: *CodeGen) !Temp {
- const res_index = cg.next_temp_index;
- var res: Temp = .{ .index = cg.next_temp_index.toIndex() };
- try cg.select(&.{ &res, lhs, rhs }, .{ .i_, .mul }, &.{
- .{ .ops = &.{ .r, .m, .i } },
- .{ .ops = &.{ .r, .r, .i } },
- .{ .ops = &.{ .{ .match = 1 }, .r, .m } },
- .{ .ops = &.{ .{ .match = 1 }, .r, .r } },
- });
- cg.next_temp_index = @enumFromInt(@intFromEnum(res_index) + 1);
- cg.temp_type[@intFromEnum(res_index)] = lhs.typeOf(cg);
- return res;
- }
-
fn moveTo(temp: Temp, inst: Air.Inst.Index, cg: *CodeGen) !void {
if (cg.liveness.isUnused(inst)) try temp.die(cg) else switch (temp.unwrap(cg)) {
.ref => {
@@ -22624,1242 +23958,129 @@ fn tempAllocRegPair(cg: *CodeGen, ty: Type, rs: RegisterManager.RegisterBitSet)
temp_index.tracking(cg).* = .init(
.{ .register_pair = try cg.register_manager.allocRegs(2, temp_index.toIndex(), rs) },
);
- cg.temp_type[@intFromEnum(temp_index)] = ty;
- cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
- return .{ .index = temp_index.toIndex() };
-}
-
-fn tempAllocMem(cg: *CodeGen, ty: Type) !Temp {
- const temp_index = cg.next_temp_index;
- temp_index.tracking(cg).* = .init(
- try cg.allocRegOrMemAdvanced(ty, temp_index.toIndex(), false),
- );
- cg.temp_type[@intFromEnum(temp_index)] = ty;
- cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
- return .{ .index = temp_index.toIndex() };
-}
-
-fn tempFromValue(cg: *CodeGen, ty: Type, value: MCValue) !Temp {
- const temp_index = cg.next_temp_index;
- temp_index.tracking(cg).* = .init(value);
- cg.temp_type[@intFromEnum(temp_index)] = ty;
- try cg.getValue(value, temp_index.toIndex());
- cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
- return .{ .index = temp_index.toIndex() };
-}
-
-fn tempFromOperand(
- cg: *CodeGen,
- inst: Air.Inst.Index,
- op_index: Liveness.OperandInt,
- op_ref: Air.Inst.Ref,
- ignore_death: bool,
-) !Temp {
- const zcu = cg.pt.zcu;
- const ip = &zcu.intern_pool;
-
- if (ignore_death or !cg.liveness.operandDies(inst, op_index)) {
- if (op_ref.toIndex()) |op_inst| return .{ .index = op_inst };
- const val = op_ref.toInterned().?;
- const gop = try cg.const_tracking.getOrPut(cg.gpa, val);
- if (!gop.found_existing) gop.value_ptr.* = .init(init: {
- const const_mcv = try cg.genTypedValue(.fromInterned(val));
- switch (const_mcv) {
- .lea_tlv => |tlv_sym| switch (cg.bin_file.tag) {
- .elf, .macho => {
- if (cg.mod.pic) {
- try cg.spillRegisters(&.{ .rdi, .rax });
- } else {
- try cg.spillRegisters(&.{.rax});
- }
- const frame_index = try cg.allocFrameIndex(.init(.{
- .size = 8,
- .alignment = .@"8",
- }));
- try cg.genSetMem(
- .{ .frame = frame_index },
- 0,
- .usize,
- .{ .lea_symbol = .{ .sym_index = tlv_sym } },
- .{},
- );
- break :init .{ .load_frame = .{ .index = frame_index } };
- },
- else => break :init const_mcv,
- },
- else => break :init const_mcv,
- }
- });
- return cg.tempFromValue(.fromInterned(ip.typeOf(val)), gop.value_ptr.short);
- }
-
- const temp_index = cg.next_temp_index;
- const temp: Temp = .{ .index = temp_index.toIndex() };
- const op_inst = op_ref.toIndex().?;
- const tracking = cg.getResolvedInstValue(op_inst);
- temp_index.tracking(cg).* = tracking.*;
- if (!cg.reuseTemp(temp.index, op_inst, tracking)) return .{ .index = op_ref.toIndex().? };
- cg.temp_type[@intFromEnum(temp_index)] = cg.typeOf(op_ref);
- cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
- return temp;
-}
-
-inline fn tempsFromOperands(cg: *CodeGen, inst: Air.Inst.Index, op_refs: anytype) ![op_refs.len]Temp {
- var temps: [op_refs.len]Temp = undefined;
- inline for (&temps, 0.., op_refs) |*temp, op_index, op_ref| {
- temp.* = try cg.tempFromOperand(inst, op_index, op_ref, inline for (0..op_index) |prev_op_index| {
- if (op_ref == op_refs[prev_op_index]) break true;
- } else false);
- }
- return temps;
-}
-
-const Operand = union(enum) {
- none,
- reg: Register,
- mem: Memory,
- imm: Immediate,
- inst: Mir.Inst.Index,
-};
-
-const Pattern = struct {
- ops: []const Op,
- commute: struct { u8, u8 } = .{ 0, 0 },
-
- const Set = struct {
- required_features: []const std.Target.x86.Feature = &.{},
- scalar: union(enum) {
- any,
- bool,
- float: Memory.Size,
- any_int: Memory.Size,
- signed_int: Memory.Size,
- unsigned_int: Memory.Size,
- any_float_or_int: Memory.Size,
- } = .any,
- clobbers: struct { eflags: bool = false } = .{},
- invert_result: bool = false,
- loop: enum {
- /// only execute the instruction once
- once,
- /// execute the instruction on all groups of non-overlapping bits in the entire value
- bitwise,
- /// for each element, execute the instruction on each limb, propagating the carry flag
- limbwise_carry,
- /// for each element, execute the instruction on each limb, propagating a register
- limbwise_reduce,
- /// for each element, execute the instruction on pairs of limbs, starting from the
- /// least significant, propagating a limb
- limbwise_pairs_forward,
- /// for each element, execute the instruction on pairs of limbs, starting from the
- /// most significant, propagating a limb
- limbwise_pairs_reverse,
- /// for each element, execute the instruction
- elementwise,
- } = .once,
- mir_tag: Mir.Inst.FixedTag,
- final_mir_tag: ?Mir.Inst.FixedTag = null,
- patterns: []const Pattern,
- };
-
- const Op = union(enum) {
- /// reuse another operand
- implicit: u8,
- /// repeat another operand
- explicit: u8,
- /// a condition code
- cc,
- /// any general purpose register
- gpr,
- /// any 64-bit mmx register
- mm,
- /// any 128-bit sse register
- xmm,
- /// any 256-bit sse register
- ymm,
- /// a 64-bit mmx register mask
- mm_mask,
- /// a 128-bit sse register mask
- xmm_mask,
- /// a 256-bit sse register mask
- ymm_mask,
- /// a 64-bit mmx register sign mask
- mm_sign_mask,
- /// a 128-bit sse register sign mask
- xmm_sign_mask,
- /// a 256-bit sse register sign mask
- ymm_sign_mask,
- /// any memory
- mem,
- /// a limb stored in a general purpose register
- gpr_limb,
- /// a limb stored in a 64-bit mmx register
- mm_limb,
- /// a limb stored in a 128-bit sse register
- xmm_limb,
- /// a limb stored in a 256-bit sse register
- ymm_limb,
- /// a limb stored in memory
- mem_limb,
- /// a mutable limb stored in a general purpose register
- mut_gpr_limb,
- /// a mutable limb stored in memory
- mut_mem_limb,
- /// an element stored in a condition code
- cc_elem,
- /// an element stored in a general purpose register
- gpr_elem,
- /// an element stored in memory
- mem_elem,
- /// a limb stored in a 64-bit mmx register mask
- mm_mask_limb,
-    /// a limb stored in a 128-bit sse register mask
-    mm_mask_limb,
-    /// a limb stored in a 256-bit sse register mask
- ymm_mask_limb,
- /// specific immediate
- imm: i8,
-    /// any immediate sign-extended from 32 bits
- simm32,
- /// a temp general purpose register containing all ones
- umax_gpr,
- /// a temp 64-bit mmx register containing all ones
- umax_mm,
- /// a temp 128-bit sse register containing all ones
- umax_xmm,
- /// a temp 256-bit sse register containing all ones
- umax_ymm,
-
- fn matches(op: Op, is_mut: bool, temp: Temp, cg: *CodeGen) bool {
- switch (op) {
- .implicit, .explicit, .cc, .cc_elem => unreachable,
- else => {},
- // temp is undefined
- .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => return true,
- }
- const temp_ty = temp.typeOf(cg);
- const abi_size = temp_ty.abiSize(cg.pt.zcu);
- return switch (op) {
- .implicit, .explicit, .cc, .cc_elem, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable,
- .gpr => abi_size <= 8 and switch (temp.tracking(cg).short) {
- .register => |reg| reg.class() == .general_purpose,
- .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and
- reg_off.off == 0,
- else => cg.regClassForType(temp_ty) == .general_purpose,
- },
- .mm, .mm_mask, .mm_sign_mask => abi_size <= 8 and switch (temp.tracking(cg).short) {
- .register => |reg| reg.class() == .mmx,
- .register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0,
- else => cg.regClassForType(temp_ty) == .mmx,
- },
- .xmm, .xmm_mask, .xmm_sign_mask => abi_size > 8 and abi_size <= 16 and switch (temp.tracking(cg).short) {
- .register => |reg| reg.class() == .sse,
- .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0,
- else => cg.regClassForType(temp_ty) == .sse,
- },
- .ymm, .ymm_mask, .ymm_sign_mask => abi_size > 16 and abi_size <= 32 and switch (temp.tracking(cg).short) {
- .register => |reg| reg.class() == .sse,
- .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0,
- else => cg.regClassForType(temp_ty) == .sse,
- },
- .mem, .mem_limb, .mut_mem_limb, .mem_elem => (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory(),
- .gpr_limb, .mut_gpr_limb, .gpr_elem => abi_size > 8 and switch (temp.tracking(cg).short) {
- .register, .register_pair, .register_triple, .register_quadruple => true,
- else => |mcv| mcv.isMemory(),
- },
- .mm_limb, .mm_mask_limb => abi_size > 8 and switch (temp.tracking(cg).short) {
- inline .register_pair, .register_triple, .register_quadruple => |regs| for (regs) |reg| {
- if (reg.class() != .mmx) break false;
- } else true,
- else => |mcv| mcv.isMemory() and cg.regClassForType(temp_ty) == .mmx,
- },
- .xmm_limb, .xmm_mask_limb => abi_size > 16 and switch (temp.tracking(cg).short) {
- inline .register_pair, .register_triple, .register_quadruple => |regs| for (regs) |reg| {
- if (reg.class() != .sse) break false;
- } else true,
- else => |mcv| mcv.isMemory(),
- },
- .ymm_limb, .ymm_mask_limb => abi_size > 32 and switch (temp.tracking(cg).short) {
- inline .register_pair, .register_triple, .register_quadruple => |regs| for (regs) |reg| {
- if (reg.class() != .sse) break false;
- } else true,
- else => |mcv| mcv.isMemory(),
- },
- .imm => |specific_imm| if (is_mut) unreachable else switch (temp.tracking(cg).short) {
- .immediate => |imm| @as(i64, @bitCast(imm)) == specific_imm,
- else => false,
- },
- .simm32 => if (is_mut) unreachable else switch (temp.tracking(cg).short) {
- .immediate => |imm| abi_size <= 4 or std.math.cast(i32, @as(i64, @bitCast(imm))) != null,
- else => false,
- },
- };
- }
- };
-
- const Instruction = struct {
- mir_tag: Mir.Inst.FixedTag,
- operands: [4]Instruction.Operand,
-
- const Operand = union(enum) {
- regb: u8,
- regw: u8,
- ereg: u8,
- rreg: u8,
- xmm: u8,
- ymm: u8,
- };
- };
-};
-const SelectOptions = struct {
- cc: ?Condition = null,
- invert_result: bool = false,
-};
-fn select(
- cg: *CodeGen,
- dst_temps: []Temp,
- dst_tys: []const Type,
- src_temps: []Temp,
- pattern_sets: []const Pattern.Set,
- opts: SelectOptions,
-) !void {
- var loop: struct {
- element_reloc: Mir.Inst.Index,
- element_offset: Offset,
- element_size: ?u13,
- limb_reloc: Mir.Inst.Index,
- limb_offset: Offset,
- limb_size: ?u8,
- shuffle_temp: ?Temp,
- mask_limb_temp: ?Temp,
- mask_limb_offset: Offset,
- mask_limb_offset_lock: ?RegisterLock,
- mask_limb_bit_size: ?u7,
- mask_store_temp: ?Temp,
- mask_store_reg: ?Register,
- mask_store_bit_size: ?u7,
- remaining_size: ?u64,
-
- const Offset = union(enum) {
- unused,
- known: u31,
- temp: Temp,
- };
- } = .{
- .element_reloc = undefined,
- .element_offset = .unused,
- .element_size = null,
- .limb_reloc = undefined,
- .limb_offset = .unused,
- .limb_size = null,
- .shuffle_temp = null,
- .mask_limb_temp = null,
- .mask_limb_offset = .unused,
- .mask_limb_offset_lock = null,
- .mask_limb_bit_size = null,
- .mask_store_temp = null,
- .mask_store_reg = null,
- .mask_store_bit_size = null,
- .remaining_size = null,
- };
- var extra_temps: [4]?Temp = @splat(null);
- pattern_sets: for (pattern_sets) |pattern_set| {
- for (pattern_set.required_features) |required_feature| if (!cg.hasFeature(required_feature)) continue :pattern_sets;
- for (src_temps) |src_temp| switch (pattern_set.scalar) {
- .any => {},
- .bool => if (src_temp.typeOf(cg).scalarType(cg.pt.zcu).toIntern() != .bool_type) continue :pattern_sets,
- .float => |size| {
- const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu);
- if (!scalar_ty.isRuntimeFloat()) continue :pattern_sets;
- if (scalar_ty.floatBits(cg.target.*) != size.bitSize()) continue :pattern_sets;
- },
- .any_int => |size| {
- const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu);
- if (!scalar_ty.isAbiInt(cg.pt.zcu)) continue :pattern_sets;
- if (scalar_ty.intInfo(cg.pt.zcu).bits > size.bitSize()) continue :pattern_sets;
- },
- .signed_int => |size| {
- const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu);
- if (!scalar_ty.isAbiInt(cg.pt.zcu)) continue :pattern_sets;
- const scalar_info = scalar_ty.intInfo(cg.pt.zcu);
- if (scalar_info.signedness != .signed) continue :pattern_sets;
- if (scalar_info.bits > size.bitSize()) continue :pattern_sets;
- },
- .unsigned_int => |size| {
- const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu);
- if (!scalar_ty.isAbiInt(cg.pt.zcu)) continue :pattern_sets;
- const scalar_info = scalar_ty.intInfo(cg.pt.zcu);
- if (scalar_info.signedness != .unsigned) continue :pattern_sets;
- if (scalar_info.bits > size.bitSize()) continue :pattern_sets;
- },
- .any_float_or_int => |size| {
- const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu);
- if (scalar_ty.isRuntimeFloat()) {
- if (scalar_ty.floatBits(cg.target.*) != size.bitSize()) continue :pattern_sets;
- } else if (scalar_ty.isAbiInt(cg.pt.zcu)) {
- if (scalar_ty.intInfo(cg.pt.zcu).bits > size.bitSize()) continue :pattern_sets;
- } else continue :pattern_sets;
- },
- };
- patterns: for (pattern_set.patterns) |pattern| {
- for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| {
- const ref_src_op, const is_mut = switch (src_op) {
- .implicit, .explicit => |linked_index| .{ pattern.ops[linked_index], true },
- .mut_mem_limb, .mut_gpr_limb => .{ src_op, true },
- else => .{ src_op, false },
- };
- if (!ref_src_op.matches(is_mut, src_temp, cg)) continue :patterns;
- }
-
- for (pattern.ops) |op| switch (op) {
- else => {},
- .cc_elem,
- .mm_mask_limb,
- .xmm_mask_limb,
- .ymm_mask_limb,
- => if (loop.mask_limb_offset_lock == null and !cg.hasFeature(.bmi2)) {
- try cg.register_manager.getKnownReg(.rcx, null);
- loop.mask_limb_offset_lock = cg.register_manager.lockKnownRegAssumeUnused(.rcx);
- },
- };
- while (true) for (src_temps, pattern.ops[dst_temps.len..]) |*src_temp, src_op| {
- if (switch (switch (src_op) {
- .implicit, .explicit => |linked_index| pattern.ops[linked_index],
- else => src_op,
- }) {
- .implicit, .explicit, .cc, .cc_elem => unreachable,
- .gpr => try src_temp.toRegClass(true, .general_purpose, cg),
- .mm, .mm_mask, .mm_sign_mask => try src_temp.toRegClass(true, .mmx, cg),
- .xmm,
- .ymm,
- .xmm_mask,
- .ymm_mask,
- .xmm_sign_mask,
- .ymm_sign_mask,
- => try src_temp.toRegClass(true, .sse, cg),
- .mem => try src_temp.toBase(cg),
- .imm, .simm32 => false,
- .gpr_limb,
- .mm_limb,
- .xmm_limb,
- .ymm_limb,
- .mem_limb,
- .mut_gpr_limb,
- .mut_mem_limb,
- .gpr_elem,
- .mem_elem,
- => switch (src_temp.tracking(cg).short) {
- .register, .register_pair, .register_triple, .register_quadruple => false,
- else => try src_temp.toBase(cg),
- },
- .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => false,
- .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => false,
- }) break;
- } else break;
-
- const invert_result = opts.invert_result != pattern_set.invert_result;
- var dst_is_linked: std.StaticBitSet(4) = .initEmpty();
- var mir_ops_len: usize = 0;
- for (pattern.ops[0..dst_temps.len]) |dst_op| switch (dst_op) {
- else => mir_ops_len += 1,
- .cc, .cc_elem => {},
- };
- const dst_mir_ops_len = mir_ops_len;
- for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| {
- defer mir_ops_len += @intFromBool(src_op != .implicit);
- const linked_src_op, const extra_temp = op: switch (src_op) {
- .implicit, .explicit => |linked_index| {
- if (src_temp.isMut(cg)) {
- dst_temps[linked_index] = src_temp;
- dst_is_linked.set(linked_index);
- }
- break :op .{ pattern.ops[linked_index], &extra_temps[linked_index] };
- },
- else => .{ src_op, &extra_temps[mir_ops_len] },
- };
- const limb_size: u8, const rc = switch (linked_src_op) {
- else => continue,
- .gpr_limb, .mut_gpr_limb, .gpr_elem => .{ @intCast(@divExact(Memory.Size.bitSize(switch (pattern_set.scalar) {
- .any => .qword,
- .bool => unreachable,
- .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size,
- }), 8)), abi.RegisterClass.gp },
- .mm_limb, .mm_mask_limb => .{ 8, @panic("TODO") },
- .xmm_limb, .xmm_mask_limb => .{ 16, abi.RegisterClass.sse },
- .ymm_limb, .ymm_mask_limb => .{ 32, abi.RegisterClass.sse },
- .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => {
- assert(extra_temp.* == null);
- extra_temp.* = try cg.tempAllocReg(.noreturn, switch (linked_src_op) {
- else => unreachable,
- .umax_gpr => abi.RegisterClass.gp,
- .umax_mm => @panic("TODO"),
- .umax_xmm, .umax_ymm => abi.RegisterClass.sse,
- });
- continue;
- },
- };
- assert(loop.limb_size == null or loop.limb_size == limb_size);
- loop.limb_size = limb_size;
- loop.remaining_size = loop.remaining_size orelse src_temp.typeOf(cg).abiSize(cg.pt.zcu);
- const src_mcv = src_temp.tracking(cg).short;
- switch (src_mcv) {
- .register, .register_pair, .register_triple, .register_quadruple => {
- switch (loop.limb_offset) {
- .unused, .temp => loop.limb_offset = .{ .known = 0 },
- .known => {},
- }
- if (switch (linked_src_op) {
- .mut_gpr_limb => true,
- else => !rc.isSet(RegisterManager.indexOfRegIntoTracked(src_mcv.getRegs()[0]).?),
- }) {
- if (loop.shuffle_temp == null) loop.shuffle_temp = try cg.tempAllocReg(.noreturn, abi.RegisterClass.sse);
- assert(extra_temp.* == null);
- extra_temp.* = try cg.tempAllocReg(.usize, rc);
- }
- },
- else => {
- switch (loop.limb_offset) {
- .unused => loop.limb_offset = .{ .temp = undefined },
- .known, .temp => {},
- }
- assert(extra_temp.* == null);
- extra_temp.* = try cg.tempAllocReg(.usize, rc);
- },
- }
- }
- for (
- 0..,
- dst_temps,
- pattern.ops[0..dst_temps.len],
- dst_tys,
- extra_temps[0..dst_temps.len],
- ) |dst_index, *dst_temp, dst_op, dst_ty, *extra_temp| switch (dst_op) {
- else => if (!dst_is_linked.isSet(dst_index)) {
- dst_temp.* = dst_temp: switch (dst_op) {
- .implicit => unreachable,
- .explicit => |linked_index| dst_temps[linked_index],
- .cc => try cg.tempFromValue(.bool, .{ .eflags = switch (invert_result) {
- false => opts.cc.?,
- true => opts.cc.?.negate(),
- } }),
- .gpr => try cg.tempAllocReg(dst_ty, abi.RegisterClass.gp),
- .mm, .mm_mask, .mm_sign_mask => @panic("TODO"),
- .xmm, .xmm_mask, .xmm_sign_mask => try cg.tempAllocReg(dst_ty, abi.RegisterClass.sse),
- .ymm, .ymm_mask, .ymm_sign_mask => try cg.tempAllocReg(dst_ty, abi.RegisterClass.sse),
- .mem => @panic("TODO"),
- .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb, .mut_gpr_limb, .gpr_elem => {
- if (extra_temp.* == null) extra_temp.* = try cg.tempAllocReg(.noreturn, switch (dst_op) {
- else => unreachable,
- .gpr_limb, .mut_gpr_limb, .gpr_elem => abi.RegisterClass.gp,
- .mm_limb => @panic("TODO"),
- .xmm_limb, .ymm_limb => abi.RegisterClass.sse,
- });
- break :dst_temp try cg.tempAlloc(dst_ty);
- },
- .mem_limb, .mut_mem_limb, .mem_elem => try cg.tempAlloc(dst_ty),
- .cc_elem, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => unreachable, // already checked
- .imm, .simm32, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, // unmodifiable destination
- };
- },
- .cc_elem, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => {
- const scalar_size = @divExact(Memory.Size.bitSize(switch (pattern_set.scalar) {
- .any => .qword,
- .bool => unreachable,
- .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size,
- }), 8);
- const mask_bit_size = @divExact(loop.remaining_size.?, scalar_size);
- const mask_limb_bit_size: u7 = @intCast(@divExact(loop.limb_size.?, scalar_size));
- assert(loop.mask_limb_bit_size == null or loop.mask_limb_bit_size == mask_limb_bit_size);
- loop.mask_limb_bit_size = mask_limb_bit_size;
- const mask_store_bit_size = mask_store_bit_size: {
- // Try to match limb size so that no shifting will be needed.
- if (mask_limb_bit_size % 8 == 0) break :mask_store_bit_size mask_limb_bit_size;
- // If abi size <= 8 the entire value can be stored at once,
- // enabling store forwarding and minimizing store buffer usage.
- // Otherwise, we will be performing shifts that need to wrap at
- // store size, which for x86 requires 32 or 64, so just pick 64
- // for the same reasons as above.
- break :mask_store_bit_size @min(mask_bit_size, 64);
- };
- assert(loop.mask_store_bit_size == null or loop.mask_store_bit_size == mask_store_bit_size);
- loop.mask_store_bit_size = mask_store_bit_size;
- loop.mask_limb_offset = loop.limb_offset;
- if (loop.mask_limb_temp == null) {
- loop.mask_limb_temp = try cg.tempAllocReg(.usize, abi.RegisterClass.gp);
- if (dst_op == .cc_elem and mask_store_bit_size > 8) {
- // setcc only clears 8 bits
- const mask_limb_alias = loop.mask_limb_temp.?.tracking(cg).short.register.to32();
- try cg.spillEflagsIfOccupied();
- try cg.asmRegisterRegister(.{ ._, .xor }, mask_limb_alias, mask_limb_alias);
- }
- }
- if (mask_limb_bit_size < mask_store_bit_size and loop.mask_store_reg == null) {
- loop.mask_store_temp = try cg.tempAllocReg(.usize, abi.RegisterClass.gp);
- loop.mask_store_reg = loop.mask_store_temp.?.tracking(cg).short.register;
- }
- dst_temp.* = if (mask_store_bit_size < mask_bit_size)
- try cg.tempAllocMem(dst_ty)
- else if (loop.mask_store_temp) |mask_store_temp| dst_temp: {
- loop.mask_store_temp = null;
- break :dst_temp mask_store_temp;
- } else try cg.tempAlloc(dst_ty);
- },
- };
- switch (loop.mask_limb_offset) {
- .unused, .known => {},
- .temp => |*mask_limb_offset| {
- mask_limb_offset.* = if (cg.hasFeature(.bmi2))
- try cg.tempAllocReg(.usize, abi.RegisterClass.gp)
- else if (loop.mask_limb_offset_lock != null)
- try cg.tempFromValue(.usize, .{ .register = .rcx })
- else
- unreachable;
- if (loop.mask_store_reg) |mask_store_reg| {
- const mask_store_alias =
- if (loop.mask_store_bit_size.? > 8) mask_store_reg.to32() else mask_store_reg.to8();
- try cg.spillEflagsIfOccupied();
- try cg.asmRegisterRegister(.{ ._, .xor }, mask_store_alias, mask_store_alias);
- }
- },
- }
- if (loop.mask_limb_offset_lock) |lock| cg.register_manager.unlockReg(lock);
- loop.mask_limb_offset_lock = null;
- switch (loop.element_offset) {
- .unused, .known => {},
- .temp => |*element_offset| {
- element_offset.* = try cg.tempAllocReg(.usize, abi.RegisterClass.gp);
- const element_offset_reg = element_offset.tracking(cg).short.register;
- try cg.spillEflagsIfOccupied();
- try cg.asmRegisterRegister(.{ ._, .xor }, element_offset_reg.to32(), element_offset_reg.to32());
- loop.element_reloc = @intCast(cg.mir_instructions.len);
- },
- }
- switch (loop.limb_offset) {
- .unused, .known => {},
- .temp => |*limb_offset| limb_offset.* = try cg.tempAllocReg(.usize, abi.RegisterClass.gp),
- }
- while (true) {
- switch (loop.mask_limb_offset) {
- .unused, .known => {},
- .temp => |mask_limb_offset| {
- const mask_limb_offset_reg = mask_limb_offset.tracking(cg).short.register.to32();
- try cg.spillEflagsIfOccupied();
- try cg.asmRegisterRegister(.{ ._, .xor }, mask_limb_offset_reg, mask_limb_offset_reg);
- },
- }
- switch (loop.limb_offset) {
- .unused, .known => {},
- .temp => |limb_offset| {
- const limb_offset_reg = limb_offset.tracking(cg).short.register.to32();
- try cg.spillEflagsIfOccupied();
- try cg.asmRegisterRegister(.{ ._, .xor }, limb_offset_reg.to32(), limb_offset_reg.to32());
- loop.limb_reloc = @intCast(cg.mir_instructions.len);
- },
- }
- while (true) {
- var mir_ops: [4]Operand = @splat(.none);
- mir_ops_len = dst_mir_ops_len;
- for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| {
- defer mir_ops_len += @intFromBool(src_op != .implicit);
- const mir_op, const linked_src_op, const extra_temp = switch (src_op) {
- .implicit => |linked_index| .{ &mir_ops[linked_index], pattern.ops[linked_index], extra_temps[linked_index] },
- .explicit => |linked_index| .{ &mir_ops[mir_ops_len], pattern.ops[linked_index], extra_temps[linked_index] },
- else => .{ &mir_ops[mir_ops_len], src_op, extra_temps[mir_ops_len] },
- };
- const src_mcv = switch (linked_src_op) {
- else => src_temp,
- // src_temp is undefined
- .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => extra_temp.?,
- }.tracking(cg).short;
- switch (linked_src_op) {
- else => {},
- .gpr_limb,
- .mm_limb,
- .xmm_limb,
- .ymm_limb,
- .mut_gpr_limb,
- .gpr_elem,
- .mm_mask_limb,
- .xmm_mask_limb,
- .ymm_mask_limb,
- => if (extra_temp) |limb_temp| switch (src_mcv) {
- .register, .register_pair, .register_triple, .register_quadruple => {
- const limb_reg = registerAlias(limb_temp.tracking(cg).short.register, loop.limb_size.?);
- const src_regs = src_mcv.getRegs();
- const src_reg_size: u32 = @intCast(switch (src_mcv) {
- .register => src_temp.typeOf(cg).abiSize(cg.pt.zcu),
- else => @divExact(src_regs[0].bitSize(), 8),
- });
- const src_reg = src_regs[loop.limb_offset.known / src_reg_size];
- assert(src_mcv == .register or src_reg.bitSize() == 8 * src_reg_size);
- switch (src_reg.class()) {
- else => unreachable,
- .general_purpose => try cg.asmRegisterRegister(
- .{ ._, .mov },
- limb_reg,
- registerAlias(src_reg, src_reg_size),
- ),
- .sse => {
- assert(src_reg_size == 16);
- const limb_alias_size = @max(loop.limb_size.?, 4);
- const limb_alias = registerAlias(limb_reg, limb_alias_size);
- const src_reg_offset = loop.limb_offset.known % src_reg_size;
- switch (limb_reg_offset: {
- extr: {
- const limb_size = if (cg.hasFeature(.sse4_1)) loop.limb_size.? else 2;
- if (loop.limb_size.? > limb_size) break :extr;
- const limb_offset = src_reg_offset / limb_size;
- if (limb_offset == 0) break :extr;
- try cg.asmRegisterRegisterImmediate(.{ switch (limb_size) {
- else => unreachable,
- 1 => if (cg.hasFeature(.avx)) .vp_b else .p_b,
- 2 => if (cg.hasFeature(.avx)) .vp_w else .p_w,
- 4 => if (cg.hasFeature(.avx)) .vp_d else .p_d,
- 8 => if (cg.hasFeature(.avx)) .vp_q else .p_q,
- }, .extr }, limb_alias, src_reg.to128(), .u(limb_offset));
- break :limb_reg_offset src_reg_offset % limb_size;
- }
- try cg.asmRegisterRegister(
- .{ switch (limb_alias_size) {
- else => unreachable,
- 4 => ._d,
- 8 => ._q,
- }, .mov },
- limb_alias,
- if (src_reg_offset < limb_alias_size) src_reg.to128() else shuffle_reg: {
- const shuffle_reg = loop.shuffle_temp.?.tracking(cg).short.register.to128();
- const mir_fixes: Mir.Inst.Fixes = if (cg.hasFeature(.sse2))
- if (src_temp.typeOf(cg).scalarType(cg.pt.zcu).isRuntimeFloat()) switch (limb_alias_size) {
- else => unreachable,
- 4 => if (cg.hasFeature(.avx)) .v_ps else ._ps,
- 8 => if (cg.hasFeature(.avx)) .v_pd else ._pd,
- } else if (cg.hasFeature(.avx)) .vp_d else .p_d
- else
- ._ps;
- try cg.asmRegisterRegisterImmediate(
- .{ mir_fixes, .shuf },
- shuffle_reg,
- src_reg: switch (mir_fixes) {
- else => unreachable,
- ._ps, ._pd => {
- try cg.asmRegisterRegister(.{ mir_fixes, .mova }, shuffle_reg, src_reg.to128());
- break :src_reg shuffle_reg;
- },
- .p_d => src_reg.to128(),
- },
- .u(switch (mir_fixes) {
- else => unreachable,
- .v_ps, ._ps, .vp_d, .p_d => switch (limb_alias_size) {
- else => unreachable,
- 4 => switch (src_reg_offset) {
- else => unreachable,
- 4...7 => 0b01_01_01_01,
- 8...11 => 0b10_10_10_10,
- 12...15 => 0b11_11_11_11,
- },
- 8 => switch (src_reg_offset) {
- else => unreachable,
- 8...15 => 0b11_10_11_10,
- },
- },
- .v_pd, ._pd => switch (limb_alias_size) {
- else => unreachable,
- 8 => switch (src_reg_offset) {
- else => unreachable,
- 8...15 => 0b1_1,
- },
- },
- }),
- );
- break :shuffle_reg shuffle_reg;
- },
- );
- break :limb_reg_offset src_reg_offset % limb_alias_size;
- }) {
- 0 => {},
- else => |limb_reg_offset| {
- try cg.spillEflagsIfOccupied();
- try cg.asmRegisterImmediate(.{ ._r, .sh }, limb_alias, .u(limb_reg_offset * 8));
- },
- }
- },
- }
- },
- else => try cg.asmRegisterMemory(
- switch (linked_src_op) {
- else => unreachable,
- .gpr_limb, .mut_gpr_limb, .gpr_elem => .{ ._, .mov },
- .mm_limb, .mm_mask_limb => .{ ._q, .mov },
- .xmm_limb,
- .ymm_limb,
- .xmm_mask_limb,
- .ymm_mask_limb,
- => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu },
- },
- registerAlias(limb_temp.tracking(cg).short.register, loop.limb_size.?),
- try src_mcv.mem(cg, switch (loop.limb_offset) {
- .unused => unreachable,
- .known => |limb_offset| .{
- .size = .fromSize(loop.limb_size.?),
- .disp = limb_offset,
- },
- .temp => |limb_offset| .{
- .size = .fromSize(loop.limb_size.?),
- .index = limb_offset.tracking(cg).short.register.to64(),
- },
- }),
- ),
- },
- }
- mir_op.* = switch (linked_src_op) {
- .implicit, .explicit, .cc, .cc_elem => unreachable,
- .gpr => .{ .reg = registerAlias(
- src_mcv.register,
- @intCast(src_temp.typeOf(cg).abiSize(cg.pt.zcu)),
- ) },
- .umax_gpr => .{ .reg = src_mcv.register.to64() }, // TODO: use other op size?
- .mm, .mm_mask, .mm_sign_mask, .umax_mm => .{ .reg = src_mcv.register },
- .xmm, .xmm_mask, .xmm_sign_mask, .umax_xmm => .{ .reg = src_mcv.register.to128() },
- .ymm, .ymm_mask, .ymm_sign_mask, .umax_ymm => .{ .reg = src_mcv.register.to256() },
- .mem => .{ .mem = try src_mcv.mem(cg, .{ .size = cg.memSize(src_temp.typeOf(cg)) }) },
- .gpr_limb,
- .mm_limb,
- .xmm_limb,
- .ymm_limb,
- .mut_gpr_limb,
- .gpr_elem,
- .mm_mask_limb,
- .xmm_mask_limb,
- .ymm_mask_limb,
- => .{ .reg = registerAlias(if (extra_temp) |limb_temp|
- limb_temp.tracking(cg).short.register
- else
- src_mcv.getRegs()[@divExact(loop.limb_offset.known, loop.limb_size.?)], loop.limb_size.?) },
- .mem_limb, .mut_mem_limb, .mem_elem => .{ .mem = switch (src_mcv) {
- .register, .register_pair, .register_triple, .register_quadruple => unreachable,
- else => switch (loop.limb_offset) {
- .unused => unreachable,
- .known => |limb_offset| try src_mcv.mem(cg, .{
- .size = .fromSize(loop.limb_size.?),
- .disp = limb_offset,
- }),
- .temp => |limb_offset| try src_mcv.mem(cg, .{
- .size = .fromSize(loop.limb_size.?),
- .index = limb_offset.tracking(cg).short.register.to64(),
- }),
- },
- } },
- .imm => |imm| .{ .imm = .s(imm) },
- .simm32 => switch (src_temp.typeOf(cg).abiSize(cg.pt.zcu)) {
- else => unreachable,
- 1 => .{ .imm = if (std.math.cast(i8, @as(i64, @bitCast(src_mcv.immediate)))) |small|
- .s(small)
- else
- .u(@as(u8, @intCast(src_mcv.immediate))) },
- 2 => .{ .imm = if (std.math.cast(i16, @as(i64, @bitCast(src_mcv.immediate)))) |small|
- .s(small)
- else
- .u(@as(u16, @intCast(src_mcv.immediate))) },
- 3...8 => .{ .imm = if (std.math.cast(i32, @as(i64, @bitCast(src_mcv.immediate)))) |small|
- .s(small)
- else
- .u(@as(u32, @intCast(src_mcv.immediate))) },
- },
- };
- switch (src_op) {
- else => {},
- .explicit => |linked_index| mir_ops[linked_index] = mir_op.*,
+ cg.temp_type[@intFromEnum(temp_index)] = ty;
+ cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
+ return .{ .index = temp_index.toIndex() };
+}
+
+fn tempAllocMem(cg: *CodeGen, ty: Type) !Temp {
+ const temp_index = cg.next_temp_index;
+ temp_index.tracking(cg).* = .init(
+ try cg.allocRegOrMemAdvanced(ty, temp_index.toIndex(), false),
+ );
+ cg.temp_type[@intFromEnum(temp_index)] = ty;
+ cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
+ return .{ .index = temp_index.toIndex() };
+}
+
+fn tempFromValue(cg: *CodeGen, ty: Type, value: MCValue) !Temp {
+ const temp_index = cg.next_temp_index;
+ temp_index.tracking(cg).* = .init(value);
+ cg.temp_type[@intFromEnum(temp_index)] = ty;
+ try cg.getValue(value, temp_index.toIndex());
+ cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
+ return .{ .index = temp_index.toIndex() };
+}
+
+fn tempFromOperand(
+ cg: *CodeGen,
+ inst: Air.Inst.Index,
+ op_index: Liveness.OperandInt,
+ op_ref: Air.Inst.Ref,
+ ignore_death: bool,
+) !Temp {
+ const zcu = cg.pt.zcu;
+ const ip = &zcu.intern_pool;
+
+ if (ignore_death or !cg.liveness.operandDies(inst, op_index)) {
+ if (op_ref.toIndex()) |op_inst| return .{ .index = op_inst };
+ const val = op_ref.toInterned().?;
+ const gop = try cg.const_tracking.getOrPut(cg.gpa, val);
+ if (!gop.found_existing) gop.value_ptr.* = .init(init: {
+ const const_mcv = try cg.genTypedValue(.fromInterned(val));
+ switch (const_mcv) {
+ .lea_tlv => |tlv_sym| switch (cg.bin_file.tag) {
+ .elf, .macho => {
+ if (cg.mod.pic) {
+ try cg.spillRegisters(&.{ .rdi, .rax });
+ } else {
+ try cg.spillRegisters(&.{.rax});
}
- }
- for (
- mir_ops[0..dst_mir_ops_len],
- pattern.ops[0..dst_mir_ops_len],
- dst_temps[0..dst_mir_ops_len],
- dst_tys[0..dst_mir_ops_len],
- extra_temps[0..dst_mir_ops_len],
- ) |*mir_op, dst_op, dst_temp, dst_ty, extra_temp| {
- if (mir_op.* != .none) continue;
- mir_op.* = switch (dst_op) {
- .implicit, .cc, .cc_elem => unreachable,
- .explicit => |linked_index| mir_ops[linked_index],
- .gpr => .{ .reg = registerAlias(
- dst_temp.tracking(cg).short.register,
- @intCast(dst_ty.abiSize(cg.pt.zcu)),
- ) },
- .mm, .mm_mask, .mm_sign_mask => @panic("TODO"),
- .xmm, .xmm_mask, .xmm_sign_mask => .{ .reg = dst_temp.tracking(cg).short.register.to128() },
- .ymm, .ymm_mask, .ymm_sign_mask => .{ .reg = dst_temp.tracking(cg).short.register.to256() },
- .mem => @panic("TODO"),
- .gpr_limb, .mut_gpr_limb, .gpr_elem => .{ .reg = registerAlias(
- extra_temp.?.tracking(cg).short.register,
- @intCast(@divExact(Memory.Size.bitSize(switch (pattern_set.scalar) {
- .any => .qword,
- .bool => unreachable,
- .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size,
- }), 8)),
- ) },
- .mm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register },
- .xmm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to128() },
- .ymm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to256() },
- .mem_limb, .mut_mem_limb, .mem_elem => .{ .mem = try dst_temp.tracking(cg).short.mem(cg, switch (loop.limb_offset) {
- .unused => unreachable,
- .known => |limb_offset| .{
- .size = .fromSize(loop.limb_size.?),
- .disp = limb_offset,
- },
- .temp => |limb_offset| .{
- .size = .fromSize(loop.limb_size.?),
- .index = limb_offset.tracking(cg).short.register.to64(),
- },
- }) },
- .mm_mask_limb => .{ .reg = extra_temp.?.tracking(cg).short.register },
- .xmm_mask_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to128() },
- .ymm_mask_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to256() },
- .imm, .simm32, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, // unmodifiable destination
- };
- }
- std.mem.swap(Operand, &mir_ops[pattern.commute[0]], &mir_ops[pattern.commute[1]]);
- if (pattern_set.clobbers.eflags) try cg.spillEflagsIfOccupied();
- cg.asmOps((if (loop.remaining_size != null and loop.limb_size != null and
- loop.remaining_size.? <= loop.limb_size.?)
- pattern_set.final_mir_tag
- else
- null) orelse pattern_set.mir_tag, mir_ops) catch |err| switch (err) {
- error.InvalidInstruction => {
- const fixes = @tagName(pattern_set.mir_tag[0]);
- const fixes_blank = std.mem.indexOfScalar(u8, fixes, '_').?;
- return cg.fail(
- "invalid instruction: '{s}{s}{s} {s} {s} {s} {s}'",
- .{
- fixes[0..fixes_blank],
- @tagName(pattern_set.mir_tag[1]),
- fixes[fixes_blank + 1 ..],
- @tagName(mir_ops[0]),
- @tagName(mir_ops[1]),
- @tagName(mir_ops[2]),
- @tagName(mir_ops[3]),
- },
- );
- },
- else => |e| return e,
- };
- for (
- extra_temps[0..dst_temps.len],
- pattern.ops[0..dst_temps.len],
- mir_ops[0..dst_temps.len],
- dst_temps,
- ) |extra_temp, dst_op, mir_op, dst_temp| switch (dst_op) {
- else => if (invert_result) {
- try cg.spillEflagsIfOccupied();
- cg.asmOps(
- .{ ._, .not },
- .{ mir_op, .none, .none, .none },
- ) catch |err| switch (err) {
- error.InvalidInstruction => return cg.fail(
- "invalid instruction: 'not {s} none none none'",
- .{@tagName(mir_op)},
- ),
- else => |e| return e,
- };
- },
- .mm_mask,
- .xmm_mask,
- .ymm_mask,
- .mm_sign_mask,
- .xmm_sign_mask,
- .ymm_sign_mask,
- => dst_temp.asMask(.{
- .kind = switch (dst_op) {
- else => unreachable,
- .mm_mask, .xmm_mask, .ymm_mask => .all,
- .mm_sign_mask, .xmm_sign_mask, .ymm_sign_mask => .sign,
- },
- .inverted = invert_result,
- .scalar = switch (pattern_set.scalar) {
- .any, .bool => unreachable,
- .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size,
- },
- }, cg),
- .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb, .mut_gpr_limb, .gpr_elem => if (extra_temp) |limb_temp| {
- const dst_mcv = dst_temp.tracking(cg).short;
- switch (dst_mcv) {
- .register_pair, .register_triple, .register_quadruple => try cg.asmRegisterRegister(
- .{ ._, .mov },
- dst_mcv.getRegs()[@divExact(loop.limb_offset.known, loop.limb_size.?)].to64(),
- limb_temp.tracking(cg).short.register.to64(),
- ),
- else => try cg.asmMemoryRegister(
- switch (dst_op) {
- else => unreachable,
- .gpr_limb, .mut_gpr_limb, .gpr_elem => .{ ._, .mov },
- .mm_limb => .{ ._q, .mov },
- .xmm_limb, .ymm_limb => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu },
- },
- try dst_mcv.mem(cg, switch (loop.limb_offset) {
- .unused => unreachable,
- .known => |limb_offset| .{
- .size = .fromSize(loop.limb_size.?),
- .disp = limb_offset,
- },
- .temp => |limb_offset| .{
- .size = .fromSize(loop.limb_size.?),
- .index = limb_offset.tracking(cg).short.register.to64(),
- },
- }),
- registerAlias(limb_temp.tracking(cg).short.register, loop.limb_size.?),
- ),
- }
- },
- .cc_elem, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => {
- const scalar_size = switch (pattern_set.scalar) {
- .any => .qword,
- .bool => unreachable,
- .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size,
- };
- const mask_store_size: u4 =
- @intCast(std.math.divCeil(u7, loop.mask_store_bit_size.?, 8) catch unreachable);
- const known_shl_count = if (loop.mask_store_reg) |_| switch (loop.mask_limb_offset) {
- .unused => unreachable,
- .known => |mask_limb_offset| mask_limb_offset & (loop.mask_store_bit_size.? - 1),
- .temp => null,
- } else null;
- const mask_limb_reg = registerAlias(if (known_shl_count != 0)
- loop.mask_limb_temp.?.tracking(cg).short.register
- else
- loop.mask_store_reg.?, mask_store_size);
- switch (dst_op) {
- else => unreachable,
- .cc_elem => try cg.asmSetccRegister(switch (invert_result) {
- false => opts.cc.?,
- true => opts.cc.?.negate(),
- }, mask_limb_reg.to8()),
- .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => {
- if (scalar_size == .word) if (cg.hasFeature(.avx)) try cg.asmRegisterRegisterRegister(
- .{ .vp_b, .ackssw },
- mir_op.reg,
- mir_op.reg,
- mir_op.reg,
- ) else try cg.asmRegisterRegister(
- .{ .p_b, .ackssw },
- mir_op.reg,
- mir_op.reg,
- );
- try cg.asmRegisterRegister(switch (scalar_size) {
- else => unreachable,
- .byte, .word => .{ if (cg.hasFeature(.avx)) .vp_b else .p_b, .movmsk },
- .dword => .{ if (cg.hasFeature(.avx)) .v_ps else ._ps, .movmsk },
- .qword => .{ if (cg.hasFeature(.avx)) .v_pd else ._pd, .movmsk },
- }, mask_limb_reg.to32(), mir_op.reg);
- if (invert_result) if (loop.mask_store_reg) |_| {
- try cg.spillEflagsIfOccupied();
- try cg.asmRegisterImmediate(
- .{ ._, .xor },
- registerAlias(mask_limb_reg, @min(mask_store_size, 4)),
- .u((@as(u32, 1) << @intCast(loop.mask_limb_bit_size.?)) - 1),
- );
- } else try cg.asmRegister(.{ ._, .not }, mask_limb_reg);
- },
- }
- if (loop.mask_store_reg) |mask_store_reg| {
- const mask_store_alias = registerAlias(mask_store_reg, mask_store_size);
- switch (loop.mask_limb_offset) {
- .unused => unreachable,
- .known => if (known_shl_count.? != 0) {
- try cg.spillEflagsIfOccupied();
- try cg.asmRegisterImmediate(.{ ._l, .sh }, mask_limb_reg, .u(known_shl_count.?));
- try cg.spillEflagsIfOccupied();
- try cg.asmRegisterRegister(.{ ._, .@"or" }, mask_store_alias, mask_limb_reg);
- },
- .temp => |mask_limb_offset| {
- if (cg.hasFeature(.bmi2)) {
- const shlx_size = @max(mask_store_size, 4);
- const shlx_mask_limb_reg = registerAlias(mask_limb_reg, shlx_size);
- try cg.asmRegisterRegisterRegister(
- .{ ._lx, .sh },
- shlx_mask_limb_reg,
- shlx_mask_limb_reg,
- registerAlias(mask_limb_offset.tracking(cg).short.register, shlx_size),
- );
- } else {
- try cg.spillEflagsIfOccupied();
- try cg.asmRegisterRegister(
- .{ ._l, .sh },
- mask_limb_reg,
- mask_limb_offset.tracking(cg).short.register.to8(),
- );
- }
- try cg.spillEflagsIfOccupied();
- try cg.asmRegisterRegister(.{ ._, .@"or" }, mask_store_alias, mask_limb_reg);
- },
- }
- }
- const dst_mcv = dst_temp.tracking(cg).short;
- switch (loop.mask_limb_offset) {
- .unused => unreachable,
- .known => |*mask_limb_offset| {
- mask_limb_offset.* += loop.mask_limb_bit_size.?;
- if (mask_limb_offset.* & (loop.mask_store_bit_size.? - 1) == 0) switch (dst_mcv) {
- .register => {},
- else => {
- try cg.asmMemoryRegister(
- .{ ._, .mov },
- try dst_mcv.mem(cg, .{
- .size = .fromSize(mask_store_size),
- .disp = @divExact(mask_limb_offset.*, 8) - mask_store_size,
- }),
- registerAlias(loop.mask_store_reg orelse mask_limb_reg, mask_store_size),
- );
- if (loop.mask_store_reg) |mask_store_reg| {
- const mask_store_alias = registerAlias(mask_store_reg, @min(mask_store_size, 4));
- try cg.asmRegisterRegister(.{ ._, .xor }, mask_store_alias, mask_store_alias);
- }
- },
- };
- },
- .temp => |mask_limb_offset| {
- const mask_limb_offset_reg = mask_limb_offset.tracking(cg).short.register.to32();
- if (loop.mask_store_reg) |mask_store_reg| {
- try cg.asmRegisterMemory(.{ ._, .lea }, mask_limb_offset_reg, .{
- .base = .{ .reg = mask_limb_offset_reg.to64() },
- .mod = .{ .rm = .{
- .size = .qword,
- .disp = loop.mask_limb_bit_size.?,
- } },
- });
- switch (dst_mcv) {
- .register => {},
- else => {
- try cg.spillEflagsIfOccupied();
- try cg.asmRegisterImmediate(
- .{ ._, .@"test" },
- mask_limb_offset_reg,
- .u(loop.mask_store_bit_size.? - 1),
- );
- const skip_store_reloc = try cg.asmJccReloc(.nz, undefined);
- const mask_store_offset_reg = mask_limb_reg.to32();
- try cg.asmRegisterRegister(.{ ._, .mov }, mask_store_offset_reg, mask_limb_offset_reg);
- try cg.asmRegisterImmediate(.{ ._r, .sh }, mask_store_offset_reg, .u(3));
- try cg.asmMemoryRegister(.{ ._, .mov }, try dst_mcv.mem(cg, .{
- .size = .fromSize(mask_store_size),
- .index = mask_store_offset_reg.to64(),
- .disp = -@as(i8, mask_store_size),
- }), registerAlias(mask_store_reg, mask_store_size));
- const mask_store_alias = registerAlias(mask_store_reg, @min(mask_store_size, 4));
- try cg.asmRegisterRegister(.{ ._, .xor }, mask_store_alias, mask_store_alias);
- cg.performReloc(skip_store_reloc);
- },
- }
- } else {
- switch (dst_mcv) {
- .register => {},
- else => try cg.asmMemoryRegister(.{ ._, .mov }, try dst_mcv.mem(cg, .{
- .size = .fromSize(mask_store_size),
- .index = mask_limb_offset_reg.to64(),
- }), mask_limb_reg),
- }
- try cg.asmRegisterMemory(.{ ._, .lea }, mask_limb_offset_reg, .{
- .base = .{ .reg = mask_limb_offset_reg.to64() },
- .mod = .{ .rm = .{
- .size = .qword,
- .disp = mask_store_size,
- } },
- });
- }
- },
- }
- },
- };
- switch (pattern_set.loop) {
- .once => break :pattern_sets,
- .bitwise => {},
- .limbwise_carry => @panic("TODO"),
- .limbwise_reduce => @panic("TODO"),
- .limbwise_pairs_forward => @panic("TODO"),
- .limbwise_pairs_reverse => @panic("TODO"),
- .elementwise => {},
- }
- switch (loop.limb_offset) {
- .unused => break,
- .known => |*limb_offset| {
- limb_offset.* += loop.limb_size.?;
- loop.remaining_size.? -= loop.limb_size.?;
- if (loop.remaining_size.? < loop.limb_size.? or
- (loop.element_size != null and limb_offset.* >= loop.element_size.?))
- {
- switch (loop.mask_limb_offset) {
- .unused => {},
- .known => |*mask_limb_offset| mask_limb_offset.* = 0,
- .temp => unreachable,
- }
- limb_offset.* = 0;
- break;
- }
- },
- .temp => |limb_offset| {
- const limb_offset_reg = limb_offset.tracking(cg).short.register;
- try cg.asmRegisterMemory(.{ ._, .lea }, limb_offset_reg.to32(), .{
- .base = .{ .reg = limb_offset_reg.to64() },
- .mod = .{ .rm = .{
- .size = .qword,
- .disp = loop.limb_size.?,
- } },
- });
- try cg.spillEflagsIfOccupied();
- try cg.asmRegisterImmediate(
- .{ ._, .cmp },
- limb_offset_reg.to32(),
- .u(loop.element_size orelse loop.remaining_size.?),
- );
- _ = try cg.asmJccReloc(.b, loop.limb_reloc);
- try limb_offset.die(cg);
- break;
- },
- }
- }
- if (loop.shuffle_temp) |shuffle_temp| try shuffle_temp.die(cg);
- if (loop.mask_limb_temp) |mask_limb_temp| try mask_limb_temp.die(cg);
- if (loop.mask_store_temp) |mask_store_temp| try mask_store_temp.die(cg);
- switch (loop.mask_limb_offset) {
- .unused, .known => {},
- .temp => |mask_limb_offset| try mask_limb_offset.die(cg),
- }
- switch (loop.element_offset) {
- .unused => break :pattern_sets,
- .known => |*element_offset| {
- if (loop.remaining_size.? == 0) break :pattern_sets;
- element_offset.* += loop.element_size.?;
- },
- .temp => |element_offset| {
- if (true) @panic("TODO");
- try element_offset.die(cg);
- if (loop.remaining_size.? == 0) break :pattern_sets;
- break;
+ const frame_index = try cg.allocFrameIndex(.init(.{
+ .size = 8,
+ .alignment = .@"8",
+ }));
+ try cg.genSetMem(
+ .{ .frame = frame_index },
+ 0,
+ .usize,
+ .{ .lea_symbol = .{ .sym_index = tlv_sym } },
+ .{},
+ );
+ break :init .{ .load_frame = .{ .index = frame_index } };
},
- }
+ else => break :init const_mcv,
+ },
+ else => break :init const_mcv,
}
- }
- } else {
- log.err("failed to select {s}:", .{@tagName(pattern_sets[0].mir_tag[1])});
- for (src_temps) |src_temp| log.err("{}", .{src_temp.tracking(cg)});
- return error.SelectFailed;
+ });
+ return cg.tempFromValue(.fromInterned(ip.typeOf(val)), gop.value_ptr.short);
+ }
+
+ const temp_index = cg.next_temp_index;
+ const temp: Temp = .{ .index = temp_index.toIndex() };
+ const op_inst = op_ref.toIndex().?;
+ const tracking = cg.getResolvedInstValue(op_inst);
+ temp_index.tracking(cg).* = tracking.*;
+ if (!cg.reuseTemp(temp.index, op_inst, tracking)) return .{ .index = op_ref.toIndex().? };
+ cg.temp_type[@intFromEnum(temp_index)] = cg.typeOf(op_ref);
+ cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
+ return temp;
+}
+
+inline fn tempsFromOperands(cg: *CodeGen, inst: Air.Inst.Index, op_refs: anytype) ![op_refs.len]Temp {
+ var temps: [op_refs.len]Temp = undefined;
+ inline for (&temps, 0.., op_refs) |*temp, op_index, op_ref| {
+ temp.* = try cg.tempFromOperand(inst, op_index, op_ref, inline for (0..op_index) |prev_op_index| {
+ if (op_ref == op_refs[prev_op_index]) break true;
+ } else false);
}
- for (extra_temps) |extra_temp| if (extra_temp) |temp| try temp.die(cg);
+ return temps;
}
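
Note: the nested `inline for` above is what detects a repeated operand (e.g. both operands naming the same Air ref) so the same temp is not claimed twice. A minimal standalone sketch of that comptime duplicate scan, with assumed example values:

    const std = @import("std");

    // Each flag says whether the value at that position already appeared earlier;
    // the inner `inline for` unrolls over only the *previous* positions.
    fn duplicateFlags(comptime n: usize, values: [n]u32) [n]bool {
        var flags: [n]bool = undefined;
        inline for (&flags, 0.., values) |*flag, op_index, value| {
            flag.* = inline for (0..op_index) |prev_index| {
                if (value == values[prev_index]) break true;
            } else false;
        }
        return flags;
    }

    pub fn main() void {
        // Repeated-operand case: the second 7 is flagged as a duplicate.
        std.debug.print("{any}\n", .{duplicateFlags(3, .{ 7, 9, 7 })});
    }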
-const Select2 = struct {
+const Operand = union(enum) {
+ none,
+ reg: Register,
+ mem: Memory,
+ imm: Immediate,
+ inst: Mir.Inst.Index,
+};
+
+const Select = struct {
cg: *CodeGen,
- case: *const Case,
- pattern: *const Select2.Pattern,
- extra_temps: [3]Temp,
- dst_temps: []const Temp,
- src_temps: []const Temp,
- commute: struct { u8, u8 },
+ temps: [@intFromEnum(Select.Operand.Ref.none)]Temp,
+ labels: [@intFromEnum(Label._)]struct {
+ backward: ?Mir.Inst.Index,
+ forward: [1]?Mir.Inst.Index,
+ },
limb: Memory.Mod.Rm,
+ mask_limb: Memory.Mod.Rm,
+
+ fn emitLabel(s: *Select, label_index: Label) void {
+ if (label_index == ._) return;
+ const label = &s.labels[@intFromEnum(label_index)];
+ for (&label.forward) |*reloc| {
+ if (reloc.*) |r| s.cg.performReloc(r);
+ reloc.* = null;
+ }
+ label.backward = @intCast(s.cg.mir_instructions.len);
+ }
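
Note: the `labels` table implements one-pass local labels in the style of assembler `0:`/`0f`/`0b`. A minimal model of the resolve step with assumed instruction indices (the real code patches Mir relocations rather than printing):

    const std = @import("std");

    const Label = struct {
        backward: ?u32 = null, // position of the most recent `0:`
        forward: [1]?u32 = .{null}, // jump sites still waiting on a `0f`
    };

    fn emitLabel(label: *Label, here: u32) void {
        // Resolve every pending forward jump to the current position...
        for (&label.forward) |*reloc| {
            if (reloc.*) |jump_index| std.debug.print("patch jump {} -> {}\n", .{ jump_index, here });
            reloc.* = null;
        }
        // ...and remember the position for later backward jumps (`0b`).
        label.backward = here;
    }

    pub fn main() void {
        var label: Label = .{};
        label.forward[0] = 3; // a `jnz 0f` was emitted as instruction 3
        emitLabel(&label, 7); // `0:` lands at instruction 7
        std.debug.print("`0b` now targets {?}\n", .{label.backward});
    }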
- fn emit(s: Select2, inst: Instruction) !void {
- const mir_tag: Mir.Inst.FixedTag = .{ inst[0], inst[1] };
+ fn emit(s: *Select, inst: Instruction) !void {
+ s.emitLabel(inst[0]);
+ const mir_tag: Mir.Inst.FixedTag = .{ inst[1], inst[2] };
var mir_ops: [4]CodeGen.Operand = undefined;
- inline for (&mir_ops, 2..) |*mir_op, inst_index| mir_op.* = try inst[inst_index].lower(s);
+ inline for (&mir_ops, 3..) |*mir_op, inst_index| mir_op.* = try inst[inst_index].lower(s);
s.cg.asmOps(mir_tag, mir_ops) catch |err| switch (err) {
error.InvalidInstruction => {
const fixes = @tagName(mir_tag[0]);
@@ -23881,32 +24102,27 @@ const Select2 = struct {
};
}
- fn lowerLimb(s: Select2, temp: Temp) !CodeGen.Operand {
- return .{ .mem = try temp.tracking(s.cg).short.mem(s.cg, s.limb) };
- }
-
- fn srcTemp(s: Select2, index: u8) Temp {
- return s.src_temps[
- if (index == s.commute[0])
- s.commute[1]
- else if (index == s.commute[1])
- s.commute[0]
- else
- index
- ];
- }
-
const Case = struct {
required_features: [2]?std.Target.x86.Feature = @splat(null),
- constraints: [2]Constraint = @splat(.any),
- patterns: []const Select2.Pattern,
+ dst_constraints: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]Constraint = @splat(.any),
+ src_constraints: [@intFromEnum(Select.Operand.Ref.none) - @intFromEnum(Select.Operand.Ref.src0)]Constraint = @splat(.any),
+ patterns: []const Select.Pattern,
clobbers: struct { eflags: bool = false } = .{},
- extra_temps: [3]TempSpec = @splat(.unused),
- dst_temps: [1]TempSpec.Kind = @splat(.unused),
+ extra_temps: [@intFromEnum(Select.Operand.Ref.dst0) - @intFromEnum(Select.Operand.Ref.tmp0)]TempSpec = @splat(.unused),
+ dst_temps: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]TempSpec.Kind = @splat(.unused),
each: union(enum) {
once: []const Instruction,
limb: struct {
- of: Select2.Operand,
+ of: Select.Operand.Ref.Sized,
+ header: []const Instruction = &.{},
+ first: ?[]const Instruction = null,
+ body: []const Instruction,
+ last: ?[]const Instruction = null,
+ trailer: []const Instruction = &.{},
+ },
+ limb_and_mask_limb: struct {
+ of: Select.Operand.Ref.Sized,
+ of_mask: Select.Operand.Ref.Sized,
header: []const Instruction = &.{},
first: ?[]const Instruction = null,
body: []const Instruction,
@@ -23918,6 +24134,7 @@ const Select2 = struct {
const Constraint = union(enum) {
any,
+ any_bool_vec,
any_int,
any_float,
bool_vec: Memory.Size,
@@ -23925,37 +24142,35 @@ const Select2 = struct {
signed_int: Memory.Size,
unsigned_int: Memory.Size,
- fn accepts(constraint: Constraint, temp: Temp, cg: *CodeGen) bool {
+ fn accepts(constraint: Constraint, ty: Type, cg: *CodeGen) bool {
const zcu = cg.pt.zcu;
switch (constraint) {
.any => return true,
+ .any_bool_vec => return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type,
.any_int => {
- const scalar_ty = temp.typeOf(cg).scalarType(zcu);
+ const scalar_ty = ty.scalarType(zcu);
return scalar_ty.isAbiInt(zcu) or scalar_ty.isPtrAtRuntime(zcu);
},
- .any_float => return temp.typeOf(cg).scalarType(zcu).isRuntimeFloat(),
- .bool_vec => |size| {
- const ty = temp.typeOf(cg);
- return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and
- ty.vectorLen(zcu) <= size.bitSize();
- },
+ .any_float => return ty.scalarType(zcu).isRuntimeFloat(),
+ .bool_vec => |size| return ty.isVector(zcu) and
+ ty.scalarType(zcu).toIntern() == .bool_type and ty.vectorLen(zcu) <= size.bitSize(cg.target),
.int => |size| {
- const scalar_ty = temp.typeOf(cg).scalarType(zcu);
- if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize();
- return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).bits <= size.bitSize();
+ const scalar_ty = ty.scalarType(zcu);
+ if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize(cg.target);
+ return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).bits <= size.bitSize(cg.target);
},
.signed_int => |size| {
- const scalar_ty = temp.typeOf(cg).scalarType(zcu);
+ const scalar_ty = ty.scalarType(zcu);
if (!scalar_ty.isAbiInt(zcu)) return false;
const info = scalar_ty.intInfo(zcu);
- return info.signedness == .signed and info.bits <= size.bitSize();
+ return info.signedness == .signed and info.bits <= size.bitSize(cg.target);
},
.unsigned_int => |size| {
- const scalar_ty = temp.typeOf(cg).scalarType(zcu);
- if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize();
+ const scalar_ty = ty.scalarType(zcu);
+ if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize(cg.target);
if (!scalar_ty.isAbiInt(zcu)) return false;
const info = scalar_ty.intInfo(zcu);
- return info.signedness == .unsigned and info.bits <= size.bitSize();
+ return info.signedness == .unsigned and info.bits <= size.bitSize(cg.target);
},
}
}
@@ -24083,189 +24298,578 @@ const Select2 = struct {
rc: Register.Class,
rc_mask: struct { rc: Register.Class, info: MaskInfo },
mem,
- src: u8,
- src_mask: struct { src: u8, info: MaskInfo },
+ ref: Select.Operand.Ref,
+ ref_mask: struct { ref: Select.Operand.Ref, info: MaskInfo },
- fn finish(kind: Kind, temp: Temp, s: Select2) void {
+ fn finish(kind: Kind, temp: Temp, s: *const Select) void {
switch (kind) {
else => {},
- inline .rc_mask, .src_mask => |mask| temp.asMask(mask.info, s.cg),
+ inline .rc_mask, .ref_mask => |mask| temp.asMask(mask.info, s.cg),
}
}
};
- fn create(spec: TempSpec, s: Select2) !?Temp {
+ fn create(spec: TempSpec, s: *Select) !?Temp {
return switch (spec.kind) {
.unused => null,
.any => try s.cg.tempAlloc(spec.type),
.cc => |cc| try s.cg.tempFromValue(spec.type, .{ .eflags = cc }),
.reg => |reg| try s.cg.tempFromValue(spec.type, .{ .register = reg }),
.rc => |rc| try s.cg.tempAllocReg(spec.type, regSetForRegClass(rc)),
- .rc_mask => |mask| try s.cg.tempAllocReg(spec.type, regSetForRegClass(mask.rc)),
+ .rc_mask => |rc_mask| try s.cg.tempAllocReg(spec.type, regSetForRegClass(rc_mask.rc)),
.mem => try s.cg.tempAllocMem(spec.type),
- .src => |src| s.srcTemp(src),
- .src_mask => |mask| s.srcTemp(mask.src),
+ .ref => |ref| ref.deref(s),
+ .ref_mask => |ref_mask| ref_mask.ref.deref(s),
};
}
};
const Instruction = struct {
+ Label,
Mir.Inst.Fixes,
Mir.Inst.Tag,
- Select2.Operand,
- Select2.Operand,
- Select2.Operand,
- Select2.Operand,
+ Select.Operand,
+ Select.Operand,
+ Select.Operand,
+ Select.Operand,
};
- const Operand = union(enum) {
- none,
- extra: struct { Memory.Size, u8 },
- dst: struct { Memory.Size, u8 },
- src: struct { Memory.Size, u8 },
- dst_limb: u8,
- src_limb: u8,
- simm32: i32,
-
- const tmp0b: Select2.Operand = .{ .extra = .{ .byte, 0 } };
- const tmp0w: Select2.Operand = .{ .extra = .{ .word, 0 } };
- const etmp0: Select2.Operand = .{ .extra = .{ .dword, 0 } };
- const rtmp0: Select2.Operand = .{ .extra = .{ .qword, 0 } };
- const xtmp0: Select2.Operand = .{ .extra = .{ .xword, 0 } };
- const ytmp0: Select2.Operand = .{ .extra = .{ .yword, 0 } };
-
- const tmp1b: Select2.Operand = .{ .extra = .{ .byte, 1 } };
- const tmp1w: Select2.Operand = .{ .extra = .{ .word, 1 } };
- const etmp1: Select2.Operand = .{ .extra = .{ .dword, 1 } };
- const rtmp1: Select2.Operand = .{ .extra = .{ .qword, 1 } };
- const xtmp1: Select2.Operand = .{ .extra = .{ .xword, 1 } };
- const ytmp1: Select2.Operand = .{ .extra = .{ .yword, 1 } };
-
- const tmp2b: Select2.Operand = .{ .extra = .{ .byte, 2 } };
- const tmp2w: Select2.Operand = .{ .extra = .{ .word, 2 } };
- const etmp2: Select2.Operand = .{ .extra = .{ .dword, 2 } };
- const rtmp2: Select2.Operand = .{ .extra = .{ .qword, 2 } };
- const xtmp2: Select2.Operand = .{ .extra = .{ .xword, 2 } };
- const ytmp2: Select2.Operand = .{ .extra = .{ .yword, 2 } };
-
- const dst0b: Select2.Operand = .{ .dst = .{ .byte, 0 } };
- const dst0w: Select2.Operand = .{ .dst = .{ .word, 0 } };
- const edst0: Select2.Operand = .{ .dst = .{ .dword, 0 } };
- const rdst0: Select2.Operand = .{ .dst = .{ .qword, 0 } };
- const xdst0: Select2.Operand = .{ .dst = .{ .xword, 0 } };
- const ydst0: Select2.Operand = .{ .dst = .{ .yword, 0 } };
-
- const src0b: Select2.Operand = .{ .src = .{ .byte, 0 } };
- const src0w: Select2.Operand = .{ .src = .{ .word, 0 } };
- const esrc0: Select2.Operand = .{ .src = .{ .dword, 0 } };
- const rsrc0: Select2.Operand = .{ .src = .{ .qword, 0 } };
- const xsrc0: Select2.Operand = .{ .src = .{ .xword, 0 } };
- const ysrc0: Select2.Operand = .{ .src = .{ .yword, 0 } };
-
- const src1b: Select2.Operand = .{ .src = .{ .byte, 1 } };
- const src1w: Select2.Operand = .{ .src = .{ .word, 1 } };
- const esrc1: Select2.Operand = .{ .src = .{ .dword, 1 } };
- const rsrc1: Select2.Operand = .{ .src = .{ .qword, 1 } };
- const xsrc1: Select2.Operand = .{ .src = .{ .xword, 1 } };
- const ysrc1: Select2.Operand = .{ .src = .{ .yword, 1 } };
-
- fn unwrap(op: Select2.Operand, s: Select2) struct { Memory.Size, Temp } {
- return switch (op) {
- else => unreachable,
- .extra => |extra| .{ extra[0], s.extra_temps[extra[1]] },
- .dst => |dst| .{ dst[0], s.dst_temps[dst[1]] },
- .src => |src| .{ src[0], s.srcTemp(src[1]) },
+ const Label = enum { @"0:", @"1:", @"_" };
+ const Operand = struct {
+ tag: Tag,
+ base: Ref.Sized = .none,
+ index: packed struct(u6) {
+ ref: Ref,
+ scale: Memory.Scale,
+ } = .{ .ref = .none, .scale = .@"1" },
+ adjust: Adjust = .none,
+ imm: i32 = 0,
+
+ const Tag = enum {
+ none,
+ backward_label,
+ forward_label,
+ ref,
+ limb,
+ mask_limb,
+ simm,
+ lea,
+ mem,
+ };
+ const Adjust = enum {
+ none,
+ add_access_size,
+ sub_access_size,
+ add_size,
+ sub_size,
+ add_bit_size,
+ sub_bit_size,
+ add_limbs,
+ sub_limbs,
+ add_len,
+ sub_len,
+ add_elem_size,
+ sub_elem_size,
+ add_elem_limbs,
+ sub_elem_limbs,
+ };
+ const Ref = enum(u4) {
+ tmp0,
+ tmp1,
+ tmp2,
+ tmp3,
+ tmp4,
+ tmp5,
+ dst0,
+ src0,
+ src1,
+ none,
+
+ const Sized = packed struct(u8) {
+ ref: Ref,
+ size: Memory.Size,
+
+ const none: Sized = .{ .ref = .none, .size = .none };
+
+ const tmp0: Sized = .{ .ref = .tmp0, .size = .none };
+ const tmp0b: Sized = .{ .ref = .tmp0, .size = .byte };
+ const tmp0w: Sized = .{ .ref = .tmp0, .size = .word };
+ const tmp0d: Sized = .{ .ref = .tmp0, .size = .dword };
+ const tmp0p: Sized = .{ .ref = .tmp0, .size = .ptr };
+ const tmp0q: Sized = .{ .ref = .tmp0, .size = .qword };
+ const tmp0x: Sized = .{ .ref = .tmp0, .size = .xword };
+ const tmp0y: Sized = .{ .ref = .tmp0, .size = .yword };
+
+ const tmp1: Sized = .{ .ref = .tmp1, .size = .none };
+ const tmp1b: Sized = .{ .ref = .tmp1, .size = .byte };
+ const tmp1w: Sized = .{ .ref = .tmp1, .size = .word };
+ const tmp1d: Sized = .{ .ref = .tmp1, .size = .dword };
+ const tmp1p: Sized = .{ .ref = .tmp1, .size = .ptr };
+ const tmp1q: Sized = .{ .ref = .tmp1, .size = .qword };
+ const tmp1x: Sized = .{ .ref = .tmp1, .size = .xword };
+ const tmp1y: Sized = .{ .ref = .tmp1, .size = .yword };
+
+ const tmp2: Sized = .{ .ref = .tmp2, .size = .none };
+ const tmp2b: Sized = .{ .ref = .tmp2, .size = .byte };
+ const tmp2w: Sized = .{ .ref = .tmp2, .size = .word };
+ const tmp2d: Sized = .{ .ref = .tmp2, .size = .dword };
+ const tmp2p: Sized = .{ .ref = .tmp2, .size = .ptr };
+ const tmp2q: Sized = .{ .ref = .tmp2, .size = .qword };
+ const tmp2x: Sized = .{ .ref = .tmp2, .size = .xword };
+ const tmp2y: Sized = .{ .ref = .tmp2, .size = .yword };
+
+ const tmp3: Sized = .{ .ref = .tmp3, .size = .none };
+ const tmp3b: Sized = .{ .ref = .tmp3, .size = .byte };
+ const tmp3w: Sized = .{ .ref = .tmp3, .size = .word };
+ const tmp3d: Sized = .{ .ref = .tmp3, .size = .dword };
+ const tmp3p: Sized = .{ .ref = .tmp3, .size = .ptr };
+ const tmp3q: Sized = .{ .ref = .tmp3, .size = .qword };
+ const tmp3x: Sized = .{ .ref = .tmp3, .size = .xword };
+ const tmp3y: Sized = .{ .ref = .tmp3, .size = .yword };
+
+ const tmp4: Sized = .{ .ref = .tmp4, .size = .none };
+ const tmp4b: Sized = .{ .ref = .tmp4, .size = .byte };
+ const tmp4w: Sized = .{ .ref = .tmp4, .size = .word };
+ const tmp4d: Sized = .{ .ref = .tmp4, .size = .dword };
+ const tmp4p: Sized = .{ .ref = .tmp4, .size = .ptr };
+ const tmp4q: Sized = .{ .ref = .tmp4, .size = .qword };
+ const tmp4x: Sized = .{ .ref = .tmp4, .size = .xword };
+ const tmp4y: Sized = .{ .ref = .tmp4, .size = .yword };
+
+ const dst0: Sized = .{ .ref = .dst0, .size = .none };
+ const dst0b: Sized = .{ .ref = .dst0, .size = .byte };
+ const dst0w: Sized = .{ .ref = .dst0, .size = .word };
+ const dst0d: Sized = .{ .ref = .dst0, .size = .dword };
+ const dst0p: Sized = .{ .ref = .dst0, .size = .ptr };
+ const dst0q: Sized = .{ .ref = .dst0, .size = .qword };
+ const dst0x: Sized = .{ .ref = .dst0, .size = .xword };
+ const dst0y: Sized = .{ .ref = .dst0, .size = .yword };
+
+ const src0: Sized = .{ .ref = .src0, .size = .none };
+ const src0b: Sized = .{ .ref = .src0, .size = .byte };
+ const src0w: Sized = .{ .ref = .src0, .size = .word };
+ const src0d: Sized = .{ .ref = .src0, .size = .dword };
+ const src0p: Sized = .{ .ref = .src0, .size = .ptr };
+ const src0q: Sized = .{ .ref = .src0, .size = .qword };
+ const src0x: Sized = .{ .ref = .src0, .size = .xword };
+ const src0y: Sized = .{ .ref = .src0, .size = .yword };
+
+ const src1: Sized = .{ .ref = .src1, .size = .none };
+ const src1b: Sized = .{ .ref = .src1, .size = .byte };
+ const src1w: Sized = .{ .ref = .src1, .size = .word };
+ const src1d: Sized = .{ .ref = .src1, .size = .dword };
+ const src1p: Sized = .{ .ref = .src1, .size = .ptr };
+ const src1q: Sized = .{ .ref = .src1, .size = .qword };
+ const src1x: Sized = .{ .ref = .src1, .size = .xword };
+ const src1y: Sized = .{ .ref = .src1, .size = .yword };
};
- }
- fn lower(op: Select2.Operand, s: Select2) !CodeGen.Operand {
- switch (op) {
- .none => return .none,
- else => {},
- .dst_limb => |dst| return s.lowerLimb(s.dst_temps[dst]),
- .src_limb => |src| return s.lowerLimb(s.srcTemp(src)),
- .simm32 => |imm| return .{ .imm = .s(imm) },
+ fn deref(ref: Ref, s: *const Select) Temp {
+ return s.temps[@intFromEnum(ref)];
}
- const size, const temp = op.unwrap(s);
- return switch (temp.tracking(s.cg).short) {
- .immediate => |imm| .{ .imm = switch (size) {
- .byte => if (std.math.cast(i8, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u8, @intCast(imm))),
- .word => if (std.math.cast(i16, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u16, @intCast(imm))),
- .dword => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u32, @intCast(imm))),
- .qword => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(imm),
- else => unreachable,
+ };
+
+ const @"_": Select.Operand = .{ .tag = .none };
+
+ const @"0b": Select.Operand = .{ .tag = .backward_label, .base = .{ .ref = .tmp0, .size = .none } };
+ const @"0f": Select.Operand = .{ .tag = .forward_label, .base = .{ .ref = .tmp0, .size = .none } };
+ const @"1b": Select.Operand = .{ .tag = .backward_label, .base = .{ .ref = .tmp1, .size = .none } };
+ const @"1f": Select.Operand = .{ .tag = .forward_label, .base = .{ .ref = .tmp1, .size = .none } };
+
+ const tmp0b: Select.Operand = .{ .tag = .ref, .base = .tmp0b };
+ const tmp0w: Select.Operand = .{ .tag = .ref, .base = .tmp0w };
+ const tmp0d: Select.Operand = .{ .tag = .ref, .base = .tmp0d };
+ const tmp0p: Select.Operand = .{ .tag = .ref, .base = .tmp0p };
+ const tmp0q: Select.Operand = .{ .tag = .ref, .base = .tmp0q };
+ const tmp0x: Select.Operand = .{ .tag = .ref, .base = .tmp0x };
+ const tmp0y: Select.Operand = .{ .tag = .ref, .base = .tmp0y };
+
+ const tmp1b: Select.Operand = .{ .tag = .ref, .base = .tmp1b };
+ const tmp1w: Select.Operand = .{ .tag = .ref, .base = .tmp1w };
+ const tmp1d: Select.Operand = .{ .tag = .ref, .base = .tmp1d };
+ const tmp1p: Select.Operand = .{ .tag = .ref, .base = .tmp1p };
+ const tmp1q: Select.Operand = .{ .tag = .ref, .base = .tmp1q };
+ const tmp1x: Select.Operand = .{ .tag = .ref, .base = .tmp1x };
+ const tmp1y: Select.Operand = .{ .tag = .ref, .base = .tmp1y };
+
+ const tmp2b: Select.Operand = .{ .tag = .ref, .base = .tmp2b };
+ const tmp2w: Select.Operand = .{ .tag = .ref, .base = .tmp2w };
+ const tmp2d: Select.Operand = .{ .tag = .ref, .base = .tmp2d };
+ const tmp2p: Select.Operand = .{ .tag = .ref, .base = .tmp2p };
+ const tmp2q: Select.Operand = .{ .tag = .ref, .base = .tmp2q };
+ const tmp2x: Select.Operand = .{ .tag = .ref, .base = .tmp2x };
+ const tmp2y: Select.Operand = .{ .tag = .ref, .base = .tmp2y };
+
+ const tmp3b: Select.Operand = .{ .tag = .ref, .base = .tmp3b };
+ const tmp3w: Select.Operand = .{ .tag = .ref, .base = .tmp3w };
+ const tmp3d: Select.Operand = .{ .tag = .ref, .base = .tmp3d };
+ const tmp3p: Select.Operand = .{ .tag = .ref, .base = .tmp3p };
+ const tmp3q: Select.Operand = .{ .tag = .ref, .base = .tmp3q };
+ const tmp3x: Select.Operand = .{ .tag = .ref, .base = .tmp3x };
+ const tmp3y: Select.Operand = .{ .tag = .ref, .base = .tmp3y };
+
+ const tmp4b: Select.Operand = .{ .tag = .ref, .base = .tmp4b };
+ const tmp4w: Select.Operand = .{ .tag = .ref, .base = .tmp4w };
+ const tmp4d: Select.Operand = .{ .tag = .ref, .base = .tmp4d };
+ const tmp4p: Select.Operand = .{ .tag = .ref, .base = .tmp4p };
+ const tmp4q: Select.Operand = .{ .tag = .ref, .base = .tmp4q };
+ const tmp4x: Select.Operand = .{ .tag = .ref, .base = .tmp4x };
+ const tmp4y: Select.Operand = .{ .tag = .ref, .base = .tmp4y };
+
+ const dst0b: Select.Operand = .{ .tag = .ref, .base = .dst0b };
+ const dst0w: Select.Operand = .{ .tag = .ref, .base = .dst0w };
+ const dst0d: Select.Operand = .{ .tag = .ref, .base = .dst0d };
+ const dst0p: Select.Operand = .{ .tag = .ref, .base = .dst0p };
+ const dst0q: Select.Operand = .{ .tag = .ref, .base = .dst0q };
+ const dst0x: Select.Operand = .{ .tag = .ref, .base = .dst0x };
+ const dst0y: Select.Operand = .{ .tag = .ref, .base = .dst0y };
+
+ const src0b: Select.Operand = .{ .tag = .ref, .base = .src0b };
+ const src0w: Select.Operand = .{ .tag = .ref, .base = .src0w };
+ const src0d: Select.Operand = .{ .tag = .ref, .base = .src0d };
+ const src0p: Select.Operand = .{ .tag = .ref, .base = .src0p };
+ const src0q: Select.Operand = .{ .tag = .ref, .base = .src0q };
+ const src0x: Select.Operand = .{ .tag = .ref, .base = .src0x };
+ const src0y: Select.Operand = .{ .tag = .ref, .base = .src0y };
+
+ const src1b: Select.Operand = .{ .tag = .ref, .base = .src1b };
+ const src1w: Select.Operand = .{ .tag = .ref, .base = .src1w };
+ const src1d: Select.Operand = .{ .tag = .ref, .base = .src1d };
+ const src1p: Select.Operand = .{ .tag = .ref, .base = .src1p };
+ const src1q: Select.Operand = .{ .tag = .ref, .base = .src1q };
+ const src1x: Select.Operand = .{ .tag = .ref, .base = .src1x };
+ const src1y: Select.Operand = .{ .tag = .ref, .base = .src1y };
+
+ fn limb(ref: Ref.Sized) Select.Operand {
+ return .{ .tag = .limb, .base = ref };
+ }
+ fn maskLimb(ref: Ref.Sized) Select.Operand {
+ return .{ .tag = .mask_limb, .base = ref };
+ }
+
+ fn i(imm: i32) Select.Operand {
+ return .{ .tag = .simm, .imm = imm };
+ }
+ fn a(base: Ref.Sized, adjust: Adjust) Select.Operand {
+ return .{ .tag = .simm, .base = base, .adjust = adjust };
+ }
+ fn ia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand {
+ return .{ .tag = .simm, .base = base, .adjust = adjust, .imm = imm };
+ }
+
+ fn lea(size: Memory.Size, base: Ref) Select.Operand {
+ return .{
+ .tag = .lea,
+ .base = .{ .ref = base, .size = size },
+ };
+ }
+ fn lead(size: Memory.Size, base: Ref, disp: i32) Select.Operand {
+ return .{
+ .tag = .lea,
+ .base = .{ .ref = base, .size = size },
+ .imm = disp,
+ };
+ }
+ fn leai(size: Memory.Size, base: Ref, index: Ref) Select.Operand {
+ return .{
+ .tag = .lea,
+ .base = .{ .ref = base, .size = size },
+ .index_ = .{ .ref = index, .scale = .@"1" },
+ };
+ }
+ fn leaid(size: Memory.Size, base: Ref, index: Ref, disp: i32) Select.Operand {
+ return .{
+ .tag = .lea,
+ .base = .{ .ref = base, .size = size },
+ .index_ = .{ .ref = index, .scale = .@"1" },
+ .imm = disp,
+ };
+ }
+ fn leasi(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref) Select.Operand {
+ return .{
+ .tag = .lea,
+ .base = .{ .ref = base, .size = size },
+ .index = .{ .ref = index, .scale = scale },
+ };
+ }
+ fn leasid(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, disp: i32) Select.Operand {
+ return .{
+ .tag = .lea,
+ .base = .{ .ref = base, .size = size },
+ .index = .{ .ref = index, .scale = scale },
+ .imm = disp,
+ };
+ }
+ fn leasida(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, disp: i32, adjust: Adjust) Select.Operand {
+ return .{
+ .tag = .lea,
+ .base = .{ .ref = base, .size = size },
+ .index = .{ .ref = index, .scale = scale },
+ .adjust = adjust,
+ .imm = disp,
+ };
+ }
+
+ fn mem(base: Ref.Sized) Select.Operand {
+ return .{
+ .tag = .mem,
+ .base = base,
+ };
+ }
+ fn memd(base: Ref.Sized, disp: i32) Select.Operand {
+ return .{
+ .tag = .mem,
+ .base = base,
+ .imm = disp,
+ };
+ }
+ fn memi(base: Ref.Sized, index: Ref) Select.Operand {
+ return .{
+ .tag = .mem,
+ .base = base,
+ .index = .{ .ref = index, .scale = .@"1" },
+ };
+ }
+ fn memia(base: Ref.Sized, index: Ref, adjust: Adjust) Select.Operand {
+ return .{
+ .tag = .mem,
+ .base = base,
+ .index = .{ .ref = index, .scale = .@"1" },
+ .adjust = adjust,
+ };
+ }
+ fn memid(base: Ref.Sized, index: Ref, disp: i32) Select.Operand {
+ return .{
+ .tag = .mem,
+ .base = base,
+ .index = .{ .ref = index, .scale = .@"1" },
+ .imm = disp,
+ };
+ }
+ fn memsi(base: Ref.Sized, scale: Memory.Scale, index: Ref) Select.Operand {
+ return .{
+ .tag = .mem,
+ .base = base,
+ .index = .{ .ref = index, .scale = scale },
+ };
+ }
+ fn memsid(base: Ref.Sized, scale: Memory.Scale, index: Ref, disp: i32) Select.Operand {
+ return .{
+ .tag = .mem,
+ .base = base,
+ .index = .{ .ref = index, .scale = scale },
+ .imm = disp,
+ };
+ }
+ fn memsida(base: Ref.Sized, scale: Memory.Scale, index: Ref, disp: i32, adjust: Adjust) Select.Operand {
+ return .{
+ .tag = .mem,
+ .base = base,
+ .index = .{ .ref = index, .scale = scale },
+ .adjust = adjust,
+ .imm = disp,
+ };
+ }
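
Note: these constructors are the addressing-form shorthand available to pattern lines. An illustrative instruction (not from this commit; assumes `tmp2` holds an element index and that `Memory.Scale` has a `.@"4"` member):

    // Load the tmp2-th dword element of src0 into tmp1d; `.memsi(.src0d, .@"4", .tmp2)`
    // lowers to a `dword ptr [src0 + tmp2*4]` operand once the refs resolve.
    .{ ._, ._, .mov, .tmp1d, .memsi(.src0d, .@"4", .tmp2), ._, ._ },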
+
+ fn adjustedImm(op: Select.Operand, s: *const Select) i32 {
+ return switch (op.adjust) {
+ .none => op.imm,
+ .add_access_size => op.imm + @as(i32, @intCast(@divExact(op.base.size.bitSize(s.cg.target), 8))),
+ .sub_access_size => op.imm - @as(i32, @intCast(@divExact(op.base.size.bitSize(s.cg.target), 8))),
+ .add_size => op.imm + @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu))),
+ .sub_size => op.imm - @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu))),
+ .add_bit_size => op.imm + @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).bitSize(s.cg.pt.zcu))),
+ .sub_bit_size => op.imm - @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).bitSize(s.cg.pt.zcu))),
+ .add_limbs => op.imm + @as(i32, @intCast(@divExact(
+ op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu),
+ @divExact(op.base.size.bitSize(s.cg.target), 8),
+ ))),
+ .sub_limbs => op.imm - @as(i32, @intCast(@divExact(
+ op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu),
+ @divExact(op.base.size.bitSize(s.cg.target), 8),
+ ))),
+ .add_len => op.imm + @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu))),
+ .sub_len => op.imm - @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu))),
+ .add_elem_size => op.imm + @as(i32, @intCast(
+ op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu),
+ )),
+ .sub_elem_size => op.imm - @as(i32, @intCast(
+ op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu),
+ )),
+ .add_elem_limbs => op.imm + @as(i32, @intCast(@divExact(
+ op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu),
+ @divExact(op.base.size.bitSize(s.cg.target), 8),
+ ))),
+ .sub_elem_limbs => op.imm - @as(i32, @intCast(@divExact(
+ op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu),
+ @divExact(op.base.size.bitSize(s.cg.target), 8),
+ ))),
+ };
+ }
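
Note: the `Adjust` variants fold type-derived quantities into the operand's immediate at lower time. A worked example under assumed values (a `@Vector(8, u32)` operand accessed in yword limbs):

    const std = @import("std");

    pub fn main() void {
        const imm: i32 = 0;
        const access_size: i32 = 32; // @divExact(yword.bitSize(target), 8)
        const abi_size: i32 = 32; // 8 * @sizeOf(u32)
        std.debug.print(".add_access_size -> {}\n", .{imm + access_size});
        std.debug.print(".sub_size        -> {}\n", .{imm - abi_size});
        std.debug.print(".add_limbs       -> {}\n", .{imm + @divExact(abi_size, access_size)});
        std.debug.print(".add_len         -> {}\n", .{imm + 8}); // vectorLen
        std.debug.print(".add_elem_size   -> {}\n", .{imm + 4}); // u32 abiSize
    }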
+
+ fn lower(op: Select.Operand, s: *Select) !CodeGen.Operand {
+ return switch (op.tag) {
+ .none => .none,
+ .backward_label => .{ .inst = s.labels[@intFromEnum(op.base.ref)].backward.? },
+ .forward_label => for (&s.labels[@intFromEnum(op.base.ref)].forward) |*label| {
+ if (label.*) |_| continue;
+ label.* = @intCast(s.cg.mir_instructions.len);
+ break .{ .inst = undefined };
+ } else unreachable,
+ .ref => switch (op.base.ref.deref(s).tracking(s.cg).short) {
+ .immediate => |imm| .{ .imm = switch (op.base.size) {
+ .byte => if (std.math.cast(i8, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u8, @intCast(imm))),
+ .word => if (std.math.cast(i16, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u16, @intCast(imm))),
+ .dword => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u32, @intCast(imm))),
+ .qword => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(imm),
+ else => unreachable,
+ } },
+ else => |mcv| .{ .mem = try mcv.mem(s.cg, .{ .size = op.base.size }) },
+ .register => |reg| .{ .reg = registerAlias(reg, @intCast(@divExact(op.base.size.bitSize(s.cg.target), 8))) },
+ },
+ inline .limb, .mask_limb => |kind| .{ .mem = try op.base.ref.deref(s).tracking(s.cg).short.mem(s.cg, @field(s, @tagName(kind))) },
+ .simm => .{ .imm = .s(op.adjustedImm(s)) },
+ .lea => .{ .mem = .{
+ .base = .{ .reg = registerAlias(op.base.ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)) },
+ .mod = .{ .rm = .{
+ .size = op.base.size,
+ .index = switch (op.index.ref) {
+ else => |ref| registerAlias(ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)),
+ .none => .none,
+ },
+ .scale = op.index.scale,
+ .disp = op.adjustedImm(s),
+ } },
} },
- else => |mcv| .{ .mem = try mcv.mem(s.cg, .{ .size = size }) },
- .register => |reg| .{ .reg = registerAlias(reg, @intCast(@divExact(size.bitSize(), 8))) },
+ .mem => .{ .mem = try op.base.ref.deref(s).tracking(s.cg).short.mem(s.cg, .{
+ .size = op.base.size,
+ .index = switch (op.index.ref) {
+ else => |ref| registerAlias(ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)),
+ .none => .none,
+ },
+ .scale = op.index.scale,
+ .disp = op.adjustedImm(s),
+ }) },
};
}
};
};
-fn select2(
+fn select(
cg: *CodeGen,
dst_temps: []Temp,
dst_tys: []const Type,
src_temps: []Temp,
- cases: []const Select2.Case,
+ cases: []const Select.Case,
) !void {
- cases: for (cases) |*case| {
+ cases: for (cases) |case| {
for (case.required_features) |required_feature| if (required_feature) |feature| if (!switch (feature) {
- .@"64bit" => cg.target.cpu.arch == .x86_64,
+ .@"64bit" => cg.target.ptrBitWidth() == 64,
.mmx => false,
else => cg.hasFeature(feature),
}) continue :cases;
- for (case.constraints[0..src_temps.len], src_temps) |src_constraint, src_temp| if (!src_constraint.accepts(src_temp, cg)) continue :cases;
- patterns: for (case.patterns) |*pattern| {
+ for (case.dst_constraints[0..dst_temps.len], dst_tys) |dst_constraint, dst_ty| if (!dst_constraint.accepts(dst_ty, cg)) continue :cases;
+ for (case.src_constraints[0..src_temps.len], src_temps) |src_constraint, src_temp| if (!src_constraint.accepts(src_temp.typeOf(cg), cg)) continue :cases;
+ patterns: for (case.patterns) |pattern| {
for (pattern.src, src_temps) |src_pattern, src_temp| if (!src_pattern.matches(src_temp, cg)) continue :patterns;
- var s: Select2 = .{
+ var s: Select = .{
.cg = cg,
- .case = case,
- .pattern = pattern,
- .extra_temps = undefined,
- .dst_temps = dst_temps,
- .src_temps = src_temps,
- .commute = pattern.commute,
+ .temps = undefined,
+ .labels = @splat(.{ .forward = @splat(null), .backward = null }),
.limb = undefined,
+ .mask_limb = undefined,
};
- for (&s.extra_temps, case.extra_temps) |*temp, spec| temp.* = try spec.create(s) orelse continue;
+ const tmp_slots = s.temps[@intFromEnum(Select.Operand.Ref.tmp0)..@intFromEnum(Select.Operand.Ref.dst0)];
+ const dst_slots = s.temps[@intFromEnum(Select.Operand.Ref.dst0)..@intFromEnum(Select.Operand.Ref.src0)];
+ const src_slots = s.temps[@intFromEnum(Select.Operand.Ref.src0)..@intFromEnum(Select.Operand.Ref.none)];
+
+ for (tmp_slots, case.extra_temps) |*slot, spec| slot.* = try spec.create(&s) orelse continue;
while (true) for (pattern.src, src_temps) |src_pattern, *src_temp| {
if (try src_pattern.convert(src_temp, cg)) break;
} else break;
+ @memcpy(src_slots[0..src_temps.len], src_temps);
+ std.mem.swap(Temp, &src_slots[pattern.commute[0]], &src_slots[pattern.commute[1]]);
if (case.clobbers.eflags or case.each != .once) try cg.spillEflagsIfOccupied();
for (dst_temps, dst_tys, case.dst_temps[0..dst_temps.len]) |*dst_temp, dst_ty, dst_kind|
- dst_temp.* = (try Select2.TempSpec.create(.{ .type = dst_ty, .kind = dst_kind }, s)).?;
+ dst_temp.* = (try Select.TempSpec.create(.{ .type = dst_ty, .kind = dst_kind }, &s)).?;
+ @memcpy(dst_slots[0..dst_temps.len], dst_temps);
switch (case.each) {
.once => |body| for (body) |inst| try s.emit(inst),
.limb => |limb| {
- const limb_size, const limb_of_temp = limb.of.unwrap(s);
- const limb_of_size: u31 = @intCast(limb_of_temp.typeOf(cg).abiSize(cg.pt.zcu));
+ const limb_of_size: i32 = @intCast(limb.of.ref.deref(&s).typeOf(cg).abiSize(cg.pt.zcu));
s.limb = .{
- .size = limb_size,
- .index = s.extra_temps[0].tracking(cg).short.register.to64(),
+ .size = limb.of.size,
+ .index = (try Select.Operand.tmp0p.lower(&s)).reg,
.disp = limb_of_size,
};
for (limb.header) |inst| try s.emit(inst);
- try cg.asmRegisterImmediate(.{ ._, .mov }, s.limb.index, .s(-@as(i32, limb_of_size)));
- const limb_loop_reloc: u32 = @intCast(cg.mir_instructions.len);
+ try s.emit(.{ ._, ._, .mov, .tmp0p, .i(-limb_of_size), ._, ._ });
+ assert(s.labels[0].backward == null);
+ s.labels[0].backward = @intCast(cg.mir_instructions.len);
for (limb.body) |inst| try s.emit(inst);
- try cg.asmRegisterImmediate(
- .{ ._, .add },
- s.limb.index,
- .s(@intCast(@divExact(limb_size.bitSize(), 8))),
- );
- _ = try cg.asmJccReloc(.nc, limb_loop_reloc);
+ try s.emit(.{ ._, ._, .add, .tmp0p, .i(@intCast(@divExact(limb.of.size.bitSize(cg.target), 8))), ._, ._ });
+ try s.emit(.{ ._, ._nc, .j, .@"0b", ._, ._, ._ });
for (limb.trailer) |inst| try s.emit(inst);
},
+ .limb_and_mask_limb => |limb| {
+ const limb_of_size: i32 = @intCast(limb.of.ref.deref(&s).typeOf(cg).abiSize(cg.pt.zcu));
+ s.limb = .{
+ .size = limb.of.size,
+ .index = (try Select.Operand.tmp0p.lower(&s)).reg,
+ .disp = limb_of_size,
+ };
+ const mask_limb_bit_size: u31 = @intCast(@divExact(
+ limb.of.size.bitSize(cg.target),
+ limb.of_mask.size.bitSize(cg.target),
+ ));
+ if (mask_limb_bit_size >= 8) {
+ s.mask_limb = .{
+ .size = .fromBitSize(mask_limb_bit_size),
+ .index = (try Select.Operand.tmp1p.lower(&s)).reg,
+ };
+ for (limb.header) |inst| try s.emit(inst);
+ try s.emit(.{ ._, ._, .mov, .tmp0p, .i(-limb_of_size), ._, ._ });
+ try s.emit(.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ });
+ assert(s.labels[0].backward == null);
+ s.labels[0].backward = @intCast(cg.mir_instructions.len);
+ for (limb.body) |inst| try s.emit(inst);
+ try s.emit(.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, @divExact(mask_limb_bit_size, 8)), ._, ._ });
+ try s.emit(.{ ._, ._, .add, .tmp0p, .i(@intCast(@divExact(limb.of.size.bitSize(cg.target), 8))), ._, ._ });
+ try s.emit(.{ ._, ._nc, .j, .@"0b", ._, ._, ._ });
+ for (limb.trailer) |inst| try s.emit(inst);
+ } else {
+ for (limb.header) |inst| try s.emit(inst);
+ try s.emit(.{ ._, ._, .mov, .tmp0p, .i(-limb_of_size), ._, ._ });
+ try s.emit(.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ });
+ try s.emit(.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ });
+ assert(s.labels[0].backward == null);
+ s.labels[0].backward = @intCast(cg.mir_instructions.len);
+ for (limb.body) |inst| try s.emit(inst);
+ try s.emit(.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, mask_limb_bit_size), ._, ._ });
+ try s.emit(.{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ });
+ try s.emit(.{ ._, ._nz, .j, .@"1f", ._, ._, ._ });
+ try s.emit(.{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ });
+ try s.emit(.{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ });
+ try s.emit(.{ ._, ._, .mov, .memid(.{ .ref = limb.of_mask.ref, .size = .byte }, .tmp3, -1), .tmp2b, ._, ._ });
+ try s.emit(.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ });
+ try s.emit(.{ .@"1:", ._, .add, .tmp0p, .i(@intCast(@divExact(limb.of.size.bitSize(cg.target), 8))), ._, ._ });
+ try s.emit(.{ ._, ._nc, .j, .@"0b", ._, ._, ._ });
+ try s.emit(.{ ._, ._, .lea, .tmp3d, .lead(.none, .tmp1, -1), ._, ._ });
+ try s.emit(.{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ });
+ try s.emit(.{ ._, ._, .mov, .memi(.{ .ref = limb.of_mask.ref, .size = .byte }, .tmp3), .tmp2b, ._, ._ });
+ for (limb.trailer) |inst| try s.emit(inst);
+ }
+ },
}
+ s.emitLabel(.@"0:");
- for (dst_temps, case.dst_temps[0..dst_temps.len]) |dst_temp, dst_kind| dst_kind.finish(dst_temp, s);
- for (case.extra_temps, s.extra_temps) |spec, temp| if (spec.kind != .unused) try temp.die(cg);
+ for (dst_temps, case.dst_temps[0..dst_temps.len]) |dst_temp, dst_kind| dst_kind.finish(dst_temp, &s);
+ for (case.extra_temps, tmp_slots) |spec, temp| if (spec.kind != .unused) try temp.die(cg);
return;
}
}
- return error.Select2Failed;
+ return error.SelectFailed;
}
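
Note: the `.limb` case above emits a negative-cursor loop: the index register starts at `-abiSize`, `s.limb.disp` adds the size back into the effective address, and `jnc` exits exactly when the `add` carries past zero. A plain-Zig model of that traversal with assumed sizes (64-byte value, qword limbs):

    const std = @import("std");

    pub fn main() void {
        const total: i32 = 64; // abiSize of the value being walked
        const limb: i32 = 8; // qword limbs
        var cursor: i32 = -total; // mov tmp0p, -64
        while (true) {
            const offset = total + cursor; // [base + disp(total) + tmp0]
            std.debug.print("limb at byte offset {}\n", .{offset});
            cursor += limb; // add tmp0p, 8
            if (cursor >= 0) break; // jnc 0b: the carry sets exactly at zero
        }
    }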
src/arch/x86_64/encoder.zig
@@ -167,11 +167,11 @@ pub const Instruction = struct {
};
}
- pub fn bitSize(mem: Memory) u64 {
+ pub fn bitSize(mem: Memory, target: *const std.Target) u64 {
return switch (mem) {
- .rip => |r| r.ptr_size.bitSize(),
- .sib => |s| s.ptr_size.bitSize(),
- .moffs => 64,
+ .rip => |r| r.ptr_size.bitSize(target),
+ .sib => |s| s.ptr_size.bitSize(target),
+ .moffs => target.ptrBitWidth(),
};
}
};
@@ -314,16 +314,21 @@ pub const Instruction = struct {
}
};
- pub fn new(prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) !Instruction {
+ pub fn new(
+ prefix: Prefix,
+ mnemonic: Mnemonic,
+ ops: []const Operand,
+ target: *const std.Target,
+ ) !Instruction {
const encoding: Encoding = switch (prefix) {
- else => (try Encoding.findByMnemonic(prefix, mnemonic, ops)) orelse {
+ else => (try Encoding.findByMnemonic(prefix, mnemonic, ops, target)) orelse {
log.err("no encoding found for: {s} {s} {s} {s} {s} {s}", .{
@tagName(prefix),
@tagName(mnemonic),
- @tagName(if (ops.len > 0) Encoding.Op.fromOperand(ops[0]) else .none),
- @tagName(if (ops.len > 1) Encoding.Op.fromOperand(ops[1]) else .none),
- @tagName(if (ops.len > 2) Encoding.Op.fromOperand(ops[2]) else .none),
- @tagName(if (ops.len > 3) Encoding.Op.fromOperand(ops[3]) else .none),
+ @tagName(if (ops.len > 0) Encoding.Op.fromOperand(ops[0], target) else .none),
+ @tagName(if (ops.len > 1) Encoding.Op.fromOperand(ops[1], target) else .none),
+ @tagName(if (ops.len > 2) Encoding.Op.fromOperand(ops[2], target) else .none),
+ @tagName(if (ops.len > 3) Encoding.Op.fromOperand(ops[3], target) else .none),
});
return error.InvalidInstruction;
},
@@ -332,10 +337,10 @@ pub const Instruction = struct {
.data = .{
.op_en = .zo,
.ops = .{
- if (ops.len > 0) Encoding.Op.fromOperand(ops[0]) else .none,
- if (ops.len > 1) Encoding.Op.fromOperand(ops[1]) else .none,
- if (ops.len > 2) Encoding.Op.fromOperand(ops[2]) else .none,
- if (ops.len > 3) Encoding.Op.fromOperand(ops[3]) else .none,
+ if (ops.len > 0) Encoding.Op.fromOperand(ops[0], target) else .none,
+ if (ops.len > 1) Encoding.Op.fromOperand(ops[1], target) else .none,
+ if (ops.len > 2) Encoding.Op.fromOperand(ops[2], target) else .none,
+ if (ops.len > 3) Encoding.Op.fromOperand(ops[3], target) else .none,
},
.opc_len = 0,
.opc = undefined,
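
Note: a hypothetical call site for the new `Instruction.new` signature (sketch only; `target` is assumed to be a `*const std.Target` already in scope):

    // Operand classification can now ask the target for the pointer width
    // instead of hardcoding 64-bit (`.moffs` above now uses ptrBitWidth()).
    const inst = try Instruction.new(.none, .mov, &.{
        .{ .reg = .rax },
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .rbp } }) },
    }, target);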
src/arch/x86_64/Encoding.zig
@@ -30,9 +30,10 @@ pub fn findByMnemonic(
prefix: Instruction.Prefix,
mnemonic: Mnemonic,
ops: []const Instruction.Operand,
+ target: *const std.Target,
) !?Encoding {
- var input_ops = [1]Op{.none} ** 4;
- for (input_ops[0..ops.len], ops) |*input_op, op| input_op.* = Op.fromOperand(op);
+ var input_ops: [4]Op = @splat(.none);
+ for (input_ops[0..ops.len], ops) |*input_op, op| input_op.* = Op.fromOperand(op, target);
const rex_required = for (ops) |op| switch (op) {
.reg => |r| switch (r) {
@@ -57,6 +58,16 @@ pub fn findByMnemonic(
var shortest_enc: ?Encoding = null;
var shortest_len: ?usize = null;
next: for (mnemonic_to_encodings_map[@intFromEnum(mnemonic)]) |data| {
+ if (!switch (data.feature) {
+ .none => true,
+ inline else => |tag| has_features: {
+ comptime var feature_it = std.mem.splitScalar(u8, @tagName(tag), ' ');
+ comptime var features: []const std.Target.x86.Feature = &.{};
+ inline while (comptime feature_it.next()) |feature| features = features ++ .{@field(std.Target.x86.Feature, feature)};
+ break :has_features std.Target.x86.featureSetHasAll(target.cpu.features, features[0..features.len].*);
+ },
+ }) continue;
+
switch (data.mode) {
.none, .short => if (rex_required) continue,
.rex, .rex_short => if (!rex_required) continue,
@@ -64,7 +75,7 @@ pub fn findByMnemonic(
}
for (input_ops, data.ops) |input_op, data_op| if (!input_op.isSubset(data_op)) continue :next;
- const enc = Encoding{ .mnemonic = mnemonic, .data = data };
+ const enc: Encoding = .{ .mnemonic = mnemonic, .data = data };
if (shortest_enc) |previous_shortest_enc| {
const len = estimateInstructionLength(prefix, enc, ops);
const previous_shortest_len = shortest_len orelse
@@ -474,7 +485,7 @@ pub const Op = enum {
ymm, ymm_m256,
// zig fmt: on
- pub fn fromOperand(operand: Instruction.Operand) Op {
+ pub fn fromOperand(operand: Instruction.Operand, target: *const std.Target) Op {
return switch (operand) {
.none => .none,
@@ -516,7 +527,7 @@ pub const Op = enum {
.mem => |mem| switch (mem) {
.moffs => .moffs,
- .sib, .rip => switch (mem.bitSize()) {
+ .sib, .rip => switch (mem.bitSize(target)) {
0 => .m,
8 => .m8,
16 => .m16,
@@ -835,7 +846,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op
var inst = Instruction{
.prefix = prefix,
.encoding = encoding,
- .ops = [1]Operand{.none} ** 4,
+ .ops = @splat(.none),
};
@memcpy(inst.ops[0..ops.len], ops);
@@ -850,7 +861,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op
const mnemonic_to_encodings_map = init: {
@setEvalBranchQuota(5_000);
const mnemonic_count = @typeInfo(Mnemonic).@"enum".fields.len;
- var mnemonic_map: [mnemonic_count][]Data = .{&.{}} ** mnemonic_count;
+ var mnemonic_map: [mnemonic_count][]Data = @splat(&.{});
const encodings = @import("encodings.zig");
for (encodings.table) |entry| mnemonic_map[@intFromEnum(entry[0])].len += 1;
var data_storage: [encodings.table.len]Data = undefined;
@@ -859,7 +870,7 @@ const mnemonic_to_encodings_map = init: {
value.ptr = data_storage[storage_i..].ptr;
storage_i += value.len;
}
- var mnemonic_i: [mnemonic_count]usize = .{0} ** mnemonic_count;
+ var mnemonic_i: [mnemonic_count]usize = @splat(0);
const ops_len = @typeInfo(std.meta.FieldType(Data, .ops)).array.len;
const opc_len = @typeInfo(std.meta.FieldType(Data, .opc)).array.len;
for (encodings.table) |entry| {
@@ -876,7 +887,7 @@ const mnemonic_to_encodings_map = init: {
i.* += 1;
}
const final_storage = data_storage;
- var final_map: [mnemonic_count][]const Data = .{&.{}} ** mnemonic_count;
+ var final_map: [mnemonic_count][]const Data = @splat(&.{});
storage_i = 0;
for (&final_map, mnemonic_map) |*final_value, value| {
final_value.* = final_storage[storage_i..][0..value.len];
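
Note: the feature gate added to `findByMnemonic` splits a space-separated tag name into individual CPU features at comptime. A standalone model of that split (the enum here is a stand-in for the feature tag in encodings.zig):

    const std = @import("std");

    const FeatureTag = enum { none, @"avx2 bmi" };

    fn requiredFeatures(comptime tag: FeatureTag) []const std.Target.x86.Feature {
        comptime var feature_it = std.mem.splitScalar(u8, @tagName(tag), ' ');
        comptime var features: []const std.Target.x86.Feature = &.{};
        inline while (comptime feature_it.next()) |feature| features = features ++ .{@field(std.Target.x86.Feature, feature)};
        return features;
    }

    pub fn main() void {
        // An encoding tagged "avx2 bmi" requires both features.
        std.debug.print("{any}\n", .{requiredFeatures(.@"avx2 bmi")});
    }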
src/arch/x86_64/Lower.zig
@@ -1,6 +1,7 @@
//! This file contains the functionality for lowering x86_64 MIR to Instructions
bin_file: *link.File,
+target: *const std.Target,
output_mode: std.builtin.OutputMode,
link_mode: std.builtin.LinkMode,
pic: bool,
@@ -193,7 +194,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.pseudo_probe_align_ri_s => {
try lower.emit(.none, .@"test", &.{
.{ .reg = inst.data.ri.r1 },
- .{ .imm = Immediate.s(@bitCast(inst.data.ri.i)) },
+ .{ .imm = .s(@bitCast(inst.data.ri.i)) },
});
try lower.emit(.none, .jz, &.{
.{ .imm = lower.reloc(.{ .inst = index + 1 }, 0) },
@@ -229,14 +230,14 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
}
try lower.emit(.none, .sub, &.{
.{ .reg = inst.data.ri.r1 },
- .{ .imm = Immediate.s(@bitCast(inst.data.ri.i)) },
+ .{ .imm = .s(@bitCast(inst.data.ri.i)) },
});
assert(lower.result_insts_len <= pseudo_probe_adjust_unrolled_max_insts);
},
.pseudo_probe_adjust_setup_rri_s => {
try lower.emit(.none, .mov, &.{
.{ .reg = inst.data.rri.r2.to32() },
- .{ .imm = Immediate.s(@bitCast(inst.data.rri.i)) },
+ .{ .imm = .s(@bitCast(inst.data.rri.i)) },
});
try lower.emit(.none, .sub, &.{
.{ .reg = inst.data.rri.r1 },
@@ -255,7 +256,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
try lower.emit(.none, .sub, &.{
.{ .reg = inst.data.rr.r2 },
- .{ .imm = Immediate.s(page_size) },
+ .{ .imm = .s(page_size) },
});
try lower.emit(.none, .jae, &.{
.{ .imm = lower.reloc(.{ .inst = index }, 0) },
@@ -355,7 +356,7 @@ pub fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate {
.mi_s,
.rmi_s,
.pseudo_dbg_local_ai_s,
- => Immediate.s(@bitCast(i)),
+ => .s(@bitCast(i)),
.rrri,
.rri_u,
@@ -368,11 +369,11 @@ pub fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate {
.rrm,
.rrmi,
.pseudo_dbg_local_ai_u,
- => Immediate.u(i),
+ => .u(i),
.ri_64,
.pseudo_dbg_local_ai_64,
- => Immediate.u(lower.mir.extraData(Mir.Imm64, i).data.decode()),
+ => .u(lower.mir.extraData(Mir.Imm64, i).data.decode()),
else => unreachable,
};
@@ -389,7 +390,7 @@ fn reloc(lower: *Lower, target: Reloc.Target, off: i32) Immediate {
.off = off,
};
lower.result_relocs_len += 1;
- return Immediate.s(0);
+ return .s(0);
}
fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) Error!void {
@@ -421,15 +422,15 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
try Instruction.new(.none, .lea, &[_]Operand{
.{ .reg = .rdi },
.{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) },
- });
+ }, lower.target);
lower.result_insts_len += 1;
_ = lower.reloc(.{
.linker_extern_fn = try elf_file.getGlobalSymbol("__tls_get_addr", null),
}, 0);
lower.result_insts[lower.result_insts_len] =
try Instruction.new(.none, .call, &[_]Operand{
- .{ .imm = Immediate.s(0) },
- });
+ .{ .imm = .s(0) },
+ }, lower.target);
lower.result_insts_len += 1;
_ = lower.reloc(.{ .linker_dtpoff = sym_index }, 0);
emit_mnemonic = .lea;
@@ -443,7 +444,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
try Instruction.new(.none, .mov, &[_]Operand{
.{ .reg = .rax },
.{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .fs } }) },
- });
+ }, lower.target);
lower.result_insts_len += 1;
_ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
emit_mnemonic = .lea;
@@ -467,7 +468,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
try Instruction.new(.none, .mov, &[_]Operand{
.{ .reg = reg.to64() },
.{ .mem = Memory.initRip(.qword, 0) },
- });
+ }, lower.target);
lower.result_insts_len += 1;
break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{ .base = .{
.reg = reg.to64(),
@@ -482,7 +483,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
}) },
.lea => {
emit_mnemonic = .mov;
- break :op .{ .imm = Immediate.s(0) };
+ break :op .{ .imm = .s(0) };
},
.mov => break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{
.base = .{ .reg = .ds },
@@ -541,7 +542,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
};
}
lower.result_insts[lower.result_insts_len] =
- try Instruction.new(emit_prefix, emit_mnemonic, emit_ops);
+ try Instruction.new(emit_prefix, emit_mnemonic, emit_ops, lower.target);
lower.result_insts_len += 1;
}
@@ -743,7 +744,7 @@ fn pushPopRegList(lower: *Lower, comptime mnemonic: Mnemonic, inst: Mir.Inst) Er
while (it.next()) |i| {
try lower.emit(.directive, .@".cfi_rel_offset", &.{
.{ .reg = callee_preserved_regs[i] },
- .{ .imm = Immediate.s(off) },
+ .{ .imm = .s(off) },
});
off += 8;
}
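
Note: the `Immediate.s(...)` to `.s(...)` churn in this file relies on decl literals: with the result type known, `.s(0)` resolves to a declaration on that type. A minimal standalone model:

    const std = @import("std");

    const Immediate = union(enum) {
        signed: i64,
        unsigned: u64,

        fn s(x: i64) Immediate {
            return .{ .signed = x };
        }
    };

    fn takesImm(imm: Immediate) i64 {
        return switch (imm) {
            .signed => |x| x,
            .unsigned => |x| @intCast(x),
        };
    }

    pub fn main() void {
        std.debug.print("{}\n", .{takesImm(.s(-129))}); // `.s` resolves on Immediate
    }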
src/arch/x86_64/Mir.zig
@@ -214,6 +214,10 @@ pub const Inst = struct {
p_q,
/// Packed ___ Double Quadword
p_dq,
+ /// ___ Aligned Packed Integer Values
+ _dqa,
+ /// ___ Unaligned Packed Integer Values
+ _dqu,
/// ___ Scalar Single-Precision Values
_ss,
@@ -234,6 +238,10 @@ pub const Inst = struct {
v_d,
/// VEX-Encoded ___ QuadWord
v_q,
+ /// VEX-Encoded ___ Aligned Packed Integer Values
+ v_dqa,
+ /// VEX-Encoded ___ Unaligned Packed Integer Values
+ v_dqu,
/// VEX-Encoded ___ Integer Data
v_i128,
/// VEX-Encoded Packed ___
@@ -362,6 +370,8 @@ pub const Inst = struct {
/// Move scalar double-precision floating-point value
/// Move doubleword
/// Move quadword
+ /// Move aligned packed integer values
+ /// Move unaligned packed integer values
mov,
/// Move data after swapping bytes
movbe,
@@ -609,10 +619,6 @@ pub const Inst = struct {
cvttps2dq,
/// Convert with truncation scalar double-precision floating-point value to doubleword integer
cvttsd2si,
- /// Move aligned packed integer values
- movdqa,
- /// Move unaligned packed integer values
- movdqu,
/// Packed interleave shuffle of quadruplets of single-precision floating-point values
/// Packed interleave shuffle of pairs of double-precision floating-point values
/// Shuffle packed doublewords
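
Note: with `movdqa`/`movdqu` folded into the `_dqa`/`_dqu` fixes, the mnemonic becomes plain `mov`, so AVX selection only swaps the fixes. Illustrative pairing (assumed call shape, not from this commit):

    // Encodes movdqu without AVX, vmovdqu with it:
    const fixed_tag: Mir.Inst.FixedTag = .{ if (has_avx) .v_dqu else ._dqu, .mov };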
src/link/Elf/Atom.zig
@@ -976,6 +976,7 @@ const x86_64 = struct {
it: *RelocsIterator,
) !void {
dev.check(.x86_64_backend);
+ const t = &elf_file.base.comp.root_mod.resolved_target.result;
const is_static = elf_file.base.isStatic();
const is_dyn_lib = elf_file.isEffectivelyDynLib();
@@ -1046,7 +1047,7 @@ const x86_64 = struct {
.GOTTPOFF => {
const should_relax = blk: {
if (is_dyn_lib or symbol.flags.import) break :blk false;
- if (!x86_64.canRelaxGotTpOff(code.?[r_offset - 3 ..])) break :blk false;
+ if (!x86_64.canRelaxGotTpOff(code.?[r_offset - 3 ..], t)) break :blk false;
break :blk true;
};
if (!should_relax) {
@@ -1090,6 +1091,7 @@ const x86_64 = struct {
stream: anytype,
) (error{ InvalidInstruction, CannotEncode } || RelocError)!void {
dev.check(.x86_64_backend);
+ const t = &elf_file.base.comp.root_mod.resolved_target.result;
const diags = &elf_file.base.comp.link_diags;
const r_type: elf.R_X86_64 = @enumFromInt(rel.r_type());
const r_offset = std.math.cast(usize, rel.r_offset) orelse return error.Overflow;
@@ -1120,7 +1122,7 @@ const x86_64 = struct {
.GOTPCRELX => {
if (!target.flags.import and !target.isIFunc(elf_file) and !target.isAbs(elf_file)) blk: {
- x86_64.relaxGotpcrelx(code[r_offset - 2 ..]) catch break :blk;
+ x86_64.relaxGotpcrelx(code[r_offset - 2 ..], t) catch break :blk;
try cwriter.writeInt(i32, @as(i32, @intCast(S + A - P)), .little);
return;
}
@@ -1129,7 +1131,7 @@ const x86_64 = struct {
.REX_GOTPCRELX => {
if (!target.flags.import and !target.isIFunc(elf_file) and !target.isAbs(elf_file)) blk: {
- x86_64.relaxRexGotpcrelx(code[r_offset - 3 ..]) catch break :blk;
+ x86_64.relaxRexGotpcrelx(code[r_offset - 3 ..], t) catch break :blk;
try cwriter.writeInt(i32, @as(i32, @intCast(S + A - P)), .little);
return;
}
@@ -1184,7 +1186,7 @@ const x86_64 = struct {
const S_ = target.tlsDescAddress(elf_file);
try cwriter.writeInt(i32, @as(i32, @intCast(S_ + A - P)), .little);
} else {
- x86_64.relaxGotPcTlsDesc(code[r_offset - 3 ..]) catch {
+ x86_64.relaxGotPcTlsDesc(code[r_offset - 3 ..], t) catch {
var err = try diags.addErrorWithNotes(1);
try err.addMsg("could not relax {s}", .{@tagName(r_type)});
err.addNote("in {}:{s} at offset 0x{x}", .{
@@ -1208,7 +1210,7 @@ const x86_64 = struct {
const S_ = target.gotTpAddress(elf_file);
try cwriter.writeInt(i32, @as(i32, @intCast(S_ + A - P)), .little);
} else {
- x86_64.relaxGotTpOff(code[r_offset - 3 ..]);
+ x86_64.relaxGotTpOff(code[r_offset - 3 ..], t);
try cwriter.writeInt(i32, @as(i32, @intCast(S - TP)), .little);
}
},
@@ -1269,31 +1271,31 @@ const x86_64 = struct {
}
}
- fn relaxGotpcrelx(code: []u8) !void {
+ fn relaxGotpcrelx(code: []u8, t: *const std.Target) !void {
dev.check(.x86_64_backend);
const old_inst = disassemble(code) orelse return error.RelaxFailure;
const inst = switch (old_inst.encoding.mnemonic) {
.call => try Instruction.new(old_inst.prefix, .call, &.{
// TODO: hack to force imm32s in the assembler
.{ .imm = Immediate.s(-129) },
- }),
+ }, t),
.jmp => try Instruction.new(old_inst.prefix, .jmp, &.{
// TODO: hack to force imm32s in the assembler
.{ .imm = Immediate.s(-129) },
- }),
+ }, t),
else => return error.RelaxFailure,
};
relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding });
- const nop = try Instruction.new(.none, .nop, &.{});
+ const nop = try Instruction.new(.none, .nop, &.{}, t);
try encode(&.{ nop, inst }, code);
}
- fn relaxRexGotpcrelx(code: []u8) !void {
+ fn relaxRexGotpcrelx(code: []u8, t: *const std.Target) !void {
dev.check(.x86_64_backend);
const old_inst = disassemble(code) orelse return error.RelaxFailure;
switch (old_inst.encoding.mnemonic) {
.mov => {
- const inst = try Instruction.new(old_inst.prefix, .lea, &old_inst.ops);
+ const inst = try Instruction.new(old_inst.prefix, .lea, &old_inst.ops, t);
relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding });
try encode(&.{inst}, code);
},
@@ -1398,7 +1400,7 @@ const x86_64 = struct {
}
}
- fn canRelaxGotTpOff(code: []const u8) bool {
+ fn canRelaxGotTpOff(code: []const u8, t: *const std.Target) bool {
dev.check(.x86_64_backend);
const old_inst = disassemble(code) orelse return false;
switch (old_inst.encoding.mnemonic) {
@@ -1406,7 +1408,7 @@ const x86_64 = struct {
old_inst.ops[0],
// TODO: hack to force imm32s in the assembler
.{ .imm = Immediate.s(-129) },
- })) |inst| {
+ }, t)) |inst| {
inst.encode(std.io.null_writer, .{}) catch return false;
return true;
} else |_| return false,
@@ -1414,7 +1416,7 @@ const x86_64 = struct {
}
}
- fn relaxGotTpOff(code: []u8) void {
+ fn relaxGotTpOff(code: []u8, t: *const std.Target) void {
dev.check(.x86_64_backend);
const old_inst = disassemble(code) orelse unreachable;
switch (old_inst.encoding.mnemonic) {
@@ -1423,7 +1425,7 @@ const x86_64 = struct {
old_inst.ops[0],
// TODO: hack to force imm32s in the assembler
.{ .imm = Immediate.s(-129) },
- }) catch unreachable;
+ }, t) catch unreachable;
relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding });
encode(&.{inst}, code) catch unreachable;
},
@@ -1431,7 +1433,7 @@ const x86_64 = struct {
}
}
- fn relaxGotPcTlsDesc(code: []u8) !void {
+ fn relaxGotPcTlsDesc(code: []u8, t: *const std.Target) !void {
dev.check(.x86_64_backend);
const old_inst = disassemble(code) orelse return error.RelaxFailure;
switch (old_inst.encoding.mnemonic) {
@@ -1440,7 +1442,7 @@ const x86_64 = struct {
old_inst.ops[0],
// TODO: hack to force imm32s in the assembler
.{ .imm = Immediate.s(-129) },
- });
+ }, t);
relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding });
try encode(&.{inst}, code);
},
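
All of these relax helpers now take the std.Target pulled from elf_file.base.comp.root_mod.resolved_target.result, because re-encoding through Instruction.new needs it. The in-place rewrites are safe because the relaxed instruction has the same encoded length as the original; in relaxRexGotpcrelx, for example, a REX.W GOT load and the relaxed lea differ only in the opcode byte. A byte-level sketch of that invariant (the real code round-trips through disassemble, Instruction.new, and encode as shown above):

const std = @import("std");

// mov rax, [rip+disp32] encodes as 48 8b 05 <disp32>;
// lea rax, [rip+disp32] encodes as 48 8d 05 <disp32>.
// Both are 7 bytes, so relaxation can flip the opcode in place and let the
// relocation write a symbol-relative disp32 instead of a GOT-slot one.
fn relaxMovToLea(code: []u8) !void {
    if (code.len < 2 or (code[0] & 0xf0) != 0x40 or code[1] != 0x8b)
        return error.RelaxFailure;
    code[1] = 0x8d; // mov -> lea; REX, ModRM, and disp32 stay put
}

test "mov to lea relaxation preserves instruction length" {
    var code = [_]u8{ 0x48, 0x8b, 0x05, 0, 0, 0, 0 };
    try relaxMovToLea(&code);
    try std.testing.expectEqual(@as(u8, 0x8d), code[1]);
}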
test/behavior/x86_64/math.zig
@@ -264,11 +264,48 @@ fn testBinary(comptime op: anytype) !void {
0xed533d18f8657f3f, 0x1ddd7cd7f6bab957,
});
- if (false) try testType(@Vector(1, u128), .{
+ try testType(@Vector(1, u128), .{
0x5f11e16b0ca3392f907a857881455d2e,
}, .{
0xf9142d73b408fd6955922f9fc147f7d7,
});
+ try testType(@Vector(2, u128), .{
+ 0xee0fb41fabd805923fb21b5c658e3a87,
+ 0x2352e74aad6c58b3255ff0bba5aa6552,
+ }, .{
+ 0x8d822f9fdd9cb9a5b43513b14419b224,
+ 0x1aef2a02704379e38ead4d53d69e4cc4,
+ });
+ try testType(@Vector(4, u128), .{
+ 0xc74437a4ea3bbbb193dbf0ea2f0c5281,
+ 0x039e4b1640868248780db1834a0027eb,
+ 0xb9e8bb34155b2b238da20331d08ff85b,
+ 0x863802d34a54c2e6aa71dd0f067c4904,
+ }, .{
+ 0x7471bae24ff7b84ab107f86ba2b7d1e7,
+ 0x8f34c449d0576e682c20bda74aa6b6c9,
+ 0x1f34c3efa167b61c48c9d5ec01a1a93f,
+ 0x71c8318fcf3ddc7be058c73a52dce9e3,
+ });
+ try testType(@Vector(8, u128), .{
+ 0xbf2db71463037f55ee338431f902a906,
+ 0xb7ad317626655f38ab25ae30d8a1aa67,
+ 0x7d3c5a3ffaa607b5560d69ae3fcf7863,
+ 0x009a39a8badf8b628c686dc176aa1273,
+ 0x49dba3744c91304cc7bbbdab61b6c969,
+ 0x6ec664b624f7acf79ce69d80ed7bc85c,
+ 0xe02d7a303c0f00c39010f3b815547f1c,
+ 0xb13e1ee914616f58cffe6acd33d9b5c8,
+ }, .{
+ 0x2f2d355a071942a7384f82ba72a945b8,
+ 0x61f151b3afec8cb7664f813cecf581d1,
+ 0x5bfbf5484f3a07f0eacc4739ff48af80,
+ 0x59c0abbf8d829cf525a87d5c9c41a38a,
+ 0xdad8b18eb680f0520ca49ebfb5842e22,
+ 0xa05adcaedd9057480b3ba0413d003cec,
+ 0x8b0b4a27fc94a0e90652d19bc755b63d,
+ 0xa858bce5ad0e48c13588a4e170e8667c,
+ });
}
inline fn bitAnd(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs & rhs) {