Commit ac1a975f9b
Changed files (19)
src
test
behavior
lib/std/math/big/int.zig
@@ -2520,12 +2520,13 @@ pub const Const = struct {
return order(a, b) == .eq;
}
+ /// Returns the number of leading zeros in twos-complement form.
pub fn clz(a: Const, bits: Limb) Limb {
- // Limbs are stored in little-endian order but we need
- // to iterate big-endian.
+ // Limbs are stored in little-endian order but we need to iterate big-endian.
+ if (!a.positive) return 0;
var total_limb_lz: Limb = 0;
var i: usize = a.limbs.len;
- const bits_per_limb = @sizeOf(Limb) * 8;
+ const bits_per_limb = @bitSizeOf(Limb);
while (i != 0) {
i -= 1;
const limb = a.limbs[i];
@@ -2537,13 +2538,15 @@ pub const Const = struct {
return total_limb_lz + bits - total_limb_bits;
}
+ /// Returns the number of trailing zeros in twos-complement form.
pub fn ctz(a: Const, bits: Limb) Limb {
- // Limbs are stored in little-endian order.
+ // Limbs are stored in little-endian order. Converting a negative number to twos-complement
+ // flips all bits above the lowest set bit, which does not affect the trailing zero count.
var result: Limb = 0;
for (a.limbs) |limb| {
const limb_tz = @ctz(limb);
result += limb_tz;
- if (limb_tz != @sizeOf(Limb) * 8) break;
+ if (limb_tz != @bitSizeOf(Limb)) break;
}
return @min(result, bits);
}
lib/std/Target/Query.zig
@@ -6,13 +6,13 @@
/// `null` means native.
cpu_arch: ?Target.Cpu.Arch = null,
-cpu_model: CpuModel = CpuModel.determined_by_arch_os,
+cpu_model: CpuModel = .determined_by_arch_os,
/// Sparse set of CPU features to add to the set from `cpu_model`.
-cpu_features_add: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty,
+cpu_features_add: Target.Cpu.Feature.Set = .empty,
/// Sparse set of CPU features to remove from the set from `cpu_model`.
-cpu_features_sub: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty,
+cpu_features_sub: Target.Cpu.Feature.Set = .empty,
/// `null` means native.
os_tag: ?Target.Os.Tag = null,
@@ -38,7 +38,7 @@ abi: ?Target.Abi = null,
/// When `os_tag` is `null`, then `null` means native. Otherwise it means the standard path
/// based on the `os_tag`.
-dynamic_linker: Target.DynamicLinker = Target.DynamicLinker.none,
+dynamic_linker: Target.DynamicLinker = .none,
/// `null` means default for the cpu/arch/os combo.
ofmt: ?Target.ObjectFormat = null,
lib/std/Target/x86.zig
@@ -47,6 +47,7 @@ pub const Feature = enum {
bmi2,
branch_hint,
branchfusion,
+ bsf_bsr_0_clobbers_result,
ccmp,
cf,
cldemote,
@@ -167,6 +168,8 @@ pub const Feature = enum {
slow_unaligned_mem_32,
sm3,
sm4,
+ smap,
+ smep,
soft_float,
sse,
sse2,
@@ -497,6 +500,11 @@ pub const all_features = blk: {
.description = "CMP/TEST can be fused with conditional branches",
.dependencies = featureSet(&[_]Feature{}),
};
+ result[@intFromEnum(Feature.bsf_bsr_0_clobbers_result)] = .{
+ .llvm_name = null,
+ .description = "BSF/BSR may clobber the lower 32-bits of the result register when the source is zero",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@intFromEnum(Feature.ccmp)] = .{
.llvm_name = "ccmp",
.description = "Support conditional cmp & test instructions",
@@ -1127,6 +1135,16 @@ pub const all_features = blk: {
.avx2,
}),
};
+ result[@intFromEnum(Feature.smap)] = .{
+ .llvm_name = null,
+ .description = "Enable Supervisor Mode Access Prevention",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.smep)] = .{
+ .llvm_name = null,
+ .description = "Enable Supervisor Mode Execution Prevention",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@intFromEnum(Feature.soft_float)] = .{
.llvm_name = "soft-float",
.description = "Use software floating point features",
@@ -1371,6 +1389,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_3ops_lea,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vaes,
.vpclmulqdq,
@@ -1467,6 +1487,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_3ops_lea,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.uintr,
.vaes,
@@ -1545,6 +1567,8 @@ pub const cpu = struct {
.slow_3ops_lea,
.sm3,
.sm4,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.uintr,
.vaes,
@@ -1783,6 +1807,8 @@ pub const cpu = struct {
.sahf,
.sbb_dep_breaking,
.slow_shld,
+ .smap,
+ .smep,
.sse4a,
.vzeroupper,
.x87,
@@ -1995,6 +2021,8 @@ pub const cpu = struct {
.rdseed,
.sahf,
.slow_3ops_lea,
+ .smap,
+ .smep,
.vzeroupper,
.x87,
.xsaveopt,
@@ -2136,6 +2164,8 @@ pub const cpu = struct {
.sahf,
.sha,
.slow_3ops_lea,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vzeroupper,
.x87,
@@ -2195,6 +2225,8 @@ pub const cpu = struct {
.rdseed,
.sahf,
.slow_3ops_lea,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vzeroupper,
.x87,
@@ -2450,6 +2482,8 @@ pub const cpu = struct {
.serialize,
.sha,
.shstk,
+ .smap,
+ .smep,
.tsxldtrk,
.tuning_fast_imm_vector_shift,
.uintr,
@@ -2519,6 +2553,8 @@ pub const cpu = struct {
.slow_incdec,
.slow_lea,
.slow_two_mem_ops,
+ .smap,
+ .smep,
.sse4_2,
.use_glm_div_sqrt_costs,
.vzeroupper,
@@ -2898,6 +2934,7 @@ pub const cpu = struct {
.rdrnd,
.sahf,
.slow_3ops_lea,
+ .smep,
.vzeroupper,
.x87,
.xsaveopt,
@@ -2907,6 +2944,7 @@ pub const cpu = struct {
.name = "i386",
.llvm_name = "i386",
.features = featureSet(&[_]Feature{
+ .bsf_bsr_0_clobbers_result,
.slow_unaligned_mem_16,
.vzeroupper,
.x87,
@@ -2916,6 +2954,7 @@ pub const cpu = struct {
.name = "i486",
.llvm_name = "i486",
.features = featureSet(&[_]Feature{
+ .bsf_bsr_0_clobbers_result,
.slow_unaligned_mem_16,
.vzeroupper,
.x87,
@@ -3096,6 +3135,7 @@ pub const cpu = struct {
.sahf,
.slow_3ops_lea,
.slow_unaligned_mem_32,
+ .smep,
.vzeroupper,
.x87,
.xsaveopt,
@@ -3403,6 +3443,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_3ops_lea,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vaes,
.vpclmulqdq,
@@ -3766,6 +3808,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_3ops_lea,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vaes,
.vpclmulqdq,
@@ -3831,6 +3875,8 @@ pub const cpu = struct {
.rdseed,
.sahf,
.sha,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vaes,
.vpclmulqdq,
@@ -3939,6 +3985,8 @@ pub const cpu = struct {
.serialize,
.sha,
.shstk,
+ .smap,
+ .smep,
.tsxldtrk,
.tuning_fast_imm_vector_shift,
.uintr,
@@ -4042,6 +4090,7 @@ pub const cpu = struct {
.slow_lea,
.slow_pmulld,
.slow_two_mem_ops,
+ .smep,
.sse4_2,
.use_slm_arith_costs,
.vzeroupper,
@@ -4098,6 +4147,8 @@ pub const cpu = struct {
.rdseed,
.sahf,
.slow_3ops_lea,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vzeroupper,
.x87,
@@ -4150,6 +4201,8 @@ pub const cpu = struct {
.rdseed,
.sahf,
.slow_3ops_lea,
+ .smap,
+ .smep,
.vzeroupper,
.x87,
.xsavec,
@@ -4305,6 +4358,8 @@ pub const cpu = struct {
.sahf,
.sha,
.shstk,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vaes,
.vpclmulqdq,
@@ -4574,6 +4629,8 @@ pub const cpu = struct {
.sbb_dep_breaking,
.sha,
.slow_shld,
+ .smap,
+ .smep,
.sse4a,
.vzeroupper,
.x87,
@@ -4629,6 +4686,8 @@ pub const cpu = struct {
.sbb_dep_breaking,
.sha,
.slow_shld,
+ .smap,
+ .smep,
.sse4a,
.vzeroupper,
.wbnoinvd,
@@ -4686,6 +4745,8 @@ pub const cpu = struct {
.sbb_dep_breaking,
.sha,
.slow_shld,
+ .smap,
+ .smep,
.sse4a,
.vaes,
.vpclmulqdq,
@@ -4757,6 +4818,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_shld,
+ .smap,
+ .smep,
.sse4a,
.vaes,
.vpclmulqdq,
@@ -4833,6 +4896,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_shld,
+ .smap,
+ .smep,
.sse4a,
.vaes,
.vpclmulqdq,
lib/std/zig/system/x86.zig
@@ -369,6 +369,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
setFeature(cpu, .bmi, bit(leaf.ebx, 3));
// AVX2 is only supported if we have the OS save support from AVX.
setFeature(cpu, .avx2, bit(leaf.ebx, 5) and has_avx_save);
+ setFeature(cpu, .smep, bit(leaf.ebx, 7));
setFeature(cpu, .bmi2, bit(leaf.ebx, 8));
setFeature(cpu, .invpcid, bit(leaf.ebx, 10));
setFeature(cpu, .rtm, bit(leaf.ebx, 11));
@@ -377,6 +378,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
setFeature(cpu, .avx512dq, bit(leaf.ebx, 17) and has_avx512_save);
setFeature(cpu, .rdseed, bit(leaf.ebx, 18));
setFeature(cpu, .adx, bit(leaf.ebx, 19));
+ setFeature(cpu, .smap, bit(leaf.ebx, 20));
setFeature(cpu, .avx512ifma, bit(leaf.ebx, 21) and has_avx512_save);
setFeature(cpu, .clflushopt, bit(leaf.ebx, 23));
setFeature(cpu, .clwb, bit(leaf.ebx, 24));
src/arch/x86_64/bits.zig
@@ -571,11 +571,15 @@ pub const Memory = struct {
writer: anytype,
) @TypeOf(writer).Error!void {
if (s == .none) return;
- if (s != .ptr) {
- try writer.writeAll(@tagName(s));
- try writer.writeByte(' ');
+ try writer.writeAll(@tagName(s));
+ switch (s) {
+ .none => unreachable,
+ .ptr => {},
+ else => {
+ try writer.writeByte(' ');
+ try writer.writeAll("ptr");
+ },
}
- try writer.writeAll("ptr");
}
};
src/arch/x86_64/CodeGen.zig
@@ -1390,7 +1390,7 @@ fn asmOps(self: *CodeGen, tag: Mir.Inst.FixedTag, ops: [4]Operand) !void {
/// A `cc` of `.z_and_np` clobbers `reg2`!
fn asmCmovccRegisterRegister(self: *CodeGen, cc: Condition, reg1: Register, reg2: Register) !void {
- _ = try self.addInst(.{
+ if (self.hasFeature(.cmov)) _ = try self.addInst(.{
.tag = switch (cc) {
else => .cmov,
.z_and_np, .nz_or_p => .pseudo,
@@ -1408,12 +1408,16 @@ fn asmCmovccRegisterRegister(self: *CodeGen, cc: Condition, reg1: Register, reg2
.r1 = reg1,
.r2 = reg2,
} },
- });
+ }) else {
+ const reloc = try self.asmJccReloc(cc.negate(), undefined);
+ try self.asmRegisterRegister(.{ ._, .mov }, reg1, reg2);
+ self.performReloc(reloc);
+ }
}
/// A `cc` of `.z_and_np` is not supported by this encoding!
fn asmCmovccRegisterMemory(self: *CodeGen, cc: Condition, reg: Register, m: Memory) !void {
- _ = try self.addInst(.{
+ if (self.hasFeature(.cmov)) _ = try self.addInst(.{
.tag = switch (cc) {
else => .cmov,
.z_and_np => unreachable,
@@ -1433,7 +1437,11 @@ fn asmCmovccRegisterMemory(self: *CodeGen, cc: Condition, reg: Register, m: Memo
.r1 = reg,
.payload = try self.addExtra(Mir.Memory.encode(m)),
} },
- });
+ }) else {
+ const reloc = try self.asmJccReloc(cc.negate(), undefined);
+ try self.asmRegisterMemory(.{ ._, .mov }, reg, m);
+ self.performReloc(reloc);
+ }
}
fn asmSetccRegister(self: *CodeGen, cc: Condition, reg: Register) !void {
@@ -2319,6 +2327,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}
fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
+ @setEvalBranchQuota(1_600);
const pt = cg.pt;
const zcu = pt.zcu;
const ip = &zcu.intern_pool;
@@ -2354,9 +2363,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
try cg.inst_tracking.ensureUnusedCapacity(cg.gpa, 1);
switch (air_tags[@intFromEnum(inst)]) {
// zig fmt: off
- .not,
- => |air_tag| try cg.airUnOp(inst, air_tag),
-
.add,
.add_wrap,
.sub,
@@ -2434,7 +2440,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.memset_safe => try cg.airMemset(inst, true),
.set_union_tag => try cg.airSetUnionTag(inst),
.get_union_tag => try cg.airGetUnionTag(inst),
- .clz => try cg.airClz(inst),
.ctz => try cg.airCtz(inst),
.popcount => try cg.airPopCount(inst),
.byte_swap => try cg.airByteSwap(inst),
@@ -2525,146 +2530,156 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
})) {
else => unreachable,
inline .@"and", .@"or", .xor => |mir_tag| comptime &.{ .{
- .required_features = .{ .avx2, null },
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } },
.patterns = &.{
- .{ .src = .{ .ymm, .mem } },
- .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .ymm, .ymm } },
+ .{ .src = .{ .to_ymm, .mem } },
+ .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_ymm, .to_ymm } },
},
.dst_temps = .{.{ .rc = .sse }},
.each = .{ .once = &.{
.{ ._, .vp_, mir_tag, .dst0y, .src0y, .src1y, ._ },
} },
}, .{
- .required_features = .{ .avx, null },
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } },
.patterns = &.{
- .{ .src = .{ .ymm, .mem } },
- .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .ymm, .ymm } },
+ .{ .src = .{ .to_ymm, .mem } },
+ .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_ymm, .to_ymm } },
},
.dst_temps = .{.{ .rc = .sse }},
.each = .{ .once = &.{
.{ ._, .v_pd, mir_tag, .dst0y, .src0y, .src1y, ._ },
} },
}, .{
- .required_features = .{ .avx, null },
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } },
.patterns = &.{
- .{ .src = .{ .xmm, .mem } },
- .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .xmm, .xmm } },
+ .{ .src = .{ .to_xmm, .mem } },
+ .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_xmm, .to_xmm } },
},
.dst_temps = .{.{ .rc = .sse }},
.each = .{ .once = &.{
.{ ._, .vp_, mir_tag, .dst0x, .src0x, .src1x, ._ },
} },
}, .{
- .required_features = .{ .sse2, null },
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } },
.patterns = &.{
- .{ .src = .{ .mut_xmm, .mem } },
- .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_xmm, .xmm } },
+ .{ .src = .{ .to_mut_xmm, .mem } },
+ .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_xmm, .to_xmm } },
},
.dst_temps = .{.{ .ref = .src0 }},
.each = .{ .once = &.{
.{ ._, .p_, mir_tag, .dst0x, .src1x, ._, ._ },
} },
}, .{
- .required_features = .{ .sse, null },
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } },
.patterns = &.{
- .{ .src = .{ .mut_xmm, .mem } },
- .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_xmm, .xmm } },
+ .{ .src = .{ .to_mut_xmm, .mem } },
+ .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_xmm, .to_xmm } },
},
.dst_temps = .{.{ .ref = .src0 }},
.each = .{ .once = &.{
.{ ._, ._ps, mir_tag, .dst0x, .src1x, ._, ._ },
} },
}, .{
- .required_features = .{ .mmx, null },
+ .required_features = .{ .mmx, null, null, null },
+ .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } },
.patterns = &.{
- .{ .src = .{ .mut_mm, .mem } },
- .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_mm, .mm } },
+ .{ .src = .{ .to_mut_mm, .mem } },
+ .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_mm, .to_mm } },
},
.dst_temps = .{.{ .ref = .src0 }},
.each = .{ .once = &.{
.{ ._, .p_, mir_tag, .dst0q, .src1q, ._, ._ },
} },
}, .{
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .src_constraints = .{ .{ .int_or_vec = .byte }, .{ .int_or_vec = .byte } },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8 } },
.{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .imm8 } },
- .{ .src = .{ .imm8, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_mem, .gpr } },
- .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .mem } },
- .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .gpr } },
+ .{ .src = .{ .to_mut_gpr, .imm8 } },
+ .{ .src = .{ .imm8, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .mut_mem, .to_gpr } },
+ .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .mem } },
+ .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .to_gpr } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, mir_tag, .dst0b, .src1b, ._, ._ },
} },
}, .{
- .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .src_constraints = .{ .{ .int_or_vec = .word }, .{ .int_or_vec = .word } },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm16 } },
.{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .imm16 } },
- .{ .src = .{ .imm16, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_mem, .gpr } },
- .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .mem } },
- .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .gpr } },
+ .{ .src = .{ .to_mut_gpr, .imm16 } },
+ .{ .src = .{ .imm16, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .mut_mem, .to_gpr } },
+ .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .mem } },
+ .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .to_gpr } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, mir_tag, .dst0w, .src1w, ._, ._ },
} },
}, .{
- .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .src_constraints = .{ .{ .int_or_vec = .dword }, .{ .int_or_vec = .dword } },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm32 } },
.{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .imm32 } },
- .{ .src = .{ .imm32, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_mem, .gpr } },
- .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .mem } },
- .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .gpr } },
+ .{ .src = .{ .to_mut_gpr, .imm32 } },
+ .{ .src = .{ .imm32, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .mut_mem, .to_gpr } },
+ .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .mem } },
+ .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .to_gpr } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, mir_tag, .dst0d, .src1d, ._, ._ },
} },
}, .{
- .required_features = .{ .@"64bit", null },
- .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } },
.patterns = &.{
.{ .src = .{ .mut_mem, .simm32 } },
.{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .simm32 } },
- .{ .src = .{ .simm32, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_mem, .gpr } },
- .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .mem } },
- .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .gpr } },
+ .{ .src = .{ .to_mut_gpr, .simm32 } },
+ .{ .src = .{ .simm32, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .mut_mem, .to_gpr } },
+ .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .mem } },
+ .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .to_gpr } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, mir_tag, .dst0q, .src1q, ._, ._ },
} },
}, .{
- .required_features = .{ .avx2, null },
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{
+ .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } },
+ .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } },
+ },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -2677,16 +2692,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ .@"0:", .v_dqu, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
.{ ._, .vp_, mir_tag, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
.{ ._, .v_dqu, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .avx, null },
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } },
+ .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } },
+ },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -2699,16 +2719,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ .@"0:", .v_pd, .movu, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
.{ ._, .v_pd, mir_tag, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
.{ ._, .v_pd, .movu, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .avx, null },
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
+ .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
+ },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -2721,16 +2746,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ .@"0:", .v_dqu, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
.{ ._, .vp_, mir_tag, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ },
.{ ._, .v_dqu, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .sse2, null },
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{
+ .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
+ .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
+ },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -2743,16 +2773,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ .@"0:", ._dqu, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
.{ ._, .p_, mir_tag, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
.{ ._, ._dqu, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .sse, null },
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
+ .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
+ },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -2765,16 +2800,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ .@"0:", ._ps, .movu, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
.{ ._, ._ps, mir_tag, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
.{ ._, ._ps, .movu, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .mmx, null },
+ .required_features = .{ .mmx, null, null, null },
+ .src_constraints = .{
+ .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } },
+ .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } },
+ },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -2787,15 +2827,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ .@"0:", ._q, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
.{ ._, .p_, mir_tag, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
.{ ._, ._q, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .src_constraints = .{
+ .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } },
+ .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } },
+ },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -2808,16 +2853,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
.{ ._, ._, mir_tag, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
.{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
} },
- }) catch |err2| switch (err2) {
+ }) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
@tagName(air_tag),
cg.typeOf(bin_op.lhs).fmt(pt),
@@ -2830,6 +2876,1058 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
if (ops[1].index != res[0].index) try ops[1].die(cg);
try res[0].moveTo(inst, cg);
},
+ .not => |air_tag| if (use_old) try cg.airUnOp(inst, air_tag) else {
+ const ty_op = air_datas[@intFromEnum(inst)].ty_op;
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+ var res: [1]Temp = undefined;
+ cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, comptime &.{ .{
+ .src_constraints = .{ .{ .signed_or_exact_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .not, .dst0b, ._, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .unsigned_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0b, .sa(.src0, .add_umax), ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .signed_or_exact_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .not, .dst0w, ._, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .unsigned_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0w, .sa(.src0, .add_umax), ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .signed_or_exact_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .not, .dst0d, ._, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .unsigned_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_umax), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .signed_or_exact_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .not, .dst0q, ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .xor, .dst0q, .src0q, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .mmx, null, null, null },
+ .src_constraints = .{ .{ .signed_or_exact_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_mm, .none } },
+ },
+ .dst_temps = .{.{ .rc = .mmx }},
+ .each = .{ .once = &.{
+ .{ ._, .p_d, .cmpeq, .dst0q, .dst0q, ._, ._ },
+ .{ ._, .p_, .xor, .dst0q, .src0q, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .mmx, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_mm, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .umax_mem = .src0 } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .p_, .xor, .dst0q, .lea(.qword, .tmp0), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .signed_or_exact_int = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_xmm, .none } },
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, .vp_q, .cmpeq, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vp_, .xor, .dst0x, .dst0x, .src0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_xmm, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .umax_mem = .src0 } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .vp_, .xor, .dst0x, .src0x, .lea(.xword, .tmp0), ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .signed_or_exact_int = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_xmm, .none } },
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, .p_d, .cmpeq, .dst0x, .dst0x, ._, ._ },
+ .{ ._, .p_, .xor, .dst0x, .src0x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_xmm, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .umax_mem = .src0 } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .p_, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .int = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_xmm, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .umax_mem = .src0 } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, ._ps, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .signed_or_exact_int = .yword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_ymm, .none } },
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, .vp_q, .cmpeq, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .vp_, .xor, .dst0y, .dst0y, .src0y, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .yword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_ymm, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .umax_mem = .src0 } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .vp_, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .signed_or_exact_int = .yword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_ymm, .none } },
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, .v_pd, .cmp, .dst0y, .dst0y, .dst0y, .si(0b01111) },
+ .{ ._, .v_pd, .xor, .dst0y, .dst0y, .src0y, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .yword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_ymm, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .umax_mem = .src0 } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_pd, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
+ .{ ._, .vp_q, .cmpeq, .tmp1y, .tmp1y, .tmp1y, ._ },
+ .{ .@"0:", .vp_, .xor, .tmp2y, .tmp1y, .memiad(.src0y, .tmp0, .add_size, -16), ._ },
+ .{ ._, .v_dqu, .mov, .memiad(.dst0y, .tmp0, .add_size, -16), .tmp2y, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ .@"0:", .vp_, .xor, .tmp2x, .tmp1x, .memad(.src0x, .add_size, -16), ._ },
+ .{ ._, .v_dqa, .mov, .memad(.dst0x, .add_size, -16), .tmp2x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .yword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, .vp_q, .cmpeq, .tmp1y, .tmp1y, .tmp1y, ._ },
+ .{ .@"0:", .vp_, .xor, .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._ },
+ .{ ._, .v_dqu, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp2y, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
+ .{ ._, .v_pd, .cmp, .tmp1y, .tmp1y, .tmp1y, .si(0b01111) },
+ .{ .@"0:", .v_pd, .xor, .tmp2y, .tmp1y, .memiad(.src0y, .tmp0, .add_size, -16), ._ },
+ .{ ._, .v_pd, .movu, .memiad(.dst0y, .tmp0, .add_size, -16), .tmp2y, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ .@"0:", .v_pd, .xor, .tmp2x, .tmp1x, .memad(.src0x, .add_size, -16), ._ },
+ .{ ._, .v_pd, .mova, .memad(.dst0x, .add_size, -16), .tmp2x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .yword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, .v_pd, .cmp, .tmp1y, .tmp1y, .tmp1y, .si(0b01111) },
+ .{ .@"0:", .v_pd, .xor, .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._ },
+ .{ ._, .v_pd, .movu, .memia(.dst0y, .tmp0, .add_size), .tmp2y, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, .vp_q, .cmpeq, .tmp1x, .tmp1x, .tmp1x, ._ },
+ .{ .@"0:", .v_, .xor, .tmp2x, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._ },
+ .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp2x, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, .p_d, .cmpeq, .tmp1x, .tmp1x, ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_, .xor, .tmp2x, .tmp1x, ._, ._ },
+ .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp2x, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .not, .memia(.dst0q, .tmp0, .add_size), ._, ._, ._ },
+ .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, 8), ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .xword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ },
+ .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .not, .memad(.dst0d, .add_size, -16), ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .xword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -16), ._, ._ },
+ .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
+ .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -16), .tmp1q, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -16), ._, ._ },
+ .{ ._, ._, .not, .tmp0d, ._, ._, ._ },
+ .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16), .tmp0d, ._, ._ },
+ .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16 + 4), .si(0), ._, ._ },
+ .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16 + 8), .si(0), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .qword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ },
+ .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .not, .memad(.dst0q, .add_size, -16), ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .qword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ },
+ .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
+ .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .dword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -8), ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .not, .memad(.dst0d, .add_size, -8), ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .dword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ },
+ .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
+ .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -8), ._, ._ },
+ .{ ._, ._, .not, .tmp0d, ._, ._, ._ },
+ .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8), .tmp0d, ._, ._ },
+ .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8 + 4), .si(0), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ },
+ .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .memad(.dst0d, .add_size, -16), .sa(.src0, .add_umax), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -16), ._, ._ },
+ .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
+ .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -16), .tmp1q, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -16), ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16), .tmp0d, ._, ._ },
+ .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16 + 4), .si(0), ._, ._ },
+ .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16 + 8), .si(0), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -8), ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .memad(.dst0d, .add_size, -8), .sa(.src0, .add_umax), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ },
+ .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
+ .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -8), ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8), .tmp0d, ._, ._ },
+ .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8 + 4), .si(0), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ },
+ .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .xor, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -16), ._, ._ },
+ .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
+ .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -16), .tmp1q, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .xor, .tmp0q, .memad(.src0q, .add_size, -16), ._, ._ },
+ .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ },
+ .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -8), ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .xor, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ },
+ .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
+ .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .xor, .tmp0q, .memad(.src0q, .add_size, -8), ._, ._ },
+ .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .mmx, null, null, null },
+ .src_constraints = .{ .{ .signed_int_or_full_vec = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_mm, .none } },
+ },
+ .dst_temps = .{.{ .rc = .mmx }},
+ .each = .{ .once = &.{
+ .{ ._, .p_d, .cmpeq, .dst0q, .dst0q, ._, ._ },
+ .{ ._, .p_, .xor, .dst0q, .src0q, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .mmx, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int_vec = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_mm, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .umax_mem = .src0 } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .p_, .xor, .dst0q, .lea(.qword, .tmp0), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .signed_int_or_full_vec = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_xmm, .none } },
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, .vp_q, .cmpeq, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vp_, .xor, .dst0x, .dst0x, .src0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int_vec = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_xmm, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .umax_mem = .src0 } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .vp_, .xor, .dst0x, .src0x, .lea(.xword, .tmp0), ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .signed_int_or_full_vec = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_xmm, .none } },
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, .p_d, .cmpeq, .dst0x, .dst0x, ._, ._ },
+ .{ ._, .p_, .xor, .dst0x, .src0x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int_vec = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_xmm, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .umax_mem = .src0 } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .p_, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .vec = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_xmm, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .umax_mem = .src0 } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, ._ps, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .signed_int_or_full_vec = .yword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_ymm, .none } },
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, .vp_q, .cmpeq, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .vp_, .xor, .dst0y, .dst0y, .src0y, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int_vec = .yword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_ymm, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .umax_mem = .src0 } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .vp_, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .signed_int_or_full_vec = .yword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_ymm, .none } },
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, .v_pd, .cmp, .dst0y, .dst0y, .dst0y, .si(0b01111) },
+ .{ ._, .v_pd, .xor, .dst0y, .dst0y, .src0y, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int_vec = .yword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_ymm, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .umax_mem = .src0 } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_pd, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .umax_mem = .src0 } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_src0_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp3), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_src0_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp2q, .leaia(.qword, .tmp1, .tmp0, .add_src0_size), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_src0_size), .tmp2q, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .umax_mem = .src0 } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_src0_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp3), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_src0_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .leaia(.dword, .tmp1, .tmp0, .add_src0_size), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_src0_size), .tmp2d, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ } }) catch |err| switch (err) {
+ error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{
+ @tagName(air_tag),
+ cg.typeOf(ty_op.operand).fmt(pt),
+ ops[0].tracking(cg),
+ }),
+ else => |e| return e,
+ };
+ if (ops[0].index != res[0].index) try ops[0].die(cg);
+ try res[0].moveTo(inst, cg);
+ },
+
.block => if (use_old) try cg.airBlock(inst) else {
const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
const extra = cg.air.extraData(Air.Block, ty_pl.payload);
@@ -2880,91 +3978,2760 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.call_never_tail => try cg.airCall(inst, .never_tail),
.call_never_inline => try cg.airCall(inst, .never_inline),
- .cmp_vector, .cmp_vector_optimized => |air_tag| if (use_old) try cg.airCmpVector(inst) else fallback: {
- const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
- const extra = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data;
- switch (extra.compareOperator()) {
- .eq, .neq => {},
- else => break :fallback try cg.airCmpVector(inst),
- }
- var ops = try cg.tempsFromOperands(inst, .{ extra.lhs, extra.rhs });
+ .clz => |air_tag| if (use_old) try cg.airClz(inst) else {
+ const ty_op = air_datas[@intFromEnum(inst)].ty_op;
+ var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
var res: [1]Temp = undefined;
- switch (extra.compareOperator()) {
- .lt => unreachable,
- .lte => unreachable,
- .eq, .neq => |cmp_op| cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (cmp_op) {
- else => unreachable,
- .eq => .e,
- .neq => .ne,
- })) {
- else => unreachable,
- inline .e, .ne => |cc| comptime &.{ .{
- .required_features = .{ .avx2, null },
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
- .patterns = &.{
- .{ .src = .{ .ymm, .mem } },
- .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .ymm, .ymm } },
- },
- .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
- .kind = .all,
- .inverted = switch (cc) {
- else => unreachable,
- .e => false,
- .ne => true,
- },
- .scalar = .byte,
- } } }},
- .each = .{ .once = &.{
- .{ ._, .vp_b, .cmpeq, .dst0y, .src0y, .src1y, ._ },
- } },
- }, .{
- .required_features = .{ .avx2, null },
- .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
- .patterns = &.{
- .{ .src = .{ .ymm, .mem } },
- .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .ymm, .ymm } },
- },
- .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
- .kind = .all,
- .inverted = switch (cc) {
- else => unreachable,
- .e => false,
- .ne => true,
- },
- .scalar = .word,
- } } }},
- .each = .{ .once = &.{
- .{ ._, .vp_w, .cmpeq, .dst0y, .src0y, .src1y, ._ },
- } },
- }, .{
- .required_features = .{ .avx2, null },
- .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
- .patterns = &.{
- .{ .src = .{ .ymm, .mem } },
- .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .ymm, .ymm } },
- },
- .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
- .kind = .all,
- .inverted = switch (cc) {
- else => unreachable,
- .e => false,
- .ne => true,
- },
- .scalar = .dword,
- } } }},
- .each = .{ .once = &.{
- .{ ._, .vp_d, .cmpeq, .dst0y, .src0y, .src1y, ._ },
- } },
- }, .{
- .required_features = .{ .avx2, null },
- .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
- .patterns = &.{
- .{ .src = .{ .ymm, .mem } },
- .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .ymm, .ymm } },
+ cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, comptime &.{ .{
+ .required_features = .{ .slow_incdec, null, null, null },
+ .src_constraints = .{ .{ .exact_signed_int = 1 }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .add, .dst0b, .si(1), ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .exact_signed_int = 1 }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .inc, .dst0b, ._, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .exact_unsigned_int = 1 }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mut_mem, .none } },
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .xor, .dst0b, .si(1), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .lzcnt, null, null, null },
+ .src_constraints = .{ .{ .unsigned_or_exact_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
+ .{ ._, ._, .lzcnt, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .lzcnt, null, null, null },
+ .src_constraints = .{ .{ .signed_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
+ .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .lzcnt, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null },
+ .src_constraints = .{ .{ .exact_int = 16 }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .lzcnt, null, null, null },
+ .src_constraints = .{ .{ .exact_int = 16 }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .lzcnt, null, null, null },
+ .src_constraints = .{ .{ .signed_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .sia(16, .src0, .sub_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .sia(16, .src0, .sub_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .lzcnt, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .sia(16, .src0, .sub_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null },
+ .src_constraints = .{ .{ .exact_int = 32 }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .lzcnt, null, null, null },
+ .src_constraints = .{ .{ .exact_int = 32 }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .lzcnt, null, null, null },
+ .src_constraints = .{ .{ .signed_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .lzcnt, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
+ .src_constraints = .{ .{ .exact_int = 64 }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .lzcnt, null, null },
+ .src_constraints = .{ .{ .exact_int = 64 }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .lzcnt, null, null },
+ .src_constraints = .{ .{ .signed_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .@"and", .dst0q, .src0q, ._, ._ },
+ .{ ._, ._, .lzcnt, .dst0q, .dst0q, ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .sia(64, .src0, .sub_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
+ .src_constraints = .{ .{ .unsigned_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .sia(64, .src0, .sub_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .lzcnt, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .sia(64, .src0, .sub_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
+ .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
+ .{ ._, ._z, .cmov, .dst0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .signed_po2_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
+ .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
+ .{ ._, ._z, .cmov, .dst0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .signed_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ },
+ .{ ._, ._, .@"and", .tmp0d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ },
+ .{ ._, ._z, .cmov, .tmp0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ },
+ .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ },
+ .{ ._, ._z, .cmov, .tmp0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
+ .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
+ .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
+ .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
+ .src_constraints = .{ .{ .signed_po2_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
+ .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
+ .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
+ .src_constraints = .{ .{ .signed_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ },
+ .{ ._, ._, .@"and", .tmp0d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .dst0b, .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ },
+ .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .dst0b, .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
+ .{ ._, ._r, .bs, .dst0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .signed_po2_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ },
+ .{ ._, ._, .@"and", .tmp0d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
+ .{ ._, ._r, .bs, .dst0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .signed_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
+ .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .unsigned_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ },
+ .{ ._, ._, .mov, .dst0w, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
+ .{ ._, ._nz, .cmov, .dst0w, .src0w, ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .signed_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ },
+ .{ ._, ._, .mov, .dst0w, .si(0xff), ._, ._ },
+ .{ ._, ._z, .cmov, .src0w, .dst0w, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ },
+ .{ ._, ._, .mov, .dst0w, .si(0xff), ._, ._ },
+ .{ ._, ._z, .cmov, .src0w, .dst0w, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
+ .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._r, .bs, .dst0w, .src0w, ._, ._ },
+ .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
+ .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
+ .src_constraints = .{ .{ .signed_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .dst0w, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
+ .{ ._, ._r, .bs, .dst0w, .src0w, ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .signed_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .mov, .tmp0w, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp0w, .src0w, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .unsigned_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0w, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp0w, .src0w, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
+ .{ ._, ._nz, .cmov, .dst0d, .src0d, ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .signed_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ },
+ .{ ._, ._z, .cmov, .src0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ },
+ .{ ._, ._z, .cmov, .src0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
+ .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._r, .bs, .dst0d, .src0d, ._, ._ },
+ .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
+ .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
+ .src_constraints = .{ .{ .signed_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
+ .{ ._, ._r, .bs, .dst0d, .src0d, ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .signed_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp0d, .src0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .unsigned_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp0d, .src0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null },
+ .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._r, .bs, .src0q, .src0q, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
+ .{ ._, ._nz, .cmov, .dst0d, .src0d, ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null },
+ .src_constraints = .{ .{ .signed_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .@"and", .tmp0q, .src0q, ._, ._ },
+ .{ ._, ._r, .bs, .tmp0q, .tmp0q, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ },
+ .{ ._, ._z, .cmov, .tmp0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null },
+ .src_constraints = .{ .{ .unsigned_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._r, .bs, .src0q, .src0q, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ },
+ .{ ._, ._z, .cmov, .src0d, .dst0d, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._r, .bs, .dst0q, .src0q, ._, ._ },
+ .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
+ .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .signed_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .@"and", .tmp0q, .src0q, ._, ._ },
+ .{ ._, ._r, .bs, .tmp0q, .tmp0q, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .dst0b, .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._r, .bs, .src0q, .src0q, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
+ .{ ._, ._r, .bs, .dst0q, .src0q, ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .signed_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .@"and", .dst0q, .src0q, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp0q, .dst0q, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none } },
+ .{ .src = .{ .to_gpr, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp0q, .src0q, ._, ._ },
+ .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
+ .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ },
+ .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
+ .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
+ .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .lzcnt, null, null },
+ .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ },
+ .{ .@"0:", ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
+ .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
+ .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ },
+ .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
+ .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .si(-1), ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ },
+ .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .dst0d, .si(-1), ._, ._ },
+ .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
+ .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ },
+ .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
+ .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
+ .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
+ .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
+ .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .lzcnt, null, null },
+ .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
+ .{ .@"0:", ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
+ .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
+ .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
+ .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
+ .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .si(-1), ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ },
+ .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .dst0d, .si(-1), ._, ._ },
+ .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
+ .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ },
+ .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
+ .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ },
+ .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ },
+ .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
+ .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .lzcnt, null, null },
+ .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"0:", ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ },
+ .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ },
+ .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
+ .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"0:", ._, .@"and", .dst0q, .memi(.src0q, .tmp0), ._, ._ },
+ .{ ._, ._r, .bs, .dst0q, .dst0q, ._, ._ },
+ .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ },
+ .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
+ .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ },
+ .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ },
+ .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
+ .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .lzcnt, null, null },
+ .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"0:", ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ },
+ .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ },
+ .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
+ .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .rc = .general_purpose }},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"0:", ._, .@"and", .dst0q, .memi(.src0q, .tmp0), ._, ._ },
+ .{ ._, ._r, .bs, .dst0q, .dst0q, ._, ._ },
+ .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ },
+ .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .lzcnt, .slow_incdec, null, null },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .lzcnt, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .lzcnt, .slow_incdec, null, null },
+ .src_constraints = .{ .{ .scalar_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .lzcnt, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .lzcnt, .slow_incdec, null, null },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .lzcnt, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .lzcnt, .slow_incdec, null },
+ .src_constraints = .{ .{ .scalar_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp1q, .tmp1q, ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .sia(64, .src0, .sub_bit_size), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .lzcnt, null, null },
+ .src_constraints = .{ .{ .scalar_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp1q, .tmp1q, ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .sia(64, .src0, .sub_bit_size), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec, null },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp2d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp2d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, .slow_incdec, null, null },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
+ .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
+ .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .slow_incdec, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .scalar_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec, null },
+ .src_constraints = .{ .{ .scalar_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp2d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .scalar_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp2d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, .slow_incdec, null, null },
+ .src_constraints = .{ .{ .scalar_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
+ .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
+ .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .slow_incdec, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .scalar_int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec, null },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, .slow_incdec, null, null },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
+ .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
+ .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .slow_incdec, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .scalar_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec },
+ .src_constraints = .{ .{ .scalar_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .@"and", .tmp2q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2q, .tmp2q, ._, ._ },
+ .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null },
+ .src_constraints = .{ .{ .scalar_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .@"and", .tmp2q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2q, .tmp2q, ._, ._ },
+ .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
+ .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, .slow_incdec, null },
+ .src_constraints = .{ .{ .scalar_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
+ .{ ._, ._r, .bs, .tmp1q, .tmp1q, ._, ._ },
+ .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
+ .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null },
+ .src_constraints = .{ .{ .scalar_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
+ .{ ._, ._r, .bs, .tmp1q, .tmp1q, ._, ._ },
+ .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
+ .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._c, .st, ._, ._, ._, ._ },
+ .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
+ .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .slow_incdec, null, null },
+ .src_constraints = .{ .{ .scalar_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
+ .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2q, .tmp1q, ._, ._ },
+ .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
+ .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
+ .{ ._, ._r, .bs, .tmp2q, .tmp1q, ._, ._ },
+ .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
+ .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
+ .dst_constraints = .{.{ .scalar_int = .byte }},
+ .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
+ .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .lzcnt, null, null },
+ .dst_constraints = .{.{ .scalar_int = .byte }},
+ .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .dst_constraints = .{.{ .scalar_int = .byte }},
+ .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
+ .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ },
+ .{ ._, ._, .neg, .tmp3b, ._, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
+ .dst_constraints = .{.{ .scalar_int = .byte }},
+ .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
+ .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .lzcnt, null, null },
+ .dst_constraints = .{.{ .scalar_int = .byte }},
+ .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .dst_constraints = .{.{ .scalar_int = .byte }},
+ .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
+ .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ },
+ .{ ._, ._, .neg, .tmp3b, ._, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
+ .dst_constraints = .{.{ .scalar_int = .word }},
+ .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
+ .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
+ .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .lzcnt, null, null },
+ .dst_constraints = .{.{ .scalar_int = .word }},
+ .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
+ .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .dst_constraints = .{.{ .scalar_int = .word }},
+ .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
+ .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ },
+ .{ ._, ._, .neg, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
+ .dst_constraints = .{.{ .scalar_int = .word }},
+ .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
+ .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
+ .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .lzcnt, null, null },
+ .dst_constraints = .{.{ .scalar_int = .word }},
+ .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
+ .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
+ .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .dst_constraints = .{.{ .scalar_int = .word }},
+ .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.mem},
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
+ .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
+ .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ },
+ .{ ._, ._, .neg, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ },
+ .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ } }) catch |err| switch (err) {
+ error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{
+ @tagName(air_tag),
+ cg.typeOf(ty_op.operand).fmt(pt),
+ ops[0].tracking(cg),
+ }),
+ else => |e| return e,
+ };
+ if (ops[0].index != res[0].index) try ops[0].die(cg);
+ try res[0].moveTo(inst, cg);
+ },
+
+ .cmp_vector, .cmp_vector_optimized => |air_tag| if (use_old) try cg.airCmpVector(inst) else fallback: {
+ const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
+ const extra = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data;
+ switch (extra.compareOperator()) {
+ .eq, .neq => {},
+ else => break :fallback try cg.airCmpVector(inst),
+ }
+ var ops = try cg.tempsFromOperands(inst, .{ extra.lhs, extra.rhs });
+ var res: [1]Temp = undefined;
+ switch (extra.compareOperator()) {
+ .lt => unreachable,
+ .lte => unreachable,
+ .eq, .neq => |cmp_op| cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (cmp_op) {
+ else => unreachable,
+ .eq => .e,
+ .neq => .ne,
+ })) {
+ else => unreachable,
+ inline .e, .ne => |cc| comptime &.{ .{
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
+ .patterns = &.{
+ .{ .src = .{ .to_ymm, .mem } },
+ .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_ymm, .to_ymm } },
+ },
+ .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
+ .kind = .all,
+ .inverted = switch (cc) {
+ else => unreachable,
+ .e => false,
+ .ne => true,
+ },
+ .scalar = .byte,
+ } } }},
+ .each = .{ .once = &.{
+ .{ ._, .vp_b, .cmpeq, .dst0y, .src0y, .src1y, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
+ .patterns = &.{
+ .{ .src = .{ .to_ymm, .mem } },
+ .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_ymm, .to_ymm } },
+ },
+ .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
+ .kind = .all,
+ .inverted = switch (cc) {
+ else => unreachable,
+ .e => false,
+ .ne => true,
+ },
+ .scalar = .word,
+ } } }},
+ .each = .{ .once = &.{
+ .{ ._, .vp_w, .cmpeq, .dst0y, .src0y, .src1y, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
+ .patterns = &.{
+ .{ .src = .{ .to_ymm, .mem } },
+ .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_ymm, .to_ymm } },
+ },
+ .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
+ .kind = .all,
+ .inverted = switch (cc) {
+ else => unreachable,
+ .e => false,
+ .ne => true,
+ },
+ .scalar = .dword,
+ } } }},
+ .each = .{ .once = &.{
+ .{ ._, .vp_d, .cmpeq, .dst0y, .src0y, .src1y, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
+ .patterns = &.{
+ .{ .src = .{ .to_ymm, .mem } },
+ .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_ymm, .to_ymm } },
},
.dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
.kind = .all,
@@ -2979,12 +6746,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_q, .cmpeq, .dst0y, .src0y, .src1y, ._ },
} },
}, .{
- .required_features = .{ .avx, null },
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
.patterns = &.{
- .{ .src = .{ .xmm, .mem } },
- .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .xmm, .xmm } },
+ .{ .src = .{ .to_xmm, .mem } },
+ .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_xmm, .to_xmm } },
},
.dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
.kind = .all,
@@ -2999,12 +6766,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_b, .cmpeq, .dst0x, .src0x, .src1x, ._ },
} },
}, .{
- .required_features = .{ .avx, null },
- .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
.patterns = &.{
- .{ .src = .{ .xmm, .mem } },
- .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .xmm, .xmm } },
+ .{ .src = .{ .to_xmm, .mem } },
+ .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_xmm, .to_xmm } },
},
.dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
.kind = .all,
@@ -3019,12 +6786,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_w, .cmpeq, .dst0x, .src0x, .src1x, ._ },
} },
}, .{
- .required_features = .{ .avx, null },
- .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
.patterns = &.{
- .{ .src = .{ .xmm, .mem } },
- .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .xmm, .xmm } },
+ .{ .src = .{ .to_xmm, .mem } },
+ .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_xmm, .to_xmm } },
},
.dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
.kind = .all,
@@ -3039,12 +6806,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .cmpeq, .dst0x, .src0x, .src1x, ._ },
} },
}, .{
- .required_features = .{ .avx, null },
- .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
.patterns = &.{
- .{ .src = .{ .xmm, .mem } },
- .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .xmm, .xmm } },
+ .{ .src = .{ .to_xmm, .mem } },
+ .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_xmm, .to_xmm } },
},
.dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
.kind = .all,
@@ -3059,12 +6826,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_q, .cmpeq, .dst0x, .src0x, .src1x, ._ },
} },
}, .{
- .required_features = .{ .sse2, null },
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
.patterns = &.{
- .{ .src = .{ .mut_xmm, .mem } },
- .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_xmm, .xmm } },
+ .{ .src = .{ .to_mut_xmm, .mem } },
+ .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_xmm, .to_xmm } },
},
.dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
.kind = .all,
@@ -3079,12 +6846,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .p_b, .cmpeq, .dst0x, .src1x, ._, ._ },
} },
}, .{
- .required_features = .{ .sse2, null },
- .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
.patterns = &.{
- .{ .src = .{ .mut_xmm, .mem } },
- .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_xmm, .xmm } },
+ .{ .src = .{ .to_mut_xmm, .mem } },
+ .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_xmm, .to_xmm } },
},
.dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
.kind = .all,
@@ -3099,12 +6866,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .p_w, .cmpeq, .dst0x, .src1x, ._, ._ },
} },
}, .{
- .required_features = .{ .sse2, null },
- .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
.patterns = &.{
- .{ .src = .{ .mut_xmm, .mem } },
- .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_xmm, .xmm } },
+ .{ .src = .{ .to_mut_xmm, .mem } },
+ .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_xmm, .to_xmm } },
},
.dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
.kind = .all,
@@ -3119,12 +6886,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .p_d, .cmpeq, .dst0x, .src1x, ._, ._ },
} },
}, .{
- .required_features = .{ .sse4_1, null },
- .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .required_features = .{ .sse4_1, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
.patterns = &.{
- .{ .src = .{ .mut_xmm, .mem } },
- .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_xmm, .xmm } },
+ .{ .src = .{ .to_mut_xmm, .mem } },
+ .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_xmm, .to_xmm } },
},
.dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
.kind = .all,
@@ -3139,12 +6906,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .p_q, .cmpeq, .dst0x, .src1x, ._, ._ },
} },
}, .{
- .required_features = .{ .mmx, null },
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .required_features = .{ .mmx, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
.patterns = &.{
- .{ .src = .{ .mut_mm, .mem } },
- .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_mm, .mm } },
+ .{ .src = .{ .to_mut_mm, .mem } },
+ .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_mm, .to_mm } },
},
.dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
.kind = .all,
@@ -3159,12 +6926,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .p_b, .cmpeq, .dst0q, .src1q, ._, ._ },
} },
}, .{
- .required_features = .{ .mmx, null },
- .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .required_features = .{ .mmx, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
.patterns = &.{
- .{ .src = .{ .mut_mm, .mem } },
- .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_mm, .mm } },
+ .{ .src = .{ .to_mut_mm, .mem } },
+ .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_mm, .to_mm } },
},
.dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
.kind = .all,
@@ -3179,12 +6946,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .p_w, .cmpeq, .dst0q, .src1q, ._, ._ },
} },
}, .{
- .required_features = .{ .mmx, null },
- .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .required_features = .{ .mmx, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
.patterns = &.{
- .{ .src = .{ .mut_mm, .mem } },
- .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_mm, .mm } },
+ .{ .src = .{ .to_mut_mm, .mem } },
+ .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_mm, .to_mm } },
},
.dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
.kind = .all,
@@ -3203,550 +6970,182 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8 } },
.{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .imm8 } },
- .{ .src = .{ .imm8, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_mem, .gpr } },
- .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .mem } },
- .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .gpr } },
- },
- .clobbers = .{ .eflags = true },
- .dst_temps = .{.{ .ref = .src0 }},
- .each = .{ .once = switch (cc) {
- else => unreachable,
- .e => &.{
- .{ ._, ._, .xor, .dst0b, .src1b, ._, ._ },
- .{ ._, ._, .not, .dst0b, ._, ._, ._ },
- },
- .ne => &.{
- .{ ._, ._, .xor, .dst0b, .src1b, ._, ._ },
- },
- } },
- }, .{
- .src_constraints = .{ .{ .bool_vec = .word }, .{ .bool_vec = .word } },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .imm16 } },
- .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .imm16 } },
- .{ .src = .{ .imm16, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_mem, .gpr } },
- .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .mem } },
- .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .gpr } },
- },
- .clobbers = .{ .eflags = true },
- .dst_temps = .{.{ .ref = .src0 }},
- .each = .{ .once = switch (cc) {
- else => unreachable,
- .e => &.{
- .{ ._, ._, .xor, .dst0w, .src1w, ._, ._ },
- .{ ._, ._, .not, .dst0w, ._, ._, ._ },
- },
- .ne => &.{
- .{ ._, ._, .xor, .dst0w, .src1w, ._, ._ },
- },
- } },
- }, .{
- .src_constraints = .{ .{ .bool_vec = .dword }, .{ .bool_vec = .dword } },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .imm32 } },
- .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .imm32 } },
- .{ .src = .{ .imm32, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_mem, .gpr } },
- .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .mem } },
- .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .gpr } },
- },
- .clobbers = .{ .eflags = true },
- .dst_temps = .{.{ .ref = .src0 }},
- .each = .{ .once = switch (cc) {
- else => unreachable,
- .e => &.{
- .{ ._, ._, .xor, .dst0d, .src1d, ._, ._ },
- .{ ._, ._, .not, .dst0d, ._, ._, ._ },
- },
- .ne => &.{
- .{ ._, ._, .xor, .dst0d, .src1d, ._, ._ },
- },
- } },
- }, .{
- .required_features = .{ .@"64bit", null },
- .src_constraints = .{ .{ .bool_vec = .qword }, .{ .bool_vec = .qword } },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .simm32 } },
- .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .simm32 } },
- .{ .src = .{ .simm32, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_mem, .gpr } },
- .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .mem } },
- .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_gpr, .gpr } },
+ .{ .src = .{ .to_mut_gpr, .imm8 } },
+ .{ .src = .{ .imm8, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .mut_mem, .to_gpr } },
+ .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .mem } },
+ .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .to_gpr } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .ref = .src0 }},
- .each = .{ .once = switch (cc) {
- else => unreachable,
- .e => &.{
- .{ ._, ._, .xor, .dst0q, .src1q, ._, ._ },
- .{ ._, ._, .not, .dst0q, ._, ._, ._ },
- },
- .ne => &.{
- .{ ._, ._, .xor, .dst0q, .src1q, ._, ._ },
- },
- } },
- }, .{
- .src_constraints = .{ .any_bool_vec, .any_bool_vec },
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
.clobbers = .{ .eflags = true },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{.mem},
- .each = .{ .once = switch (cc) {
- else => unreachable,
- .e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .not, .tmp1p, ._, ._, ._ },
- .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- },
- .ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- },
- } },
- }, .{
- .required_features = .{ .avx2, null },
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .kind = .{ .rc = .sse } },
- .unused,
- .unused,
- },
- .dst_temps = .{.mem},
- .each = .{ .once = switch (cc) {
- else => unreachable,
- .e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
- .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0d, .tmp1), .tmp2d, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- },
- .ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
- .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
- .{ ._, ._, .not, .tmp2d, ._, ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0d, .tmp1), .tmp2d, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- },
- } },
- }, .{
- .required_features = .{ .avx2, null },
- .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
- .{ .kind = .{ .rc = .sse } },
- .unused,
- .unused,
- },
- .dst_temps = .{.mem},
- .each = .{ .once = switch (cc) {
- else => unreachable,
- .e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
- .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ },
- .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- },
- .ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
- .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ },
- .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
- .{ ._, ._, .not, .tmp2d, ._, ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- },
- } },
- }, .{
- .required_features = .{ .avx2, null },
- .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .{ .kind = .{ .rc = .sse } },
- .unused,
- .unused,
- },
- .dst_temps = .{.mem},
- .each = .{ .once = switch (cc) {
- else => unreachable,
- .e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
- .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- },
- .ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
- .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ },
- .{ ._, ._, .not, .tmp2b, ._, ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .each = .{ .once = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._, .xor, .dst0b, .src1b, ._, ._ },
+ .{ ._, ._, .not, .dst0b, ._, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, ._, .xor, .dst0b, .src1b, ._, ._ },
},
} },
}, .{
- .required_features = .{ .avx2, null },
- .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .src_constraints = .{ .{ .bool_vec = .word }, .{ .bool_vec = .word } },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .reg = .rcx } },
- .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .{ .kind = .{ .rc = .sse } },
- .unused,
+ .{ .src = .{ .mut_mem, .imm16 } },
+ .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .imm16 } },
+ .{ .src = .{ .imm16, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .mut_mem, .to_gpr } },
+ .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .mem } },
+ .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .to_gpr } },
},
- .dst_temps = .{.mem},
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp4y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .memia(.src1y, .tmp0, .add_size), ._ },
- .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ },
- .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"1:", ._, .add, .tmp0p, .i(32), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+ .{ ._, ._, .xor, .dst0w, .src1w, ._, ._ },
+ .{ ._, ._, .not, .dst0w, ._, ._, ._ },
},
.ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp4y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .memia(.src1y, .tmp0, .add_size), ._ },
- .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ },
- .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ },
- .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"1:", ._, .add, .tmp0p, .i(32), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+ .{ ._, ._, .xor, .dst0w, .src1w, ._, ._ },
},
} },
}, .{
- .required_features = .{ .avx, null },
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .src_constraints = .{ .{ .bool_vec = .dword }, .{ .bool_vec = .dword } },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
- .{ .kind = .{ .rc = .sse } },
- .unused,
- .unused,
+ .{ .src = .{ .mut_mem, .imm32 } },
+ .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .imm32 } },
+ .{ .src = .{ .imm32, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .mut_mem, .to_gpr } },
+ .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .mem } },
+ .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .to_gpr } },
},
- .dst_temps = .{.mem},
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ },
- .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .dst0d, .src1d, ._, ._ },
+ .{ ._, ._, .not, .dst0d, ._, ._, ._ },
},
.ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ },
- .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
- .{ ._, ._, .not, .tmp2d, ._, ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .dst0d, .src1d, ._, ._ },
},
} },
}, .{
- .required_features = .{ .avx, null },
- .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .bool_vec = .qword }, .{ .bool_vec = .qword } },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .{ .kind = .{ .rc = .sse } },
- .unused,
- .unused,
+ .{ .src = .{ .mut_mem, .simm32 } },
+ .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .simm32 } },
+ .{ .src = .{ .simm32, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .mut_mem, .to_gpr } },
+ .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .mem } },
+ .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .to_gpr } },
},
- .dst_temps = .{.mem},
+ .dst_temps = .{.{ .ref = .src0 }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ },
- .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ },
- .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .dst0q, .src1q, ._, ._ },
+ .{ ._, ._, .not, .dst0q, ._, ._, ._ },
},
.ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ },
- .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ },
- .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
- .{ ._, ._, .not, .tmp2b, ._, ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .dst0q, .src1q, ._, ._ },
},
} },
}, .{
- .required_features = .{ .avx, null },
- .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .src_constraints = .{ .any_bool_vec, .any_bool_vec },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .reg = .rcx } },
- .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .{ .kind = .{ .rc = .sse } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
.unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_d, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ },
- .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ },
- .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .not, .tmp1p, ._, ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
},
.ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_d, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ },
- .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ },
- .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ },
- .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
},
} },
}, .{
- .required_features = .{ .avx, null },
- .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .reg = .rcx } },
- .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .sse } },
.unused,
+ .unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ },
- .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ },
- .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0d, .tmp1), .tmp2d, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
},
.ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ },
- .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ },
- .{ ._, ._, .xor, .tmp3b, .i(0b11), ._, ._ },
- .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
- .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
+ .{ ._, ._, .not, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0d, .tmp1), .tmp2d, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
},
} },
}, .{
- .required_features = .{ .sse2, null },
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -3759,35 +7158,38 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_b, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
+ .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
.{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ },
.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
},
.ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_b, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
+ .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
.{ ._, ._, .not, .tmp2d, ._, ._, ._ },
.{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ },
.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
},
} },
}, .{
- .required_features = .{ .sse2, null },
- .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -3800,37 +7202,36 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_w, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ },
- .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
+ .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ },
.{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
},
.ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_w, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ },
- .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
+ .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ },
.{ ._, ._, .not, .tmp2b, ._, ._, ._ },
.{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
},
} },
}, .{
- .required_features = .{ .sse2, null },
- .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -3843,128 +7244,104 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_d, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
- .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp4y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .memia(.src1y, .tmp0, .add_size), ._ },
+ .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ },
.{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
.{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
.{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
.{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
.{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .si(32), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
.{ ._, ._z, .j, .@"0f", ._, ._, ._ },
.{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
.{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
},
.ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_d, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
- .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ },
- .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp4y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .memia(.src1y, .tmp0, .add_size), ._ },
+ .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ },
+ .{ ._, ._, .xor, .tmp3b, .si(0b1111), ._, ._ },
.{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
.{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
.{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
.{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
.{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .si(32), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
.{ ._, ._z, .j, .@"0f", ._, ._, ._ },
.{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
.{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
},
} },
}, .{
- .required_features = .{ .sse4_1, null },
- .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .reg = .rcx } },
- .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .sse } },
.unused,
+ .unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_q, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
- .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ },
- .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ },
.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
},
.ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_q, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
- .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ },
- .{ ._, ._, .xor, .tmp3b, .i(0b11), ._, ._ },
- .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ ._, ._, .not, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ },
.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
},
} },
}, .{
- .required_features = .{ .sse, .mmx },
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -3972,111 +7349,111 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .{ .kind = .{ .rc = .mmx } },
+ .{ .kind = .{ .rc = .sse } },
.unused,
.unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", ._q, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_b, .cmpeq, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ },
+ .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
.{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
},
.ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ .@"0:", ._q, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_b, .cmpeq, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ },
+ .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ },
+ .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
.{ ._, ._, .not, .tmp2b, ._, ._, ._ },
.{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
},
} },
}, .{
- .required_features = .{ .sse, .mmx },
- .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .reg = .rcx } },
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .{ .kind = .{ .rc = .mmx } },
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .{ .kind = .{ .rc = .mmx } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ },
- .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_w, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ },
- .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ },
- .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_d, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ },
+ .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
.{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
.{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
},
.ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ },
- .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_w, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ },
- .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ },
- .{ ._, ._, .xor, .tmp4b, .i(0b1111), ._, ._ },
- .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_d, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ },
+ .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._, .xor, .tmp3b, .si(0b1111), ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
.{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
.{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
},
} },
}, .{
- .required_features = .{ .sse, .mmx },
- .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -4084,578 +7461,535 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u32, .kind = .{ .reg = .rcx } },
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .{ .kind = .{ .rc = .mmx } },
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .{ .kind = .{ .rc = .mmx } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = switch (cc) {
else => unreachable,
.e => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ },
- .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_d, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_w, .ackssd, .tmp5q, .tmp3q, ._, ._ },
- .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ },
- .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ },
- .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ },
+ .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
.{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
.{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
},
.ne => &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ },
- .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_d, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
- .{ ._, .p_w, .ackssd, .tmp5q, .tmp3q, ._, ._ },
- .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ },
- .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ },
- .{ ._, ._, .xor, .tmp4b, .i(0b11), ._, ._ },
- .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ },
+ .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._, .xor, .tmp3b, .si(0b11), ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
.{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
- .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
- .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ },
- .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
- },
- } },
- }, .{
- .required_features = .{ .slow_incdec, null },
- .dst_constraints = .{.{ .bool_vec = .byte }},
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .mov, .tmp2b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp2b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
- .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(1), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- } },
- }, .{
- .dst_constraints = .{.{ .bool_vec = .byte }},
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .mov, .tmp2b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp2b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
- .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
- .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
- .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
- } },
- }, .{
- .required_features = .{ .slow_incdec, null },
- .dst_constraints = .{.{ .bool_vec = .byte }},
- .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .mov, .tmp2w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp2w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
- .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- } },
- }, .{
- .dst_constraints = .{.{ .bool_vec = .byte }},
- .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .mov, .tmp2w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp2w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
- .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+ },
} },
}, .{
- .required_features = .{ .slow_incdec, null },
- .dst_constraints = .{.{ .bool_vec = .byte }},
- .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .unused,
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
.unused,
.unused,
},
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp2d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
- .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_b, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_b, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ ._, ._, .not, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ },
} },
}, .{
- .dst_constraints = .{.{ .bool_vec = .byte }},
- .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .unused,
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
.unused,
.unused,
},
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp2d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
- .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_w, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_w, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
+ .{ ._, ._, .not, .tmp2b, ._, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ },
} },
}, .{
- .required_features = .{ .@"64bit", .slow_incdec },
- .dst_constraints = .{.{ .bool_vec = .byte }},
- .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
- .unused,
- .unused,
+ .{ .type = .u32, .kind = .{ .reg = .rcx } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
.unused,
},
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
- .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_d, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_d, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._, .xor, .tmp3b, .si(0b1111), ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+ },
} },
}, .{
- .required_features = .{ .@"64bit", null },
- .dst_constraints = .{.{ .bool_vec = .byte }},
- .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .required_features = .{ .sse4_1, null, null, null },
+ .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
- .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- } },
- }, .{
- .required_features = .{ .slow_incdec, null },
- .dst_constraints = .{.{ .bool_vec = .byte }},
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .unused,
- },
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
- .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ },
- .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
- .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
- .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
- .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ },
- .{ ._, ._, .sub, .tmp2d, .i(1), ._, ._ },
- .{ ._, ._b, .j, .@"1b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
- .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
- .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ },
- .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
- } },
- }, .{
- .dst_constraints = .{.{ .bool_vec = .byte }},
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .reg = .rcx } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
.unused,
},
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
- .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ },
- .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
- .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
- .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
- .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ },
- .{ ._, ._, .dec, .tmp2d, ._, ._, ._ },
- .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
- .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
- .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ },
- .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_q, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_q, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ },
+ .{ ._, ._, .xor, .tmp3b, .si(0b11), ._, ._ },
+ .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+ },
} },
}, .{
- .required_features = .{ .slow_incdec, null },
- .dst_constraints = .{.{ .bool_vec = .dword }},
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .required_features = .{ .sse, .mmx, null, null },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .mmx } },
.unused,
.unused,
},
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
- .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(1), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ .@"0:", ._q, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_b, .cmpeq, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ .@"0:", ._q, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_b, .cmpeq, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ },
+ .{ ._, ._, .not, .tmp2b, ._, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ },
} },
}, .{
- .dst_constraints = .{.{ .bool_vec = .dword }},
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .required_features = .{ .sse, .mmx, null, null },
+ .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .unused,
- .unused,
+ .{ .kind = .{ .rc = .mmx } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .mmx } },
},
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2d, .tmp1d, ._, ._ },
- .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
- .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
- .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
- .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ },
+ .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_w, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ },
+ .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ },
+ .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_w, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ },
+ .{ ._, ._, .xor, .tmp4b, .si(0b1111), ._, ._ },
+ .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
+ },
} },
}, .{
- .required_features = .{ .slow_incdec, null },
- .dst_constraints = .{.{ .bool_vec = .dword }},
- .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .required_features = .{ .sse, .mmx, null, null },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
- .unused,
- .unused,
+ .{ .type = .u32, .kind = .{ .reg = .rcx } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .mmx } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .mmx } },
},
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
- .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = switch (cc) {
+ else => unreachable,
+ .e => &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ },
+ .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_d, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_w, .ackssd, .tmp5q, .tmp3q, ._, ._ },
+ .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ },
+ .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
+ },
+ .ne => &.{
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ },
+ .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_d, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ ._, .p_w, .ackssd, .tmp5q, .tmp3q, ._, ._ },
+ .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ },
+ .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ },
+ .{ ._, ._, .xor, .tmp4b, .si(0b11), ._, ._ },
+ .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ },
+ .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
+ .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
+ .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
+ },
} },
}, .{
- .dst_constraints = .{.{ .bool_vec = .dword }},
- .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .dst_constraints = .{.{ .bool_vec = .byte }},
+ .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
- .unused,
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
.unused,
- },
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
- .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- } },
- }, .{
- .required_features = .{ .slow_incdec, null },
- .dst_constraints = .{.{ .bool_vec = .dword }},
- .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
},
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp2b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
.{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
- .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ },
+ .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .dst_constraints = .{.{ .bool_vec = .dword }},
- .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .dst_constraints = .{.{ .bool_vec = .byte }},
+ .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .unused,
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
.unused,
- },
- .dst_temps = .{.{ .rc = .general_purpose }},
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
- .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- } },
- }, .{
- .required_features = .{ .@"64bit", .slow_incdec },
- .dst_constraints = .{.{ .bool_vec = .dword }},
- .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
},
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp2w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
.{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
.{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
.{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
- .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .@"64bit", null },
- .dst_constraints = .{.{ .bool_vec = .dword }},
- .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .dst_constraints = .{.{ .bool_vec = .byte }},
+ .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -4663,249 +7997,251 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
.unused,
.unused,
},
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp2d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
.{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
- .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
+ .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .slow_incdec, null },
- .dst_constraints = .{.{ .bool_vec = .dword }},
+ .required_features = .{ .@"64bit", null, null, null },
+ .dst_constraints = .{.{ .bool_vec = .byte }},
+ .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
.unused,
},
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ },
- .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
- .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
- .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
- .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ },
- .{ ._, ._, .sub, .tmp2d, .i(1), ._, ._ },
- .{ ._, ._b, .j, .@"1b", ._, ._, ._ },
- .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
.{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
- .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
- .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ },
- .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .dst_constraints = .{.{ .bool_vec = .dword }},
+ .dst_constraints = .{.{ .bool_vec = .byte }},
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.unused,
},
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
.{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
.{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .sa(.src0p, .add_elem_limbs), ._, ._ },
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
.{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
.{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
.{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ },
- .{ ._, ._, .dec, .tmp2d, ._, ._, ._ },
- .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
- .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ },
+ .{ ._, ._b, .j, .@"1b", ._, ._, ._ },
.{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
.{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0d, .tmp2b, ._, ._ },
- .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
- .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ },
+ .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
+ .{ ._, ._, .cmp, .tmp1b, .sa(.dst0, .add_len), ._, ._ },
.{ ._, ._b, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .@"64bit", .slow_incdec },
- .dst_constraints = .{.{ .bool_vec = .qword }},
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .dst_constraints = .{.{ .bool_vec = .dword }},
+ .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
},
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
.{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
.{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
.{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
.{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
- .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(1), ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .@"64bit", null },
- .dst_constraints = .{.{ .bool_vec = .qword }},
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .dst_constraints = .{.{ .bool_vec = .dword }},
+ .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
},
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
.{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
.{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
- .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
- .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
- .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .@"64bit", .slow_incdec },
- .dst_constraints = .{.{ .bool_vec = .qword }},
- .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .dst_constraints = .{.{ .bool_vec = .dword }},
+ .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
},
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
.{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
.{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
- .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .@"64bit", null },
- .dst_constraints = .{.{ .bool_vec = .qword }},
- .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+ .required_features = .{ .@"64bit", null, null, null },
+ .dst_constraints = .{.{ .bool_vec = .dword }},
+ .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
},
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
.{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
.{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
- .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .@"64bit", .slow_incdec },
- .dst_constraints = .{.{ .bool_vec = .qword }},
- .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .dst_constraints = .{.{ .bool_vec = .dword }},
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .unused,
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.unused,
},
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
.{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .sa(.src0p, .add_elem_limbs), ._, ._ },
+ .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+ .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
+ .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
+ .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ },
+ .{ ._, ._b, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
.{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
- .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
+ .{ ._, ._, .cmp, .tmp1b, .sa(.dst0, .add_len), ._, ._ },
+ .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .@"64bit", null },
+ .required_features = .{ .@"64bit", null, null, null },
.dst_constraints = .{.{ .bool_vec = .qword }},
- .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -4913,29 +8249,30 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
},
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
.{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
.{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
.{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
.{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
- .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .@"64bit", .slow_incdec },
+ .required_features = .{ .@"64bit", null, null, null },
.dst_constraints = .{.{ .bool_vec = .qword }},
- .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -4943,29 +8280,29 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
},
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
.{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
.{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
.{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
- .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .@"64bit", null },
+ .required_features = .{ .@"64bit", null, null, null },
.dst_constraints = .{.{ .bool_vec = .qword }},
- .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+ .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -4973,63 +8310,59 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
},
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
.{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
.{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
.{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
.{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
- .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .@"64bit", .slow_incdec },
+ .required_features = .{ .@"64bit", null, null, null },
.dst_constraints = .{.{ .bool_vec = .qword }},
+ .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
.extra_temps = .{
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
.unused,
},
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ },
- .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
- .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
- .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
- .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ },
- .{ ._, ._, .sub, .tmp2d, .i(1), ._, ._ },
- .{ ._, ._b, .j, .@"1b", ._, ._, ._ },
- .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
.{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
.{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
.{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
- .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ },
- .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ },
- .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .@"64bit", null },
+ .required_features = .{ .@"64bit", null, null, null },
.dst_constraints = .{.{ .bool_vec = .qword }},
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
@@ -5043,30 +8376,30 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
.{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
.{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2d, .sa(.src0p, .add_elem_limbs), ._, ._ },
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
.{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
.{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
.{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ },
- .{ ._, ._, .dec, .tmp2d, ._, ._, ._ },
- .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ },
+ .{ ._, ._b, .j, .@"1b", ._, ._, ._ },
.{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
.{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
.{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
.{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
.{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
- .{ ._, ._, .inc, .tmp1b, ._, ._, ._ },
- .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ },
+ .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
+ .{ ._, ._, .cmp, .tmp1b, .sa(.dst0, .add_len), ._, ._ },
.{ ._, ._b, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .slow_incdec, null },
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+ .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -5079,8 +8412,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.mem},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
.{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
.{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ },
@@ -5089,62 +8423,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ },
.{ ._, ._l, .sh, .tmp3p, .tmp1b, ._, ._ },
.{ ._, ._, .@"or", .tmp2p, .tmp3p, ._, ._ },
- .{ ._, ._, .add, .tmp1d, .i(1), ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ },
+ .{ ._, ._, .add, .tmp1d, .si(1), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .sia(-1, .none, .add_ptr_bit_size), ._, ._ },
.{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
.{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
.{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_ptr_size), .tmp2p, ._, ._ },
.{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ .@"1:", ._, .add, .tmp0p, .i(1), ._, ._ },
+ .{ .@"1:", ._, .add, .tmp0p, .si(1), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ },
- .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0p, .tmp3), .tmp2p, ._, ._ },
- } },
- }, .{
- .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
- },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .reg = .ecx } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
- .unused,
- },
- .dst_temps = .{.mem},
- .each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
- .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
- .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ },
- .{ ._, ._, .mov, .tmp4b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp4b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
- .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ },
- .{ ._, ._l, .sh, .tmp3p, .tmp1b, ._, ._ },
- .{ ._, ._, .@"or", .tmp2p, .tmp3p, ._, ._ },
- .{ ._, ._, .inc, .tmp1d, ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ },
- .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
- .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_ptr_size), .tmp2p, ._, ._ },
- .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ .@"1:", ._, .inc, .tmp0p, ._, ._, ._ },
- .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
- .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ },
+ .{ ._, ._, .@"test", .tmp1d, .sia(-1, .none, .add_ptr_bit_size), ._, ._ },
.{ ._, ._z, .j, .@"0f", ._, ._, ._ },
.{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
- .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ },
+ .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
.{ ._, ._, .mov, .memi(.dst0p, .tmp3), .tmp2p, ._, ._ },
} },
} },
- }) catch |err2| switch (err2) {
+ }) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
@tagName(air_tag),
cg.typeOf(extra.lhs).fmt(pt),
@@ -5175,9 +8470,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.cmp_lte, .cmp_lte_optimized => .lte,
.cmp_gte, .cmp_gte_optimized => .gte,
.cmp_gt, .cmp_gt_optimized => .gt,
- }) else {
+ }) else fallback: {
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
const scalar_ty = cg.typeOf(bin_op.lhs).scalarType(zcu);
+ if (scalar_ty.isRuntimeFloat()) break :fallback try cg.airCmp(inst, switch (air_tag) {
+ else => unreachable,
+ .cmp_lt, .cmp_lt_optimized => .lt,
+ .cmp_lte, .cmp_lte_optimized => .lte,
+ .cmp_gte, .cmp_gte_optimized => .gte,
+ .cmp_gt, .cmp_gt_optimized => .gt,
+ });
const signedness = if (scalar_ty.isAbiInt(zcu))
scalar_ty.intInfo(zcu).signedness
else
@@ -5205,11 +8507,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
.patterns = &.{
.{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .imm8, .gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc.commute() }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
} },
@@ -5217,12 +8519,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
.patterns = &.{
.{ .src = .{ .mem, .imm8 } },
- .{ .src = .{ .gpr, .imm8 } },
- .{ .src = .{ .gpr, .mem } },
- .{ .src = .{ .gpr, .gpr } },
+ .{ .src = .{ .to_gpr, .imm8 } },
+ .{ .src = .{ .to_gpr, .mem } },
+ .{ .src = .{ .to_gpr, .to_gpr } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
} },
@@ -5230,11 +8532,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
.patterns = &.{
.{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .imm16, .gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .imm16, .to_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc.commute() }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
} },
@@ -5242,12 +8544,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
.patterns = &.{
.{ .src = .{ .mem, .imm16 } },
- .{ .src = .{ .gpr, .imm16 } },
- .{ .src = .{ .gpr, .mem } },
- .{ .src = .{ .gpr, .gpr } },
+ .{ .src = .{ .to_gpr, .imm16 } },
+ .{ .src = .{ .to_gpr, .mem } },
+ .{ .src = .{ .to_gpr, .to_gpr } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
} },
@@ -5255,11 +8557,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
.patterns = &.{
.{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .imm32, .gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .imm32, .to_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc.commute() }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
} },
@@ -5267,45 +8569,50 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
.patterns = &.{
.{ .src = .{ .mem, .imm32 } },
- .{ .src = .{ .gpr, .imm32 } },
- .{ .src = .{ .gpr, .mem } },
- .{ .src = .{ .gpr, .gpr } },
+ .{ .src = .{ .to_gpr, .imm32 } },
+ .{ .src = .{ .to_gpr, .mem } },
+ .{ .src = .{ .to_gpr, .to_gpr } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
} },
}, .{
- .required_features = .{ .@"64bit", null },
+ .required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
.patterns = &.{
.{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .simm32, .gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .simm32, .to_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc.commute() }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
} },
}, .{
- .required_features = .{ .@"64bit", null },
+ .required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
.patterns = &.{
.{ .src = .{ .mem, .simm32 } },
- .{ .src = .{ .gpr, .simm32 } },
- .{ .src = .{ .gpr, .mem } },
- .{ .src = .{ .gpr, .gpr } },
+ .{ .src = .{ .to_gpr, .simm32 } },
+ .{ .src = .{ .to_gpr, .mem } },
+ .{ .src = .{ .to_gpr, .to_gpr } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
} },
}, .{
+ .src_constraints = .{ .any_int, .any_int },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem } },
+ .{ .src = .{ .to_mem, .to_mem }, .commute = switch (cc) {
+ else => unreachable,
+ .l, .ge, .b, .ae => .{ 0, 0 },
+ .le, .g, .be, .a => .{ 0, 1 },
+ } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -5315,17 +8622,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .rc = .general_purpose }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
- .{ .@"0:", ._r, .sh, .tmp1b, .i(1), ._, ._ },
+ .{ .@"0:", ._r, .sh, .tmp1b, .si(1), ._, ._ },
.{ ._, ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
.{ ._, ._, .sbb, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
.{ ._, ._c, .set, .tmp1b, ._, ._, ._ },
- .{ ._, .fromCondition(cc), .set, .dst0b, ._, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ },
+ .{ ._, .fromCondition(switch (cc) {
+ else => unreachable,
+ .l, .ge, .b, .ae => cc,
+ .le, .g, .be, .a => cc.commute(),
+ }), .set, .dst0b, ._, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
} },
@@ -5342,13 +8653,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
if (ops[1].index != res[0].index) try ops[1].die(cg);
try res[0].moveTo(inst, cg);
},
- .cmp_eq, .cmp_eq_optimized, .cmp_neq, .cmp_neq_optimized => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) {
+ .cmp_eq,
+ .cmp_eq_optimized,
+ .cmp_neq,
+ .cmp_neq_optimized,
+ => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) {
else => unreachable,
.cmp_eq, .cmp_eq_optimized => .eq,
.cmp_neq, .cmp_neq_optimized => .neq,
}) else fallback: {
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
- if (ip.isOptionalType(cg.typeOf(bin_op.lhs).toIntern())) break :fallback try cg.airCmp(inst, switch (air_tag) {
+ const scalar_ty = cg.typeOf(bin_op.lhs).scalarType(zcu);
+ if (scalar_ty.isRuntimeFloat() or ip.isOptionalType(scalar_ty.toIntern())) break :fallback try cg.airCmp(inst, switch (air_tag) {
else => unreachable,
.cmp_eq, .cmp_eq_optimized => .eq,
.cmp_neq, .cmp_neq_optimized => .neq,
@@ -5362,14 +8678,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
})) {
else => unreachable,
inline .e, .ne => |cc| comptime &.{ .{
- .required_features = .{ .avx2, null },
- .src_constraints = .{ .any_int, .any_int },
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } },
.patterns = &.{
- .{ .src = .{ .ymm, .mem } },
- .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .ymm, .ymm } },
+ .{ .src = .{ .to_ymm, .mem } },
+ .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_ymm, .to_ymm } },
},
- .clobbers = .{ .eflags = true },
.extra_temps = .{
.{ .kind = .{ .rc = .sse } },
.unused,
@@ -5379,19 +8694,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, .vp_, .xor, .tmp0y, .src0y, .src1y, ._ },
.{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ },
} },
}, .{
- .required_features = .{ .avx, null },
- .src_constraints = .{ .any_int, .any_int },
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } },
.patterns = &.{
- .{ .src = .{ .ymm, .mem } },
- .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .ymm, .ymm } },
+ .{ .src = .{ .to_ymm, .mem } },
+ .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_ymm, .to_ymm } },
},
- .clobbers = .{ .eflags = true },
.extra_temps = .{
.{ .kind = .{ .rc = .sse } },
.unused,
@@ -5401,19 +8716,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, .v_pd, .xor, .tmp0y, .src0y, .src1y, ._ },
.{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ },
} },
}, .{
- .required_features = .{ .avx, null },
- .src_constraints = .{ .any_int, .any_int },
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } },
.patterns = &.{
- .{ .src = .{ .xmm, .mem } },
- .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .xmm, .xmm } },
+ .{ .src = .{ .to_xmm, .mem } },
+ .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_xmm, .to_xmm } },
},
- .clobbers = .{ .eflags = true },
.extra_temps = .{
.{ .kind = .{ .rc = .sse } },
.unused,
@@ -5423,33 +8738,33 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, .vp_, .xor, .tmp0x, .src0x, .src1x, ._ },
.{ ._, .vp_, .@"test", .tmp0x, .tmp0x, ._, ._ },
} },
}, .{
- .required_features = .{ .sse4_1, null },
- .src_constraints = .{ .any_int, .any_int },
+ .required_features = .{ .sse4_1, null, null, null },
+ .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } },
.patterns = &.{
- .{ .src = .{ .mut_xmm, .mem } },
- .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_xmm, .xmm } },
+ .{ .src = .{ .to_mut_xmm, .mem } },
+ .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_xmm, .to_xmm } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, .p_, .xor, .src0x, .src1x, ._, ._ },
.{ ._, .p_, .@"test", .src0x, .src0x, ._, ._ },
} },
}, .{
- .required_features = .{ .sse2, null },
- .src_constraints = .{ .any_int, .any_int },
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } },
.patterns = &.{
- .{ .src = .{ .mut_xmm, .mem } },
- .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_xmm, .xmm } },
+ .{ .src = .{ .to_mut_xmm, .mem } },
+ .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_xmm, .to_xmm } },
},
- .clobbers = .{ .eflags = true },
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .sse } },
@@ -5459,22 +8774,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
.{ ._, .p_, .xor, .src0x, .src1x, ._, ._ },
.{ ._, .p_b, .cmpeq, .tmp1x, .src0x, ._, ._ },
.{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ },
- .{ ._, ._, .xor, .tmp0d, .i(0xffff), ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .si(0xffff), ._, ._ },
} },
}, .{
- .required_features = .{ .sse2, .mmx },
- .src_constraints = .{ .any_int, .any_int },
+ .required_features = .{ .sse, .mmx, null, null },
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
.patterns = &.{
- .{ .src = .{ .mut_mm, .mem } },
- .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mut_mm, .mm } },
+ .{ .src = .{ .to_mut_mm, .mem } },
+ .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_mm, .to_mm } },
},
- .clobbers = .{ .eflags = true },
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .kind = .{ .rc = .mmx } },
@@ -5484,26 +8799,27 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ },
.{ ._, .p_, .xor, .src0q, .src1q, ._, ._ },
.{ ._, .p_b, .cmpeq, .tmp1q, .src0q, ._, ._ },
.{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ },
- .{ ._, ._, .xor, .tmp0d, .i(0xff), ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .si(0xff), ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
.patterns = &.{
.{ .src = .{ .mem, .imm8 } },
.{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .gpr, .imm8 } },
- .{ .src = .{ .imm8, .gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .gpr, .mem } },
- .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .gpr, .gpr } },
+ .{ .src = .{ .to_gpr, .imm8 } },
+ .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_gpr, .mem } },
+ .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_gpr, .to_gpr } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
} },
@@ -5512,14 +8828,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.patterns = &.{
.{ .src = .{ .mem, .imm16 } },
.{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .gpr, .imm16 } },
- .{ .src = .{ .imm16, .gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .gpr, .mem } },
- .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .gpr, .gpr } },
+ .{ .src = .{ .to_gpr, .imm16 } },
+ .{ .src = .{ .imm16, .to_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_gpr, .mem } },
+ .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_gpr, .to_gpr } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
} },
@@ -5528,36 +8844,68 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.patterns = &.{
.{ .src = .{ .mem, .imm32 } },
.{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .gpr, .imm32 } },
- .{ .src = .{ .imm32, .gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .gpr, .mem } },
- .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .gpr, .gpr } },
+ .{ .src = .{ .to_gpr, .imm32 } },
+ .{ .src = .{ .imm32, .to_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_gpr, .mem } },
+ .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_gpr, .to_gpr } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
} },
}, .{
- .required_features = .{ .@"64bit", null },
+ .required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
.patterns = &.{
.{ .src = .{ .mem, .simm32 } },
.{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } },
- .{ .src = .{ .gpr, .simm32 } },
- .{ .src = .{ .simm32, .gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .gpr, .mem } },
- .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } },
- .{ .src = .{ .gpr, .gpr } },
+ .{ .src = .{ .to_gpr, .simm32 } },
+ .{ .src = .{ .simm32, .to_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_gpr, .mem } },
+ .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_gpr, .to_gpr } },
},
- .clobbers = .{ .eflags = true },
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
} },
}, .{
- .required_features = .{ .avx2, null },
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{
+ .{ .remainder_int = .{ .of = .yword, .is = .xword } },
+ .{ .remainder_int = .{ .of = .yword, .is = .xword } },
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
+ .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .tmp1y, ._ },
+ .{ .@"0:", .v_dqu, .mov, .tmp2y, .memiad(.src0y, .tmp0, .add_size, -16), ._, ._ },
+ .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .memiad(.src1y, .tmp0, .add_size, -16), ._ },
+ .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .tmp2x, .memad(.src0x, .add_size, -16), ._, ._ },
+ .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .memad(.src1x, .add_size, -16), ._ },
+ .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
+ .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx2, null, null, null },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -5570,18 +8918,51 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, .vp_, .xor, .tmp1y, .tmp1y, .tmp1y, ._ },
.{ .@"0:", .v_dqu, .mov, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
.{ ._, .vp_, .xor, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._ },
.{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
- .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .remainder_int = .{ .of = .yword, .is = .xword } },
+ .{ .remainder_int = .{ .of = .yword, .is = .xword } },
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .kind = .{ .rc = .sse } },
+ .{ .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
+ .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .tmp1y, ._ },
+ .{ .@"0:", .v_pd, .movu, .tmp2y, .memiad(.src0y, .tmp0, .add_size, -16), ._, ._ },
+ .{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .memiad(.src1y, .tmp0, .add_size, -16), ._ },
+ .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_pd, .mova, .tmp2x, .memad(.src0x, .add_size, -16), ._, ._ },
+ .{ ._, .v_pd, .xor, .tmp2x, .tmp2x, .memad(.src1x, .add_size, -16), ._ },
+ .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
.{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
} },
}, .{
- .required_features = .{ .avx, null },
+ .required_features = .{ .avx, null, null, null },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -5594,18 +8975,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .tmp1y, ._ },
.{ .@"0:", .v_pd, .movu, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
.{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._ },
.{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
- .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
.{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
} },
}, .{
- .required_features = .{ .avx, null },
+ .required_features = .{ .avx, null, null, null },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -5618,18 +9000,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, .vp_, .xor, .tmp1x, .tmp1x, .tmp1x, ._ },
.{ .@"0:", .v_dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
.{ ._, .vp_, .xor, .tmp2x, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._ },
.{ ._, .vp_, .@"or", .tmp1x, .tmp1x, .tmp2x, ._ },
- .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
.{ ._, .vp_, .@"test", .tmp1x, .tmp1x, ._, ._ },
} },
}, .{
- .required_features = .{ .sse4_1, null },
+ .required_features = .{ .sse4_1, null, null, null },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -5642,18 +9025,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
.{ .@"0:", ._dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
.{ ._, .p_, .xor, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
.{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
.{ ._, .p_, .@"test", .tmp1x, .tmp1x, ._, ._ },
} },
}, .{
- .required_features = .{ .sse2, null },
+ .required_features = .{ .sse2, null, null, null },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -5666,21 +9050,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
.{ .@"0:", ._dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
.{ ._, .p_, .xor, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
.{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
.{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ },
.{ ._, .p_b, .cmpeq, .tmp1x, .tmp2x, ._, ._ },
.{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ },
- .{ ._, ._, .cmp, .tmp0d, .i(0xffff), ._, ._ },
+ .{ ._, ._, .cmp, .tmp0d, .si(0xffff), ._, ._ },
} },
}, .{
- .required_features = .{ .sse, .mmx },
+ .required_features = .{ .sse, .mmx, null, null },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -5693,18 +9078,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ },
.{ .@"0:", ._q, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
.{ ._, .p_, .xor, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
.{ ._, .p_, .@"or", .tmp1q, .tmp2q, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
.{ ._, .p_, .xor, .tmp2q, .tmp2q, ._, ._ },
.{ ._, .p_b, .cmpeq, .tmp1q, .tmp2q, ._, ._ },
.{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ },
- .{ ._, ._, .cmp, .tmp0d, .i(0xff), ._, ._ },
+ .{ ._, ._, .cmp, .tmp0d, .si(0xff), ._, ._ },
} },
}, .{
.patterns = &.{
@@ -5719,13 +9105,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{.{ .cc = cc }},
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
.{ ._, ._, .xor, .tmp1p, .tmp1p, ._, ._ },
.{ .@"0:", ._, .mov, .tmp2p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
.{ ._, ._, .xor, .tmp2p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
.{ ._, ._, .@"or", .tmp1p, .tmp2p, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .a(.tmp2, .add_size), ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .sa(.tmp2, .add_size), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
.{ ._, ._, .@"test", .tmp1p, .tmp1p, ._, ._ },
} },
@@ -6453,11 +9840,8 @@ fn regClassForType(self: *CodeGen, ty: Type) Register.Class {
else => .sse,
},
.vector => switch (ty.childType(zcu).toIntern()) {
- .bool_type, .u1_type => .general_purpose,
- else => if (ty.isAbiInt(zcu) and ty.intInfo(zcu).bits == 1)
- .general_purpose
- else
- .sse,
+ .bool_type => .general_purpose,
+ else => .sse,
},
else => .general_purpose,
};
@@ -8383,32 +11767,54 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
const lhs_mcv = try self.resolveInst(bin_op.lhs);
const rhs_mcv = try self.resolveInst(bin_op.rhs);
- const mat_lhs_mcv = switch (lhs_mcv) {
- .load_symbol => mat_lhs_mcv: {
+ const mat_lhs_mcv = mat_lhs_mcv: switch (lhs_mcv) {
+ .register => |lhs_reg| switch (lhs_reg.class()) {
+ else => lhs_mcv,
+ .sse => {
+ const mat_lhs_mcv: MCValue = .{
+ .register_pair = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp),
+ };
+ try self.genCopy(dst_ty, mat_lhs_mcv, lhs_mcv, .{});
+ break :mat_lhs_mcv mat_lhs_mcv;
+ },
+ },
+ .load_symbol => {
// TODO clean this up!
const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address());
break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
},
else => lhs_mcv,
};
- const mat_lhs_lock = switch (mat_lhs_mcv) {
- .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
- else => null,
+ const mat_lhs_locks: [2]?RegisterLock = switch (mat_lhs_mcv) {
+ .register_pair => |mat_lhs_regs| self.register_manager.lockRegs(2, mat_lhs_regs),
+ .indirect => |reg_off| .{ self.register_manager.lockReg(reg_off.reg), null },
+ else => @splat(null),
};
- defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
- const mat_rhs_mcv = switch (rhs_mcv) {
- .load_symbol => mat_rhs_mcv: {
+ defer for (mat_lhs_locks) |mat_lhs_lock| if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
+ const mat_rhs_mcv = mat_rhs_mcv: switch (rhs_mcv) {
+ .register => |rhs_reg| switch (rhs_reg.class()) {
+ else => rhs_mcv,
+ .sse => {
+ const mat_rhs_mcv: MCValue = .{
+ .register_pair = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp),
+ };
+ try self.genCopy(dst_ty, mat_rhs_mcv, rhs_mcv, .{});
+ break :mat_rhs_mcv mat_rhs_mcv;
+ },
+ },
+ .load_symbol => {
// TODO clean this up!
const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address());
break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
},
else => rhs_mcv,
};
- const mat_rhs_lock = switch (mat_rhs_mcv) {
- .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
- else => null,
+ const mat_rhs_locks: [2]?RegisterLock = switch (mat_rhs_mcv) {
+ .register_pair => |mat_rhs_regs| self.register_manager.lockRegs(2, mat_rhs_regs),
+ .indirect => |reg_off| .{ self.register_manager.lockReg(reg_off.reg), null },
+ else => @splat(null),
};
- defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
+ defer for (mat_rhs_locks) |mat_rhs_lock| if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
.{ ._, .mov },
@@ -10003,7 +13409,7 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void {
} },
}, .u(0));
_ = try self.asmJccReloc(.e, loop);
- try self.asmRegisterMemory(.{ ._, .bsr }, dst_reg.to64(), .{
+ try self.asmRegisterMemory(.{ ._r, .bs }, dst_reg.to64(), .{
.base = .{ .frame = src_frame_addr.index },
.mod = .{ .rm = .{
.size = .qword,
@@ -10080,8 +13486,8 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void {
defer self.register_manager.unlockReg(wide_lock);
try self.truncateRegister(src_ty, wide_reg);
- try self.genBinOpMir(.{ ._, .bsr }, .u16, dst_mcv, .{ .register = wide_reg });
- } else try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv);
+ try self.genBinOpMir(.{ ._r, .bs }, .u16, dst_mcv, .{ .register = wide_reg });
+ } else try self.genBinOpMir(.{ ._r, .bs }, src_ty, dst_mcv, mat_src_mcv);
try self.asmCmovccRegisterRegister(
.z,
@@ -10103,7 +13509,7 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.truncateRegister(src_ty, wide_reg);
try self.genBinOpMir(
- .{ ._, .bsr },
+ .{ ._r, .bs },
if (src_bits <= 8) .u16 else src_ty,
dst_mcv,
.{ .register = wide_reg },
@@ -10200,7 +13606,7 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void {
} },
}, .u(0));
_ = try self.asmJccReloc(.e, loop);
- try self.asmRegisterMemory(.{ ._, .bsf }, dst_reg.to64(), .{
+ try self.asmRegisterMemory(.{ ._f, .bs }, dst_reg.to64(), .{
.base = .{ .frame = src_frame_addr.index },
.mod = .{ .rm = .{
.size = .qword,
@@ -10280,8 +13686,8 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void {
defer self.register_manager.unlockReg(wide_lock);
try self.truncateRegister(src_ty, wide_reg);
- try self.genBinOpMir(.{ ._, .bsf }, wide_ty, dst_mcv, .{ .register = wide_reg });
- } else try self.genBinOpMir(.{ ._, .bsf }, src_ty, dst_mcv, mat_src_mcv);
+ try self.genBinOpMir(.{ ._f, .bs }, wide_ty, dst_mcv, .{ .register = wide_reg });
+ } else try self.genBinOpMir(.{ ._f, .bs }, src_ty, dst_mcv, mat_src_mcv);
const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(zcu))), 2);
try self.asmCmovccRegisterRegister(
@@ -12975,7 +16381,18 @@ fn genShiftBinOp(
const rcx_lock = self.register_manager.lockReg(.rcx);
defer if (rcx_lock) |lock| self.register_manager.unlockReg(lock);
- const lhs_lock = switch (lhs_mcv) {
+ const mat_lhs_mcv: MCValue, const can_reuse_lhs = switch (lhs_mcv) {
+ .register => |lhs_reg| switch (lhs_reg.class()) {
+ .general_purpose => .{ lhs_mcv, true },
+ else => lhs: {
+ const mat_lhs_mcv = try self.allocTempRegOrMem(lhs_ty, true);
+ try self.genCopy(lhs_ty, mat_lhs_mcv, lhs_mcv, .{});
+ break :lhs .{ mat_lhs_mcv, false };
+ },
+ },
+ else => .{ lhs_mcv, true },
+ };
+ const lhs_lock = switch (mat_lhs_mcv) {
.register => |reg| self.register_manager.lockReg(reg),
else => null,
};
@@ -12988,12 +16405,12 @@ fn genShiftBinOp(
defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
const dst_mcv: MCValue = dst: {
- if (maybe_inst) |inst| {
+ if (can_reuse_lhs) if (maybe_inst) |inst| {
const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
- if (self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) break :dst lhs_mcv;
- }
+ if (self.reuseOperand(inst, bin_op.lhs, 0, mat_lhs_mcv)) break :dst mat_lhs_mcv;
+ };
const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true);
- try self.genCopy(lhs_ty, dst_mcv, lhs_mcv, .{});
+ try self.genCopy(lhs_ty, dst_mcv, mat_lhs_mcv, .{});
break :dst dst_mcv;
};
@@ -18337,12 +21754,28 @@ const MoveStrategy = union(enum) {
try self.asmRegister(.{ .f_, .ld }, src_reg);
try self.asmMemory(.{ .f_p, .st }, dst_mem);
},
- .insert_extract, .vex_insert_extract => |ie| try self.asmMemoryRegisterImmediate(
- ie.extract,
- dst_mem,
- src_reg,
- .u(0),
- ),
+ .insert_extract, .vex_insert_extract => |ie| if (ie.extract[0] != .p_w or self.hasFeature(.sse4_1))
+ try self.asmMemoryRegisterImmediate(ie.extract, dst_mem, src_reg, .u(0))
+ else if (self.hasFeature(.sse2)) {
+ const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
+ try self.asmRegisterRegisterImmediate(ie.extract, tmp_reg.to32(), src_reg.to128(), .u(0));
+ try self.asmMemoryRegister(.{ ._, .mov }, dst_mem, tmp_reg.to16());
+ } else {
+ const tmp_frame_index = try self.allocFrameIndex(.init(.{
+ .size = 16,
+ .alignment = .@"16",
+ }));
+ try self.asmMemoryRegister(.{ ._ps, .mova }, .{
+ .base = .{ .frame = tmp_frame_index },
+ .mod = .{ .rm = .{ .size = .xword } },
+ }, src_reg.to128());
+ const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
+ try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg.to16(), .{
+ .base = .{ .frame = tmp_frame_index },
+ .mod = .{ .rm = .{ .size = .word } },
+ });
+ try self.asmMemoryRegister(.{ ._, .mov }, dst_mem, tmp_reg.to16());
+ },
}
}
};
@@ -18400,8 +21833,10 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool)
.{ ._ss, .mov } },
5...8 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_sd, .mov }
+ else if (self.hasFeature(.sse2))
+ .{ ._sd, .mov }
else
- .{ ._sd, .mov } },
+ .{ ._ps, .movl } },
9...16 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_pd, if (aligned) .mova else .movu }
else if (self.hasFeature(.sse2))
@@ -18427,8 +21862,10 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool)
.{ ._ss, .mov } },
64 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_sd, .mov }
+ else if (self.hasFeature(.sse2))
+ .{ ._sd, .mov }
else
- .{ ._sd, .mov } },
+ .{ ._ps, .movl } },
128 => return .{ .move = if (self.hasFeature(.avx))
.{ if (aligned) .v_dqa else .v_dqu, .mov }
else if (self.hasFeature(.sse2))
@@ -18623,6 +22060,30 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C
}, opts),
inline .register_pair, .register_triple, .register_quadruple => |dst_regs| {
const src_info: ?struct { addr_reg: Register, addr_lock: RegisterLock } = switch (src_mcv) {
+ .register => |src_reg| switch (dst_regs[0].class()) {
+ .general_purpose => switch (src_reg.class()) {
+ else => unreachable,
+ .sse => if (ty.abiSize(pt.zcu) <= 16) {
+ if (self.hasFeature(.avx)) {
+ try self.asmRegisterRegister(.{ .v_q, .mov }, dst_regs[0].to64(), src_reg.to128());
+ try self.asmRegisterRegisterImmediate(.{ .vp_q, .extr }, dst_regs[1].to64(), src_reg.to128(), .u(1));
+ } else if (self.hasFeature(.sse4_1)) {
+ try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[0].to64(), src_reg.to128());
+ try self.asmRegisterRegisterImmediate(.{ .p_q, .extr }, dst_regs[1].to64(), src_reg.to128(), .u(1));
+ } else {
+ const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
+ const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+ defer self.register_manager.unlockReg(tmp_lock);
+
+ try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[0].to64(), src_reg.to128());
+ try self.asmRegisterRegister(.{ ._ps, .movhl }, tmp_reg.to128(), src_reg.to128());
+ try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[1].to64(), src_reg.to128());
+ }
+ return;
+ } else unreachable,
+ },
+ else => unreachable,
+ },
.register_pair, .memory, .indirect, .load_frame => null,
.load_symbol, .load_direct, .load_got, .load_tlv => src: {
const src_addr_reg =
@@ -18863,7 +22324,39 @@ fn genSetReg(
inline .register_pair,
.register_triple,
.register_quadruple,
- => |src_regs| try self.genSetReg(dst_reg, ty, .{ .register = src_regs[0] }, opts),
+ => |src_regs| switch (dst_reg.class()) {
+ .general_purpose => switch (src_regs[0].class()) {
+ .general_purpose => try self.genSetReg(dst_reg, ty, .{ .register = src_regs[0] }, opts),
+ else => unreachable,
+ },
+ .sse => switch (src_regs[0].class()) {
+ .general_purpose => if (abi_size <= 16) {
+ if (self.hasFeature(.avx)) {
+ try self.asmRegisterRegister(.{ .v_q, .mov }, dst_reg.to128(), src_regs[0].to64());
+ try self.asmRegisterRegisterRegisterImmediate(
+ .{ .vp_q, .insr },
+ dst_reg.to128(),
+ dst_reg.to128(),
+ src_regs[1].to64(),
+ .u(1),
+ );
+ } else if (self.hasFeature(.sse4_1)) {
+ try self.asmRegisterRegister(.{ ._q, .mov }, dst_reg.to128(), src_regs[0].to64());
+ try self.asmRegisterRegisterImmediate(.{ .p_q, .insr }, dst_reg.to128(), src_regs[1].to64(), .u(1));
+ } else {
+ const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
+ const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+ defer self.register_manager.unlockReg(tmp_lock);
+
+ try self.asmRegisterRegister(.{ ._q, .mov }, dst_reg.to128(), src_regs[0].to64());
+ try self.asmRegisterRegister(.{ ._q, .mov }, tmp_reg.to128(), src_regs[1].to64());
+ try self.asmRegisterRegister(.{ ._ps, .movlh }, dst_reg.to128(), tmp_reg.to128());
+ }
+ } else unreachable,
+ else => unreachable,
+ },
+ else => unreachable,
+ },
.register_offset,
.indirect,
.load_frame,
@@ -23517,8 +27010,6 @@ fn promoteVarArg(self: *CodeGen, ty: Type) Type {
}
}
-// ====================================== rewrite starts here ======================================
-
const Temp = struct {
index: Air.Inst.Index,
@@ -24311,13 +27802,13 @@ const Select = struct {
}
const Case = struct {
- required_features: [2]?std.Target.x86.Feature = @splat(null),
+ required_features: [4]?std.Target.x86.Feature = @splat(null),
dst_constraints: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]Constraint = @splat(.any),
src_constraints: [@intFromEnum(Select.Operand.Ref.none) - @intFromEnum(Select.Operand.Ref.src0)]Constraint = @splat(.any),
patterns: []const Select.Pattern,
- clobbers: struct { eflags: bool = false } = .{},
extra_temps: [@intFromEnum(Select.Operand.Ref.dst0) - @intFromEnum(Select.Operand.Ref.tmp0)]TempSpec = @splat(.unused),
dst_temps: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]TempSpec.Kind = @splat(.unused),
+ clobbers: struct { eflags: bool = false } = .{},
each: union(enum) {
once: []const Instruction,
},
@@ -24327,9 +27818,32 @@ const Select = struct {
any,
any_bool_vec,
any_int,
+ any_signed_int,
any_float,
bool_vec: Memory.Size,
+ vec: Memory.Size,
+ signed_int_vec: Memory.Size,
+ signed_int_or_full_vec: Memory.Size,
+ unsigned_int_vec: Memory.Size,
+ int_or_vec: Memory.Size,
+ exact_remainder_int_or_vec: struct { of: Memory.Size, is: Memory.Size },
int: Memory.Size,
+ scalar_int: Memory.Size,
+ scalar_signed_int: Memory.Size,
+ scalar_unsigned_int: Memory.Size,
+ scalar_remainder_int: struct { of: Memory.Size, is: Memory.Size },
+ exact_int: u16,
+ exact_signed_int: u16,
+ exact_unsigned_int: u16,
+ signed_or_exact_int: Memory.Size,
+ unsigned_or_exact_int: Memory.Size,
+ po2_int: Memory.Size,
+ signed_po2_int: Memory.Size,
+ unsigned_po2_or_exact_int: Memory.Size,
+ remainder_int: struct { of: Memory.Size, is: Memory.Size },
+ exact_remainder_int: struct { of: Memory.Size, is: Memory.Size },
+ signed_or_exact_remainder_int: struct { of: Memory.Size, is: Memory.Size },
+ unsigned_or_exact_remainder_int: struct { of: Memory.Size, is: Memory.Size },
signed_int: Memory.Size,
unsigned_int: Memory.Size,
@@ -24338,30 +27852,183 @@ const Select = struct {
switch (constraint) {
.any => return true,
.any_bool_vec => return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type,
- .any_int => {
+ .any_int => return ty.toIntern() == .bool_type or ty.isPtrAtRuntime(zcu) or ty.isAbiInt(zcu),
+ .any_signed_int => return ty.isAbiInt(zcu) and ty.intInfo(zcu).signedness == .signed,
+ .any_float => return ty.scalarType(zcu).isRuntimeFloat(),
+ .bool_vec => |size| return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and
+ size.bitSize(cg.target) >= ty.vectorLen(zcu),
+ .vec => |size| return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() != .bool_type and
+ size.bitSize(cg.target) >= ty.abiSize(zcu),
+ .signed_int_vec => |size| {
+ if (!ty.isVector(zcu) or size.bitSize(cg.target) < 8 * ty.abiSize(zcu)) return false;
const scalar_ty = ty.scalarType(zcu);
- return scalar_ty.isAbiInt(zcu) or scalar_ty.isPtrAtRuntime(zcu);
+ return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).signedness == .signed;
+ },
+ .signed_int_or_full_vec => |size| {
+ if (!ty.isVector(zcu) or size.bitSize(cg.target) < 8 * ty.abiSize(zcu)) return false;
+ const scalar_ty = ty.scalarType(zcu);
+ if (scalar_ty.isPtrAtRuntime(zcu)) return true;
+ if (!scalar_ty.isAbiInt(zcu)) return false;
+ const scalar_int_info = scalar_ty.intInfo(zcu);
+ return switch (scalar_int_info.signedness) {
+ .signed => true,
+ .unsigned => scalar_int_info.bits >= 8 and std.math.isPowerOfTwo(scalar_int_info.bits),
+ };
+ },
+ .unsigned_int_vec => |size| {
+ if (!ty.isVector(zcu) or size.bitSize(cg.target) < ty.bitSize(zcu)) return false;
+ const scalar_ty = ty.scalarType(zcu);
+ if (scalar_ty.isPtrAtRuntime(zcu)) return true;
+ return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).signedness == .unsigned;
+ },
+ .int_or_vec => |size| {
+ if (ty.isVector(zcu)) return ty.scalarType(zcu).toIntern() != .bool_type and
+ size.bitSize(cg.target) >= 8 * ty.abiSize(zcu);
+ if (ty.toIntern() == .bool_type) return true;
+ if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth();
+ return ty.isAbiInt(zcu) and size.bitSize(cg.target) >= ty.intInfo(zcu).bits;
+ },
+ .exact_remainder_int_or_vec => |of_is| {
+ if (ty.isVector(zcu)) return ty.scalarType(zcu).toIntern() != .bool_type and
+ of_is.is.bitSize(cg.target) == (8 * ty.abiSize(zcu) - 1) % of_is.of.bitSize(cg.target) + 1;
+ if (ty.isPtrAtRuntime(zcu))
+ return of_is.is.bitSize(cg.target) == (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
+ if (!ty.isAbiInt(zcu)) return false;
+ return of_is.is.bitSize(cg.target) == (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1;
},
- .any_float => return ty.scalarType(zcu).isRuntimeFloat(),
- .bool_vec => |size| return ty.isVector(zcu) and
- ty.scalarType(zcu).toIntern() == .bool_type and ty.vectorLen(zcu) <= size.bitSize(cg.target),
.int => |size| {
+ if (ty.toIntern() == .bool_type) return true;
+ if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth();
+ return ty.isAbiInt(zcu) and size.bitSize(cg.target) >= ty.intInfo(zcu).bits;
+ },
+ .scalar_int => |size| {
const scalar_ty = ty.scalarType(zcu);
- if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize(cg.target);
- return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).bits <= size.bitSize(cg.target);
+ if (scalar_ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth();
+ return scalar_ty.isAbiInt(zcu) and size.bitSize(cg.target) >= scalar_ty.intInfo(zcu).bits;
},
- .signed_int => |size| {
+ .scalar_signed_int => |size| {
const scalar_ty = ty.scalarType(zcu);
if (!scalar_ty.isAbiInt(zcu)) return false;
- const info = scalar_ty.intInfo(zcu);
- return info.signedness == .signed and info.bits <= size.bitSize(cg.target);
+ const scalar_int_info = scalar_ty.intInfo(zcu);
+ return scalar_int_info.signedness == .signed and size.bitSize(cg.target) >= scalar_int_info.bits;
},
- .unsigned_int => |size| {
+ .scalar_unsigned_int => |size| {
+ const scalar_ty = ty.scalarType(zcu);
+ if (scalar_ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth();
+ if (!scalar_ty.isAbiInt(zcu)) return false;
+ const scalar_int_info = scalar_ty.intInfo(zcu);
+ return scalar_int_info.signedness == .unsigned and size.bitSize(cg.target) >= scalar_int_info.bits;
+ },
+ .scalar_remainder_int => |of_is| {
const scalar_ty = ty.scalarType(zcu);
- if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize(cg.target);
+ if (scalar_ty.isPtrAtRuntime(zcu))
+ return of_is.is.bitSize(cg.target) >= (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
if (!scalar_ty.isAbiInt(zcu)) return false;
- const info = scalar_ty.intInfo(zcu);
- return info.signedness == .unsigned and info.bits <= size.bitSize(cg.target);
+ return of_is.is.bitSize(cg.target) >= (scalar_ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1;
+ },
+ .exact_int => |bit_size| {
+ if (ty.toIntern() == .bool_type) return bit_size == 1;
+ if (ty.isPtrAtRuntime(zcu)) return bit_size == cg.target.ptrBitWidth();
+ return ty.isAbiInt(zcu) and bit_size == ty.intInfo(zcu).bits;
+ },
+ .exact_signed_int => |bit_size| {
+ if (!ty.isAbiInt(zcu)) return false;
+ const int_info = ty.intInfo(zcu);
+ return int_info.signedness == .signed and bit_size == int_info.bits;
+ },
+ .exact_unsigned_int => |bit_size| {
+ if (ty.toIntern() == .bool_type) return bit_size == 1;
+ if (ty.isPtrAtRuntime(zcu)) return bit_size == cg.target.ptrBitWidth();
+ if (!ty.isAbiInt(zcu)) return false;
+ const int_info = ty.intInfo(zcu);
+ return int_info.signedness == .unsigned and bit_size == int_info.bits;
+ },
+ .signed_or_exact_int => |size| {
+ if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) == cg.target.ptrBitWidth();
+ if (!ty.isAbiInt(zcu)) return false;
+ const int_info = ty.intInfo(zcu);
+ return switch (int_info.signedness) {
+ .signed => size.bitSize(cg.target) >= int_info.bits,
+ .unsigned => size.bitSize(cg.target) == int_info.bits,
+ };
+ },
+ .unsigned_or_exact_int => |size| {
+ if (ty.toIntern() == .bool_type or ty.isPtrAtRuntime(zcu)) return true;
+ if (!ty.isAbiInt(zcu)) return false;
+ const int_info = ty.intInfo(zcu);
+ return switch (int_info.signedness) {
+ .signed => size.bitSize(cg.target) == int_info.bits,
+ .unsigned => size.bitSize(cg.target) >= int_info.bits,
+ };
+ },
+ .po2_int => |size| {
+ if (ty.toIntern() == .bool_type) return true;
+ if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth();
+ if (!ty.isAbiInt(zcu)) return false;
+ const bit_size = ty.intInfo(zcu).bits;
+ return std.math.isPowerOfTwo(bit_size) and size.bitSize(cg.target) >= bit_size;
+ },
+ .signed_po2_int => |size| {
+ if (!ty.isAbiInt(zcu)) return false;
+ const int_info = ty.intInfo(zcu);
+ return int_info.signedness == .signed and std.math.isPowerOfTwo(int_info.bits) and
+ size.bitSize(cg.target) >= int_info.bits;
+ },
+ .unsigned_po2_or_exact_int => |size| {
+ if (ty.toIntern() == .bool_type) return true;
+ if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth();
+ if (!ty.isAbiInt(zcu)) return false;
+ const int_info = ty.intInfo(zcu);
+ return switch (int_info.signedness) {
+ .signed => size.bitSize(cg.target) == int_info.bits,
+ .unsigned => std.math.isPowerOfTwo(int_info.bits) and size.bitSize(cg.target) >= int_info.bits,
+ };
+ },
+ .remainder_int => |of_is| {
+ if (ty.toIntern() == .bool_type) return true;
+ if (ty.isPtrAtRuntime(zcu))
+ return of_is.is.bitSize(cg.target) >= (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
+ if (!ty.isAbiInt(zcu)) return false;
+ return of_is.is.bitSize(cg.target) >= (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1;
+ },
+ .exact_remainder_int => |of_is| {
+ if (ty.isPtrAtRuntime(zcu))
+ return of_is.is.bitSize(cg.target) == (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
+ if (!ty.isAbiInt(zcu)) return false;
+ return of_is.is.bitSize(cg.target) == (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1;
+ },
+ .signed_or_exact_remainder_int => |of_is| {
+ if (ty.isPtrAtRuntime(zcu))
+ return of_is.is.bitSize(cg.target) == (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
+ if (!ty.isAbiInt(zcu)) return false;
+ const int_info = ty.intInfo(zcu);
+ return switch (int_info.signedness) {
+ .signed => of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1,
+ .unsigned => of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1,
+ };
+ },
+ .unsigned_or_exact_remainder_int => |of_is| {
+ if (ty.toIntern() == .bool_type) return true;
+ if (ty.isPtrAtRuntime(zcu))
+ return of_is.is.bitSize(cg.target) >= (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
+ if (!ty.isAbiInt(zcu)) return false;
+ const int_info = ty.intInfo(zcu);
+ return switch (int_info.signedness) {
+ .signed => of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1,
+ .unsigned => of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1,
+ };
+ },
+ .signed_int => |size| {
+ if (!ty.isAbiInt(zcu)) return false;
+ const int_info = ty.intInfo(zcu);
+ return int_info.signedness == .signed and size.bitSize(cg.target) >= int_info.bits;
+ },
+ .unsigned_int => |size| {
+ if (ty.toIntern() == .bool_type) return true;
+ if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth();
+ if (!ty.isAbiInt(zcu)) return false;
+ const int_info = ty.intInfo(zcu);
+ return int_info.signedness == .unsigned and size.bitSize(cg.target) >= int_info.bits;
},
}
}
@@ -24379,97 +28046,107 @@ const Select = struct {
imm32,
simm32,
mem,
- mut_mem,
to_mem,
+ mut_mem,
+ to_mut_mem,
gpr,
+ to_gpr,
mut_gpr,
+ to_mut_gpr,
mm,
+ to_mm,
mut_mm,
+ to_mut_mm,
xmm,
+ to_xmm,
mut_xmm,
+ to_mut_xmm,
ymm,
+ to_ymm,
mut_ymm,
+ to_mut_ymm,
fn matches(src: Src, temp: Temp, cg: *CodeGen) bool {
- switch (src) {
+ return switch (src) {
.none => unreachable,
- .any => return true,
- .imm8 => return switch (temp.tracking(cg).short) {
+ .any => true,
+ .imm8 => switch (temp.tracking(cg).short) {
.immediate => |imm| std.math.cast(u8, imm) != null,
else => false,
},
- .imm16 => return switch (temp.tracking(cg).short) {
+ .imm16 => switch (temp.tracking(cg).short) {
.immediate => |imm| std.math.cast(u16, imm) != null,
else => false,
},
- .imm32 => return switch (temp.tracking(cg).short) {
+ .imm32 => switch (temp.tracking(cg).short) {
.immediate => |imm| std.math.cast(u32, imm) != null,
else => false,
},
- .simm32 => return switch (temp.tracking(cg).short) {
+ .simm32 => switch (temp.tracking(cg).short) {
.immediate => |imm| std.math.cast(i32, @as(i64, @bitCast(imm))) != null,
else => false,
},
- .mem => return temp.tracking(cg).short.isMemory(),
- .mut_mem => return temp.isMut(cg) and temp.tracking(cg).short.isMemory(),
- .to_mem => return true,
- .gpr, .mut_gpr => {
- const mcv = temp.tracking(cg).short;
- const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu);
- return abi_size <= 8 and switch (mcv) {
- .register => |reg| reg.class() == .general_purpose,
- .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and
- reg_off.off == 0,
- .register_pair, .register_triple, .register_quadruple => false,
- else => true,
- };
+ .mem => temp.tracking(cg).short.isMemory(),
+ .to_mem, .to_mut_mem => true,
+ .mut_mem => temp.isMut(cg) and temp.tracking(cg).short.isMemory(),
+ .gpr => temp.typeOf(cg).abiSize(cg.pt.zcu) <= 8 and switch (temp.tracking(cg).short) {
+ .register => |reg| reg.class() == .general_purpose,
+ .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and reg_off.off == 0,
+ else => false,
},
- .mm, .mut_mm => {
- const mcv = temp.tracking(cg).short;
- const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu);
- return abi_size <= 8 and switch (mcv) {
- .register => |reg| reg.class() == .mmx,
- .register_offset => |reg_off| reg_off.reg.class() == .mmx and
- reg_off.off == 0,
- else => false,
- };
+ .mut_gpr => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) <= 8 and switch (temp.tracking(cg).short) {
+ .register => |reg| reg.class() == .general_purpose,
+ .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and reg_off.off == 0,
+ else => false,
},
- .xmm, .mut_xmm => {
- const mcv = temp.tracking(cg).short;
- const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu);
- return abi_size > 8 and abi_size <= 16 and switch (mcv) {
- .register => |reg| reg.class() == .sse,
- .register_offset => |reg_off| reg_off.reg.class() == .sse and
- reg_off.off == 0,
- .register_pair, .register_triple, .register_quadruple => false,
- else => true,
- };
+ .to_gpr, .to_mut_gpr => temp.typeOf(cg).abiSize(cg.pt.zcu) <= 8,
+ .mm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 8 and switch (temp.tracking(cg).short) {
+ .register => |reg| reg.class() == .mmx,
+ .register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0,
+ else => false,
},
- .ymm, .mut_ymm => {
- const mcv = temp.tracking(cg).short;
- const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu);
- return abi_size > 16 and abi_size <= 32 and switch (mcv) {
- .register => |reg| reg.class() == .sse,
- .register_offset => |reg_off| reg_off.reg.class() == .sse and
- reg_off.off == 0,
- .register_pair, .register_triple, .register_quadruple => false,
- else => true,
- };
+ .mut_mm => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) == 8 and switch (temp.tracking(cg).short) {
+ .register => |reg| reg.class() == .mmx,
+ .register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0,
+ else => false,
},
- }
+ .to_mm, .to_mut_mm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 8,
+ .xmm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 16 and switch (temp.tracking(cg).short) {
+ .register => |reg| reg.class() == .sse,
+ .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0,
+ else => false,
+ },
+ .mut_xmm => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) == 16 and switch (temp.tracking(cg).short) {
+ .register => |reg| reg.class() == .sse,
+ .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0,
+ else => false,
+ },
+ .to_xmm, .to_mut_xmm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 16,
+ .ymm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 32 and switch (temp.tracking(cg).short) {
+ .register => |reg| reg.class() == .sse,
+ .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0,
+ else => false,
+ },
+ .mut_ymm => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) == 32 and switch (temp.tracking(cg).short) {
+ .register => |reg| reg.class() == .sse,
+ .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0,
+ else => false,
+ },
+ .to_ymm, .to_mut_ymm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 32,
+ };
}
fn convert(src: Src, temp: *Temp, cg: *CodeGen) !bool {
return switch (src) {
.none => unreachable,
.any, .imm8, .imm16, .imm32, .simm32 => false,
- .mem, .mut_mem, .to_mem => try temp.toBase(cg),
- .gpr => try temp.toRegClass(false, .general_purpose, cg),
- .mut_gpr => try temp.toRegClass(true, .general_purpose, cg),
- .mm => try temp.toRegClass(false, .mmx, cg),
- .mut_mm => try temp.toRegClass(true, .mmx, cg),
- .xmm, .ymm => try temp.toRegClass(false, .sse, cg),
- .mut_xmm, .mut_ymm => try temp.toRegClass(true, .sse, cg),
+ .mem, .to_mem, .mut_mem, .to_mut_mem => try temp.toBase(cg),
+ .gpr, .to_gpr => try temp.toRegClass(false, .general_purpose, cg),
+ .mut_gpr, .to_mut_gpr => try temp.toRegClass(true, .general_purpose, cg),
+ .mm, .to_mm => try temp.toRegClass(false, .mmx, cg),
+ .mut_mm, .to_mut_mm => try temp.toRegClass(true, .mmx, cg),
+ .xmm, .to_xmm, .ymm, .to_ymm => try temp.toRegClass(false, .sse, cg),
+ .mut_xmm, .to_mut_xmm, .mut_ymm, .to_mut_ymm => try temp.toRegClass(true, .sse, cg),
};
}
};
@@ -24489,6 +28166,10 @@ const Select = struct {
rc: Register.Class,
rc_mask: struct { rc: Register.Class, info: MaskInfo },
mem,
+ smin_mem: Select.Operand.Ref,
+ smax_mem: Select.Operand.Ref,
+ umin_mem: Select.Operand.Ref,
+ umax_mem: Select.Operand.Ref,
ref: Select.Operand.Ref,
ref_mask: struct { ref: Select.Operand.Ref, info: MaskInfo },
@@ -24501,14 +28182,81 @@ const Select = struct {
};
fn create(spec: TempSpec, s: *Select) !?Temp {
+ const cg = s.cg;
return switch (spec.kind) {
.unused => null,
- .any => try s.cg.tempAlloc(spec.type),
- .cc => |cc| try s.cg.tempFromValue(spec.type, .{ .eflags = cc }),
- .reg => |reg| try s.cg.tempFromValue(spec.type, .{ .register = reg }),
- .rc => |rc| try s.cg.tempAllocReg(spec.type, regSetForRegClass(rc)),
- .rc_mask => |rc_mask| try s.cg.tempAllocReg(spec.type, regSetForRegClass(rc_mask.rc)),
- .mem => try s.cg.tempAllocMem(spec.type),
+ .any => try cg.tempAlloc(spec.type),
+ .cc => |cc| try cg.tempFromValue(spec.type, .{ .eflags = cc }),
+ .reg => |reg| try cg.tempFromValue(spec.type, .{ .register = reg }),
+ .rc => |rc| try cg.tempAllocReg(spec.type, regSetForRegClass(rc)),
+ .rc_mask => |rc_mask| try cg.tempAllocReg(spec.type, regSetForRegClass(rc_mask.rc)),
+ .mem => try cg.tempAllocMem(spec.type),
+ .smin_mem, .smax_mem, .umin_mem, .umax_mem => |ty_ref| {
+ const pt = cg.pt;
+ const zcu = pt.zcu;
+ const ip = &zcu.intern_pool;
+ const ty = ty_ref.deref(s).typeOf(s.cg);
+ const vector_len, const scalar_ty: Type = switch (ip.indexToKey(ty.toIntern())) {
+ else => .{ null, ty },
+ .vector_type => |vector_type| .{ vector_type.len, .fromInterned(vector_type.child) },
+ };
+ const res_scalar_ty, const res_scalar_val: Value = res_scalar: switch (scalar_ty.toIntern()) {
+ .bool_type => .{
+ scalar_ty,
+ .fromInterned(switch (spec.kind) {
+ else => unreachable,
+ .smin_mem, .umax_mem => .bool_true,
+ .smax_mem, .umin_mem => .bool_false,
+ }),
+ },
+ else => {
+ const scalar_info: InternPool.Key.IntType = if (scalar_ty.isAbiInt(zcu))
+ scalar_ty.intInfo(zcu)
+ else
+ .{ .signedness = .unsigned, .bits = @intCast(scalar_ty.bitSize(zcu)) };
+ const scalar_int_ty = try pt.intType(scalar_info.signedness, scalar_info.bits);
+ if (scalar_info.bits <= 64) {
+ const int_val: i64 = switch (spec.kind) {
+ else => unreachable,
+ .smin_mem => std.math.minInt(i64),
+ .smax_mem => std.math.maxInt(i64),
+ .umin_mem => 0,
+ .umax_mem => -1,
+ };
+ const shift: u6 = @intCast(64 - scalar_info.bits);
+ break :res_scalar .{ scalar_int_ty, switch (scalar_info.signedness) {
+ .signed => try pt.intValue_i64(scalar_int_ty, int_val >> shift),
+ .unsigned => try pt.intValue_u64(scalar_int_ty, @as(u64, @bitCast(int_val)) >> shift),
+ } };
+ }
+ var big_int: std.math.big.int.Managed = try .init(cg.gpa);
+ defer big_int.deinit();
+ try big_int.setTwosCompIntLimit(switch (spec.kind) {
+ else => unreachable,
+ .smin_mem, .umin_mem => .min,
+ .smax_mem, .umax_mem => .max,
+ }, switch (spec.kind) {
+ else => unreachable,
+ .smin_mem, .smax_mem => .signed,
+ .umin_mem, .umax_mem => .unsigned,
+ }, scalar_info.bits);
+ try big_int.truncate(&big_int, scalar_info.signedness, scalar_info.bits);
+ break :res_scalar .{ scalar_int_ty, try pt.intValue_big(scalar_int_ty, big_int.toConst()) };
+ },
+ };
+ const res_ty, const res_val: Value = if (vector_len) |len| res: {
+ const vector_ty = try pt.vectorType(.{
+ .len = len,
+ .child = res_scalar_ty.toIntern(),
+ });
+ const vector_val = try pt.intern(.{ .aggregate = .{
+ .ty = vector_ty.toIntern(),
+ .storage = .{ .repeated_elem = res_scalar_val.toIntern() },
+ } });
+ break :res .{ vector_ty, .fromInterned(vector_val) };
+ } else .{ res_scalar_ty, res_scalar_val };
+ return try cg.tempFromValue(res_ty, try cg.genTypedValue(res_val));
+ },
.ref => |ref| ref.deref(s),
.ref_mask => |ref_mask| ref_mask.ref.deref(s),
};
@@ -24541,21 +28289,51 @@ const Select = struct {
forward_label,
ref,
simm,
+ uimm,
lea,
mem,
};
- const Adjust = enum {
- none,
- add_ptr_size,
- sub_ptr_size,
- add_ptr_bit_size,
- sub_ptr_bit_size,
- add_size,
- sub_size,
- add_len,
- sub_len,
- add_elem_limbs,
- sub_elem_limbs,
+ const Adjust = packed struct(u8) {
+ factor: i2,
+ scale: Memory.Scale,
+ amount: enum(u4) {
+ none,
+ ptr_size,
+ ptr_bit_size,
+ size,
+ src0_size,
+ bit_size,
+ src0_bit_size,
+ len,
+ elem_limbs,
+ src0_elem_size,
+ smin,
+ smax,
+ umax,
+ },
+
+ const none: Adjust = .{ .factor = 0, .scale = .@"1", .amount = .none };
+ const sub_ptr_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .ptr_size };
+ const add_ptr_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .ptr_bit_size };
+ const add_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .size };
+ const sub_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .size };
+ const add_src0_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_size };
+ const sub_src0_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_size };
+ const add_2_bit_size: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .bit_size };
+ const add_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .bit_size };
+ const sub_bit_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .bit_size };
+ const add_src0_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_bit_size };
+ const sub_src0_bit_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_bit_size };
+ const add_8_len: Adjust = .{ .factor = 1, .scale = .@"8", .amount = .len };
+ const add_4_len: Adjust = .{ .factor = 1, .scale = .@"4", .amount = .len };
+ const add_3_len: Adjust = .{ .factor = 1, .scale = .@"3", .amount = .len };
+ const add_2_len: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .len };
+ const add_len: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .len };
+ const sub_len: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .len };
+ const add_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_elem_size };
+ const sub_src0_elem_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_elem_size };
+ const add_elem_limbs: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .elem_limbs };
+ const add_umax: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .umax };
};
const Ref = enum(u4) {
tmp0,
@@ -24741,15 +28519,24 @@ const Select = struct {
const src1x: Select.Operand = .{ .tag = .ref, .base = .src1x };
const src1y: Select.Operand = .{ .tag = .ref, .base = .src1y };
- fn i(imm: i32) Select.Operand {
+ fn si(imm: i32) Select.Operand {
return .{ .tag = .simm, .imm = imm };
}
- fn a(base: Ref.Sized, adjust: Adjust) Select.Operand {
+ fn sa(base: Ref.Sized, adjust: Adjust) Select.Operand {
return .{ .tag = .simm, .base = base, .adjust = adjust };
}
- fn ia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand {
+ fn sia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand {
return .{ .tag = .simm, .base = base, .adjust = adjust, .imm = imm };
}
+ fn ui(imm: i32) Select.Operand {
+ return .{ .tag = .uimm, .imm = imm };
+ }
+ fn ua(base: Ref.Sized, adjust: Adjust) Select.Operand {
+ return .{ .tag = .uimm, .base = base, .adjust = adjust };
+ }
+ fn uia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand {
+ return .{ .tag = .uimm, .base = base, .adjust = adjust, .imm = imm };
+ }
fn lea(size: Memory.Size, base: Ref) Select.Operand {
return .{
@@ -24757,6 +28544,13 @@ const Select = struct {
.base = .{ .ref = base, .size = size },
};
}
+ fn leaa(size: Memory.Size, base: Ref, adjust: Adjust) Select.Operand {
+ return .{
+ .tag = .lea,
+ .base = .{ .ref = base, .size = size },
+ .adjust = adjust,
+ };
+ }
fn lead(size: Memory.Size, base: Ref, disp: i32) Select.Operand {
return .{
.tag = .lea,
@@ -24768,14 +28562,22 @@ const Select = struct {
return .{
.tag = .lea,
.base = .{ .ref = base, .size = size },
- .index_ = .{ .ref = index, .scale = .@"1" },
+ .index = .{ .ref = index, .scale = .@"1" },
+ };
+ }
+ fn leaia(size: Memory.Size, base: Ref, index: Ref, adjust: Adjust) Select.Operand {
+ return .{
+ .tag = .lea,
+ .base = .{ .ref = base, .size = size },
+ .index = .{ .ref = index, .scale = .@"1" },
+ .adjust = adjust,
};
}
fn leaid(size: Memory.Size, base: Ref, index: Ref, disp: i32) Select.Operand {
return .{
.tag = .lea,
.base = .{ .ref = base, .size = size },
- .index_ = .{ .ref = index, .scale = .@"1" },
+ .index = .{ .ref = index, .scale = .@"1" },
.imm = disp,
};
}
@@ -24783,22 +28585,22 @@ const Select = struct {
return .{
.tag = .lea,
.base = .{ .ref = base, .size = size },
- .index_ = .{ .ref = index, .scale = scale },
+ .index = .{ .ref = index, .scale = scale },
};
}
fn leasid(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, disp: i32) Select.Operand {
return .{
.tag = .lea,
.base = .{ .ref = base, .size = size },
- .index_ = .{ .ref = index, .scale = scale },
+ .index = .{ .ref = index, .scale = scale },
.imm = disp,
};
}
- fn leasida(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, disp: i32, adjust: Adjust) Select.Operand {
+ fn leasiad(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, adjust: Adjust, disp: i32) Select.Operand {
return .{
.tag = .lea,
.base = .{ .ref = base, .size = size },
- .index_ = .{ .ref = index, .scale = scale },
+ .index = .{ .ref = index, .scale = scale },
.adjust = adjust,
.imm = disp,
};
@@ -24817,6 +28619,21 @@ const Select = struct {
.imm = disp,
};
}
+ fn mema(base: Ref.Sized, adjust: Adjust) Select.Operand {
+ return .{
+ .tag = .mem,
+ .base = base,
+ .adjust = adjust,
+ };
+ }
+ fn memad(base: Ref.Sized, adjust: Adjust, disp: i32) Select.Operand {
+ return .{
+ .tag = .mem,
+ .base = base,
+ .adjust = adjust,
+ .imm = disp,
+ };
+ }
fn memi(base: Ref.Sized, index: Ref) Select.Operand {
return .{
.tag = .mem,
@@ -24832,6 +28649,15 @@ const Select = struct {
.adjust = adjust,
};
}
+ fn memiad(base: Ref.Sized, index: Ref, adjust: Adjust, disp: i32) Select.Operand {
+ return .{
+ .tag = .mem,
+ .base = base,
+ .index = .{ .ref = index, .scale = .@"1" },
+ .adjust = adjust,
+ .imm = disp,
+ };
+ }
fn memid(base: Ref.Sized, index: Ref, disp: i32) Select.Operand {
return .{
.tag = .mem,
@@ -24847,6 +28673,14 @@ const Select = struct {
.index = .{ .ref = index, .scale = scale },
};
}
+ fn memsia(base: Ref.Sized, scale: Memory.Scale, index: Ref, adjust: Adjust) Select.Operand {
+ return .{
+ .tag = .mem,
+ .base = base,
+ .index = .{ .ref = index, .scale = scale },
+ .adjust = adjust,
+ };
+ }
fn memsid(base: Ref.Sized, scale: Memory.Scale, index: Ref, disp: i32) Select.Operand {
return .{
.tag = .mem,
@@ -24855,7 +28689,7 @@ const Select = struct {
.imm = disp,
};
}
- fn memsida(base: Ref.Sized, scale: Memory.Scale, index: Ref, disp: i32, adjust: Adjust) Select.Operand {
+ fn memsiad(base: Ref.Sized, scale: Memory.Scale, index: Ref, adjust: Adjust, disp: i32) Select.Operand {
return .{
.tag = .mem,
.base = base,
@@ -24865,26 +28699,34 @@ const Select = struct {
};
}
- fn adjustedImm(op: Select.Operand, s: *const Select) i32 {
- return switch (op.adjust) {
- .none => op.imm,
- .add_ptr_size => op.imm + @divExact(s.cg.target.ptrBitWidth(), 8),
- .sub_ptr_size => op.imm - @divExact(s.cg.target.ptrBitWidth(), 8),
- .add_ptr_bit_size => op.imm + s.cg.target.ptrBitWidth(),
- .sub_ptr_bit_size => op.imm - s.cg.target.ptrBitWidth(),
- .add_size => op.imm + @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu))),
- .sub_size => op.imm - @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu))),
- .add_len => op.imm + @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu))),
- .sub_len => op.imm - @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu))),
- .add_elem_limbs => op.imm + @as(i32, @intCast(@divExact(
- op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu),
- @divExact(op.base.size.bitSize(s.cg.target), 8),
- ))),
- .sub_elem_limbs => op.imm - @as(i32, @intCast(@divExact(
+ fn adjustedImm(op: Select.Operand, comptime SignedImm: type, s: *const Select) SignedImm {
+ const UnsignedImm = @Type(.{
+ .int = .{ .signedness = .unsigned, .bits = @typeInfo(SignedImm).int.bits },
+ });
+ return op.imm + @as(i5, op.adjust.factor) * op.adjust.scale.toFactor() * @as(SignedImm, switch (op.adjust.amount) {
+ .none => 0,
+ .ptr_size => @divExact(s.cg.target.ptrBitWidth(), 8),
+ .ptr_bit_size => s.cg.target.ptrBitWidth(),
+ .size => @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu)),
+ .src0_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu)),
+ .bit_size => @intCast(op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)),
+ .src0_bit_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)),
+ .len => @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu)),
+ .elem_limbs => @intCast(@divExact(
op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu),
@divExact(op.base.size.bitSize(s.cg.target), 8),
- ))),
- };
+ )),
+ .src0_elem_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)),
+ .smin => @as(SignedImm, std.math.minInt(SignedImm)) >> @truncate(
+ -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu),
+ ),
+ .smax => @as(SignedImm, std.math.maxInt(SignedImm)) >> @truncate(
+ -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu),
+ ),
+ .umax => @bitCast(@as(UnsignedImm, std.math.maxInt(UnsignedImm)) >> @truncate(
+ -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu),
+ )),
+ });
}
fn lower(op: Select.Operand, s: *Select) !CodeGen.Operand {
@@ -24907,7 +28749,8 @@ const Select = struct {
else => |mcv| .{ .mem = try mcv.mem(s.cg, .{ .size = op.base.size }) },
.register => |reg| .{ .reg = registerAlias(reg, @intCast(@divExact(op.base.size.bitSize(s.cg.target), 8))) },
},
- .simm => .{ .imm = .s(op.adjustedImm(s)) },
+ .simm => .{ .imm = .s(op.adjustedImm(i32, s)) },
+ .uimm => .{ .imm = .u(@bitCast(op.adjustedImm(i64, s))) },
.lea => .{ .mem = .{
.base = .{ .reg = registerAlias(op.base.ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)) },
.mod = .{ .rm = .{
@@ -24917,7 +28760,7 @@ const Select = struct {
.none => .none,
},
.scale = op.index.scale,
- .disp = op.adjustedImm(s),
+ .disp = op.adjustedImm(i32, s),
} },
} },
.mem => .{ .mem = try op.base.ref.deref(s).tracking(s.cg).short.mem(s.cg, .{
@@ -24927,7 +28770,7 @@ const Select = struct {
.none => .none,
},
.scale = op.index.scale,
- .disp = op.adjustedImm(s),
+ .disp = op.adjustedImm(i32, s),
}) },
};
}
@@ -24942,14 +28785,23 @@ fn select(
) !void {
cases: for (cases) |case| {
for (case.required_features) |required_feature| if (required_feature) |feature| if (!switch (feature) {
- .@"64bit" => cg.target.ptrBitWidth() == 64,
+ .@"64bit" => switch (cg.target.cpu.arch) {
+ else => unreachable,
+ .x86 => false,
+ .x86_64 => true,
+ },
.mmx => false,
else => cg.hasFeature(feature),
}) continue :cases;
for (case.dst_constraints[0..dst_temps.len], dst_tys) |dst_constraint, dst_ty| if (!dst_constraint.accepts(dst_ty, cg)) continue :cases;
for (case.src_constraints[0..src_temps.len], src_temps) |src_constraint, src_temp| if (!src_constraint.accepts(src_temp.typeOf(cg), cg)) continue :cases;
+ if (std.debug.runtime_safety) {
+ for (case.dst_constraints[dst_temps.len..]) |dst_constraint| assert(dst_constraint == .any);
+ for (case.src_constraints[src_temps.len..]) |src_constraint| assert(src_constraint == .any);
+ }
patterns: for (case.patterns) |pattern| {
- for (pattern.src, src_temps) |src_pattern, src_temp| if (!src_pattern.matches(src_temp, cg)) continue :patterns;
+ for (pattern.src[0..src_temps.len], src_temps) |src_pattern, src_temp| if (!src_pattern.matches(src_temp, cg)) continue :patterns;
+ if (std.debug.runtime_safety) for (pattern.src[src_temps.len..]) |src_pattern| assert(src_pattern == .none);
var s: Select = .{
.cg = cg,
@@ -24960,9 +28812,11 @@ fn select(
const dst_slots = s.temps[@intFromEnum(Select.Operand.Ref.dst0)..@intFromEnum(Select.Operand.Ref.src0)];
const src_slots = s.temps[@intFromEnum(Select.Operand.Ref.src0)..@intFromEnum(Select.Operand.Ref.none)];
+ @memcpy(src_slots[0..src_temps.len], src_temps);
+ std.mem.swap(Temp, &src_slots[pattern.commute[0]], &src_slots[pattern.commute[1]]);
for (tmp_slots, case.extra_temps) |*slot, spec| slot.* = try spec.create(&s) orelse continue;
- while (true) for (pattern.src, src_temps) |src_pattern, *src_temp| {
+ while (true) for (pattern.src[0..src_temps.len], src_temps) |src_pattern, *src_temp| {
if (try src_pattern.convert(src_temp, cg)) break;
} else break;
@memcpy(src_slots[0..src_temps.len], src_temps);
src/arch/x86_64/Encoding.zig
@@ -64,7 +64,7 @@ pub fn findByMnemonic(
comptime var feature_it = std.mem.splitScalar(u8, @tagName(tag), ' ');
comptime var features: []const std.Target.x86.Feature = &.{};
inline while (comptime feature_it.next()) |feature| features = features ++ .{@field(std.Target.x86.Feature, feature)};
- break :has_features std.Target.x86.featureSetHasAll(target.cpu.features, features[0..features.len].*);
+ break :has_features std.Target.x86.featureSetHasAll(target.cpu.features, features[0..].*);
},
}) continue;
@@ -250,7 +250,8 @@ pub const Mnemonic = enum {
// General-purpose
adc, add, @"and",
bsf, bsr, bswap, bt, btc, btr, bts,
- call, cbw, cdq, cdqe, clflush,
+ call, cbw, cdq, cdqe,
+ clac, clc, cld, clflush, cli, clts, clui,
cmova, cmovae, cmovb, cmovbe, cmovc, cmove, cmovg, cmovge, cmovl, cmovle, cmovna,
cmovnae, cmovnb, cmovnbe, cmovnc, cmovne, cmovng, cmovnge, cmovnl, cmovnle, cmovno,
cmovnp, cmovns, cmovnz, cmovo, cmovp, cmovpe, cmovpo, cmovs, cmovz,
@@ -274,7 +275,9 @@ pub const Mnemonic = enum {
rcl, rcr, ret, rol, ror, rorx,
sal, sar, sarx, sbb,
scas, scasb, scasd, scasq, scasw,
- shl, shld, shlx, shr, shrd, shrx, sub, syscall,
+ shl, shld, shlx, shr, shrd, shrx,
+ stac, stc, std, sti, stui,
+ sub, syscall,
seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae,
setnb, setnbe, setnc, setne, setng, setnge, setnl, setnle, setno, setnp, setns,
setnz, seto, setp, setpe, setpo, sets, setz,
@@ -307,7 +310,7 @@ pub const Mnemonic = enum {
ldmxcsr,
maxps, maxss,
minps, minss,
- movaps, movhlps, movlhps,
+ movaps, movhlps, movhps, movlhps, movlps,
movmskps,
movss, movups,
mulps, mulss,
@@ -333,6 +336,7 @@ pub const Mnemonic = enum {
minpd, minsd,
movapd,
movdqa, movdqu,
+ movhpd, movlpd,
movmskpd,
//movsd,
movupd,
@@ -395,7 +399,7 @@ pub const Mnemonic = enum {
vmovd,
vmovddup,
vmovdqa, vmovdqu,
- vmovhlps, vmovlhps,
+ vmovhlps, vmovhpd, vmovhps, vmovlhps, vmovlpd, vmovlps,
vmovmskpd, vmovmskps,
vmovq,
vmovsd,
@@ -823,6 +827,7 @@ pub const Feature = enum {
avx2,
bmi,
bmi2,
+ cmov,
f16c,
fma,
lzcnt,
@@ -830,6 +835,7 @@ pub const Feature = enum {
pclmul,
@"pclmul avx",
popcnt,
+ smap,
sse,
sse2,
sse3,
@@ -837,6 +843,7 @@ pub const Feature = enum {
sse4_2,
ssse3,
sha,
+ uintr,
vaes,
vpclmulqdq,
x87,
src/arch/x86_64/encodings.zig
@@ -132,98 +132,110 @@ pub const table = [_]Entry{
.{ .cdq, .zo, &.{ .o32 }, &.{ 0x99 }, 0, .none, .none },
.{ .cqo, .zo, &.{ .o64 }, &.{ 0x99 }, 0, .long, .none },
+ .{ .clac, .zo, &.{}, &.{ 0x0f, 0x01, 0xca }, 0, .none, .smap },
+
+ .{ .clc, .zo, &.{}, &.{ 0xf8 }, 0, .none, .none },
+
+ .{ .cld, .zo, &.{}, &.{ 0xfc }, 0, .none, .none },
+
.{ .clflush, .m, &.{ .m8 }, &.{ 0x0f, 0xae }, 7, .none, .none },
- .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .none },
- .{ .cmova, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .none },
- .{ .cmova, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none },
- .{ .cmovae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none },
- .{ .cmovae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none },
- .{ .cmovae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none },
- .{ .cmovb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none },
- .{ .cmovb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none },
- .{ .cmovb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none },
- .{ .cmovbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .none },
- .{ .cmovbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none },
- .{ .cmovbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none },
- .{ .cmovc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none },
- .{ .cmovc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none },
- .{ .cmovc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none },
- .{ .cmove, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .none },
- .{ .cmove, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none },
- .{ .cmove, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none },
- .{ .cmovg, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .none },
- .{ .cmovg, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none },
- .{ .cmovg, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none },
- .{ .cmovge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .none },
- .{ .cmovge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none },
- .{ .cmovge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none },
- .{ .cmovl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .none },
- .{ .cmovl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .none },
- .{ .cmovl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none },
- .{ .cmovle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .none },
- .{ .cmovle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none },
- .{ .cmovle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none },
- .{ .cmovna, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .none },
- .{ .cmovna, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none },
- .{ .cmovna, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none },
- .{ .cmovnae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none },
- .{ .cmovnae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none },
- .{ .cmovnae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none },
- .{ .cmovnb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none },
- .{ .cmovnb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none },
- .{ .cmovnb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none },
- .{ .cmovnbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .none },
- .{ .cmovnbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .none },
- .{ .cmovnbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none },
- .{ .cmovnc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none },
- .{ .cmovnc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none },
- .{ .cmovnc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none },
- .{ .cmovne, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .none },
- .{ .cmovne, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .none },
- .{ .cmovne, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none },
- .{ .cmovng, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .none },
- .{ .cmovng, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none },
- .{ .cmovng, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none },
- .{ .cmovnge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .none },
- .{ .cmovnge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .none },
- .{ .cmovnge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none },
- .{ .cmovnl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .none },
- .{ .cmovnl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none },
- .{ .cmovnl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none },
- .{ .cmovnle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .none },
- .{ .cmovnle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none },
- .{ .cmovnle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none },
- .{ .cmovno, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x41 }, 0, .short, .none },
- .{ .cmovno, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x41 }, 0, .none, .none },
- .{ .cmovno, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x41 }, 0, .long, .none },
- .{ .cmovnp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .none },
- .{ .cmovnp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none },
- .{ .cmovnp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .none },
- .{ .cmovns, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x49 }, 0, .short, .none },
- .{ .cmovns, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x49 }, 0, .none, .none },
- .{ .cmovns, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x49 }, 0, .long, .none },
- .{ .cmovnz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .none },
- .{ .cmovnz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .none },
- .{ .cmovnz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none },
- .{ .cmovo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x40 }, 0, .short, .none },
- .{ .cmovo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x40 }, 0, .none, .none },
- .{ .cmovo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x40 }, 0, .long, .none },
- .{ .cmovp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .none },
- .{ .cmovp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none },
- .{ .cmovp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none },
- .{ .cmovpe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .none },
- .{ .cmovpe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none },
- .{ .cmovpe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none },
- .{ .cmovpo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .none },
- .{ .cmovpo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none },
- .{ .cmovpo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .none },
- .{ .cmovs, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x48 }, 0, .short, .none },
- .{ .cmovs, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x48 }, 0, .none, .none },
- .{ .cmovs, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x48 }, 0, .long, .none },
- .{ .cmovz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .none },
- .{ .cmovz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none },
- .{ .cmovz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none },
+ .{ .cli, .zo, &.{}, &.{ 0xfa }, 0, .none, .none },
+
+ .{ .clts, .zo, &.{}, &.{ 0x0f, 0x06 }, 0, .none, .none },
+
+ .{ .clui, .zo, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xee }, 0, .none, .uintr },
+
+ .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .cmov },
+ .{ .cmova, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .cmov },
+ .{ .cmova, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .cmov },
+ .{ .cmovae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .cmov },
+ .{ .cmovae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .cmov },
+ .{ .cmovae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .cmov },
+ .{ .cmovb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .cmov },
+ .{ .cmovb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .cmov },
+ .{ .cmovb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .cmov },
+ .{ .cmovbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .cmov },
+ .{ .cmovbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .cmov },
+ .{ .cmovbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .cmov },
+ .{ .cmovc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .cmov },
+ .{ .cmovc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .cmov },
+ .{ .cmovc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .cmov },
+ .{ .cmove, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .cmov },
+ .{ .cmove, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .cmov },
+ .{ .cmove, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .cmov },
+ .{ .cmovg, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .cmov },
+ .{ .cmovg, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .cmov },
+ .{ .cmovg, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .cmov },
+ .{ .cmovge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .cmov },
+ .{ .cmovge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .cmov },
+ .{ .cmovge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .cmov },
+ .{ .cmovl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .cmov },
+ .{ .cmovl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .cmov },
+ .{ .cmovl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .cmov },
+ .{ .cmovle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .cmov },
+ .{ .cmovle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .cmov },
+ .{ .cmovle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .cmov },
+ .{ .cmovna, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .cmov },
+ .{ .cmovna, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .cmov },
+ .{ .cmovna, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .cmov },
+ .{ .cmovnae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .cmov },
+ .{ .cmovnae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .cmov },
+ .{ .cmovnae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .cmov },
+ .{ .cmovnb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .cmov },
+ .{ .cmovnb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .cmov },
+ .{ .cmovnb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .cmov },
+ .{ .cmovnbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .cmov },
+ .{ .cmovnbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .cmov },
+ .{ .cmovnbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .cmov },
+ .{ .cmovnc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .cmov },
+ .{ .cmovnc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .cmov },
+ .{ .cmovnc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .cmov },
+ .{ .cmovne, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .cmov },
+ .{ .cmovne, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .cmov },
+ .{ .cmovne, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .cmov },
+ .{ .cmovng, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .cmov },
+ .{ .cmovng, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .cmov },
+ .{ .cmovng, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .cmov },
+ .{ .cmovnge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .cmov },
+ .{ .cmovnge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .cmov },
+ .{ .cmovnge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .cmov },
+ .{ .cmovnl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .cmov },
+ .{ .cmovnl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .cmov },
+ .{ .cmovnl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .cmov },
+ .{ .cmovnle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .cmov },
+ .{ .cmovnle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .cmov },
+ .{ .cmovnle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .cmov },
+ .{ .cmovno, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x41 }, 0, .short, .cmov },
+ .{ .cmovno, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x41 }, 0, .none, .cmov },
+ .{ .cmovno, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x41 }, 0, .long, .cmov },
+ .{ .cmovnp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .cmov },
+ .{ .cmovnp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .cmov },
+ .{ .cmovnp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .cmov },
+ .{ .cmovns, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x49 }, 0, .short, .cmov },
+ .{ .cmovns, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x49 }, 0, .none, .cmov },
+ .{ .cmovns, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x49 }, 0, .long, .cmov },
+ .{ .cmovnz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .cmov },
+ .{ .cmovnz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .cmov },
+ .{ .cmovnz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .cmov },
+ .{ .cmovo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x40 }, 0, .short, .cmov },
+ .{ .cmovo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x40 }, 0, .none, .cmov },
+ .{ .cmovo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x40 }, 0, .long, .cmov },
+ .{ .cmovp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .cmov },
+ .{ .cmovp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .cmov },
+ .{ .cmovp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .cmov },
+ .{ .cmovpe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .cmov },
+ .{ .cmovpe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .cmov },
+ .{ .cmovpe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .cmov },
+ .{ .cmovpo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .cmov },
+ .{ .cmovpo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .cmov },
+ .{ .cmovpo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .cmov },
+ .{ .cmovs, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x48 }, 0, .short, .cmov },
+ .{ .cmovs, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x48 }, 0, .none, .cmov },
+ .{ .cmovs, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x48 }, 0, .long, .cmov },
+ .{ .cmovz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .cmov },
+ .{ .cmovz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .cmov },
+ .{ .cmovz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .cmov },
.{ .cmp, .zi, &.{ .al, .imm8 }, &.{ 0x3c }, 0, .none, .none },
.{ .cmp, .zi, &.{ .ax, .imm16 }, &.{ 0x3d }, 0, .short, .none },
@@ -747,6 +759,16 @@ pub const table = [_]Entry{
.{ .shrd, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xad }, 0, .none, .none },
.{ .shrd, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xad }, 0, .long, .none },
+ .{ .stac, .zo, &.{}, &.{ 0x0f, 0x01, 0xcb }, 0, .none, .smap },
+
+ .{ .stc, .zo, &.{}, &.{ 0xf9 }, 0, .none, .none },
+
+ .{ .std, .zo, &.{}, &.{ 0xfd }, 0, .none, .none },
+
+ .{ .sti, .zo, &.{}, &.{ 0xfb }, 0, .none, .none },
+
+ .{ .stui, .zo, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xef }, 0, .none, .uintr },
+
.{ .stos, .zo, &.{ .m8 }, &.{ 0xaa }, 0, .none, .none },
.{ .stos, .zo, &.{ .m16 }, &.{ 0xab }, 0, .short, .none },
.{ .stos, .zo, &.{ .m32 }, &.{ 0xab }, 0, .none, .none },
@@ -927,8 +949,14 @@ pub const table = [_]Entry{
.{ .movhlps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .none, .sse },
+ .{ .movhps, .rm, &.{ .xmm, .m64 }, &.{ 0x0f, 0x16 }, 0, .none, .sse },
+ .{ .movhps, .mr, &.{ .m64, .xmm }, &.{ 0x0f, 0x17 }, 0, .none, .sse },
+
.{ .movlhps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .none, .sse },
+ .{ .movlps, .rm, &.{ .xmm, .m64 }, &.{ 0x0f, 0x12 }, 0, .none, .sse },
+ .{ .movlps, .mr, &.{ .m64, .xmm }, &.{ 0x0f, 0x13 }, 0, .none, .sse },
+
.{ .movmskps, .rm, &.{ .r32, .xmm }, &.{ 0x0f, 0x50 }, 0, .none, .sse },
.{ .movmskps, .rm, &.{ .r64, .xmm }, &.{ 0x0f, 0x50 }, 0, .none, .sse },
@@ -1037,6 +1065,12 @@ pub const table = [_]Entry{
.{ .movdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .none, .sse2 },
.{ .movdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .none, .sse2 },
+ .{ .movhpd, .rm, &.{ .xmm, .m64 }, &.{ 0x66, 0x0f, 0x16 }, 0, .none, .sse2 },
+ .{ .movhpd, .mr, &.{ .m64, .xmm }, &.{ 0x66, 0x0f, 0x17 }, 0, .none, .sse2 },
+
+ .{ .movlpd, .rm, &.{ .xmm, .m64 }, &.{ 0x66, 0x0f, 0x12 }, 0, .none, .sse2 },
+ .{ .movlpd, .mr, &.{ .m64, .xmm }, &.{ 0x66, 0x0f, 0x13 }, 0, .none, .sse2 },
+
.{ .movmskpd, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0x50 }, 0, .none, .sse2 },
.{ .movmskpd, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0x50 }, 0, .none, .sse2 },
@@ -1486,8 +1520,20 @@ pub const table = [_]Entry{
.{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
+ .{ .vmovhpd, .rvm, &.{ .xmm, .xmm, .m64 }, &.{ 0x66, 0x0f, 0x16 }, 0, .vex_128_wig, .avx },
+ .{ .vmovhpd, .mr, &.{ .m64, .xmm }, &.{ 0x66, 0x0f, 0x17 }, 0, .vex_128_wig, .avx },
+
+ .{ .vmovhps, .rvm, &.{ .xmm, .xmm, .m64 }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx },
+ .{ .vmovhps, .mr, &.{ .m64, .xmm }, &.{ 0x0f, 0x17 }, 0, .vex_128_wig, .avx },
+
.{ .vmovlhps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx },
+ .{ .vmovlpd, .rvm, &.{ .xmm, .xmm, .m64 }, &.{ 0x66, 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
+ .{ .vmovlpd, .mr, &.{ .m64, .xmm }, &.{ 0x66, 0x0f, 0x13 }, 0, .vex_128_wig, .avx },
+
+ .{ .vmovlps, .rvm, &.{ .xmm, .xmm, .m64 }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
+ .{ .vmovlps, .mr, &.{ .m64, .xmm }, &.{ 0x0f, 0x13 }, 0, .vex_128_wig, .avx },
+
.{ .vmovq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .vex_128_wig, .avx },
.{ .vmovq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .vex_128_wig, .avx },
@@ -1583,14 +1629,14 @@ pub const table = [_]Entry{
.{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx },
.{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx },
- .{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128_wig, .avx },
- .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_wig, .avx },
+ .{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .vex_128_w0, .avx },
+ .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_w0, .avx },
- .{ .vpinsrb, .rmi, &.{ .xmm, .r32_m8, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .vex_128_w0, .avx },
- .{ .vpinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w0, .avx },
- .{ .vpinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w1, .avx },
+ .{ .vpinsrb, .rvmi, &.{ .xmm, .xmm, .r32_m8, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .vex_128_w0, .avx },
+ .{ .vpinsrd, .rvmi, &.{ .xmm, .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w0, .avx },
+ .{ .vpinsrq, .rvmi, &.{ .xmm, .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w1, .avx },
- .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx },
+ .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_w0, .avx },
.{ .vpmaxsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_128_wig, .avx },
.{ .vpmaxsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_128_wig, .avx },
src/arch/x86_64/Lower.zig
@@ -418,8 +418,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
// Here, we currently assume local dynamic TLS vars, and so
// we emit LD model.
_ = lower.reloc(.{ .linker_tlsld = sym_index }, 0);
- lower.result_insts[lower.result_insts_len] =
- try Instruction.new(.none, .lea, &[_]Operand{
+ lower.result_insts[lower.result_insts_len] = try .new(.none, .lea, &.{
.{ .reg = .rdi },
.{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) },
}, lower.target);
@@ -427,8 +426,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
_ = lower.reloc(.{
.linker_extern_fn = try elf_file.getGlobalSymbol("__tls_get_addr", null),
}, 0);
- lower.result_insts[lower.result_insts_len] =
- try Instruction.new(.none, .call, &[_]Operand{
+ lower.result_insts[lower.result_insts_len] = try .new(.none, .call, &.{
.{ .imm = .s(0) },
}, lower.target);
lower.result_insts_len += 1;
@@ -440,8 +438,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
}) };
} else {
// Since we are linking statically, we emit LE model directly.
- lower.result_insts[lower.result_insts_len] =
- try Instruction.new(.none, .mov, &[_]Operand{
+ lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
.{ .reg = .rax },
.{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .fs } }) },
}, lower.target);
@@ -464,8 +461,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
.mov => {
if (elf_sym.flags.is_extern_ptr) {
const reg = ops[0].reg;
- lower.result_insts[lower.result_insts_len] =
- try Instruction.new(.none, .mov, &[_]Operand{
+ lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
.{ .reg = reg.to64() },
.{ .mem = Memory.initRip(.qword, 0) },
}, lower.target);
@@ -496,16 +492,14 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
if (macho_sym.flags.tlv) {
_ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
- lower.result_insts[lower.result_insts_len] =
- try Instruction.new(.none, .mov, &[_]Operand{
+ lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
.{ .reg = .rdi },
.{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) },
- });
+ }, lower.target);
lower.result_insts_len += 1;
- lower.result_insts[lower.result_insts_len] =
- try Instruction.new(.none, .call, &[_]Operand{
+ lower.result_insts[lower.result_insts_len] = try .new(.none, .call, &.{
.{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .rdi } }) },
- });
+ }, lower.target);
lower.result_insts_len += 1;
emit_mnemonic = .mov;
break :op .{ .reg = .rax };
@@ -520,11 +514,10 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
.mov => {
if (macho_sym.flags.is_extern_ptr) {
const reg = ops[0].reg;
- lower.result_insts[lower.result_insts_len] =
- try Instruction.new(.none, .mov, &[_]Operand{
+ lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
.{ .reg = reg.to64() },
.{ .mem = Memory.initRip(.qword, 0) },
- });
+ }, lower.target);
lower.result_insts_len += 1;
break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{ .base = .{
.reg = reg.to64(),
@@ -541,8 +534,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
},
};
}
- lower.result_insts[lower.result_insts_len] =
- try Instruction.new(emit_prefix, emit_mnemonic, emit_ops, lower.target);
+ lower.result_insts[lower.result_insts_len] = try .new(emit_prefix, emit_mnemonic, emit_ops, lower.target);
lower.result_insts_len += 1;
}
src/arch/x86_64/Mir.zig
@@ -38,6 +38,11 @@ pub const Inst = struct {
/// ___ Right Without Affecting Flags
_rx,
+ /// ___ Forward
+ _f,
+ /// ___ Reverse
+ //_r,
+
/// ___ Above
_a,
/// ___ Above Or Equal
@@ -47,6 +52,7 @@ pub const Inst = struct {
/// ___ Below Or Equal
_be,
/// ___ Carry
+ /// ___ Carry Flag
_c,
/// ___ Equal
_e,
@@ -98,6 +104,14 @@ pub const Inst = struct {
_s,
/// ___ Zero
_z,
+ /// ___ Alignment Check Flag
+ _ac,
+ /// ___ Direction Flag
+ //_d,
+ /// ___ Interrupt Flag
+ _i,
+ /// ___ User Interrupt Flag
+ _ui,
/// ___ Byte
//_b,
@@ -299,9 +313,8 @@ pub const Inst = struct {
/// Bitwise logical and of packed double-precision floating-point values
@"and",
/// Bit scan forward
- bsf,
/// Bit scan reverse
- bsr,
+ bs,
/// Byte swap
bswap,
/// Bit test
@@ -317,6 +330,10 @@ pub const Inst = struct {
cdq,
/// Convert doubleword to quadword
cdqe,
+ /// Clear carry flag
+ /// Clear direction flag
+ /// Clear interrupt flag
+ cl,
/// Flush cache line
clflush,
/// Conditional move
@@ -443,6 +460,11 @@ pub const Inst = struct {
/// Subtract packed double-precision floating-point values
/// Subtract scalar double-precision floating-point values
sub,
+ /// Set carry flag
+ /// Set direction flag
+ /// Set interrupt flag
+ /// Store floating-point value
+ st,
/// Store string
sto,
/// Syscall
@@ -478,8 +500,6 @@ pub const Inst = struct {
ldenv,
/// Store x87 FPU environment
nstenv,
- /// Store floating-point value
- st,
/// Store x87 FPU environment
stenv,
@@ -560,8 +580,14 @@ pub const Inst = struct {
/// Move aligned packed single-precision floating-point values
/// Move aligned packed double-precision floating-point values
mova,
+ /// Move high packed single-precision floating-point values
+ /// Move high packed double-precision floating-point values
+ movh,
/// Move packed single-precision floating-point values high to low
movhl,
+ /// Move low packed single-precision floating-point values
+ /// Move low packed double-precision floating-point values
+ movl,
/// Move packed single-precision floating-point values low to high
movlh,
/// Move unaligned packed single-precision floating-point values
src/link/Elf/Atom.zig
@@ -1274,19 +1274,19 @@ const x86_64 = struct {
fn relaxGotpcrelx(code: []u8, t: *const std.Target) !void {
dev.check(.x86_64_backend);
const old_inst = disassemble(code) orelse return error.RelaxFailure;
- const inst = switch (old_inst.encoding.mnemonic) {
- .call => try Instruction.new(old_inst.prefix, .call, &.{
+ const inst: Instruction = switch (old_inst.encoding.mnemonic) {
+ .call => try .new(old_inst.prefix, .call, &.{
// TODO: hack to force imm32s in the assembler
- .{ .imm = Immediate.s(-129) },
+ .{ .imm = .s(-129) },
}, t),
- .jmp => try Instruction.new(old_inst.prefix, .jmp, &.{
+ .jmp => try .new(old_inst.prefix, .jmp, &.{
// TODO: hack to force imm32s in the assembler
- .{ .imm = Immediate.s(-129) },
+ .{ .imm = .s(-129) },
}, t),
else => return error.RelaxFailure,
};
relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding });
- const nop = try Instruction.new(.none, .nop, &.{}, t);
+ const nop: Instruction = try .new(.none, .nop, &.{}, t);
try encode(&.{ nop, inst }, code);
}
@@ -1295,7 +1295,7 @@ const x86_64 = struct {
const old_inst = disassemble(code) orelse return error.RelaxFailure;
switch (old_inst.encoding.mnemonic) {
.mov => {
- const inst = try Instruction.new(old_inst.prefix, .lea, &old_inst.ops, t);
+ const inst: Instruction = try .new(old_inst.prefix, .lea, &old_inst.ops, t);
relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding });
try encode(&.{inst}, code);
},
@@ -1404,14 +1404,15 @@ const x86_64 = struct {
dev.check(.x86_64_backend);
const old_inst = disassemble(code) orelse return false;
switch (old_inst.encoding.mnemonic) {
- .mov => if (Instruction.new(old_inst.prefix, .mov, &.{
- old_inst.ops[0],
- // TODO: hack to force imm32s in the assembler
- .{ .imm = Immediate.s(-129) },
- }, t)) |inst| {
+ .mov => {
+ const inst = Instruction.new(old_inst.prefix, .mov, &.{
+ old_inst.ops[0],
+ // TODO: hack to force imm32s in the assembler
+ .{ .imm = .s(-129) },
+ }, t) catch return false;
inst.encode(std.io.null_writer, .{}) catch return false;
return true;
- } else |_| return false,
+ },
else => return false,
}
}
@@ -1424,7 +1425,7 @@ const x86_64 = struct {
const inst = Instruction.new(old_inst.prefix, .mov, &.{
old_inst.ops[0],
// TODO: hack to force imm32s in the assembler
- .{ .imm = Immediate.s(-129) },
+ .{ .imm = .s(-129) },
}, t) catch unreachable;
relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding });
encode(&.{inst}, code) catch unreachable;
@@ -1438,10 +1439,10 @@ const x86_64 = struct {
const old_inst = disassemble(code) orelse return error.RelaxFailure;
switch (old_inst.encoding.mnemonic) {
.lea => {
- const inst = try Instruction.new(old_inst.prefix, .mov, &.{
+ const inst: Instruction = try .new(old_inst.prefix, .mov, &.{
old_inst.ops[0],
// TODO: hack to force imm32s in the assembler
- .{ .imm = Immediate.s(-129) },
+ .{ .imm = .s(-129) },
}, target);
relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding });
try encode(&.{inst}, code);
@@ -1781,7 +1782,7 @@ const aarch64 = struct {
const off: u12 = @truncate(@as(u64, @bitCast(S_ + A)));
aarch64_util.writeAddImmInst(off, code);
} else {
- const old_inst = Instruction{
+ const old_inst: Instruction = .{
.add_subtract_immediate = mem.bytesToValue(std.meta.TagPayload(
Instruction,
Instruction.add_subtract_immediate,
@@ -1795,7 +1796,7 @@ const aarch64 = struct {
},
.TLSDESC_CALL => if (!target.flags.has_tlsdesc) {
- const old_inst = Instruction{
+ const old_inst: Instruction = .{
.unconditional_branch_register = mem.bytesToValue(std.meta.TagPayload(
Instruction,
Instruction.unconditional_branch_register,
src/link/MachO/Atom.zig
@@ -640,7 +640,8 @@ fn resolveRelocInner(
macho_file: *MachO,
writer: anytype,
) ResolveError!void {
- const cpu_arch = macho_file.getTarget().cpu.arch;
+ const t = &macho_file.base.comp.root_mod.resolved_target.result;
+ const cpu_arch = t.cpu.arch;
const rel_offset = math.cast(usize, rel.offset - self.off) orelse return error.Overflow;
const P = @as(i64, @intCast(self.getAddress(macho_file))) + @as(i64, @intCast(rel_offset));
const A = rel.addend + rel.getRelocAddend(cpu_arch);
@@ -747,7 +748,7 @@ fn resolveRelocInner(
const S_: i64 = @intCast(sym.getTlvPtrAddress(macho_file));
try writer.writeInt(i32, @intCast(S_ + A - P), .little);
} else {
- try x86_64.relaxTlv(code[rel_offset - 3 ..]);
+ try x86_64.relaxTlv(code[rel_offset - 3 ..], t);
try writer.writeInt(i32, @intCast(S + A - P), .little);
}
},
@@ -893,11 +894,12 @@ fn resolveRelocInner(
const x86_64 = struct {
fn relaxGotLoad(self: Atom, code: []u8, rel: Relocation, macho_file: *MachO) ResolveError!void {
dev.check(.x86_64_backend);
+ const t = &macho_file.base.comp.root_mod.resolved_target.result;
const diags = &macho_file.base.comp.link_diags;
const old_inst = disassemble(code) orelse return error.RelaxFail;
switch (old_inst.encoding.mnemonic) {
.mov => {
- const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail;
+ const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops, t) catch return error.RelaxFail;
relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding });
encode(&.{inst}, code) catch return error.RelaxFail;
},
@@ -916,12 +918,12 @@ const x86_64 = struct {
}
}
- fn relaxTlv(code: []u8) error{RelaxFail}!void {
+ fn relaxTlv(code: []u8, t: *const std.Target) error{RelaxFail}!void {
dev.check(.x86_64_backend);
const old_inst = disassemble(code) orelse return error.RelaxFail;
switch (old_inst.encoding.mnemonic) {
.mov => {
- const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail;
+ const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops, t) catch return error.RelaxFail;
relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding });
encode(&.{inst}, code) catch return error.RelaxFail;
},
src/dev.zig
@@ -135,6 +135,7 @@ pub const Env = enum {
else => Env.ast_gen.supports(feature),
},
.@"x86_64-linux" => switch (feature) {
+ .build_command,
.stdio_listen,
.incremental,
.x86_64_backend,
test/behavior/x86_64/build.zig
@@ -0,0 +1,114 @@
+const std = @import("std");
+pub fn build(b: *std.Build) void {
+ const compiler_rt_lib = b.addStaticLibrary(.{
+ .name = "compiler_rt",
+ .use_llvm = false,
+ .use_lld = false,
+ .root_module = b.createModule(.{
+ .root_source_file = b.addWriteFiles().add("compiler_rt.zig", ""),
+ .target = b.resolveTargetQuery(.{ .cpu_arch = .x86_64 }),
+ }),
+ });
+ compiler_rt_lib.bundle_compiler_rt = true;
+
+ for ([_]std.Target.Query{
+ .{
+ .cpu_arch = .x86_64,
+ .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 },
+ .cpu_features_add = std.Target.x86.featureSet(&.{.bsf_bsr_0_clobbers_result}),
+ //.cpu_features_sub = std.Target.x86.featureSet(&.{.sse}),
+ },
+ .{
+ .cpu_arch = .x86_64,
+ .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 },
+ .cpu_features_add = std.Target.x86.featureSet(&.{.bsf_bsr_0_clobbers_result}),
+ .cpu_features_sub = std.Target.x86.featureSet(&.{
+ .cmov,
+ //.sse,
+ }),
+ },
+ //.{
+ // .cpu_arch = .x86_64,
+ // .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 },
+ // .cpu_features_sub = std.Target.x86.featureSet(&.{.sse}),
+ //},
+ .{
+ .cpu_arch = .x86_64,
+ .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 },
+ .cpu_features_sub = std.Target.x86.featureSet(&.{.sse2}),
+ },
+ .{
+ .cpu_arch = .x86_64,
+ .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 },
+ },
+ .{
+ .cpu_arch = .x86_64,
+ .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 },
+ .cpu_features_add = std.Target.x86.featureSet(&.{.sse3}),
+ },
+ .{
+ .cpu_arch = .x86_64,
+ .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 },
+ .cpu_features_add = std.Target.x86.featureSet(&.{.ssse3}),
+ },
+ .{
+ .cpu_arch = .x86_64,
+ .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 },
+ .cpu_features_add = std.Target.x86.featureSet(&.{.sse4_1}),
+ },
+ .{
+ .cpu_arch = .x86_64,
+ .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 },
+ .cpu_features_add = std.Target.x86.featureSet(&.{.sse4_2}),
+ },
+ .{
+ .cpu_arch = .x86_64,
+ .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v2 },
+ },
+ .{
+ .cpu_arch = .x86_64,
+ .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v2 },
+ .cpu_features_add = std.Target.x86.featureSet(&.{.avx}),
+ },
+ .{
+ .cpu_arch = .x86_64,
+ .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 },
+ .cpu_features_sub = std.Target.x86.featureSet(&.{.avx2}),
+ },
+ .{
+ .cpu_arch = .x86_64,
+ .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 },
+ },
+ .{
+ .cpu_arch = .x86_64,
+ .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v4 },
+ },
+ }) |query| {
+ const target = b.resolveTargetQuery(query);
+ const cpu = query.serializeCpuAlloc(b.allocator) catch @panic("OOM");
+ for ([_][]const u8{
+ "math.zig",
+ }) |path| {
+ const test_mod = b.createModule(.{
+ .root_source_file = b.path(path),
+ .target = target,
+ });
+ const test_exe = b.addTest(.{
+ .name = std.fs.path.stem(path),
+ .use_llvm = false,
+ .use_lld = false,
+ .root_module = test_mod,
+ });
+ if (!std.Target.x86.featureSetHas(target.result.cpu.features, .sse2)) {
+ test_exe.bundle_compiler_rt = false;
+ test_mod.linkLibrary(compiler_rt_lib);
+ }
+ const test_run = b.addRunArtifact(test_exe);
+ b.default_step.dependOn(&test_run.step);
+ for ([_]*std.Build.Step{
+ &test_exe.step,
+ &test_run.step,
+ }) |step| step.name = b.fmt("{s} {s}", .{ step.name, cpu });
+ }
+ }
+}
test/behavior/x86_64/math.zig
@@ -1,3 +1,709 @@
+fn testUnary(comptime op: anytype) !void {
+ const testType = struct {
+ fn testType(comptime Type: type, comptime imm_arg: Type) !void {
+ const expected = op(Type, imm_arg);
+ try struct {
+ fn testOne(actual: @TypeOf(expected)) !void {
+ if (switch (@typeInfo(@TypeOf(expected))) {
+ else => actual != expected,
+ .vector => @reduce(.Or, actual != expected),
+ }) return error.Unexpected;
+ }
+ noinline fn testOps(mem_arg: Type) !void {
+ var reg_arg = mem_arg;
+ _ = .{&reg_arg};
+ try testOne(op(Type, reg_arg));
+ try testOne(op(Type, mem_arg));
+ try testOne(op(Type, imm_arg));
+ }
+ }.testOps(imm_arg);
+ }
+ }.testType;
+
+ try testType(i0, 0);
+ try testType(u0, 0);
+
+ try testType(i1, -1);
+ try testType(i1, 0);
+ try testType(u1, 0);
+ try testType(u1, 1 << 0);
+
+ try testType(i2, -1 << 1);
+ try testType(i2, -1);
+ try testType(i2, 0);
+ try testType(u2, 0);
+ try testType(u2, 1 << 0);
+ try testType(u2, 1 << 1);
+
+ try testType(i3, -1 << 2);
+ try testType(i3, -1);
+ try testType(i3, 0);
+ try testType(u3, 0);
+ try testType(u3, 1 << 0);
+ try testType(u3, 1 << 1);
+ try testType(u3, 1 << 2);
+
+ try testType(i4, -1 << 3);
+ try testType(i4, -1);
+ try testType(i4, 0);
+ try testType(u4, 0);
+ try testType(u4, 1 << 0);
+ try testType(u4, 1 << 1);
+ try testType(u4, 1 << 2);
+ try testType(u4, 1 << 3);
+
+ try testType(i5, -1 << 4);
+ try testType(i5, -1);
+ try testType(i5, 0);
+ try testType(u5, 0);
+ try testType(u5, 1 << 0);
+ try testType(u5, 1 << 1);
+ try testType(u5, 1 << 3);
+ try testType(u5, 1 << 4);
+
+ try testType(i7, -1 << 6);
+ try testType(i7, -1);
+ try testType(i7, 0);
+ try testType(u7, 0);
+ try testType(u7, 1 << 0);
+ try testType(u7, 1 << 1);
+ try testType(u7, 1 << 5);
+ try testType(u7, 1 << 6);
+
+ try testType(i8, -1 << 7);
+ try testType(i8, -1);
+ try testType(i8, 0);
+ try testType(u8, 0);
+ try testType(u8, 1 << 0);
+ try testType(u8, 1 << 1);
+ try testType(u8, 1 << 6);
+ try testType(u8, 1 << 7);
+
+ try testType(i9, -1 << 8);
+ try testType(i9, -1);
+ try testType(i9, 0);
+ try testType(u9, 0);
+ try testType(u9, 1 << 0);
+ try testType(u9, 1 << 1);
+ try testType(u9, 1 << 7);
+ try testType(u9, 1 << 8);
+
+ try testType(i15, -1 << 14);
+ try testType(i15, -1);
+ try testType(i15, 0);
+ try testType(u15, 0);
+ try testType(u15, 1 << 0);
+ try testType(u15, 1 << 1);
+ try testType(u15, 1 << 13);
+ try testType(u15, 1 << 14);
+
+ try testType(i16, -1 << 15);
+ try testType(i16, -1);
+ try testType(i16, 0);
+ try testType(u16, 0);
+ try testType(u16, 1 << 0);
+ try testType(u16, 1 << 1);
+ try testType(u16, 1 << 14);
+ try testType(u16, 1 << 15);
+
+ try testType(i17, -1 << 16);
+ try testType(i17, -1);
+ try testType(i17, 0);
+ try testType(u17, 0);
+ try testType(u17, 1 << 0);
+ try testType(u17, 1 << 1);
+ try testType(u17, 1 << 15);
+ try testType(u17, 1 << 16);
+
+ try testType(i31, -1 << 30);
+ try testType(i31, -1);
+ try testType(i31, 0);
+ try testType(u31, 0);
+ try testType(u31, 1 << 0);
+ try testType(u31, 1 << 1);
+ try testType(u31, 1 << 29);
+ try testType(u31, 1 << 30);
+
+ try testType(i32, -1 << 31);
+ try testType(i32, -1);
+ try testType(i32, 0);
+ try testType(u32, 0);
+ try testType(u32, 1 << 0);
+ try testType(u32, 1 << 1);
+ try testType(u32, 1 << 30);
+ try testType(u32, 1 << 31);
+
+ try testType(i33, -1 << 32);
+ try testType(i33, -1);
+ try testType(i33, 0);
+ try testType(u33, 0);
+ try testType(u33, 1 << 0);
+ try testType(u33, 1 << 1);
+ try testType(u33, 1 << 31);
+ try testType(u33, 1 << 32);
+
+ try testType(i63, -1 << 62);
+ try testType(i63, -1);
+ try testType(i63, 0);
+ try testType(u63, 0);
+ try testType(u63, 1 << 0);
+ try testType(u63, 1 << 1);
+ try testType(u63, 1 << 61);
+ try testType(u63, 1 << 62);
+
+ try testType(i64, -1 << 63);
+ try testType(i64, -1);
+ try testType(i64, 0);
+ try testType(u64, 0);
+ try testType(u64, 1 << 0);
+ try testType(u64, 1 << 1);
+ try testType(u64, 1 << 62);
+ try testType(u64, 1 << 63);
+
+ try testType(i65, -1 << 64);
+ try testType(i65, -1);
+ try testType(i65, 0);
+ try testType(u65, 0);
+ try testType(u65, 1 << 0);
+ try testType(u65, 1 << 1);
+ try testType(u65, 1 << 63);
+ try testType(u65, 1 << 64);
+
+ try testType(i95, -1 << 94);
+ try testType(i95, -1);
+ try testType(i95, 0);
+ try testType(u95, 0);
+ try testType(u95, 1 << 0);
+ try testType(u95, 1 << 1);
+ try testType(u95, 1 << 93);
+ try testType(u95, 1 << 94);
+
+ try testType(i96, -1 << 95);
+ try testType(i96, -1);
+ try testType(i96, 0);
+ try testType(u96, 0);
+ try testType(u96, 1 << 0);
+ try testType(u96, 1 << 1);
+ try testType(u96, 1 << 94);
+ try testType(u96, 1 << 95);
+
+ try testType(i97, -1 << 96);
+ try testType(i97, -1);
+ try testType(i97, 0);
+ try testType(u97, 0);
+ try testType(u97, 1 << 0);
+ try testType(u97, 1 << 1);
+ try testType(u97, 1 << 95);
+ try testType(u97, 1 << 96);
+
+ try testType(i127, -1 << 126);
+ try testType(i127, -1);
+ try testType(i127, 0);
+ try testType(u127, 0);
+ try testType(u127, 1 << 0);
+ try testType(u127, 1 << 1);
+ try testType(u127, 1 << 125);
+ try testType(u127, 1 << 126);
+
+ try testType(i128, -1 << 127);
+ try testType(i128, -1);
+ try testType(i128, 0);
+ try testType(u128, 0);
+ try testType(u128, 1 << 0);
+ try testType(u128, 1 << 1);
+ try testType(u128, 1 << 126);
+ try testType(u128, 1 << 127);
+
+ try testType(i129, -1 << 128);
+ try testType(i129, -1);
+ try testType(i129, 0);
+ try testType(u129, 0);
+ try testType(u129, 1 << 0);
+ try testType(u129, 1 << 1);
+ try testType(u129, 1 << 127);
+ try testType(u129, 1 << 128);
+
+ try testType(i159, -1 << 158);
+ try testType(i159, -1);
+ try testType(i159, 0);
+ try testType(u159, 0);
+ try testType(u159, 1 << 0);
+ try testType(u159, 1 << 1);
+ try testType(u159, 1 << 157);
+ try testType(u159, 1 << 158);
+
+ try testType(i160, -1 << 159);
+ try testType(i160, -1);
+ try testType(i160, 0);
+ try testType(u160, 0);
+ try testType(u160, 1 << 0);
+ try testType(u160, 1 << 1);
+ try testType(u160, 1 << 158);
+ try testType(u160, 1 << 159);
+
+ try testType(i161, -1 << 160);
+ try testType(i161, -1);
+ try testType(i161, 0);
+ try testType(u161, 0);
+ try testType(u161, 1 << 0);
+ try testType(u161, 1 << 1);
+ try testType(u161, 1 << 159);
+ try testType(u161, 1 << 160);
+
+ try testType(i191, -1 << 190);
+ try testType(i191, -1);
+ try testType(i191, 0);
+ try testType(u191, 0);
+ try testType(u191, 1 << 0);
+ try testType(u191, 1 << 1);
+ try testType(u191, 1 << 189);
+ try testType(u191, 1 << 190);
+
+ try testType(i192, -1 << 191);
+ try testType(i192, -1);
+ try testType(i192, 0);
+ try testType(u192, 0);
+ try testType(u192, 1 << 0);
+ try testType(u192, 1 << 1);
+ try testType(u192, 1 << 190);
+ try testType(u192, 1 << 191);
+
+ try testType(i193, -1 << 192);
+ try testType(i193, -1);
+ try testType(i193, 0);
+ try testType(u193, 0);
+ try testType(u193, 1 << 0);
+ try testType(u193, 1 << 1);
+ try testType(u193, 1 << 191);
+ try testType(u193, 1 << 192);
+
+ try testType(i223, -1 << 222);
+ try testType(i223, -1);
+ try testType(i223, 0);
+ try testType(u223, 0);
+ try testType(u223, 1 << 0);
+ try testType(u223, 1 << 1);
+ try testType(u223, 1 << 221);
+ try testType(u223, 1 << 222);
+
+ try testType(i224, -1 << 223);
+ try testType(i224, -1);
+ try testType(i224, 0);
+ try testType(u224, 0);
+ try testType(u224, 1 << 0);
+ try testType(u224, 1 << 1);
+ try testType(u224, 1 << 222);
+ try testType(u224, 1 << 223);
+
+ try testType(i225, -1 << 224);
+ try testType(i225, -1);
+ try testType(i225, 0);
+ try testType(u225, 0);
+ try testType(u225, 1 << 0);
+ try testType(u225, 1 << 1);
+ try testType(u225, 1 << 223);
+ try testType(u225, 1 << 224);
+
+ try testType(i255, -1 << 254);
+ try testType(i255, -1);
+ try testType(i255, 0);
+ try testType(u255, 0);
+ try testType(u255, 1 << 0);
+ try testType(u255, 1 << 1);
+ try testType(u255, 1 << 253);
+ try testType(u255, 1 << 254);
+
+ try testType(i256, -1 << 255);
+ try testType(i256, -1);
+ try testType(i256, 0);
+ try testType(u256, 0);
+ try testType(u256, 1 << 0);
+ try testType(u256, 1 << 1);
+ try testType(u256, 1 << 254);
+ try testType(u256, 1 << 255);
+
+ try testType(i257, -1 << 256);
+ try testType(i257, -1);
+ try testType(i257, 0);
+ try testType(u257, 0);
+ try testType(u257, 1 << 0);
+ try testType(u257, 1 << 1);
+ try testType(u257, 1 << 255);
+ try testType(u257, 1 << 256);
+
+ try testType(i511, -1 << 510);
+ try testType(i511, -1);
+ try testType(i511, 0);
+ try testType(u511, 0);
+ try testType(u511, 1 << 0);
+ try testType(u511, 1 << 1);
+ try testType(u511, 1 << 509);
+ try testType(u511, 1 << 510);
+
+ try testType(i512, -1 << 511);
+ try testType(i512, -1);
+ try testType(i512, 0);
+ try testType(u512, 0);
+ try testType(u512, 1 << 0);
+ try testType(u512, 1 << 1);
+ try testType(u512, 1 << 510);
+ try testType(u512, 1 << 511);
+
+ try testType(i513, -1 << 512);
+ try testType(i513, -1);
+ try testType(i513, 0);
+ try testType(u513, 0);
+ try testType(u513, 1 << 0);
+ try testType(u513, 1 << 1);
+ try testType(u513, 1 << 511);
+ try testType(u513, 1 << 512);
+
+ try testType(i1023, -1 << 1022);
+ try testType(i1023, -1);
+ try testType(i1023, 0);
+ try testType(u1023, 0);
+ try testType(u1023, 1 << 0);
+ try testType(u1023, 1 << 1);
+ try testType(u1023, 1 << 1021);
+ try testType(u1023, 1 << 1022);
+
+ try testType(i1024, -1 << 1023);
+ try testType(i1024, -1);
+ try testType(i1024, 0);
+ try testType(u1024, 0);
+ try testType(u1024, 1 << 0);
+ try testType(u1024, 1 << 1);
+ try testType(u1024, 1 << 1022);
+ try testType(u1024, 1 << 1023);
+
+ try testType(i1025, -1 << 1024);
+ try testType(i1025, -1);
+ try testType(i1025, 0);
+ try testType(u1025, 0);
+ try testType(u1025, 1 << 0);
+ try testType(u1025, 1 << 1);
+ try testType(u1025, 1 << 1023);
+ try testType(u1025, 1 << 1024);
+
+ try testType(@Vector(3, i0), .{ 0 << 0, 0, 0 });
+ try testType(@Vector(3, u0), .{ 0, 0, 0 << 0 });
+
+ try testType(@Vector(3, i1), .{ -1 << 0, -1, 0 });
+ try testType(@Vector(3, u1), .{ 0, 1, 1 << 0 });
+
+ try testType(@Vector(3, i2), .{ -1 << 1, -1, 0 });
+ try testType(@Vector(3, u2), .{ 0, 1, 1 << 1 });
+
+ try testType(@Vector(3, i3), .{ -1 << 2, -1, 0 });
+ try testType(@Vector(3, u3), .{ 0, 1, 1 << 2 });
+
+ try testType(@Vector(3, i4), .{ -1 << 3, -1, 0 });
+ try testType(@Vector(3, u4), .{ 0, 1, 1 << 3 });
+ try testType(@Vector(1, u4), .{
+ 0xb,
+ });
+ try testType(@Vector(2, u4), .{
+ 0x3, 0x4,
+ });
+ try testType(@Vector(4, u4), .{
+ 0x9, 0x2, 0xf, 0xe,
+ });
+ try testType(@Vector(8, u4), .{
+ 0x8, 0x1, 0xb, 0x1, 0xf, 0x5, 0x9, 0x6,
+ });
+ try testType(@Vector(16, u4), .{
+ 0xb, 0x6, 0x0, 0x7, 0x8, 0x5, 0x6, 0x9, 0xe, 0xb, 0x3, 0xa, 0xb, 0x5, 0x8, 0xc,
+ });
+ try testType(@Vector(32, u4), .{
+ 0xe, 0x6, 0xe, 0xa, 0xb, 0x4, 0xa, 0xb, 0x1, 0x3, 0xb, 0xc, 0x0, 0xb, 0x9, 0x4, 0xd, 0xa, 0xd, 0xd, 0x4, 0x8, 0x8, 0x6, 0xb, 0xe, 0x9, 0x6, 0xc, 0xd, 0x5, 0xd,
+ });
+ try testType(@Vector(64, u4), .{
+ 0x1, 0xc, 0xe, 0x9, 0x9, 0xf, 0x3, 0xf, 0x9, 0x9, 0x5, 0x3, 0xb, 0xd, 0xd, 0xf, 0x1, 0x2, 0xf, 0x9, 0x4, 0x4, 0x8, 0x9, 0x2, 0x9, 0x8, 0xe, 0x8, 0xa, 0x4, 0x3,
+ 0x4, 0xc, 0xb, 0x6, 0x4, 0x0, 0xa, 0x5, 0x1, 0xa, 0x4, 0xe, 0xa, 0x7, 0xd, 0x0, 0x4, 0xe, 0xe, 0x7, 0x7, 0xa, 0x4, 0x5, 0x6, 0xc, 0x6, 0x2, 0x6, 0xa, 0xe, 0xa,
+ });
+ try testType(@Vector(128, u4), .{
+ 0xd, 0x5, 0x6, 0xe, 0x3, 0x3, 0x3, 0xe, 0xd, 0xd, 0x9, 0x0, 0x0, 0xe, 0xa, 0x9, 0x8, 0x7, 0xb, 0x5, 0x7, 0xf, 0xb, 0x8, 0x0, 0xf, 0xb, 0x3, 0xa, 0x2, 0xb, 0xc,
+ 0x1, 0x1, 0xc, 0x8, 0x8, 0x6, 0x9, 0x1, 0xb, 0x0, 0x2, 0xb, 0x2, 0x2, 0x7, 0x6, 0x1, 0x1, 0xb, 0x4, 0x6, 0x4, 0x7, 0xc, 0xd, 0xc, 0xa, 0x8, 0x1, 0x7, 0x8, 0xa,
+ 0x9, 0xa, 0x1, 0x8, 0x1, 0x7, 0x9, 0x4, 0x5, 0x9, 0xd, 0x0, 0xa, 0xf, 0x3, 0x3, 0x9, 0x2, 0xf, 0x5, 0xb, 0x8, 0x6, 0xb, 0xf, 0x5, 0x8, 0x3, 0x9, 0xf, 0x6, 0x8,
+ 0xc, 0x8, 0x3, 0x4, 0xa, 0xe, 0xc, 0x1, 0xe, 0x9, 0x1, 0x8, 0xf, 0x6, 0xc, 0xc, 0x6, 0xf, 0x6, 0xd, 0xb, 0x9, 0xc, 0x3, 0xd, 0xa, 0x6, 0x8, 0x4, 0xa, 0x6, 0x9,
+ });
+ try testType(@Vector(256, u4), .{
+ 0x6, 0xc, 0xe, 0x3, 0x8, 0x2, 0xb, 0xd, 0x3, 0xa, 0x3, 0x8, 0xb, 0x8, 0x3, 0x0, 0xb, 0x5, 0x1, 0x3, 0x2, 0x2, 0xf, 0xc, 0x5, 0x1, 0x3, 0xb, 0x1, 0xc, 0x2, 0xd,
+ 0xa, 0x8, 0x1, 0xc, 0xb, 0xa, 0x3, 0x1, 0xe, 0x4, 0xf, 0xb, 0xd, 0x8, 0xf, 0xa, 0xc, 0xb, 0xb, 0x0, 0xa, 0xc, 0xf, 0xe, 0x8, 0xd, 0x9, 0x3, 0xa, 0xe, 0x8, 0x7,
+ 0x5, 0xa, 0x0, 0xe, 0x0, 0xd, 0x2, 0x2, 0x9, 0x4, 0x8, 0x9, 0x0, 0x4, 0x4, 0x8, 0xe, 0x1, 0xf, 0x1, 0x9, 0x3, 0xf, 0xc, 0xa, 0x0, 0x3, 0x2, 0x4, 0x1, 0x2, 0x3,
+ 0xf, 0x2, 0x7, 0xb, 0x5, 0x0, 0xd, 0x3, 0x4, 0xf, 0xa, 0x3, 0xc, 0x2, 0x5, 0xe, 0x7, 0x5, 0xd, 0x7, 0x9, 0x0, 0xd, 0x7, 0x9, 0xd, 0x5, 0x7, 0xf, 0xd, 0xb, 0x4,
+ 0x9, 0x6, 0xf, 0xb, 0x1, 0xb, 0x6, 0xb, 0xf, 0x7, 0xf, 0x0, 0x4, 0x7, 0x5, 0xa, 0x8, 0x1, 0xf, 0x9, 0x9, 0x0, 0x6, 0xb, 0x1, 0x2, 0x4, 0x3, 0x2, 0x0, 0x7, 0x0,
+ 0x6, 0x7, 0xf, 0x1, 0xe, 0xa, 0x8, 0x2, 0x9, 0xc, 0x1, 0x5, 0x7, 0x1, 0xb, 0x0, 0x1, 0x3, 0xd, 0x3, 0x0, 0x1, 0xa, 0x0, 0x3, 0x7, 0x1, 0x2, 0xb, 0xc, 0x2, 0x9,
+ 0x8, 0x8, 0x7, 0x0, 0xd, 0x5, 0x1, 0x5, 0x7, 0x7, 0x2, 0x3, 0x8, 0x7, 0xc, 0x8, 0xf, 0xa, 0xf, 0xf, 0x3, 0x2, 0x0, 0x4, 0x7, 0x5, 0x6, 0xd, 0x6, 0x3, 0xa, 0x4,
+ 0x1, 0x1, 0x2, 0xc, 0x3, 0xe, 0x2, 0xc, 0x7, 0x6, 0xe, 0xf, 0xb, 0x8, 0x6, 0x6, 0x9, 0x0, 0x4, 0xb, 0xe, 0x4, 0x2, 0x7, 0xf, 0xc, 0x0, 0x6, 0xd, 0xa, 0xe, 0xc,
+ });
+
+ try testType(@Vector(3, i5), .{ -1 << 4, -1, 0 });
+ try testType(@Vector(3, u5), .{ 0, 1, 1 << 4 });
+
+ try testType(@Vector(3, i7), .{ -1 << 6, -1, 0 });
+ try testType(@Vector(3, u7), .{ 0, 1, 1 << 6 });
+
+ try testType(@Vector(3, i8), .{ -1 << 7, -1, 0 });
+ try testType(@Vector(3, u8), .{ 0, 1, 1 << 7 });
+ try testType(@Vector(1, u8), .{
+ 0x33,
+ });
+ try testType(@Vector(2, u8), .{
+ 0x66, 0x87,
+ });
+ try testType(@Vector(4, u8), .{
+ 0x9d, 0xcb, 0x30, 0x7b,
+ });
+ try testType(@Vector(8, u8), .{
+ 0x4b, 0x35, 0x3f, 0x5c, 0xa5, 0x91, 0x23, 0x6d,
+ });
+ try testType(@Vector(16, u8), .{
+ 0xb7, 0x57, 0x27, 0x29, 0x58, 0xf8, 0xc9, 0x6c, 0xbe, 0x41, 0xf4, 0xd7, 0x4d, 0x01, 0xf0, 0x37,
+ });
+ try testType(@Vector(32, u8), .{
+ 0x5f, 0x61, 0x34, 0xe8, 0x37, 0x12, 0xba, 0x5a, 0x85, 0xf3, 0x3e, 0xa2, 0x0f, 0xd0, 0x65, 0xae,
+ 0xed, 0xf5, 0xe8, 0x65, 0x61, 0x28, 0x4a, 0x27, 0x2e, 0x01, 0x40, 0x8c, 0xe3, 0x36, 0x5d, 0xb6,
+ });
+ try testType(@Vector(64, u8), .{
+ 0xb0, 0x19, 0x5c, 0xc2, 0x3b, 0x16, 0x70, 0xad, 0x26, 0x45, 0xf2, 0xe1, 0x4f, 0x0f, 0x01, 0x72,
+ 0x7f, 0x1f, 0x07, 0x9e, 0xee, 0x9b, 0xb3, 0x38, 0x50, 0xf3, 0x56, 0x73, 0xd0, 0xd1, 0xee, 0xe3,
+ 0xeb, 0xf3, 0x1b, 0xe0, 0x77, 0x78, 0x75, 0xc6, 0x19, 0xe4, 0x69, 0xaa, 0x73, 0x08, 0xcd, 0x0c,
+ 0xf9, 0xed, 0x94, 0xf8, 0x79, 0x86, 0x63, 0x31, 0xbf, 0xd1, 0xe3, 0x17, 0x2b, 0xb9, 0xa1, 0x72,
+ });
+ try testType(@Vector(128, u8), .{
+ 0x2e, 0x93, 0x87, 0x09, 0x4f, 0x68, 0x14, 0xab, 0x3f, 0x04, 0x86, 0xc1, 0x95, 0xe8, 0x74, 0x11,
+ 0x57, 0x25, 0xe1, 0x88, 0xc0, 0x96, 0x33, 0x99, 0x15, 0x86, 0x2c, 0x84, 0x2e, 0xd7, 0x57, 0x21,
+ 0xd3, 0x18, 0xd5, 0x0e, 0xb4, 0x60, 0xe2, 0x08, 0xce, 0xbc, 0xd5, 0x4d, 0x8f, 0x59, 0x01, 0x67,
+ 0x71, 0x0a, 0x74, 0x48, 0xef, 0x39, 0x49, 0x7e, 0xa8, 0x39, 0x34, 0x75, 0x95, 0x3b, 0x38, 0xea,
+ 0x60, 0xd7, 0xed, 0x8f, 0xbb, 0xc0, 0x7d, 0xc2, 0x79, 0x2d, 0xbf, 0xa5, 0x64, 0xf4, 0x09, 0x86,
+ 0xfb, 0x29, 0xfe, 0xc7, 0xff, 0x62, 0x1a, 0x6f, 0xf8, 0xbd, 0xfe, 0xa4, 0xac, 0x24, 0xcf, 0x56,
+ 0x82, 0x69, 0x81, 0x0d, 0xc1, 0x51, 0x8d, 0x85, 0xf4, 0x00, 0xe7, 0x25, 0xab, 0xa5, 0x33, 0x45,
+ 0x66, 0x2e, 0x33, 0xc8, 0xf3, 0x35, 0x16, 0x7d, 0x1f, 0xc9, 0xf7, 0x44, 0xab, 0x66, 0x28, 0x0d,
+ });
+
+ try testType(@Vector(3, i9), .{ -1 << 8, -1, 0 });
+ try testType(@Vector(3, u9), .{ 0, 1, 1 << 8 });
+
+ try testType(@Vector(3, i15), .{ -1 << 14, -1, 0 });
+ try testType(@Vector(3, u15), .{ 0, 1, 1 << 14 });
+
+ try testType(@Vector(3, i16), .{ -1 << 15, -1, 0 });
+ try testType(@Vector(3, u16), .{ 0, 1, 1 << 15 });
+ try testType(@Vector(1, u16), .{
+ 0x4da6,
+ });
+ try testType(@Vector(2, u16), .{
+ 0x04d7, 0x50c6,
+ });
+ try testType(@Vector(4, u16), .{
+ 0x4c06, 0xd71f, 0x4d8f, 0xe0a4,
+ });
+ try testType(@Vector(8, u16), .{
+ 0xee9a, 0x881d, 0x31fb, 0xd3f7, 0x2c74, 0x6949, 0x4e04, 0x53d7,
+ });
+ try testType(@Vector(16, u16), .{
+ 0xeafe, 0x9a7b, 0x0d6f, 0x18cb, 0xaf8f, 0x8ee4, 0xa47e, 0xd39a,
+ 0x6572, 0x9c53, 0xf36e, 0x982e, 0x41c1, 0x8682, 0xf5dc, 0x7e01,
+ });
+ try testType(@Vector(32, u16), .{
+ 0xdfb3, 0x7de6, 0xd9ed, 0xb42e, 0x95ac, 0x9b5b, 0x0422, 0xdfcd,
+ 0x6196, 0x4dbe, 0x1818, 0x8816, 0x75e7, 0xc9b0, 0x92f7, 0x1f71,
+ 0xe584, 0x576c, 0x043a, 0x0f31, 0xfc4c, 0x2c87, 0x6b02, 0x0229,
+ 0x25b7, 0x53cd, 0x9bab, 0x866b, 0x9008, 0xf0f3, 0xeb21, 0x88e2,
+ });
+ try testType(@Vector(64, u16), .{
+ 0x084c, 0x445f, 0xce89, 0xd3ee, 0xb399, 0x315d, 0x8ef8, 0x4f6f,
+ 0xf9af, 0xcbc4, 0x0332, 0xcd55, 0xa4dc, 0xbc38, 0x6e33, 0x8ead,
+ 0xd15a, 0x5057, 0x58ef, 0x657a, 0xe9f0, 0x1418, 0x2b62, 0x3387,
+ 0x1c15, 0x04e1, 0x0276, 0x3783, 0xad9c, 0xea9a, 0x0e5e, 0xe803,
+ 0x2ee7, 0x0cf1, 0x30f1, 0xb12a, 0x381b, 0x353d, 0xf637, 0xf853,
+ 0x2ac1, 0x7ce8, 0x6a50, 0xcbb8, 0xc9b8, 0x9b25, 0xd1e9, 0xeff0,
+ 0xc0a2, 0x8e51, 0xde7a, 0x4e58, 0x5685, 0xeb3f, 0xd29b, 0x66ed,
+ 0x3dd5, 0xcb59, 0x6003, 0xf710, 0x943a, 0x7276, 0xe547, 0xe48f,
+ });
+
+ try testType(@Vector(3, i17), .{ -1 << 16, -1, 0 });
+ try testType(@Vector(3, u17), .{ 0, 1, 1 << 16 });
+
+ try testType(@Vector(3, i31), .{ -1 << 30, -1, 0 });
+ try testType(@Vector(3, u31), .{ 0, 1, 1 << 30 });
+
+ try testType(@Vector(3, i32), .{ -1 << 31, -1, 0 });
+ try testType(@Vector(3, u32), .{ 0, 1, 1 << 31 });
+ try testType(@Vector(1, u32), .{
+ 0x17e2805c,
+ });
+ try testType(@Vector(2, u32), .{
+ 0xdb6aadc5, 0xb1ff3754,
+ });
+ try testType(@Vector(4, u32), .{
+ 0xf7897b31, 0x342e1af9, 0x190fd76b, 0x283b5374,
+ });
+ try testType(@Vector(8, u32), .{
+ 0x81a0bd16, 0xc55da94e, 0x910f7e7c, 0x078d5ef7,
+ 0x0bdb1e4a, 0xf1a96e99, 0xcdd729b5, 0xe6966a1c,
+ });
+ try testType(@Vector(16, u32), .{
+ 0xfee812db, 0x29eacbed, 0xaed48136, 0x3053de13,
+ 0xbbda20df, 0x6faa274a, 0xe0b5ec3a, 0x1878b0dc,
+ 0x98204475, 0x810d8d05, 0x1e6996b6, 0xc543826a,
+ 0x53b47d8c, 0xc72c3142, 0x12f7e1f9, 0xf6782e54,
+ });
+ try testType(@Vector(32, u32), .{
+ 0xf0cf30d3, 0xe3c587b8, 0xcee44739, 0xe4a0bd72,
+ 0x41d44cce, 0x6d7c4259, 0xd85580a5, 0xec4b02d7,
+ 0xa366483d, 0x2d7b59d4, 0xe9c0ace4, 0x82cb441c,
+ 0xa23958ba, 0x04a70148, 0x3f0d20a3, 0xf9e21e37,
+ 0x009fce8b, 0x4a34a229, 0xf09c35cf, 0xc0977d4d,
+ 0xcc4d4647, 0xa30f1363, 0x27a65b14, 0xe572c785,
+ 0x8f42e320, 0x2b2cdeca, 0x11205bd4, 0x739d26aa,
+ 0xcbcc2df0, 0x5f7a3649, 0xbde1b7aa, 0x180a169f,
+ });
+
+ try testType(@Vector(3, i33), .{ -1 << 32, -1, 0 });
+ try testType(@Vector(3, u33), .{ 0, 1, 1 << 32 });
+
+ try testType(@Vector(3, i63), .{ -1 << 62, -1, 0 });
+ try testType(@Vector(3, u63), .{ 0, 1, 1 << 62 });
+
+ try testType(@Vector(3, i64), .{ -1 << 63, -1, 0 });
+ try testType(@Vector(3, u64), .{ 0, 1, 1 << 63 });
+ try testType(@Vector(1, u64), .{
+ 0x7d2e439abb0edba7,
+ });
+ try testType(@Vector(2, u64), .{
+ 0x3749ee5a2d237b9f, 0x6d8f4c3e1378f389,
+ });
+ try testType(@Vector(4, u64), .{
+ 0x03c127040e10d52b, 0xa86fe019072e27eb,
+ 0x0a554a47b709cdba, 0xf4342cc597e196c3,
+ });
+ try testType(@Vector(8, u64), .{
+ 0xea455c104375a055, 0x5c35d9d945edb2fa,
+ 0xc11b73d9d9d546fc, 0x2a9d63aae838dd5b,
+ 0xed6603f1f5d574b3, 0x2f37b354c81c1e56,
+ 0xbe7f5e2476bc76bd, 0xb0c88eacfffa9a8f,
+ });
+ try testType(@Vector(16, u64), .{
+ 0x2258fc04b31f8dbe, 0x3a2e5483003a10d8,
+ 0xebf24b31c0460510, 0x15d5b4c09b53ffa5,
+ 0x05abf6e744b17cc6, 0x9747b483f2d159fe,
+ 0x4616d8b2c8673125, 0x8ae3f91d422447eb,
+ 0x18da2f101a9e9776, 0x77a1197fb0441007,
+ 0x4ba480c8ec2dd10b, 0xeb99b9c0a1725278,
+ 0xd9d0acc5084ecdf0, 0xa0a23317fff4f515,
+ 0x0901c59a9a6a408b, 0x7c77ca72e25df033,
+ });
+
+ try testType(@Vector(3, i65), .{ -1 << 64, -1, 0 });
+ try testType(@Vector(3, u65), .{ 0, 1, 1 << 64 });
+
+ try testType(@Vector(3, i127), .{ -1 << 126, -1, 0 });
+ try testType(@Vector(3, u127), .{ 0, 1, 1 << 126 });
+
+ try testType(@Vector(3, i128), .{ -1 << 127, -1, 0 });
+ try testType(@Vector(3, u128), .{ 0, 1, 1 << 127 });
+ try testType(@Vector(1, u128), .{
+ 0x809f29e7fbafadc01145e1732590e7d9,
+ });
+ try testType(@Vector(2, u128), .{
+ 0x5150ac3438aacd0d51132cc2723b2995,
+ 0x151be9c47ad29cf719cf8358dd40165c,
+ });
+ try testType(@Vector(4, u128), .{
+ 0x4bae22df929f2f7cb9bd84deaad3e7a8,
+ 0x1ed46b2d6e1f3569f56b2ac33d8bc1cb,
+ 0xae93ea459d2ccfd5fb794e6d5c31aabb,
+ 0xb1177136acf099f550b70949ac202ec4,
+ });
+ try testType(@Vector(8, u128), .{
+ 0x7cd78db6baed6bfdf8c5265136c4e0fd,
+ 0xa41b8984c6bbde84640068194b7eba98,
+ 0xd33102778f2ae1a48d1e9bf8801bbbf0,
+ 0x0d59f6de003513a60055c86cbce2c200,
+ 0x825579d90012afddfbf04851c0748561,
+ 0xc2647c885e9d6f0ee1f5fac5da8ef7f5,
+ 0xcb4bbc1f81aa8ee68aa4dc140745687b,
+ 0x4ff10f914f74b46c694407f5bf7c7836,
+ });
+
+ try testType(@Vector(3, i129), .{ -1 << 128, -1, 0 });
+ try testType(@Vector(3, u129), .{ 0, 1, 1 << 128 });
+
+ try testType(@Vector(3, i191), .{ -1 << 190, -1, 0 });
+ try testType(@Vector(3, u191), .{ 0, 1, 1 << 190 });
+
+ try testType(@Vector(3, i192), .{ -1 << 191, -1, 0 });
+ try testType(@Vector(3, u192), .{ 0, 1, 1 << 191 });
+ try testType(@Vector(1, u192), .{
+ 0xe7baafcb9781626a77571b0539b9471a60c97d6c02106c8b,
+ });
+ try testType(@Vector(2, u192), .{
+ 0xbc9510913ed09e2c2aa50ffab9f1bc7b303a87f36e232a83,
+ 0x1f37bee446d7712d1ad457c47a66812cb926198d052aee65,
+ });
+ try testType(@Vector(4, u192), .{
+ 0xdca6a7cfc19c69efc34022062a8ca36f2569ab3dce001202,
+ 0xd25a4529e621c9084181fdb6917c6a32eccc58b63601b35d,
+ 0x0a258afd6debbaf8c158f1caa61fed63b31871d13f51b43d,
+ 0x6b40a178674fcb82c623ac322f851623d5e993dac97a219a,
+ });
+
+ try testType(@Vector(3, i193), .{ -1 << 192, -1, 0 });
+ try testType(@Vector(3, u193), .{ 0, 1, 1 << 192 });
+
+ try testType(@Vector(3, i255), .{ -1 << 254, -1, 0 });
+ try testType(@Vector(3, u255), .{ 0, 1, 1 << 254 });
+
+ try testType(@Vector(3, i256), .{ -1 << 255, -1, 0 });
+ try testType(@Vector(3, u256), .{ 0, 1, 1 << 255 });
+ try testType(@Vector(1, u256), .{
+ 0x230413bb481fa3a997796acf282010c560d1942e7339fd584a0f15a90c83fbda,
+ });
+ try testType(@Vector(2, u256), .{
+ 0x3ad569f8d91fdbc9da8ec0e933565919f2feb90b996c90c352b461aa0908e62d,
+ 0x0f109696d64647983f1f757042515510729ad1350e862cbf38cb73b5cf99f0f7,
+ });
+ try testType(@Vector(4, u256), .{
+ 0x1717c6ded4ac6de282d59f75f068da47d5a47a30f2c5053d2d59e715f9d28b97,
+ 0x3087189ce7540e2e0028b80af571ebc6353a00b2917f243a869ed29ecca0adaa,
+ 0x1507c6a9d104684bf503cdb08841cf91adab4644306bd67aafff5326604833ce,
+ 0x857e134ff9179733c871295b25f824bd3eb562977bad30890964fa0cdc15bb07,
+ });
+
+ try testType(@Vector(3, i257), .{ -1 << 256, -1, 0 });
+ try testType(@Vector(3, u257), .{ 0, 1, 1 << 256 });
+
+ try testType(@Vector(3, i511), .{ -1 << 510, -1, 0 });
+ try testType(@Vector(3, u511), .{ 0, 1, 1 << 510 });
+
+ try testType(@Vector(3, i512), .{ -1 << 511, -1, 0 });
+ try testType(@Vector(3, u512), .{ 0, 1, 1 << 511 });
+ try testType(@Vector(1, u512), .{
+ 0xa3ff51a609f1370e5eeb96b05169bf7469e465cf76ac5b4ea8ffd166c1ba3cd94f2dedf0d647a1fe424f3a06e6d7940f03e257f28100970b00bd5528c52b9ae6,
+ });
+ try testType(@Vector(2, u512), .{
+ 0xc6d43cd46ae31ab71f9468a895c83bf17516c6b2f1c9b04b9aa113bf7fe1b789eb7d95fcf951f12a9a6f2124589551efdd8c00f528b366a7bfb852faf8f3da53,
+ 0xc9099d2bdf8d1a0d30485ec6db4a24cbc0d89a863de30e18313ee1d66f71dd2d26235caaa703286cf4a2b51e1a12ef96d2d944c66c0bd3f0d72dd4cf0fc8100e,
+ });
+
+ try testType(@Vector(3, i513), .{ -1 << 512, -1, 0 });
+ try testType(@Vector(3, u513), .{ 0, 1, 1 << 512 });
+
+ try testType(@Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+ try testType(@Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+
+ try testType(@Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+ try testType(@Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+ try testType(@Vector(1, u1024), .{
+ 0xc6cfaa6571139552e1f067402dfc131d9b9a58aafda97198a78764b05138fb68cf26f085b7652f3d5ae0e56aa21732f296a581bb411d4a73795c213de793489fa49b173b9f5c089aa6295ff1fcdc14d491a05035b45d08fc35cd67a83d887a02b8db512f07518132e0ba56533c7d6fbe958255eddf5649bd8aba288c0dd84a25,
+ });
+
+ try testType(@Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+ try testType(@Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+}
+
fn testBinary(comptime op: anytype) !void {
const testType = struct {
fn testType(comptime Type: type, comptime imm_lhs: Type, comptime imm_rhs: Type) !void {
@@ -306,6 +1012,63 @@ fn testBinary(comptime op: anytype) !void {
0x8b0b4a27fc94a0e90652d19bc755b63d,
0xa858bce5ad0e48c13588a4e170e8667c,
});
+
+ try testType(@Vector(1, u256), .{
+ 0x28df37e1f57a56133ba3f5b5b2164ce24eb6c29a8973a597fd91fbee8ab4bafb,
+ }, .{
+ 0x63f725028cab082b5b1e6cb474428c8c3655cf438f3bb05c7a87f8270198f357,
+ });
+ try testType(@Vector(2, u256), .{
+ 0xcc79740b85597ef411e6d7e92049dfaa2328781ea4911540a3dcb512b71c7f3c,
+ 0x51ae46d2f93cbecff1578481f6ddc633dacee94ecaf81597c752c5c5db0ae766,
+ }, .{
+ 0x257f0107305cb71cef582a9a58612a019f335e390d7998f51f5898f245874a6e,
+ 0x0a95a17323a4d16a715720f122b752785e9877e3dd3d3f9b72cdac3d1139a81f,
+ });
+ try testType(@Vector(4, u256), .{
+ 0x19667a6e269342cba437a8904c7ba42a762358d32723723ae2637b01124e63c5,
+ 0x14f7d3599a7edc7bcc46874f68d4291793e6ef72bd1f3763bc5e923f54f2f781,
+ 0x1c939de0ae980b80de773a04088ba45813441336cdfdc281ee356c98d71f653b,
+ 0x39f5d755965382fe13d1b1d6690b8e3827f153f8166768c4ad8a28a963b781f2,
+ }, .{
+ 0xbe03de37cdcb8126083b4e86cd8a9803121d31b186fd5ce555ad77ce624dd6c7,
+ 0xa0c0730f0d7f141cc959849d09730b049f00693361539f1bc4758270554a60c1,
+ 0x2664bdba8de4eaa36ecee72f6bfec5b4daa6b4e00272d8116f2cc532c29490cc,
+ 0xe47a122bd45d5e7d69722d864a6b795ddee965a0993094f8791dd309d692de8b,
+ });
+
+ try testType(@Vector(1, u512), .{
+ 0x651058c1d89a8f34cfc5e66b6d25294eecfcc4a7e1e4a356eb51ee7d7b2db25378e4afee51b7d18d16e520772a60c50a02d7966f40ced1870b32c658e5821397,
+ }, .{
+ 0xd726e265ec80cb99510ba4f480ca64e959de5c528a7f54c386ecad22eeeefa845f0fd44b1bd64258a5f868197ee2d8fed59df9c9f0b72e74051a7ff20230880e,
+ });
+ try testType(@Vector(2, u512), .{
+ 0x22c8183c95cca8b09fdf541e431b73e9e4a1a5a00dff12381937fab52681d09d38ea25727d7025a2be08942cfa01535759e1644792e347c7901ec94b343c6337,
+ 0x292fdf644e75927e1aea9465ae2f60fb27550cd095f1afdea2cf7855286d26fbeed1c0b9c0474b73cb6b75621f7eadaa2f94ec358179ce2aaa0766df20da1ef3,
+ }, .{
+ 0xe1cd8c0ca244c6626d4415e10b4ac43fa69e454c529c24fec4b13e6b945684d4ea833709c16c636ca78cffa5c5bf0fe945cd714a9ad695184a6bdad31dec9e31,
+ 0x8fa3d86099e9e2789d72f8e792290356d659ab20ac0414ff94745984c6ae7d986082197bb849889f912e896670aa2c1a11bd7e66e3f650710b0f0a18a1533f90,
+ });
+
+ try testType(@Vector(1, u1024), .{
+ 0x0ca1a0dfaf8bb1da714b457d23c71aef948e66c7cd45c0aa941498a796fb18502ec32f34e885d0a107d44ae81595f8b52c2f0fb38e584b7139903a0e8a823ae20d01ca0662722dd474e7efc40f32d74cc065d97d8a09d0447f1ab6107fa0a57f3f8c866ae872506627ce82f18add79cee8dc69837f4ead3ca770c4d622d7e544,
+ }, .{
+ 0xf1e3bbe031d59351770a7a501b6e969b2c00d144f17648db3f944b69dfeb7be72e5ff933a061eba4eaa422f8ca09e5a97d0b0dd740fd4076eba8c72d7a278523f399202dc2d043c4e0eb58a2bcd4066e2146e321810b1ee4d3afdddb4f026bcc7905ce17e033a7727b4e08f33b53c63d8c9f763fc6c31d0523eb38c30d5e40bc,
+ });
+}
+
+inline fn bitNot(comptime Type: type, rhs: Type) @TypeOf(~rhs) {
+ return ~rhs;
+}
+test bitNot {
+ try testUnary(bitNot);
+}
+
+inline fn clz(comptime Type: type, rhs: Type) @TypeOf(@clz(rhs)) {
+ return @clz(rhs);
+}
+test clz {
+ try testUnary(clz);
}
inline fn bitAnd(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs & rhs) {
test/behavior/math.zig
@@ -65,6 +65,8 @@ test "@clz" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
+ if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+ if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
try testClz();
try comptime testClz();
@@ -75,6 +77,7 @@ fn testClz() !void {
try expect(testOneClz(u8, 0b00001010) == 4);
try expect(testOneClz(u8, 0b00011010) == 3);
try expect(testOneClz(u8, 0b00000000) == 8);
+ try expect(testOneClz(i8, -1) == 0);
}
test "@clz big ints" {
@@ -100,7 +103,7 @@ fn testOneClz(comptime T: type, x: T) u32 {
test "@clz vectors" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+ if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -159,6 +162,8 @@ fn testCtz() !void {
try expect(testOneCtz(u8, 0b10100000) == 5);
try expect(testOneCtz(u8, 0b10001010) == 1);
try expect(testOneCtz(u8, 0b00000000) == 8);
+ try expect(testOneCtz(i8, -1) == 0);
+ try expect(testOneCtz(i8, -2) == 1);
try expect(testOneCtz(u16, 0b00000000) == 16);
}
@@ -1712,7 +1717,7 @@ test "mod lazy values" {
test "@clz works on both vector and scalar inputs" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+ if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
test/behavior/vector.zig
@@ -646,7 +646,7 @@ test "vector division operators" {
test "vector bitwise not operator" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+ if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
test/behavior/x86_64.zig
@@ -1,8 +1,7 @@
//! CodeGen tests for the x86_64 backend.
-const builtin = @import("builtin");
-
test {
+ const builtin = @import("builtin");
if (builtin.zig_backend != .stage2_x86_64) return error.SkipZigTest;
if (builtin.object_format == .coff) return error.SkipZigTest;
_ = @import("x86_64/math.zig");
tools/update_cpu_features.zig
@@ -902,8 +902,8 @@ const llvm_targets = [_]LlvmTarget{
.features = &.{ "v8a", "exynos" },
},
},
- // LLVM removed support for v2 and v3 but zig wants to support targeting old hardware
.extra_features = &.{
+ // LLVM removed support for v2 and v3 but zig wants to support targeting old hardware
.{
.zig_name = "v2",
.desc = "ARMv2 architecture",
@@ -1043,10 +1043,22 @@ const llvm_targets = [_]LlvmTarget{
.llvm_name = "64bit-mode",
.omit = true,
},
+ .{
+ .llvm_name = "alderlake",
+ .extra_deps = &.{ "smap", "smep" },
+ },
.{
.llvm_name = "amdfam10",
.extra_deps = &.{"3dnowa"},
},
+ .{
+ .llvm_name = "arrowlake",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "arrowlake-s",
+ .extra_deps = &.{ "smap", "smep" },
+ },
.{
.llvm_name = "athlon",
.extra_deps = &.{"3dnowa"},
@@ -1081,16 +1093,64 @@ const llvm_targets = [_]LlvmTarget{
},
.{
.llvm_name = "barcelona",
- .extra_deps = &.{"3dnowa"},
+ .extra_deps = &.{ "3dnowa", "smap", "smep" },
+ },
+ .{
+ .llvm_name = "broadwell",
+ .extra_deps = &.{ "smap", "smep" },
},
.{
.llvm_name = "c3",
.extra_deps = &.{"3dnow"},
},
+ .{
+ .llvm_name = "cannonlake",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "cascadelake",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "emeraldrapids",
+ .extra_deps = &.{ "smap", "smep" },
+ },
.{
.llvm_name = "geode",
.extra_deps = &.{"3dnowa"},
},
+ .{
+ .llvm_name = "goldmont",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "goldmont_plus",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "haswell",
+ .extra_deps = &.{"smep"},
+ },
+ .{
+ .llvm_name = "i386",
+ .extra_deps = &.{"bsf_bsr_0_clobbers_result"},
+ },
+ .{
+ .llvm_name = "i486",
+ .extra_deps = &.{"bsf_bsr_0_clobbers_result"},
+ },
+ .{
+ .llvm_name = "icelake_client",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "icelake_server",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "ivybridge",
+ .extra_deps = &.{"smep"},
+ },
.{
.llvm_name = "k6-2",
.extra_deps = &.{"3dnow"},
@@ -1127,6 +1187,10 @@ const llvm_targets = [_]LlvmTarget{
.llvm_name = "lakemont",
.extra_deps = &.{"soft_float"},
},
+ .{
+ .llvm_name = "meteorlake",
+ .extra_deps = &.{ "smap", "smep" },
+ },
.{
.llvm_name = "opteron",
.extra_deps = &.{"3dnowa"},
@@ -1135,6 +1199,38 @@ const llvm_targets = [_]LlvmTarget{
.llvm_name = "opteron-sse3",
.extra_deps = &.{"3dnowa"},
},
+ .{
+ .llvm_name = "raptorlake",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "rocketlake",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "sapphirerapids",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "silvermont",
+ .extra_deps = &.{"smep"},
+ },
+ .{
+ .llvm_name = "skx",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "skylake",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "skylake_avx512",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "tigerlake",
+ .extra_deps = &.{ "smap", "smep" },
+ },
.{
.llvm_name = "winchip2",
.extra_deps = &.{"3dnow"},
@@ -1143,9 +1239,29 @@ const llvm_targets = [_]LlvmTarget{
.llvm_name = "sse4.2",
.extra_deps = &.{"crc32"},
},
+ .{
+ .llvm_name = "znver1",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "znver2",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "znver3",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "znver4",
+ .extra_deps = &.{ "smap", "smep" },
+ },
+ .{
+ .llvm_name = "znver5",
+ .extra_deps = &.{ "smap", "smep" },
+ },
},
- // Features removed from LLVM
.extra_features = &.{
+ // Features removed from LLVM
.{
.zig_name = "3dnow",
.desc = "Enable 3DNow! instructions",
@@ -1171,6 +1287,22 @@ const llvm_targets = [_]LlvmTarget{
.desc = "Prefetch with Intent to Write and T1 Hint",
.deps = &.{},
},
+ // Custom Zig features
+ .{
+ .zig_name = "bsf_bsr_0_clobbers_result",
+ .desc = "BSF/BSR may clobber the lower 32-bits of the result register when the source is zero",
+ .deps = &.{},
+ },
+ .{
+ .zig_name = "smap",
+ .desc = "Enable Supervisor Mode Access Prevention",
+ .deps = &.{},
+ },
+ .{
+ .zig_name = "smep",
+ .desc = "Enable Supervisor Mode Execution Prevention",
+ .deps = &.{},
+ },
},
.omit_cpus = &.{
// LLVM defines a bunch of dumb aliases with foreach loops in X86.td.