Commit 612f5784cf
Changed files (9)
src
test
behavior
src/arch/x86_64/CodeGen.zig
@@ -2389,7 +2389,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}
fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
- @setEvalBranchQuota(23_100);
+ @setEvalBranchQuota(23_600);
const pt = cg.pt;
const zcu = pt.zcu;
const ip = &zcu.intern_pool;
@@ -2427,7 +2427,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
// zig fmt: off
.select => try cg.airSelect(inst),
.shuffle => try cg.airShuffle(inst),
- .reduce_optimized => try cg.airReduce(inst),
// zig fmt: on
.arg => if (cg.debug_output != .none) {
@@ -67795,7 +67794,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
- .{ ._, ._, .movbe, .dst0w, .src0w, ._, ._ },
+ .{ ._, ._be, .mov, .dst0w, .src0w, ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .exact_int = 16 }, .any, .any },
@@ -67815,7 +67814,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
- .{ ._, ._, .movbe, .dst0d, .src0d, ._, ._ },
+ .{ ._, ._be, .mov, .dst0d, .src0d, ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .exact_int = 32 }, .any, .any },
@@ -67824,7 +67823,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.each = .{ .once = &.{
- .{ ._, ._, .bswap, .dst0d, ._, ._, ._ },
+ .{ ._, .b_, .swap, .dst0d, ._, ._, ._ },
} },
}, .{
.required_features = .{ .movbe, null, null, null },
@@ -67835,7 +67834,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .movbe, .dst0d, .src0d, ._, ._ },
+ .{ ._, ._be, .mov, .dst0d, .src0d, ._, ._ },
.{ ._, ._r, .sa, .dst0d, .uia(32, .src0, .sub_bit_size), ._, ._ },
} },
}, .{
@@ -67846,7 +67845,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .bswap, .dst0d, ._, ._, ._ },
+ .{ ._, .b_, .swap, .dst0d, ._, ._, ._ },
.{ ._, ._r, .sa, .dst0d, .uia(32, .src0, .sub_bit_size), ._, ._ },
} },
}, .{
@@ -67858,7 +67857,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .movbe, .dst0d, .src0d, ._, ._ },
+ .{ ._, ._be, .mov, .dst0d, .src0d, ._, ._ },
.{ ._, ._r, .sh, .dst0d, .uia(32, .src0, .sub_bit_size), ._, ._ },
} },
}, .{
@@ -67869,7 +67868,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .bswap, .dst0d, ._, ._, ._ },
+ .{ ._, .b_, .swap, .dst0d, ._, ._, ._ },
.{ ._, ._r, .sh, .dst0d, .uia(32, .src0, .sub_bit_size), ._, ._ },
} },
}, .{
@@ -67880,7 +67879,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.each = .{ .once = &.{
- .{ ._, ._, .movbe, .dst0q, .src0q, ._, ._ },
+ .{ ._, ._be, .mov, .dst0q, .src0q, ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .exact_int = 64 }, .any, .any },
@@ -67889,7 +67888,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.each = .{ .once = &.{
- .{ ._, ._, .bswap, .dst0q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .dst0q, ._, ._, ._ },
} },
}, .{
.required_features = .{ .movbe, null, null, null },
@@ -67900,7 +67899,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .movbe, .dst0q, .src0q, ._, ._ },
+ .{ ._, ._be, .mov, .dst0q, .src0q, ._, ._ },
.{ ._, ._r, .sa, .dst0q, .uia(64, .src0, .sub_bit_size), ._, ._ },
} },
}, .{
@@ -67911,7 +67910,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .bswap, .dst0q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .dst0q, ._, ._, ._ },
.{ ._, ._r, .sa, .dst0q, .uia(64, .src0, .sub_bit_size), ._, ._ },
} },
}, .{
@@ -67923,7 +67922,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .rc = .general_purpose }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .movbe, .dst0q, .src0q, ._, ._ },
+ .{ ._, ._be, .mov, .dst0q, .src0q, ._, ._ },
.{ ._, ._r, .sh, .dst0q, .uia(64, .src0, .sub_bit_size), ._, ._ },
} },
}, .{
@@ -67934,7 +67933,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .bswap, .dst0q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .dst0q, ._, ._, ._ },
.{ ._, ._r, .sh, .dst0q, .uia(64, .src0, .sub_bit_size), ._, ._ },
} },
}, .{
@@ -67962,7 +67961,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_size), ._, ._ },
.{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
.{ .@"0:", ._, .mov, .tmp2q, .lea(.tmp1q), ._, ._ },
- .{ ._, ._, .movbe, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ },
+ .{ ._, ._be, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ },
.{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
@@ -67992,7 +67991,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_size), ._, ._ },
.{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
.{ .@"0:", ._, .mov, .tmp2q, .lea(.tmp1q), ._, ._ },
- .{ ._, ._, .bswap, .tmp2q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .tmp2q, ._, ._, ._ },
.{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ },
.{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
@@ -68026,7 +68025,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._r, .sa, .tmp2q, .ui(63), ._, ._ },
.{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp2q, ._, ._ },
.{ .@"0:", ._, .mov, .tmp2q, .lea(.tmp1q), ._, ._ },
- .{ ._, ._, .movbe, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ },
+ .{ ._, ._be, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ },
.{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
@@ -68059,7 +68058,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._r, .sa, .tmp2q, .ui(63), ._, ._ },
.{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp2q, ._, ._ },
.{ .@"0:", ._, .mov, .tmp2q, .lea(.tmp1q), ._, ._ },
- .{ ._, ._, .bswap, .tmp2q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .tmp2q, ._, ._, ._ },
.{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ },
.{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
@@ -68091,7 +68090,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
.{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ },
.{ .@"0:", ._, .mov, .tmp2q, .lea(.tmp1q), ._, ._ },
- .{ ._, ._, .movbe, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ },
+ .{ ._, ._be, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ },
.{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
@@ -68122,7 +68121,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
.{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ },
.{ .@"0:", ._, .mov, .tmp2q, .lea(.tmp1q), ._, ._ },
- .{ ._, ._, .bswap, .tmp2q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .tmp2q, ._, ._, ._ },
.{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ },
.{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
@@ -68155,7 +68154,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .movsx, .tmp2q, .mem(.src0b), ._, ._ },
.{ ._, ._r, .sa, .tmp2q, .ui(63), ._, ._ },
.{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp2q, ._, ._ },
- .{ .@"0:", ._, .movbe, .tmp3q, .lea(.tmp1q), ._, ._ },
+ .{ .@"0:", ._be, .mov, .tmp3q, .lea(.tmp1q), ._, ._ },
.{ ._, ._, .mov, .tmp4q, .tmp3q, ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, .sub_bit_size_rem_64), ._ },
.{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ },
@@ -68192,7 +68191,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._r, .sa, .tmp2q, .ui(63), ._, ._ },
.{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp2q, ._, ._ },
.{ .@"0:", ._, .mov, .tmp3q, .lea(.tmp1q), ._, ._ },
- .{ ._, ._, .bswap, .tmp3q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .tmp3q, ._, ._, ._ },
.{ ._, ._, .mov, .tmp4q, .tmp3q, ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, .sub_bit_size_rem_64), ._ },
.{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ },
@@ -68227,7 +68226,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
.{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
.{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp2q, ._, ._ },
- .{ .@"0:", ._, .movbe, .tmp3q, .lea(.tmp1q), ._, ._ },
+ .{ .@"0:", ._be, .mov, .tmp3q, .lea(.tmp1q), ._, ._ },
.{ ._, ._, .mov, .tmp4q, .tmp3q, ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, .sub_bit_size_rem_64), ._ },
.{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ },
@@ -68263,7 +68262,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
.{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp2q, ._, ._ },
.{ .@"0:", ._, .mov, .tmp3q, .lea(.tmp1q), ._, ._ },
- .{ ._, ._, .bswap, .tmp3q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .tmp3q, ._, ._, ._ },
.{ ._, ._, .mov, .tmp4q, .tmp3q, ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, .sub_bit_size_rem_64), ._ },
.{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ },
@@ -68298,7 +68297,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
.{ ._, ._, .movsx, .tmp2q, .mem(.src0b), ._, ._ },
.{ ._, ._r, .sa, .tmp2q, .ui(63), ._, ._ },
- .{ .@"0:", ._, .movbe, .tmp3q, .lea(.tmp1q), ._, ._ },
+ .{ .@"0:", ._be, .mov, .tmp3q, .lea(.tmp1q), ._, ._ },
.{ ._, ._, .mov, .tmp4q, .tmp3q, ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, .sub_bit_size_rem_64), ._ },
.{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ },
@@ -68334,7 +68333,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .movsx, .tmp2q, .mem(.src0b), ._, ._ },
.{ ._, ._r, .sa, .tmp2q, .ui(63), ._, ._ },
.{ .@"0:", ._, .mov, .tmp3q, .lea(.tmp1q), ._, ._ },
- .{ ._, ._, .bswap, .tmp3q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .tmp3q, ._, ._, ._ },
.{ ._, ._, .mov, .tmp4q, .tmp3q, ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, .sub_bit_size_rem_64), ._ },
.{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ },
@@ -68368,7 +68367,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_size), ._, ._ },
.{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
.{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
- .{ .@"0:", ._, .movbe, .tmp3q, .lea(.tmp1q), ._, ._ },
+ .{ .@"0:", ._be, .mov, .tmp3q, .lea(.tmp1q), ._, ._ },
.{ ._, ._, .mov, .tmp4q, .tmp3q, ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, .sub_bit_size_rem_64), ._ },
.{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ },
@@ -68403,7 +68402,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
.{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
.{ .@"0:", ._, .mov, .tmp3q, .lea(.tmp1q), ._, ._ },
- .{ ._, ._, .bswap, .tmp3q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .tmp3q, ._, ._, ._ },
.{ ._, ._, .mov, .tmp4q, .tmp3q, ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, .sub_bit_size_rem_64), ._ },
.{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ },
@@ -69742,7 +69741,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .bswap, .src0d, ._, ._, ._ },
+ .{ ._, .b_, .swap, .src0d, ._, ._, ._ },
.{ ._, ._, .mov, .tmp0d, .src0d, ._, ._ },
.{ ._, ._r, .sh, .src0d, .ui(4), ._, ._ },
.{ ._, ._l, .sh, .tmp0d, .ui(4), ._, ._ },
@@ -69897,7 +69896,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .bswap, .src0d, ._, ._, ._ },
+ .{ ._, .b_, .swap, .src0d, ._, ._, ._ },
.{ ._, ._, .mov, .tmp0d, .src0d, ._, ._ },
.{ ._, ._r, .sh, .src0d, .ui(4), ._, ._ },
.{ ._, ._l, .sh, .tmp0d, .ui(4), ._, ._ },
@@ -70053,7 +70052,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .bswap, .src0d, ._, ._, ._ },
+ .{ ._, .b_, .swap, .src0d, ._, ._, ._ },
.{ ._, ._, .mov, .tmp0d, .src0d, ._, ._ },
.{ ._, ._r, .sh, .src0d, .ui(4), ._, ._ },
.{ ._, ._l, .sh, .tmp0d, .ui(4), ._, ._ },
@@ -70150,7 +70149,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .lea, .dst0p, .mem(.tmp0), ._, ._ },
.{ ._, ._b, .gf2p8affineq, .src0x, .lea(.dst0x), .ui(0), ._ },
.{ ._, ._q, .mov, .dst0q, .src0x, ._, ._ },
- .{ ._, ._, .bswap, .dst0q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .dst0q, ._, ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
@@ -70174,7 +70173,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .bswap, .src0q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .src0q, ._, ._, ._ },
.{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ },
.{ ._, ._, .mov, .tmp1q, .uia(0b00001111000011110000111100001111, .none, .repeat), ._, ._ },
.{ ._, ._, .@"and", .tmp0q, .tmp1q, ._, ._ },
@@ -70222,7 +70221,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .lea, .dst0p, .mem(.tmp0), ._, ._ },
.{ ._, .v_b, .gf2p8affineq, .tmp1x, .src0x, .lea(.dst0x), .ui(0) },
.{ ._, .v_q, .mov, .dst0q, .tmp1x, ._, ._ },
- .{ ._, ._, .bswap, .dst0q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .dst0q, ._, ._, ._ },
.{ ._, ._r, .sa, .dst0q, .uia(64, .dst0, .sub_bit_size), ._, ._ },
} },
}, .{
@@ -70250,7 +70249,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .lea, .dst0p, .mem(.tmp0), ._, ._ },
.{ ._, ._b, .gf2p8affineq, .src0x, .lea(.dst0x), .ui(0), ._ },
.{ ._, ._q, .mov, .dst0q, .src0x, ._, ._ },
- .{ ._, ._, .bswap, .dst0q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .dst0q, ._, ._, ._ },
.{ ._, ._r, .sa, .dst0q, .uia(64, .dst0, .sub_bit_size), ._, ._ },
} },
}, .{
@@ -70275,7 +70274,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .bswap, .src0q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .src0q, ._, ._, ._ },
.{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ },
.{ ._, ._, .mov, .tmp1q, .uia(0b00001111000011110000111100001111, .none, .repeat), ._, ._ },
.{ ._, ._, .@"and", .tmp0q, .tmp1q, ._, ._ },
@@ -70378,7 +70377,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .lea, .dst0p, .mem(.tmp0), ._, ._ },
.{ ._, ._b, .gf2p8affineq, .src0x, .lea(.dst0x), .ui(0), ._ },
.{ ._, ._q, .mov, .dst0q, .src0x, ._, ._ },
- .{ ._, ._, .bswap, .dst0q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .dst0q, ._, ._, ._ },
.{ ._, ._r, .sh, .dst0q, .uia(64, .dst0, .sub_bit_size), ._, ._ },
} },
}, .{
@@ -70403,7 +70402,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .bswap, .src0q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .src0q, ._, ._, ._ },
.{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ },
.{ ._, ._, .mov, .tmp1q, .uia(0b00001111000011110000111100001111, .none, .repeat), ._, ._ },
.{ ._, ._, .@"and", .tmp0q, .tmp1q, ._, ._ },
@@ -70646,10 +70645,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.extra_temps = .{
.{ .type = .vector_32_u8, .kind = .forward_bits_mem },
- .{ .type = .vector_32_u8, .kind = .{ .pshufb_bswap_mem = .{ .repeat = 2, .size = .xword } } },
+ .{ .type = .vector_16_u8, .kind = .{ .pshufb_bswap_mem = .{ .size = .xword } } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .unused,
- .unused,
+ .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } },
+ .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } },
.unused,
.unused,
.unused,
@@ -70661,9 +70660,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp2p, .mem(.tmp0), ._, ._ },
.{ ._, .v_b, .gf2p8affineq, .dst0y, .src0y, .lea(.tmp2y), .ui(0) },
+ .{ ._, .v_f128, .extract, .tmp3x, .dst0y, .ui(1), ._ },
.{ ._, ._, .lea, .tmp2p, .mem(.tmp1), ._, ._ },
- .{ ._, .vp_b, .shuf, .dst0y, .dst0y, .lea(.tmp2y), ._ },
- .{ ._, .v_pd, .perm, .dst0y, .dst0y, .ui(0b01_00_11_10), ._ },
+ .{ ._, .v_dqa, .mov, .tmp4x, .lea(.tmp2x), ._, ._ },
+ .{ ._, .vp_b, .shuf, .dst0x, .dst0x, .tmp4x, ._ },
+ .{ ._, .vp_b, .shuf, .tmp3x, .tmp3x, .tmp4x, ._ },
+ .{ ._, .v_f128, .insert, .dst0y, .tmp3y, .dst0x, .ui(1) },
} },
}, .{
.required_features = .{ .avx2, .gfni, null, null },
@@ -70701,42 +70703,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
- }, .{
- .required_features = .{ .avx, .gfni, null, null },
- .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .yword, .is = 256 } }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .to_mem, .none, .none } },
- },
- .extra_temps = .{
- .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .vector_32_u8, .kind = .forward_bits_mem },
- .{ .type = .vector_32_u8, .kind = .{ .pshufb_bswap_mem = .{ .repeat = 2, .size = .xword } } },
- .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } },
- .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } },
- .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } },
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .mem, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .lea, .tmp0p, .mem(.tmp2), ._, ._ },
- .{ ._, .v_dqa, .mov, .tmp4y, .lea(.tmp0y), ._, ._ },
- .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
- .{ ._, .v_dqa, .mov, .tmp5y, .lea(.tmp0y), ._, ._ },
- .{ ._, ._, .mov, .tmp0d, .sia(-32, .dst0, .add_size), ._, ._ },
- .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
- .{ .@"0:", .v_pd, .perm, .tmp6y, .lea(.tmp1y), .ui(0b01_00_11_10), ._ },
- .{ ._, .v_b, .gf2p8affineq, .tmp6y, .tmp6y, .tmp4y, .ui(0) },
- .{ ._, .vp_b, .shuf, .tmp6y, .tmp6y, .tmp5y, ._ },
- .{ ._, .v_dqu, .mov, .memi(.dst0y, .tmp0), .tmp6y, ._, ._ },
- .{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 32), ._, ._ },
- .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ },
- .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
- } },
}, .{
.required_features = .{ .avx, .gfni, null, null },
.src_constraints = .{ .{ .exact_remainder_int = .{ .of = .xword, .is = 128 } }, .any, .any },
@@ -70819,7 +70785,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._l, .sh, .tmp6q, .ui(1), ._, ._ },
.{ ._, ._, .@"and", .tmp5q, .tmp4q, ._, ._ },
.{ ._, ._, .@"or", .tmp5q, .tmp6q, ._, ._ },
- .{ ._, ._, .movbe, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ },
+ .{ ._, ._be, .mov, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ },
.{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
@@ -70870,7 +70836,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._l, .sh, .tmp6q, .ui(1), ._, ._ },
.{ ._, ._, .@"and", .tmp5q, .tmp4q, ._, ._ },
.{ ._, ._, .@"or", .tmp5q, .tmp6q, ._, ._ },
- .{ ._, ._, .bswap, .tmp5q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .tmp5q, ._, ._, ._ },
.{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ },
.{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
@@ -70926,7 +70892,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._l, .sh, .tmp6q, .ui(1), ._, ._ },
.{ ._, ._, .@"and", .tmp5q, .tmp4q, ._, ._ },
.{ ._, ._, .@"or", .tmp5q, .tmp6q, ._, ._ },
- .{ ._, ._, .movbe, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ },
+ .{ ._, ._be, .mov, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ },
.{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
@@ -70981,7 +70947,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._l, .sh, .tmp6q, .ui(1), ._, ._ },
.{ ._, ._, .@"and", .tmp5q, .tmp4q, ._, ._ },
.{ ._, ._, .@"or", .tmp5q, .tmp6q, ._, ._ },
- .{ ._, ._, .bswap, .tmp5q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .tmp5q, ._, ._, ._ },
.{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ },
.{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
@@ -71034,7 +71000,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._l, .sh, .tmp6q, .ui(1), ._, ._ },
.{ ._, ._, .@"and", .tmp5q, .tmp4q, ._, ._ },
.{ ._, ._, .@"or", .tmp5q, .tmp6q, ._, ._ },
- .{ ._, ._, .movbe, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ },
+ .{ ._, ._be, .mov, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ },
.{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
@@ -71068,7 +71034,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .mov, .tmp3q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ },
.{ ._, ._, .mov, .tmp4q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ },
.{ .@"0:", ._, .mov, .tmp5q, .lea(.tmp1q), ._, ._ },
- .{ ._, ._, .bswap, .tmp5q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .tmp5q, ._, ._, ._ },
.{ ._, ._, .mov, .tmp6q, .tmp5q, ._, ._ },
.{ ._, ._, .@"and", .tmp6q, .tmp2q, ._, ._ },
.{ ._, ._r, .sh, .tmp5q, .ui(4), ._, ._ },
@@ -71123,7 +71089,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .mov, .tmp3q, .uia(0b00001111000011110000111100001111, .none, .repeat), ._, ._ },
.{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ },
.{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ },
- .{ .@"0:", ._, .movbe, .tmp6q, .lea(.tmp1q), ._, ._ },
+ .{ .@"0:", ._be, .mov, .tmp6q, .lea(.tmp1q), ._, ._ },
.{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ },
.{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ },
.{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ },
@@ -71182,7 +71148,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ },
.{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ },
.{ .@"0:", ._, .mov, .tmp6q, .lea(.tmp1q), ._, ._ },
- .{ ._, ._, .bswap, .tmp6q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .tmp6q, ._, ._, ._ },
.{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ },
.{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ },
.{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ },
@@ -71238,7 +71204,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .mov, .tmp3q, .uia(0b00001111000011110000111100001111, .none, .repeat), ._, ._ },
.{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ },
.{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ },
- .{ .@"0:", ._, .movbe, .tmp6q, .lea(.tmp1q), ._, ._ },
+ .{ .@"0:", ._be, .mov, .tmp6q, .lea(.tmp1q), ._, ._ },
.{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ },
.{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ },
.{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ },
@@ -71295,7 +71261,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ },
.{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ },
.{ .@"0:", ._, .mov, .tmp6q, .lea(.tmp1q), ._, ._ },
- .{ ._, ._, .bswap, .tmp6q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .tmp6q, ._, ._, ._ },
.{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ },
.{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ },
.{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ },
@@ -71352,7 +71318,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .mov, .tmp3q, .uia(0b00001111000011110000111100001111, .none, .repeat), ._, ._ },
.{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ },
.{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ },
- .{ .@"0:", ._, .movbe, .tmp6q, .lea(.tmp1q), ._, ._ },
+ .{ .@"0:", ._be, .mov, .tmp6q, .lea(.tmp1q), ._, ._ },
.{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ },
.{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ },
.{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ },
@@ -71410,7 +71376,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ },
.{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ },
.{ .@"0:", ._, .mov, .tmp6q, .lea(.tmp1q), ._, ._ },
- .{ ._, ._, .bswap, .tmp6q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .tmp6q, ._, ._, ._ },
.{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ },
.{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ },
.{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ },
@@ -71465,7 +71431,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .mov, .tmp3q, .uia(0b00001111000011110000111100001111, .none, .repeat), ._, ._ },
.{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ },
.{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ },
- .{ .@"0:", ._, .movbe, .tmp6q, .lea(.tmp1q), ._, ._ },
+ .{ .@"0:", ._be, .mov, .tmp6q, .lea(.tmp1q), ._, ._ },
.{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ },
.{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ },
.{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ },
@@ -71521,7 +71487,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ },
.{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ },
.{ .@"0:", ._, .mov, .tmp6q, .lea(.tmp1q), ._, ._ },
- .{ ._, ._, .bswap, .tmp6q, ._, ._, ._ },
+ .{ ._, .b_, .swap, .tmp6q, ._, ._, ._ },
.{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ },
.{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ },
.{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ },
@@ -98883,7 +98849,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
- .{ ._, .v_dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ },
+ .{ ._, .v_ps, .mova, .tmp2x, .lea(.tmp0x), ._, ._ },
.{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
.{ .@"0:", .v_ps, .cvtph2, .tmp3x, .memsia(.src0q, .@"2", .tmp0, .add_unaligned_size), ._, ._ },
.{ ._, .v_, .cvttps2dq, .tmp3x, .tmp3x, ._, ._ },
@@ -118968,26 +118934,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
} },
} ++ [_]Select.Case{ .{
- .required_features = .{ .sse, null, null, null },
- .dst_constraints = .{ .{ .int = .byte }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .{ .int = .byte }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .to_mut_gpr, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .byte }, .any },
.src_constraints = .{ .{ .scalar_int = .{ .of = .word, .is = .byte } }, .any, .any },
@@ -119627,7 +119573,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
.{ ._, .vp_, mir_tag, .dst0y, .dst0y, .memd(.src0y, 32), ._ },
@@ -119669,7 +119615,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
@@ -119717,6 +119663,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -119755,6 +119702,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -119793,6 +119741,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -119838,6 +119787,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -119897,6 +119847,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -119948,6 +119899,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -119999,6 +119951,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ },
@@ -120025,7 +119978,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}, .{
.required_features = .{ .slow_incdec, null, null, null },
.dst_constraints = .{ .{ .int = .byte }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -120053,7 +120006,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.dst_constraints = .{ .{ .int = .byte }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -120079,26 +120032,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
.{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
} },
- }, .{
- .required_features = .{ .sse, null, null, null },
- .dst_constraints = .{ .{ .int = .word }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .{ .int = .word }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .to_mut_gpr, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -120573,7 +120506,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
.{ ._, .vp_, mir_tag, .dst0y, .dst0y, .memd(.src0y, 32), ._ },
@@ -120613,7 +120546,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
@@ -120659,6 +120592,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -120695,6 +120629,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -120731,6 +120666,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -120773,6 +120709,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -120830,6 +120767,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -120879,6 +120817,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -120928,6 +120867,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ },
@@ -120950,7 +120890,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.dst_constraints = .{ .{ .int = .word }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .word, .is = .word } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -120976,26 +120916,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
- }, .{
- .required_features = .{ .sse, null, null, null },
- .dst_constraints = .{ .{ .int = .dword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .{ .int = .dword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .to_mut_gpr, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -121316,7 +121236,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
.{ ._, .vp_, mir_tag, .dst0y, .dst0y, .memd(.src0y, 32), ._ },
@@ -121354,7 +121274,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
@@ -121398,6 +121318,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -121432,6 +121353,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -121466,6 +121388,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -121505,6 +121428,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -121560,6 +121484,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -121607,6 +121532,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -121654,6 +121580,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ },
@@ -121673,7 +121600,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.dst_constraints = .{ .{ .int = .dword }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -121699,27 +121626,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(4), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
- }, .{
- .required_features = .{ .sse, null, null, null },
- .dst_constraints = .{ .{ .int = .qword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .required_features = .{ .@"64bit", null, null, null },
- .dst_constraints = .{ .{ .int = .qword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .to_mut_gpr, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .qword }, .any },
@@ -121898,7 +121804,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
.{ ._, .vp_, mir_tag, .dst0y, .dst0y, .memd(.src0y, 32), ._ },
@@ -121934,7 +121840,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
@@ -121976,6 +121882,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -122008,6 +121915,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -122040,6 +121948,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -122077,6 +121986,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -122130,6 +122040,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -122175,6 +122086,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -122220,6 +122132,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ },
@@ -122238,7 +122151,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}, .{
.required_features = .{ .@"64bit", null, null, null },
.dst_constraints = .{ .{ .int = .qword }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -122264,25 +122177,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
- }, .{
- .required_features = .{ .sse, null, null, null },
- .dst_constraints = .{ .{ .int = .xword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .{ .int = .xword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .to_mut_mem, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
}, .{
.required_features = .{ .avx2, null, null, null },
.dst_constraints = .{ .{ .int = .xword }, .any },
@@ -122353,7 +122247,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
.{ ._, .vp_, mir_tag, .dst0y, .dst0y, .memd(.src0y, 32), ._ },
@@ -122408,6 +122302,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -122438,6 +122333,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -122450,7 +122346,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}, .{
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .int = .xword }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .xword } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -122468,6 +122364,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -122503,6 +122400,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -122554,6 +122452,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -122597,6 +122496,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -122640,6 +122540,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ },
@@ -122653,25 +122554,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
- }, .{
- .required_features = .{ .avx, null, null, null },
- .dst_constraints = .{ .{ .int = .yword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .{ .int = .yword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .to_mut_mem, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
}, .{
.required_features = .{ .avx512f, null, null, null },
.dst_constraints = .{ .{ .int = .yword }, .any },
@@ -122679,7 +122561,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
.{ ._, .vp_, mir_tag, .dst0y, .dst0y, .memd(.src0y, 32), ._ },
@@ -122687,7 +122569,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}, .{
.required_features = .{ .avx2, null, null, null },
.dst_constraints = .{ .{ .int = .yword }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .yword, .is = .yword } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .yword } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -122705,6 +122587,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -122715,7 +122598,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .yword }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .yword, .is = .yword } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .yword } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -122733,6 +122616,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -122768,6 +122652,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -122817,6 +122702,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -122858,6 +122744,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -122871,14 +122758,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
- }, .{
- .dst_constraints = .{ .any_int, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .to_mut_mem, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
.dst_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .qword } }, .any },
@@ -122920,26 +122799,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.Min, .Max => unreachable,
.Add => comptime &.{ .{
- .required_features = .{ .sse, null, null, null },
- .dst_constraints = .{ .{ .int = .byte }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .{ .int = .byte }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .to_mut_gpr, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .byte }, .any },
.src_constraints = .{ .{ .scalar_int = .{ .of = .word, .is = .byte } }, .any, .any },
@@ -123513,7 +123372,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
.{ ._, .vp_b, .add, .dst0y, .dst0y, .memd(.src0y, 32), ._ },
@@ -123548,7 +123407,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
@@ -123588,6 +123447,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -123626,6 +123486,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -123662,6 +123523,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -123700,6 +123562,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -123744,6 +123607,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -123784,6 +123648,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -123824,6 +123689,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ },
@@ -123846,7 +123712,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}, .{
.required_features = .{ .slow_incdec, null, null, null },
.dst_constraints = .{ .{ .int = .byte }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -123874,7 +123740,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.dst_constraints = .{ .{ .int = .byte }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -123901,25 +123767,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .sse, null, null, null },
- .dst_constraints = .{ .{ .int = .word }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
+ .src_constraints = .{ .{ .scalar_int = .{ .of = .dword, .is = .word } }, .any, .any },
.patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .to_mut_gpr, .none, .none } },
+ .{ .src = .{ .to_sse, .none, .none } },
},
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .vph_w, .add, .dst0x, .src0x, .src0x, ._ },
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -123945,6 +123802,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .srl, .tmp0x, .src0x, .ui(16), ._ },
.{ ._, .vp_w, .add, .dst0x, .src0x, .tmp0x, ._ },
} },
+ }, .{
+ .required_features = .{ .ssse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .scalar_int = .{ .of = .dword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .ph_w, .add, .dst0x, .src0x, ._, ._ },
+ } },
}, .{
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -123971,6 +123839,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .p_d, .srl, .tmp0x, .ui(16), ._, ._ },
.{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ },
} },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .qword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .vph_w, .add, .dst0x, .src0x, .src0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -123998,6 +123878,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ },
.{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ },
} },
+ }, .{
+ .required_features = .{ .ssse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .qword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .ph_w, .add, .dst0x, .src0x, ._, ._ },
+ .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ },
+ } },
}, .{
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124026,6 +123918,32 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .p_d, .srl, .tmp0x, .ui(16), ._, ._ },
.{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ },
} },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .vph_w, .add, .tmp0x, .src0x, .src0x, ._ },
+ .{ ._, .vp_d, .shuf, .dst0x, .src0x, .ui(0b01_01_01_01), ._ },
+ .{ ._, .vp_w, .add, .dst0x, .tmp0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124053,6 +123971,32 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .srl, .dst0x, .src0x, .ui(16), ._ },
.{ ._, .vp_w, .add, .dst0x, .tmp0x, .dst0x, ._ },
} },
+ }, .{
+ .required_features = .{ .ssse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .p_d, .shuf, .tmp0x, .src0x, .ui(0b01_01_01_01), ._ },
+ .{ ._, .ph_w, .add, .dst0x, .src0x, ._, ._ },
+ .{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ },
+ } },
}, .{
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124080,6 +124024,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .p_d, .srl, .dst0x, .ui(16), ._, ._ },
.{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ },
} },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .vph_w, .add, .dst0x, .src0x, .src0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124109,6 +124066,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ },
.{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ },
} },
+ }, .{
+ .required_features = .{ .ssse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .ph_w, .add, .dst0x, .src0x, ._, ._ },
+ .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ },
+ .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ },
+ } },
}, .{
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124139,6 +124109,34 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .p_d, .srl, .tmp0x, .ui(16), ._, ._ },
.{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ },
} },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124170,6 +124168,34 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ },
.{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ },
} },
+ }, .{
+ .required_features = .{ .ssse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ },
+ .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ },
+ .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ },
+ .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ },
+ } },
}, .{
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124202,6 +124228,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .p_d, .srl, .tmp2x, .ui(16), ._, ._ },
.{ ._, .p_w, .add, .dst0x, .tmp2x, ._, ._ },
} },
+ }, .{
+ .required_features = .{ .avx2, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_i128, .extract, .dst0x, .src0y, .ui(1), ._ },
+ .{ ._, .vp_w, .add, .dst0x, .src0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx2, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124233,6 +124274,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ },
.{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ },
} },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_f128, .extract, .dst0x, .src0y, .ui(1), ._ },
+ .{ ._, .vp_w, .add, .dst0x, .src0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124264,6 +124320,36 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ },
.{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ },
} },
+ }, .{
+ .required_features = .{ .avx2, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ },
+ .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx2, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124297,6 +124383,36 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ },
.{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ },
} },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124350,7 +124466,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
.{ ._, .vp_w, .add, .dst0y, .dst0y, .memd(.src0y, 32), ._ },
@@ -124383,7 +124499,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
@@ -124400,6 +124516,40 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ },
.{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ },
} },
+ }, .{
+ .required_features = .{ .avx2, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .yword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
+ .{ .@"0:", .vp_w, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_i128, .extract, .tmp1x, .dst0y, .ui(1), ._ },
+ .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx2, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124421,6 +124571,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -124436,6 +124587,38 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .srl, .tmp1x, .dst0x, .ui(16), ._ },
.{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp1x, ._ },
} },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp0: offset register used to index src0 in the loop below
+ .unused, // no SSE scratch needed: the horizontal adds below only use dst0 (same as the ssse3 variant)
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", .vp_w, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, // accumulate one 16-byte chunk per iteration
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, // was .tmp1x, which is never written in this pattern
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, // after three horizontal adds the total is in word 0
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124457,6 +124640,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -124470,6 +124654,38 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .srl, .tmp1x, .dst0x, .ui(16), ._ },
.{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp1x, ._ },
} },
+ }, .{
+ .required_features = .{ .ssse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", .p_w, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ },
+ .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ },
+ .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ },
+ } },
}, .{
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124491,6 +124707,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -124526,6 +124743,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -124547,6 +124765,42 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ },
.{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ },
} },
+ }, .{
+ .required_features = .{ .avx2, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_size), ._, ._ },
+ .{ ._, .vp_, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ },
+ .{ .@"0:", .vp_w, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx2, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124568,6 +124822,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -124585,6 +124840,42 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ },
.{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ },
} },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ .@"0:", .vp_w, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124606,6 +124897,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -124623,6 +124915,40 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ },
.{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ },
} },
+ }, .{
+ .required_features = .{ .ssse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .word }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ },
+ .{ ._, .p_, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ },
+ .{ .@"0:", .p_w, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ },
+ .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ },
+ .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ },
+ } },
}, .{
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -124644,6 +124970,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ },
@@ -124662,7 +124989,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.dst_constraints = .{ .{ .int = .word }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .word, .is = .word } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -124689,25 +125016,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_features = .{ .sse, null, null, null },
+ .required_features = .{ .avx, .fast_hops, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .{ .int = .dword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
+ .src_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .dword } }, .any, .any },
.patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .to_mut_gpr, .none, .none } },
+ .{ .src = .{ .to_sse, .none, .none } },
},
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .vph_d, .add, .dst0x, .src0x, .src0x, ._ },
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -124733,6 +125051,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .shuf, .tmp0x, .src0x, .ui(0b01_01_01_01), ._ },
.{ ._, .vp_d, .add, .dst0x, .src0x, .tmp0x, ._ },
} },
+ }, .{
+ .required_features = .{ .ssse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .dword }, .any },
+ .src_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .ph_d, .add, .dst0x, .src0x, ._, ._ },
+ } },
}, .{
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -124758,6 +125087,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .p_d, .shuf, .tmp0x, .src0x, .ui(0b01_01_01_01), ._ },
.{ ._, .p_d, .add, .dst0x, .tmp0x, ._, ._ },
} },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .dword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .vph_d, .add, .dst0x, .src0x, .src0x, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -124785,6 +125126,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ },
.{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp0x, ._ },
} },
+ }, .{
+ .required_features = .{ .ssse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .dword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .ph_d, .add, .dst0x, .src0x, ._, ._ },
+ .{ ._, .ph_d, .add, .dst0x, .dst0x, ._, ._ },
+ } },
}, .{
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -124812,6 +125165,32 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .p_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ },
.{ ._, .p_d, .add, .dst0x, .tmp0x, ._, ._ },
} },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .dword }, .any },
+ .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .vph_d, .add, .tmp0x, .src0x, .src0x, ._ },
+ .{ ._, .vp_d, .shuf, .dst0x, .src0x, .ui(0b11_10_11_10), ._ },
+ .{ ._, .vp_d, .add, .dst0x, .tmp0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -124839,6 +125218,32 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .shuf, .dst0x, .src0x, .ui(0b01_01_01_01), ._ },
.{ ._, .vp_d, .add, .dst0x, .tmp0x, .dst0x, ._ },
} },
+ }, .{
+ .required_features = .{ .ssse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .dword }, .any },
+ .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .p_d, .shuf, .tmp0x, .src0x, .ui(0b11_10_11_10), ._ },
+ .{ ._, .ph_d, .add, .dst0x, .src0x, ._, ._ },
+ .{ ._, .p_d, .add, .dst0x, .tmp0x, ._, ._ },
+ } },
}, .{
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -124866,6 +125271,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .p_d, .shuf, .dst0x, .src0x, .ui(0b01_01_01_01), ._ },
.{ ._, .p_d, .add, .dst0x, .tmp0x, ._, ._ },
} },
+ }, .{
+ .required_features = .{ .avx2, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .dword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_i128, .extract, .dst0x, .src0y, .ui(1), ._ },
+ .{ ._, .vp_d, .add, .dst0x, .src0x, .dst0x, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx2, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -124895,6 +125314,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ },
.{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp0x, ._ },
} },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .dword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_f128, .extract, .dst0x, .src0y, .ui(1), ._ },
+ .{ ._, .vp_d, .add, .dst0x, .src0x, .dst0x, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -124924,6 +125357,35 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ },
.{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp0x, ._ },
} },
+ }, .{
+ .required_features = .{ .avx2, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .dword }, .any },
+ .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_8_u32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ },
+ .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx2, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -124955,6 +125417,35 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ },
.{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ },
} },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .dword }, .any },
+ .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -125006,7 +125497,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
.{ ._, .vp_d, .add, .dst0y, .dst0y, .memd(.src0y, 32), ._ },
@@ -125037,7 +125529,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
@@ -125052,6 +125545,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ },
.{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ },
} },
+ }, .{
+ .required_features = .{ .avx2, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .dword }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
+ .{ .@"0:", .vp_d, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_i128, .extract, .tmp1x, .dst0y, .ui(1), ._ },
+ .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx2, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -125073,6 +125599,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -125086,6 +125613,37 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b01_01_01_01), ._ },
.{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp1x, ._ },
} },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .dword }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", .vp_d, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, // both sources are dst0: no scratch register is read
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -125107,6 +125665,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -125118,6 +125677,37 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b01_01_01_01), ._ },
.{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp1x, ._ },
} },
+ }, .{
+ .required_features = .{ .ssse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .dword }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", .p_d, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .ph_d, .add, .dst0x, .dst0x, ._, ._ },
+ .{ ._, .ph_d, .add, .dst0x, .dst0x, ._, ._ },
+ } },
}, .{
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -125139,6 +125729,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -125171,6 +125762,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -125190,6 +125782,41 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ },
.{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ },
} },
+ }, .{
+ .required_features = .{ .avx2, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .dword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_size), ._, ._ },
+ .{ ._, .vp_, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ },
+ .{ .@"0:", .vp_d, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx2, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -125211,6 +125838,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -125226,6 +125854,41 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ },
.{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ },
} },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .dword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ .@"0:", .vp_d, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -125262,6 +125925,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ },
.{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ },
} },
+ }, .{
+ .required_features = .{ .ssse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .int = .dword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ },
+ .{ ._, .p_, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ },
+ .{ .@"0:", .p_d, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .ph_d, .add, .dst0x, .dst0x, ._, ._ },
+ .{ ._, .ph_d, .add, .dst0x, .dst0x, ._, ._ },
+ } },
}, .{
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -125283,6 +125979,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ },
@@ -125298,7 +125995,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.dst_constraints = .{ .{ .int = .dword }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -125324,27 +126021,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(4), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
- }, .{
- .required_features = .{ .sse, null, null, null },
- .dst_constraints = .{ .{ .int = .qword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .required_features = .{ .@"64bit", null, null, null },
- .dst_constraints = .{ .{ .int = .qword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .to_mut_gpr, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .qword }, .any },
@@ -125523,7 +126199,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
.{ ._, .vp_q, .add, .dst0y, .dst0y, .memd(.src0y, 32), ._ },
@@ -125552,7 +126228,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
@@ -125586,6 +126262,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -125618,6 +126295,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -125648,6 +126326,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -125678,6 +126357,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -125716,6 +126396,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -125750,6 +126431,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -125784,6 +126466,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ },
@@ -125798,7 +126481,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}, .{
.required_features = .{ .@"64bit", null, null, null },
.dst_constraints = .{ .{ .int = .qword }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -125824,52 +126507,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
- }, .{
- .required_features = .{ .sse, null, null, null },
- .dst_constraints = .{ .{ .int = .xword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .{ .int = .xword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .to_mut_mem, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .required_features = .{ .avx, null, null, null },
- .dst_constraints = .{ .{ .int = .yword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .{ .int = .yword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .to_mut_mem, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .any_int, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .to_mut_mem, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
.dst_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .qword } }, .any },
@@ -125910,26 +126547,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
} },
.Mul => comptime &.{ .{
- .required_features = .{ .sse, null, null, null },
- .dst_constraints = .{ .{ .int = .byte }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .{ .int = .byte }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .to_mut_gpr, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .byte }, .any },
.src_constraints = .{ .{ .scalar_int = .{ .of = .word, .is = .byte } }, .any, .any },
@@ -126528,7 +127145,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
.{ ._, .v_dqa, .mov, .tmp0y, .memd(.src0y, 32), ._, ._ },
@@ -126566,7 +127183,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -126610,6 +127227,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -126649,6 +127267,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-24, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -126686,6 +127305,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-24, .src0, .add_unaligned_size), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -126725,6 +127345,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-24, .src0, .add_unaligned_size), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -126765,6 +127386,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -126815,6 +127437,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .tmp2y, .lea(.tmp0y), ._, ._ },
@@ -126857,6 +127480,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_ps, .mova, .tmp2y, .lea(.tmp0y), ._, ._ },
@@ -126903,6 +127527,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, ._dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ },
@@ -126946,6 +127571,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, ._dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ },
@@ -126972,7 +127598,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}, .{
.required_features = .{ .slow_incdec, null, null, null },
.dst_constraints = .{ .{ .int = .byte }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -127000,7 +127626,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.dst_constraints = .{ .{ .int = .byte }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -127026,26 +127652,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
.{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
} },
- }, .{
- .required_features = .{ .sse, null, null, null },
- .dst_constraints = .{ .{ .int = .word }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .{ .int = .word }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .to_mut_gpr, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .word }, .any },
@@ -127489,7 +128095,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
.{ ._, .vp_w, .mull, .dst0y, .dst0y, .memd(.src0y, 32), ._ },
@@ -127522,7 +128128,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -127562,6 +128168,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -127598,6 +128205,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -127632,6 +128240,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -127667,6 +128276,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -127711,6 +128321,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .tmp2y, .lea(.tmp0y), ._, ._ },
@@ -127750,6 +128361,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_ps, .mova, .tmp2y, .lea(.tmp0y), ._, ._ },
@@ -127791,6 +128403,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, ._dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ },
@@ -127811,7 +128424,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.dst_constraints = .{ .{ .int = .word }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .word, .is = .word } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -127837,26 +128450,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
- }, .{
- .required_features = .{ .sse, null, null, null },
- .dst_constraints = .{ .{ .int = .dword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .{ .int = .dword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .to_mut_gpr, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
}, .{
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .int = .dword }, .any },
@@ -128163,7 +128756,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ },
.{ ._, .vp_d, .mull, .dst0y, .dst0y, .memd(.src0y, 32), ._ },
@@ -128194,7 +128787,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -128232,6 +128825,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
@@ -128266,6 +128860,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -128298,6 +128893,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -128330,6 +128926,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
.{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
@@ -128365,6 +128962,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ },
@@ -128407,6 +129005,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_dqa, .mov, .tmp2y, .lea(.tmp0y), ._, ._ },
@@ -128444,6 +129043,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .v_ps, .mova, .tmp2y, .lea(.tmp0y), ._, ._ },
@@ -128483,6 +129083,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, ._dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ },
@@ -128519,6 +129120,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, ._dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ },
@@ -128539,7 +129141,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.dst_constraints = .{ .{ .int = .dword }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -128565,31 +129167,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(4), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
- }, .{
- .required_features = .{ .sse, null, null, null },
- .dst_constraints = .{ .{ .int = .qword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .required_features = .{ .@"64bit", null, null, null },
- .dst_constraints = .{ .{ .int = .qword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .to_mut_gpr, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
.dst_constraints = .{ .{ .int = .qword }, .any },
- .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any, .any },
+ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -128615,52 +129196,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
- }, .{
- .required_features = .{ .sse, null, null, null },
- .dst_constraints = .{ .{ .int = .xword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .{ .int = .xword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .to_mut_mem, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .required_features = .{ .avx, null, null, null },
- .dst_constraints = .{ .{ .int = .yword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .mut_mem, .none, .none } },
- .{ .src = .{ .mut_gpr, .none, .none } },
- .{ .src = .{ .to_mut_sse, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .{ .int = .yword }, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .to_mut_mem, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
- }, .{
- .dst_constraints = .{ .any_int, .any },
- .src_constraints = .{ .{ .vec_len = 1 }, .any, .any },
- .patterns = &.{
- .{ .src = .{ .to_mut_mem, .none, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .each = .{ .once = &.{} },
}, .{
.required_features = .{ .@"64bit", .bmi2, .adx, null },
.dst_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .qword } }, .any },
@@ -128937,6 +129472,2289 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}
try res[0].finish(inst, &.{reduce.operand}, &ops, cg);
},
+ .reduce_optimized => |air_tag| if (use_old) try cg.airReduce(inst) else fallback: {
+ const reduce = air_datas[@intFromEnum(inst)].reduce;
+ switch (reduce.operation) {
+ .And, .Or, .Xor => unreachable,
+ .Min, .Max => break :fallback try cg.airReduce(inst),
+ .Add => {},
+ .Mul => break :fallback try cg.airReduce(inst),
+ }
+ var ops = try cg.tempsFromOperands(inst, .{reduce.operand});
+ var res: [1]Temp = undefined;
+ cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (reduce.operation) {
+ .And, .Or, .Xor => unreachable,
+ .Min, .Max => unreachable,
+ .Add => comptime &.{ .{
+ .required_features = .{ .f16c, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .dword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none, .none } },
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ },
+ .{ ._, .vh_ps, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, null, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .dword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none, .none } },
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ },
+ .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .qword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none, .none } },
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ },
+ .{ ._, .vh_ps, .add, .tmp0x, .dst0x, .dst0x, ._ },
+ .{ ._, .v_ps, .shuf, .dst0x, .dst0x, .dst0x, .ui(0b11_10_11_10) },
+ .{ ._, .v_ss, .add, .dst0x, .tmp0x, .dst0x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, null, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .qword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none, .none } },
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ },
+ .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b11_10_11_10) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .qword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none, .none } },
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ },
+ .{ ._, .vh_ps, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vh_ps, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, null, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .qword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none, .none } },
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ },
+ .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b11_10_11_10) },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .xword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ },
+ .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, null, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .xword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ },
+ .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b11_10_11_10) },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none, .none } },
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_ps, .cvtph2, .dst0y, .src0x, ._, ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .v_f128, .extract, .tmp0x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, null, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .none, .none } },
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_ps, .cvtph2, .dst0y, .src0x, ._, ._ },
+ .{ ._, .v_f128, .extract, .tmp0x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b11_10_11_10) },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .yword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ },
+ .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, null, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .yword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ },
+ .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b11_10_11_10) },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .yword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_f128, .extract, .tmp0x, .src0y, .ui(1), ._ },
+ .{ ._, .v_ps, .cvtph2, .dst0y, .src0x, ._, ._ },
+ .{ ._, .v_ps, .cvtph2, .tmp0y, .tmp0x, ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp0y, ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .v_f128, .extract, .tmp0x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, null, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .yword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_f128, .extract, .tmp0x, .src0y, .ui(1), ._ },
+ .{ ._, .v_ps, .cvtph2, .dst0y, .src0x, ._, ._ },
+ .{ ._, .v_ps, .cvtph2, .tmp0y, .tmp0x, ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp0y, ._ },
+ .{ ._, .v_f128, .extract, .tmp0x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b11_10_11_10) },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx512f, .f16c, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .zword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
+ .{ ._, .v_ps, .mova, .tmp2y, .lead(.tmp0y, 32), ._, ._ },
+ .{ ._, .v_ps, .@"and", .tmp2y, .tmp2y, .memd(.src0y, 32), ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .mem(.src0y), ._ },
+ .{ ._, .v_f128, .extract, .tmp3x, .tmp2y, .ui(1), ._ },
+ .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ },
+ .{ ._, .v_ps, .cvtph2, .tmp3y, .tmp3x, ._, ._ },
+ .{ ._, .v_ps, .add, .tmp2y, .tmp2y, .tmp3y, ._ },
+ .{ ._, .v_f128, .extract, .tmp3x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ },
+ .{ ._, .v_ps, .cvtph2, .tmp3y, .tmp3x, ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp3y, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b11_10_11_10) },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx512f, .f16c, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .zword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_ps, .mova, .tmp1y, .memd(.src0y, 32), ._, ._ },
+ .{ ._, .v_ps, .mova, .dst0y, .mem(.src0y), ._, ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .tmp1y, .ui(1), ._ },
+ .{ ._, .v_ps, .cvtph2, .tmp1y, .tmp1x, ._, ._ },
+ .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ },
+ .{ ._, .v_ps, .add, .tmp1y, .tmp1y, .tmp2y, ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ },
+ .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp1y, ._ },
+ .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b11_10_11_10) },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .xword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, .v_ps, .cvtph2, .dst0y, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", .v_ps, .cvtph2, .tmp1y, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp1y, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, null, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .xword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, .v_ps, .cvtph2, .dst0y, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", .v_ps, .cvtph2, .tmp1y, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp1y, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b11_10_11_10) },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx512f, .f16c, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .zword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
+ .{ ._, .v_ps, .mova, .tmp2y, .lead(.tmp0y, 32), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-80, .src0, .add_size), ._, ._ },
+ .{ ._, .v_ps, .@"and", .tmp2y, .tmp2y, .memad(.src0y, .add_size, -32), ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -64), ._ },
+ .{ ._, .v_f128, .extract, .tmp3x, .tmp2y, .ui(1), ._ },
+ .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ },
+ .{ ._, .v_ps, .cvtph2, .tmp3y, .tmp3x, ._, ._ },
+ .{ ._, .v_ps, .add, .tmp2y, .tmp2y, .tmp3y, ._ },
+ .{ ._, .v_f128, .extract, .tmp3x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ },
+ .{ ._, .v_ps, .cvtph2, .tmp3y, .tmp3x, ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp3y, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ },
+ .{ .@"0:", .v_ps, .cvtph2, .tmp2y, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b11_10_11_10) },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .yword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ },
+ .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ },
+ .{ .@"0:", .v_ps, .cvtph2, .tmp2y, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .f16c, null, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .yword, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ },
+ .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ },
+ .{ .@"0:", .v_ps, .cvtph2, .tmp2y, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b11_10_11_10) },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .f16, .kind = .{ .reg = .xmm1 } },
+ .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__addhf3" } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .reg = .xmm0 }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, .vp_, .xor, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-4, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, .vp_w, .insr, .dst0x, .dst0x, .memad(.src0w, .add_unaligned_size, -2), .ui(0) },
+ .{ .@"0:", .vp_, .xor, .tmp1x, .tmp1x, .tmp1x, ._ },
+ .{ ._, .vp_w, .insr, .tmp1x, .tmp1x, .memi(.src0w, .tmp0), .ui(0) },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .f16, .kind = .{ .reg = .xmm1 } },
+ .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__addhf3" } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .reg = .xmm0 }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, .p_, .xor, .dst0x, .dst0x, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-4, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, .p_w, .insr, .dst0x, .memad(.src0w, .add_unaligned_size, -2), .ui(0), ._ },
+ .{ .@"0:", .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
+ .{ ._, .p_w, .insr, .tmp1x, .memi(.src0w, .tmp0), .ui(0), ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse, null, null, null },
+ .dst_constraints = .{ .{ .float = .word }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .f16, .kind = .{ .reg = .ax } },
+ .{ .type = .f32, .kind = .mem },
+ .{ .type = .f16, .kind = .{ .reg = .xmm1 } },
+ .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__addhf3" } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .reg = .xmm0 }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._ps, .xor, .dst0x, .dst0x, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-4, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .movzx, .tmp1d, .memad(.src0w, .add_unaligned_size, -2), ._, ._ },
+ .{ ._, ._, .mov, .mem(.tmp2d), .tmp1d, ._, ._ },
+ .{ ._, ._ss, .mov, .dst0x, .mem(.tmp2d), ._, ._ },
+ .{ .@"0:", ._ps, .xor, .tmp3x, .tmp3x, ._, ._ },
+ .{ ._, ._ss, .mov, .tmp3x, .memi(.src0d, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .qword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .vh_ps, .add, .dst0x, .src0x, .src0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .qword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_ps, .shuf, .tmp0x, .src0x, .src0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .src0x, .tmp0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .qword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .h_ps, .add, .dst0x, .src0x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse, null, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .qword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._ps, .mova, .tmp0x, .src0x, ._, ._ },
+ .{ ._, ._ps, .shuf, .tmp0x, .tmp0x, .ui(0b01_01_01_01), ._ },
+ .{ ._, ._ss, .add, .dst0x, .tmp0x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .vh_ps, .add, .tmp0x, .src0x, .src0x, ._ },
+ .{ ._, .v_ps, .movhl, .dst0x, .src0x, .src0x, ._ },
+ .{ ._, .v_ss, .add, .dst0x, .tmp0x, .dst0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_ps, .movhl, .tmp0x, .src0x, .src0x, ._ },
+ .{ ._, .v_ps, .add, .tmp0x, .src0x, .tmp0x, ._ },
+ .{ ._, .v_ps, .shuf, .dst0x, .src0x, .src0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .tmp0x, .dst0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._ps, .xor, .tmp0x, .tmp0x, ._, ._ },
+ .{ ._, ._ps, .movhl, .tmp0x, .src0x, ._, ._ },
+ .{ ._, .h_ps, .add, .dst0x, .src0x, ._, ._ },
+ .{ ._, ._ss, .add, .dst0x, .tmp0x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse, null, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._ps, .xor, .tmp0x, .tmp0x, ._, ._ },
+ .{ ._, ._ps, .movhl, .tmp0x, .src0x, ._, ._ },
+ .{ ._, ._ss, .add, .tmp0x, .src0x, ._, ._ },
+ .{ ._, ._ps, .shuf, .dst0x, .src0x, .ui(0b01_01_01_01), ._ },
+ .{ ._, ._ss, .add, .dst0x, .tmp0x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .vh_ps, .add, .dst0x, .src0x, .src0x, ._ },
+ .{ ._, .vh_ps, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
+ }, .{
+ // Add-reduce exactly four f32 lanes held in one xmm register (AVX, no fast horizontal adds).
+ // dst0 may or may not share a register with src0 (`mut_rc`), so once dst0 holds the
+ // pairwise partial sums every later step must read dst0, never src0.
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ // tmp0 = { s2, s3, .. }
+ .{ ._, .v_ps, .movhl, .tmp0x, .src0x, .src0x, ._ },
+ // dst0 = { s0+s2, s1+s3, .. }
+ .{ ._, .v_ps, .add, .dst0x, .src0x, .tmp0x, ._ },
+ // tmp0[0] = s1+s3 (broadcast lane 1 of the partial sums, not of the source)
+ .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ // dst0[0] = (s0+s2) + (s1+s3)
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .h_ps, .add, .dst0x, .src0x, ._, ._ },
+ .{ ._, .h_ps, .add, .dst0x, .dst0x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse, null, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._ps, .xor, .tmp0x, .tmp0x, ._, ._ },
+ .{ ._, ._ps, .movhl, .tmp0x, .src0x, ._, ._ },
+ .{ ._, ._ps, .add, .dst0x, .tmp0x, ._, ._ },
+ .{ ._, ._ps, .mova, .tmp0x, .dst0x, ._, ._ },
+ .{ ._, ._ps, .shuf, .tmp0x, .tmp0x, .ui(0b01_01_01_01), ._ },
+ .{ ._, ._ss, .add, .dst0x, .tmp0x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ } },
+ }, .{
+ // Add-reduce exactly eight f32 lanes held in one ymm register using horizontal adds.
+ // dst0 may be a fresh register (`mut_rc`), so the first instruction must take its
+ // inputs from src0; all later steps operate on the partial sums in dst0.
+ // NOTE(review): tmp0 (general-purpose) is not referenced by this sequence.
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ // Per 128-bit half: dst0 = { a0+a1, a2+a3, a0+a1, a2+a3 }
+ .{ ._, .vh_ps, .add, .dst0y, .src0y, .src0y, ._ },
+ // Per 128-bit half: dst0[0] = a0+a1+a2+a3
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ // tmp1 = upper half of the partial sums
+ .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ },
+ // dst0[0] = low-half sum + high-half sum
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_f128, .extract, .tmp1x, .src0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .src0x, .tmp1x, ._ },
+ .{ ._, .v_ps, .movhl, .tmp1x, .dst0x, .dst0x, ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx512f, null, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .zword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .mova, .dst0y, .mem(.src0y), ._, ._ },
+ .{ ._, .v_ps, .mova, .tmp2y, .memd(.src0y, 32), ._, ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .lea(.tmp0y), ._ },
+ .{ ._, .v_ps, .@"and", .tmp2y, .tmp2y, .lead(.tmp0y, 32), ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ },
+ .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx512f, null, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .zword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_ps, .mova, .dst0y, .mem(.src0y), ._, ._ },
+ .{ ._, .v_ps, .mova, .tmp0y, .memd(.src0y, 32), ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp0y, ._ },
+ .{ ._, .v_i128, .extract, .tmp0x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ .{ ._, .v_ps, .movhl, .tmp0x, .dst0x, .dst0x, ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
+ .{ .@"0:", .v_ps, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
+ .{ .@"0:", .v_ps, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ .{ ._, .v_ps, .movhl, .tmp1x, .dst0x, .dst0x, ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._ps, .mova, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._ps, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .h_ps, .add, .dst0x, .dst0x, ._, ._ },
+ .{ ._, .h_ps, .add, .dst0x, .dst0x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse, null, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._ps, .mova, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._ps, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._ps, .xor, .tmp1x, .tmp1x, ._, ._ },
+ .{ ._, ._ps, .movhl, .tmp1x, .dst0x, ._, ._ },
+ .{ ._, ._ps, .add, .dst0x, .tmp1x, ._, ._ },
+ .{ ._, ._ps, .mova, .tmp1x, .dst0x, ._, ._ },
+ .{ ._, ._ps, .shuf, .tmp1x, .tmp1x, .ui(0b01_01_01_01), ._ },
+ .{ ._, ._ss, .add, .dst0x, .tmp1x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx512f, null, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .zword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
+ .{ ._, .v_ps, .mova, .tmp2y, .lead(.tmp0y, 32), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-128, .src0, .add_size), ._, ._ },
+ .{ ._, .v_ps, .@"and", .tmp2y, .tmp2y, .memad(.src0y, .add_size, -32), ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -64), ._ },
+ .{ .@"0:", .v_ps, .add, .tmp2y, .tmp2y, .memid(.src0y, .tmp0, 32), ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(64), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ .@"0:", .v_ps, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .vh_ps, .add, .dst0x, .dst0x, .dst0x, ._ },
+ .{ ._, .vh_ps, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ },
+ .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ .@"0:", .v_ps, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ },
+ .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) },
+ .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, ._ps, .mova, .dst0x, .lea(.tmp0x), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ },
+ .{ ._, ._ps, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ },
+ .{ .@"0:", ._ps, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .h_ps, .add, .dst0x, .dst0x, ._, ._ },
+ .{ ._, .h_ps, .add, .dst0x, .dst0x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse, null, null, null },
+ .dst_constraints = .{ .{ .float = .dword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, ._ps, .mova, .dst0x, .lea(.tmp0x), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ },
+ .{ ._, ._ps, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ },
+ .{ .@"0:", ._ps, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._ps, .xor, .tmp2x, .tmp2x, ._, ._ },
+ .{ ._, ._ps, .movhl, .tmp2x, .dst0x, ._, ._ },
+ .{ ._, ._ps, .add, .dst0x, .tmp2x, ._, ._ },
+ .{ ._, ._ps, .mova, .tmp2x, .dst0x, ._, ._ },
+ .{ ._, ._ps, .shuf, .tmp2x, .tmp2x, .ui(0b01_01_01_01), ._ },
+ .{ ._, ._ss, .add, .dst0x, .tmp2x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .vh_pd, .add, .dst0x, .src0x, .src0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_ps, .movhl, .tmp0x, .src0x, .src0x, ._ },
+ .{ ._, .v_sd, .add, .dst0x, .src0x, .tmp0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .h_pd, .add, .dst0x, .src0x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._ps, .xor, .tmp0x, .tmp0x, ._, ._ },
+ .{ ._, ._ps, .movhl, .tmp0x, .src0x, ._, ._ },
+ .{ ._, ._sd, .add, .dst0x, .tmp0x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .vh_pd, .add, .tmp0x, .src0x, .src0x, ._ },
+ .{ ._, .v_f128, .extract, .dst0x, .src0y, .ui(1), ._ },
+ .{ ._, .v_sd, .add, .dst0x, .tmp0x, .dst0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_f128, .extract, .tmp0x, .src0y, .ui(1), ._ },
+ .{ ._, .v_pd, .add, .tmp0x, .src0x, .tmp0x, ._ },
+ .{ ._, .v_ps, .movhl, .dst0x, .src0x, .src0x, ._ },
+ .{ ._, .v_sd, .add, .dst0x, .tmp0x, .dst0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .vh_pd, .add, .dst0y, .src0y, .src0y, ._ },
+ .{ ._, .v_f128, .extract, .tmp0x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_sd, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_sse, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_f128, .extract, .tmp0x, .src0y, .ui(1), ._ },
+ .{ ._, .v_pd, .add, .dst0x, .src0x, .tmp0x, ._ },
+ .{ ._, .v_ps, .movhl, .tmp0x, .dst0x, .dst0x, ._ },
+ .{ ._, .v_sd, .add, .dst0x, .dst0x, .tmp0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx512f, null, null, null }, // f64 result from a memory vector whose unaligned size is strictly below 64 bytes
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .zword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: address of the mask constant
+ .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, // tmp1: 64-byte and-mask derived from src0; presumably clears padding lanes - TODO confirm
+ .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, // tmp2: second accumulator, then scratch for the horizontal add
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the mask constant
+ .{ ._, .v_pd, .mova, .dst0y, .mem(.src0y), ._, ._ }, // dst0 = first 32 bytes of src0
+ .{ ._, .v_pd, .mova, .tmp2y, .memd(.src0y, 32), ._, ._ }, // tmp2 = src0 at displacement 32 (presumably the second 32 bytes)
+ .{ ._, .v_pd, .@"and", .dst0y, .dst0y, .lea(.tmp0y), ._ }, // apply the first half of the mask
+ .{ ._, .v_pd, .@"and", .tmp2y, .tmp2y, .lead(.tmp0y, 32), ._ }, // apply the second half of the mask
+ .{ ._, .v_pd, .add, .dst0y, .dst0y, .tmp2y, ._ }, // 8 lanes -> 4 partial sums
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, // vextractf128 (float form, as in the sibling reductions) of the upper 128 bits
+ .{ ._, .v_pd, .add, .dst0x, .dst0x, .tmp2x, ._ }, // 4 partial sums -> 2
+ .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ }, // tmp2 low qword = dst0 high qword
+ .{ ._, .v_sd, .add, .dst0x, .dst0x, .tmp2x, ._ }, // scalar result in the low lane
+ } },
+ }, .{
+ .required_features = .{ .avx512f, null, null, null }, // f64 result from a memory vector whose unaligned size is exactly 64 bytes, so no mask is needed
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .zword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, // tmp0: second accumulator, then scratch for the horizontal add
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .each = .{ .once = &.{
+ .{ ._, .v_pd, .mova, .dst0y, .mem(.src0y), ._, ._ }, // dst0 = first 32 bytes of src0
+ .{ ._, .v_pd, .mova, .tmp0y, .memd(.src0y, 32), ._, ._ }, // tmp0 = src0 at displacement 32 (presumably the second 32 bytes)
+ .{ ._, .v_pd, .add, .dst0y, .dst0y, .tmp0y, ._ }, // 8 lanes -> 4 partial sums
+ .{ ._, .v_f128, .extract, .tmp0x, .dst0y, .ui(1), ._ }, // vextractf128 (float form, as in the sibling reductions) of the upper 128 bits
+ .{ ._, .v_pd, .add, .dst0x, .dst0x, .tmp0x, ._ }, // 4 partial sums -> 2
+ .{ ._, .v_ps, .movhl, .tmp0x, .dst0x, .dst0x, ._ }, // tmp0 low qword = dst0 high qword
+ .{ ._, .v_sd, .add, .dst0x, .dst0x, .tmp0x, ._ }, // scalar result in the low lane
+ } },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, .v_pd, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
+ .{ .@"0:", .v_pd, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .vh_pd, .add, .dst0y, .dst0y, .dst0y, ._ },
+ .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_sd, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, .v_pd, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ },
+ .{ .@"0:", .v_pd, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_pd, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ .{ ._, .v_ps, .movhl, .tmp1x, .dst0x, .dst0x, ._ },
+ .{ ._, .v_pd, .add, .dst0x, .dst0x, .tmp1x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .xword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._pd, .mova, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._pd, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .h_pd, .add, .dst0x, .dst0x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .xword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._pd, .mova, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._pd, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._ps, .xor, .tmp1x, .tmp1x, ._, ._ },
+ .{ ._, ._ps, .movhl, .tmp1x, .dst0x, ._, ._ },
+ .{ ._, ._sd, .add, .dst0x, .tmp1x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx512f, null, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .zword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_pd, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
+ .{ ._, .v_pd, .mova, .tmp2y, .lead(.tmp0y, 32), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-128, .src0, .add_size), ._, ._ },
+ .{ ._, .v_pd, .@"and", .tmp2y, .tmp2y, .memad(.src0y, .add_size, -32), ._ },
+ .{ ._, .v_pd, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -64), ._ },
+ .{ .@"0:", .v_pd, .add, .tmp2y, .tmp2y, .memid(.src0y, .tmp0, 32), ._ },
+ .{ ._, .v_pd, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(64), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_pd, .add, .dst0y, .dst0y, .tmp2y, ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_pd, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ },
+ .{ ._, .v_sd, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_pd, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ },
+ .{ ._, .v_pd, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_pd, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ .@"0:", .v_pd, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .vh_pd, .add, .dst0x, .dst0x, .dst0x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, .v_pd, .mova, .dst0y, .lea(.tmp0y), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ },
+ .{ ._, .v_pd, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ },
+ .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ },
+ .{ ._, .v_pd, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ .{ .@"0:", .v_pd, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ },
+ .{ ._, .v_pd, .add, .dst0x, .dst0x, .tmp2x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse3, .fast_hops, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, ._pd, .mova, .dst0x, .lea(.tmp0x), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ },
+ .{ ._, ._pd, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ },
+ .{ .@"0:", ._pd, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .h_pd, .add, .dst0x, .dst0x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } },
+ .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .rc = .sse }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+ .{ ._, ._pd, .mova, .dst0x, .lea(.tmp0x), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ },
+ .{ ._, ._pd, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ },
+ .{ .@"0:", ._pd, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._ps, .xor, .tmp2x, .tmp2x, ._, ._ },
+ .{ ._, ._ps, .movhl, .tmp2x, .dst0x, ._, ._ },
+ .{ ._, ._pd, .add, .dst0x, .tmp2x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .x87, null, null, null }, // x87 path: f64 result accumulated on the FPU stack from a memory vector
+ .dst_constraints = .{ .{ .float = .qword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte offset of the next element
+ .{ .type = .f64, .kind = .{ .reg = .st7 } }, // tmp1: claims st7; presumably keeps an x87 stack slot free for the fld push - TODO confirm
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true }, // the sub/jnb loop below writes and reads the flags
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, // tmp0 = offset; presumably unaligned size of src0 minus 16 - TODO confirm
+ .{ ._, .f_, .ld, .memad(.src0q, .add_unaligned_size, -8), ._, ._, ._ }, // fld: push the starting element (presumably the last one)
+ .{ .@"0:", .f_, .add, .memi(.src0q, .tmp0), ._, ._, ._ }, // loop: fadd the element indexed by tmp0 into st(0)
+ .{ ._, ._, .sub, .tmp0p, .si(8), ._, ._ }, // step to the previous 8-byte element
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, // jnb: repeat until the subtraction borrows
+ .{ ._, .f_p, .st, .dst0q, ._, ._, ._ }, // fstp: store the sum to the memory destination and pop
+ } },
+ }, .{
+ .required_features = .{ .x87, null, null, null }, // x87 path: f80 result accumulated on the FPU stack from a memory vector
+ .dst_constraints = .{ .{ .float = .tbyte }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .tbyte } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte offset of the next element
+ .{ .type = .f80, .kind = .{ .reg = .st6 } }, // tmp1, tmp2: claim st6 and st7; presumably keep two x87 stack slots free for the two fld pushes - TODO confirm
+ .{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true }, // the sub/jnb loop below writes and reads the flags
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, // tmp0 = offset; presumably unaligned size of src0 minus 32 - TODO confirm
+ .{ ._, .f_, .ld, .memad(.src0t, .add_unaligned_size, -16), ._, ._, ._ }, // fld: push the starting element (presumably the last one) as the accumulator
+ .{ .@"0:", .f_, .ld, .memi(.src0t, .tmp0), ._, ._, ._ }, // loop: fld the element indexed by tmp0 (a separate load, unlike the f64 entry which adds from memory directly)
+ .{ ._, .f_p, .add, ._, ._, ._, ._ }, // faddp: fold it into the accumulator and pop
+ .{ ._, ._, .sub, .tmp0p, .si(16), ._, ._ }, // step to the previous 16-byte element slot
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, // jnb: repeat until the subtraction borrows
+ .{ ._, .f_p, .st, .dst0t, ._, ._, ._ }, // fstp: store the sum to the memory destination and pop
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null }, // AVX path: f128 result built by calling __addtf3 once per remaining element
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp0: byte offset of the next element; NOTE(review): must survive the call - confirm how the allocator guarantees this
+ .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, // tmp1: pinned to xmm1, loaded with the next element before each call
+ .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__addtf3" } } }, // tmp2: the call target symbol
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .reg = .xmm0 }, .unused }, // accumulator pinned to xmm0; presumably also where __addtf3 returns its result - TODO confirm
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, // tmp0 = offset; presumably unaligned size of src0 minus 32 - TODO confirm
+ .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, // vmovdqa: xmm0 = starting element (presumably the last one)
+ .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, // loop: xmm1 = element indexed by tmp0
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, // call __addtf3
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, // step to the previous 16-byte element
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, // jnb: repeat until the subtraction borrows
+ } },
+ }, .{
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .f128, .kind = .{ .reg = .xmm1 } },
+ .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__addtf3" } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .reg = .xmm0 }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .f128, .kind = .{ .reg = .xmm1 } },
+ .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__addtf3" } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .reg = .xmm0 }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._ps, .mova, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ } },
+ .Mul => unreachable,
+ }) catch |err| switch (err) {
+ error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{
+ @tagName(air_tag),
+ cg.typeOf(reduce.operand).fmt(pt),
+ ops[0].tracking(cg),
+ }),
+ else => |e| return e,
+ };
+ try res[0].finish(inst, &.{reduce.operand}, &ops, cg);
+ },
.splat => |air_tag| if (use_old) try cg.airSplat(inst) else fallback: {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
if (cg.typeOf(ty_op.operand).toIntern() == .bool_type) break :fallback try cg.airSplat(inst);
@@ -137624,7 +140442,7 @@ fn genByteSwap(
return src_mcv;
},
3...8 => if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
- try self.genUnOpMir(.{ ._, .bswap }, src_ty, src_mcv);
+ try self.genUnOpMir(.{ .b_, .swap }, src_ty, src_mcv);
return src_mcv;
},
9...16 => {
@@ -137635,7 +140453,7 @@ fn genByteSwap(
break :mat_src_mcv .{ .load_frame = .{ .index = frame_index } };
},
.register_pair => |src_regs| if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
- for (src_regs) |src_reg| try self.asmRegister(.{ ._, .bswap }, src_reg.to64());
+ for (src_regs) |src_reg| try self.asmRegister(.{ .b_, .swap }, src_reg.to64());
return .{ .register_pair = .{ src_regs[1], src_regs[0] } };
} else src_mcv,
else => src_mcv,
@@ -137649,18 +140467,18 @@ fn genByteSwap(
for (dst_regs, 0..) |dst_reg, limb_index| {
if (mat_src_mcv.isBase()) {
try self.asmRegisterMemory(
- .{ ._, if (has_movbe) .movbe else .mov },
+ .{ if (has_movbe) ._be else ._, .mov },
dst_reg.to64(),
try mat_src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .{ .size = .qword }),
);
- if (!has_movbe) try self.asmRegister(.{ ._, .bswap }, dst_reg.to64());
+ if (!has_movbe) try self.asmRegister(.{ .b_, .swap }, dst_reg.to64());
} else {
try self.asmRegisterRegister(
.{ ._, .mov },
dst_reg.to64(),
mat_src_mcv.register_pair[limb_index].to64(),
);
- try self.asmRegister(.{ ._, .bswap }, dst_reg.to64());
+ try self.asmRegister(.{ .b_, .swap }, dst_reg.to64());
}
}
return .{ .register_pair = .{ dst_regs[1], dst_regs[0] } };
@@ -137679,7 +140497,7 @@ fn genByteSwap(
const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
try self.asmRegisterMemory(
- .{ ._, if (has_movbe) .movbe else .mov },
+ .{ if (has_movbe) ._be else ._, .mov },
temp_regs[2].to64(),
.{
.base = .{ .frame = dst_mcv.load_frame.index },
@@ -137692,7 +140510,7 @@ fn genByteSwap(
},
);
try self.asmRegisterMemory(
- .{ ._, if (has_movbe) .movbe else .mov },
+ .{ if (has_movbe) ._be else ._, .mov },
temp_regs[3].to64(),
.{
.base = .{ .frame = dst_mcv.load_frame.index },
@@ -137705,8 +140523,8 @@ fn genByteSwap(
},
);
if (!has_movbe) {
- try self.asmRegister(.{ ._, .bswap }, temp_regs[2].to64());
- try self.asmRegister(.{ ._, .bswap }, temp_regs[3].to64());
+ try self.asmRegister(.{ .b_, .swap }, temp_regs[2].to64());
+ try self.asmRegister(.{ .b_, .swap }, temp_regs[3].to64());
}
try self.asmMemoryRegister(.{ ._, .mov }, .{
.base = .{ .frame = dst_mcv.load_frame.index },
@@ -137751,9 +140569,9 @@ fn genByteSwap(
switch (abi_size) {
else => unreachable,
2 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }),
- 3...8 => try self.genUnOpMir(.{ ._, .bswap }, src_ty, dst_mcv),
+ 3...8 => try self.genUnOpMir(.{ .b_, .swap }, src_ty, dst_mcv),
}
- } else try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv);
+ } else try self.genBinOpMir(.{ ._be, .mov }, src_ty, dst_mcv, src_mcv);
return dst_mcv;
}
@@ -145626,16 +148444,16 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void {
.{ ._, .pseudo }
else for (std.enums.values(Mir.Inst.Fixes)) |fixes| {
const fixes_name = @tagName(fixes);
- const space_i = std.mem.indexOfScalar(u8, fixes_name, ' ');
- const fixes_prefix = if (space_i) |i|
- std.meta.stringToEnum(encoder.Instruction.Prefix, fixes_name[0..i]).?
+ const space_index = std.mem.indexOfScalar(u8, fixes_name, ' ');
+ const fixes_prefix = if (space_index) |index|
+ std.meta.stringToEnum(encoder.Instruction.Prefix, fixes_name[0..index]).?
else
.none;
if (fixes_prefix != prefix) continue;
- const pattern = fixes_name[if (space_i) |i| i + " ".len else 0..];
- const wildcard_i = std.mem.indexOfScalar(u8, pattern, '_').?;
- const mnem_prefix = pattern[0..wildcard_i];
- const mnem_suffix = pattern[wildcard_i + "_".len ..];
+ const pattern = fixes_name[if (space_index) |index| index + " ".len else 0..];
+ const wildcard_index = std.mem.indexOfScalar(u8, pattern, '_').?;
+ const mnem_prefix = pattern[0..wildcard_index];
+ const mnem_suffix = pattern[wildcard_index + "_".len ..];
if (!std.mem.startsWith(u8, mnem_name, mnem_prefix)) continue;
if (!std.mem.endsWith(u8, mnem_name, mnem_suffix)) continue;
break .{ fixes, std.meta.stringToEnum(
@@ -157348,7 +160166,6 @@ const Select = struct {
bool,
bool_vec: Memory.Size,
exact_bool_vec: u16,
- vec_len: u32,
ptr_any_bool_vec,
ptr_bool_vec: Memory.Size,
remainder_bool_vec: OfIsSizes,
@@ -157390,8 +160207,11 @@ const Select = struct {
float: Memory.Size,
scalar_any_float: Memory.Size,
scalar_float: OfIsSizes,
+ exclusive_scalar_float: OfIsSizes,
+ exact_scalar_float: OfIsSizes,
multiple_scalar_any_float: Memory.Size,
multiple_scalar_float: OfIsSizes,
+ unaligned_multiple_scalar_float: OfIsSizes,
exact_int: u16,
exact_signed_int: u16,
exact_unsigned_int: u16,
@@ -157435,7 +160255,6 @@ const Select = struct {
size.bitSize(cg.target) >= ty.vectorLen(zcu),
.exact_bool_vec => |size| ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and
size == ty.vectorLen(zcu),
- .vec_len => |len| ty.isVector(zcu) and ty.vectorLen(zcu) == len,
.ptr_any_bool_vec => switch (zcu.intern_pool.indexToKey(ty.childType(zcu).toIntern())) {
.vector_type => |vector_type| vector_type.child == .bool_type,
else => false,
@@ -157559,10 +160378,16 @@ const Select = struct {
cg.floatBits(ty.scalarType(zcu)) != null,
.scalar_float => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= cg.unalignedSize(ty) and
if (cg.floatBits(ty.scalarType(zcu))) |float_bits| of_is.is.bitSize(cg.target) == float_bits else false,
+ .exclusive_scalar_float => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) > cg.unalignedSize(ty) and
+ if (cg.floatBits(ty.scalarType(zcu))) |float_bits| of_is.is.bitSize(cg.target) == float_bits else false,
+ .exact_scalar_float => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) == cg.unalignedSize(ty) and
+ if (cg.floatBits(ty.scalarType(zcu))) |float_bits| of_is.is.bitSize(cg.target) == float_bits else false,
.multiple_scalar_any_float => |size| ty.abiSize(zcu) % @divExact(size.bitSize(cg.target), 8) == 0 and
cg.floatBits(ty.scalarType(zcu)) != null,
.multiple_scalar_float => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and
if (cg.floatBits(ty.scalarType(zcu))) |float_bits| of_is.is.bitSize(cg.target) == float_bits else false,
+ .unaligned_multiple_scalar_float => |of_is| cg.unalignedSize(ty) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and
+ if (cg.floatBits(ty.scalarType(zcu))) |float_bits| of_is.is.bitSize(cg.target) == float_bits else false,
.exact_int => |bit_size| if (cg.intInfo(ty)) |int_info| bit_size == int_info.bits else false,
.exact_signed_int => |bit_size| if (cg.intInfo(ty)) |int_info| switch (int_info.signedness) {
.signed => bit_size == int_info.bits,
src/arch/x86_64/Encoding.zig
@@ -313,7 +313,7 @@ pub const Mnemonic = enum {
@"or", out, outs, outsb, outsd, outsw,
pause, pop, popf, popfd, popfq, push, pushfq,
rcl, rcr,
- rdfsbase, rdgsbase, rdmsr, rdpid, rdpkru, rdpmc, rdrand, rdseed, rdssd, rdssq, rdtsc, rdtscp,
+ rdfsbase, rdgsbase, rdmsr, rdpid, rdpkru, rdpmc, rdrand, rdseed, rdsspd, rdsspq, rdtsc, rdtscp,
ret, rol, ror, rsm,
sahf, sal, sar, sbb,
scas, scasb, scasd, scasq, scasw,
@@ -436,6 +436,7 @@ pub const Mnemonic = enum {
pblendvb, pblendw,
pcmpeqq,
pextrb, pextrd, pextrq,
+ phminposuw,
pinsrb, pinsrd, pinsrq,
pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw,
pmovsxbd, pmovsxbq, pmovsxbw, pmovsxdq, pmovsxwd, pmovsxwq,
@@ -494,19 +495,19 @@ pub const Mnemonic = enum {
vpblendvb, vpblendw, vpclmulqdq,
vpcmpeqb, vpcmpeqd, vpcmpeqq, vpcmpeqw,
vpcmpgtb, vpcmpgtd, vpcmpgtq, vpcmpgtw,
- vphaddw, vphaddsw, vphaddd, vphsubw, vphsubsw, vphsubd,
vperm2f128, vpermilpd, vpermilps,
vpextrb, vpextrd, vpextrq, vpextrw,
+ vphaddw, vphaddsw, vphaddd, vphminposuw, vphsubw, vphsubsw, vphsubd,
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
+ vpmaddubsw, vpmaddwd,
vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw,
vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw,
- vpmaddubsw,
vpmovmskb,
vpmovsxbd, vpmovsxbq, vpmovsxbw, vpmovsxdq, vpmovsxwd, vpmovsxwq,
vpmovzxbd, vpmovzxbq, vpmovzxbw, vpmovzxdq, vpmovzxwd, vpmovzxwq,
- vpmuldq, vpmulhrsw, vpmulhw, vpmulld, vpmullw, vpmuludq,
+ vpmuldq, vpmulhrsw, vpmulhuw, vpmulhw, vpmulld, vpmullw, vpmuludq,
vpor,
- vpshufb, vpshufd, vpshufhw, vpshuflw,
+ vpsadbw, vpshufb, vpshufd, vpshufhw, vpshuflw,
vpsignb, vpsignd, vpsignw,
vpslld, vpslldq, vpsllq, vpsllw,
vpsrad, vpsraq, vpsraw,
@@ -1029,7 +1030,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op
}
const mnemonic_to_encodings_map = init: {
- @setEvalBranchQuota(5_800);
+ @setEvalBranchQuota(5_900);
const ModrmExt = u3;
const Entry = struct { Mnemonic, OpEn, []const Op, []const u8, ModrmExt, Mode, Feature };
const encodings: []const Entry = @import("encodings.zon");
@@ -1038,17 +1039,17 @@ const mnemonic_to_encodings_map = init: {
var mnemonic_map: [mnemonic_count][]Data = @splat(&.{});
for (encodings) |entry| mnemonic_map[@intFromEnum(entry[0])].len += 1;
var data_storage: [encodings.len]Data = undefined;
- var storage_i: usize = 0;
+ var storage_index: usize = 0;
for (&mnemonic_map) |*value| {
- value.ptr = data_storage[storage_i..].ptr;
- storage_i += value.len;
+ value.ptr = data_storage[storage_index..].ptr;
+ storage_index += value.len;
}
- var mnemonic_i: [mnemonic_count]usize = @splat(0);
+ var mnemonic_index: [mnemonic_count]usize = @splat(0);
const ops_len = @typeInfo(@FieldType(Data, "ops")).array.len;
const opc_len = @typeInfo(@FieldType(Data, "opc")).array.len;
for (encodings) |entry| {
- const i = &mnemonic_i[@intFromEnum(entry[0])];
- mnemonic_map[@intFromEnum(entry[0])][i.*] = .{
+ const index = &mnemonic_index[@intFromEnum(entry[0])];
+ mnemonic_map[@intFromEnum(entry[0])][index.*] = .{
.op_en = entry[1],
.ops = (entry[2] ++ .{.none} ** (ops_len - entry[2].len)).*,
.opc_len = entry[3].len,
@@ -1057,14 +1058,14 @@ const mnemonic_to_encodings_map = init: {
.mode = entry[5],
.feature = entry[6],
};
- i.* += 1;
+ index.* += 1;
}
const final_storage = data_storage;
var final_map: [mnemonic_count][]const Data = @splat(&.{});
- storage_i = 0;
+ storage_index = 0;
for (&final_map, mnemonic_map) |*final_value, value| {
- final_value.* = final_storage[storage_i..][0..value.len];
- storage_i += value.len;
+ final_value.* = final_storage[storage_index..][0..value.len];
+ storage_index += value.len;
}
break :init final_map;
};
src/arch/x86_64/encodings.zon
@@ -684,8 +684,8 @@
.{ .rdseed, .m, .{ .r32 }, .{ 0x0f, 0xc7 }, 7, .none, .rdseed },
.{ .rdseed, .m, .{ .r64 }, .{ 0x0f, 0xc7 }, 7, .long, .rdseed },
- .{ .rdssd, .m, .{ .r32 }, .{ 0xf3, 0x0f, 0x1e }, 1, .none, .shstk },
- .{ .rdssq, .m, .{ .r64 }, .{ 0xf3, 0x0f, 0x1e }, 1, .long, .shstk },
+ .{ .rdsspd, .m, .{ .r32 }, .{ 0xf3, 0x0f, 0x1e }, 1, .none, .shstk },
+ .{ .rdsspq, .m, .{ .r64 }, .{ 0xf3, 0x0f, 0x1e }, 1, .long, .shstk },
.{ .rdtsc, .z, .{}, .{ 0x0f, 0x31 }, 0, .none, .none },
@@ -1524,6 +1524,8 @@
.{ .pinsrw, .rmi, .{ .xmm, .r32_m16, .imm8 }, .{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 },
+ .{ .pmaddwd, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf5 }, 0, .none, .sse2 },
+
.{ .pmaxsw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xee }, 0, .none, .sse2 },
.{ .pmaxub, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xde }, 0, .none, .sse2 },
@@ -1532,6 +1534,8 @@
.{ .pminub, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xda }, 0, .none, .sse2 },
+ .{ .pmulhuw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xe4 }, 0, .none, .sse2 },
+
.{ .pmulhw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xe5 }, 0, .none, .sse2 },
.{ .pmullw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xd5 }, 0, .none, .sse2 },
@@ -1540,6 +1544,8 @@
.{ .por, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xeb }, 0, .none, .sse2 },
+ .{ .psadbw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf6 }, 0, .none, .sse2 },
+
.{ .pshufd, .rmi, .{ .xmm, .xmm_m128, .imm8 }, .{ 0x66, 0x0f, 0x70 }, 0, .none, .sse2 },
.{ .pshufhw, .rmi, .{ .xmm, .xmm_m128, .imm8 }, .{ 0xf3, 0x0f, 0x70 }, 0, .none, .sse2 },
@@ -1642,8 +1648,26 @@
.{ .palignr, .rmi, .{ .xmm, .xmm_m128, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x0f }, 0, .none, .ssse3 },
+ .{ .phaddw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x01 }, 0, .none, .ssse3 },
+ .{ .phaddd, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x02 }, 0, .none, .ssse3 },
+
+ .{ .phaddsw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x03 }, 0, .none, .ssse3 },
+
+ .{ .phsubw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x05 }, 0, .none, .ssse3 },
+ .{ .phsubd, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x06 }, 0, .none, .ssse3 },
+
+ .{ .phsubsw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x07 }, 0, .none, .ssse3 },
+
+ .{ .pmaddubsw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x04 }, 0, .none, .ssse3 },
+
+ .{ .pmulhrsw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x0b }, 0, .none, .ssse3 },
+
.{ .pshufb, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x00 }, 0, .none, .ssse3 },
+ .{ .psignb, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x08 }, 0, .none, .ssse3 },
+ .{ .psignw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x09 }, 0, .none, .ssse3 },
+ .{ .psignd, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x0a }, 0, .none, .ssse3 },
+
// SSE4.1
.{ .blendpd, .rmi, .{ .xmm, .xmm_m128, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 },
@@ -1678,6 +1702,8 @@
.{ .pextrw, .mri, .{ .r32_m16, .xmm, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .none, .sse4_1 },
+ .{ .phminposuw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x41 }, 0, .none, .sse4_1 },
+
.{ .pinsrb, .rmi, .{ .xmm, .r32_m8, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .none, .sse4_1 },
.{ .pinsrd, .rmi, .{ .xmm, .rm32, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .none, .sse4_1 },
.{ .pinsrq, .rmi, .{ .xmm, .rm64, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .long, .sse4_1 },
@@ -2129,12 +2155,28 @@
.{ .vpextrw, .rmi, .{ .r32, .xmm, .imm8 }, .{ 0x66, 0x0f, 0xc5 }, 0, .vex_128_w0, .avx },
.{ .vpextrw, .mri, .{ .r32_m16, .xmm, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_w0, .avx },
+ .{ .vphaddw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x01 }, 0, .vex_128_wig, .avx },
+ .{ .vphaddd, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x02 }, 0, .vex_128_wig, .avx },
+
+ .{ .vphaddsw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x03 }, 0, .vex_128_wig, .avx },
+
+ .{ .vphminposuw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x41 }, 0, .vex_128_wig, .avx },
+
+ .{ .vphsubw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x05 }, 0, .vex_128_wig, .avx },
+ .{ .vphsubd, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x06 }, 0, .vex_128_wig, .avx },
+
+ .{ .vphsubsw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x07 }, 0, .vex_128_wig, .avx },
+
.{ .vpinsrb, .rvmi, .{ .xmm, .xmm, .r32_m8, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .vex_128_w0, .avx },
.{ .vpinsrd, .rvmi, .{ .xmm, .xmm, .rm32, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w0, .avx },
.{ .vpinsrq, .rvmi, .{ .xmm, .xmm, .rm64, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w1, .avx },
.{ .vpinsrw, .rvmi, .{ .xmm, .xmm, .r32_m16, .imm8 }, .{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_w0, .avx },
+ .{ .vpmaddubsw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x04 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpmaddwd, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf5 }, 0, .vex_128_wig, .avx },
+
.{ .vpmaxsb, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_128_wig, .avx },
.{ .vpmaxsw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xee }, 0, .vex_128_wig, .avx },
.{ .vpmaxsd, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_128_wig, .avx },
@@ -2172,6 +2214,10 @@
.{ .vpmuldq, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x28 }, 0, .vex_128_wig, .avx },
+ .{ .vpmulhrsw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x0b }, 0, .vex_128_wig, .avx },
+
+ .{ .vpmulhuw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xe4 }, 0, .vex_128_wig, .avx },
+
.{ .vpmulhw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx },
.{ .vpmulld, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx },
@@ -2182,6 +2228,8 @@
.{ .vpor, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx },
+ .{ .vpsadbw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf6 }, 0, .vex_128_wig, .avx },
+
.{ .vpshufb, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x00 }, 0, .vex_128_wig, .avx },
.{ .vpshufd, .rmi, .{ .xmm, .xmm_m128, .imm8 }, .{ 0x66, 0x0f, 0x70 }, 0, .vex_128_wig, .avx },
@@ -2190,6 +2238,10 @@
.{ .vpshuflw, .rmi, .{ .xmm, .xmm_m128, .imm8 }, .{ 0xf2, 0x0f, 0x70 }, 0, .vex_128_wig, .avx },
+ .{ .vpsignb, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x08 }, 0, .vex_128_wig, .avx },
+ .{ .vpsignw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x09 }, 0, .vex_128_wig, .avx },
+ .{ .vpsignd, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x0a }, 0, .vex_128_wig, .avx },
+
.{ .vpsllw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf1 }, 0, .vex_128_wig, .avx },
.{ .vpsllw, .vmi, .{ .xmm, .xmm, .imm8 }, .{ 0x66, 0x0f, 0x71 }, 6, .vex_128_wig, .avx },
.{ .vpslld, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf2 }, 0, .vex_128_wig, .avx },
@@ -2447,6 +2499,16 @@
.{ .vpcmpgtq, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x37 }, 0, .vex_256_wig, .avx2 },
+ .{ .vphaddw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x01 }, 0, .vex_256_wig, .avx2 },
+ .{ .vphaddd, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x02 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vphaddsw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x03 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vphsubw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x05 }, 0, .vex_256_wig, .avx2 },
+ .{ .vphsubd, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x06 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vphsubsw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x07 }, 0, .vex_256_wig, .avx2 },
+
.{ .vperm2i128, .rvmi, .{ .ymm, .ymm, .ymm_m256, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x46 }, 0, .vex_256_w0, .avx2 },
.{ .vpermd, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x36 }, 0, .vex_256_w0, .avx2 },
@@ -2457,6 +2519,10 @@
.{ .vpermq, .rmi, .{ .ymm, .ymm_m256, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x00 }, 0, .vex_256_w1, .avx2 },
+ .{ .vpmaddubsw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x04 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpmaddwd, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0xf5 }, 0, .vex_256_wig, .avx2 },
+
.{ .vpmaskmovd, .rvm, .{ .xmm, .xmm, .m128 }, .{ 0x66, 0x0f, 0x38, 0x8c }, 0, .vex_128_w0, .avx2 },
.{ .vpmaskmovd, .rvm, .{ .ymm, .ymm, .m256 }, .{ 0x66, 0x0f, 0x38, 0x8c }, 0, .vex_256_w0, .avx2 },
.{ .vpmaskmovq, .rvm, .{ .xmm, .xmm, .m128 }, .{ 0x66, 0x0f, 0x38, 0x8c }, 0, .vex_128_w1, .avx2 },
@@ -2503,6 +2569,10 @@
.{ .vpmuldq, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x28 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmulhrsw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x0b }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpmulhuw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0xe4 }, 0, .vex_256_wig, .avx2 },
+
.{ .vpmulhw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx2 },
.{ .vpmulld, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx2 },
@@ -2513,6 +2583,8 @@
.{ .vpor, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsadbw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0xf6 }, 0, .vex_256_wig, .avx2 },
+
.{ .vpshufb, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x00 }, 0, .vex_256_wig, .avx2 },
.{ .vpshufd, .rmi, .{ .ymm, .ymm_m256, .imm8 }, .{ 0x66, 0x0f, 0x70 }, 0, .vex_256_wig, .avx2 },
@@ -2520,6 +2592,10 @@
.{ .vpshuflw, .rmi, .{ .ymm, .ymm_m256, .imm8 }, .{ 0xf2, 0x0f, 0x70 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsignb, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x08 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsignw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x09 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsignd, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x0a }, 0, .vex_256_wig, .avx2 },
+
.{ .vpsllw, .rvm, .{ .ymm, .ymm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf1 }, 0, .vex_256_wig, .avx2 },
.{ .vpsllw, .vmi, .{ .ymm, .ymm, .imm8 }, .{ 0x66, 0x0f, 0x71 }, 6, .vex_256_wig, .avx2 },
.{ .vpslld, .rvm, .{ .ymm, .ymm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf2 }, 0, .vex_256_wig, .avx2 },
src/arch/x86_64/Lower.zig
@@ -567,7 +567,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
}
fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
- @setEvalBranchQuota(2_600);
+ @setEvalBranchQuota(2_800);
const fixes = switch (inst.ops) {
.none => inst.data.none.fixes,
.inst => inst.data.inst.fixes,
@@ -601,9 +601,9 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
var buf: [max_len]u8 = undefined;
const fixes_name = @tagName(fixes);
- const pattern = fixes_name[if (std.mem.indexOfScalar(u8, fixes_name, ' ')) |i| i + 1 else 0..];
- const wildcard_i = std.mem.indexOfScalar(u8, pattern, '_').?;
- const parts = .{ pattern[0..wildcard_i], @tagName(inst.tag), pattern[wildcard_i + 1 ..] };
+ const pattern = fixes_name[if (std.mem.indexOfScalar(u8, fixes_name, ' ')) |i| i + " ".len else 0..];
+ const wildcard_index = std.mem.indexOfScalar(u8, pattern, '_').?;
+ const parts = .{ pattern[0..wildcard_index], @tagName(inst.tag), pattern[wildcard_index + "_".len ..] };
const err_msg = "unsupported mnemonic: ";
const mnemonic = std.fmt.bufPrint(&buf, "{s}{s}{s}", parts) catch
return lower.fail(err_msg ++ "'{s}{s}{s}'", parts);
src/arch/x86_64/Mir.zig
@@ -34,6 +34,16 @@ pub const Inst = struct {
/// ___ 4
_4,
+ /// ___ Demote
+ _demote,
+ /// ___ Flush
+ _flush,
+ /// ___ Flush Optimized
+ _flushopt,
+ /// ___ Instructions With T0 Hint
+ _it0,
+ /// ___ Instructions With T1 Hint
+ _it1,
/// ___ With NTA Hint
_nta,
/// System Call ___
@@ -44,6 +54,8 @@ pub const Inst = struct {
_t1,
/// ___ With T2 Hint
_t2,
+ /// ___ Write Back
+ _wb,
/// ___ With Intent to Write and T1 Hint
_wt1,
@@ -53,6 +65,8 @@ pub const Inst = struct {
_csspq,
/// ___ FS Segment Base
_fsbase,
+ /// ___ GS
+ _gs,
/// ___ GS Segment Base
_gsbase,
/// ___ Model Specific Register
@@ -67,8 +81,14 @@ pub const Inst = struct {
_pmc,
/// ___ Random Number
_rand,
+ /// ___ r Busy Flag in a Supervisor Shadow Stack token
+ _rssbsy,
/// ___ Random Seed
_seed,
+ /// ___ Shadow Stack Doubleword
+ _ssd,
+ /// ___ Shadow Stack Quadword
+ _ssq,
/// ___ Shadow Stack Pointer Doubleword
_sspd,
/// ___ Shadow Stack Pointer Quadword
@@ -77,9 +97,15 @@ pub const Inst = struct {
_tsc,
/// ___ Time-Stamp Counter And Processor ID
_tscp,
+ /// ___ User Shadow Stack Doubleword
+ _ussd,
+ /// ___ User Shadow Stack Quadword
+ _ussq,
/// VEX-Encoded ___ MXCSR
v_mxcsr,
+ /// Byte ___
+ b_,
/// Interrupt ___
/// Integer ___
i_,
@@ -118,6 +144,8 @@ pub const Inst = struct {
_ld,
/// ___ Left Without Affecting Flags
_lx,
+ /// ___ Mask
+ _msk,
/// ___ Right
/// ___ For Reading
/// ___ Register
@@ -139,6 +167,7 @@ pub const Inst = struct {
/// ___ Below
_b,
/// ___ Below Or Equal
+ /// ___ Big Endian
_be,
/// ___ Carry
/// ___ Carry Flag
@@ -212,8 +241,12 @@ pub const Inst = struct {
_w,
/// ___ Doubleword
//_d,
+ /// ___ Double Quadword to Quadword
+ _dq2q,
/// ___ QuadWord
_q,
+ /// ___ Quadword to Double Quadword
+ _q2dq,
/// ___ String
//_s,
@@ -369,6 +402,8 @@ pub const Inst = struct {
fn_sw,
/// Float Extended ___
fx_,
+ /// Float Extended ___ 64
+ fx_64,
/// ___ in 32-bit and Compatibility Mode
_32,
@@ -390,6 +425,14 @@ pub const Inst = struct {
p_dq,
/// Packed ___ Unsigned Doubleword to Quadword
p_udq,
+ /// Packed Carry-Less ___ Quadword to Double Quadword
+ pcl_qdq,
+ /// Packed Half ___ Doubleword
+ ph_d,
+ /// Packed Half ___ Saturate Word
+ ph_sw,
+ /// Packed Half ___ Word
+ ph_w,
/// ___ Aligned Packed Integer Values
_dqa,
/// ___ Unaligned Packed Integer Values
@@ -403,6 +446,10 @@ pub const Inst = struct {
//_sd,
/// ___ Packed Double-Precision Values
_pd,
+ /// Half ___ Packed Single-Precision Values
+ h_ps,
+ /// Half ___ Packed Double-Precision Values
+ h_pd,
/// ___ Internal Caches
//_d,
@@ -430,7 +477,7 @@ pub const Inst = struct {
v_w,
/// VEX-Encoded ___ Doubleword
v_d,
- /// VEX-Encoded ___ QuadWord
+ /// VEX-Encoded ___ Quadword
v_q,
/// VEX-Encoded ___ Aligned Packed Integer Values
v_dqa,
@@ -453,6 +500,14 @@ pub const Inst = struct {
vp_dq,
/// VEX-Encoded Packed ___ Unsigned Doubleword to Quadword
vp_udq,
+ /// VEX-Encoded Packed Carry-Less ___ Quadword to Double Quadword
+ vpcl_qdq,
+ /// VEX-Encoded Packed Half ___ Doubleword
+ vph_d,
+ /// VEX-Encoded Packed Half ___ Saturate Word
+ vph_sw,
+ /// VEX-Encoded Packed Half ___ Word
+ vph_w,
/// VEX-Encoded ___ Scalar Single-Precision Values
v_ss,
/// VEX-Encoded ___ Packed Single-Precision Values
@@ -463,6 +518,10 @@ pub const Inst = struct {
v_pd,
/// VEX-Encoded ___ 128-Bits Of Floating-Point Data
v_f128,
+ /// VEX-Encoded Half ___ Packed Single-Precision Values
+ vh_ps,
+ /// VEX-Encoded Half ___ Packed Double-Precision Values
+ vh_pd,
/// ___ 128-bit key with key locker
_128,
@@ -510,6 +569,10 @@ pub const Inst = struct {
/// Add scalar single-precision floating-point values
/// Add packed double-precision floating-point values
/// Add scalar double-precision floating-point values
+ /// Packed single-precision floating-point horizontal add
+ /// Packed double-precision floating-point horizontal add
+ /// Packed horizontal add
+ /// Packed horizontal add and saturate
add,
/// Logical and
/// Bitwise logical and of packed single-precision floating-point values
@@ -521,12 +584,15 @@ pub const Inst = struct {
/// Bit scan reverse
bs,
/// Byte swap
- bswap,
+ /// Swap GS base register
+ swap,
/// Bit test
/// Bit test and complement
/// Bit test and reset
/// Bit test and set
bt,
+ /// Check array index against bounds
+ bound,
/// Call
/// Fast system call
call,
@@ -542,17 +608,12 @@ pub const Inst = struct {
/// Clear interrupt flag
/// Clear task-switched flag in CR0
/// Clear user interrupt flag
- cl,
/// Cache line demote
- cldemote,
/// Flush cache line
- clflush,
/// Flush cache line optimized
- clflushopt,
/// Clear busy flag in a supervisor shadow stack token
- clrssbsy,
/// Cache line write back
- clwb,
+ cl,
/// Complement carry flag
cmc,
/// Conditional move
@@ -650,15 +711,16 @@ pub const Inst = struct {
lzcnt,
/// Move
/// Move data from string to string
+ /// Move data after swapping bytes
/// Move scalar single-precision floating-point value
/// Move scalar double-precision floating-point value
/// Move doubleword
/// Move quadword
/// Move aligned packed integer values
/// Move unaligned packed integer values
+ /// Move quadword from XMM to MMX technology register
+ /// Move quadword from MMX technology to XMM register
mov,
- /// Move data after swapping bytes
- movbe,
/// Move with sign extension
movsx,
/// Move with zero extension
@@ -671,6 +733,7 @@ pub const Inst = struct {
/// Multiply scalar double-precision floating-point values
/// Multiply packed unsigned doubleword integers
/// Multiply packed doubleword integers
+ /// Carry-less multiplication quadword
mul,
/// Two's complement negation
neg,
@@ -737,6 +800,8 @@ pub const Inst = struct {
sca,
/// Send user interprocessor interrupt
senduipi,
+ /// Serialize instruction execution
+ serialize,
/// Set byte on condition
set,
/// Logical shift left
@@ -758,6 +823,10 @@ pub const Inst = struct {
/// Subtract scalar single-precision floating-point values
/// Subtract packed double-precision floating-point values
/// Subtract scalar double-precision floating-point values
+ /// Packed single-precision floating-point horizontal subtract
+ /// Packed double-precision floating-point horizontal subtract
+ /// Packed horizontal subtract
+ /// Packed horizontal subtract and saturate
sub,
/// Set carry flag
/// Set direction flag
@@ -772,8 +841,6 @@ pub const Inst = struct {
st,
/// Store string
sto,
- /// Swap GS base register
- swapgs,
/// Test condition
/// Logical compare
/// Packed bit test
@@ -788,6 +855,8 @@ pub const Inst = struct {
/// Write to model specific register
/// Write to model specific register
/// Write to model specific register
+ /// Write to shadow stack
+ /// Write to user shadow stack
wr,
/// Exchange and add
xadd,
@@ -904,6 +973,10 @@ pub const Inst = struct {
cmpgt,
/// Empty MMX technology state
emms,
+ /// Multiply and add packed signed and unsigned bytes
+ maddubs,
+ /// Multiply and add packed integers
+ maddw,
/// Multiply packed signed integers and store low result
mull,
/// Multiply packed signed integers and store high result
@@ -932,6 +1005,8 @@ pub const Inst = struct {
unpcklwd,
// SSE
+ /// Average packed integers
+ avg,
/// Convert packed doubleword integers to packed single-precision floating-point values
/// Convert packed doubleword integers to packed double-precision floating-point values
cvtpi2,
@@ -994,9 +1069,13 @@ pub const Inst = struct {
/// Move unaligned packed single-precision floating-point values
/// Move unaligned packed double-precision floating-point values
movu,
+ /// Multiply packed unsigned integers and store high result
+ mulhu,
/// Prefetch data into caches
/// Prefetch data into caches with intent to write
prefetch,
+ /// Compute sum of absolute differences
+ sadb,
/// Packed interleave shuffle of quadruplets of single-precision floating-point values
/// Packed interleave shuffle of pairs of double-precision floating-point values
/// Shuffle packed doublewords
@@ -1056,9 +1135,6 @@ pub const Inst = struct {
/// Packed single-precision floating-point add/subtract
/// Packed double-precision floating-point add/subtract
addsub,
- /// Packed single-precision floating-point horizontal add
- /// Packed double-precision floating-point horizontal add
- hadd,
/// Replicate double floating-point values
movddup,
/// Replicate single floating-point values
@@ -1069,6 +1145,10 @@ pub const Inst = struct {
// SSSE3
/// Packed align right
alignr,
+ /// Packed multiply high with round and scale
+ mulhrs,
+ /// Packed sign
+ sign,
// SSE4.1
/// Pack with unsigned saturation
@@ -1090,6 +1170,8 @@ pub const Inst = struct {
/// Extract packed floating-point values
/// Extract packed integer values
extract,
+ /// Packed horizontal word minimum
+ hminposu,
/// Insert scalar single-precision floating-point value
/// Insert packed floating-point values
insert,
@@ -1111,10 +1193,6 @@ pub const Inst = struct {
/// Accumulate CRC32 value
crc32,
- // PCLMUL
- /// Carry-less multiplication quadword
- clmulq,
-
// AES
/// Perform one round of an AES decryption flow
/// Perform ten rounds of AES decryption flow with key locker using 128-bit key
@@ -1634,12 +1712,51 @@ pub const Inst = struct {
reg_list: RegisterList,
};
- // Make sure we don't accidentally make instructions bigger than expected.
- // Note that in safety builds, Zig is allowed to insert a secret field for safety checks.
comptime {
if (!std.debug.runtime_safety) {
+ // Make sure we don't accidentally make instructions bigger than expected.
+ // Note that in safety builds, Zig is allowed to insert a secret field for safety checks.
assert(@sizeOf(Data) == 8);
}
+ const Mnemonic = @import("Encoding.zig").Mnemonic;
+ if (@typeInfo(Mnemonic).@"enum".fields.len != 977 or
+ @typeInfo(Fixes).@"enum".fields.len != 231 or
+ @typeInfo(Tag).@"enum".fields.len != 251)
+ {
+ const cond_src = (struct {
+ fn src() std.builtin.SourceLocation {
+ return @src();
+ }
+ }).src();
+ @setEvalBranchQuota(1_750_000);
+ for (@typeInfo(Mnemonic).@"enum".fields) |mnemonic| {
+ if (mnemonic.name[0] == '.') continue;
+ for (@typeInfo(Fixes).@"enum".fields) |fixes| {
+ const pattern = fixes.name[if (std.mem.indexOfScalar(u8, fixes.name, ' ')) |index| index + " ".len else 0..];
+ const wildcard_index = std.mem.indexOfScalar(u8, pattern, '_').?;
+ const mnem_prefix = pattern[0..wildcard_index];
+ const mnem_suffix = pattern[wildcard_index + "_".len ..];
+ if (!std.mem.startsWith(u8, mnemonic.name, mnem_prefix)) continue;
+ if (!std.mem.endsWith(u8, mnemonic.name, mnem_suffix)) continue;
+ if (@hasField(
+ Tag,
+ mnemonic.name[mnem_prefix.len .. mnemonic.name.len - mnem_suffix.len],
+ )) break;
+ } else @compileError("'" ++ mnemonic.name ++ "' is not encodable in Mir");
+ }
+ @compileError(std.fmt.comptimePrint(
+ \\All mnemonics are encodable in Mir! You may now change the condition at {s}:{d} to:
+ \\if (@typeInfo(Mnemonic).@"enum".fields.len != {d} or
+ \\ @typeInfo(Fixes).@"enum".fields.len != {d} or
+ \\ @typeInfo(Tag).@"enum".fields.len != {d})
+ , .{
+ cond_src.file,
+ cond_src.line - 6,
+ @typeInfo(Mnemonic).@"enum".fields.len,
+ @typeInfo(Fixes).@"enum".fields.len,
+ @typeInfo(Tag).@"enum".fields.len,
+ }));
+ }
}
};
test/behavior/x86_64/build.zig
@@ -87,7 +87,7 @@ pub fn build(b: *std.Build) void {
.{
.cpu_arch = .x86_64,
.cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v2 },
- .cpu_features_add = std.Target.x86.featureSet(&.{ .adx, .gfni, .pclmul }),
+ .cpu_features_add = std.Target.x86.featureSet(&.{ .adx, .fast_hops, .gfni, .pclmul, .slow_incdec }),
},
.{
.cpu_arch = .x86_64,
@@ -97,6 +97,7 @@ pub fn build(b: *std.Build) void {
.{
.cpu_arch = .x86_64,
.cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 },
+ .cpu_features_add = std.Target.x86.featureSet(&.{ .adx, .fast_hops, .gfni, .pclmul, .slow_incdec }),
.cpu_features_sub = std.Target.x86.featureSet(&.{.avx2}),
},
.{
@@ -106,12 +107,11 @@ pub fn build(b: *std.Build) void {
.{
.cpu_arch = .x86_64,
.cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 },
- .cpu_features_add = std.Target.x86.featureSet(&.{ .adx, .gfni, .pclmul }),
+ .cpu_features_add = std.Target.x86.featureSet(&.{ .adx, .fast_hops, .gfni, .slow_incdec, .vpclmulqdq }),
},
.{
.cpu_arch = .x86_64,
.cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v4 },
- .cpu_features_add = std.Target.x86.featureSet(&.{.vpclmulqdq}),
},
}) |query| {
const target = b.resolveTargetQuery(query);
test/behavior/x86_64/unary.zig
@@ -4451,141 +4451,354 @@ fn unary(comptime op: anytype, comptime opts: struct {
}
fn testFloatVectors() !void {
try testArgs(@Vector(1, f16), .{
- -0x1.17cp-12,
+ 0x1.7d8p12,
});
try testArgs(@Vector(2, f16), .{
- 0x1.47cp9, 0x1.3acp9,
+ -0x0.054p-14, -0x1.c6cp10,
+ });
+ try testArgs(@Vector(3, f16), .{
+ -0x1.39cp-3, -0x1.088p4, -0x0.644p-14,
});
try testArgs(@Vector(4, f16), .{
- 0x1.ab4p0, -0x1.7fcp-7, -0x1.1cp0, -0x1.f14p12,
+ -0x1.108p11, 0x1.364p-3, 0x1.8f4p-2, -0x0.8acp-14,
+ });
+ try testArgs(@Vector(5, f16), .{
+ 0x1.e1p8, 0x1.ddp11, 0x0.388p-14, 0x1.7p-7, -0x0.a08p-14,
+ });
+ try testArgs(@Vector(7, f16), .{
+ 0x1.988p-14, -0x1.f7p-14, 0x1.38cp12, 0x0.0fp-14, -0x1.774p2, -0x1.de4p11, -0x1.9bp-10,
});
try testArgs(@Vector(8, f16), .{
- -0x1.8d8p8, 0x1.83p10, -0x1.5ap-1, -0x1.d78p13, -0x1.608p12, 0x1.e8p-9, -0x1.688p-10, -0x1.738p9,
+ 0x1.6ecp12, -0x1.834p9, -0x1.2c8p13, 0x1.e7cp3, -0x1.418p3, 0x1.15cp-1, 0x1.fecp-2, 0x1.1dp-3,
+ });
+ try testArgs(@Vector(9, f16), .{
+ 0x1.da8p-1, 0x1.d44p-11, 0x1.884p-10, -0x1.898p1, 0x1.5ccp-5, 0x1.68p0, 0x1.618p14, -0x1.c34p2,
+ -0x1.318p6,
+ });
+ try testArgs(@Vector(15, f16), .{
+ 0x1.41cp11, 0x1.edp-1, 0x1.1c8p-12, -0x0.0ecp-14, -0x1.abp8, 0x1.34p0, -0x1.24cp-4, -0x1.214p1,
+ -0x1.604p9, -0x1.364p-1, 0x1.adp0, 0x0.63p-14, 0x0.60cp-14, 0x1.6ep-6, 0x0.84cp-14,
});
try testArgs(@Vector(16, f16), .{
- 0x1.da8p-1, -0x1.ed4p-10, -0x1.dc8p1, 0x1.b78p-14, nan(f16), 0x1.9d8p8, nan(f16), 0x1.d5p13,
- -0x1.2dp13, 0x1.6c4p12, 0x1.a9cp-11, -0x1.0ecp8, 0x0.4ccp-14, -0x1.0a8p-6, -0x1.5bcp-14, 0x1.6d8p-9,
+ 0x1.308p6, -0x1.078p-1, 0x0.81p-14, 0x1.1b4p-14, 0x1.4ep-7, 0x1.75p12, 0x1.264p-8, 0x1.a6p2,
+ 0x1.9a4p-3, 0x1.e9p4, -0x1.a4p-6, 0x1.6acp-1, 0x1.7e8p-12, -0x1.02cp6, -0x1.0ccp-14, 0x1.edp-12,
+ });
+ try testArgs(@Vector(17, f16), .{
+ 0x1.2c4p-1, 0x1.91cp-3, 0x1.bf8p10, -0x0.25p-14, 0x1.45p-9, 0x1.cap-2, 0x1.e9cp8, 0x1.b7p8,
+ 0x1.21cp9, -0x0.ba4p-14, -0x1.ddcp-4, -0x1.bcp9, -0x1.7dcp-3, 0x1.6a4p-12, 0x1.ca8p-8, -0x1.558p11,
+ 0x0.26cp-14,
+ });
+ try testArgs(@Vector(31, f16), .{
+ -0x1.f94p7, 0x1.55cp9, -0x1.f78p11, -0x0.f48p-14, -0x1.b6p-2, 0x1.85cp1, -0x1.114p4, -0x1.97cp-5,
+ -0x1.6f8p2, 0x1.79cp-3, 0x1.e58p-9, -0x1.f5cp-10, 0x1.a74p5, -0x0.1e8p-14, 0x1.15cp-14, 0x1.814p-7,
+ -0x0.318p-14, -0x1.b5p-5, -0x1.058p-10, 0x1.124p0, -0x1.20cp-1, 0x1.978p10, -0x1.808p-8, 0x1.528p-6,
+ -0x1.ba8p9, 0x0.294p-14, 0x1.11cp0, 0x1.e5p5, 0x1.904p-11, 0x1.d78p11, -0x1.c1p5,
});
try testArgs(@Vector(32, f16), .{
- 0x1.d5cp-6, -0x1.a98p5, 0x1.49cp5, -0x1.e4p-1, -0x1.21p-13, -0x1.c94p-1, -0x1.adcp-5, -0x1.524p-1,
- -0x1.0d8p-3, -0x1.5c4p-2, 0x1.f84p-2, 0x1.664p1, -0x1.f64p13, -0x1.bf4p4, -0x1.4b8p0, -0x0.f64p-14,
- -0x1.3f8p1, 0x1.098p2, -0x1.a44p8, 0x1.048p13, 0x1.fd4p-11, 0x1.18p-9, -0x1.504p2, 0x1.d04p7,
- -nan(f16), 0x1.a94p2, 0x0.5e8p-14, -0x1.7acp-7, 0x1.4c8p-3, 0x1.518p-4, nan(f16), 0x1.8f8p10,
+ -0x0.11p-14, 0x0.29cp-14, 0x1.7a8p5, 0x1.49cp-11, 0x1.6c4p-3, -0x1.85cp-11, 0x1.ap-8, -0x0.49cp-14,
+ 0x1.dfp2, -0x1.4cp1, 0x1.138p-5, -0x1.45p-9, 0x0.88cp-14, 0x1.6acp10, 0x1.594p3, 0x1.704p6,
+ -0x1.c34p13, 0x1.44cp0, -0x1.cfcp-10, 0x1.5c8p-4, -0x1.b2cp-10, -0x1.178p1, -0x1.b74p7, -0x1.d18p0,
+ 0x1.0fcp-9, 0x1.b6p-11, -0x1.ff4p-2, -0x0.0b8p-14, 0x1.4dcp-10, -0x1.af4p-5, -0x1.eap2, -0x1.79cp-4,
+ });
+ try testArgs(@Vector(33, f16), .{
+ -0x1.6e8p0, -0x1.304p-12, 0x1.558p11, 0x1.cf4p13, 0x1.cc4p-9, 0x1.d88p-11, 0x1.838p8, -0x1.2ecp-10,
+ -0x1.65cp-1, -0x1.644p8, -0x1.048p10, 0x0.114p-14, 0x1.8a4p13, 0x1.c9p-3, 0x1.dfp-6, -0x1.774p12,
+ -0x0.4dp-14, 0x1.2ccp-12, 0x0.98p-14, -0x1.b18p-6, 0x0.1ecp-14, 0x0.86cp-14, 0x0.6e8p-14, -0x1.6dp14,
+ 0x1.9e8p-3, 0x1.1ep10, -0x1.6cp13, -0x1.d44p1, -0x1.f54p-12, -0x1.fe8p-14, 0x1.968p-1, -0x1.ab4p-9,
+ 0x1.f0cp0,
+ });
+ try testArgs(@Vector(63, f16), .{
+ -0x1.3ecp-1, 0x0.04p-14, -0x1.1cp-2, 0x1.0dp10, 0x1.ddcp-12, -0x1.57cp-11, -0x1.84p-9, 0x1.dfp4,
+ 0x1.6e4p-9, 0x0.5d4p-14, -0x0.51cp-14, -0x1.bp2, -0x1.8ecp-14, 0x1.268p-2, -0x0.69p-14, -0x1.b98p7,
+ -0x0.cb4p-14, -0x1.accp-3, 0x1.cdcp6, -0x1.e6p7, 0x1.4ep-14, 0x1.5fp5, -0x1.95p8, 0x1.044p8,
+ -0x1.e14p9, 0x1.e84p14, 0x1.ee8p-10, -0x1.0a4p8, 0x1.b14p-8, -0x1.5dp9, 0x0.e68p-14, -0x0.1acp-14,
+ -0x1.7ccp-11, 0x1.45p-10, 0x0.044p-14, 0x1.078p4, 0x1.c8p-1, -0x1.8fp11, -0x1.cbp0, -0x1.208p-10,
+ -0x1.a5p-1, -0x1.164p-8, -0x1.304p-3, -0x1.038p-10, -0x1.4dp11, 0x0.248p-14, 0x1.09cp-4, -0x1.a7cp14,
+ -0x1.a38p-6, -0x1.0bp-9, -0x1.fecp-14, -0x1.c78p-10, -0x1.e38p-11, 0x1.47p-5, -0x1.3bcp5, 0x1.6a4p9,
+ 0x0.728p-14, 0x1.9c8p9, 0x1.88p12, -0x1.e6p0, 0x1.5dcp-2, -0x1.7f4p-4, -0x1.a6p3,
});
try testArgs(@Vector(64, f16), .{
- -0x1.c2p2, 0x0.2fcp-14, 0x1.de8p0, -0x1.714p2, 0x1.f9p-7, -0x1.11cp-13, -0x1.558p10, -0x1.2acp-7,
- 0x1.348p14, 0x1.2dcp7, -0x1.8acp-12, -0x1.2cp2, 0x1.868p1, -0x1.1f8p-14, 0x1.638p7, -0x1.734p-5,
- 0x0.b98p-14, -0x1.7f4p-12, -0x1.38cp15, 0x1.50cp15, 0x1.91cp8, 0x1.cb4p-1, 0x1.fc4p-13, 0x1.9a4p0,
- 0x1.18p-4, 0x1.60cp10, 0x1.6fp-12, 0x1.b48p6, 0x1.37cp-11, 0x1.424p7, 0x1.44cp13, 0x1.aep5,
- 0x1.968p14, 0x1.e8p13, -0x1.bp2, -0x1.644p5, 0x1.de4p-8, -0x1.5b4p-14, -0x1.4ap1, -0x1.868p9,
- -0x1.d14p0, 0x1.d7cp15, 0x1.3c8p14, 0x1.2ccp-14, -0x1.ee4p8, 0x1.49p-3, 0x1.35cp12, 0x1.d34p6,
- 0x1.7acp3, -0x1.fa4p2, 0x1.7b4p13, -0x1.cf4p-12, -0x1.ebcp-10, -0x1.5p-3, 0x1.4bp-6, 0x1.83p12,
- -0x1.f9cp-8, -0x1.43p-8, -0x1.99p-1, -0x1.dacp3, -0x1.728p-4, -0x1.03cp4, 0x1.604p-2, -0x1.0ep13,
+ -0x1.67cp-13, 0x1.f2cp-10, 0x1.69cp11, -0x1.0dp-2, 0x1.a8p9, 0x1.7dp-11, 0x1.908p-5, -0x1.37cp0,
+ 0x1.8f8p5, 0x1.38p11, 0x1.d2p8, 0x1.b74p-10, -0x1.188p-7, 0x1.578p5, 0x1.68p-11, -0x1.b9cp8,
+ -0x1.ba4p2, 0x0.b78p-14, 0x1.458p-8, 0x0.054p-14, -0x0.63p-14, 0x1.83p10, 0x1.94cp-2, -0x1.d7p2,
+ -0x1.62p4, 0x1.b34p4, -0x1.4cp-11, -0x1.714p9, -0x1.ce4p1, 0x1.75p-3, -0x1.cbp-13, 0x1.714p6,
+ -0x1.cb8p7, -0x1.b98p-4, 0x1.facp-13, -0x1.1f4p8, -0x1.92p-3, 0x0.144p-14, 0x1.504p-4, 0x1.a9p-10,
+ 0x1.a94p3, 0x1.708p-2, 0x1.c84p-14, 0x1.77cp9, -0x0.1e4p-14, -0x0.3d8p-14, -0x1.f8p4, -0x1.2bp5,
+ 0x1.5b8p-14, 0x1.898p14, -0x1.e2p3, -0x1.0e8p-5, 0x1.4dcp-12, 0x1.368p8, 0x1.968p-7, -0x1.98cp-5,
+ 0x1.39cp-13, 0x1.23p2, 0x1.8e8p6, 0x1.344p7, 0x1.70cp-5, -0x1.f24p11, -0x1.54p-7, -0x1.904p3,
+ });
+ try testArgs(@Vector(65, f16), .{
+ -0x1.d78p-4, 0x1.ea8p-8, -0x1.b4cp6, -0x1.c7cp4, 0x1.dfcp7, 0x1.a8cp6, -0x1.768p11, 0x0.0fp-14,
+ -0x1.a3p-4, -0x1.868p-9, 0x1.23p-1, -0x1.2e8p3, -0x1.9e8p-12, 0x1.8a8p3, 0x1.168p-5, -0x1.608p8,
+ -0x1.9d4p-4, -0x1.17cp-1, -0x1.f2p1, -0x1.d38p-11, 0x1.f38p-12, -0x1.92p-11, 0x1.c44p6, 0x1.4fp-3,
+ 0x0.18p-14, 0x1.3dp11, -0x1.ce4p9, -0x1.bf8p-12, 0x0.88cp-14, -0x1.998p-9, 0x1.788p-2, -0x1.5c4p2,
+ 0x0.08cp-14, -0x0.6f8p-14, 0x1.c7cp-10, -0x0.1p-14, -0x1.0fcp-9, -0x1.5a4p6, -0x1.8c8p-12, 0x0.57p-14,
+ -0x1.96cp-9, 0x1.6ecp10, -0x1.c18p1, -0x1.0ap5, -0x0.768p-14, -0x1.f8cp-6, 0x0.44p-14, -0x1.2b4p-2,
+ 0x1.efcp-13, -0x1.434p-13, 0x1.434p-3, 0x1.a6p-2, 0x1.bc4p7, -0x0.e1p-14, -0x1.d9cp-7, -0x1.f94p-9,
+ 0x1.448p-6, 0x1.0d8p3, -0x0.4a4p-14, -0x1.25cp-10, 0x1.c18p12, 0x0.1ccp-14, -0x1.ep14, -0x1.42cp6,
+ 0x1.14p8,
});
try testArgs(@Vector(1, f32), .{
- -0x1.17cp-12,
+ 0x1.12e082p8,
});
try testArgs(@Vector(2, f32), .{
- -0x1.a3123ap90, -0x1.4a2ec6p-54,
+ -0x1.f04666p17, 0x1.27d624p4,
+ });
+ try testArgs(@Vector(3, f32), .{
+ -0x1.c3168cp-85, -0x1.169cdcp9, -0x1.4bdb2ap13,
});
try testArgs(@Vector(4, f32), .{
- -0x1.8a41p77, -0x1.7c54e2p-61, -0x1.498556p-41, 0x1.d77c22p-20,
+ -0x1.a8b1d6p29, -0x1.b94e32p-76, 0x1.f4d9aap-43, 0x1.e6c654p44,
+ });
+ try testArgs(@Vector(5, f32), .{
+ 0x1.37c57ep-53, -0x1.832c84p49, -0x1.04256ep-110, -0x1.de4454p-37,
+ -0x1.a36832p-34,
+ });
+ try testArgs(@Vector(7, f32), .{
+ -0x1.35df86p87, -0x1.d96a52p62, 0x1.f9d3ecp-12, 0x1.5f4cc6p112,
+ 0x1.176cfap94, 0x1.bb86fcp69, 0x1.015e56p0,
});
try testArgs(@Vector(8, f32), .{
- 0x1.943da4p-86, 0x1.528792p95, -0x1.9c9bfap-26, -0x1.8df936p-90,
- -0x1.6a70cep56, 0x1.626638p-48, 0x1.7bb2bap-57, -0x1.ac5104p94,
+ -0x1.9dd6cap3, 0x1.726066p-42, 0x1.5b1f5ep-20, -0x1.347ed6p29,
+ 0x1.bfb5d4p-126, -0x1.b0e8dp45, 0x1.5577bep45, -0x1.9d1608p2,
+ });
+ try testArgs(@Vector(9, f32), .{
+ -0x1.4159b2p76, 0x1.bea7b8p-107, -0x1.b47036p-82, -0x1.4635ap-26,
+ -0x1.27bc98p-47, 0x1.1e0ap-116, 0x1.0f628p-118, 0x1.2e63bcp-62,
+ 0x1.d0e45ep-57,
+ });
+ try testArgs(@Vector(15, f32), .{
+ 0x1.65e0bcp-12, 0x1.d947c6p-42, -0x1.4596acp64, -0x1.2a897cp75,
+ 0x1.cb074ap-8, 0x1.e44a98p-62, -0x1.3edb2p74, 0x1.07aecep-2,
+ -0x1.fda1f8p14, 0x1.2f2c7ap-95, 0x1.9814e6p-33, 0x1.6d6a58p3,
+ 0x1.6a1478p-3, -0x1.85886ap64, -0x1.e2b9bcp-114,
});
try testArgs(@Vector(16, f32), .{
- 0x1.157044p115, -0x1.416c04p-111, 0x1.a8f164p-104, 0x1.9b6678p84,
- -0x1.9d065cp9, -0x1.e8c4b4p126, -0x1.ddb968p84, -0x1.fec8c8p74,
- 0x1.64ffb2p59, 0x1.548922p20, 0x1.7270fcp22, -0x1.abac68p33,
- 0x1.faabfp33, -0x1.8aee82p55, 0x1.1bf8fp75, 0x1.33c46ap-66,
+ 0x1.348b38p103, 0x1.bbc8e4p8, -0x1.03f48ap-119, -0x1.90f87cp115,
+ -0x1.88aaaep28, -0x1.21ec4p-94, 0x1.e1f21cp-57, 0x1.0e7dd2p-37,
+ -0x1.5963a2p-24, 0x1.4c314cp-61, -0x1.753d5ap113, -0x1.65705p-12,
+ -0x1.e34902p-54, -0x1.ab8022p87, -0x1.5cc252p-99, 0x1.4f4fe6p41,
+ });
+ try testArgs(@Vector(17, f32), .{
+ 0x1.6be79ap-19, -0x1.38819p-21, -0x1.8551dp2, -0x1.43155ep-126,
+ 0x1.96e6p108, 0x1.58abaap41, 0x1.145ffcp124, -0x1.8e314ep-41,
+ -0x1.63151p42, 0x1.9585e8p124, 0x1.4bdd42p-66, 0x1.858674p-45,
+ -0x1.bccb68p66, -0x1.88e0e8p-14, -0x1.e0461cp-116, 0x1.3c1e2ep120,
+ -0x1.0076dep14,
+ });
+ try testArgs(@Vector(31, f32), .{
+ 0x1.8d5b34p-49, -0x1.bd019cp-83, -0x1.1d06e2p-95, -0x1.d9ac6ap-45,
+ 0x1.f942dap10, -0x1.c23402p121, -0x1.8e5656p-32, 0x1.925222p-53,
+ -0x1.16440ep-117, 0x1.b146cep107, -0x1.b58cdep-52, 0x1.713f34p8,
+ 0x1.3de424p99, -0x1.3e6d6ep-28, -0x1.8261b4p-69, 0x1.043d66p-91,
+ -0x1.fbcd6ep113, 0x1.7934dcp-47, 0x1.fa8152p99, 0x1.c29968p-58,
+ 0x1.77f26ap82, 0x1.4602aap-57, -0x1.8a4cb4p8, 0x1.d48cdap113,
+ 0x1.636a7ep29, 0x1.730262p57, 0x1.29e668p7, 0x1.58592cp20,
+ 0x1.d09ebp-107, 0x1.7a85c6p-39, 0x1.38e1d6p44,
});
try testArgs(@Vector(32, f32), .{
- -0x1.039b68p37, -0x1.34de4ap-74, -0x1.05d78ap-76, -0x1.be0f5ap-47,
- 0x1.032204p-38, 0x1.ef8e2ap-78, -0x1.b013ecp-80, 0x1.71fe4cp99,
- 0x1.abdadap-14, 0x1.56a9a8p-48, -0x1.8bbd7ep9, 0x1.edd308p-72,
- -0x1.92fafcp-121, -0x1.50812p19, 0x1.f4ddc4p28, -0x1.6f0b12p-50,
- -0x1.12ab02p127, 0x1.24df48p21, -0x1.993c3p-14, -0x1.4cc476p-112,
- 0x1.13d9a8p-40, 0x1.a6e652p-9, -0x1.9c730cp-21, -0x1.a75aaap-70,
- -0x1.39e632p-111, 0x1.8e8da8p-45, 0x1.b5652cp31, 0x1.258366p44,
- 0x1.d473aap92, -0x1.951b64p9, 0x1.542edp15, -0x0.f6222ap-126,
+ -0x1.95dec4p-65, 0x1.3833cp65, -0x1.0ef5ap-53, 0x1.86e4c8p101,
+ -0x1.713132p24, -0x1.c6fd0ep123, -0x1.75aadcp88, -0x1.b8f0fp18,
+ 0x1.0f5b8ep-34, -0x1.0d0d66p-15, 0x0.842836p-126, -0x1.157782p22,
+ -0x1.025e8ap-100, 0x1.be825ep117, 0x1.d3efc6p-45, 0x1.ed8462p-34,
+ -0x1.b373c8p-118, -0x1.dbfd16p4, 0x1.73ee9p-56, -0x1.cdff48p-69,
+ 0x1.1b806ep-78, 0x1.65a58ap-4, -0x1.0d851cp77, 0x1.442c12p41,
+ 0x1.215116p47, -0x1.75f266p-48, 0x1.2273d4p89, 0x1.1bab24p-100,
+ -0x1.0300ep-22, 0x1.8c199cp-70, -0x1.70e08cp-66, 0x1.aa6b3ep-24,
+ });
+ try testArgs(@Vector(33, f32), .{
+ -0x1.4eddccp-116, 0x1.724e18p-94, -0x1.9d40bep54, -0x1.0afc5p-14,
+ 0x1.576c2p92, 0x1.cf52b6p110, -0x1.7e67ep117, -0x1.7db66ep90,
+ 0x1.3eac22p-38, 0x1.6ba068p72, -0x1.72dc2cp97, -0x1.4193f4p72,
+ 0x1.aa81f6p86, 0x1.984268p53, -0x1.14ba6ep-45, 0x1.15603ep-122,
+ 0x1.85e75p-56, 0x1.108a82p-121, 0x1.569ecp62, -0x1.7f3268p-68,
+ -0x1.d0964ep0, 0x0.f7a596p-126, -0x1.367646p-11, 0x1.2065bp-26,
+ 0x1.cc954ap125, -0x1.956e1cp65, 0x1.774dep112, 0x1.69dfcep-16,
+ -0x1.b0efb2p76, 0x1.14c54p70, -0x1.7c6b08p25, 0x1.ae20b4p31,
+ -0x1.73c584p-118,
});
try testArgs(@Vector(1, f64), .{
- -0x1.0114613df6f97p816,
+ 0x1.58849bfb1303cp-254,
});
try testArgs(@Vector(2, f64), .{
- -0x1.8404dad72003cp720, -0x1.6b14b40bcf3b7p-176,
+ -0x1.b4a24030f3facp215, -0x1.c1bdddbc41cdep950,
+ });
+ try testArgs(@Vector(3, f64), .{
+ -0x1.7d154dcee386cp-284, -0x1.2fdda9cbabfap-84,
+ 0x1.00c86a9c3de5cp-46,
});
try testArgs(@Vector(4, f64), .{
- -0x1.04e1acbfddd9cp681, -0x1.ed553cc056da7p-749,
- 0x1.3d3f703a0c893p-905, 0x1.0b35633fa78fp691,
+ 0x1.70f298f25a9bfp826, 0x1.4b944832c8eecp-319,
+ -0x1.d801afafdbc01p-708, -0x1.65d0b4b097a57p-872,
+ });
+ try testArgs(@Vector(5, f64), .{
+ -0x1.4796bdf4c112bp938, 0x1.3661030c6a2fp-156,
+ -0x1.20d194f89bc7fp-9, -0x1.f545d17a1d9e8p604,
+ 0x1.c786013e7205ep-514,
+ });
+ try testArgs(@Vector(7, f64), .{
+ -0x1.8f6d6e549941fp501, -0x1.56374640d779p-762,
+ -0x1.4ea02d12bd9cfp209, -0x1.ab85b639e78c6p-879,
+ -0x1.fcd56fe4f85abp47, -0x1.8963745584169p-957,
+ -0x1.581a8a0033e8p915,
});
try testArgs(@Vector(8, f64), .{
- -0x1.901a2a60f0562p-301, -0x1.2516175ad61ecp-447,
- 0x1.e7b12124846bfp564, 0x1.9291384bd7259p209,
- -0x1.a7bf62f803c98p900, 0x1.4e2e26257bb3p987,
- -0x1.413ca9a32d894p811, 0x1.61b1dd9432e95p479,
+ -0x1.2a8fb1782b7f2p-126, -0x1.b246d12815c21p606,
+ 0x1.6bc24f2a268b9p837, 0x1.1d550478ebd71p1016,
+ 0x1.d2ba52815edc2p252, 0x1.a8d87e5eb97ecp-450,
+ -0x1.c8a3d899aa89p601, -0x1.1fa47083d9a8fp289,
+ });
+ try testArgs(@Vector(9, f64), .{
+ -0x1.312d39a09757p-567, -0x1.4b0ef2ac9424ep-10,
+ 0x1.84302715c6852p930, -0x1.01565f82fd32p761,
+ -0x1.36ad9c057719ap-351, 0x1.dc4929f2400c8p793,
+ -0x1.e90f3ae855d3dp-474, 0x1.4e65fb145865ep-834,
+ 0x1.4236a94937ee3p-987,
+ });
+ try testArgs(@Vector(15, f64), .{
+ 0x1.df73a72937309p351, -0x1.73506ab182b9p-23,
+ 0x1.b2c954612187p-997, 0x1.7c5ee7c602989p-93,
+ -0x1.5edba35428d13p762, -0x1.e3bc1f194dc8cp-386,
+ 0x1.ca056fb59bdb9p651, 0x1.e59b99b174a0dp-528,
+ 0x1.7a995c7651aa7p929, -0x1.a25d3d5153405p413,
+ 0x1.e5579317d4b37p-50, 0x1.f9d5578c67f67p-90,
+ -0x1.5da751d423506p611, 0x1.9a2cba7bf2467p488,
+ 0x1.db3d45f662c4ep-619,
});
try testArgs(@Vector(16, f64), .{
- -0x1.8fc7286d95f54p-235, -0x1.796a7ea8372b6p-837,
- -0x1.8c0f930539acbp-98, -0x1.ec80dfbf0b931p-430,
- -0x1.e3d80c640652fp-1019, 0x1.8241238fb542fp161,
- -0x1.e1f1a79d50263p137, -0x1.9ac5cb2771c28p-791,
- 0x1.4d8f00fe881e7p-401, -0x1.87fbd7bfd99d7p346,
- -0x1.a8a7cc575335ep1017, 0x1.37bb88dc3fd8bp-355,
- 0x1.9d53d346c0e65p929, -0x1.bbae3d0229c34p289,
- -0x1.cb8ef994d5ce5p25, 0x1.ba20af512616ap50,
+ 0x1.fd61de463a33cp898, -0x1.47be52b4f1241p-18,
+ 0x1.729aa777312a3p-930, -0x1.2db258cd9984dp895,
+ 0x1.a1fbc900c10cbp517, -0x1.e93dfa8923807p815,
+ -0x1.e8f19fc0aa2a8p191, -0x1.1b084206321d5p861,
+ -0x1.0be3c6310c58ep457, 0x1.816c3bcf4b9f5p-504,
+ 0x1.ec4b026b00c91p-831, 0x1.e42d18f5c7e4bp924,
+ -0x1.f1483ecd74646p560, -0x1.cc5aea97d2264p447,
+ -0x1.a0b1e5b69d166p597, 0x1.e9a109fcf1358p694,
+ });
+ try testArgs(@Vector(17, f64), .{
+ -0x1.cd163cf2878e5p-934, -0x1.ce0ad5b67552p196,
+ -0x1.da0fd3a62b298p508, 0x1.1981c99b14943p3,
+ 0x1.d2f6461a9d1a9p390, -0x1.e8e877d3b4e96p-539,
+ -0x1.8ad9d3e185c43p864, 0x1.61786be9783eep-110,
+ -0x1.1f4be91d90cc3p-500, 0x1.71cacdd984837p956,
+ 0x1.7b6ae301fd95ep-661, 0x1.24571ba56e32p343,
+ 0x1.b1a9454ab9481p648, -0x1.887873f8044fep842,
+ -0x1.2f4ee57b9de22p-967, -0x1.c931346ced885p-983,
+ 0x1.fe31b9923796bp-772,
});
try testArgs(@Vector(1, f80), .{
- -0x1.a2e9410a7dfedabp-2324,
+ -0x1.482098130df28b74p12578,
});
try testArgs(@Vector(2, f80), .{
- -0x1.a2e9410a7dfedabp-2324,
- 0x1.2b17da3b9746885p-8665,
+ -0x1.275157565b1eee5ep14003,
+ 0x1.a27b82ef4be6132ap3681,
+ });
+ try testArgs(@Vector(3, f80), .{
+ 0x1.9825fbd9b22021fep-10432,
+ -0x1.b8c8c4e5e3911ca8p13568,
+ 0x1.aa99cc199c8e524p9865,
});
try testArgs(@Vector(4, f80), .{
- -0x1.c488fedb7ab646cep-13007,
- 0x1.e914deaccaa50016p2073,
- -0x1.d1c7ae8ec3c9df86p10642,
- -0x1.2da1658f337fa01p9893,
+ -0x1.9d8ab0a36953d0f6p-760,
+ 0x1.869b464121ce6576p-13660,
+ 0x1.a54b1d1e8ae2b62ap12073,
+ -0x1.2abe41c9a9d89ea4p-13141,
+ });
+ try testArgs(@Vector(5, f80), .{
+ 0x1.0fb10e205522f5aep-15041,
+ -0x1.13e0c338580504dap10809,
+ 0x1.50e7c6666fd851acp-5508,
+ -0x1.e2231120481fc762p-8351,
+ 0x1.4fae86dc45b06fe2p10741,
+ });
+ try testArgs(@Vector(7, f80), .{
+ -0x1.fe8f8caa4e8697ecp-2992,
+ 0x1.2623c910a340e286p-14518,
+ 0x1.c5524642a438569p-9469,
+ 0x1.3d416ca0a47c73cep2981,
+ 0x1.a3a1eb1243923114p-6689,
+ -0x1.a55df9ded3010b1cp-5798,
+ -0x1.3d593df395b03e5ap-14382,
});
try testArgs(@Vector(8, f80), .{
- -0x1.bed8a74c43750656p890,
- -0x1.7bf57f38004ac976p8481,
- -0x1.9cdc10ac0657d328p7884,
- 0x1.c86f61883da149fp12293,
- -0x1.528d6957df6bfdd8p14125,
- -0x1.5ebb4006d0243bfep14530,
- -0x1.94b9b18636d12402p-1845,
- -0x1.25439a6d68add188p5962,
+ -0x1.9bb73ea024f4167cp3116,
+ 0x1.adf6241753b29ed2p-4428,
+ -0x1.1494fa8680f9f5f4p2008,
+ -0x1.c68a673c59edeb24p2377,
+ 0x1.26c7ab4021afb6dcp1376,
+ 0x1.c829b0b3935a2ac6p-11758,
+ -0x1.11e39b110c2fb122p-3836,
+ -0x1.6db14745e291d466p1604,
+ });
+ try testArgs(@Vector(9, f80), .{
+ 0x1.f6e537676c132cc6p-10213,
+ -0x1.b86eadf24d8c80eep808,
+ -0x1.54bc27c9a9a2348cp-2369,
+ -0x1.99453820b245bc5p-840,
+ -0x1.93c299090fd981e6p-5264,
+ -0x1.c742059979281ec4p-6347,
+ -0x1.e3efe7b892591d3p-1877,
+ -0x1.350c20a2d59c67dap-8972,
+ -0x1.e3879f20ffc62ff2p-2600,
});
try testArgs(@Vector(1, f128), .{
- -0x1.d1e6fc3b1e66632e7b79051a47dap14300,
+ -0x1.274ece23c1832bfe66a1bc59cf87p-8354,
});
try testArgs(@Vector(2, f128), .{
- 0x1.84b3ac8ffe5893b2c6af8d68de9dp-83,
- -0x1.438ca2c8a0d8e3ee9062d351c46ep-10235,
+ 0x1.838a4e7ba1e2191cebe701eac5d4p6581,
+ 0x1.cdfbda51a2adbce757d7c2e0981bp446,
+ });
+ try testArgs(@Vector(3, f128), .{
+ -0x1.ff45938938f76db417c980c368c6p-7215,
+ -0x1.277a316793a0172e49c7227952ccp10618,
+ 0x1.d85027eb4f4ed3512c10bff9a199p-8465,
});
try testArgs(@Vector(4, f128), .{
- 0x1.04eb03882d4fd1b090e714d3e5ep806,
- -0x1.4082b29f7c26e701764c915642ffp-6182,
- -0x1.b6f1e8565e5040415110f18b519ap13383,
- 0x1.1c29f8c162cead9061c5797ea15ap11957,
+ -0x1.43d8ecf283d4ec6fc4993f385386p-12233,
+ -0x1.384424d239aa2ed9719d2c2d1e58p7346,
+ -0x1.d33fd11001f0ab6d0f9a2790b41cp14692,
+ -0x1.40219a635ef4b042cfb9d7bd9781p900,
+ });
+ try testArgs(@Vector(5, f128), .{
+ -0x1.3273c97faf4619baedaebb51148fp9085,
+ -0x1.f381263ad1033a071dff3a143b14p-13649,
+ -0x1.24b24810f9a1f9b5d1542e2b5841p1425,
+ -0x1.df9e062d482c2bbae0b8fcb07efep-5044,
+ -0x1.15cbca8b8384412d7d09ff76bfe4p-2424,
+ });
+ try testArgs(@Vector(7, f128), .{
+ -0x1.0972e6da79fa8bcd49431d813ea5p12192,
+ 0x1.568e3e61ac4fb17303e4ead041dcp-2542,
+ 0x1.a55c3f0014942187e6d40c72f12p-13437,
+ -0x1.31fb0ec6dbdf7e4ea8ecc307e6f4p13767,
+ -0x1.5dcc12514e3e540fea9dbd257935p-8938,
+ -0x1.32471cd1d5d2a36e9148a8ce879ap-3274,
+ -0x1.3fd3eb6d86a14567e49f358cf029p-4569,
});
try testArgs(@Vector(8, f128), .{
- -0x1.53d7f00cd204d80e5ff5bb665773p11218,
- -0x1.4daa1c81cffe28e8fa5cd703c287p2362,
- -0x1.cc6a71c3ad4560871efdbd025cd7p-8116,
- -0x1.87f8553cf8772fb6b78e7df3e3bap14523,
- -0x1.14b6880f6678f86dfb543dde1c6ep2105,
- 0x1.9d2d4398414da9d857e76e8fd7ccp-13668,
- 0x1.a37f07af240ded458d103c022064p-1158,
- 0x1.425d53e6bd6070b847e5da1ed593p1394,
+ -0x1.05fe5035b415bdc5f8f9ae4c8815p455,
+ -0x1.fafde904d5cad82413daee7b88b8p-244,
+ 0x1.53041230913c654449b12eb4d89bp2214,
+ -0x1.12d9f4b006063e9c0c7bdf19f61ap-2483,
+ 0x1.aee9d4ba013f668773e4f0fd9002p5461,
+ 0x1.a6776670633403e78a3cc6fcf8fdp8324,
+ -0x1.392aa756df3b993ea9db22def53ep15136,
+ 0x1.823ef104549bdd4624961a44736cp-1097,
+ });
+ try testArgs(@Vector(9, f128), .{
+ -0x1.bde12739521a2bff70e510a6aca3p12384,
+ -0x1.0001c77658eb15cd7cb631b4836bp2147,
+ -0x1.f24c72b8cde26d95bd40f689a2aep-1416,
+ -0x1.61957e7946030c0432af0381f64ap-9492,
+ -0x1.631851492fa27fe7adc7441e0d21p16144,
+ -0x1.9dd39ece97e7a70c6d36e7e3026p-15761,
+ 0x1.b044e441d7377755389d0bab3256p-1181,
+ 0x1.5c11719701b7ff21384fbbf32922p-1671,
+ -0x1.1a2944a4dff2a4f96732bf03e8f7p-10567,
});
}
};
@@ -4897,6 +5110,15 @@ test reduceMul {
try test_reduce_mul.testIntVectors();
}
+inline fn reduceAddOptimized(comptime Type: type, rhs: Type) @typeInfo(Type).vector.child { // result is a single scalar of the vector's element type
+ @setFloatMode(.optimized); // select optimized float mode before performing the reduction below
+ return @reduce(.Add, rhs); // add-reduce the elements of `rhs` into one value
+}
+test reduceAddOptimized {
+ const test_reduce_add_optimized = unary(reduceAddOptimized, .{ .compare = .approx }); // NOTE(review): `.approx` presumably tolerates rounding differences from optimized float mode — confirm in `unary`
+ try test_reduce_add_optimized.testFloatVectors(); // this test invokes only the float-vector inputs
+}
+
inline fn splat(comptime Type: type, rhs: Type) Type {
return @splat(rhs[0]);
}
test/behavior/floatop.zig
@@ -290,14 +290,21 @@ test "vector cmp f128" {
}
test "vector cmp f80/c_longdouble" {
- if (true) return error.SkipZigTest;
+ if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .hexagon) return error.SkipZigTest; // each line below skips one backend/target combination instead of skipping unconditionally
+ if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .powerpc64le) return error.SkipZigTest;
+ if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
+ if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
+ if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
+ if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; // the stage2_x86_64 backend runs this only when the object format is ELF or Mach-O
try testCmpVector(f80);
try comptime testCmpVector(f80);
try testCmpVector(c_longdouble);
try comptime testCmpVector(c_longdouble);
}
+
fn testCmpVector(comptime T: type) !void {
+ @setEvalBranchQuota(2_000);
var edges = [_]T{
-math.inf(T),
-math.floatMax(T),
test/cases/float_mode_optimized_reduce.zig
@@ -8,5 +8,5 @@ pub fn main() void {
}
// run
-// backend=llvm
-//
+// backend=stage2,llvm
+// target=x86_64-linux